new data loader thingy

This commit is contained in:
NiiiCo 2021-06-01 19:35:39 +02:00
parent c6f4714a4e
commit 1ed42de47b
6 changed files with 96 additions and 3 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
__pycache__/
set/

6
RUN.py
View File

@ -4,9 +4,10 @@ import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
import network
import dataset_loader
# net = network.Network([784, 30, 10]) # Tested: 94.56%
# net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
# Network fed by dataset_loader; 262144 inputs (= 512 * 512, presumably
# 512x512 single-channel images -- TODO confirm).
net = network.Network([262144, 30, 10]) # Tested: 94.56% -- NOTE(review): same figure as the 784-input run above; presumably copy-pasted, confirm
net.SGD(dataset_loader.loadTrainingSet("training"), 30, 10, 3.0, test_data=dataset_loader.loadTestSet("test"))
# net = network.Network([784, 100, 10]) # Apparently works better
# net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
@ -17,4 +18,3 @@ import network
# net = network.Network([784, 30, 10]) # Apparently does not work at all
# net.SGD(training_data, 30, 10, 100.0, test_data=test_data)
print()

Binary file not shown.

Binary file not shown.

90
dataset_loader.py Normal file
View File

@ -0,0 +1,90 @@
from mnist_loader import load_data
import numpy as np
import os
from PIL import Image
import resource
def vectorized_result(j, num_classes=10):
    """Return a one-hot column vector with a 1.0 in the jth position.

    The result has shape ``(num_classes, 1)`` and is zero everywhere except
    at row ``j``. With the default ``num_classes=10`` this converts a digit
    (0...9) into the corresponding desired output from the neural network.

    Args:
        j: Index of the row to set to 1.0.
        num_classes: Number of rows in the returned vector (default 10).

    Returns:
        A ``numpy.ndarray`` of shape ``(num_classes, 1)``.

    Raises:
        IndexError: If ``j`` is out of bounds for a vector of
            ``num_classes`` rows (raised by numpy's indexing).
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
def loadSet(path, limit=100):
    """Load PNG images found under ``path`` as flat lists of pixel values.

    The directory tree rooted at ``path`` is walked recursively. Each file
    whose name ends in ``.png`` is opened with PIL and its pixels are read
    column by column (x outer, y inner), each value divided by 255. The
    label of an image is the first character of its file name.

    Args:
        path: Root directory to search for PNG files.
        limit: Maximum number of images to load (default 100). Pass
            ``None`` to load every PNG found.

    Returns:
        A tuple ``(pixels, result)`` where ``pixels`` is a list with one
        list of floats per image and ``result`` is the list of matching
        one-character labels, in the same order.

    NOTE(review): ``pix[x, y] / 255`` requires a numeric pixel value, i.e. a
    single-band image; a multi-band image (e.g. RGB) would presumably yield
    a tuple and fail here -- confirm the dataset format.
    """
    # Keep the full joined path of each file so that images located in
    # sub-directories are opened from where os.walk actually found them.
    png_files = [
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(path)
        for file_name in files
        if file_name.endswith(".png")
    ]
    if limit is not None:
        png_files = png_files[:limit]
    total = len(png_files)

    pixels = []
    result = []
    for i, name in enumerate(png_files):
        with Image.open(name) as im:
            pix = im.load()
            width, height = im.size
            pixels.append(
                [pix[x, y] / 255 for x in range(width) for y in range(height)]
            )
        # The label is encoded as the first character of the file name.
        result.append(os.path.basename(name)[0])
        # ru_maxrss is divided by 1024*1024 and labelled "Go" (gigaoctets);
        # this assumes the platform reports kilobytes -- TODO confirm.
        ram_go = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024 * 1024)
        print(
            "%.2f%% Done, ram usage: %.2fGo" % (i / total * 100, ram_go),
            end="\r",
        )
    print("max ram usage: " + str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024 * 1024)) + "Go")
    return (pixels, result)
def loadTrainingSet(path):
    """Build the training data from the images under ``path``.

    Every image returned by ``loadSet`` is reshaped into a ``(262144, 1)``
    column vector and paired with the one-hot vector produced by
    ``vectorized_result`` for its label converted to ``int``.

    Args:
        path: Directory handed to ``loadSet``.

    Returns:
        A ``zip`` iterator of ``(input_vector, target_vector)`` pairs.
    """
    print("importing training set...")
    pixel_rows, labels = loadSet(path)

    inputs = []
    for row in pixel_rows:
        inputs.append(np.reshape(row, (262144, 1)))

    targets = []
    for label in labels:
        targets.append(vectorized_result(int(label)))

    return zip(inputs, targets)
def loadTestSet(path):
    """Build the test data from the images under ``path``.

    Every image returned by ``loadSet`` is reshaped into a ``(262144, 1)``
    column vector and paired with its label as an ``int``.

    ``loadSet`` yields labels as one-character strings. They are converted
    to ``int`` here, matching what ``loadTrainingSet`` does before encoding
    them; a string label would never compare equal to an integer class
    index. NOTE(review): presumably the network's evaluation compares the
    predicted index against this label -- confirm against the network code.

    Args:
        path: Directory handed to ``loadSet``.

    Returns:
        A ``zip`` iterator of ``(input_vector, label)`` pairs.

    Raises:
        ValueError: If a label character is not a valid integer literal.
    """
    print("importing test set...")
    pixel_rows, labels = loadSet(path)
    test_inputs = [np.reshape(row, (262144, 1)) for row in pixel_rows]
    test_labels = [int(label) for label in labels]
    return zip(test_inputs, test_labels)
if __name__ == "__main__":
    # Manual smoke run: load the images under "set" and dump their pixels.
    loaded_pixels = loadSet("set")[0]
    print(loaded_pixels)

Binary file not shown.