diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..71a7689
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+set/
+
diff --git a/RUN.py b/RUN.py
index b977ec9..7f6708d 100644
--- a/RUN.py
+++ b/RUN.py
@@ -4,9 +4,10 @@ import mnist_loader
 training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 
 import network
+import dataset_loader
 
-# net = network.Network([784, 30, 10]) #Testé : 94,56%
-# net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
+net = network.Network([262144, 30, 10])  # 262144 inputs = dataset_loader.INPUT_SIZE (the old 94.56% score was for the [784, 30, 10] MNIST net)
+net.SGD(dataset_loader.loadTrainingSet("training"), 30, 10, 3.0, test_data=dataset_loader.loadTestSet("test"))
 
 # net = network.Network([784, 100, 10]) #Marche mieux apparemment
 # net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
@@ -17,4 +18,3 @@ import network
 # net = network.Network([784, 30, 10]) #Marche pas du tout apparemment
 # net.SGD(training_data, 30, 10, 100.0, test_data=test_data)
 
-print()
\ No newline at end of file
diff --git a/__pycache__/mnist_loader.cpython-38.pyc b/__pycache__/mnist_loader.cpython-38.pyc
new file mode 100644
index 0000000..f64b285
Binary files /dev/null and b/__pycache__/mnist_loader.cpython-38.pyc differ
diff --git a/__pycache__/network.cpython-38.pyc b/__pycache__/network.cpython-38.pyc
new file mode 100644
index 0000000..fa32900
Binary files /dev/null and b/__pycache__/network.cpython-38.pyc differ
diff --git a/dataset_loader.py b/dataset_loader.py
new file mode 100644
index 0000000..9be3ef0
--- /dev/null
+++ b/dataset_loader.py
@@ -0,0 +1,124 @@
+"""Load a directory of PNG images as data sets for ``network.Network``.
+
+The class of every image is read from the first character of its file
+name, which is therefore expected to be a digit between 0 and 9.
+"""
+from mnist_loader import load_data
+import numpy as np
+import os
+from PIL import Image
+import resource
+
+# Number of pixel values per image (for instance 512 x 512).  It has to match
+# the size of the input layer given to ``network.Network``.
+INPUT_SIZE = 262144
+
+# Default upper bound on the number of images read by ``loadSet``.
+MAX_IMAGES = 100
+
+
+def vectorized_result(j):
+    """Return a 10-dimensional unit vector with a 1.0 in the jth
+    position and zeroes elsewhere.  This is used to convert a digit
+    (0...9) into a corresponding desired output from the neural
+    network."""
+    e = np.zeros((10, 1))
+    e[j] = 1.0
+    return e
+
+
+def _max_ram_usage():
+    """Return the peak memory usage of this process, expressed in "Go".
+
+    ``ru_maxrss`` is reported in kilobytes on Linux, so the division by
+    1024 * 1024 only yields gigabytes on that platform.
+    """
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024 * 1024)
+
+
+def loadSet(path, limit=MAX_IMAGES):
+    """Read the PNG images stored under ``path`` (sub-directories included).
+
+    ``limit`` caps the number of images read; pass ``None`` to read them all.
+
+    Return a ``(pixels, labels)`` tuple.  ``pixels`` holds one flat list per
+    image, with every grey level scaled to the [0, 1] range and the image
+    walked column by column.  ``labels`` holds, in the same order, the first
+    character of each file name.
+    """
+    filelist = []
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            # Only keep real PNG files; a plain substring test would also
+            # accept names such as "notes.png.txt".
+            if file.lower().endswith(".png"):
+                filelist.append(os.path.join(root, file))
+
+    if limit is not None:
+        filelist = filelist[:limit]
+
+    pixels = []
+    result = []
+
+    for done, name in enumerate(filelist, start=1):
+        # ``name`` already contains the directory returned by os.walk, so
+        # images stored in sub-directories are opened from the right place.
+        with Image.open(name) as im:
+            # Force a single 8-bit channel so every pixel is one number.
+            gray = im.convert("L")
+            pix = gray.load()
+            width, height = gray.size
+            pixels.append(
+                [pix[x, y] / 255 for x in range(width) for y in range(height)]
+            )
+
+        result.append(os.path.basename(name)[0])
+
+        print(
+            "%.2f%% Done, ram usage: %.2fGo"
+            % (done / len(filelist) * 100, _max_ram_usage()),
+            end='\r',
+        )
+
+    print("max ram usage: " + str(_max_ram_usage()) + "Go")
+
+    return (pixels, result)
+
+
+def loadTrainingSet(path):
+    """Return the images under ``path`` as ``(input, expected_output)`` pairs.
+
+    Each input is an ``(INPUT_SIZE, 1)`` column vector and each expected
+    output is the 10-dimensional unit vector of the image's digit.
+    """
+    print("importing training set...")
+
+    pixels, labels = loadSet(path)
+
+    training_inputs = [np.reshape(x, (INPUT_SIZE, 1)) for x in pixels]
+    training_results = [vectorized_result(int(y)) for y in labels]
+
+    # A list rather than a one-shot zip iterator: it has a length and can be
+    # traversed more than once.
+    return list(zip(training_inputs, training_results))
+
+
+def loadTestSet(path):
+    """Return the images under ``path`` as ``(input, digit)`` pairs.
+
+    Each input is an ``(INPUT_SIZE, 1)`` column vector.  The digit is
+    converted to ``int``: the file-name character is a ``str``, which never
+    compares equal to an integer class index.
+    """
+    print("importing test set...")
+
+    pixels, labels = loadSet(path)
+
+    test_inputs = [np.reshape(x, (INPUT_SIZE, 1)) for x in pixels]
+    test_labels = [int(y) for y in labels]
+
+    return list(zip(test_inputs, test_labels))
+
+
+if __name__ == "__main__":
+    print(loadSet("set")[0])
diff --git a/mnist.pkl.gz b/mnist.pkl.gz
deleted file mode 100644
index 059aba0..0000000
Binary files a/mnist.pkl.gz and /dev/null differ