"""Load a directory of labelled digit PNG images into network-ready arrays.

Each image file name is expected to start with the digit it depicts
(for example ``3_0001.png``); that leading character is used as the label.
"""

import os
import resource

import numpy as np
from PIL import Image

from mnist_loader import load_data


def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth position
    and zeroes elsewhere.

    This is used to convert a digit (0...9) into a corresponding desired
    output from the neural network.
    """
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e


def _max_rss_gigabytes():
    """Return this process's peak resident set size divided by 1024 * 1024.

    NOTE(review): ``ru_maxrss`` is in kilobytes on Linux (so the result is in
    gigabytes there) but in bytes on macOS -- confirm the target platform.
    """
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024 * 1024)


def loadSet(path, limit=500):
    """Read PNG images found under ``path`` and return their pixels and labels.

    The directory tree is walked recursively.  At most ``limit`` files are
    examined (in ``os.walk`` order); among those, every file whose extension
    is ``.png`` is opened and flattened into a list of pixel values scaled by
    1/255.  Progress and peak memory usage are printed while loading.

    Args:
        path: Root directory to search for images.
        limit: Maximum number of files to examine (non-negative), or ``None``
            for no limit.  Defaults to 500.

    Returns:
        A ``(pixels, result)`` tuple.  ``pixels`` is a list with one flat list
        of floats per image; ``result`` is the list of integer labels in the
        same order.

    Raises:
        ValueError: If a PNG file name does not start with a digit.
    """
    filelist = [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(path)
        for filename in files
    ]

    candidates = filelist[:limit]
    total = len(candidates)

    pixels = []
    result = []
    for done, name in enumerate(candidates, start=1):
        if os.path.splitext(name)[1].lower() == ".png":
            # Open the path produced by os.walk itself so that images located
            # in subdirectories of ``path`` are found as well.
            with Image.open(name) as im:
                pix = im.load()
                # The label is the first character of the file name.
                result.append(int(os.path.basename(name)[0]))
                width, height = im.size
                # Pixels are visited column by column (x outer, y inner).
                # Assumes a single-channel image so that each pixel is a
                # number -- TODO confirm for the data set in use.
                pixels.append(
                    [pix[x, y] / 255 for x in range(width) for y in range(height)]
                )
        # '\r' keeps the progress report on a single, continuously
        # overwritten terminal line.
        print(
            "%.2f%% Done, ram usage: %.2fGo"
            % (round(done / total * 100, 2), round(_max_rss_gigabytes(), 2)),
            end='\r',
        )

    print("max ram usage: " + str(_max_rss_gigabytes()) + "Go")
    return (pixels, result)


def loadTrainingSet(path):
    """Load the images under ``path`` as ``(input, one-hot label)`` pairs.

    Every image is reshaped to a ``(784, 1)`` column vector, so each image
    must contain exactly 784 pixels.  Labels are converted with
    ``vectorized_result``.

    Returns:
        A ``zip`` object yielding ``(input, target)`` tuples.  It is a
        single-pass iterator: wrap it in ``list()`` if it has to be traversed
        more than once.
    """
    print("importing training set...")
    images, labels = loadSet(path)
    training_inputs = [np.reshape(x, (784, 1)) for x in images]
    training_results = [vectorized_result(int(y)) for y in labels]
    training_data = zip(training_inputs, training_results)
    return training_data


def loadTestSet(path):
    """Load the images under ``path`` as ``(input, integer label)`` pairs.

    Every image is reshaped to a ``(784, 1)`` column vector, so each image
    must contain exactly 784 pixels.  Labels are left as plain integers.

    Returns:
        A ``zip`` object yielding ``(input, label)`` tuples.  It is a
        single-pass iterator: wrap it in ``list()`` if it has to be traversed
        more than once.
    """
    print("importing test set...")
    images, labels = loadSet(path)
    test_inputs = [np.reshape(x, (784, 1)) for x in images]
    test_data = zip(test_inputs, labels)
    return test_data


if __name__ == "__main__":
    print(loadSet("set")[0])