"""generate_gradient.py
|
|
~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Use network2 to figure out the average starting values of the gradient
|
|
error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
|
|
\partial b^l_j.
|
|
|
|
"""

#### Libraries
# Standard library
import json
import math
import shutil
import sys
from functools import reduce  # on Python 3, reduce is no longer a builtin
sys.path.append("../src/")

# My library
import mnist_loader
import network2

# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np

def main():
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    td = full_td[:1000]  # Just use the first 1000 items of training data
    epochs = 500  # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "
          "layer.\nWARNING: This will affect the look of the book, so be "
          "sure to check the\nrelevant material (early chapter 5).")
    with open("initial_gradient.json", "w") as f:
        json.dump(abbreviated_gradient, f)
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)

    print("\nThree hidden layers:")
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(
        epochs, "norms_during_training_3_layers.json", 3)

    print("\nFour hidden layers:")
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs,
             "norms_during_training_4_layers.json")
    plot_training(
        epochs, "norms_during_training_4_layers.json", 4)

def initial_norms(training_data, net):
    # Report the norm of the average gradient for each hidden layer
    # ([:-1] drops the output layer) before any training has happened.
    average_gradient = get_average_gradient(net, training_data)
    norms = [list_norm(avg) for avg in average_gradient[:-1]]
    print("Average gradient for the hidden layers: " + str(norms))

def training(training_data, net, epochs, filename):
    norms = []
    for j in range(epochs):
        # Record the per-layer gradient norms before this epoch's update
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print("Epoch: %s" % j)
        # Train for a single epoch: mini-batch size 1000 (i.e. full batch
        # on the 1000-item training set), eta = 0.1, L2 lmbda = 5.0
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    with open(filename, "w") as f:
        json.dump(norms, f)

def plot_training(epochs, filename, num_layers):
    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                color=colors[j],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()

def get_average_gradient(net, training_data):
    # Average the per-example bias gradients (i.e. the error terms
    # \delta) over the training data, returning one flat list per layer.
    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
    gradient = list_sum(nabla_b_results)
    return [(np.reshape(g, len(g))/len(training_data)).tolist()
            for g in gradient]

def zip_sum(a, b):
    return [x+y for (x, y) in zip(a, b)]

def list_sum(l):
    return reduce(zip_sum, l)

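# For example, with plain numbers: zip_sum([1, 2], [10, 20]) == [11, 22],
# and list_sum([[1, 2], [10, 20], [100, 200]]) == [111, 222].  Here the
# inner elements are numpy arrays (one per layer) rather than numbers,
# but the elementwise-sum-then-fold logic is the same.
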
def list_norm(l):
    return math.sqrt(sum(x*x for x in l))

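# To regenerate the figures, run this script directly, assuming the
# directory layout implied by the sys.path.append above (mnist_loader.py
# and network2.py in ../src/):
#
#     python generate_gradient.py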
if __name__ == "__main__":
    main()