TIPE-OperationValkyrie/fig/generate_gradient.py

120 lines
3.8 KiB
Python

"""generate_gradient.py
~~~~~~~~~~~~~~~~~~~~~~~
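Use network2 to compute the initial values of the gradient error terms
\delta^l_j = \partial C / \partial z^l_j = \partial C / \partial b^l_j,
averaged over the training examples, and to record how the norms of
these averaged gradients evolve in the hidden layers during training.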
"""
#### Libraries
# Standard library
import json
import math
import random
import shutil
import sys
sys.path.append("../src/")
# My library
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main():
    """Run the gradient experiments for 2, 3 and 4 hidden layers.

    Loads the training data through ``mnist_loader`` and keeps the first
    1000 examples.  For each network depth it prints the initial
    hidden-layer gradient norms, trains for ``epochs`` epochs while the
    norms are recorded to a JSON file, and plots the recorded norms.

    For the two-hidden-layer network only, the averaged initial gradient
    of the first six neurons in every layer except the last is also
    written to ``initial_gradient.json`` and copied to ``../../js/``.
    """
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    td = full_td[:1000]  # Just use the first 1000 items of training data
    epochs = 500  # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    # [:-1] drops the last (output) layer; [:6] keeps six neurons per layer.
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "
          "layer.\nWARNING: This will affect the look of the book, so be "
          "sure to check the\nrelevant material (early chapter 5).")
    # The context manager closes the file even if json.dump raises.
    with open("initial_gradient.json", "w") as f:
        json.dump(abbreviated_gradient, f)
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(
        epochs, "norms_during_training_2_layers.json", 2)

    # The deeper networks follow the same recipe, minus the gradient dump.
    for label, num_hidden in (("Three", 3), ("Four", 4)):
        print("\n%s hidden layers:" % label)
        net = network2.Network([784] + [30] * num_hidden + [10])
        initial_norms(td, net)
        norms_filename = "norms_during_training_%s_layers.json" % num_hidden
        training(td, net, epochs, norms_filename)
        plot_training(epochs, norms_filename, num_hidden)
def initial_norms(training_data, net):
    """Print the norms of the averaged gradient for the hidden layers.

    The gradient is averaged over ``training_data`` with
    ``get_average_gradient``; the last layer's entry is dropped and the
    Euclidean norm of each remaining layer is printed as a list.
    """
    average_gradient = get_average_gradient(net, training_data)
    norms = [list_norm(avg) for avg in average_gradient[:-1]]
    # A parenthesised single argument prints identically on Python 2 and 3.
    print("Average gradient for the hidden layers: "+str(norms))
def training(training_data, net, epochs, filename):
    """Train ``net`` and record the hidden-layer gradient norms per epoch.

    Before each training step the gradient is averaged over
    ``training_data`` and the norm of every layer but the last is
    appended to the record, so the first entry describes the untrained
    network.  ``net.SGD`` is then called once.  After ``epochs``
    iterations the list of per-epoch norm lists is written to
    ``filename`` as JSON.
    """
    norms = []
    for j in range(epochs):
        average_gradient = get_average_gradient(net, training_data)
        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
        print("Epoch: %s" % j)
        # NOTE(review): positional arguments are presumably epochs=1,
        # mini-batch size=1000 and learning rate=0.1 -- confirm against
        # network2.Network.SGD, which is not visible from this file.
        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
    # The context manager closes the file even if json.dump raises.
    with open(filename, "w") as f:
        json.dump(norms, f)
def plot_training(epochs, filename, num_layers):
    """Plot the gradient norms recorded in ``filename`` on a log scale.

    ``filename`` is read as JSON and is expected to hold one list of
    per-layer norms for each epoch.  One curve is drawn per hidden layer
    (``num_layers`` curves in total) against the epoch index.  The figure
    is saved as ``training_speed_<num_layers>_layers.png``, copied to
    ``../../images/`` and then shown.
    """
    # The context manager closes the file even if json.load raises.
    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                # Cycle through the palette so that more than five layers
                # reuse colours instead of raising IndexError.
                color=colors[j % len(colors)],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    # Use the explicit axes/figure rather than pyplot's implicit current
    # figure; both refer to the figure created above.
    ax.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    fig.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()
def get_average_gradient(net, training_data):
    """Return the per-layer gradient averaged over ``training_data``.

    For every ``(x, y)`` pair the first element of ``net.backprop(x, y)``
    is collected (presumably the bias gradients, one array per layer --
    confirm against network2).  The collected results are summed layer
    by layer, each layer's sum is flattened to one dimension, divided by
    the number of training examples and converted to a plain list.
    """
    per_example = []
    for x, y in training_data:
        per_example.append(net.backprop(x, y)[0])
    layer_totals = list_sum(per_example)
    averaged = []
    for total in layer_totals:
        flat = np.reshape(total, len(total))
        averaged.append((flat/len(training_data)).tolist())
    return averaged
def zip_sum(a, b):
    """Return a list of the pairwise sums of the items of ``a`` and ``b``.

    Pairs are formed with ``zip``, so the result is only as long as the
    shorter of the two inputs.
    """
    totals = []
    for left, right in zip(a, b):
        totals.append(left + right)
    return totals
def list_sum(l):
    """Return the element-wise sum of the sequences contained in ``l``.

    The sequences are folded pairwise with ``zip_sum``.  When ``l`` holds
    a single sequence, that sequence is returned as is.

    Raises ``TypeError`` if ``l`` is empty, because the fold has no
    initial value.
    """
    # ``reduce`` is a builtin only on Python 2; the functools version is
    # available on both Python 2.6+ and Python 3.
    from functools import reduce
    return reduce(zip_sum, l)
def list_norm(l):
    """Return the Euclidean norm of the numbers in ``l``.

    This is the square root of the sum of the squared entries; an empty
    input gives ``0.0``.
    """
    sum_of_squares = 0
    for value in l:
        sum_of_squares = sum_of_squares + value*value
    return math.sqrt(sum_of_squares)
# Run the experiments only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()