First commit

NiiiCo 2021-05-30 21:31:10 +02:00
parent 26247b7afb
commit 09ec5c7e62
84 changed files with 2578 additions and 0 deletions

Binary file not shown. (29 KiB)

fig/backprop_magnitude_nabla.py Normal file

@@ -0,0 +1,52 @@
"""
backprop_magnitude_nabla
~~~~~~~~~~~~~~~~~~~~~~~~
Using backprop2 I constructed a 784-30-30-30-30-30-10 network to classify
MNIST data. I ran ten mini-batches of size 100, with eta = 0.01 and
lambda = 0.05, using:
net.SGD(otd[:1000], 1, 100, 0.01, 0.05)
I obtained the following norms for the (unregularized) nabla_w for the
respective mini-batches:
[0.90845722175923671, 2.8852730656073566, 10.696793986223632, 37.75701921183488, 157.7365422527995, 304.43990075227839]
[0.22493835119537842, 0.6555126517964851, 2.6036801277234076, 11.408825365731225, 46.882319190445472, 70.499637502698221]
[0.11935180022357521, 0.19756069137133489, 0.8152794148335869, 3.4590802543293977, 15.470507965493903, 31.032396017142556]
[0.15130005837653659, 0.39687135985664701, 1.4810006139254532, 4.392519005642268, 16.831939776937311, 34.082104455938733]
[0.11594085276308999, 0.17177668061395848, 0.72204558746599512, 3.05062409378366, 14.133001132214286, 29.776204839994385]
[0.10790389807606221, 0.20707152756018626, 0.96348134037828603, 3.9043824079499561, 15.986873430586924, 39.195258080490895]
[0.088613291101645356, 0.129173436407863, 0.4242933114455002, 1.6154682713449411, 7.5451567587160069, 20.180545544006566]
[0.086175380639289575, 0.12571016850457151, 0.44231149185805047, 1.8435833504677326, 7.61973813981073, 19.474539356281781]
[0.095372080184163904, 0.15854489503205446, 0.70244235144444678, 2.6294803575724157, 10.427062019753425, 24.309420272033819]
[0.096453131000155692, 0.13574642196947601, 0.53551377709415471, 2.0247466793066895, 9.4503978546018068, 21.73772148470092]
Note that results are listed in order of layer. They clearly show how
the magnitude of nabla_w decreases as we go back through layers.
In this program I take mini-batches 7, 8, 9 as representative and plot
them. I omit the results from the first and final layers since they
them. I omit the results from the first and final layers since they
correspond to 784 input neurons and 10 output neurons, not 30 as in
the other layers, making it difficult to compare results.
Note that I haven't attempted to preserve the whole workflow here. It
involved some minor hacking around with backprop2, which messed up
that code. That's why I've simply put the results in by hand below.
"""
# Third-party libraries
import matplotlib.pyplot as plt
nw1 = [0.129173436407863, 0.4242933114455002,
1.6154682713449411, 7.5451567587160069]
nw2 = [0.12571016850457151, 0.44231149185805047,
1.8435833504677326, 7.61973813981073]
nw3 = [0.15854489503205446, 0.70244235144444678,
2.6294803575724157, 10.427062019753425]
plt.plot(range(1, 5), nw1, "ro-", range(1, 5), nw2, "go-",
range(1, 5), nw3, "bo-")
plt.xlabel('Layer $l$')
plt.ylabel(r"$\Vert\nabla C^l_w\Vert$")
plt.xticks([1, 2, 3, 4])
plt.show()
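
As a sanity check on the hand-copied norms above, the per-layer values can be recomputed directly rather than pasted in. A minimal sketch, assuming net.backprop(x, y) returns a (nabla_b, nabla_w) pair of per-layer numpy arrays, as in the backprop2 code mentioned in the docstring:

import numpy as np

def nabla_w_norms(net, mini_batch):
    # Accumulate the weight gradient over the mini-batch, layer by layer
    total = [np.zeros(w.shape) for w in net.weights]
    for x, y in mini_batch:
        nabla_b, nabla_w = net.backprop(x, y)
        total = [t + nw for t, nw in zip(total, nabla_w)]
    # Frobenius norm of each layer's (unregularized) nabla_w
    return [np.linalg.norm(t) for t in total]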

fig/data_1000.json Normal file

File diff suppressed because one or more lines are too long

BIN fig/digits.png Normal file (8.2 KiB)

Binary file not shown.

BIN fig/digits_separate.png Normal file (8 KiB)

Binary file not shown.

BIN fig/false_minima.png Normal file (147 KiB)

Binary file not shown.

fig/false_minima.py Normal file

@@ -0,0 +1,40 @@
"""
false_minimum
~~~~~~~~~~~~~
Plots a function of two variables with many false minima."""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-5, 5, 0.1)
Y = numpy.arange(-5, 5, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = numpy.sin(X)*numpy.sin(Y)+0.2*X
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
for y in xrange(len(Y)):
colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-5, 5)
ax.set_ylim3d(-5, 5)
ax.set_zlim3d(-2, 2)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
plt.show()
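
The false minima are easy to exhibit numerically as well as visually: gradient-based minimization started from different points settles into different minima. A small sketch, not part of the figure code, with scipy assumed available:

import numpy as np
from scipy.optimize import minimize

def f(v):
    x, y = v
    return np.sin(x)*np.sin(y) + 0.2*x

for start in [(-4.0, -4.0), (0.5, 0.5), (3.0, 3.0)]:
    res = minimize(f, start)
    print("start %s -> minimum near %s, f = %.3f" % (start, res.x, res.fun))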

fig/generate_gradient.py Normal file

@@ -0,0 +1,119 @@
"""generate_gradient.py
~~~~~~~~~~~~~~~~~~~~~~~
Use network2 to figure out the average starting values of the gradient
error terms \delta^l_j = \partial C / \partial z^l_j = \partial C /
\partial b^l_j.
"""
#### Libraries
# Standard library
import json
import math
import random
import shutil
import sys
sys.path.append("../src/")
# My library
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main():
# Load the data
full_td, _, _ = mnist_loader.load_data_wrapper()
td = full_td[:1000] # Just use the first 1000 items of training data
epochs = 500 # Number of epochs to train for
print "\nTwo hidden layers:"
net = network2.Network([784, 30, 30, 10])
initial_norms(td, net)
abbreviated_gradient = [
ag[:6] for ag in get_average_gradient(net, td)[:-1]]
print "Saving the averaged gradient for the top six neurons in each "+\
"layer.\nWARNING: This will affect the look of the book, so be "+\
"sure to check the\nrelevant material (early chapter 5)."
f = open("initial_gradient.json", "w")
json.dump(abbreviated_gradient, f)
f.close()
shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
training(td, net, epochs, "norms_during_training_2_layers.json")
plot_training(
epochs, "norms_during_training_2_layers.json", 2)
print "\nThree hidden layers:"
net = network2.Network([784, 30, 30, 30, 10])
initial_norms(td, net)
training(td, net, epochs, "norms_during_training_3_layers.json")
plot_training(
epochs, "norms_during_training_3_layers.json", 3)
print "\nFour hidden layers:"
net = network2.Network([784, 30, 30, 30, 30, 10])
initial_norms(td, net)
training(td, net, epochs,
"norms_during_training_4_layers.json")
plot_training(
epochs, "norms_during_training_4_layers.json", 4)
def initial_norms(training_data, net):
average_gradient = get_average_gradient(net, training_data)
norms = [list_norm(avg) for avg in average_gradient[:-1]]
print "Average gradient for the hidden layers: "+str(norms)
def training(training_data, net, epochs, filename):
norms = []
for j in range(epochs):
average_gradient = get_average_gradient(net, training_data)
norms.append([list_norm(avg) for avg in average_gradient[:-1]])
print "Epoch: %s" % j
net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
f = open(filename, "w")
json.dump(norms, f)
f.close()
def plot_training(epochs, filename, num_layers):
f = open(filename, "r")
norms = json.load(f)
f.close()
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
for j in range(num_layers):
ax.plot(np.arange(epochs),
[n[j] for n in norms],
color=colors[j],
label="Hidden layer %s" % (j+1,))
ax.set_xlim([0, epochs])
ax.grid(True)
ax.set_xlabel('Number of epochs of training')
ax.set_title('Speed of learning: %s hidden layers' % num_layers)
ax.set_yscale('log')
plt.legend(loc="upper right")
fig_filename = "training_speed_%s_layers.png" % num_layers
plt.savefig(fig_filename)
shutil.copy(fig_filename, "../../images/"+fig_filename)
plt.show()
def get_average_gradient(net, training_data):
nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
gradient = list_sum(nabla_b_results)
return [(np.reshape(g, len(g))/len(training_data)).tolist()
for g in gradient]
def zip_sum(a, b):
return [x+y for (x, y) in zip(a, b)]
def list_sum(l):
return reduce(zip_sum, l)
def list_norm(l):
return math.sqrt(sum([x*x for x in l]))
if __name__ == "__main__":
main()
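
The identity quoted in the docstring is worth one line of justification. In the book's notation, z^l_j = \sum_k w^l_{jk} a^{l-1}_k + b^l_j, so \partial z^l_j / \partial b^l_j = 1 and the chain rule gives

\frac{\partial C}{\partial b^l_j}
  = \frac{\partial C}{\partial z^l_j}\,\frac{\partial z^l_j}{\partial b^l_j}
  = \delta^l_j,

which is why the program can read the \delta error terms straight off the nabla_b values returned by net.backprop.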

fig/initial_gradient.json Normal file

@@ -0,0 +1 @@
[[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]

BIN fig/misleading_gradient.png Normal file (186 KiB)

Binary file not shown.

fig/misleading_gradient.py Normal file

@@ -0,0 +1,43 @@
"""
misleading_gradient
~~~~~~~~~~~~~~~~~~~
Plots a function which misleads the gradient descent algorithm."""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-1, 1, 0.025)
Y = numpy.arange(-1, 1, 0.025)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + 10*Y**2
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
for y in xrange(len(Y)):
colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 12)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
ax.text(0.05, -1.8, 0, "$w_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$w_2$", fontsize=20)
ax.text(1.79, 0, 9.62, "$C$", fontsize=20)
plt.show()
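
A quick way to see why this surface misleads gradient descent: the gradient of C = w_1^2 + 10 w_2^2 is (2 w_1, 20 w_2), dominated by the steep w_2 direction, so any learning rate small enough to keep w_2 stable makes progress along w_1 very slow. A sketch with assumed values, not part of the figure code:

import numpy as np

w = np.array([1.0, 1.0])
eta = 0.01                   # kept small for stability along the steep w2 axis
for step in range(50):
    grad = np.array([2*w[0], 20*w[1]])   # gradient of w1**2 + 10*w2**2
    w = w - eta*grad
print("after 50 steps: w = %s" % w)
# w2 shrinks by a factor of 0.8 per step and is essentially converged,
# while w1 shrinks by only 0.98 per step and is still far from 0.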

BIN fig/misleading_gradient_contours.png Normal file (58 KiB)

Binary file not shown.

fig/misleading_gradient_contours.py Normal file

@@ -0,0 +1,21 @@
"""
misleading_gradient_contours
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Plots the contours of the function from misleading_gradient.py"""
#### Libraries
# Third party libraries
import matplotlib.pyplot as plt
import numpy
X = numpy.arange(-1, 1, 0.02)
Y = numpy.arange(-1, 1, 0.02)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + 10*Y**2
plt.figure()
CS = plt.contour(X, Y, Z, levels=[0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
plt.xlabel("$w_1$", fontsize=16)
plt.ylabel("$w_2$", fontsize=16)
plt.show()

fig/mnist.py Normal file

@@ -0,0 +1,234 @@
"""
mnist
~~~~~
Draws images based on the MNIST data."""
#### Libraries
# Standard library
import cPickle
import sys
# My library
sys.path.append('../src/')
import mnist_loader
# Third-party libraries
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
def main():
training_set, validation_set, test_set = mnist_loader.load_data()
images = get_images(training_set)
plot_rotated_image(images[0])
#### Plotting
def plot_images_together(images):
""" Plot a single image containing all six MNIST images, one after
the other. Note that we crop the sides of the images so that they
appear reasonably close together."""
fig = plt.figure()
images = [image[:, 3:25] for image in images]
image = np.concatenate(images, axis=1)
ax = fig.add_subplot(1, 1, 1)
ax.matshow(image, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_10_by_10_images(images):
""" Plot 100 MNIST images in a 10 by 10 table. Note that we crop
the images so that they appear reasonably close together. The
image is post-processed to give the appearance of being continued."""
fig = plt.figure()
images = [image[3:25, 3:25] for image in images]
#image = np.concatenate(images, axis=1)
for x in range(10):
for y in range(10):
            ax = fig.add_subplot(10, 10, 10*y+x+1)  # subplot indices are 1-based
ax.matshow(images[10*y+x], cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_images_separately(images):
"Plot the six MNIST images separately."
fig = plt.figure()
for j in xrange(1, 7):
ax = fig.add_subplot(1, 6, j)
ax.matshow(images[j-1], cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_mnist_digit(image):
""" Plot a single MNIST image."""
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.matshow(image, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_2_and_1(images):
"Plot a 2 and a 1 image from the MNIST set."
fig = plt.figure()
ax = fig.add_subplot(1, 2, 1)
ax.matshow(images[5], cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
ax = fig.add_subplot(1, 2, 2)
ax.matshow(images[3], cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_top_left(image):
"Plot the top left of ``image``."
image[14:,:] = np.zeros((14,28))
image[:,14:] = np.zeros((28,14))
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.matshow(image, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_bad_images(images):
"""This takes a list of images misclassified by a pretty good
neural network --- one achieving over 93 percent accuracy --- and
turns them into a figure."""
bad_image_indices = [8, 18, 33, 92, 119, 124, 149, 151, 193, 233, 241, 247, 259, 300, 313, 321, 324, 341, 349, 352, 359, 362, 381, 412, 435, 445, 449, 478, 479, 495, 502, 511, 528, 531, 547, 571, 578, 582, 597, 610, 619, 628, 629, 659, 667, 691, 707, 717, 726, 740, 791, 810, 844, 846, 898, 938, 939, 947, 956, 959, 965, 982, 1014, 1033, 1039, 1044, 1050, 1055, 1107, 1112, 1124, 1147, 1181, 1191, 1192, 1198, 1202, 1204, 1206, 1224, 1226, 1232, 1242, 1243, 1247, 1256, 1260, 1263, 1283, 1289, 1299, 1310, 1319, 1326, 1328, 1357, 1378, 1393, 1413, 1422, 1435, 1467, 1469, 1494, 1500, 1522, 1523, 1525, 1527, 1530, 1549, 1553, 1609, 1611, 1634, 1641, 1676, 1678, 1681, 1709, 1717, 1722, 1730, 1732, 1737, 1741, 1754, 1759, 1772, 1773, 1790, 1808, 1813, 1823, 1843, 1850, 1857, 1868, 1878, 1880, 1883, 1901, 1913, 1930, 1938, 1940, 1952, 1969, 1970, 1984, 2001, 2009, 2016, 2018, 2035, 2040, 2043, 2044, 2053, 2063, 2098, 2105, 2109, 2118, 2129, 2130, 2135, 2148, 2161, 2168, 2174, 2182, 2185, 2186, 2189, 2224, 2229, 2237, 2266, 2272, 2293, 2299, 2319, 2325, 2326, 2334, 2369, 2371, 2380, 2381, 2387, 2393, 2395, 2406, 2408, 2414, 2422, 2433, 2450, 2488, 2514, 2526, 2548, 2574, 2589, 2598, 2607, 2610, 2631, 2648, 2654, 2695, 2713, 2720, 2721, 2730, 2770, 2771, 2780, 2863, 2866, 2896, 2907, 2925, 2927, 2939, 2995, 3005, 3023, 3030, 3060, 3073, 3102, 3108, 3110, 3114, 3115, 3117, 3130, 3132, 3157, 3160, 3167, 3183, 3189, 3206, 3240, 3254, 3260, 3280, 3329, 3330, 3333, 3383, 3384, 3475, 3490, 3503, 3520, 3525, 3559, 3567, 3573, 3597, 3598, 3604, 3629, 3664, 3702, 3716, 3718, 3725, 3726, 3727, 3751, 3752, 3757, 3763, 3766, 3767, 3769, 3776, 3780, 3798, 3806, 3808, 3811, 3817, 3821, 3838, 3848, 3853, 3855, 3869, 3876, 3902, 3906, 3926, 3941, 3943, 3951, 3954, 3962, 3976, 3985, 3995, 4000, 4002, 4007, 4017, 4018, 4065, 4075, 4078, 4093, 4102, 4139, 4140, 4152, 4154, 4163, 4165, 4176, 4199, 4201, 4205, 4207, 4212, 4224, 4238, 4248, 4256, 4284, 4289, 4297, 4300, 4306, 4344, 4355, 4356, 4359, 4360, 4369, 4405, 4425, 4433, 4435, 4449, 4487, 4497, 4498, 4500, 4521, 4536, 4548, 4563, 4571, 4575, 4601, 4615, 4620, 4633, 4639, 4662, 4690, 4722, 4731, 4735, 4737, 4739, 4740, 4761, 4798, 4807, 4814, 4823, 4833, 4837, 4874, 4876, 4879, 4880, 4886, 4890, 4910, 4950, 4951, 4952, 4956, 4963, 4966, 4968, 4978, 4990, 5001, 5020, 5054, 5067, 5068, 5078, 5135, 5140, 5143, 5176, 5183, 5201, 5210, 5331, 5409, 5457, 5495, 5600, 5601, 5617, 5623, 5634, 5642, 5677, 5678, 5718, 5734, 5735, 5749, 5752, 5771, 5787, 5835, 5842, 5845, 5858, 5887, 5888, 5891, 5906, 5913, 5936, 5937, 5945, 5955, 5957, 5972, 5973, 5985, 5987, 5997, 6035, 6042, 6043, 6045, 6053, 6059, 6065, 6071, 6081, 6091, 6112, 6124, 6157, 6166, 6168, 6172, 6173, 6347, 6370, 6386, 6390, 6391, 6392, 6421, 6426, 6428, 6505, 6542, 6555, 6556, 6560, 6564, 6568, 6571, 6572, 6597, 6598, 6603, 6608, 6625, 6651, 6694, 6706, 6721, 6725, 6740, 6746, 6768, 6783, 6785, 6796, 6817, 6827, 6847, 6870, 6872, 6926, 6945, 7002, 7035, 7043, 7089, 7121, 7130, 7198, 7216, 7233, 7248, 7265, 7426, 7432, 7434, 7494, 7498, 7691, 7777, 7779, 7797, 7800, 7809, 7812, 7821, 7849, 7876, 7886, 7897, 7902, 7905, 7917, 7921, 7945, 7999, 8020, 8059, 8081, 8094, 8095, 8115, 8246, 8256, 8262, 8272, 8273, 8278, 8279, 8293, 8322, 8339, 8353, 8408, 8453, 8456, 8502, 8520, 8522, 8607, 9009, 9010, 9013, 9015, 9019, 9022, 9024, 9026, 9036, 9045, 9046, 9128, 9214, 9280, 9316, 9342, 9382, 9433, 9446, 9506, 9540, 9544, 9587, 9614, 9634, 9642, 9645, 9700, 9716, 9719, 9729, 9732, 9738, 9740, 9741, 9742, 9744, 9745, 9749, 
9752, 9768, 9770, 9777, 9779, 9792, 9808, 9831, 9839, 9856, 9858, 9867, 9879, 9883, 9888, 9890, 9893, 9905, 9944, 9970, 9982]
n = len(bad_image_indices)
bad_images = [images[j] for j in bad_image_indices]
fig = plt.figure(figsize=(10, 15))
for j in xrange(1, n+1):
ax = fig.add_subplot(25, 125, j)
ax.matshow(bad_images[j-1], cmap = matplotlib.cm.binary)
ax.set_title(str(bad_image_indices[j-1]))
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.subplots_adjust(hspace = 1.2)
plt.show()
def plot_really_bad_images(images):
"""This takes a list of the worst images from plot_bad_images and
turns them into a figure."""
really_bad_image_indices = [
324, 582, 659, 726, 846, 956, 1124, 1393,
1773, 1868, 2018, 2109, 2654, 4199, 4201, 4620, 5457, 5642]
n = len(really_bad_image_indices)
really_bad_images = [images[j] for j in really_bad_image_indices]
fig = plt.figure(figsize=(10, 2))
for j in xrange(1, n+1):
ax = fig.add_subplot(2, 9, j)
ax.matshow(really_bad_images[j-1], cmap = matplotlib.cm.binary)
#ax.set_title(str(really_bad_image_indices[j-1]))
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_features(image):
"Plot the top right, bottom left, and bottom right of ``image``."
image_1, image_2, image_3 = np.copy(image), np.copy(image), np.copy(image)
image_1[:,:14] = np.zeros((28,14))
image_1[14:,:] = np.zeros((14,28))
image_2[:,14:] = np.zeros((28,14))
image_2[:14,:] = np.zeros((14,28))
image_3[:14,:] = np.zeros((14,28))
image_3[:,:14] = np.zeros((28,14))
fig = plt.figure()
ax = fig.add_subplot(1, 3, 1)
ax.matshow(image_1, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
ax = fig.add_subplot(1, 3, 2)
ax.matshow(image_2, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
ax = fig.add_subplot(1, 3, 3)
ax.matshow(image_3, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
def plot_rotated_image(image):
    """ Plot an MNIST digit and a version rotated by 15 degrees."""
# Do the initial plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.matshow(image, cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
# Set up the rotated image. There are fast matrix techniques
# for doing this, but we'll do a pedestrian approach
rot_image = np.zeros((28,28))
theta = 15*np.pi/180 # 15 degrees
def to_xy(j, k):
# Converts from matrix indices to x, y co-ords, using the
# 13, 14 matrix entry as the origin
return (k-13, -j+14) # x range: -13..14, y range: -13..14
def to_jk(x, y):
# Converts from x, y co-ords to matrix indices
return (-y+14, x+13)
def image_value(image, x, y):
# returns the value of the image at co-ordinate x, y
        # (Note that this could be written as a closure capturing
        # ``image``, so that image wouldn't need to be passed explicitly)
j, k = to_jk(x, y)
return image[j, k]
# Element by element, figure out what should be in the rotated
# image. We simply take each matrix entry, figure out the
# corresponding x, y co-ordinates, rotate backward, and then
# average the nearby matrix elements. It's not perfect, and it's
# not fast, but it works okay.
for j in range(28):
for k in range(28):
x, y = to_xy(j, k)
# rotate by -theta
x1 = np.cos(theta)*x + np.sin(theta)*y
y1 = -np.sin(theta)*x + np.cos(theta)*y
# Nearest integer x entries are x2 and x2+1. delta_x
# measures how to interpolate
x2 = np.floor(x1)
delta_x = x1-x2
# Similarly for y
y2 = np.floor(y1)
delta_y = y1-y2
# Check if we're out of bounds, and if so continue to next entry
# This will miss a boundary row and layer, but that's okay,
# MNIST digits usually don't go that near the boundary.
if x2 < -13 or x2 > 13 or y2 < -13 or y2 > 13: continue
# If we're in bounds, average the nearby entries.
value \
= (1-delta_x)*(1-delta_y)*image_value(image, x2, y2)+\
(1-delta_x)*delta_y*image_value(image, x2, y2+1)+\
delta_x*(1-delta_y)*image_value(image, x2+1, y2)+\
delta_x*delta_y*image_value(image, x2+1, y2+1)
# Rescale the value by a hand-set fudge factor. This
# seems to be necessary because the averaging doesn't
# quite work right. The fudge-factor should probably be
# theta-dependent, but I've set it by hand.
rot_image[j, k] = 1.3*value
plot_mnist_digit(rot_image)
#### Miscellanea
def load_data():
""" Return the MNIST data as a tuple containing the training data,
the validation data, and the test data."""
f = open('../data/mnist.pkl', 'rb')
training_set, validation_set, test_set = cPickle.load(f)
f.close()
return (training_set, validation_set, test_set)
def get_images(training_set):
""" Return a list containing the images from the MNIST data
set. Each image is represented as a 2-d numpy array."""
flattened_images = training_set[0]
return [np.reshape(f, (-1, 28)) for f in flattened_images]
#### Main
if __name__ == "__main__":
main()
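
The "fast matrix techniques" alluded to in plot_rotated_image exist off the shelf; for instance, scipy.ndimage performs the same kind of interpolated rotation without the hand-rolled loop (and without the 1.3 fudge factor). A sketch, assuming scipy is available:

from scipy import ndimage

def rotate_image(image, degrees=15):
    # order=1 requests bilinear interpolation, comparable to the
    # element-by-element averaging done in plot_rotated_image
    return ndimage.rotate(image, degrees, reshape=False, order=1)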

BIN fig/mnist_100_digits.png Normal file (57 KiB)

Binary file not shown.

BIN fig/mnist_2_and_1.png Normal file (5.4 KiB)

Binary file not shown.

BIN fig/mnist_complete_zero.png Normal file (4.8 KiB)

Binary file not shown.

BIN fig/mnist_first_digit.png Normal file (4.8 KiB)

Binary file not shown.

Binary file not shown. (4.6 KiB)

Binary file not shown. (12 KiB)

Binary file not shown. (3.8 KiB)

fig/more_data.json Normal file

@@ -0,0 +1 @@
[69.09, 76.37, 85.29, 88.85, 91.27, 93.24, 94.89, 95.85, 95.97]

BIN fig/more_data.png Normal file (32 KiB)

Binary file not shown.

fig/more_data.py Normal file

@@ -0,0 +1,122 @@
"""more_data
~~~~~~~~~~~~
Plot graphs to illustrate the performance of MNIST when different size
training sets are used.
"""
# Standard library
import json
import random
import sys
# My library
sys.path.append('../src/')
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
# The sizes to use for the different training sets
SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]
def main():
run_networks()
run_svms()
make_plots()
def run_networks():
# Make results more easily reproducible
random.seed(12345678)
np.random.seed(12345678)
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
accuracies = []
for size in SIZES:
print "\n\nTraining network with data set size %s" % size
net.large_weight_initializer()
num_epochs = 1500000 / size
net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda = size*0.0001)
accuracy = net.accuracy(validation_data) / 100.0
print "Accuracy was %s percent" % accuracy
accuracies.append(accuracy)
f = open("more_data.json", "w")
json.dump(accuracies, f)
f.close()
def run_svms():
svm_training_data, svm_validation_data, svm_test_data \
= mnist_loader.load_data()
accuracies = []
for size in SIZES:
print "\n\nTraining SVM with data set size %s" % size
clf = svm.SVC()
clf.fit(svm_training_data[0][:size], svm_training_data[1][:size])
predictions = [int(a) for a in clf.predict(svm_validation_data[0])]
accuracy = sum(int(a == y) for a, y in
zip(predictions, svm_validation_data[1])) / 100.0
print "Accuracy was %s percent" % accuracy
accuracies.append(accuracy)
f = open("more_data_svm.json", "w")
json.dump(accuracies, f)
f.close()
def make_plots():
f = open("more_data.json", "r")
accuracies = json.load(f)
f.close()
f = open("more_data_svm.json", "r")
svm_accuracies = json.load(f)
f.close()
make_linear_plot(accuracies)
make_log_plot(accuracies)
make_combined_plot(accuracies, svm_accuracies)
def make_linear_plot(accuracies):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(SIZES, accuracies, color='#2A6EA6')
ax.plot(SIZES, accuracies, "o", color='#FFA933')
ax.set_xlim(0, 50000)
ax.set_ylim(60, 100)
ax.grid(True)
ax.set_xlabel('Training set size')
ax.set_title('Accuracy (%) on the validation data')
plt.show()
def make_log_plot(accuracies):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(SIZES, accuracies, color='#2A6EA6')
ax.plot(SIZES, accuracies, "o", color='#FFA933')
ax.set_xlim(100, 50000)
ax.set_ylim(60, 100)
ax.set_xscale('log')
ax.grid(True)
ax.set_xlabel('Training set size')
ax.set_title('Accuracy (%) on the validation data')
plt.show()
def make_combined_plot(accuracies, svm_accuracies):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(SIZES, accuracies, color='#2A6EA6')
ax.plot(SIZES, accuracies, "o", color='#2A6EA6',
label='Neural network accuracy (%)')
ax.plot(SIZES, svm_accuracies, color='#FFA933')
ax.plot(SIZES, svm_accuracies, "o", color='#FFA933',
label='SVM accuracy (%)')
ax.set_xlim(100, 50000)
ax.set_ylim(25, 100)
ax.set_xscale('log')
ax.grid(True)
ax.set_xlabel('Training set size')
plt.legend(loc="lower right")
plt.show()
if __name__ == "__main__":
main()
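
One detail worth flagging: the line num_epochs = 1500000 / size keeps the total number of training examples processed constant at 1.5 million, so runs with different training-set sizes do comparable amounts of work. A worked check (integer division, matching the Python 2 behaviour above):

for size in [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]:
    # size * num_epochs == 1,500,000 examples processed in every run
    print("size %5d -> %5d epochs" % (size, 1500000 // size))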

BIN fig/more_data_5.png Normal file (4.7 KiB)

Binary file not shown.

Binary file not shown. (43 KiB)

BIN fig/more_data_log.png Normal file (34 KiB)

Binary file not shown.

BIN fig/more_data_rotated_5.png Normal file (5.2 KiB)

Binary file not shown.

fig/more_data_svm.json Normal file

@@ -0,0 +1 @@
[25.07, 48.93, 75.13, 83.87, 88.49, 91.46, 92.45, 93.47, 94.48]

fig/multiple_eta.json Normal file

@@ -0,0 +1 @@
[[[], [], [0.87809508908377998, 0.67406552530098141, 0.59798920430275404, 0.55533015743656189, 0.51751101003208144, 0.4942033354556824, 0.47255041042913526, 0.46069879353359433, 0.44304475294352064, 0.43099562372228112, 0.42310993427766375, 0.41408265298981006, 0.40573464183982105, 0.40110722961828227, 0.39162028064538967, 0.38705015774740958, 0.38116357043417587, 0.37603986695304614, 0.37297012040237154, 0.37057334627661631, 0.36551756338853658, 0.36335674264586654, 0.35745296185579917, 0.35535960956849127, 0.35365591135061097, 0.35011353300568238, 0.34946519495897871, 0.34604661988238178, 0.34386077098862522, 0.33919980880230349], []], [[], [], [0.49501954654296704, 0.4063145129425576, 0.40482383242804637, 0.37156577828840276, 0.37380111172151681, 0.37152751786000143, 0.35371985224004426, 0.3557161388797867, 0.34323780090168027, 0.3433514311156789, 0.3367645441708797, 0.34532085892085329, 0.33506383267050244, 0.34760988079085842, 0.34921493732996928, 0.33853424834583179, 0.32837282561262077, 0.33175599401109612, 0.33132920379429243, 0.33024353325326034, 0.32736756892399654, 0.3259638557593546, 0.32004264784244907, 0.33424319076405928, 0.33878125802305081, 0.32521839878261177, 0.32679267619514646, 0.32488571435373748, 0.33056367198473002, 0.33879633130932685], []], [[], [], [0.92489293305102116, 0.83919130289246469, 0.88748421594232696, 0.79625231780396133, 0.78117959228699174, 1.1365919079387048, 0.78787239608336346, 0.76778614131217449, 0.73689525303227721, 0.80127437393519696, 0.74433665287336681, 0.73725544607013882, 0.80249602203179993, 0.85190338199210014, 0.79872168623645712, 0.80243104440756152, 0.80649160680410659, 0.81467254023600921, 0.82526467696100858, 0.75042379852601759, 0.93658673378777402, 0.88236662906752283, 0.86121396033520892, 0.72492681699401829, 0.80405009868466648, 0.83959963179208197, 0.83387510808276821, 0.88282498566307899, 0.88583473645177979, 0.86068501713490919], []]]

BIN fig/multiple_eta.png Normal file (45 KiB)

Binary file not shown.

fig/multiple_eta.py Normal file

@@ -0,0 +1,73 @@
"""multiple_eta
~~~~~~~~~~~~~~~
This program shows how different values for the learning rate affect
training. In particular, we'll plot out how the cost changes using
three different values for eta.
"""
# Standard library
import json
import random
import sys
# My library
sys.path.append('../src/')
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
# Constants
LEARNING_RATES = [0.025, 0.25, 2.5]
COLORS = ['#2A6EA6', '#FFCD33', '#FF7033']
NUM_EPOCHS = 30
def main():
run_networks()
make_plot()
def run_networks():
"""Train networks using three different values for the learning rate,
and store the cost curves in the file ``multiple_eta.json``, where
they can later be used by ``make_plot``.
"""
# Make results more easily reproducible
random.seed(12345678)
np.random.seed(12345678)
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
results = []
for eta in LEARNING_RATES:
print "\nTrain a network using eta = "+str(eta)
net = network2.Network([784, 30, 10])
results.append(
net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0,
evaluation_data=validation_data,
monitor_training_cost=True))
f = open("multiple_eta.json", "w")
json.dump(results, f)
f.close()
def make_plot():
f = open("multiple_eta.json", "r")
results = json.load(f)
f.close()
fig = plt.figure()
ax = fig.add_subplot(111)
for eta, result, color in zip(LEARNING_RATES, results, COLORS):
_, _, training_cost, _ = result
ax.plot(np.arange(NUM_EPOCHS), training_cost, "o-",
                label=r"$\eta$ = "+str(eta),
color=color)
ax.set_xlim([0, NUM_EPOCHS])
ax.set_xlabel('Epoch')
ax.set_ylabel('Cost')
plt.legend(loc='upper right')
plt.show()
if __name__ == "__main__":
main()

fig/norms_during_training_2_layers.json Normal file

File diff suppressed because one or more lines are too long

fig/norms_during_training_3_layers.json Normal file

File diff suppressed because one or more lines are too long

fig/norms_during_training_4_layers.json Normal file

File diff suppressed because one or more lines are too long

fig/overfitting.json Normal file

File diff suppressed because one or more lines are too long

fig/overfitting.py Normal file

@@ -0,0 +1,179 @@
"""
overfitting
~~~~~~~~~~~
Plot graphs to illustrate the problem of overfitting.
"""
# Standard library
import json
import random
import sys
# My library
sys.path.append('../src/')
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main(filename, num_epochs,
training_cost_xmin=200,
test_accuracy_xmin=200,
test_cost_xmin=0,
training_accuracy_xmin=0,
training_set_size=1000,
lmbda=0.0):
"""``filename`` is the name of the file where the results will be
stored. ``num_epochs`` is the number of epochs to train for.
``training_set_size`` is the number of images to train on.
``lmbda`` is the regularization parameter. The other parameters
set the epochs at which to start plotting on the x axis.
"""
run_network(filename, num_epochs, training_set_size, lmbda)
make_plots(filename, num_epochs,
training_cost_xmin,
test_accuracy_xmin,
test_cost_xmin,
training_accuracy_xmin,
training_set_size)
def run_network(filename, num_epochs, training_set_size=1000, lmbda=0.0):
"""Train the network for ``num_epochs`` on ``training_set_size``
images, and store the results in ``filename``. Those results can
later be used by ``make_plots``. Note that the results are stored
to disk in large part because it's convenient not to have to
``run_network`` each time we want to make a plot (it's slow).
"""
# Make results more easily reproducible
random.seed(12345678)
np.random.seed(12345678)
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
net.large_weight_initializer()
test_cost, test_accuracy, training_cost, training_accuracy \
= net.SGD(training_data[:training_set_size], num_epochs, 10, 0.5,
evaluation_data=test_data, lmbda = lmbda,
monitor_evaluation_cost=True,
monitor_evaluation_accuracy=True,
monitor_training_cost=True,
monitor_training_accuracy=True)
f = open(filename, "w")
json.dump([test_cost, test_accuracy, training_cost, training_accuracy], f)
f.close()
def make_plots(filename, num_epochs,
training_cost_xmin=200,
test_accuracy_xmin=200,
test_cost_xmin=0,
training_accuracy_xmin=0,
training_set_size=1000):
"""Load the results from ``filename``, and generate the corresponding
plots. """
f = open(filename, "r")
test_cost, test_accuracy, training_cost, training_accuracy \
= json.load(f)
f.close()
plot_training_cost(training_cost, num_epochs, training_cost_xmin)
plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin)
plot_test_cost(test_cost, num_epochs, test_cost_xmin)
plot_training_accuracy(training_accuracy, num_epochs,
training_accuracy_xmin, training_set_size)
plot_overlay(test_accuracy, training_accuracy, num_epochs,
min(test_accuracy_xmin, training_accuracy_xmin),
training_set_size)
def plot_training_cost(training_cost, num_epochs, training_cost_xmin):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(training_cost_xmin, num_epochs),
training_cost[training_cost_xmin:num_epochs],
color='#2A6EA6')
ax.set_xlim([training_cost_xmin, num_epochs])
ax.grid(True)
ax.set_xlabel('Epoch')
ax.set_title('Cost on the training data')
plt.show()
def plot_test_accuracy(test_accuracy, num_epochs, test_accuracy_xmin):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(test_accuracy_xmin, num_epochs),
[accuracy/100.0
for accuracy in test_accuracy[test_accuracy_xmin:num_epochs]],
color='#2A6EA6')
ax.set_xlim([test_accuracy_xmin, num_epochs])
ax.grid(True)
ax.set_xlabel('Epoch')
ax.set_title('Accuracy (%) on the test data')
plt.show()
def plot_test_cost(test_cost, num_epochs, test_cost_xmin):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(test_cost_xmin, num_epochs),
test_cost[test_cost_xmin:num_epochs],
color='#2A6EA6')
ax.set_xlim([test_cost_xmin, num_epochs])
ax.grid(True)
ax.set_xlabel('Epoch')
ax.set_title('Cost on the test data')
plt.show()
def plot_training_accuracy(training_accuracy, num_epochs,
training_accuracy_xmin, training_set_size):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(training_accuracy_xmin, num_epochs),
[accuracy*100.0/training_set_size
for accuracy in training_accuracy[training_accuracy_xmin:num_epochs]],
color='#2A6EA6')
ax.set_xlim([training_accuracy_xmin, num_epochs])
ax.grid(True)
ax.set_xlabel('Epoch')
ax.set_title('Accuracy (%) on the training data')
plt.show()
def plot_overlay(test_accuracy, training_accuracy, num_epochs, xmin,
training_set_size):
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(xmin, num_epochs),
[accuracy/100.0 for accuracy in test_accuracy],
color='#2A6EA6',
label="Accuracy on the test data")
ax.plot(np.arange(xmin, num_epochs),
[accuracy*100.0/training_set_size
for accuracy in training_accuracy],
color='#FFA933',
label="Accuracy on the training data")
ax.grid(True)
ax.set_xlim([xmin, num_epochs])
ax.set_xlabel('Epoch')
ax.set_ylim([90, 100])
plt.legend(loc="lower right")
plt.show()
if __name__ == "__main__":
filename = raw_input("Enter a file name: ")
num_epochs = int(raw_input(
"Enter the number of epochs to run for: "))
training_cost_xmin = int(raw_input(
"training_cost_xmin (suggest 200): "))
test_accuracy_xmin = int(raw_input(
"test_accuracy_xmin (suggest 200): "))
test_cost_xmin = int(raw_input(
"test_cost_xmin (suggest 0): "))
training_accuracy_xmin = int(raw_input(
"training_accuracy_xmin (suggest 0): "))
training_set_size = int(raw_input(
"Training set size (suggest 1000): "))
lmbda = float(raw_input(
"Enter the regularization parameter, lambda (suggest: 5.0): "))
main(filename, num_epochs, training_cost_xmin,
test_accuracy_xmin, test_cost_xmin, training_accuracy_xmin,
training_set_size, lmbda)

BIN fig/overfitting1.png Normal file (36 KiB)

Binary file not shown.

BIN fig/overfitting2.png Normal file (62 KiB)

Binary file not shown.

BIN fig/overfitting3.png Normal file (38 KiB)

Binary file not shown.

BIN fig/overfitting4.png Normal file (30 KiB)

Binary file not shown.

fig/overfitting_full.json Normal file

@@ -0,0 +1 @@
[[0.56135590058630858, 0.47806921271034553, 0.457510836259925, 0.42504920544144992, 0.39449553344420019, 0.39810448800345, 0.37017079712250733, 0.37403997639944547, 0.36290253019659285, 0.4006868170859208, 0.36817548958488616, 0.37299310675826219, 0.36871967242261605, 0.37146610246666006, 0.35704621996697938, 0.35821464151288968, 0.38622103466509744, 0.37010939716781127, 0.36539832104327125, 0.35511546847032671, 0.3828088676932585, 0.36160025922354638, 0.37028708356461698, 0.37605182846277163, 0.36634313696187393, 0.36129044456360238, 0.37531885586439506, 0.36415225595876555, 0.35707895858237054, 0.36631987373588193], [9136, 9275, 9307, 9377, 9450, 9429, 9468, 9488, 9494, 9424, 9483, 9483, 9505, 9499, 9508, 9508, 9445, 9524, 9524, 9524, 9494, 9527, 9518, 9505, 9533, 9529, 9512, 9530, 9532, 9531], [0.55994588582554705, 0.44664870303435988, 0.42455329174078477, 0.38578320429266705, 0.33992291017592285, 0.33162477096795895, 0.3137480626518645, 0.30028971890544093, 0.27353890048167528, 0.30236927117202678, 0.26487026303889277, 0.2661714884193439, 0.24734280015146709, 0.26355551438395558, 0.23088530423416964, 0.22618350577327287, 0.25137541006767478, 0.23085585354651994, 0.21417931191800957, 0.20049587923059808, 0.23713128948069295, 0.20327728799861464, 0.21953883029836488, 0.20264436321820509, 0.19643949703516961, 0.18467980669870671, 0.18788606162530633, 0.18535916502880764, 0.18466759834259142, 0.17218286758911475], [45708, 46605, 46797, 47190, 47543, 47570, 47638, 47838, 48061, 47825, 48160, 48195, 48265, 48156, 48439, 48449, 48267, 48433, 48598, 48697, 48380, 48648, 48500, 48669, 48734, 48796, 48802, 48837, 48810, 48932]]

BIN fig/overfitting_full.png Normal file (44 KiB)

Binary file not shown.

BIN fig/pca_hard_data.png Normal file (52 KiB)

Binary file not shown.

BIN fig/pca_hard_data_fit.png Normal file (80 KiB)

Binary file not shown.

fig/pca_limitations.py Normal file

@@ -0,0 +1,32 @@
"""
pca_limitations
~~~~~~~~~~~~~~~
Plot graphs to illustrate the limitations of PCA.
"""
# Third-party libraries
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
# Plot just the data
fig = plt.figure()
ax = fig.gca(projection='3d')
z = np.linspace(-2, 2, 20)
theta = np.linspace(-4 * np.pi, 4 * np.pi, 20)
x = np.sin(theta)+0.03*np.random.randn(20)
y = np.cos(theta)+0.03*np.random.randn(20)
ax.plot(x, y, z, 'ro')
plt.show()
# Plot the data and the helix together
fig = plt.figure()
ax = fig.gca(projection='3d')
z_helix = np.linspace(-2, 2, 100)
theta_helix = np.linspace(-4 * np.pi, 4 * np.pi, 100)
x_helix = np.sin(theta_helix)
y_helix = np.cos(theta_helix)
ax.plot(x, y, z, 'ro')
ax.plot(x_helix, y_helix, z_helix, '')
plt.show()
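
The limitation the figure illustrates can also be checked numerically: a one-component PCA fit of the helix data is a straight line through the data's mean, which cannot follow the curve. A sketch using scikit-learn (a dependency of more_data.py above; x, y, z are the noisy helix points from this script):

from sklearn.decomposition import PCA

data = np.column_stack([x, y, z])
pca = PCA(n_components=1)
reconstructed = pca.inverse_transform(pca.fit_transform(data))
# The reconstruction lies on a single line, so its error stays large
# no matter how the line is oriented.
print("mean squared error: %s" % np.mean(np.sum((data - reconstructed)**2, axis=1)))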

fig/regularized.json Normal file

File diff suppressed because one or more lines are too long

BIN fig/regularized1.png Normal file (36 KiB)

Binary file not shown.

BIN fig/regularized2.png Normal file (67 KiB)

Binary file not shown.

fig/regularized_full.json Normal file

@@ -0,0 +1 @@
[[4.3072791918656037, 2.9331304641086344, 2.1348073553576041, 1.6588303607817259, 1.330889938797851, 1.1963223601928472, 1.1170765304219505, 1.0170754480838433, 0.99110935015398149, 1.0071179800661803, 0.96280080386971378, 0.99226609521675169, 0.96023984363523895, 0.97253784945751276, 0.93966545596520334, 0.95330563342376551, 0.96378529404233837, 0.97367336858037301, 0.94435985290781166, 0.94622931411839994, 0.98392022263201184, 0.94091005661041272, 0.9496551347987412, 0.94714964684453073, 0.95026655456196552, 0.92915894672179755, 0.95831053042987979, 1.0153994919718721, 0.92940339906358749, 0.97682851862658082], [9212, 9341, 9375, 9424, 9532, 9537, 9504, 9541, 9578, 9538, 9579, 9530, 9590, 9543, 9607, 9597, 9576, 9546, 9600, 9634, 9544, 9606, 9614, 9607, 9621, 9637, 9620, 9511, 9649, 9561], [1.2925405259017666, 0.92479539229795305, 0.72611252037165497, 0.61618944188425839, 0.49142410439713557, 0.46552608507795468, 0.46074829841290343, 0.40775149802551902, 0.39671750686791218, 0.42031570708192345, 0.38057096091326847, 0.40768033915334978, 0.3895210257834103, 0.40585871820346864, 0.36003072887701948, 0.37700037701783806, 0.39300003862768451, 0.40774598935627593, 0.37194215157507704, 0.3662415845761452, 0.40722309031673021, 0.36476961463606117, 0.36988528906574514, 0.36112644707329011, 0.380710641602238, 0.35700998663848571, 0.37724740623797381, 0.44991741876110503, 0.35820321110078079, 0.39226034353556583], [45919, 46835, 47204, 47434, 47989, 47930, 47839, 48157, 48218, 48105, 48313, 48089, 48282, 48111, 48463, 48362, 48243, 48123, 48416, 48533, 48123, 48483, 48435, 48548, 48434, 48524, 48417, 47797, 48561, 48235]]

BIN fig/regularized_full.png Normal file (49 KiB)

Binary file not shown.

fig/replaced_by_d3/README.md Normal file

@@ -0,0 +1,6 @@
# Replaced by d3 directory
This directory contains Python code that generated png figures which
were later replaced by d3 in the live version of the site. They've
been preserved here on the off chance that they may be of use at some
point in the future.

BIN fig/replaced_by_d3/relu.png Normal file (25 KiB)

Binary file not shown.

fig/replaced_by_d3/relu.py Normal file

@@ -0,0 +1,24 @@
"""
relu
~~~~
Plots a graph of the squashing function used by a rectified linear
unit."""
import numpy as np
import matplotlib.pyplot as plt
z = np.arange(-2, 2, .1)
zero = np.zeros(len(z))
y = np.max([zero, z], axis=0)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, y)
ax.set_ylim([-2.0, 2.0])
ax.set_xlim([-2.0, 2.0])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('Rectified linear unit')
plt.show()

BIN fig/replaced_by_d3/sigmoid.png Normal file (23 KiB)

Binary file not shown.

fig/replaced_by_d3/sigmoid.py Normal file

@@ -0,0 +1,23 @@
"""
sigmoid
~~~~~~~
Plots a graph of the sigmoid function."""
import numpy
import matplotlib.pyplot as plt
z = numpy.arange(-5, 5, .1)
sigma_fn = numpy.vectorize(lambda z: 1/(1+numpy.exp(-z)))
sigma = sigma_fn(z)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, sigma)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('sigmoid function')
plt.show()
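
Alongside the plot it is worth recording the sigmoid's key analytic property: its derivative is largest at z = 0 and decays to zero in both tails, which is the source of the saturation and learning-slowdown effects explored in generate_gradient.py above. In the book's notation:

\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad
\sigma'(z) = \sigma(z)\,(1 - \sigma(z)) \le \frac{1}{4}.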

BIN fig/replaced_by_d3/step.png Normal file (17 KiB)

Binary file not shown.

fig/replaced_by_d3/step.py Normal file

@@ -0,0 +1,23 @@
"""
step
~~~~~~~
Plots a graph of a step function."""
import numpy
import matplotlib.pyplot as plt
z = numpy.arange(-5, 5, .02)
step_fn = numpy.vectorize(lambda z: 1.0 if z >= 0.0 else 0.0)
step = step_fn(z)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, step)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('step function')
plt.show()

BIN fig/replaced_by_d3/tanh.png Normal file (23 KiB)

Binary file not shown.

fig/replaced_by_d3/tanh.py Normal file

@@ -0,0 +1,22 @@
"""
tanh
~~~~
Plots a graph of the tanh function."""
import numpy as np
import matplotlib.pyplot as plt
z = np.arange(-5, 5, .1)
t = np.tanh(z)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, t)
ax.set_ylim([-1.0, 1.0])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('tanh function')
plt.show()

fig/serialize_images_to_json.py Normal file

@@ -0,0 +1,46 @@
"""
serialize_images_to_json
~~~~~~~~~~~~~~~~~~~~~~~~
Utility to serialize parts of the training and validation data to JSON,
for use with Javascript. """
#### Libraries
# Standard library
import json
import sys
# My library
sys.path.append('../src/')
import mnist_loader
# Third-party libraries
import numpy as np
# Number of training and validation data images to serialize
NTD = 1000
NVD = 100
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
def make_data_integer(td):
# This will be slow, due to the loop. It'd be better if numpy did
# this directly. But numpy.rint followed by tolist() doesn't
# convert to a standard Python int.
return [int(x) for x in (td*256).reshape(784).tolist()]
data = {"training": [
{"x": [x[0] for x in training_data[j][0].tolist()],
"y": [y[0] for y in training_data[j][1].tolist()]}
for j in xrange(NTD)],
"validation": [
{"x": [x[0] for x in validation_data[j][0].tolist()],
"y": validation_data[j][1]}
for j in xrange(NVD)]}
f = open("data_1000.json", "w")
json.dump(data, f)
f.close()

BIN fig/test.png Normal file (18 KiB)

Binary file not shown.

BIN fig/training_speed_2_layers.png Normal file (42 KiB)

Binary file not shown.

BIN fig/training_speed_3_layers.png Normal file (43 KiB)

Binary file not shown.

BIN fig/training_speed_4_layers.png Normal file (50 KiB)

Binary file not shown.

BIN fig/valley.png Normal file (90 KiB)

Binary file not shown.

fig/valley.py Normal file

@@ -0,0 +1,43 @@
"""
valley
~~~~~~
Plots a function of two variables to minimize. The function is a
fairly generic valley function."""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-1, 1, 0.1)
Y = numpy.arange(-1, 1, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + Y**2
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
for y in xrange(len(Y)):
colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
ax.text(0.05, -1.8, 0, "$v_1$", fontsize=20)
ax.text(1.5, -0.25, 0, "$v_2$", fontsize=20)
plt.show()
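
This is the valley used to introduce gradient descent: for C = v_1^2 + v_2^2 the update v -> v - eta*\nabla C shrinks both coordinates geometrically for small eta. A minimal sketch with assumed values, not part of the figure code:

import numpy as np

v = np.array([0.9, -0.8])
eta = 0.1
for step in range(30):
    v = v - eta * 2*v            # gradient of C = v1**2 + v2**2 is 2*v
print("v after 30 steps: %s" % v)    # close to the minimum at (0, 0)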

BIN fig/valley2.png Normal file (83 KiB)

Binary file not shown.

fig/valley2.py Normal file

@@ -0,0 +1,48 @@
"""valley2.py
~~~~~~~~~~~~~
Plots a function of two variables to minimize. The function is a
fairly generic valley function.
Note that this is a duplicate of valley.py, but omits labels on the
axes. It's bad practice to duplicate in this way, but I had
considerable trouble getting matplotlib to update a graph in the way I
needed (adding or removing labels), so finally fell back on this as a
kludge solution.
"""
#### Libraries
# Third party libraries
from matplotlib.ticker import LinearLocator
# Note that axes3d is not explicitly used in the code, but is needed
# to register the 3d plot type correctly
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy
fig = plt.figure()
ax = fig.gca(projection='3d')
X = numpy.arange(-1, 1, 0.1)
Y = numpy.arange(-1, 1, 0.1)
X, Y = numpy.meshgrid(X, Y)
Z = X**2 + Y**2
colortuple = ('w', 'b')
colors = numpy.empty(X.shape, dtype=str)
for x in xrange(len(X)):
for y in xrange(len(Y)):
colors[x, y] = colortuple[(x + y) % 2]
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=colors,
linewidth=0)
ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
ax.w_xaxis.set_major_locator(LinearLocator(3))
ax.w_yaxis.set_major_locator(LinearLocator(3))
ax.w_zaxis.set_major_locator(LinearLocator(3))
ax.text(1.79, 0, 1.62, "$C$", fontsize=20)
plt.show()

fig/weight_initialization.py Normal file

@@ -0,0 +1,89 @@
"""weight_initialization
~~~~~~~~~~~~~~~~~~~~~~~~
This program shows how weight initialization affects training. In
particular, we'll plot out how the classification accuracies improve
using either large starting weights, whose standard deviation is 1, or
the default starting weights, whose standard deviation is 1 over the
square root of the number of input neurons.
"""
# Standard library
import json
import random
import sys
# My library
sys.path.append('../src/')
import mnist_loader
import network2
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np
def main(filename, n, eta):
run_network(filename, n, eta)
make_plot(filename)
def run_network(filename, n, eta):
"""Train the network using both the default and the large starting
weights. Store the results in the file with name ``filename``,
where they can later be used by ``make_plots``.
"""
# Make results more easily reproducible
random.seed(12345678)
np.random.seed(12345678)
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost)
print "Train the network using the default starting weights."
default_vc, default_va, default_tc, default_ta \
= net.SGD(training_data, 30, 10, eta, lmbda=5.0,
evaluation_data=validation_data,
monitor_evaluation_accuracy=True)
print "Train the network using the large starting weights."
net.large_weight_initializer()
large_vc, large_va, large_tc, large_ta \
= net.SGD(training_data, 30, 10, eta, lmbda=5.0,
evaluation_data=validation_data,
monitor_evaluation_accuracy=True)
f = open(filename, "w")
json.dump({"default_weight_initialization":
[default_vc, default_va, default_tc, default_ta],
"large_weight_initialization":
[large_vc, large_va, large_tc, large_ta]},
f)
f.close()
def make_plot(filename):
"""Load the results from the file ``filename``, and generate the
corresponding plot.
"""
f = open(filename, "r")
results = json.load(f)
f.close()
default_vc, default_va, default_tc, default_ta = results[
"default_weight_initialization"]
large_vc, large_va, large_tc, large_ta = results[
"large_weight_initialization"]
# Convert raw classification numbers to percentages, for plotting
default_va = [x/100.0 for x in default_va]
large_va = [x/100.0 for x in large_va]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(np.arange(0, 30, 1), large_va, color='#2A6EA6',
label="Old approach to weight initialization")
ax.plot(np.arange(0, 30, 1), default_va, color='#FFA933',
label="New approach to weight initialization")
ax.set_xlim([0, 30])
ax.set_xlabel('Epoch')
ax.set_ylim([85, 100])
ax.set_title('Classification accuracy')
plt.legend(loc="lower right")
plt.show()
if __name__ == "__main__":
    # ``main`` requires a results filename, the hidden-layer size, and eta;
    # read them from the command line rather than calling main() bare.
    main(sys.argv[1], int(sys.argv[2]), float(sys.argv[3]))
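
The 1/sqrt(n_in) scaling described in the docstring can be checked directly: a weighted sum of n_in unit inputs has standard deviation sqrt(n_in) under the large scheme, but stays of order 1 under the default one. A sketch, with n_in = 784 as for MNIST inputs:

import numpy as np

n_in = 784
a = np.ones(n_in)    # an input vector with every neuron fully on
z_old = [np.random.randn(n_in).dot(a) for _ in range(1000)]
z_new = [(np.random.randn(n_in)/np.sqrt(n_in)).dot(a) for _ in range(1000)]
print("std of z, large initialization: %.1f" % np.std(z_old))    # ~ sqrt(784) = 28
print("std of z, default initialization: %.1f" % np.std(z_new))  # ~ 1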

fig/weight_initialization_100.json Normal file

@@ -0,0 +1 @@
{"default_weight_initialization": [[], [9295, 9481, 9547, 9592, 9664, 9673, 9702, 9719, 9726, 9726, 9732, 9732, 9730, 9734, 9745, 9751, 9757, 9761, 9764, 9766, 9758, 9767, 9756, 9752, 9777, 9775, 9770, 9770, 9771, 9781], [], []], "large_weight_initialization": [[], [8994, 9181, 9260, 9364, 9427, 9449, 9497, 9512, 9560, 9578, 9603, 9616, 9626, 9629, 9644, 9671, 9674, 9679, 9700, 9708, 9707, 9717, 9729, 9720, 9719, 9745, 9751, 9754, 9755, 9742], [], []]}

BIN fig/weight_initialization_100.png Normal file (40 KiB)

Binary file not shown.

fig/weight_initialization_30.json Normal file

@@ -0,0 +1 @@
{"default_weight_initialization": [[], [9270, 9414, 9470, 9504, 9537, 9550, 9587, 9594, 9596, 9594, 9616, 9595, 9622, 9630, 9636, 9641, 9625, 9652, 9637, 9634, 9642, 9639, 9649, 9646, 9646, 9653, 9646, 9653, 9640, 9650], [], []], "large_weight_initialization": [[], [8643, 9044, 9141, 9231, 9299, 9327, 9385, 9416, 9433, 9449, 9476, 9489, 9500, 9535, 9521, 9548, 9564, 9573, 9585, 9592, 9596, 9615, 9607, 9605, 9606, 9622, 9637, 9648, 9635, 9637], [], []]}

BIN fig/weight_initialization_30.png Normal file (41 KiB)

Binary file not shown.