In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error (the sum of squared
differences between the prediction and y).

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# Problem dimensions: N samples per batch, D_in input features,
# H hidden units, D_out output features.
N = 64
D_in = 1000
H = 100
D_out = 10

# Synthetic inputs and regression targets, drawn from a standard normal.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Weight matrices of the two bias-free linear layers, also standard normal.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear -> ReLU -> linear.
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squares loss; the residual is reused by the backward pass.
    residual = y_pred - y
    loss = np.sum(np.square(residual))
    print(t, loss)

    # Backward pass: gradients of the loss with respect to w1 and w2,
    # obtained by applying the chain rule layer by layer.
    grad_y_pred = 2.0 * residual
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes no gradient wherever its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Plain gradient-descent step, applied to the weights in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 36666162.85629468
1 37113592.5189867
2 39960919.072597094
3 37121942.985818654
4 26963712.380085967
5 14825465.079364339
6 7019011.743927153
7 3391853.7050399287
8 1933838.726037356
9 1310747.919959318
10 997382.9508611689
11 805363.9082704766
12 669661.5021993882
13 565600.7181235153
14 482670.99643929175
15 414921.11006781575
16 358831.55648551346
17 312008.7980410571
18 272590.1584278387
19 239186.8987239108
20 210730.08329549475
21 186338.08740552425
22 165336.68865102952
23 147122.1899842762
24 131306.0333945781
25 117546.06120917585
26 105499.9333408148
27 94923.15177738405
28 85606.46016699242
29 77390.17424723721
30 70109.72462616886
31 63640.675016018096
32 57879.94505405976
33 52732.809455186536
34 48128.18808912925
35 43999.96754925663
36 40287.795238309605
37 36939.32368343921
38 33916.27129721288
39 31181.23885434072
40 28702.059344590638
41 26452.56676354073
42 24407.250474534427
43 22543.595089868468
44 20842.907992785746
45 19289.44658673401
46 17868.12732629897
47 16