In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [2]:
import numpy as np

# Two-layer fully-connected ReLU network (no biases), trained on random data
# with hand-written forward and backward passes using only numpy.

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Use a seeded generator so that a fresh "Restart & Run All" draws the same
# random data and the same initial weights on every run.
SEED = 0
rng = np.random.default_rng(SEED)

# Create random input and output data
x = rng.standard_normal((N, D_in))
y = rng.standard_normal((N, D_out))

# Randomly initialize weights
w1 = rng.standard_normal((D_in, H))
w2 = rng.standard_normal((H, D_out))

learning_rate = 1e-6
num_steps = 500  # number of gradient-descent iterations

for t in range(num_steps):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss (sum of squared errors over the whole batch)
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of the loss with respect to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    # Through the ReLU: zero the gradient wherever the pre-activation was
    # negative. Work on a copy so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Gradient-descent update; the weight arrays are modified in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 30356438.768342838
1 27753999.62326961
2 28749774.272267465
3 28548201.94706624
4 24622697.683605686
5 17448300.60072209
6 10456985.996654112
7 5680295.130764839
8 3110596.7922078837
9 1845694.1164586328
10 1225235.8925627258
11 897224.5051654081
12 704486.9354969935
13 577329.4551776457
14 485306.94721874193
15 414572.69047803537
16 357938.592938172
17 311302.2316577771
18 272338.2831433959
19 239381.31583986784
20 211277.0822238818
21 187154.7335027785
22 166363.4245938119
23 148337.17240023668
24 132639.10375316843
25 118908.69710392838
26 106881.93488836741
27 96301.7950373566
28 86949.86521219448
29 78656.5116998713
30 71292.0337905966
31 64726.49623897341
32 58862.457966765694
33 53621.08791983462
34 48920.87793905029
35 44700.02363279434
36 40898.681426646595
37 37469.61434302797
38 34369.40152072317
39 31561.077562823397
40 29013.518336575624
41 26698.157738468704
42 24591.930838280805
43 22675.20476595142
44 20929.278969216466
45 19335.49102736542
46 17877.335345950734
47 16

487 1.4312041296250057e-06
488 1.3647809174203886e-06
489 1.3014638373865657e-06
490 1.2410813033853622e-06
491 1.1835212469178252e-06
492 1.1286257454288792e-06
493 1.0762867945371374e-06
494 1.0263674692593804e-06
495 9.78776690510665e-07
496 9.333985400422522e-07
497 8.90126994563509e-07
498 8.488781065927514e-07
499 8.095391111055972e-07
