In [28]:
# Useful starting lines
%matplotlib inline

import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Implement the sigmoid activation function.

In [29]:
def sigmoid(t):
    """apply sigmoid function on t."""
    return 1.0 / (1 + np.exp(-t))

def grad_sigmoid(t):
    """return the gradient of sigmoid on t."""
    return sigmoid(t) * (1 - sigmoid(t))

Note that you are working on a three-layer neural network with one input layer of size $D=4$, $L=1$ hidden layer with size $K=5$, and one output layer with size 1.

Initialize the data.

In [30]:
x = np.array([0.01, 0.02, 0.03, 0.04])

# change to normal initialization
W = {
    "w_1": np.ones((4, 5)),
    "w_2": np.ones(5)
}
y = 1

print(np.shape(x))

(4,)


# Problem 1: Feed-forward in neural network

Implement the neural network described by Equation 1 of the exercise sheet.

In [31]:
def simple_feed_forward(x, W):
    """Do feed forward propagation.""" 
    x_0 = x
    z_1 = W["w_1"].T @ x_0
    x_1 = sigmoid(z_1) # change to relu
    z_2 = W["w_2"].T @ x_1
    y_hat = sigmoid(z_2) # change to relu
    
    return z_1, z_2, y_hat

try:
    expected = 0.93244675427215695
    _, _, yours = simple_feed_forward(x, W)
    assert np.sum((yours - expected) ** 2) < 1e-15
    print("Your implementation is correct!")
except:
    print("Your implementation is not correct.")

Your implementation is correct!


# Problem 2: Backpropagation in neural network

Implement your derivation of backpropagation. 

*Hint*: You might want to slightly change `simple_feed_forward`.

In [37]:
def simple_backpropagation(y, x, W):
    """Do backpropagation and get delta_W."""
    # Feed forward
    z_1, z_2, y_hat = simple_feed_forward(x, W)
    x_1 = sigmoid(z_1)
    # Backpropogation
    # scalar * scalar
    delta_2 = (y_hat - y) * grad_sigmoid(z_2)
#     print(np.shape(delta_2))
    delta_w_2 = delta_2 * x_1
    print("delta_w_2 shape", np.shape(delta_w_2))
    delta_1 = delta_2 * W["w_2"] * grad_sigmoid(z_1)
    print(np.shape(delta_1))
    delta_w_1 = np.outer(x, delta_1)
    return {
        "w_2": delta_w_2,
        "w_1": delta_w_1
    }
  
try:
    expected = {
        'w_1': np.array([
            [ -1.06113639e-05,  -1.06113639e-05,  -1.06113639e-05, -1.06113639e-05,  -1.06113639e-05],
            [ -2.12227277e-05,  -2.12227277e-05,  -2.12227277e-05, -2.12227277e-05,  -2.12227277e-05],
            [ -3.18340916e-05,  -3.18340916e-05,  -3.18340916e-05, -3.18340916e-05,  -3.18340916e-05],
            [ -4.24454555e-05,  -4.24454555e-05,  -4.24454555e-05, -4.24454555e-05,  -4.24454555e-05]]),
        'w_2': np.array(
            [-0.00223387, -0.00223387, -0.00223387, -0.00223387, -0.00223387])
    }
    yours = simple_backpropagation(y, x, W)
    #print(yours)
    assert np.sum(
        [np.sum((yours[key] - expected[key]) ** 2) for key in expected.keys()]) < 1e-15
    print("Your implementation is correct!")
except:
    print("Your implementation is not correct!")

delta_w_2 shape (5,)
(5,)
Your implementation is correct!


In [42]:
# current predictions
z_1, z_2, y_hat = simple_feed_forward(x, W)
print(y_hat)

# back prop
delta_w = simple_backpropagation(y, x, W)
W["w_1"]  = W["w_1"] - delta_w["w_1"]
W["w_2"]  = W["w_2"] - delta_w["w_2"]

z_1, z_2, y_hat = simple_feed_forward(x, W)
print(y_hat)

print("delta_w_1 shape", np.shape(delta_w["w_1"]))
print("delta_w_2 shape", np.shape(delta_w["w_2"]))

0.933887755985
delta_w_2 shape (5,)
(5,)
0.934234439841
delta_w_1 shape (4, 5)
delta_w_2 shape (5,)


In [24]:
while np.abs(simple_feed_forward(x,W)[2] - y) > 1e-2:
    print(np.abs(simple_feed_forward(x,W)[2] - y))
    delta_w = simple_backpropagation(y, x, W)
    W["w_1"]  = W["w_1"] - delta_w["w_1"]
    W["w_2"]  = W["w_2"] - delta_w["w_2"]

0.0668216179429
0.0664642188476
0.0661122440147
0.0657655601587
0.0654240384828
0.0650875544887
0.0647559877946
0.0644292219637
0.0641071443406
0.0637896458959
0.0634766210791
0.0631679676779
0.0628635866849
0.0625633821702
0.0622672611608
0.0619751335252
0.0616869118634
0.0614025114022
0.0611218498953
0.0608448475282
0.0605714268268
0.0603015125708
0.0600350317108
0.0597719132886
0.0595120883617
0.0592554899307
0.0590020528699
0.0587517138611
0.0585044113298
0.0582600853845
0.0580186777585
0.057780131754
0.0575443921882
0.0573114053428
0.057081118914
0.0568534819656
0.0566284448835
0.0564059593326
0.0561859782145
0.0559684556278
0.055753346829
0.055540608196
0.0553301971923
0.0551220723326
0.0549161931498
0.0547125201636
0.0545110148496
0.0543116396102
0.0541143577463
0.05391913343
0.0537259316781
0.0535347183273
0.0533454600095
0.0531581241284
0.0529726788368
0.0527890930147
0.0526073362484
0.05242737881
0.0522491916378
0.0520727463172
0.0518980150627
0.0517249707
0.0515535866491
0.0

0.0157228055208
0.0157176035053
0.0157124065932
0.0157072147762
0.0157020280459
0.0156968463942
0.0156916698127
0.0156864982933
0.0156813318278
0.0156761704078
0.0156710140254
0.0156658626723
0.0156607163403
0.0156555750215
0.0156504387076
0.0156453073906
0.0156401810624
0.015635059715
0.0156299433404
0.0156248319305
0.0156197254774
0.0156146239731
0.0156095274096
0.015604435779
0.0155993490734
0.0155942672849
0.0155891904056
0.0155841184276
0.0155790513431
0.0155739891442
0.0155689318232
0.0155638793723
0.0155588317836
0.0155537890494
0.015548751162
0.0155437181136
0.0155386898966
0.0155336665032
0.0155286479258
0.0155236341567
0.0155186251882
0.0155136210128
0.0155086216228
0.0155036270107
0.0154986371688
0.0154936520897
0.0154886717658
0.0154836961895
0.0154787253533
0.0154737592498
0.0154687978715
0.015463841211
0.0154588892607
0.0154539420133
0.0154489994613
0.0154440615975
0.0154391284143
0.0154341999044
0.0154292760605
0.0154243568753
0.0154194423415
0.0154145324517
0.0154096271

0.0120605616053
0.0120581949591
0.0120558296937
0.0120534658076
0.0120511032996
0.0120487421683
0.0120463824123
0.0120440240305
0.0120416670213
0.0120393113836
0.0120369571159
0.0120346042169
0.0120322526854
0.01202990252
0.0120275537194
0.0120252062822
0.0120228602072
0.0120205154931
0.0120181721385
0.0120158301421
0.0120134895026
0.0120111502188
0.0120088122892
0.0120064757127
0.0120041404878
0.0120018066134
0.0119994740881
0.0119971429106
0.0119948130797
0.0119924845939
0.0119901574521
0.011987831653
0.0119855071952
0.0119831840776
0.0119808622987
0.0119785418573
0.0119762227522
0.011973904982
0.0119715885456
0.0119692734415
0.0119669596686
0.0119646472255
0.0119623361111
0.011960026324
0.0119577178629
0.0119554107267
0.0119531049139
0.0119508004235
0.0119484972541
0.0119461954044
0.0119438948733
0.0119415956594
0.0119392977615
0.0119370011784
0.0119347059087
0.0119324119513
0.011930119305
0.0119278279683
0.0119255379402
0.0119232492194
0.0119209618046
0.0119186756946
0.011916390888