Add a hidden layer with 3 neurons between the input and output.

In [None]:
import numpy as np
from common import *

In [None]:
# Target values that loss() compares the network output against.
# NOTE(review): expected1 is not defined in this notebook; presumably it is
# exported by `common` - confirm.
expected = expected1

# Layer 0 weights (input -> hidden), shape (2, 3): each column holds the
# incoming weights of one of the three hidden neurons.
w0 = np.array([
    [0.61275508, 0.98539237, 0.27266151],
    [-0.47399039, 0.66266751, -0.72721264],
])

# Layer 1 weights (hidden -> output), shape (3, 1): one weight per hidden neuron.
w1 = np.array([[0.96787958], [-0.80183257], [0.94478099]])

# Biases: b0 has one entry per hidden neuron, b1 is the single output bias.
b0 = np.array([0.0450116, 0.46498468, 0.39401804])
b1 = np.array([0.61789242])

In [None]:
def forward(input):
  """Run one forward pass through the two-layer network.

  Uses the notebook-level weights ``w0``/``w1`` and biases ``b0``/``b1``.
  Both layers are purely linear: no activation function is applied.

  Args:
    input: Values fed into the network; matrix-multiplied by ``w0``.

  Returns:
    A ``(hidden, output)`` tuple holding the hidden-layer values and the
    output-layer values, in that order.
  """
  hidden = input @ w0 + b0
  output = hidden @ w1 + b1
  return hidden, output

def loss(out):
  """Return the sum of squared errors between ``out`` and the notebook-level ``expected``."""
  residual = out - expected
  return np.sum(np.square(residual))

In [None]:
# Run the forward pass on `input` and report the resulting sum-of-squared-errors
# loss. `h` and `out` are kept at notebook level because the gradient cells
# below read them.
# NOTE(review): `input`, `test` and `plot` are not defined in this notebook;
# presumably they come from `from common import *` (with `input` shadowing the
# Python builtin) - confirm.
h, out = forward(input)
l = loss(out)
print("Loss: ", l)

# forward() returns (hidden, output); element [1] is the output-layer values,
# which are passed to plot() together with `expected`.
plot(forward(test)[1], expected)

We want to find how changes to the weights and biases affect the loss: dL/dW0, dL/dW1, dL/dB0, and dL/dB1. To do this, we work backwards from the output.

Using chain rule:

dL/dW1 = dL/dOut * dOut/dW1

dL/dB1 = dL/dOut * dOut/dB1

dL/dW0 = dL/dH * dH/dW0, where dL/dH = dL/dOut * dOut/dH

dL/dB0 = dL/dH * dH/dB0

In [None]:
# Gradient of the loss with respect to the network output.
#
# loss    = sum((out - expected) ^ 2)    (^ here means "squared", not Python's XOR)
# dL/dOut = 2 * (out - expected)         (power rule, applied element-wise)
#
# The result holds one derivative per element of `out - expected`.

dLdOut = 2 * (out - expected)
# Last expression in the cell, so the notebook renders it as the cell output.
dLdOut

In [None]:
# Output layer being differentiated (matrix product, as in forward()):
#   out = h @ w1 + b1

# dOut/dW1 = h
# dL/dW1   = dL/dOut * dOut/dW1
#          = dL/dOut * h

# For each sample, how does a change to each of the W1 weights change the loss.
# NOTE(review): elemOuter is not defined in this notebook (presumably from
# `common`); it is assumed to return one outer product of h[i] and dLdOut[i]
# per sample i - confirm.
dLdW1PerSample = elemOuter(h, dLdOut)
# Sum over axis 0 (assumed to be the sample axis) to get the overall W1
# derivatives across all samples.
dLdW1 = dLdW1PerSample.sum(0)
# Printed explicitly: a notebook only renders the value of a cell's *last*
# expression, so a bare `dLdW1` in the middle of the cell shows nothing.
print("dL/dW1:", dLdW1, sep="\n")

# dOut/dB1 = 1
# dL/dB1   = dL/dOut * dOut/dB1
#          = dL/dOut
# The bias gradient is dL/dOut summed over axis 0, mirroring the sum used
# for dLdW1 above.
dLdB1 = dLdOut.sum(0)
# Last expression in the cell, so the notebook renders it as the cell output.
dLdB1

In [None]:
# Forward pass being differentiated (matrix products, as in forward()):
#   h   = input @ w0 + b0
#   out = h @ w1 + b1

# dOut/dH = w1
# dL/dH   = dL/dOut * dOut/dH
#         = dL/dOut * w1
# Multiplying by w1.T carries the output gradient back onto the hidden layer.
dLdH = np.matmul(dLdOut, w1.T)
# Printed explicitly: a notebook only renders the value of a cell's *last*
# expression, so a bare `dLdH` in the middle of the cell shows nothing.
print("dL/dH:", dLdH, sep="\n")

# dH/dW0 = input
# dL/dW0 = dL/dH * dH/dW0
#        = dL/dH * input
# NOTE(review): elemOuter is not defined in this notebook (presumably from
# `common`); it is assumed to return one outer product of input[i] and
# dLdH[i] per sample i - confirm.
dLdW0PerSample = elemOuter(input, dLdH)
# Sum over axis 0 (assumed to be the sample axis) to get the overall W0
# derivatives across all samples.
dLdW0 = dLdW0PerSample.sum(0)
# Same reason as for dLdH: a bare mid-cell `dLdW0` would not be displayed.
print("dL/dW0:", dLdW0, sep="\n")

# dH/dB0 = 1
# dL/dB0 = dL/dH * dH/dB0
#        = dL/dH
dLdB0 = dLdH.sum(0)
# Last expression in the cell, so the notebook renders it as the cell output.
dLdB0