In [1]:
# Useful starting lines
%matplotlib inline

import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
from test_utils import test

Implement the sigmoid activation function $\sigma(t) = \frac{1}{1 + e^{-t}}$ and its gradient $\sigma'(t) = \sigma(t)\,(1 - \sigma(t))$.

In [3]:
def sigmoid(t):
    """Apply the logistic sigmoid 1 / (1 + exp(-t)) element-wise to t.

    Args:
        t: scalar or numpy array

    Returns:
        scalar or numpy array, with every value in [0, 1]

    >>> sigmoid(np.array([0.1]))
    array([0.52497919])
    >>> sigmoid(np.array([0.1, 0.1]))
    array([0.52497919, 0.52497919])
    """
    # For very negative t, exp(-t) overflows to inf. The quotient 1 / (1 + inf)
    # is then exactly 0.0, which is the correct limit, so the overflow warning
    # numpy would emit is noise. Silencing it locally leaves every returned
    # value unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-t))
    
    
def grad_sigmoid(t):
    """Evaluate the derivative of the sigmoid element-wise at t.

    The derivative is computed as sigmoid(t) * (1 - sigmoid(t)).

    Args:
        t: scalar or numpy array

    Returns:
        scalar or numpy array

    >>> grad_sigmoid(np.array([0.1]))
    array([0.24937604])
    >>> grad_sigmoid(np.array([0.1, 0.1]))
    array([0.24937604, 0.24937604])
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    activation = sigmoid(t)
    return activation * (1 - activation)

In [5]:
# Check `sigmoid` with the `test` helper imported from test_utils
# (presumably it runs the examples in the docstring; confirm in test_utils).
test(sigmoid)

✅ Your `sigmoid` passes some basic tests.


In [7]:
# Check `grad_sigmoid` with the `test` helper imported from test_utils
# (presumably it runs the examples in the docstring; confirm in test_utils).
test(grad_sigmoid)

✅ Your `grad_sigmoid` passes some basic tests.


Note that you are working on a three-layer neural network with one input layer of size $D=4$, $L=1$ hidden layer with size $K=5$, and one output layer with size 1.

Initialize the data.

In [9]:
# Toy problem used by the checks below: one input sample, all-ones weights,
# and the scalar target the network output is compared against.
x = np.array([0.01, 0.02, 0.03, 0.04])  # input sample, shape (4,)
W = dict(
    w_1=np.ones((4, 5)),  # first-layer weights, shape (4, 5)
    w_2=np.ones(5),  # second-layer weights, shape (5,)
)
y = 1  # target value

# Problem 1: Feed-forward in neural network

Implement the neural network described by Equation 1 of the exercise sheet.

In [29]:
def simple_feed_forward(x, W):
    """Run one forward pass through the network.

    The input is multiplied by w_1 and passed through the sigmoid to obtain
    the hidden activations; those are multiplied by w_2 and passed through
    the sigmoid again to obtain the prediction.

    Args:
        x: numpy array of shape (D, )
        W: a dictionary of numpy array, with two elements, w_1 and w_2.
            w_1: shape=(D, K)
            w_2: shape=(K, )

    Returns:
        z1: a numpy array, the hidden-layer values before the sigmoid
        z2: scalar number, the output-layer value before the sigmoid
        y_hat: a scalar, the output-layer value after the sigmoid

    >>> x = np.array([0.01, 0.02, 0.03, 0.04])
    >>> W = {"w_1": np.ones((4, 5)), "w_2": np.ones(5)}
    >>> z1, z2, y_hat = simple_feed_forward(x, W)
    >>> z1
    array([0.1, 0.1, 0.1, 0.1, 0.1])
    >>> z2
    2.6248959373947
    >>> y_hat
    0.932446754272157
    """
    # Hidden layer: pre-activation, then activation.
    z1 = np.dot(W["w_1"].T, x)
    hidden = sigmoid(z1)

    # Output layer: pre-activation; its sigmoid is the prediction.
    z2 = np.dot(W["w_2"].T, hidden)
    return z1, z2, sigmoid(z2)

# Compare the prediction on the toy data with the reference value.
expected = 0.93244675427215695
try:
    _, _, yours = simple_feed_forward(x, W)
    assert np.sum((yours - expected) ** 2) < 1e-15
except AssertionError:
    # The function ran but produced a value outside the tolerance.
    print("Your implementation is not correct.")
except Exception as err:
    # The function crashed. Say why, instead of hiding the cause behind the
    # generic message (a bare `except:` would also swallow KeyboardInterrupt).
    print("Your implementation is not correct. ({}: {})".format(type(err).__name__, err))
else:
    print("Your implementation is correct!")

Your implementation is correct!


In [31]:
# Check `simple_feed_forward` with the `test` helper imported from test_utils
# (presumably it runs the examples in the docstring; confirm in test_utils).
test(simple_feed_forward)

✅ Your `simple_feed_forward` passes some basic tests.


# Problem 2: Backpropagation in neural network

Implement your derivation of backpropagation. 

*Hint*: You might want to slightly change `simple_feed_forward`.

In [52]:
def simple_backpropagation(y, x, W):
    """Do backpropagation and get the gradient of the loss w.r.t. the weights.

    The loss is the squared error 1/2 * (y_hat - y)**2, where y_hat is the
    prediction returned by simple_feed_forward(x, W).

    Args:
        y: scalar number
        x: numpy array of shape (D, )
        W: a dictionary of numpy array, with two elements, w_1 and w_2.
            w_1: shape=(D, K)
            w_2: shape=(K, )

    Returns:
        grad_W: a dictionary of numpy array with the same keys as W. Each
            entry is the gradient of the loss with respect to the matching
            weight and has the same shape as that weight:
            w_1: shape=(D, K)
            w_2: shape=(K, )

    >>> y = 1
    >>> x = np.array([0.01, 0.02, 0.03, 0.04])
    >>> W = {"w_1": np.ones((4, 5)), "w_2": np.ones(5)}
    >>> grad_W = simple_backpropagation(y, x, W)
    >>> grad_W["w_1"].shape
    (4, 5)
    >>> grad_W["w_2"].shape
    (5,)
    """
    z_1, z_2, y_hat = simple_feed_forward(x, W)
    # simple_feed_forward does not return the hidden activations, so they are
    # recomputed here from the hidden pre-activations.
    x_1 = sigmoid(z_1)

    # dL/dz_2: error signal at the output pre-activation (a scalar).
    delta_2 = (y_hat - y) * grad_sigmoid(z_2)
    # dL/dz_1: error signal pushed back through w_2 and the hidden sigmoid.
    delta_1 = delta_2 * W["w_2"] * grad_sigmoid(z_1)

    return {
        "w_1": np.outer(x, delta_1),
        # A plain product keeps the shape of w_2, (K, ). np.outer with the
        # scalar delta_2 would instead produce a (K, 1) column.
        "w_2": delta_2 * x_1,
    }


# Compare the gradients on the toy data with the reference values.
expected = {
    'w_1': np.array([
        [ -1.06113639e-05,  -1.06113639e-05,  -1.06113639e-05, -1.06113639e-05,  -1.06113639e-05],
        [ -2.12227277e-05,  -2.12227277e-05,  -2.12227277e-05, -2.12227277e-05,  -2.12227277e-05],
        [ -3.18340916e-05,  -3.18340916e-05,  -3.18340916e-05, -3.18340916e-05,  -3.18340916e-05],
        [ -4.24454555e-05,  -4.24454555e-05,  -4.24454555e-05, -4.24454555e-05,  -4.24454555e-05]]),
    'w_2': np.array(
        [-0.00223387, -0.00223387, -0.00223387, -0.00223387, -0.00223387])
}
try:
    yours = simple_backpropagation(y, x, W)
    for key in expected:
        # Shapes must agree exactly. Otherwise the subtraction below would
        # broadcast and could hide a wrongly shaped gradient.
        assert np.shape(yours[key]) == expected[key].shape
    assert np.sum(
        [np.sum((yours[key] - expected[key]) ** 2)
         for key in expected.keys()]) < 1e-15
except AssertionError:
    # The function ran but a shape or value did not match the reference.
    print("Your implementation is not correct!")
except Exception as err:
    # The function crashed. Say why, instead of hiding the cause behind the
    # generic message (a bare `except:` would also swallow KeyboardInterrupt).
    print("Your implementation is not correct! ({}: {})".format(type(err).__name__, err))
else:
    print("Your implementation is correct!")

Your implementation is correct!


In [51]:
# Check `simple_backpropagation` with the `test` helper imported from test_utils
# (presumably it runs the examples in the docstring; confirm in test_utils).
test(simple_backpropagation)

✅ Your `simple_backpropagation` passes some basic tests.
