Simple implementation of a two-layer neural network.

In [4]:
import numpy as np
import random

# Set up the toy problem: network sizes, input batch X and labels y.
# The global NumPy seed is fixed first so every draw below is reproducible;
# the order of the random draws (X, y, W1, W2) must not change.
np.random.seed(231)

# N samples, D input features, H1 hidden units, C classes.
N = 3
D = 5
H1 = 50
C = 7

# Gaussian inputs and integer class labels drawn from [0, C).
X = np.random.standard_normal(size=(N, D))
y = np.random.randint(0, C, size=N)

# Layer parameters: small Gaussian weights, all-zero biases.
weight_scale = 5e-2
W1 = np.random.standard_normal(size=(D, H1)) * weight_scale
b1 = np.zeros(H1)
W2 = np.random.standard_normal(size=(H1, C)) * weight_scale
b2 = np.zeros(C)

# Network's architecture: (affine - relu) - (affine - softmax).
class TwoLayerNet:
    """Building blocks of an (affine - relu) - (affine - softmax) network.

    The class stores no parameters: every method receives the arrays it
    needs as arguments and returns its result (plus, for the forward
    methods, a cache that the matching backward method consumes).
    """

    def __init__(self):
        pass

    def affine_forward(self, x, w, b):
        """Compute the affine map ``x_flat.dot(w) + b``.

        Args:
            x: input batch; it is reshaped to ``(x.shape[0], -1)`` before
                the matrix product, so trailing axes are flattened.
            w: weight matrix multiplied on the right of the flattened input.
            b: bias added (with broadcasting) to the product.

        Returns:
            ``(out, cache)`` where ``cache`` is the tuple ``(x, w, b)``.
        """
        out = x.reshape(x.shape[0], -1).dot(w) + b
        cache = (x, w, b)
        return out, cache

    def affine_backward(self, dout, cache):
        """Gradients of an affine layer with respect to its parameters.

        Args:
            dout: upstream gradient with respect to the affine output.
            cache: the ``(x, w, b)`` tuple returned by ``affine_forward``.

        Returns:
            ``(dw, db)``: gradient with respect to the weights and the bias.
        """
        x, w, b = cache
        # Flatten x exactly as affine_forward does so dw matches w's shape
        # even when x has more than two axes.
        dw = x.reshape(x.shape[0], -1).T.dot(dout)
        db = np.sum(dout, axis=0)
        # Return the bias *gradient* (db), not the cached bias values (b).
        return dw, db

    def relu_forward(self, x):
        """Apply ``max(0, x)`` elementwise.

        Returns:
            ``(out, cache)`` where ``cache`` is the input ``x`` itself.
        """
        out = np.maximum(0, x)
        cache = x
        return out, cache

    def relu_backward(self, dout, cache, w=None):
        """Back-propagate through the second affine layer and the ReLU.

        Note that ``dout`` is the gradient with respect to the output of the
        affine layer that *follows* the ReLU: this method first maps it back
        through that layer's weights (``dout.dot(w.T)``) and then masks it
        with the ReLU derivative.

        Args:
            dout: gradient with respect to the scores of the following
                affine layer.
            cache: the ReLU input saved by ``relu_forward``.
            w: weights of the following affine layer. When omitted, the
                module-level ``W2`` is used (the original behaviour), which
                raises ``NameError`` if no such global exists.

        Returns:
            Gradient with respect to the ReLU input.
        """
        x = cache
        if w is None:
            w = W2  # legacy fallback: second-layer weights defined at module level
        # e.g. (3, 50) * ((3, 7) . (7, 50))
        return np.where(x > 0, 1, 0) * dout.dot(w.T)

    def softmax_loss(self, x, y):
        """Mean softmax cross-entropy loss and its gradient.

        Args:
            x: 2-D array of scores, one row per sample, one column per class.
            y: integer class index for each row of ``x``.

        Returns:
            ``(loss, dL)``: the mean negative log-probability of the correct
            classes, and the gradient of that mean loss with respect to ``x``.
        """
        # Subtract the row-wise max before exponentiating for numerical stability.
        exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
        softmax = exp_values / np.sum(exp_values, axis=1, keepdims=True)

        rows = np.arange(len(y))
        correct_scores = np.zeros(x.shape)  # one-hot encoding of y
        correct_scores[rows, y] = 1

        loss = np.mean(-np.log(softmax[rows, y]))
        # Dividing by the number of rows accounts for the mean in the loss.
        dL = (softmax - correct_scores) / len(softmax)
        return loss, dL
        

In [5]:
# Forward pass.
# Push the batch through affine -> relu -> affine and score the result with
# the softmax loss. Each cache is kept because the backward-pass cell reads it.
nn = TwoLayerNet()
layer_1, cache_l1 = nn.affine_forward(x=X, w=W1, b=b1)
relu, cache_relu = nn.relu_forward(x=layer_1)
layer_2, cache_l2 = nn.affine_forward(x=relu, w=W2, b=b2)
loss, dL = nn.softmax_loss(x=layer_2, y=y)

print(f'loss: {loss}')
print(f'shape of the derivative of the loss wrt. the softmax values (layer_2): {dL.shape}')

loss: 1.930094367227203
shape of the derivative of the loss wrt. the softmax values (layer_2): (3, 7)


In [6]:
# Backward pass.
# Walk the layers in reverse order, collecting what affine_backward returns
# for each affine layer. relu_backward is handed dL (the gradient w.r.t. the
# layer_2 scores) together with the cached ReLU input.
grads = {}
grads['W2'], grads['b2'] = nn.affine_backward(dL, cache_l2)
drelu = nn.relu_backward(dL, cache_relu)
grads['W1'], grads['b1'] = nn.affine_backward(drelu, cache_l1)

# Report the shapes in the order the gradients were produced.
print('the shape of the gradients of W2:', grads['W2'].shape)
print('the shape of the gradients of b2:', grads['b2'].shape)
print('the shape of the gradients of relu:', drelu.shape)
print('the shape of the gradients of W1:', grads['W1'].shape)
print('the shape of the gradients of b1:', grads['b1'].shape)


the shape of the gradients of W2: (50, 7)
the shape of the gradients of b2: (7,)
the shape of the gradients of relu: (3, 50)
the shape of the gradients of W1: (5, 50)
the shape of the gradients of b1: (50,)
