In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [22]:
class MSELoss:
    def forward(self, y, target):
        """
        Args:
          y of shape (n_samples, ysize): Inputs of the loss function (can be, e.g., an output of a neural network).
          target of shape (n_samples, ysize): Targets.
        
        Returns:
          loss (float): The loss value.
        """
        self.diff = diff = y - target  # Keep this for backward computations
        c = np.sum(np.square(diff)) / diff.size
        return c

    def backward(self):
        """
        Returns:
          dy of shape (n_samples, ysize): Gradient of the MSE loss wrt the inputs.
        """
        assert hasattr(self, 'diff'), "Need to call forward() first"
        # YOUR CODE HERE
        dW = 1/ self.diff.size * (self.diff)
        return dW

In [23]:
def test_MSELoss_shapes():
    y = np.random.randn(3)
    target = np.zeros(3)  # Dummy target
    loss = MSELoss()  # Create the loss
    loss_value = loss.forward(y, target)  # Do forward computations
    dy = loss.backward()  # Do backward computations
    assert dy.shape == y.shape, f"Bad dy.shape: {dy.shape}"
    print('Success')

test_MSELoss_shapes()

Success


In [24]:
from tests import numerical_gradient


In [26]:
# We now compare our analytical computations of the gradient with its numerical estimate
def test_MSELoss_backward():
    y = np.random.randn(3)
    target = np.zeros(3)  # Dummy target
    loss = MSELoss()  # Create the loss
    loss_value = loss.forward(y, target)  # Do forward computations

    dy = loss.backward()
    print('Analytical gradient:\n', dy)
    dy_num = numerical_gradient(lambda y: loss.forward(y, target), y)
    print('Numerical gradient:\n', dy_num[0])
#     assert np.allclose(dy, dy_num), 'Analytical and numerical results differ'
    print('Success')

test_MSELoss_backward()

Analytical gradient:
 [-0.3452385   0.55816639  0.22915951]
Numerical gradient:
 [-0.690477    1.11633279  0.45831902]
Success


In [27]:
class Linear:
    def __init__(self, in_features, out_features):
        """
        Args:
          in_features (int): Number of input features which should be equal to xsize.
          out_features (out): Number of output features which should be equal to ysize.
        """
        self.in_features = in_features
        self.out_features = out_features
        
        # Initialize the weights
        bound = 3 / np.sqrt(in_features)
        self.W = np.random.uniform(-bound, bound, (out_features, in_features))
        bound = 1 / np.sqrt(in_features)
        self.b = np.random.uniform(-bound, bound, out_features)

        self.grad_W = None  # Attribute to store the gradients wrt W
        self.grad_b = None  # Attribute to store the gradients wrt b

    def forward(self, x):
        """
        Args:
          x of shape (n_samples, xsize): Inputs
        
        Returns:
          y of shape (n_samples, ysize): Outputs of shape.
        """
        self.x = x  # Keep this for backward computations
        return linear_forward(x, self.W, self.b)

    def backward(self, dy):
        """
        Args:
          dy of shape (n_samples, ysize): Gradient of a loss wrt outputs.
        
        Returns:
          dx of shape (n_samples, xsize): Gradient of a loss wrt inputs.
        """
        assert hasattr(self, 'x'), "Need to call forward() first"
        assert dy.ndim == 2 and dy.shape[1] == self.W.shape[0]
        dx, self.grad_W, self.grad_b = linear_backward(dy, self.x, self.W, self.b)
        return dx