In [1]:
#-----------------------------------------------------------------------------------
# Implementing a Neural Network. In this exercise we will develop a neural
# network with fully-connected layers to perform classification, and test it
# out on the CIFAR-10 dataset. First, a bit of setup.
#-----------------------------------------------------------------------------------
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from builtins import range
from builtins import object

#from two_layernet import TwoLayerNet
#from gradient_check import eval_numerical_gradient
#from data_utils import get_CIFAR10_data
#from vis_utils import visualize_grid
#-------------------------- * End of setup *-----------------

In [2]:
#-------------------------------------------------------
# Some helper functions
# ------------------------------------------------------
def rel_error(x, y):
    """Return the largest element-wise relative error between ``x`` and ``y``.

    Each absolute difference is divided by ``|x| + |y|``; that denominator is
    floored at 1e-8 so that entries where both values are zero do not divide
    by zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

def show_net_weights(net):
    """Plot the first-layer weights of ``net`` as a grid of small images.

    ``net.params['W1']`` is reshaped to (32, 32, 3, -1) and the last axis is
    moved to the front, so each hidden unit becomes one 32x32x3 tile.
    """
    # NOTE(review): visualize_grid is not defined in this notebook; its import
    # is commented out in the setup cell. Confirm it is in scope before calling.
    first_layer_weights = net.params['W1']
    tiles = first_layer_weights.reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
    grid = visualize_grid(tiles, padding=3)
    plt.imshow(grid.astype('uint8'))
    plt.gca().axis('off')
    plt.show()

#-------------------------- * End of helper functions *--------------------------------


In [22]:
# Toy logits (2 samples x 3 classes) used below to walk through the softmax step by step.
scores = np.array([
    [3.0, 1.0, 2.0],
    [5.0, 3.0, 9.0],
])

In [39]:
# Per-row normalizer of the softmax: sum of exponentiated scores, shape (2,).
np.exp(scores).sum(axis=1)

array([  30.19287485, 8271.5826236 ])

In [49]:
# Same normalizer, but keepdims=True keeps shape (2, 1) so it broadcasts against the scores.
np.exp(scores).sum(axis=1, keepdims=True)

array([[  30.19287485],
       [8271.5826236 ]])

In [48]:
# This is essentially the softmax: exponentiate, then normalize so each row sums to 1.
np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)

array([[0.66524096, 0.09003057, 0.24472847],
       [0.01794253, 0.00242826, 0.97962921]])

In [27]:
class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network. The net has an input dimension of
    D, a hidden layer dimension of H, and performs classification over C classes.
    We train the network with a softmax loss function and L2 regularization on the
    weight matrices. The network uses a ReLU nonlinearity after the first fully
    connected layer.

    In other words, the network has the following architecture:

    input - fully connected layer - ReLU - fully connected layer - softmax

    The outputs of the second fully-connected layer are the scores for each class.
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """
        Initialize the model. Weights are initialized to small random values and
        biases are initialized to zero. Weights and biases are stored in the
        variable self.params, which is a dictionary with the following keys:

        W1: First layer weights; has shape (D, H)
        b1: First layer biases; has shape (H,)
        W2: Second layer weights; has shape (H, C)
        b2: Second layer biases; has shape (C,)

        Inputs:
        - input_size: The dimension D of the input data.
        - hidden_size: The number of neurons H in the hidden layer.
        - output_size: The number of classes C.
        - std: Scale applied to the standard-normal samples used for the weights.
        """
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two-layer fully connected neural
        network.

        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
          an integer in the range 0 <= y[i] < C. This parameter is optional; if it
          is not passed then we only return scores, and if it is passed then we
          instead return the loss and gradients.
        - reg: Regularization strength.

        Returns:
        If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
        the softmax probability of class c on input X[i].

        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of training
          samples.
        - grads: Dictionary mapping parameter names to gradients of those parameters
          with respect to the loss function; has the same keys as self.params.
        """

        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']  # shapes (D, H) and (H,)
        W2, b2 = self.params['W2'], self.params['b2']  # shapes (H, C) and (C,)
        N, D = X.shape

        # Compute the forward pass
        scores = 0.

        #############################################################################
        # TODO: Perform the forward pass, computing the class probabilities for the #
        # input. Store the result in the scores variable, which should be an array  #
        # of shape (N, C).                                                          #
        #############################################################################

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        hidden_pre = np.matmul(X, W1) + b1          # (N, H) pre-activations
        hidden = np.maximum(hidden_pre, 0)          # ReLU
        logits = np.matmul(hidden, W2) + b2         # (N, C) unnormalized class scores

        # Subtract the row-wise max before exponentiating. The softmax is
        # invariant to this shift, and it keeps exp() from overflowing to inf
        # (which would otherwise produce NaN probabilities).
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exp_shifted = np.exp(shifted)
        exp_sums = np.sum(exp_shifted, axis=1, keepdims=True)
        scores = exp_shifted / exp_sums

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores

        # Compute the loss
        loss = 0.
        #############################################################################
        # TODO: Finish the forward pass, and compute the loss. This should include  #
        # both the data loss and L2 regularization for W1 and W2. Store the result  #
        # in the variable loss, which should be a scalar. Use the Softmax           #
        # classifier loss.                                                          #
        #############################################################################

        # Implement the loss for the softmax output layer

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        y = np.asarray(y)
        sample_idx = np.arange(N)

        # Work with log-probabilities directly instead of log(scores): this is
        # finite even when a probability underflows to exactly 0.
        log_probs = shifted - np.log(exp_sums)

        # Pick the log-probability of the correct class of every sample by
        # indexing. Unlike a one-hot matrix sized from y.max(), this also works
        # when the batch does not contain the highest class label.
        data_loss = -np.sum(log_probs[sample_idx, y]) / N
        regularization_loss = reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        loss = data_loss + regularization_loss

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Backward pass: compute gradients
        grads = {}

        ##############################################################################
        # TODO: Implement the backward pass, computing the derivatives of the weights#
        # and biases. Store the results in the grads dictionary. For example,        #
        # grads['W1'] should store the gradient on W1, and be a matrix of same size  #
        ##############################################################################

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Gradient of the mean cross-entropy w.r.t. the logits: (p - onehot) / N.
        dlogits = scores.copy()
        dlogits[sample_idx, y] -= 1.0
        dlogits /= N

        # Second layer. The regularization term reg * sum(W**2) contributes 2 * reg * W.
        grads['W2'] = np.matmul(hidden.T, dlogits) + 2.0 * reg * W2
        grads['b2'] = np.sum(dlogits, axis=0)

        # Back through the ReLU: gradient only flows where the unit was active.
        dhidden = np.matmul(dlogits, W2.T)
        dhidden[hidden_pre <= 0] = 0.0

        # First layer.
        grads['W1'] = np.matmul(X.T, dhidden) + 2.0 * reg * W1
        grads['b1'] = np.sum(dhidden, axis=0)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return loss, grads



In [28]:
# Create a small net and some toy data to check your implementations.
# Note that we set the random seed for repeatable experiments.

# Dimensions of the toy problem used to sanity-check the implementation.
input_size, hidden_size = 4, 10   # D input features, H hidden units
num_classes, num_inputs = 3, 5    # C output classes, N samples

def init_toy_model():
    """Build the toy TwoLayerNet after seeding NumPy's global RNG with 0.

    The fixed seed makes the randomly initialized weights repeatable, so the
    reference scores and loss below can be compared against.
    """
    np.random.seed(0)
    toy_net = TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)
    return toy_net

def init_toy_data():
    """Return the toy inputs and labels after seeding NumPy's global RNG with 1.

    The inputs are standard-normal samples of shape (num_inputs, input_size)
    scaled by 10. The labels are hard-coded for five samples.
    """
    np.random.seed(1)
    features = 10 * np.random.randn(num_inputs, input_size)
    labels = np.array([0, 1, 2, 2, 1])
    return features, labels

# Instantiate the seeded toy network and the toy batch used by the checks below.
net = init_toy_model()
X, y = init_toy_data()

In [29]:
# Forward pass only: without labels, net.loss returns the class probabilities.
scores = net.loss(X)

# Reference values for the seeded toy model and data.
correct_scores = np.asarray([
    [0.36446210, 0.22911264, 0.40642526],
    [0.47590629, 0.17217039, 0.35192332],
    [0.43035767, 0.26164229, 0.30800004],
    [0.41583127, 0.29832280, 0.28584593],
    [0.36328815, 0.32279939, 0.31391246],
])

print('Your scores:')
print(scores)
print()
print('correct scores:')
print(correct_scores)
print()

Your scores:
[[0.3644621  0.22911264 0.40642526]
 [0.47590629 0.17217039 0.35192332]
 [0.43035767 0.26164229 0.30800004]
 [0.41583127 0.2983228  0.28584593]
 [0.36328815 0.32279939 0.31391246]]

correct scores:
[[0.3644621  0.22911264 0.40642526]
 [0.47590629 0.17217039 0.35192332]
 [0.43035767 0.26164229 0.30800004]
 [0.41583127 0.2983228  0.28584593]
 [0.36328815 0.32279939 0.31391246]]



In [30]:
# Total absolute deviation from the reference scores; it should be tiny (we get < 1e-7).
print('Difference between your scores and correct scores:')
print(np.abs(scores - correct_scores).sum())


Difference between your scores and correct scores:
2.917341163088949e-08


In [35]:
#y_onehot = np.zeros((y.size, y.max()+1))
#y_onehot[np.arange(y.size),y] = 1 #

In [36]:
#y_onehot*scores

array([[0.3644621 , 0.        , 0.        ],
       [0.        , 0.17217039, 0.        ],
       [0.        , 0.        , 0.30800004],
       [0.        , 0.        , 0.28584593],
       [0.        , 0.32279939, 0.        ]])

In [37]:
#scores

array([[0.3644621 , 0.22911264, 0.40642526],
       [0.47590629, 0.17217039, 0.35192332],
       [0.43035767, 0.26164229, 0.30800004],
       [0.41583127, 0.2983228 , 0.28584593],
       [0.36328815, 0.32279939, 0.31391246]])

In [31]:
# Forward pass with labels: net.loss now also computes the data and
# regularization loss and returns a (loss, grads) tuple.
# Only the loss is needed here, so the gradients are discarded into `_`.
loss, _ = net.loss(X, y, reg=0.05)
correct_loss = 1.30378789133

# The deviation from the reference loss should be very small (we get < 1e-12).
print('Difference between your loss and correct loss:')
print(np.abs(loss - correct_loss).sum())

Difference between your loss and correct loss:
1.794120407794253e-13
