In [6]:
import numpy as np


In [1]:
def softmax(x):
    """
    Calculate the softmax over n input samples.

    Each row of x is normalised independently (the sum runs over axis 1), so
    every row of the result is a probability distribution.

    @param x: of shape (n, m). n is the number of data samples, m is the dimension
    @return: tuple (s, cache) where s has the same shape as x and holds the
             softmax of each row, and cache is the unmodified input x
    """
    # Subtracting the per-row maximum leaves the softmax mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the division
    # from producing nan) when the inputs are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    x_sum = np.sum(x_exp, axis=1, keepdims=True)
    s = x_exp / x_sum
    cache = x
    return s, cache


def relu(Z):
    """
    Apply the rectified linear unit element-wise: max(0, z).

    @param Z: pre-activation values (array-like of any shape)
    @return: tuple (A, cache) where A is Z with negative entries replaced by
             0, and cache is the unmodified input Z
    """
    # np.maximum (element-wise), not np.max (reduction over an axis).
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

In [None]:
def initialize_params_deep(layer_dims):
    """
    Build the weight and bias arrays for every layer of the network.

    The global NumPy random generator is re-seeded with 7 on every call, so
    repeated calls with the same layer_dims return identical values.

    NOTE(review): np.random.rand draws from a uniform distribution on [0, 1),
    so every initial weight is non-negative. np.random.randn may have been
    intended -- confirm before relying on this initialisation.

    @param layer_dims: python array containing the dimensions of each layer,
                       input layer first
    @return: dict with, for each layer index k = 1 .. len(layer_dims) - 1,
             'Wk' of shape (layer_dims[k], layer_dims[k-1]) holding random
             values scaled by 0.01, and 'bk' of shape (layer_dims[k], 1)
             holding zeros
    """
    np.random.seed(7)
    layer_params = {}

    for idx in range(1, len(layer_dims)):
        n_out = layer_dims[idx]
        n_in = layer_dims[idx - 1]
        layer_params["W{}".format(idx)] = np.random.rand(n_out, n_in) * 0.01
        layer_params["b{}".format(idx)] = np.zeros((n_out, 1))

    return layer_params


def linear_forward(A, W, b):
    """
    Compute the linear (pre-activation) part of one layer: Z = W . A + b.

    The operand order follows the parameter shapes produced by
    initialize_params_deep: W is (n_curr, n_prev) and b is (n_curr, 1), so A
    must be (n_prev, m) with one sample per column. b is broadcast across the
    m columns.

    @param A: activations from the previous layer (or the input data),
              shape (n_prev, m)
    @param W: weight matrix, shape (n_curr, n_prev)
    @param b: bias column vector, shape (n_curr, 1)
    @return: tuple (Z, cache) where Z has shape (n_curr, m) and cache is the
             tuple (A, W, b) of the inputs
    """
    # W must be the left operand: np.dot(A, W) only conforms to these shapes
    # when the layer happens to be square.
    Z = np.dot(W, A) + b
    cache = (A, W, b)

    return Z, cache


def linear_activation_forward(A_prev, W, b, activation="relu"):
    """
    Run one full layer: the linear step followed by its activation.

    @param A_prev: activations from the previous layer (or the input data)
    @param W: weight matrix of this layer
    @param b: bias vector of this layer
    @param activation: "relu" (default) or "softmax"
    @return: tuple (A, cache) where A is the layer output and cache is
             (linear_cache, activation_cache): the (A_prev, W, b) tuple from
             linear_forward and the pre-activation Z
    @raise ValueError: if activation is not one of the supported names
    """
    # Reject unknown names up front; otherwise neither branch would run and
    # the function would fail later on an unbound local.
    if activation not in ("relu", "softmax"):
        raise ValueError(
            "activation must be 'relu' or 'softmax', got %r" % (activation,))

    # The linear step is the same for both activations.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == "relu":
        A, activation_cache = relu(Z)
    else:
        # softmax() normalises along axis 1 (one sample per row), while the
        # parameter shapes and compute_cost in this notebook keep one sample
        # per column. Transpose on the way in and out so that each sample's
        # class scores are normalised together.
        probs, _ = softmax(Z.T)
        A = probs.T
        activation_cache = Z

    cache = (linear_cache, activation_cache)

    return A, cache


def L_model_forward(X, params):
    """
    Run the forward pass of the whole network: ReLU on every hidden layer,
    softmax on the output layer.

    @param X: input data, fed unchanged into the first layer
    @param params: dict holding 'W1', 'b1', ..., 'WL', 'bL' (the layout
                   returned by initialize_params_deep)
    @return: tuple (AL, caches) where AL is the output of the softmax layer
             and caches is a list with one cache per layer, in layer order
    """
    caches = []
    A = X
    # Every layer contributes exactly two entries (W and b).
    L = len(params) // 2

    # Hidden layers 1 .. L-1.
    for l in range(1, L):
        A, cache = linear_activation_forward(
            A,
            params['W' + str(l)],
            params['b' + str(l)],
            activation="relu")
        caches.append(cache)

    # Output layer L takes the last hidden activation (or X itself when the
    # network has a single layer).
    AL, cache = linear_activation_forward(
        A,
        params['W' + str(L)],
        params['b' + str(L)],
        activation="softmax")
    caches.append(cache)

    return AL, caches

In [7]:
def convert_to_onehot(Y, C):
    """
    One-hot encode integer class labels.

    @param Y: numpy array of integer class indices; it is flattened first,
              so any shape is accepted
    @param C: number of classes
    @return: float array of shape (C, number of labels); column j has a 1 in
             row Y[j] and 0 everywhere else
    """
    identity = np.eye(C)
    labels = Y.reshape(-1)
    # Row k of the identity matrix is the one-hot vector for class k; picking
    # one row per label and transposing puts each label in its own column.
    one_hot_rows = identity[labels]
    return one_hot_rows.T


def compute_cost(AL, Y):
    """
    Implement the cross-entropy loss function for one-hot encoded Y.

    @param AL: predicted class probabilities, same shape as Y
    @param Y: one-hot labels; Y.shape[1] is used as the number of data
              points, so each column is treated as one sample
    @return: the cost -(1/m) * sum(Y * log(AL)), passed through np.squeeze
    """
    m = Y.shape[1]   # Number of input data points

    # Only entries with a non-zero label contribute to the sum. Replacing the
    # other probabilities by 1.0 makes their log exactly 0, so a probability
    # that underflowed to 0 for a non-target class no longer turns the whole
    # cost into nan through 0 * log(0).
    labelled_probs = np.where(Y != 0, AL, 1.0)
    cost = np.sum(np.multiply(Y, np.log(labelled_probs))) * (-1.0 / m)
    cost = np.squeeze(cost)

    return cost

In [17]:
# Sanity check: five labels and four classes give a (4, 5) matrix in which
# column j has its single 1 in the row of label j.
convert_to_onehot(np.array([3,2,1,0,0]), 4)

array([[0., 0., 0., 1., 1.],
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.]])