In [2]:
import numpy as np

In [None]:
class layer_dense:
    """Neural network dense (fully connected) layer.

    Attributes set by the methods below:
        weights:  array of shape (n_inputs, n_neurons).
        biases:   array of shape (1, n_neurons).
        inputs:   the inputs seen by the last ``forward`` call.
        output:   ``inputs . weights + biases`` from the last ``forward`` call.
        dinputs:  gradient with respect to the layer inputs (after ``backward``).
        dweights: gradient with respect to ``weights`` (after ``backward``).
        dbiases:  gradient with respect to ``biases`` (after ``backward``).
    """

    def __init__(self, n_inputs, n_neurons):
        """Initialize weights with small random values and biases with zeros.

        Args:
            n_inputs: number of input features per sample.
            n_neurons: number of neurons (output features) of the layer.
        """
        # Standard-normal samples scaled by 0.1.
        self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output and store it in ``self.output``.

        Args:
            inputs: 2-D array-like with one sample per row and ``n_inputs``
                columns.
        """
        # Remember the inputs: backward() needs them for the weight gradient.
        self.inputs = np.asarray(inputs)
        self.output = self.inputs.dot(self.weights) + self.biases

    def backward(self, dinputs):
        """Back-propagate the gradient coming from the next layer.

        Args:
            dinputs: gradient of the loss with respect to this layer's
                output (same shape as ``self.output``).  The parameter keeps
                its original name for backward compatibility; inside the
                method it is referred to as ``dvalues``.

        Stores ``self.dinputs``, ``self.dweights`` and ``self.dbiases``.
        """
        # The original body read an undefined name `dvalues`; bind it
        # explicitly to the argument that was actually passed in.
        dvalues = np.asarray(dinputs)

        # Gradient with respect to the layer inputs.
        self.dinputs = dvalues.dot(self.weights.T)

        # Gradients on parameters.
        self.dweights = self.inputs.T.dot(dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)

        
class activation_ReLU:
    """Rectified linear unit (ReLU) activation function.

    Attributes set by the methods below:
        inputs:  the inputs seen by the last ``forward`` call.
        output:  ``max(0, inputs)`` element-wise.
        dinputs: gradient with respect to the inputs (after ``backward``).
    """

    def forward(self, inputs):
        """Apply ReLU element-wise and store the result in ``self.output``.

        Args:
            inputs: array-like of pre-activation values.
        """
        # Remember input values: backward() masks the gradient with them.
        self.inputs = np.asarray(inputs)
        self.output = np.maximum(0, self.inputs)

    def backward(self, dinputs):
        """Back-propagate the gradient through the ReLU.

        Args:
            dinputs: gradient of the loss with respect to this activation's
                output (same shape as ``self.inputs``).  The parameter keeps
                its original name for backward compatibility.

        Stores the result in ``self.dinputs``; the argument is not modified.
        """
        # The original body read an undefined name `dvalues`; use the
        # argument that was actually passed in.  Copy so the caller's array
        # is left untouched by the in-place masking below.
        self.dinputs = np.array(dinputs, copy=True)

        # The gradient is zero wherever the input was not positive.
        self.dinputs[self.inputs <= 0] = 0
        
class activation_softmax:
    """Softmax activation function.

    Attributes set by the methods below:
        output:  row-wise softmax probabilities from the last ``forward``.
        dinputs: gradient with respect to the inputs (after ``backward``).
        dinput:  alias of ``dinputs`` (the name originally used by this
                 class), kept for backward compatibility.
    """

    def forward(self, inputs):
        """Compute row-wise softmax probabilities into ``self.output``.

        Args:
            inputs: 2-D array with one sample per row.
        """
        # Subtract the row maximum before exponentiating so that np.exp
        # never sees a large positive argument; the result is unchanged.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Back-propagate the gradient through the softmax.

        Args:
            dvalues: gradient from the loss function with respect to the
                softmax output (same shape as ``self.output``).

        Stores the result in ``self.dinputs`` (also reachable as
        ``self.dinput``).
        """
        dvalues = np.asarray(dvalues)

        # Initialize the gradient array.
        self.dinputs = np.zeros(dvalues.shape)

        # For each data point, compute the gradient.
        for index, (output_row, dvalues_row) in enumerate(
            zip(self.output, dvalues)
        ):
            # Turn the probabilities into a column vector so that
            # column.dot(column.T) is an outer product (n x n matrix).
            # The original code reshaped an undefined `single_output` here.
            column = output_row.reshape(-1, 1)

            # Jacobian of the softmax output: diag(s) - s s^T.
            derivative_matrix = np.diagflat(column) - column.dot(column.T)

            # Store this sample's gradient.
            self.dinputs[index] = derivative_matrix.dot(dvalues_row)

        # Same array under the original attribute name.
        self.dinput = self.dinputs

# common loss class.
# We’ll be adding more loss functions and some of the operations that we’ll be performing are common for all of them
class loss:
    """Base class holding the operations shared by all loss functions.

    Subclasses provide ``forward(output, y)``, which returns the
    per-sample losses.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: model predictions, passed through to ``self.forward``.
            y: target values, passed through to ``self.forward``.
        """
        per_sample = self.forward(output, y)
        return np.mean(per_sample)
    
# cross-entropy loss        
class loss_crossentropy(loss):
    """Categorical cross-entropy loss for integer class labels.

    Attributes set by ``backward``:
        dinputs: gradient of the mean loss with respect to the predicted
                 probabilities.
        dinput:  alias of ``dinputs``, kept for backward compatibility.
    """

    def forward(self, y_pred, y_true):
        """Return the per-sample cross-entropy losses.

        Args:
            y_pred: 2-D array of predicted probabilities, one sample per row.
            y_true: 1-D sequence of integer class indices, one per sample.

        Returns:
            1-D array with ``-log(p)`` of the true class for every sample.
        """
        # Number of data points.
        samples = len(y_pred)

        # Clip so that a probability of exactly 0 never reaches np.log
        # (the upper bound mirrors the lower one).
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Predicted probability of the true class for each sample.
        confidence_values = y_pred_clipped[np.arange(samples), y_true]

        # Per-sample losses.
        return -np.log(confidence_values)

    def backward(self, dvalues, y_true):
        """Compute the gradient of the mean loss w.r.t. the predictions.

        Args:
            dvalues: 2-D array of predicted probabilities (the softmax
                output), one sample per row.
            y_true: 1-D sequence of integer class indices, one per sample.

        Stores the result in ``self.dinputs`` (also reachable as
        ``self.dinput``).
        """
        dvalues = np.asarray(dvalues, dtype=float)

        # Number of samples.
        n_samples = len(dvalues)
        rows = np.arange(n_samples)

        # The gradient is -1/p at the true class and 0 everywhere else.
        # Writing only the true-class entries avoids the 0/0 = NaN that the
        # dense form (-one_hot / dvalues) produces when a non-target
        # probability is exactly 0.
        gradient = np.zeros(dvalues.shape)
        gradient[rows, y_true] = -1.0 / dvalues[rows, y_true]

        # Normalize by the number of samples (the loss is a mean).
        # The original read the never-assigned `self.dinput` here.
        self.dinputs = gradient / n_samples

        # Same array under the other attribute name used by the original.
        self.dinput = self.dinputs

In [4]:
# Example probability vector (entries sum to 1), used in the cells below to
# illustrate the two terms of the softmax derivative matrix.
p = np.array([0.9, 0.1])
p

array([0.9, 0.1])

In [7]:
# Reshape into a column vector (one row per entry) so that p.dot(p.T)
# yields a square matrix instead of a scalar.
p = p.reshape(-1,1)
p

array([[0.9],
       [0.1]])

In [10]:
# First term of the derivative matrix: the entries of p on the diagonal.
np.diagflat(p)

array([[0.9, 0. ],
       [0. , 0.1]])

In [11]:
# Second term of the derivative matrix: the outer product of p with itself.
p.dot(p.T)

array([[0.81, 0.09],
       [0.09, 0.01]])