In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
# os.walk yields nothing when the directory does not exist, so this is a no-op off Kaggle.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # join the containing directory with the file name to get the full path
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [17]:
%pip install nnfs
import nnfs
from nnfs.datasets import spiral_data

# Initialise the nnfs helper package before any layer is created.
# NOTE(review): per the nnfs docs this seeds NumPy's RNG and switches the default
# dtype to float32 (the float32-looking outputs below are consistent with that) - confirm.
nnfs.init()

# Dense layer 
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        n_inputs  -- number of values each sample feeds into the layer
        n_neurons -- number of neurons, i.e. values produced per sample
        """
        # small Gaussian weights, one column per neuron
        weight_scale = 0.01
        gaussian_draw = np.random.randn(n_inputs, n_neurons)
        self.weights = weight_scale * gaussian_draw

        # a single row of zero biases, broadcast over every sample
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store the layer output for ``inputs`` in ``self.output``.

        ``inputs`` must have ``n_inputs`` entries along its last axis,
        e.g. an array of shape (n_samples, n_inputs).
        """
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
        
# ReLU activation
class Activation_ReLU:
    """Rectified linear activation: negative inputs are replaced by zero."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified
        
# Softmax activation
class Activation_Softmax:
    """Softmax activation: turns each row of scores into a probability distribution."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``.

        ``inputs`` is reduced along axis 1, so it needs at least two dimensions.
        """
        # subtracting each row's maximum keeps the exponentials from overflowing
        row_max = np.max(inputs, axis=1, keepdims=True)
        unnormalized = np.exp(inputs - row_max)

        # divide every row by its own total so that it sums to 1
        row_totals = np.sum(unnormalized, axis=1, keepdims=True)
        self.output = unnormalized / row_totals

# a common loss class
class Loss:
    """Base class for loss functions.

    Subclasses implement ``forward(output, y)`` returning one loss value per
    sample; ``calculate`` reduces those values to their mean.
    """

    def forward(self, output, y):
        """Return the per-sample losses; must be overridden by subclasses."""
        raise NotImplementedError("Loss subclasses must implement forward(output, y)")

    def calculate(self, output, y):
        """Return the mean of the per-sample losses for ``output`` against targets ``y``."""

        # per-sample losses come from the concrete subclass
        sample_losses = self.forward(output, y)

        # the data loss is their mean
        data_loss = np.mean(sample_losses)

        return data_loss


class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss for sparse or one-hot encoded class labels."""

    # forward pass
    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class for each sample.

        y_pred -- predicted class probabilities, one row per sample
        y_true -- 1-D class indices, or a 2-D one-hot array with y_pred's shape

        Raises ValueError if ``y_true`` is neither 1-D nor 2-D.
        """
        # accept plain lists as well as arrays
        y_true = np.asarray(y_true)

        # number of samples
        samples = len(y_pred)

        # clip so that log(0) can never occur; clipping both ends by the same
        # amount avoids dragging the mean towards either side
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # sparse labels: pick the probability at each sample's class index
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # one-hot labels: the mask zeroes every probability but the target's
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class indices or a 2-D one-hot array, "
                "got %d dimensions" % y_true.ndim
            )

        # losses
        return -np.log(correct_confidences)

    # backward pass
    def backward(self, dvalues, y_true):
        """Store the gradient of the loss with respect to its inputs in ``self.dinputs``.

        dvalues -- the values the loss was computed on, one row per sample
        y_true  -- 1-D class indices or a 2-D one-hot array
        """
        # accept plain lists as well as arrays
        y_true = np.asarray(y_true)

        # number of samples (the original read the undefined name `values` here)
        samples = len(dvalues)

        # number of labels, taken from the width of the first sample
        labels = len(dvalues[0])

        # if labels are sparse, turn them into one-hot vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        # calculate gradient
        self.dinputs = -y_true / dvalues

        # normalize by the sample count
        self.dinputs = self.dinputs / samples

Note: you may need to restart the kernel to use updated packages.


In [18]:
# Sanity-check the loss on hand-written values (the worked calculation is on
# page 120 of the reference material this notebook follows).
# Each row holds one sample's three class probabilities.
softmax_outputs = np.array([[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])
# One-hot targets: sample 0 belongs to class 0, samples 1 and 2 to class 1.
class_targets = np.array([[1, 0, 0],
[0, 1, 0],
[0, 1, 0]])


# The result is the mean of -log(0.7), -log(0.5) and -log(0.9): the
# probabilities the targets select from each row.
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(softmax_outputs, class_targets)
print(loss)


0.38506088005216804


In [19]:
# Run the classes defined above on the spiral dataset.
# NOTE(review): X is assumed to hold 2 features per sample (it feeds a layer
# built with n_inputs=2) and y the class labels - confirm against nnfs.
X, y = spiral_data(samples = 100, classes = 3)

# create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# create ReLU activation (to be used with the first Dense layer)
activation1 = Activation_ReLU()

# create second Dense layer with 3 input features (the previous layer produces
# 3 values per sample) and 3 output values
dense2 = Layer_Dense(3, 3)

# create Softmax activation (to be used with the second Dense layer)
activation2 = Activation_Softmax()

# create loss function
loss_function = Loss_CategoricalCrossentropy()

# forward pass of the training data through the first Dense layer
dense1.forward(X)

# forward pass through ReLU, fed with the output of the first Dense layer
activation1.forward(dense1.output)

# forward pass through the second Dense layer, fed with the ReLU output
dense2.forward(activation1.output)

# forward pass through Softmax, fed with the output of the second Dense layer
activation2.forward(dense2.output)

# Show the predictions for the first five samples; with the tiny initial
# weights every class comes out at roughly 1/3.
print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.3333332  0.3333332  0.33333364]
 [0.3333329  0.33333293 0.3333342 ]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]


In [20]:
# perform a forward pass through the loss function
# it takes the softmax output (activation2.output) and the labels y, and returns the mean loss
loss = loss_function.calculate(activation2.output, y)

# print loss value; for near-uniform predictions over 3 classes this is close to -log(1/3) = 1.0986
print('loss: ', loss)

loss:  1.0986104
