In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Defining activation functions

In [2]:
class ReLU:
    """Rectified linear unit activation.

    ``forward`` stores its input and the element-wise ``max(0, x)`` result;
    ``backward`` passes the upstream gradient through wherever the stored
    input was strictly positive and zeroes it everywhere else.
    """

    def forward(self, inputs):
        """Store ``inputs`` and write ``max(0, inputs)`` to ``self.output``."""
        # The raw input is kept because backward() needs its sign pattern.
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Write the gradient w.r.t. the inputs to ``self.dinputs``.

        ``dvalues`` itself is left untouched; a copy is masked instead.
        """
        inactive = self.inputs <= 0
        grad = dvalues.copy()
        grad[inactive] = 0
        self.dinputs = grad

class Softmax:
    """Row-wise softmax activation."""

    def forward(self, inputs):
        """Compute softmax along axis 1 and store it in ``self.output``.

        The per-row maximum is subtracted before exponentiating so large
        values do not overflow ``np.exp``; the shift cancels out in the
        normalization and does not change the result.
        """
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Write the gradient w.r.t. the softmax inputs to ``self.dinputs``.

        For one sample with output ``s`` and upstream gradient ``d`` the
        Jacobian is ``diag(s) - s s^T``, so the product with ``d`` is
        ``s * (d - sum(s * d))``.  Evaluating that closed form for all rows
        at once avoids building one Jacobian matrix per sample in a Python
        loop, and the result is always floating point (an integer
        ``dvalues`` no longer truncates the gradient).
        """
        dvalues = np.asarray(dvalues)
        # Per-row dot product s . d, kept as a column so it broadcasts.
        row_dot = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - row_dot)

# Defining Loss

In [3]:
class Loss:
    """Base class for losses.

    Subclasses provide ``forward(output, y)`` returning per-sample losses;
    ``calculate`` reduces those to a single mean value.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses from ``self.forward``."""
        return np.mean(self.forward(output, y))

class CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss for sparse or one-hot labels."""

    def forward(self, y_pred, y_true):
        """Return the per-sample negative log-likelihoods.

        Parameters
        ----------
        y_pred : array of predicted probabilities, one row per sample.
        y_true : 1-D array-like of integer class indices, or a 2-D
            array-like of one-hot rows.

        Raises
        ------
        ValueError
            If ``y_true`` is neither 1-D nor 2-D.
        """
        y_true = np.asarray(y_true)
        samples = len(y_pred)
        # Clip so that log(0) can never occur.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            correct_confidences = y_pred_clipped[np.arange(samples), y_true]
        elif y_true.ndim == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimensions"
            )

        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        """Write the gradient w.r.t. the predictions to ``self.dinputs``.

        ``dvalues`` are the predicted probabilities.  They are clipped with
        the same bounds as in ``forward`` before dividing: a probability of
        exactly 0 would otherwise produce ``-0/0 = NaN`` (or ``inf``) and
        poison every later gradient.
        """
        y_true = np.asarray(y_true)
        samples = len(dvalues)
        labels = len(dvalues[0])

        # Expand sparse class indices into one-hot rows.
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        dvalues_clipped = np.clip(dvalues, 1e-7, 1 - 1e-7)
        # Normalize by batch size so the gradient matches the mean loss.
        self.dinputs = (-y_true / dvalues_clipped) / samples

# Defining neural layers

In [4]:
class dense_layer:
    """Fully connected layer computing ``inputs @ weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create small random weights of shape ``(n_inputs, n_neurons)``
        and zero biases of shape ``(1, n_neurons)``.

        Weights are drawn from NumPy's global RNG, so seed it beforehand
        if reproducible initialization is needed.
        """
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store the inputs and write the layer output to ``self.output``.

        ``inputs`` is converted to an ndarray once here, so lists or pandas
        objects can be passed and ``backward`` can rely on ``.T`` being a
        plain array transpose.
        """
        self.inputs = np.asarray(inputs)  # kept for the backward pass
        self.output = np.dot(self.inputs, self.weights) + self.biases

    def backward(self, dvalues):
        """Compute gradients from the upstream gradient ``dvalues``.

        Sets ``self.dweights``, ``self.dbiases`` (summed over the batch
        axis, shape ``(1, n_neurons)``) and ``self.dinputs``.
        """
        dvalues = np.asarray(dvalues)
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dinputs = np.dot(dvalues, self.weights.T)


# Defining Optimizers

In [5]:
class Adam:
    """Adam optimizer with optional inverse-time learning-rate decay.

    Intended call order per training step: ``pre_update_params()``, then
    ``update_params(layer)`` for every layer, then ``post_update_params()``.
    Per-layer moment estimates are stored on the layer objects themselves.
    """

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7,
                 beta_1=0.9, beta_2=0.999):
        """Store the hyper-parameters and reset the step counter."""
        # Hyper-parameters
        self.learning_rate = learning_rate
        self.decay = decay
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        # Mutable optimizer state
        self.current_learning_rate = learning_rate
        self.iterations = 0

    def pre_update_params(self):
        """Refresh ``current_learning_rate`` when decay is enabled."""
        if not self.decay:
            return
        self.current_learning_rate = self.learning_rate * \
            (1. / (1. + self.decay * self.iterations))

    def update_params(self, layer):
        """Apply one Adam step to ``layer.weights`` and ``layer.biases``.

        Reads ``layer.dweights`` / ``layer.dbiases`` and lazily creates the
        ``*_momentums`` / ``*_cache`` arrays on first use.
        """
        # First call for this layer: start both moment estimates at zero.
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)

        # Bias-correction denominators; iterations is 0-based, hence the +1.
        step = self.iterations + 1
        momentum_fix = 1 - self.beta_1 ** step
        cache_fix = 1 - self.beta_2 ** step

        # Weights and biases follow the exact same update rule.
        targets = (
            ('weights', 'dweights', 'weight_momentums', 'weight_cache'),
            ('biases', 'dbiases', 'bias_momentums', 'bias_cache'),
        )
        for param_attr, grad_attr, momentum_attr, cache_attr in targets:
            grad = getattr(layer, grad_attr)

            # Exponential moving averages of the gradient and its square.
            momentum = self.beta_1 * getattr(layer, momentum_attr) + \
                (1 - self.beta_1) * grad
            setattr(layer, momentum_attr, momentum)

            cache = self.beta_2 * getattr(layer, cache_attr) + \
                (1 - self.beta_2) * grad**2
            setattr(layer, cache_attr, cache)

            momentum_corrected = momentum / momentum_fix
            cache_corrected = cache / cache_fix

            # In-place add followed by re-assignment, like ``layer.x += ...``.
            param = getattr(layer, param_attr)
            param += -self.current_learning_rate * \
                momentum_corrected / \
                (np.sqrt(cache_corrected) + self.epsilon)
            setattr(layer, param_attr, param)

    def post_update_params(self):
        """Advance the step counter after all layers have been updated."""
        self.iterations += 1


# Trial

In [6]:
# Read both CSV files in one pass, then preview the training frame.
train, test = map(pd.read_csv, ('dataset/train.csv', 'dataset/test.csv'))
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Number of distinct values in the label column.
train['label'].unique().size

10

So each sample has 784 pixel values, and there are 10 unique labels in total.

The input layer will therefore have 784 neurons and the output layer will have 10 neurons (one per label).

In [8]:
# Target column first, then every remaining column as features.
y = train['label']
X = train.drop(columns=['label'])

In [9]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training

In [10]:
# dense_layer draws its initial weights from NumPy's global RNG; seed it so
# a Restart & Run All reproduces the same starting weights.
np.random.seed(42)

# 784 inputs -> 256 -> 64 -> 10 outputs, ReLU between the dense layers and
# softmax on the output layer.
layer1 = dense_layer(784, 256)
activation1 = ReLU()
layer2 = dense_layer(256, 64)
activation2 = ReLU()
layer3 = dense_layer(64, 10)
activation3 = Softmax()
loss_function = CategoricalCrossentropy()
optimizer = Adam(learning_rate=0.001)

In [11]:
epochs = 10  # Number of full passes over the training set
batch_size = 64  # Mini-batch size

# Convert once to NumPy so every batch is a cheap positional slice and the
# layers never have to re-convert pandas objects on each step.
X_train_arr = np.asarray(X_train, dtype=float)
y_train_arr = np.asarray(y_train)
n_samples = len(X_train_arr)

for epoch in range(epochs):
    correct_predictions = 0
    total_loss = 0.0

    for i in range(0, n_samples, batch_size):
        X_batch = X_train_arr[i:i+batch_size]
        y_batch = y_train_arr[i:i+batch_size]

        # Forward pass
        layer1.forward(X_batch)
        activation1.forward(layer1.output)

        layer2.forward(activation1.output)
        activation2.forward(layer2.output)

        layer3.forward(activation2.output)
        activation3.forward(layer3.output)

        # Loss calculation. calculate() returns the batch mean, so weight it
        # by the batch size: the last batch may be shorter than batch_size.
        loss = loss_function.calculate(activation3.output, y_batch)
        total_loss += loss * len(X_batch)

        predictions = np.argmax(activation3.output, axis=1)
        correct_predictions += np.sum(predictions == y_batch)

        # Backward pass (reverse order of the forward pass)
        loss_function.backward(activation3.output, y_batch)
        activation3.backward(loss_function.dinputs)
        layer3.backward(activation3.dinputs)

        activation2.backward(layer3.dinputs)
        layer2.backward(activation2.dinputs)

        activation1.backward(layer2.dinputs)
        layer1.backward(activation1.dinputs)

        # Update weights
        optimizer.pre_update_params()
        optimizer.update_params(layer1)
        optimizer.update_params(layer2)
        optimizer.update_params(layer3)
        optimizer.post_update_params()

    accuracy = correct_predictions / n_samples
    # Per-sample mean over the epoch, correct even when the number of
    # samples is not a multiple of batch_size.
    avg_loss = total_loss / n_samples
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1/10 - Loss: 0.2623, Accuracy: 0.9192
Epoch 2/10 - Loss: 0.1245, Accuracy: 0.9624
Epoch 3/10 - Loss: 0.0862, Accuracy: 0.9732
Epoch 4/10 - Loss: 0.0817, Accuracy: 0.9755
Epoch 5/10 - Loss: 0.0679, Accuracy: 0.9789
Epoch 6/10 - Loss: 0.0637, Accuracy: 0.9799
Epoch 7/10 - Loss: 0.0585, Accuracy: 0.9830
Epoch 8/10 - Loss: 0.0494, Accuracy: 0.9854
Epoch 9/10 - Loss: 0.0497, Accuracy: 0.9862
Epoch 10/10 - Loss: 0.0488, Accuracy: 0.9862


# IT'S DONEEEE!!!!