## Neural Network to classify MNIST data

In [1]:
import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'numpy'

##### Loading MNIST data, transformation and normalization

In [None]:
# Load MNIST, flatten every image into a 784-element row and rescale the
# pixel values by 1/255 (presumably mapping 8-bit intensities to [0, 1]).

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Let NumPy infer the number of samples (-1) instead of hard-coding
# 60000 / 10000, so the cell keeps working on a subset of the data.
# Converting to float64 before dividing avoids a second full-size copy.
train_data = np.reshape(x_train, (-1, 784)).astype(np.float64) / 255.0
train_label = to_categorical(y_train)

test_data = np.reshape(x_test, (-1, 784)).astype(np.float64) / 255.0
test_label = to_categorical(y_test)

print(f"train_data shape={np.shape(train_data)}")
print(f"train_label shape={np.shape(train_label)}")
print(f"test_data shape={np.shape(test_data)}")
print(f"test_label shape={np.shape(test_label)}")


In [None]:
def relu(Z):
    """Apply the rectified linear unit element-wise: max(0, z)."""
    floor = 0
    rectified = np.maximum(floor, Z)
    return rectified

def relu_derivative(Z):
    """Return the element-wise derivative of ReLU evaluated at ``Z``.

    The result is 1 where ``Z > 0`` and 0 elsewhere, with the same shape and
    dtype as ``Z``.

    A new array is returned; ``Z`` itself is left untouched.  Overwriting the
    argument in place would destroy the caller's pre-activation values (for
    example the ``Z`` entries a network keeps cached between the forward and
    backward pass).
    """
    Z = np.asarray(Z)
    return (Z > 0).astype(Z.dtype)

def softmax(Z):
    """
    Convert the scores in Z into softmax probabilities along the last axis.

    The maximum along that axis is subtracted before exponentiating so that
    large scores do not overflow; the result is unchanged by this shift.
    """
    peak = np.max(Z, axis=-1, keepdims=True)
    exponentials = np.exp(Z - peak)
    normaliser = np.sum(exponentials, axis=-1, keepdims=True)
    return exponentials / normaliser

def softmax_derivative(AL):
    """Return ``AL * (1 - AL)`` element-wise for the softmax outputs ``AL``.

    Note: this is only the diagonal of the softmax Jacobian; the
    off-diagonal terms are not computed here.
    """
    complement = 1 - AL
    return AL * complement

In [None]:
class NN:
    """Fully connected feed-forward classifier trained by full-batch gradient descent.

    Hidden layers use ReLU and the output layer uses softmax.  Samples are
    stored row-wise: inputs have shape ``(n_samples, n_features)`` and labels
    are one-hot rows of shape ``(n_samples, n_classes)``.

    The activation helpers ``relu``, ``relu_derivative``, ``softmax`` and
    ``softmax_derivative`` are looked up at module level.
    """

    def __init__(self, layers_sizes):
        """Create the parameters for a network with the given layer widths.

        Args:
            layers_sizes: sequence of layer widths, input layer first and
                output layer last (at least two entries).

        Raises:
            ValueError: if fewer than two layer sizes are given.
        """
        if len(layers_sizes) < 2:
            raise ValueError("layers_sizes needs at least an input and an output size")

        self.layer_sizes = layers_sizes
        self.num_layers = len(layers_sizes)
        # He initialisation: scaling by sqrt(2 / fan_in) keeps the ReLU
        # pre-activations at a sensible magnitude.  Unit-variance weights on
        # a wide input layer push the softmax into saturation immediately.
        self.weights = {
            f"W{i}": np.random.randn(layers_sizes[i - 1], layers_sizes[i])
            * np.sqrt(2.0 / layers_sizes[i - 1])
            for i in range(1, self.num_layers)
        }
        self.biases = {
            f"b{i}": np.zeros((1, layers_sizes[i]))
            for i in range(1, self.num_layers)
        }
        self.cache = {}   # "A{k}" / "Z{k}" values from the latest forward pass
        self.grads = {}   # "dW{k}", "db{k}", "dEdA{k}" from the latest backward pass
        self.costs = []   # average cost recorded by every backward pass

    def cross_entropy_loss(self, AL, Y):
        """Return the per-sample cross-entropy between ``AL`` and ``Y``.

        Each row's loss is the sum over classes of
        ``-(Y * log(AL) + (1 - Y) * log(1 - AL))``.  ``AL`` is clipped to
        ``[1e-8, 1 - 1e-8]`` so the logarithms stay finite.
        """
        epsilon = 1e-8
        AL = np.clip(AL, epsilon, 1 - epsilon)
        cost = np.sum(-(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)), axis=1)
        return cost

    def cross_entropy_back(self, AL, Y):
        """Return the derivative of ``cross_entropy_loss`` with respect to ``AL``.

        Element-wise this is ``-Y / AL + (1 - Y) / (1 - AL)``.  The second
        term is positive because ``d/dAL log(1 - AL) = -1 / (1 - AL)``.
        ``AL`` is clipped exactly as in the loss to avoid division by zero.
        """
        epsilon = 1e-8
        AL = np.clip(AL, epsilon, 1 - epsilon)
        dEdAL = (1 - Y) / (1 - AL) - Y / AL
        return dEdAL

    def forward_pass(self, X):
        """Propagate ``X`` through the network and return the output activations.

        Every layer's pre-activation ``Z{k}`` and activation ``A{k}`` is
        stored in ``self.cache`` (with ``A0 = X``) for the backward pass.
        """
        self.cache["A0"] = X
        output_layer = self.num_layers - 1
        A = X
        # Index the layers explicitly instead of reusing a loop variable
        # after the loop, so a network without hidden layers also works.
        for layer in range(1, output_layer + 1):
            Z = np.dot(A, self.weights[f"W{layer}"]) + self.biases[f"b{layer}"]
            A = softmax(Z) if layer == output_layer else relu(Z)
            self.cache[f"Z{layer}"] = Z
            self.cache[f"A{layer}"] = A
        return A

    def linear_activation_backward(self, dEdA, current_layer, activation_function):
        """Back-propagate ``dEdA`` through one layer and store its gradients.

        Writes ``dW{current_layer}``, ``db{current_layer}`` and
        ``dEdA{current_layer - 1}`` into ``self.grads``.

        Args:
            dEdA: gradient of the cost with respect to this layer's activations.
            current_layer: 1-based index of the layer being processed.
            activation_function: ``"softmax"`` or ``"relu"``.

        Raises:
            ValueError: if ``activation_function`` is not supported.
        """
        if activation_function == "softmax":
            dAdZ = softmax_derivative(self.cache[f"A{current_layer}"])
        elif activation_function == "relu":
            # Hand over a copy: the derivative helper may overwrite its
            # argument, and the cached pre-activations must stay intact.
            dAdZ = relu_derivative(self.cache[f"Z{current_layer}"].copy())
        else:
            raise ValueError(f"function {activation_function} is not supported")

        dEdZ = np.multiply(dEdA, dAdZ)
        n_samples = dEdZ.shape[0]
        A_prev = self.cache[f"A{current_layer - 1}"]

        # Gradients are averaged over the batch; db keeps the (1, n) shape
        # of the bias it will be subtracted from.
        self.grads[f"dW{current_layer}"] = np.dot(A_prev.T, dEdZ) / n_samples
        self.grads[f"db{current_layer}"] = np.sum(dEdZ, axis=0, keepdims=True) / n_samples
        self.grads[f"dEdA{current_layer - 1}"] = np.dot(dEdZ, self.weights[f"W{current_layer}"].T)

    def backward_pass(self, AL, Y):
        """Record the average cost and compute the gradients of every layer.

        Must be called after ``forward_pass`` because it reads ``self.cache``.
        """
        cost = self.cross_entropy_loss(AL=AL, Y=Y)
        self.costs.append(np.sum(cost) / cost.shape[0])

        dEdAL = self.cross_entropy_back(AL, Y)

        output_layer = self.num_layers - 1
        # For the output layer dEdAL * AL * (1 - AL) reduces to AL - Y
        # (up to the clipping applied in cross_entropy_back).
        self.linear_activation_backward(
            dEdA=dEdAL, current_layer=output_layer, activation_function="softmax"
        )
        # Walk the hidden layers from last to first.
        for layer in range(output_layer - 1, 0, -1):
            self.linear_activation_backward(
                dEdA=self.grads[f"dEdA{layer}"],
                current_layer=layer,
                activation_function="relu",
            )

    def update_params(self, learning_rate):
        """Take one gradient-descent step on all weights and biases."""
        for layer in range(1, self.num_layers):
            self.weights[f"W{layer}"] = self.weights[f"W{layer}"] - learning_rate * self.grads[f"dW{layer}"]
            self.biases[f"b{layer}"] = self.biases[f"b{layer}"] - learning_rate * self.grads[f"db{layer}"]

    def plot_cost_graph(self):
        """Plot the recorded average cost against the iteration number."""
        x_value = list(range(1, len(self.costs) + 1))
        plt.xlabel('iteration')
        plt.ylabel('cost')
        plt.plot(x_value, self.costs, color='g')
        plt.show()

    def test(self, X_test, Y_test, metrics=()):
        """Predict classes for ``X_test`` and optionally print the accuracy.

        Args:
            X_test: input rows, one sample per row.
            Y_test: one-hot label rows matching ``X_test``.
            metrics: iterable of metric names; only ``"accuracy"`` is recognised.

        Returns:
            1-D array with the predicted class index of every sample.
        """
        predictions = self.predict(X_test)
        # Classes lie along axis 1; axis 0 would take the argmax over samples.
        expected = np.argmax(Y_test, axis=1)

        if "accuracy" in metrics:
            accuracy = np.mean(predictions == expected)
            print(f"Accuracy: {accuracy * 100:.2f}%")

        return predictions

    def train(self, X, Y, epochs=100, learning_rate=0.01):
        """Run ``epochs`` full-batch gradient-descent steps, then plot the cost.

        Every tenth epoch the accuracy on ``(X, Y)`` and the latest cost are
        printed.
        """
        for i in range(1, epochs + 1):
            AL = self.forward_pass(X)
            self.backward_pass(AL, Y)
            self.update_params(learning_rate)

            if i % 10 == 0:
                print(f"[epoch {i}]")
                self.test(X, Y, metrics=['accuracy'])
                # costs keeps growing across train() calls, so read the
                # newest entry rather than indexing by epoch number.
                print('cost:', self.costs[-1])

        self.plot_cost_graph()
        print('trained...')

    def predict(self, X_test):
        """Return the most probable class index for every row of ``X_test``."""
        result = self.forward_pass(X_test)
        return np.argmax(result, axis=1)


In [None]:
# Build a 784 -> 128 -> 64 -> 10 network and fit it on the training set
# using the default number of epochs and learning rate.
nn = NN(layers_sizes=[784, 128, 64, 10])
nn.train(X=train_data, Y=train_label)