In [1]:
from keras.datasets import mnist

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [3]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
px.imshow(x_train[16], width=500)

#### Preprocess data for modelling

In [5]:
# convert x values from 0-255 to 0-1
x_train = x_train/255
x_test = x_test/255

# convert y values from integer to binary array
def y_categorical(y):
    res = []
    for i in range(len(y)):
        arr = [0]*10
        arr[y[i]] = 1
        res.append(arr)
    return res
y_train = y_categorical(y_train)
y_test = y_categorical(y_test)

# convert numpy to pandas
x_train = pd.DataFrame(x_train.reshape(len(x_train), 784))
x_test = pd.DataFrame(x_test.reshape(len(x_test), 784))
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)


#### Develop Model

think of a^L as the value inside a neuron
a^L = σ_1(w^L a^(L - 1) + b^L)

the cost function for a given training example is
(a^L - y)^2

 
  

In [250]:
from ast import match_case
from re import S


class VisNet:
    
    def __init__(self, structure, epochs=10, learning_rate=0.1, batch_size=0, regularizer=0.0):
        
        self.size = [x for x in structure if isinstance(x, int)]
        self.activation_functions = [x.lower() for x in structure if isinstance(x, str)]

        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.regularizer = regularizer
        
        # create weight matrices of size n_output nodes (rows) * n_input_nodes (columns)
        self.weight_matrices = []
        self.weight_matrices_t = [None]*(len(self.size)-1)
        for i in range(1,len(self.size)):
            self.weight_matrices.append(np.random.random((self.size[i], self.size[i-1])))

        # create bias matrices of size n_input nodes (rows) * 1 (columns)
        self.bias_matrices = []
        for i in range(len(self.size) - 1):
            self.bias_matrices.append(np.random.random((self.size[i+1],1)))

    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))

    def dxsigmoid(self, z):
        return np.multiply(self.sigmoid(z),(1-self.sigmoid(z)))

    def softmax(self,z):
        e_x = np.exp(z - np.max(z))
        return e_x / e_x.sum(axis=0)

    def relu(self, z):
        return np.maximum(0,z)
        
    def dxrelu(self, z):
        return np.where(z >= 0, 1, 0)

    def forward(self, x_sample):
        # convert x_sample into len(x_sample)*1 matrix
        inputs = x_sample.reshape(len(x_sample),1)
        # for each layer after input layer
        for i in range(len(self.size)-1):
            # computer weighted sum of inputs
            inputs = self.weight_matrices[i].dot(inputs)
            # add bias
            inputs = np.add(inputs, self.bias_matrices[i])
            # apply activation function
            match self.activation_functions[i]:
                case 'softmax':
                    inputs = self.softmax(inputs)
                case 'sigmoid':
                    inputs = self.sigmoid(inputs)
                case 'relu':
                    inputs = self.relu(inputs)
        return inputs
    
    # def backpropagate(self, outputs, output_errors):
    #     # for each layer (iterating backwards)
    #     errors = output_errors
    #     for i in range(len(self.size)-2,-1,-1):
    #         # create transposed weight matrix
    #         self.weight_matrices_t[i] = self.weight_matrices[i].transpose()
    #         # calculate previous layer errors
    #         errors = np.dot(self.weight_matrices_t[i],errors)
    #         # apply derivative of activation function to outputs

    #         # multiply outputs by errors
            
    #         # multiply outputs by learning rate

    #     return

    def backpropagate(self, outputs, output_errors): # CHAT GPT VERSION
    # for each layer (iterating backwards)
        errors = output_errors
        for i in range(len(self.size)-2,-1,-1):
            # create transposed weight matrix
            self.weight_matrices_t[i] = self.weight_matrices[i].transpose()
            # calculate previous layer errors
            errors = np.dot(self.weight_matrices_t[i],errors)
            # apply derivative of activation function to outputs
            if self.activation_functions[i] == 'sigmoid':
                errors = np.multiply(errors, self.dxsigmoid(outputs[i]))
            elif self.activation_functions[i] == 'relu':
                errors = np.multiply(errors, self.dxrelu(outputs[i]))
            # multiply outputs by errors
            gradients = np.dot(errors, outputs[i].transpose())
            # multiply outputs by learning rate
            self.weight_matrices[i] += self.learning_rate * gradients
            self.bias_matrices[i] += self.learning_rate * errors
        return


    # def train(self, x_train, y_train):
    #     x = x_train.to_numpy()
    #     y = y_train.to_numpy()

    #     for i in range(1):
    #         # get output from given input through feedforward function
    #         outputs = self.forward(x[i])
    #         # calculate the error (i.e. the distance of each output from the actual y value)
    #         output_errors = y[i].reshape(len(y[i]),1) - outputs
    #         # update weights and biases through backpropagation
    #         self.backpropagate(outputs, output_errors)
            
    #         # NEED TO MAKE OUTPUTS (VALUE OF EACH NEURON) A SELF. VARIABLE SO THEY CAN ALL BE ACCESSED IN BACKPROPOGATE FUNCTION


    def train(self, x_train, y_train):
    x = x_train.to_numpy()
    y = y_train.to_numpy()

    for i in range(1):
        # store outputs of each layer
        outputs = []
        # get output from given input through feedforward function
        inputs = x[i].reshape(len(x[i]),1)
        # for each layer after input layer
        for i in range(len(self.size)-1):
            # computer weighted sum of inputs
            inputs = self.weight_matrices[i].dot(inputs)
            # add bias
            inputs = np.add(inputs, self.bias_matrices[i])
            # apply activation function
            if self.activation_functions[i] == 'softmax':
                inputs = self.softmax(


#### Testing

In [251]:
###############################################################
clf = VisNet([784, "relu", 200, 'sigmoid', 80, 'softmax', 10])
###############################################################

# assert(clf.relu(12))==12
# assert(clf.relu(0))==0
# assert(clf.relu(-3))==0
# assert(clf.relu(0.02310320120))>0

# assert(clf.sigmoid(0))==0.5
# assert(clf.sigmoid(1))>0.5
# assert(clf.sigmoid(1000))==1
# assert(clf.sigmoid(-1000))==0

# a,b,c = clf.softmax([1,4,5])
# assert(a<b<c)
# a,b,c,d = clf.softmax([-5,10,25,3])
# assert((a<b)&(b>d))

# the weight matrix between the input and first hidden layer should have 200 rows and 784 columns
assert(clf.weight_matrices[0].shape[0]==200)
assert(clf.weight_matrices[0].shape[1]==784)

# the bias matrix between the first hidden layer and the second hidden layer should have 80 rows and 1 column
assert(clf.bias_matrices[1].shape[0]==80)
assert(clf.bias_matrices[1].shape[1]==1) 
# output layer should have 10 bias values (one for each neuron)
assert(clf.bias_matrices[-1].shape[0 == 10])

clf.train(x_train, y_train)

(10, 1)
(10, 1)
(10, 1)


#### Final

In [None]:
# # initialise the neural network
clf = VisNet([784, "relu", 200, 'sigmoid', 80, 'softmax', 10])
# # train the neural network
clf.train(x_train, y_train)
# # make predicitons using the neural network
# clf.predict
# (x_test)