In [1]:
from keras.datasets import mnist

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load MNIST through keras and unpack it into train/test image arrays (x_*)
# and their matching label arrays (y_*).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# Preview the first training sample as an image (figure width set to 500).
px.imshow(x_train[0], width=500)

#### Preprocess data for modelling

In [5]:
# convert x values from 0-255 to 0-1
# NOTE: x_train/x_test are rebound to the scaled arrays, so re-running this
# cell divides by 255 a second time. Re-run the load cell first if needed.
x_train = x_train/255
x_test = x_test/255

# convert y values from integer to binary array
def y_categorical(y, num_classes=10):
    """One-hot encode a sequence of integer class labels.

    Args:
        y: Iterable of integer labels (for example a list or a 1-D numpy
            array), each expected to lie in ``[0, num_classes)``.
        num_classes: Length of every one-hot row. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A list holding one ``num_classes``-long list per label, with 1 at the
        label's position and 0 everywhere else.

    Raises:
        IndexError: If a label is outside ``[0, num_classes)``. Negative
            labels are rejected explicitly; otherwise Python's negative
            indexing would silently mark the wrong class.
    """
    res = []
    for label in y:
        if not 0 <= label < num_classes:
            raise IndexError(
                f"label {label} is outside the valid range [0, {num_classes})"
            )
        arr = [0] * num_classes
        arr[label] = 1
        res.append(arr)
    return res
# One-hot encode the labels. NOTE: y_train/y_test are rebound to the encoded
# lists, so this is meant to run once on the integer labels from the load cell.
y_train = y_categorical(y_train)
y_test = y_categorical(y_test)

# convert numpy to pandas
# Each sample is flattened to a 784-value row before being wrapped in a DataFrame.
x_train = pd.DataFrame(x_train.reshape(len(x_train), 784))
x_test = pd.DataFrame(x_test.reshape(len(x_test), 784))
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)


#### Develop Model

In [14]:
from ast import match_case
from re import S


class VisNet:
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    The architecture is given by ``layers = [n_inputs, n_hidden, n_outputs]``;
    only the first three entries are used. Parameters live in ``self.params``
    under the keys ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``. The most recent
    forward pass also caches ``'A0'`` (inputs), ``'Z1'``, ``'A1'`` and ``'Z2'``
    in the same dict so that ``backpropagate`` can reuse them.

    Note: the constructor calls ``np.random.seed(1)``, which resets numpy's
    global random state every time an instance is created.
    """

    def __init__(self, layers=(784, 200, 10), epochs=10, learning_rate=0.1):
        """Create the network and draw its initial parameters.

        Args:
            layers: Sizes of the input, hidden and output layers.
            epochs: Number of full-batch passes performed by ``train``.
            learning_rate: Step size used by ``backpropagate``.
        """
        self.loss = []                      # one loss value per trained epoch
        self.epochs = epochs                # num epochs
        # Copy into a fresh list so instances never share (or mutate) the
        # default architecture object.
        self.layers = list(layers)          # network architecture
        self.learning_rate = learning_rate  # learning rate
        self.x = None                       # training inputs, set by train()
        self.y = None                       # training targets, set by train()

        np.random.seed(1)
        self.params = {}
        self.params["W1"] = np.random.randn(self.layers[0], self.layers[1])
        self.params['b1'] = np.random.randn(self.layers[1],)
        self.params['W2'] = np.random.randn(self.layers[1], self.layers[2])
        self.params['b2'] = np.random.randn(self.layers[2],)

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
        return 1/(1 + np.exp(-z))

    def dxsigmoid(self, z):
        """Return the derivative of ``sigmoid`` evaluated at ``z``."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return np.multiply(s, 1 - s)

    def softmax(self, z):
        """Return softmax probabilities taken over the last axis of ``z``.

        A 1-D input yields a single distribution; a 2-D ``(samples, classes)``
        input is normalised row by row, so every sample's probabilities sum
        to 1. The per-row maximum is subtracted before exponentiating to
        avoid overflow; this does not change the result.
        """
        z = np.asarray(z, dtype=float)
        e_x = np.exp(z - np.max(z, axis=-1, keepdims=True))
        return e_x / e_x.sum(axis=-1, keepdims=True)

    def relu(self, z):
        """Return ``max(0, z)`` element-wise."""
        return np.maximum(0, z)

    def dxrelu(self, z):
        """Return the ReLU derivative: 1 where ``z >= 0``, otherwise 0."""
        return np.where(z >= 0, 1, 0)

    def eta(self, x):
        """Clip ``x`` from below at 1e-10 so a following ``log`` stays finite."""
        ETA = 0.0000000001
        return np.maximum(x, ETA)

    def entropy_loss(self, y, yhat):
        """Return the summed element-wise binary cross-entropy divided by ``len(y)``.

        Every output unit contributes ``y*log(yhat) + (1-y)*log(1-yhat)``.
        TODO (original author's note): modify to account for multiple y values.

        Args:
            y: Target array with the same shape as ``yhat``.
            yhat: Predicted probabilities.
        """
        y = np.asarray(y, dtype=float)
        yhat = np.asarray(yhat, dtype=float)
        nsample = len(y)
        yhat_inv = 1.0 - yhat
        y_inv = 1.0 - y
        yhat = self.eta(yhat)  # clips value to avoid NaNs in log
        yhat_inv = self.eta(yhat_inv)
        loss = -1/nsample * (np.sum(np.multiply(np.log(yhat), y) + np.multiply((y_inv), np.log(yhat_inv))))
        return loss

    def forward(self, x, y=None):
        """Run one forward pass and cache the intermediates for backpropagation.

        Args:
            x: Input array of shape ``(samples, layers[0])``.
            y: Optional targets used for the loss. When omitted, the targets
                stored by ``train`` (``self.y``) are used.

        Returns:
            ``(yhat, loss)``. ``loss`` is ``None`` when no targets are
            available, which allows prediction-only calls.
        """
        if y is None:
            y = self.y

        Z1 = x.dot(self.params['W1']) + self.params['b1']
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        yhat = self.softmax(Z2)
        loss = self.entropy_loss(y, yhat) if y is not None else None

        # save calculated parameters
        self.params['A0'] = x
        self.params['Z1'] = Z1
        self.params['Z2'] = Z2
        self.params['A1'] = A1

        return yhat, loss

    def backpropagate(self, yhat, y=None):
        """Apply one gradient-descent step using the cached forward pass.

        The gradient is that of ``entropy_loss`` with respect to the four
        parameter arrays, treating the clipping in ``eta`` as the identity.

        Args:
            yhat: Probabilities returned by the matching ``forward`` call.
            y: Optional targets; defaults to the targets stored by ``train``.

        Raises:
            ValueError: If no targets are available or ``forward`` has not
                been run yet.
        """
        if y is None:
            y = self.y
        if y is None:
            raise ValueError("backpropagate needs targets: pass y or call train() first")
        if 'A0' not in self.params:
            raise ValueError("backpropagate needs a cached forward pass: call forward() first")

        y = np.asarray(y, dtype=float)
        yhat = np.asarray(yhat, dtype=float)
        nsample = len(y)

        A0 = self.params['A0']
        Z1 = self.params['Z1']
        A1 = self.params['A1']
        W2 = self.params['W2']

        # dLoss/dyhat for the element-wise binary cross-entropy.
        dyhat = -(y / self.eta(yhat) - (1.0 - y) / self.eta(1.0 - yhat)) / nsample
        # Chain through the softmax Jacobian: dZ_j = p_j * (g_j - sum_k g_k * p_k).
        dZ2 = yhat * (dyhat - np.sum(dyhat * yhat, axis=-1, keepdims=True))

        dW2 = A1.T.dot(dZ2)
        db2 = dZ2.sum(axis=0)
        dA1 = dZ2.dot(W2.T)
        dZ1 = dA1 * self.dxrelu(Z1)
        dW1 = A0.T.dot(dZ1)
        db1 = dZ1.sum(axis=0)

        step = self.learning_rate
        self.params['W1'] = self.params['W1'] - step * dW1
        self.params['b1'] = self.params['b1'] - step * db1
        self.params['W2'] = self.params['W2'] - step * dW2
        self.params['b2'] = self.params['b2'] - step * db2
        return

    def train(self, x_train: pd.DataFrame, y_train: pd.DataFrame):
        """Train with full-batch gradient descent for ``self.epochs`` epochs.

        Args:
            x_train: Inputs, one row per sample (DataFrame or array-like).
            y_train: One-hot targets, one row per sample.

        Appends each epoch's loss (computed before that epoch's update) to
        ``self.loss``.
        """
        x = np.asarray(x_train, dtype=float)
        y = np.asarray(y_train, dtype=float)
        # Keep the data on the instance so forward()/backpropagate() can fall
        # back to it when called without explicit targets.
        self.x = x
        self.y = y

        for _ in range(self.epochs):
            yhat, loss = self.forward(x, y)
            self.backpropagate(yhat, y)
            self.loss.append(loss)

#### Testing

In [18]:
###############################################################
clf = VisNet()
###############################################################

# Activation sanity checks on scalar inputs.
assert clf.relu(12) == 12
assert clf.relu(0) == 0
assert clf.relu(-3) == 0
assert clf.relu(0.02310320120) > 0

assert clf.sigmoid(0) == 0.5
assert clf.sigmoid(1) > 0.5
assert clf.sigmoid(1000) == 1
assert clf.sigmoid(-1000) == 0

# softmax must preserve the ordering of its inputs.
a, b, c = clf.softmax([1, 4, 5])
assert a < b < c
a, b, c, d = clf.softmax([-5, 10, 25, 3])
assert a < b and b > d

# Parameter shapes for the default [784, 200, 10] architecture:
# weights are stored as (inputs, outputs), biases as one value per neuron.
assert clf.params['W1'].shape == (784, 200)
assert clf.params['b1'].shape == (200,)
assert clf.params['W2'].shape == (200, 10)
# output layer should have 10 bias values (one for each neuron)
assert clf.params['b2'].shape == (10,)

print(clf.params['W2'].shape)

(200, 10)
