In [39]:
import numpy as np
import tensorflow as tf
import math
import random
from matplotlib import pyplot as plt

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^(-x)) of a scalar ``x``.

    The two branches are algebraically identical; each one only ever
    exponentiates a non-positive number, so ``math.exp`` cannot raise
    ``OverflowError`` for inputs of large magnitude.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, e^(-x) may overflow; use the equivalent e^x / (1 + e^x).
    expx = math.exp(x)
    return expx / (1 + expx)

def sigmoidderivative(x):
    """Return the derivative of the sigmoid at scalar ``x``: s(x) * (1 - s(x))."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

# Element-wise versions of the scalar helpers, applied to whole arrays.
# otypes is given explicitly so that NumPy does not have to probe the function
# with the first element; without it np.vectorize raises on size-0 input.
vec_sigmoid = np.vectorize(sigmoid, otypes=[float])
vec_sigmoidderivative = np.vectorize(sigmoidderivative, otypes=[float])

def oneHot(n, numclasses=10):
    """Return a one-hot float vector with a 1.0 at index ``n``.

    Parameters
    ----------
    n : int
        Index of the class to mark.
    numclasses : int, optional
        Length of the returned vector. Defaults to 10 (the ten MNIST digits).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``numclasses``, all zeros except position ``n``.
    """
    encoded = np.zeros(numclasses)
    encoded[n] = 1.0
    return encoded

In [3]:
class NeuralLayer:

    def __init__(self, numinputs:int, numoutputs:int, activation=None):

        self.numinputs = numinputs
        self.numoutputs = numoutputs
        self.activation = activation
        self.weights = np.random.randn(self.numoutputs, self.numinputs + 1)

    
    def Evaluate(self, inputs):

        inputs = np.append(inputs, np.array([1]))
    
        outputs = self.weights @ inputs # this is \vec{h}

        match self.activation:
            case "Sigmoid":
                outputs = vec_sigmoid(outputs)

            case "Softmax":
                denom = 0
                for i in range(len(outputs)):
                    denom += math.exp[outputs[i]]
                    outputs[i] = math.exp(outputs[i])
                outputs = outputs/denom

            case "ReLU":
                outputs = np.maximum(outputs, 0)
            
            case "Tanh":
                #numerator: e^x - e^-x
                #denom: e^x + e^-x
                outputs = np.tanh(outputs)


        return outputs

    
        
    def ComputeLocalGradient(self, inputs):
        # z is output after activation
        # h is output after linear layer
        # w are weights
        # Need to compute three things:
        # dz/dh
        # dh/dw
        # dh/dx

        inputs = np.append(inputs, np.array([1]))
        outputs = self.weights @ inputs


        # This part computes dzdh, and has cases for various activation functions
        match self.activation:
            case "Sigmoid":
                dzdh = np.diag(vec_sigmoidderivative(outputs))
            case "Softmax":
                n = len(outputs)
                dzdh = np.zeros((n, n))
                denom = 0
                for i in range(n):
                    denom += math.exp(outputs[i])
                
                for i in range(n):
                    for j in range(n):
                        if i == j:
                            dzdh[i][j] = (denom * math.exp(outputs[i]) - (math.exp(outputs[i])**2))/(denom**2)
                        else:
                            dzdh[i][j] = -(math.exp(outputs[j]))*(math.exp(outputs[j]))/(denom**2)

            case "ReLU":
                deriv = np.array([1.0 if num > 0 else 0.0 for num in outputs])
                dzdh = np.diag(deriv)
            case "Tanh":
                dzdh = np.diag(1- np.tanh(outputs)**2)


            

        
        
        
        
        # This part computes dhdw        
        dhdw = np.zeros((self.numoutputs, self.numoutputs, self.numinputs+1)) #because of bias
        for i in range(self.numoutputs):
            for j in range(self.numinputs):
                dhdw[i,i,j] = inputs[j]
            dhdw[i,i,self.numinputs] = 1
            
        # This part computes dhdx
        dhdx = self.weights[:, :-1]


        return (dzdh, dhdw, dhdx)

    
    

        

In [4]:
# Shape check of the three local-gradient arrays for a 5-input, 3-output layer.
Layer1 = NeuralLayer(5, 3, "Sigmoid")
test1 = np.array([1, 2, 3, 4, 5])
dzdh, dhdw, dhdx = Layer1.ComputeLocalGradient(test1)
print(dzdh.shape, dhdw.shape, dhdx.shape, sep="\n")

(3, 3)
(3, 3, 6)
(3, 5)


In [5]:
class NeuralNetwork:

    def __init__(self, errorfunc=None):
        
        self.errorfunc = errorfunc
        self.layers = []
        self.numlayers = 0

    def AppendLayer(self, layer: NeuralLayer):
        # need to check that the new layer to be appended has same 
        # number of inputs as the last layer already in the network
        if len(self.layers) > 0:
            if layer.numinputs == self.layers[-1].numoutputs:
                self.layers.append(layer)
                self.numlayers += 1
            else:
                print("Error: number of inputs does not match previous layer")
        else:
            self.layers.append(layer)
            self.numlayers += 1

        
    def Evaluate(self, inputs):

        outputs = []
        outputs.append(self.layers[0].Evaluate(inputs))

        for i in range(1,self.numlayers):
            outputs.append(self.layers[i].Evaluate(outputs[i-1]))
        
        return outputs

    def ComputeError(self, inputs, trueoutputs):

        outputs = self.Evaluate(inputs)
        
        if self.errorfunc == "MSE":
            n = len(outputs[-1])
            diffs = outputs[-1] - trueoutputs
            err = np.dot(diffs, diffs)
            err = err/(2*n)
            return err

    def BackPropagate(self, inputs, trueoutputs, learningrate):

        outputs = self.Evaluate(inputs)
        gradients = []

        # Compute all the necessary gradients
        for i in range(self.numlayers):
            if i == 0:
                tempinput = inputs
            else:
                tempinput = outputs[i-1]
            
            gradients.append(self.layers[i].ComputeLocalGradient(tempinput))

        match self.errorfunc:
            case "MSE":
                dldz = (0.5) * (outputs[-1] - trueoutputs)

            case "CrossEntropy":
                dldz = np.zeros(len(trueoutputs))
                spot = np.where(1 == trueoutputs)
                dldz[spot] = 1/outputs[-1][spot]
                
            

        # Update weights, working backwards

        currgrad = dldz @ gradients[-1][0]
    
        for i in range(self.numlayers-1, -1, -1):
            self.layers[i].weights -= learningrate * (currgrad @ gradients[i][1])
            currgrad = currgrad @ gradients[i][0] @ gradients[i][2]

In [6]:
# Two-layer sigmoid network (5 -> 3 -> 2) trained on a single sample with MSE.
MyNN = NeuralNetwork(errorfunc="MSE")
MyLayer1, MyLayer2 = NeuralLayer(5, 3, "Sigmoid"), NeuralLayer(3, 2, "Sigmoid")

MyNN.AppendLayer(MyLayer1)
MyNN.AppendLayer(MyLayer2)

myinput, mytrue = np.array([1, 2, 3, 4, 5]), np.array([1, 0])

# Error before training, then after each of ten gradient steps (learning rate 1).
print(MyNN.ComputeError(myinput, mytrue))

for i in range(10):
    MyNN.BackPropagate(myinput, mytrue, 1)
    print(MyNN.ComputeError(myinput, mytrue))




0.211255137791783
0.19876774433092836
0.18843136732982918
0.1771557257708931
0.1638492459219142
0.14792426256519203
0.1294350639364992
0.10939777319709791
0.08965002703059298
0.07213187412895564
0.058025151539170976


In [38]:
# Load data from MNIST database
(x_train0, y_train0), (x_test0, y_test0) = tf.keras.datasets.mnist.load_data()
assert x_train0.shape == (60000, 28, 28)
assert x_test0.shape == (10000, 28, 28)
assert y_train0.shape == (60000,)
assert y_test0.shape == (10000,)

# Only the first N_TRAIN / N_TEST samples are prepared (a subset of the full data).
N_TRAIN = 6000
N_TEST = 1000
N_PIXELS = 28 * 28

# Prepare data for processing
# Images: flatten each 28x28 picture to one row of N_PIXELS values, convert to
# np.float64 and scale from [0, 255] to [0, 1].
x_train = x_train0[:N_TRAIN].reshape(N_TRAIN, N_PIXELS).astype(np.float64) / 255
x_test = x_test0[:N_TEST].reshape(N_TEST, N_PIXELS).astype(np.float64) / 255
assert x_train.shape == (N_TRAIN, N_PIXELS)
assert x_test.shape == (N_TEST, N_PIXELS)

# Labels: one-hot encode each digit; kept as Python lists of 1-D arrays.
y_train = [oneHot(label) for label in y_train0[:N_TRAIN]]
y_test = [oneHot(label) for label in y_test0[:N_TEST]]


In [9]:
# Single-layer sigmoid network (784 -> 10) trained on the first MNIST training
# image only, as a check that the error decreases.
MyMNISTNetwork = NeuralNetwork("MSE")
MyMNISTNetwork.AppendLayer(NeuralLayer(28*28,10,"Sigmoid"))

# Flatten the first image and scale it to [0, 1]. The ndarray.astype method is
# used because the module-level np.astype function only exists in recent NumPy.
testinput = x_train0[0].reshape(28*28).astype(np.float64) / 255.0

# Target vector derived from the image's actual label rather than hard-coded.
onehot = oneHot(y_train0[0])

print(MyMNISTNetwork.ComputeError(testinput, onehot))
for i in range(100):
    MyMNISTNetwork.BackPropagate(testinput, onehot, 1)
    print(MyMNISTNetwork.ComputeError(testinput, onehot))

print(MyMNISTNetwork.ComputeError(testinput, onehot))

print("Final check evaluation: " + str(MyMNISTNetwork.Evaluate(testinput)))

0.22827517506054051
0.22712407488980474
0.22595724260855454
0.2247813184038893
0.22360362521614766
0.22243191994306688
0.2212740982464195
0.22013787690258296
0.219030482399749
0.21795837470078983
0.21692703048650253
0.21594080173075508
0.21500285504810698
0.21411518713761196
0.21327870371542476
0.21249334465460032
0.21175823676383812
0.21107185712385085
0.2104321931604472
0.20983688963497293
0.2092833766508611
0.20876897609096154
0.20829098638703894
0.2078467471705677
0.2074336862811697
0.20704935198712535
0.2066914332746405
0.2063577708367342
0.20604636105093208
0.20575535485475985
0.20548305305761874
0.20522789929353671
0.2049884715331644
0.20476347283745291
0.2045517218466102
0.20435214335045107
0.2041637591735344
0.20398567952403585
0.20381709489329136
0.2036572685483582
0.20350552962865992
0.2033612668364974
0.20322392269731265
0.20309298835708728
0.20296799887960298
0.20284852900434336
0.20273418932572032
0.2026246228554342
0.20251950193167895
0.20241852544124977
0.20232141632318