<a href="https://colab.research.google.com/github/WaiWasabi/Neural-Networks/blob/master/Updated%20Multilayer%20Perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***Changelog*** Created 5/29/21

---
*   **[1.0.0 Alpha]** plan to implement various cost and activation classes, but only the gradient descent optimizer.

*   **[1.0.1 Alpha]** use of parent classes for activations & losses to inherit from

*   **[1.0.2 Alpha]** feedforward and backpropagation are now two separate functions for better readability.

*   **[1.0.3 Alpha]** activation & loss are currently passed as class instances to Network, but plan to revert to the old dictionary method with string keys.

*   **[1.0.0]** Currently functional after correcting incorrect loss derivative function.




In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [None]:
class DefaultActivation(): # Base class for activations implements sigmoid activation.
  """Base class for activations; the default implementation is the sigmoid.

  Subclasses override f (and prime/delta where needed) to provide another
  activation while keeping the same three-method interface.
  """

  def f(self, z):
    """Return the sigmoid 1/(1+exp(-z)) of z, elementwise.

    Only exp(-|z|) is ever evaluated, so large-magnitude inputs cannot
    overflow the exponential (the naive form overflows for very negative z).
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z)) # always in (0, 1]
    # z >= 0: 1/(1+exp(-z)); z < 0: the algebraically equal exp(z)/(1+exp(z)).
    # Indexing with () turns a 0-d result back into a scalar, as before.
    return np.where(z >= 0, 1.0/(1.0+e), e/(1.0+e))[()]

  def prime(self, z):
    """Return the derivative of f at z, computed as f(z)*(1-f(z))."""
    s = self.f(z) # evaluate the activation once and reuse it
    return s * (1.0-s)

  def delta(self, z, dLda):
    """Return dLda scaled elementwise by the activation derivative at z."""
    return dLda * self.prime(z)

class Sigmoid(DefaultActivation):
  """Sigmoid activation; inherits every method from DefaultActivation unchanged."""

In [None]:
class DefaultLoss():
  """Base class for losses; the default implementation is half the squared error."""

  def f(self, x, y):
    """Return 0.5*(y-x)**2, elementwise, for prediction x and target y."""
    residual = y - x
    return 0.5 * residual**2

  def prime(self, x, y):
    """Return the derivative of f with respect to the prediction x, i.e. x-y."""
    return x - y

class MSE(DefaultLoss):
  """Squared-error loss; inherits every method from DefaultLoss unchanged."""

In [None]:
def to_one_hot(data, max):
  """Encode a 1-D sequence of integer class indices as one-hot column vectors.

  Args:
    data: iterable of integer indices, each addressing a row in [0, max).
    max: length of every one-hot vector (the number of classes). The name
      shadows the builtin but is kept so keyword callers keep working.

  Returns:
    A float array of shape (len(data), max, 1) holding a single 1 per sample.
    Empty input yields an empty array of shape (0, max, 1).

  Raises:
    IndexError: if an index is out of range or is not an integer.
  """
  # Materialize one-shot iterables (e.g. generators) so they can be indexed.
  indices = np.asarray(data if hasattr(data, "__len__") else list(data))
  count = len(indices)
  output = np.zeros((count, max, 1))
  if count:
    # One vectorized assignment: sample n gets its 1 at row indices[n].
    output[np.arange(count), indices, 0] = 1
  return output

In [26]:
class Network():
  """Fully connected feedforward network trained by mini-batch gradient descent.

  The activation object must provide f(z), prime(z) and delta(z, dLda); the
  loss object must provide prime(x, y). Every layer uses the same activation.
  """

  def __init__(self, sizes, activation, loss):
    """Create randomly initialized parameters.

    Args:
      sizes: layer widths, input layer first (e.g. [784, 50, 10]).
      activation: activation object applied after every layer.
      loss: loss object whose prime(x, y) drives backpropagation.
    """
    self.sizes = sizes # array of dimensions for each layer in model
    self.num_layers = len(sizes)
    # One (n, 1) bias column per non-input layer.
    self.b = [np.random.standard_normal([i,1]) for i in sizes[1:]]
    # One (out, in) weight matrix per layer, scaled down by sqrt(fan-in).
    self.w = [np.random.standard_normal([o, i])/np.sqrt(i) for i, o in zip(sizes[:-1], sizes[1:])]
    self.R = activation
    self.L = loss

  def summary(self):
    """Print the layer count and the shapes of all biases and weights."""
    print("SUMMARY\n----------------------------")
    print(f"Number of Layers = {self.num_layers}")
    print(f"Bias Shapes = {[x.shape for x in self.b]}")
    print(f"Weight Shapes = {[x.shape for x in self.w]}")
    print("----------------------------")

  def feedforward(self, a): # feedforward function for evaluation purposes
    """Return the network output for input a (a column vector or a batch of them)."""
    for w, b in zip(self.w, self.b):
      a = self.R.f(np.matmul(w, a) + b)
    return a

  def backprop(self, a, y): # calculates gradients of trainable params to be used for gradient descent.
    """Compute loss gradients summed over a batch.

    Args:
      a: rank-3 batch of inputs; the leading axis indexes samples.
      y: rank-3 batch of targets matching the network output.

    Returns:
      (grad_w, grad_b): two lists of arrays, ordered like self.w and self.b,
      each already summed over the batch axis. Lists (rather than lazy map
      objects) so callers may take len() or iterate more than once.
    """
    zs, activations = [], [a] # to store z & a values for backprop
    for w, b in zip(self.w, self.b):
      z = np.matmul(w, a) + b
      a = self.R.f(z)
      zs.append(z)
      activations.append(a)
    grad_w, grad_b = [None]*len(self.w), [None]*len(self.b)
    # delta-L: partial derivative of the loss with respect to the output layer's z.
    delta = self.R.delta(zs[-1], self.L.prime(activations[-1], y))
    # Per-sample outer product of delta with the previous layer's activations.
    grad_w[-1] = np.einsum("ijk,ilk->ijl", delta, activations[-2])
    grad_b[-1] = delta
    for l in range(2, self.num_layers): # -l represents current layer in backprop
      # Pull delta back through the next layer's weights (transposed product),
      # then scale by this layer's activation derivative.
      delta = np.einsum("jk,ijl,ikl->ikl", self.w[-l+1], delta, self.R.prime(zs[-l]))
      grad_w[-l] = np.einsum("ijk,ilk->ijl", delta, activations[-l-1])
      grad_b[-l] = delta
    return [np.sum(g, axis=0) for g in grad_w], [np.sum(g, axis=0) for g in grad_b]

  def mini_batch_step(self, mini_batch, lr): # mini_batch data in format (train_features, train_labels)
    """Apply one gradient-descent update using the batch-averaged gradient.

    Args:
      mini_batch: pair (features, labels) of equally long sequences.
      lr: learning rate.
    """
    features, labels = mini_batch
    grad_w, grad_b = self.backprop(np.array(features), np.array(labels))
    step = lr/len(features) # gradients are batch sums, so divide to average
    self.w = [w-step*nw for w, nw in zip(self.w, grad_w)]
    self.b = [b-step*nb for b, nb in zip(self.b, grad_b)]

  def SGD(self, dataset, epochs, mini_batch_size, lr, validation_data=None): # dataset data as an iterable of individual (feature, label) pairs
    """Train with stochastic gradient descent, printing progress per epoch.

    Args:
      dataset: iterable of (feature, label) pairs. It is copied before
        shuffling, so the caller's sequence is left untouched and any
        iterable (list, tuple, zip, ...) is accepted.
      epochs: number of passes over the dataset.
      mini_batch_size: samples per update; the final batch may be smaller.
      lr: learning rate.
      validation_data: optional iterable of (feature, label) pairs with
        integer class labels; when given, accuracy is printed every epoch.
    """
    samples = list(dataset)
    for i in range(epochs):
      random.shuffle(samples)
      for j in range(0, len(samples), mini_batch_size):
        # Transpose the list of pairs into (features, labels).
        features, labels = zip(*samples[j:j+mini_batch_size])
        self.mini_batch_step((features, labels), lr)
      print(f"Epoch {i+1} Complete")

      if validation_data is not None:
        logistics = self.evaluate(validation_data)
        if logistics: # nothing to report (and no division by zero) when empty
          accuracy = round(sum([x == y for x, y in logistics])/len(logistics)*100, 2)
          print(f"Accuracy: {accuracy} (%)")

  def evaluate(self, test_data): # test_data as an iterable of individual (feature, label pairs)
    """Return a list of (predicted class index, label) pairs for test_data."""
    return [(np.argmax(self.feedforward(feature)), label) for feature, label in test_data]

In [13]:
from tensorflow.keras.datasets import mnist

# Load the MNIST images and their integer labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a column vector and divide pixel values by 255.
x_train = x_train.reshape(len(x_train), -1, 1) / 255
x_test = x_test.reshape(len(x_test), -1, 1) / 255

# Hold out samples 50000-59999 of the training set for validation.
x_train, x_validate = x_train[:50000], x_train[50000:60000]
y_train, y_validate = y_train[:50000], y_train[50000:60000]

# Only the training labels are one-hot encoded; the validation and test
# labels stay as integers.
y_train = to_one_hot(y_train, 10)

# Pair each feature with its label.
train_batch = list(zip(x_train, y_train))
test_batch = list(zip(x_test, y_test))
validate_batch = list(zip(x_validate, y_validate))

In [27]:
# Build a 784-50-10 network with sigmoid activation and MSE loss, then train it.
x = Network(sizes=[784, 50, 10], activation=Sigmoid(), loss=MSE())
x.summary()
x.SGD(train_batch, epochs=10, mini_batch_size=30, lr=3.6, validation_data=validate_batch)

SUMMARY
----------------------------
Number of Layers = 3
Bias Shapes = [(50, 1), (10, 1)]
Weight Shapes = [(50, 784), (10, 50)]
----------------------------
Epoch 1 Complete
Accuracy: 93.92 (%)
Epoch 2 Complete
Accuracy: 95.42 (%)
Epoch 3 Complete
Accuracy: 95.95 (%)
Epoch 4 Complete
Accuracy: 96.44 (%)
Epoch 5 Complete
Accuracy: 96.56 (%)
Epoch 6 Complete
Accuracy: 96.64 (%)
Epoch 7 Complete
Accuracy: 96.61 (%)
Epoch 8 Complete
Accuracy: 96.95 (%)
Epoch 9 Complete
Accuracy: 96.89 (%)
Epoch 10 Complete
Accuracy: 96.89 (%)
