<a href="https://colab.research.google.com/github/WaiWasabi/Neural-Networks/blob/optimize-data-shuffling/neural_network_with_matrices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Single-Input Network

In [2]:
import random
import numpy as np

def sigmoid(z):
  """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

  Only ``np.exp`` and ordinary arithmetic are used, so ``z`` may be a scalar
  or a NumPy array; arrays are processed element-wise.
  """
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator

def sigmoid_prime(z):
  """Return the derivative of the logistic function evaluated at ``z``.

  Uses the identity sigma'(z) = sigma(z) * (1 - sigma(z)). ``z`` may be a
  scalar or a NumPy array (element-wise).
  """
  # Evaluate the sigmoid (and therefore the exponential) once and reuse it.
  s = sigmoid(z)
  return s*(1-s)

def cost_derivative(a, y):
  """Return the difference between the network output ``a`` and the target ``y``.

  ``a - y`` is the gradient of the quadratic cost 0.5 * (a - y)**2 with
  respect to ``a``.
  """
  error = a - y
  return error

class Network(object):
  """Fully connected sigmoid network trained by SGD, one sample per backprop call."""

  def __init__(self, sizes):
    """Create randomly initialised parameters.

    Args:
      sizes: Sequence of layer widths, input layer first (e.g. ``[784, 50, 10]``).
    """
    self.sizes = sizes
    self.num_layers = len(sizes)
    # weights[l] has shape (sizes[l+1], sizes[l]); biases[l] has shape (sizes[l+1], 1).
    self.weights = [np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])]
    self.biases = [np.random.randn(x, 1) for x in sizes[1:]]

  def feedforward(self, a):
    """Propagate the column vector ``a`` through every layer and return the output."""
    for w, b in zip(self.weights, self.biases):
      a = sigmoid(np.dot(w, a) + b)
    return a

  def backprop(self, train_input, train_label):
    """Compute the cost gradient for one training sample.

    Args:
      train_input: Input column vector, expected shape ``(sizes[0], 1)``.
      train_label: Target output, same shape as the network output.

    Returns:
      ``(nabla_w, nabla_b)``: lists shaped like ``self.weights`` and
      ``self.biases`` holding the per-layer gradients.
    """
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    nabla_b = [np.zeros(b.shape) for b in self.biases]

    # Forward pass, remembering every weighted input z and every activation.
    activation = train_input
    activations = [train_input]
    zs = []
    for w, b in zip(self.weights, self.biases):
      z = np.dot(w, activation) + b
      zs.append(z)
      activation = sigmoid(z)
      activations.append(activation)

    # Output-layer error.
    delta = cost_derivative(activations[-1], train_label) * sigmoid_prime(zs[-1])
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    nabla_b[-1] = delta

    # Walk backwards: i = 2 is the second-to-last layer, and so on.
    for i in range(2, self.num_layers):
      delta = np.dot(self.weights[-i + 1].transpose(), delta) * sigmoid_prime(zs[-i])
      nabla_b[-i] = delta
      nabla_w[-i] = np.dot(delta, activations[-(i+1)].transpose())
    return nabla_w, nabla_b

  def update_mini_batch(self, mini_batch, lr):
    """Apply one gradient-descent step averaged over ``mini_batch``.

    Args:
      mini_batch: Sized sequence of ``(train_input, train_label)`` pairs.
      lr: Learning rate.

    An empty mini-batch carries no gradient, so it leaves the parameters
    untouched instead of dividing by zero.
    """
    batch_size = len(mini_batch)
    if batch_size == 0:
      return
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    for train_input, train_label in mini_batch:
      delta_nabla_w, delta_nabla_b = self.backprop(train_input, train_label)
      nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
      nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
    step = lr/batch_size
    self.weights = [w - step*nw for w, nw in zip(self.weights, nabla_w)]
    self.biases = [b - step*nb for b, nb in zip(self.biases, nabla_b)]

  def SGD(self, train_data, mini_batch_size, epochs, learning_rate):
    """Train with mini-batch stochastic gradient descent.

    Args:
      train_data: Iterable of ``(train_input, train_label)`` tuples.
      mini_batch_size: Positive number of samples per update.
      epochs: Number of passes over the data.
      learning_rate: Step size passed to ``update_mini_batch``.

    Raises:
      ValueError: If ``mini_batch_size`` is not positive.
    """
    if mini_batch_size <= 0:
      raise ValueError("mini_batch_size must be a positive integer")
    # Shuffle a private copy so the caller's list keeps its order; this also
    # lets any iterable (not only a list) be passed in.
    train_data = list(train_data)
    n = len(train_data)
    for epoch in range(epochs):
      random.shuffle(train_data)
      for k in range(0, n, mini_batch_size):
        self.update_mini_batch(train_data[k:k + mini_batch_size], learning_rate)
      print(f"Epoch {epoch + 1} Complete")

# Batch-Input Network

In [26]:
import numpy as np
import random

def sigmoid(z):
  """Logistic activation: maps ``z`` to 1 / (1 + exp(-z)).

  Accepts a scalar or a NumPy array of any shape; arrays are transformed
  element-wise because only ``np.exp`` and arithmetic are involved.
  """
  exp_neg_z = np.exp(-z)
  return 1.0 / (1.0 + exp_neg_z)

def sigmoid_prime(z):
  """Return the element-wise derivative of the logistic function at ``z``.

  Relies on sigma'(z) = sigma(z) * (1 - sigma(z)).
  """
  # One sigmoid evaluation is enough; reuse it for both factors.
  s = sigmoid(z)
  return s*(1.0-s)

def cost_derivative(a, y):
  """Return ``a - y``, the output error that drives backpropagation.

  This is the gradient of the quadratic cost 0.5 * (a - y)**2 with respect to
  the activation ``a``. Operands follow normal NumPy broadcasting.
  """
  residual = a - y
  return residual

class Network(object):
  """Fully connected sigmoid network whose backprop handles a whole mini-batch as one stacked array."""

  def __init__(self, sizes):
    """Create randomly initialised parameters.

    Args:
      sizes: Sequence of layer widths, input layer first (e.g. ``[784, 50, 10]``).
    """
    self.sizes = sizes
    self.num_layers = len(sizes)
    # weights[l] has shape (sizes[l+1], sizes[l]); biases[l] has shape (sizes[l+1], 1).
    self.weights = [np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])]
    self.biases = [np.random.randn(x, 1) for x in sizes[1:]]

  def feedforward(self, a):
    """Propagate ``a`` through every layer and return the output activation.

    Intended for single column-vector inputs (used by ``evaluate``).
    """
    for w, b in zip(self.weights, self.biases):
      a = sigmoid(np.matmul(w, a) + b)
    return a

  def backprop(self, train_input, train_label):
    """Compute the cost gradient summed over a mini-batch.

    Args:
      train_input: Array-like of stacked input column vectors, shape
        ``(batch, sizes[0], 1)``.
      train_label: Array-like of stacked targets, shape ``(batch, sizes[-1], 1)``.

    Returns:
      ``(sum_dCdw, sum_dCdb)``: per-layer gradients summed over the batch
      axis, shaped like ``self.weights`` and ``self.biases``.
    """
    # Stack once: callers may hand over tuples of column vectors, which NumPy
    # would otherwise convert again at every matmul below.
    activation = np.asarray(train_input)
    labels = np.asarray(train_label)
    activations = [activation]
    zs = []
    for w, b in zip(self.weights, self.biases): # forward pass
      # (n_out, n_in) @ (batch, n_in, 1) -> (batch, n_out, 1); the (n_out, 1)
      # bias broadcasts over the batch axis, so no tiled copy is required.
      z = np.matmul(w, activation) + b
      zs.append(z)
      activation = sigmoid(z)
      activations.append(activation)

    dCdw = [None]*len(self.weights)
    dCdb = [None]*len(self.biases)
    delta = cost_derivative(activations[-1], labels) * sigmoid_prime(zs[-1])
    # swapaxes(1, 2) transposes each sample's column vector into a row vector.
    dCdw[-1] = np.matmul(delta, np.swapaxes(activations[-2], 1, 2))
    dCdb[-1] = delta
    for i in range(2, self.num_layers): # i = 2 is the second-to-last layer
      delta = np.matmul(self.weights[-i+1].transpose(), delta) * sigmoid_prime(zs[-i])
      dCdw[-i] = np.matmul(delta, np.swapaxes(activations[-(i+1)], 1, 2))
      dCdb[-i] = delta
    sum_dCdw = [np.sum(nw, axis=0) for nw in dCdw]
    sum_dCdb = [np.sum(nb, axis=0) for nb in dCdb]
    return sum_dCdw, sum_dCdb

  def update_mini_batch(self, mini_batch, lr):
    """Apply one gradient-descent step averaged over a mini-batch.

    Args:
      mini_batch: Two-item iterable ``(train_input, train_label)`` holding the
        stacked inputs and stacked labels of the batch.
      lr: Learning rate.

    An empty batch carries no gradient and leaves the parameters untouched.
    """
    train_input, train_label = mini_batch
    batch_size = len(train_input)
    if batch_size == 0:
      return
    dCdw, dCdb = self.backprop(train_input, train_label)
    step = lr/batch_size
    self.weights = [w-step*nw for w, nw in zip(self.weights, dCdw)]
    self.biases = [b-step*nb for b, nb in zip(self.biases, dCdb)]

  def SGD(self, train_data, mini_batch_size, epochs, lr):
    """Train with mini-batch stochastic gradient descent.

    Args:
      train_data: Iterable of ``(train_input, train_label)`` tuples holding
        individual samples, NOT pre-stacked matrices.
      mini_batch_size: Positive number of samples per update.
      epochs: Number of passes over the data.
      lr: Learning rate.

    Raises:
      ValueError: If ``mini_batch_size`` is not positive.
    """
    if mini_batch_size <= 0:
      raise ValueError("mini_batch_size must be a positive integer")
    # Shuffle a private copy so the caller's list keeps its order.
    train_data = list(train_data)
    for epoch in range(epochs):
      random.shuffle(train_data)
      for start in range(0, len(train_data), mini_batch_size):
        # Split the slice of (input, label) pairs into two stacked arrays;
        # batches are built one at a time to avoid copying the whole data set.
        inputs, labels = zip(*train_data[start:start + mini_batch_size])
        self.update_mini_batch((np.asarray(inputs), np.asarray(labels)), lr)
      print(f"Epoch {epoch + 1} Complete")

  def evaluate(self, test_data):
    """Return how many ``(input, label)`` pairs in ``test_data`` are classified correctly.

    A prediction is the index of the largest output activation; it is
    compared to the label with ``==``.
    """
    return sum(int(np.argmax(self.feedforward(x)) == y) for x, y in test_data)

def to_one_hot(data, max):
  """Encode integer class indices as stacked one-hot column vectors.

  Args:
    data: Iterable (list, NumPy array, ...) of integer class indices.
    max: Number of classes, i.e. the length of each one-hot vector. The name
      shadows the built-in ``max`` but is kept so keyword callers still work.

  Returns:
    Float array of shape ``(len(data), max, 1)`` with a single 1 per sample.
    Empty input yields shape ``(0, max, 1)``, so the result can always be
    stacked or indexed like a regular batch.

  Raises:
    IndexError: If an index falls outside ``[-max, max)``.
  """
  indices = np.asarray(data if isinstance(data, np.ndarray) else list(data))
  count = len(indices)
  output = np.zeros((count, max, 1))
  if count:
    # One fancy-indexed assignment sets row indices[k] of sample k to 1.
    output[np.arange(count), indices, 0] = 1
  return output

# Import and Process Data

In [23]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test splits.
# NOTE(review): assumes load_data() returns NumPy image arrays of shape
# (N, 28, 28) and integer label arrays, as described in the Keras docs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a column vector and scale it by 1/255. A single
# vectorised reshape replaces a Python-level loop over all images.
x_train = x_train.reshape(len(x_train), -1, 1) / 255
x_test = x_test.reshape(len(x_test), -1, 1) / 255

# Only the training labels are one-hot encoded; test labels stay as plain
# class indices because Network.evaluate compares them with an argmax.
y_train = to_one_hot(y_train, 10)

# Pair each sample with its label: lists of (input, label) tuples.
train_batch = list(zip(x_train, y_train))
test_batch = list(zip(x_test, y_test))

# Network Testing


In [None]:
# Build a network with layer sizes 784 -> 50 -> 10.
x = Network([784, 50, 10])
# Train on train_batch: mini-batches of 30 samples, 10 epochs, learning rate 3.0.
x.SGD(train_batch, 30, 10, 3.0)

In [29]:
# Count the test_batch samples whose predicted class (argmax of the network
# output) equals the label.
x.evaluate(test_batch)

9403