In [None]:
import numpy as np
from random import random

class MLP(object):
  """Fully connected multilayer perceptron with sigmoid activations.

  The network is trained one sample at a time: a forward pass caches the
  activations of every layer, back-propagation turns an output error into
  one derivative matrix per weight matrix, and a gradient step applies
  those derivatives to the weights.

  Attributes:
    num_inputs: Number of input units.
    hidden_layers: List with the number of units in each hidden layer.
    num_outputs: Number of output units.
    weights: One ``(layers[i], layers[i+1])`` array per pair of
      consecutive layers, initialised with ``np.random.rand``.
    activations: One 1-D array per layer, overwritten by
      ``forward_propagate``.
    derivatives: One array per weight matrix (same shape), overwritten
      by ``back_propagate``.
  """

  def __init__(self, num_inputs=3, hidden_layers=None, num_outputs=2):
    """Build the layer structure and allocate weights and buffers.

    Args:
      num_inputs: Number of input units.
      hidden_layers: Sequence with the size of each hidden layer;
        ``None`` selects the default ``[3, 3]``. An empty sequence
        creates a network without hidden layers.
      num_outputs: Number of output units.
    """
    # A list literal as the default would be shared by every instance
    # built without the argument, so the default is created per call and
    # the caller's sequence is copied rather than aliased.
    if hidden_layers is None:
      hidden_layers = [3, 3]

    self.num_inputs = num_inputs
    self.hidden_layers = list(hidden_layers)
    self.num_outputs = num_outputs

    layers = [num_inputs] + self.hidden_layers + [num_outputs]
    layer_pairs = list(zip(layers[:-1], layers[1:]))

    # Random initial weights in [0, 1), one matrix per layer transition.
    self.weights = [np.random.rand(n_in, n_out) for n_in, n_out in layer_pairs]

    # Per-layer outputs, filled in by forward_propagate.
    self.activations = [np.zeros(size) for size in layers]

    # Per-weight-matrix gradients, filled in by back_propagate.
    self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

  def forward_propagate(self, inputs):
    """Run one sample through the network and cache every layer's output.

    Args:
      inputs: 1-D sequence or array of length ``num_inputs``.

    Returns:
      The activations of the output layer as a 1-D array.
    """
    # Coerce so that plain lists work here and in back_propagate, which
    # relies on array operations on the cached activations.
    activations = np.asarray(inputs, dtype=float)
    self.activations[0] = activations

    for i, w in enumerate(self.weights):
      net_inputs = np.dot(activations, w)
      activations = self._sigmoid(net_inputs)
      self.activations[i + 1] = activations

    return activations

  def back_propagate(self, error):
    """Propagate an output error backwards and store the derivatives.

    Uses the activations cached by the most recent ``forward_propagate``
    call. For each layer, walking from the output towards the input:

      delta_i      = error * s'(a_[i+1])   with s'(a) = a * (1 - a)
      derivative_i = outer(a_i, delta_i)
      error        = delta_i . W_i^T       (error handed to layer i-1)

    Args:
      error: 1-D sequence or array with one entry per output unit
        (``train`` passes ``target - output``).
    """
    error = np.asarray(error, dtype=float)

    for i in reversed(range(len(self.derivatives))):
      layer_output = self.activations[i + 1]
      delta = error * self._sigmoid_derivative(layer_output)

      # Outer product: rows follow the units feeding the weights,
      # columns follow the units they feed into, matching weights[i].
      self.derivatives[i] = np.outer(self.activations[i], delta)

      # Error attributed to the previous layer's outputs.
      error = np.dot(delta, self.weights[i].T)

  def gradient_descent(self, learning_rate):
    """Update every weight matrix in place from the stored derivatives.

    Args:
      learning_rate: Step size applied to each derivative.
    """
    for weights, derivatives in zip(self.weights, self.derivatives):
      # Added rather than subtracted because train() defines the error as
      # target - output instead of output - target.
      weights += derivatives * learning_rate

  def train(self, inputs, targets, epochs, learning_rate):
    """Train sample by sample and print the mean error after each epoch.

    Args:
      inputs: Sequence of input samples.
      targets: Sequence of target vectors, paired with ``inputs``.
      epochs: Number of passes over the dataset.
      learning_rate: Step size passed to ``gradient_descent``.
    """
    for epoch in range(epochs):
      sum_error = 0
      for sample, target in zip(inputs, targets):
        output = self.forward_propagate(sample)
        error = target - output
        self.back_propagate(error)
        self.gradient_descent(learning_rate)
        sum_error += self._mse(target, output)
      print("Error = {} at epoch {}".format(sum_error / len(inputs), epoch))

  def _mse(self, target, output):
    """Return the mean squared error between ``target`` and ``output``."""
    return np.average((target - output) ** 2)

  def _sigmoid_derivative(self, x):
    """Return the sigmoid derivative given the sigmoid's output ``x``."""
    return x * (1.0 - x)

  def _sigmoid(self, x):
    """Return the logistic sigmoid of ``x``, element-wise."""
    return 1.0 / (1 + np.exp(-x))

if __name__ == "__main__":
  # Toy dataset for learning addition: 1000 pairs of operands drawn from
  # [0, 0.5), so every sum stays below 1 and inside the sigmoid's range.
  sample_count = 1000
  operand_pairs = [[random() / 2 for _ in range(2)] for _ in range(sample_count)]
  inputs = np.array(operand_pairs)  # array([[0.1, 0.2], [0.3, 0.4], ...])
  targets = np.array([[left + right] for left, right in inputs])  # array([[0.3], [0.7], ...])

  # Two inputs, a single hidden layer of five units, one output.
  mlp = MLP(2, [5], 1)

  # 50 epochs with a learning rate of 0.1.
  mlp.train(inputs, targets, epochs=50, learning_rate=0.1)

  # Hand-picked sample to probe the trained network.
  input = np.array([0.3, 0.1])
  target = np.array([0.4])

  output = mlp.forward_propagate(input)

  # Three blank lines separate the training log from the final report.
  print("\n\n")
  report = "The network believes that {} + {} = {}".format(input[0], input[1], output[0])
  print(report)


Error = 0.04800690869025764 at epoch 0
Error = 0.04009613365274767 at epoch 1
Error = 0.03964365640985748 at epoch 2
Error = 0.039084306869239166 at epoch 3
Error = 0.03837558480185881 at epoch 4
Error = 0.03747044175871655 at epoch 5
Error = 0.0363200114644523 at epoch 6
Error = 0.03488007742837241 at epoch 7
Error = 0.03312175403480978 at epoch 8
Error = 0.03104473573906819 at epoch 9
Error = 0.028688156097397437 at epoch 10
Error = 0.026132094492431833 at epoch 11
Error = 0.023485564510967383 at epoch 12
Error = 0.02086435821267689 at epoch 13
Error = 0.01836846836609663 at epoch 14
Error = 0.016068106730461242 at epoch 15
Error = 0.014000969413574847 at epoch 16
Error = 0.012177563397447586 at epoch 17
Error = 0.01058966263101172 at epoch 18
Error = 0.009218307766583157 at epoch 19
Error = 0.008039804247573274 at epoch 20
Error = 0.00702957248216866 at epoch 21
Error = 0.006164303413789147 at epoch 22
Error = 0.005422975258328644 at epoch 23
Error = 0.004787185712272982 at epoch 24