In [1]:
import numpy as np


In [2]:
def sigmoid(x):
  """Logistic activation f(x) = 1 / (1 + e^(-x)); applied elementwise to arrays."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator

def deriv_sigmoid(x):
  """Slope of the sigmoid at x, using the identity f'(x) = f(x) * (1 - f(x))."""
  activation = sigmoid(x)
  complement = 1 - activation
  return activation * complement

def mse_loss(y_true, y_pred):
  """Mean squared error between two equal-length numpy arrays."""
  squared_errors = (y_true - y_pred) ** 2
  return squared_errors.mean()


In [3]:


class OurNeuralNetwork:
  '''
  A neural network with:
    - 2 inputs
    - a hidden layer with 2 neurons (h1, h2)
    - an output layer with 1 neuron (o1)

  Every neuron applies the sigmoid activation to a weighted sum of its
  inputs plus a bias. All nine parameters (w1..w6, b1..b3) are drawn
  independently from np.random.normal() when the object is created.

  *** DISCLAIMER ***:
  The code below is intended to be simple and educational, NOT optimal.
  Real neural net code looks nothing like this. DO NOT use this code.
  Instead, read/run it to understand how this specific network works.
  '''
  def __init__(self):
    # Weights: w1, w2 feed h1; w3, w4 feed h2; w5, w6 feed o1.
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases: b1 for h1, b2 for h2, b3 for o1.
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def _forward_pass(self, x):
    # Single forward pass that also exposes the pre-activation sums, which
    # backpropagation needs. Returns (sum_h1, h1, sum_h2, h2, sum_o1, o1).
    sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
    h1 = sigmoid(sum_h1)

    sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
    h2 = sigmoid(sum_h2)

    sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
    o1 = sigmoid(sum_o1)
    return sum_h1, h1, sum_h2, h2, sum_o1, o1

  def feedforward(self, x):
    '''
    Return the network output o1 for one sample.
    - x is a numpy array with 2 elements.
    '''
    return self._forward_pass(x)[-1]

  def train(self, data, all_y_trues, learn_rate=0.1, epochs=1000):
    '''
    Fit the network with per-sample gradient descent on the squared error.

    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    - learn_rate is the step size applied to every parameter update.
    - epochs is the number of times to loop through the entire dataset.

    Prints the MSE over the whole dataset every 10th epoch (starting with
    epoch 0). Returns None; the weights and biases are updated in place.
    '''
    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1, h1, sum_h2, h2, sum_o1, y_pred = self._forward_pass(x)

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        # Per-sample loss is (y_true - y_pred)^2, hence the factor -2.
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Each neuron's sigmoid slope is reused by several partials below,
        # so evaluate it once per neuron.
        slope_o1 = deriv_sigmoid(sum_o1)
        slope_h1 = deriv_sigmoid(sum_h1)
        slope_h2 = deriv_sigmoid(sum_h2)

        # Neuron o1
        d_ypred_d_w5 = h1 * slope_o1
        d_ypred_d_w6 = h2 * slope_o1
        d_ypred_d_b3 = slope_o1

        d_ypred_d_h1 = self.w5 * slope_o1
        d_ypred_d_h2 = self.w6 * slope_o1

        # Neuron h1
        d_h1_d_w1 = x[0] * slope_h1
        d_h1_d_w2 = x[1] * slope_h1
        d_h1_d_b1 = slope_h1

        # Neuron h2
        d_h2_d_w3 = x[0] * slope_h2
        d_h2_d_w4 = x[1] * slope_h2
        d_h2_d_b2 = slope_h2

        # --- Update weights and biases
        # All partials above were taken before any parameter changes, so
        # every update in this step uses the same (pre-update) network.
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))



In [None]:
# Define dataset: one row per person, two features per row.
# NOTE(review): going by the pounds/inches comments on emily and frank below,
# the features look like (weight - 135 lb, height - 66 in) — confirm.
data = np.array([
  [-2, -1],  # Alice
  [25, 6],   # Bob
  [17, 4],   # Charlie
  [-15, -6], # Diana
])
# Target label for each row of `data`, in the same order.
# NOTE(review): the "F"/"M" comments below suggest 1 = female, 0 = male — confirm.
all_y_trues = np.array([
  1, # Alice
  0, # Bob
  0, # Charlie
  1, # Diana
])

# Train our neural network!
# Initial weights come from np.random.normal() with no seed set in this cell,
# so the printed losses and predictions differ from run to run.
network = OurNeuralNetwork()
network.train(data, all_y_trues)

# Make some predictions on two samples that are not in the training data.
emily = np.array([-7, -3]) # 128 pounds, 63 inches
frank = np.array([20, 2])  # 155 pounds, 68 inches
# The trailing numbers are example outputs from one run, not guaranteed values.
print("Emily: %.3f" % network.feedforward(emily)) # 0.951 - F
print("Frank: %.3f" % network.feedforward(frank)) # 0.039 - M

In [4]:
import pandas as pd
import numpy as np
import h5py
from scipy.special import xlogy

from sklearn.metrics import accuracy_score

In [5]:
def sigmoid(Z):
  
  """Applies sigmoid function to an array/value

    Computed as 1/(1+e^-Z), but arranged so that the exponential is only
    ever taken of a non-positive number. Large negative inputs therefore
    return 0 without triggering a floating-point overflow.

    Arguments
    ---------
    Z: float/int/array_like
      Original Value

    Returns
    -------
    A: same shape as input
      Value after applying sigmoid function
  """
  # exp(-|Z|) lies in (0, 1], so it cannot overflow for any finite Z.
  decay = np.exp(-np.abs(Z))
  # For Z >= 0: 1/(1+e^-Z). For Z < 0 the same value is e^Z/(1+e^Z).
  A = np.where(np.greater_equal(Z, 0), 1/(1+decay), decay/(1+decay))
  # Indexing with () turns a 0-d result back into a scalar; arrays pass through.
  return A[()]

In [6]:
def sigmoid_prime(Z):
  
  """Applies differentiation of sigmoid function to an array/value

    The derivative of the sigmoid s at Z is s(Z)*(1-s(Z)). It is evaluated
    here in the equivalent form e^-|Z| / (1+e^-|Z|)^2, which is symmetric
    in Z and never exponentiates a positive number.

    Arguments
    ---------
    Z: float/int/array_like
      Original Value (the pre-activation input, not the sigmoid output)

    Returns
    -------
    A: same shape as input
      Value after applying diff of sigmoid function
  """
  # exp(-|Z|) lies in (0, 1], so it cannot overflow for any finite Z.
  decay = np.exp(-np.abs(Z))
  return decay/np.power(1+decay, 2)

In [7]:
def initialize_nn(X):

  """Builds the starting parameters for a single-layer model

  The NumPy global random generator is re-seeded with 999 on every call,
  so the returned weights are the same each time for a given feature count.

  Arguments
  ---------
  X: array-like
    Train Dataset; only X.shape[1] (its second dimension) is read

  Returns
  -------
  dict
    'w': array of shape (X.shape[1], 1), standard-normal draws scaled by 0.01
    'b': the bias, initialised to 0
  """

  np.random.seed(999)

  n_features = X.shape[1]
  small_weights = 0.01 * np.random.randn(n_features, 1)

  return dict(w=small_weights, b=0)

In [8]:
def forward_prop(X, params):

  """Runs the forward pass: a linear combination followed by sigmoid

    Arguments
    ---------
    X: array_like
      Data
    params: dictionary
      Parameter dict containing 'w' and 'b'

    Returns
    -------
    dict
      'z': np.dot(X, w) + b
      'a': sigmoid applied to 'z'
  """

  linear_out = np.dot(X, params['w']) + params['b']
  activation = sigmoid(linear_out)

  return dict(z=linear_out, a=activation)

In [9]:
def backward_prop(X, y, cache):

  """Performs backward propagation and calculates dw and db

    Arguments
    ---------
    X: array_like
      Data; X.shape[0] is taken as the number of samples
    y: array_like
      True labels. If y holds the same number of elements as cache['a']
      but in a different shape (for example (m,) against (m, 1)), it is
      reshaped to match before subtracting.
    cache: dictionary
      Dictionary containing 'a' (a 'z' entry may be present but is not read)

    Returns
    -------
    dict
      Dictionary containing gradients 'dz', 'dw' and 'db' where
      dz = a - y, dw = X.T . dz / m and db = sum(dz) / m
  """

  a = cache['a']
  m = X.shape[0]

  # Without this, a - y with a of shape (m, 1) and y of shape (m,) would
  # broadcast to an (m, m) matrix instead of a per-sample difference.
  y = np.asarray(y)
  if y.shape != np.shape(a) and y.size == np.size(a):
    y = y.reshape(np.shape(a))

  dz = a - y
  dw = (1./m)*np.dot(X.T, dz)
  db = (1./m)*np.sum(dz)

  return {'dz': dz, 'dw': dw, 'db': db}

In [10]:
def update_weights(params, changes, learning_rate=0.01):

  """Updates weights of the layers

    Takes one gradient-descent step: value - learning_rate * gradient.
    The arrays held in `params` are left untouched; the step is written
    to new objects in the returned dict.

    Arguments
    ---------
    params: dict
      Dictionary containing 'w' and 'b'
    changes: dict
      Dictionary containing 'dw' and 'db'
    learning_rate: int, float
      Learning rate for the weight update

    Returns
    -------
    dict
      Dictionary containing updated weights and biases

      The keys for weights and bias arrays in the dict is 'w' and 'b'
  """

  # Plain subtraction (not -=) so the caller's weight array is not
  # modified behind its back and re-running a cell stays repeatable.
  w = params['w'] - learning_rate*changes['dw']
  b = params['b'] - learning_rate*changes['db']

  return {'w': w, 'b': b}

In [11]:
def calculate_loss(cache, y):

  """Calculate the entropy loss

    Binary cross-entropy averaged over y.shape[0] samples:
    -1/m * sum(y*log(a) + (1-y)*log(1-a)). xlogy is used so that a term
    whose multiplier is 0 contributes 0 even when the log argument is 0.

    Arguments
    ---------
    cache: dict
      Dictionary containing 'a' (the predicted activations)
    y: array-like
      True labels. If y holds the same number of elements as cache['a']
      but in a different shape (for example (m,) against (m, 1)), it is
      reshaped to match before the loss is computed.

    Returns
    -------
    loss: float
      Entropy loss
  """

  a = cache['a']
  y = np.asarray(y)
  m = y.shape[0]

  # Without this, labels of shape (m,) against activations of shape (m, 1)
  # would broadcast to (m, m) and inflate the summed loss.
  if y.shape != np.shape(a) and y.size == np.size(a):
    y = y.reshape(np.shape(a))

  return -1/m*np.sum(xlogy(y, a) + xlogy(1-y, 1-a))

In [12]:

# Number of iterations of the training loop (used as range(epochs)); each
# iteration runs one forward/backward pass over the full training set.
epochs = 1000
# Step size passed to update_weights on every iteration.
learning_rate = 5e-3

In [None]:
# Train on the full training set for `epochs` iterations, reporting loss and
# accuracy at regular intervals.
params = initialize_nn(X_train)

# Report roughly ten times over the run. Integer arithmetic keeps the modulo
# test exact for any epoch count, and max(1, ...) keeps the interval non-zero
# when epochs < 10.
log_every = max(1, epochs // 10)

for i in range(epochs):
  cache = forward_prop(X_train, params)
  loss = calculate_loss(cache, y_train)
  updates = backward_prop(X_train, y_train, cache)
  params = update_weights(params, updates, learning_rate=learning_rate)

  if i % log_every == 0:
    print('Epoch: {}\tLoss:{:.5f}'.format(i, loss), end='')
    # Loss and training accuracy come from the activations computed before
    # this iteration's update; testing accuracy uses the updated params.
    # Both use the same decision rule: predict 1 when the activation >= 0.5.
    train_cache = np.where(cache['a']>=0.5, 1, 0)
    print('\tTraining accuracy:{:.5f}'.format(accuracy_score(y_train, train_cache)), end='')
    test_cache = forward_prop(X_test, params)['a']
    test_cache = np.where(test_cache>=0.5, 1, 0)
    print('\tTesting accuracy:{:.5f}'.format(accuracy_score(y_test, test_cache)))