In [1]:
def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(x):
  return np.multiply(x, (1 - x))

def loss(y, predicted):
    return np.mean((y - predicted)**2)
  
def xavier_initialization(i,o):
  a = np.sqrt(6.0/(i+o))
  return 2 * a * np.random.random((i,o)) - a

def xavier_initialization_hidden(i,o,h):
  a = np.sqrt(6.0/(i+o))
  return 2 * a * np.random.random((i,o,h)) - a

def add_bias(X):
  return np.concatenate((np.ones((np.shape(X)[0], 1)), X), axis=1)

class ANN:
  def __init__(self, h, s):
    self.h = h
    self.s = s
    self.input_synapse = np.empty((0,0))
    self.hidden_synapse = np.empty((0,0))
    self.output_synapse = np.empty((0,0))
  
  def fit(self, X, y, alpha, t):    
    h = self.h
    s = self.s
    X = add_bias(X)
    
    self.input_synapse = xavier_initialization(X.shape[1],s)
    self.hidden_synapse = xavier_initialization_hidden(s+1,s,h)
    self.output_synapse = xavier_initialization(s+1,y.shape[1])   
    
    #print(X.shape)
    #print(self.input_synapse.shape)
    #print(self.hidden_synapse[:,:,0].shape)
    #print(self.output_synapse.shape)
    #print(y.shape)
    
    for i in range(t):      
      input_layer = X
      hidden_layer = np.ones([input_layer.shape[0],s+1,h]) 
      hidden_layer[:,1:,0] = sigmoid(np.dot(input_layer,self.input_synapse))
            
      j = 1
      while(j < h):
        hidden_layer[:,1:,j] = sigmoid(np.dot(hidden_layer[:,:,j-1],self.hidden_synapse[:,:,j-1]))
        j += 1

      output_layer = sigmoid(np.dot(hidden_layer[:,:,h-1],self.output_synapse))

      #Back Propogation
      output_error = output_layer - y
      output_delta = output_error * sigmoid_derivative(output_layer)

      hidden_error = np.empty([input_layer.shape[0],s+1,h]) 
      hidden_delta = np.empty([input_layer.shape[0],s+1,h]) 

      hidden_error[:,:,h-1] = output_delta.dot(self.output_synapse.T)
      hidden_delta[:,:,h-1] = hidden_error[:,:,h-1] * sigmoid_derivative(hidden_layer[:,:,h-1])

      j -= 2
      while(j >= 0):
        hidden_error[:,:,j] = hidden_delta[:,1:,j+1].dot(self.hidden_synapse[:,:,j].T)
        hidden_delta[:,:,j] = hidden_error[:,:,j] * sigmoid_derivative(hidden_layer[:,:,j])
        j -= 1
      
      #input_error = hidden_delta[:,1:,0].dot(input_synapse.T)
      #input_delta = input_error * sigmoid_derivative(input_layer)
      
      self.input_synapse -= alpha * (input_layer.T.dot(hidden_delta[:,1:,0]))

      j += 1
      while(j < h-1):
        self.hidden_synapse[:,:,j] -= alpha * (hidden_layer[:,:,j].T.dot(hidden_delta[:,1:,j+1]))
        j += 1

      self.output_synapse -= alpha * (hidden_layer[:,:,h-1].T.dot(output_delta))

      if(i % 100 == 0): 
        print("Iterations: " + str(i)  + " Loss: " + str(loss(y,output_layer)))
  
  def predict(self, T):
    h = self.h
    s = self.s
    T = add_bias(T)
    
    input_layer = T
    hidden_layer = np.ones([input_layer.shape[0],s+1,h])
    hidden_layer[:,1:,0] = sigmoid(np.dot(input_layer,self.input_synapse))

    j = 1
    while(j < h):
      hidden_layer[:,1:,j] = sigmoid(np.dot(hidden_layer[:,:,j-1],self.hidden_synapse[:,:,j-1]))
      j += 1   
    
    output_layer = sigmoid(np.dot(hidden_layer[:,:,h-1],self.output_synapse))
    return output_layer
  
  def print(self):
    for w in self.input_synapse:
      print(w)
    for w in self.hidden_synapse:
      print(w)
    for w in self.output_synapse:
      print(w)

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd
from math import floor

def calculate_error(t, v, h, s, iterations):
  X = np.array(t.iloc[0:,1:])
  X = StandardScaler().fit_transform(X)
  y = pd.get_dummies(t.iloc[0:,0]).as_matrix()
  T = np.array(v.iloc[0:,1:])
  y_actual = v.iloc[0:,0].as_matrix()
  
  ann = ANN(h, s)
  ann.fit(X, y, .0001, iterations)
  
  output_layer = ann.predict(T)  
  y_predicted = np.argmax(output_layer, axis=1)
  
  return 1 - np.sum(y_predicted==y_actual)/len(y_actual)

df = pd.read_csv('data/train.csv', sep=",",
                 header=0, quoting=0, low_memory=False)

partition = floor(len(df) / 3)
a = df[0:partition]
b = df[partition: 2 * partition + 1]
c = df[2 * partition + 1: 3 * partition + 2]

frames = [a, b]
t1 = pd.concat(frames)
v1 = c

frames = [b, c]
t2 = pd.concat(frames)
v2 = a

frames = [a, c]
t3 = pd.concat(frames)
v3 = b

h=1
s=50
iterations=11
e1 = calculate_error(t1, v1, h, s, iterations)
e2 = calculate_error(t2, v2, h, s, iterations)
e3 = calculate_error(t3, v3, h, s, iterations)
print("Error estimate from 3-fold validation is " + str(np.mean([e1,e2,e3])))



Iterations: 0 Loss: 0.260706672793


  from ipykernel import kernelapp as app


Iterations: 0 Loss: 0.212564160772
Iterations: 0 Loss: 0.265582945459
Error estimate from 3-fold validation is 0.468164751547


In [6]:
#X = np.array([[0,0,1],[0,1,1],[1,0,1],[0,0,0]])
#X = np.array([[0,1]])
#y = pd.get_dummies(np.array([0,1,1,1]).T).as_matrix()
#T = np.array([[1,1,1]])

