In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.utils import to_categorical
from keras.datasets import cifar10
import random

In [2]:
class mulNode():
  def forward(self,x,y):
    self.x = np.squeeze(np.array(x))
    self.y = np.squeeze(np.array(y))
    z = np.matmul(self.x,self.y)
    return z
  def backward(self,dz):
    dx = dz.dot(self.y.T)
    dy = self.x.T.dot(dz)
    return [dx,dy]

class addNode():
  def forward(self,x,y):
    # print(x.shape,y.shape)
    self.x = np.squeeze(np.array(x))
    self.y = np.squeeze(np.array(y))
    z = np.add(self.x,self.y)
    return z
  def backward(self,dz):
    dx = 1*dz
    dy = 1*dz
    return [dx,dy]

class sigmoidNode():
  def forward(self,x):
    self.x = x
    self.h = 1/(1+np.exp(-x))
    return self.h
  def backward(self,dz):
    dx = dz*self.h*(1-self.h)
    return dx
  

class softmaxNode():
  def forward(self,x):
    self.x = x
    self.z = np.exp(self.x).T/(np.sum(np.exp(self.x),axis = 1))
    return self.z.T
  def backward(self,dldh):
      dldx = []
      h = self.z.T
      for i in range(len(h)): #For each of the m training examples
          s = h[i]
          softmax = s.reshape(-1,1)
          #Create a 3x3 softmax derivative matrix
          d = np.diagflat(s) - np.dot(softmax,softmax.T)
          #dldh * dhdx for this instance
          dldx.append(dldh[i].dot(d))
      dldx = np.array(dldx) #a mx3 matrix of derivative
      return dldx 

class categoricalLossNode():
  def forward(self,y,y_hat):
    self.y = y
    self.y_hat = y_hat
    z = -np.sum(np.log(y_hat[np.where(y == 1)]))
    return z
  def backward(self):
    dz = 1* (self.y_hat - self.y )
    return dz


In [3]:
class computation_graph():
  def __init__(self,x,y,hidden):
    # self.x  = x
    # self.y = y
    self.hidden = hidden
    self.l = categoricalLossNode()
    self.softmax = softmaxNode()
    self.w1 = np.ones((x.shape[1],hidden))*0.01
    self.b1 = np.ones((1,hidden))*0.01
    self.multiply1  = mulNode()
    self.addition1 = addNode()
    self.sigmoid1  = sigmoidNode()

    self.w2 = np.ones((hidden,hidden))*0.01
    self.b2 = np.ones((1,hidden))*0.01
    self.multiply2  = mulNode()
    self.addition2 = addNode()
    self.sigmoid2  = sigmoidNode()

    self.w3 = np.ones((hidden,y.shape[1]))*0.01
    self.b3 = np.ones((1,y.shape[1]))*0.01
    self.multiply3  = mulNode()
    self.addition3 = addNode()
  
  def forward(self,x,y):
    self.x = x
    self.y = y
    m = self.multiply1.forward(self.x,self.w1)
    add = self.addition1.forward(m,self.b1)
    first_out = self.sigmoid1.forward(add)

    m_s = self.multiply2.forward(first_out,self.w2)
    add_s = self.addition2.forward(m_s,self.b2)
    second_out = self.sigmoid2.forward(add_s)

    m_l = self.multiply3.forward(second_out,self.w3)
    add_l = self.addition3.forward(m_l,self.b3)
    pred = self.softmax.forward(add_l)

    loss = self.l.forward(self.y,pred)

    return loss,pred
  
  def backward(self):
    self.bias = []
    self.grad = []
    loss_back = self.l.backward()
    ds = self.softmax.backward(loss_back)
    bias_last = self.addition3.backward(ds)
    grad_last = self.multiply3.backward(bias_last[0])
    self.bias.append(bias_last[0])
    self.grad.append(grad_last[1])

    sig = self.sigmoid2.backward(grad_last[0])
    bias_hs = self.addition2.backward(sig)
    grad_hs = self.multiply2.backward(bias_hs[0])
    self.bias.append(bias_hs[0])
    self.grad.append(grad_hs[1])

    sig = self.sigmoid1.backward(grad_hs[0])
    bias_hf = self.addition1.backward(sig)
    grad_hf = self.multiply1.backward(bias_hf[0])
    self.bias.append(bias_hf[0])
    self.grad.append(grad_hf[1])

    return self.bias,self.grad

  def next_batch(self,x,y,batch_size):
    #returns the data splits equal to batch
    for i in range(0,x.shape[0],batch_size):
      yield (x[i:i+batch_size],y[i:i+batch_size])

  def fit(self,x,y,epochs,learning_rate = 0.01,eta = 0.01,batch_size = 5):
    epoch_loss = []
    epoch_accuracy = []
    self.batch_size = batch_size
    i = 1
    it = epochs
    eps0 = learning_rate
    eps_final = eps0/100
    decay_rates = [((1-(i+1)/it)*eps0 + ((i+1)/it)*eps_final) for i in range(it)]
    for i in range(epochs):
      batch_loss = []
      batch_accuracy = []
      learning_rate = decay_rates[i]
      for batch_x,batch_y in self.next_batch(x,y,self.batch_size):
          l,y_hat = cg.forward(batch_x,batch_y)
          bias,grad = cg.backward()

          batch_loss.append(l)
          batch_accuracy.append(accuracy_score(np.argmax(batch_y,axis=1),np.argmax(y_hat,axis = 1)))

          #updating bias
          self.b1 = self.b1 - learning_rate*bias[2]
          self.b2 = self.b2 - learning_rate*bias[1]
          self.b3 = self.b3 - learning_rate*bias[0]

          #updating weights 
          self.w1 = self.w1 - learning_rate*grad[2]
          self.w2 = self.w2 - learning_rate*grad[1]
          self.w3 = self.w3 - learning_rate*grad[0]
     
      epoch_loss.append(np.mean(batch_loss))
      epoch_accuracy.append(np.mean(batch_accuracy))
      print("Epoch :{} Loss :{} accuracy :{}".format(i,epoch_loss[i],epoch_accuracy[i]))

  def evaluate(self,x,y):
    batch_loss = []
    batch_accuracy = []
    for batch_x,batch_y in self.next_batch(x,y,self.batch_size):
      loss,y_hat = cg.forward(batch_x,batch_y)
      accuracy = accuracy_score(np.argmax(batch_y,axis=1),np.argmax(y_hat,axis = 1))
      batch_loss.append(loss)
      batch_accuracy.append(accuracy)
    print("loss:{} accuracy:{}".format(np.mean(batch_loss),np.mean(batch_accuracy)))
  
  def get_values(self):
    return (self.w1,self.w2,self.w3,self.b1,self.b2,self.b3)
  


In [4]:
def load_iris_data():
  cols = ['sepal_length','sepal_width','petal_length','petal_width','class']
  iris_df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',header=None)
  iris_df.columns = cols
  x_train,x_test,y_train,y_test = train_test_split(iris_df.iloc[:,:-1],iris_df['class'],test_size = 0.30,random_state = 10)

  return x_train,x_test,to_categorical(np.asarray(y_train.factorize()[0])),to_categorical(np.asarray(y_test.factorize()[0]))

def load_spam_data():
  cols = ['word_freq_make','word_freq_address','word_freq_all','word_freq_3d','word_freq_our','word_freq_over','word_freq_remove',
  'word_freq_internet','word_freq_order','word_freq_mail','word_freq_receive','word_freq_will','word_freq_people','word_freq_report',
  'word_freq_addresses','word_freq_free','word_freq_business','word_freq_email','word_freq_you','word_freq_credit','word_freq_your',
  'word_freq_font','word_freq_000','word_freq_money','word_freq_hp','word_freq_hpl','word_freq_george','word_freq_650','word_freq_lab',
  'word_freq_labs','word_freq_telnet','word_freq_857','word_freq_data','word_freq_415','word_freq_85','word_freq_technology',
  'word_freq_1999','word_freq_parts','word_freq_pm','word_freq_direct','word_freq_cs','word_freq_meeting','word_freq_original',
  'word_freq_project','word_freq_re','word_freq_edu','word_freq_table','word_freq_conference','char_freq_;','char_freq_(','char_freq_[',
  'char_freq_!','char_freq_$','char_freq_#','capital_run_length_average','capital_run_length_longest','capital_run_length_total','class']
  spam_df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data',header=None)
  spam_df.columns = cols
  x_train,x_test,y_train,y_test = train_test_split(spam_df.iloc[:,:-1],spam_df['class'],test_size = 0.30,random_state = 10)
  from sklearn.preprocessing import StandardScaler
  sc = StandardScaler()
  x_train = sc.fit_transform(x_train)
  x_test = sc.transform(x_test)

  return x_train,x_test[1:],to_categorical(y_train),to_categorical(y_test)[1:]

#Cifar Data
def extract_indices(sub_classes,y):
  ind = []
  for i in sub_classes:
    ind.append((y == i))
  return ind
random.seed(10)
sub_classes = random.sample(range(0,9),3)
def load_cifar_data():
  (x_train, y_train), (x_test, y_test) = cifar10.load_data()
  indices = extract_indices(sub_classes,y_train)
  sub_x_train = np.append(np.append(x_train[indices[0].reshape(x_train.shape[0])],(x_train[indices[1].reshape(x_train.shape[0])]),axis=0),x_train[indices[0].reshape(x_train.shape[0])],axis = 0)
  sub_y_train = np.append(np.append(y_train[indices[0]],y_train[indices[1]],axis=0),y_train[indices[2]],axis = 0)
  indices = extract_indices(sub_classes,y_test)
  sub_x_test = np.append(np.append(x_test[indices[0].reshape(x_test.shape[0])],(x_test[indices[1].reshape(x_test.shape[0])]),axis=0),x_test[indices[0].reshape(x_test.shape[0])],axis = 0)
  sub_y_test = np.append(np.append(y_test[indices[0]],y_test[indices[1]],axis=0),y_test[indices[2]],axis = 0)

  x_train = sub_x_train/255
  x_test = sub_x_test/255
  y_train = to_categorical(sub_y_train,num_classes=10)
  y_test = to_categorical(sub_y_test,num_classes=10)
  return (x_train,x_test,y_train,y_test)

In [5]:
x_train,x_test,y_train,y_test = load_iris_data()

In [6]:
cg = computation_graph(x_train,y_train,64)
cg.fit(x_train,y_train,10,learning_rate=0.001)
cg.evaluate(x_test,y_test)

Epoch :0 Loss :5.499128750548144 accuracy :0.2857142857142858
Epoch :1 Loss :5.495734811651935 accuracy :0.3428571428571429
Epoch :2 Loss :5.493051938916548 accuracy :0.35238095238095246
Epoch :3 Loss :5.49081574995295 accuracy :0.36190476190476195
Epoch :4 Loss :5.4888931037224165 accuracy :0.38095238095238093
Epoch :5 Loss :5.487214192298916 accuracy :0.39047619047619053
Epoch :6 Loss :5.485741158365049 accuracy :0.4000000000000001
Epoch :7 Loss :5.484452962185308 accuracy :0.4000000000000001
Epoch :8 Loss :5.483337827900071 accuracy :0.41904761904761917
Epoch :9 Loss :5.482389384909998 accuracy :0.4571428571428572
loss:5.4845968138636545 accuracy:0.4444444444444444


In [7]:
x_train,x_test,y_train,y_test = load_spam_data()

In [8]:
cg = computation_graph(x_train,y_train,512)
cg.fit(x_train,y_train,10,learning_rate=0.001)
cg.evaluate(x_test,y_test)

Epoch :0 Loss :3.5076189209689095 accuracy :0.5795031055900621
Epoch :1 Loss :3.4861751441017246 accuracy :0.584472049689441
Epoch :2 Loss :3.4657666911315226 accuracy :0.584472049689441
Epoch :3 Loss :3.446377914273318 accuracy :0.584472049689441
Epoch :4 Loss :3.427928042569083 accuracy :0.5894409937888199
Epoch :5 Loss :3.410309863505084 accuracy :0.5975155279503105
Epoch :6 Loss :3.393393120078283 accuracy :0.6043478260869565
Epoch :7 Loss :3.3770499652236596 accuracy :0.6090062111801242
Epoch :8 Loss :3.3612140882601578 accuracy :0.6108695652173912
Epoch :9 Loss :3.345288716698243 accuracy :0.6108695652173912
loss:3.381793463270455 accuracy:0.594927536231884


In [9]:
x_train,x_test,y_train,y_test = load_cifar_data()
x_train = np.reshape(x_train,(x_train.shape[0],3072))
x_test = np.reshape(x_test,(x_test.shape[0],3072))

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [10]:
cg = computation_graph(x_train,y_train,512)
cg.fit(x_train,y_train,5,learning_rate=0.001)
cg.evaluate(x_test,y_test)

Epoch :0 Loss :0.7942177801425812 accuracy :0.9753333333333334
Epoch :1 Loss :1.0759865301182425 accuracy :0.961
Epoch :2 Loss :1.1874192408746134 accuracy :0.9536666666666667
Epoch :3 Loss :1.5621869219943079 accuracy :0.9333333333333333
Epoch :4 Loss :9.134605116700627 accuracy :0.5
loss:10.03025123391114 accuracy:0.3333333333333333
