In [42]:
import pickle
import random
import warnings
from collections import Counter
from copy import deepcopy
from csv import reader as rdr
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
from sklearn import model_selection
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.manifold import TSNE
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
warnings.filterwarnings( "ignore" )

In [117]:
class ActivationFunctions:
  """Element-wise activation functions and their derivatives for NumPy arrays.

  Each activation ``name`` has a companion ``name_gradient`` method. The
  ``getActivation`` / ``getGradientActivation`` dispatchers select one by its
  string name.
  """

  # Names accepted by the two dispatcher methods.
  _SUPPORTED = ('relu', 'leaky_relu', 'sigmoid', 'tanh', 'softmax', 'linear')

  def relu(self, X):
    """Return ``X`` where ``X >= 0`` and 0 elsewhere."""
    return X * (X>=0)

  def relu_gradient(self, X):
    """Return 1 where ``X >= 0`` and 0 elsewhere (the value at 0 is taken as 1)."""
    return 1*(X>=0)

  def leaky_relu(self, X):
    """Return ``X`` where ``X >= 0`` and ``0.01 * X`` elsewhere."""
    # The previous mask arithmetic scaled positive inputs by 1.01 instead of 1.
    return np.where(X >= 0, X, 0.01 * X)

  def leaky_relu_gradient(self, X):
    """Return 1 where ``X >= 0`` and 0.01 elsewhere."""
    return np.where(X >= 0, 1.0, 0.01)

  def sigmoid(self, X):
    """Return the logistic function ``1 / (1 + exp(-X))``."""
    return 1/(1+np.exp(-X))

  def sigmoid_gradient(self, X):
    """Return ``sigmoid(X) * (1 - sigmoid(X))``."""
    sig=self.sigmoid(X)
    return sig*(1-sig)

  def linear(self, X):
    """Identity activation: return ``X`` unchanged."""
    return X

  def linear_gradient(self, X):
    """Return an array of ones with the shape of ``X``."""
    return np.ones(X.shape)

  # Older spelling kept so existing callers of ``linear_grad`` keep working.
  linear_grad = linear_gradient

  def tanh(self, X):
    """Return the hyperbolic tangent of ``X``."""
    return np.tanh(X)

  def tanh_gradient(self, X):
    """Return ``1 - tanh(X)**2``."""
    tanh=self.tanh(X)
    return 1-tanh**2

  def softmax(self, X):
    """Return the softmax of ``X`` taken along axis 1 (``X`` must be 2-D)."""
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing on large logits.
    shifted = X - np.max(X, axis=1, keepdims=True)
    expo = np.exp(shifted)
    return expo/expo.sum(axis=1, keepdims = True)

  def softmax_gradient(self, X):
    """Return the diagonal of the softmax Jacobian, ``s * (1 - s)``.

    This is only the element-wise (diagonal) part of the derivative, which is
    what an element-wise gradient interface can express; the off-diagonal
    terms of the full Jacobian are not represented.
    """
    soft = self.softmax(X)
    return soft*(1-soft)

  def _resolve(self, a_function, suffix):
    """Return the bound method ``a_function + suffix`` or raise ``ValueError``."""
    if a_function not in self._SUPPORTED:
      raise ValueError(
        "Unknown activation function %r; expected one of %s"
        % (a_function, ", ".join(self._SUPPORTED)))
    return getattr(self, a_function + suffix)

  def getActivation(self,X,a_function):
    """Apply the activation named ``a_function`` to ``X``.

    Raises:
      ValueError: if ``a_function`` is not a supported activation name.
    """
    return self._resolve(a_function, '')(X)

  def getGradientActivation(self,X,a_function):
    """Evaluate the derivative of the activation named ``a_function`` at ``X``.

    Raises:
      ValueError: if ``a_function`` is not a supported activation name.
    """
    return self._resolve(a_function, '_gradient')(X)

class Neuron:
  """Weight-initialisation strategies that each return a NumPy array."""

  def zeroes_initiation(self,p_size):
    """Return an all-zero array of shape ``p_size``."""
    weights = np.zeros(p_size)
    return weights

  def random_initiation(self,p_size):
    """Return uniform samples from [0, 1) scaled by 0.01.

    Only the first two entries of ``p_size`` are used, as (rows, columns).
    """
    n_rows = p_size[0]
    n_cols = p_size[1]
    uniform_draws = np.random.rand(n_rows, n_cols)
    return uniform_draws*0.01

  def normal_initiation(self,p_size):
    """Return standard-normal samples of shape ``p_size`` scaled by 0.01."""
    gaussian_draws = np.random.normal(loc=0, scale=1, size=p_size)
    return gaussian_draws*0.01

class MultiLayerPerceptron():
  """Fully connected feed-forward classifier trained with mini-batch gradient descent.

  Hidden layers use the activation named by ``activation``; the last layer
  always uses softmax. Targets are expected as one-hot rows and the loss is
  cross-entropy.
  """

  def __init__(self, n_layers, layer_sizes, activation, learning_rate, weight_init, batch_size, num_epochs):
    """Store the hyper-parameters and create the weight and bias dictionaries.

    Args:
      n_layers: number of layers counting the input layer, so there are
        ``n_layers - 1`` weight matrices.
      layer_sizes: width of each layer; ``layer_sizes[i]`` feeds ``layer_sizes[i+1]``.
      activation: activation name used for the hidden layers.
      learning_rate: gradient-descent step size.
      weight_init: ``'zero'``, ``'random'`` or ``'normal'`` (case-insensitive).
      batch_size: number of rows per mini-batch.
      num_epochs: number of passes over the training data.

    Raises:
      ValueError: if ``weight_init`` is not one of the supported names.
    """
    self.n_layers=n_layers
    self.layer_sizes=layer_sizes
    self.activation=activation
    self.learning_rate=learning_rate
    self.weight_init=weight_init.lower()
    self.batch_size=batch_size
    self.num_epochs=num_epochs
    self.train_error = []
    self.test_error = []
    self.act = None
    self.preact = None

    self.activation_functions = ActivationFunctions()
    self.neuron_intialization = Neuron()

    self.weights={}
    self.bias={}

    initialisers = {
      'zero': self.neuron_intialization.zeroes_initiation,
      'random': self.neuron_intialization.random_initiation,
      'normal': self.neuron_intialization.normal_initiation,
    }
    # Fail here rather than with a KeyError on the first forward pass.
    if self.weight_init not in initialisers:
      raise ValueError(
        "Unknown weight_init %r; expected 'zero', 'random' or 'normal'" % (weight_init,))
    make_weights = initialisers[self.weight_init]

    for i in range(self.n_layers-1):
      shape = (self.layer_sizes[i],self.layer_sizes[i+1])
      self.weights[i] = np.array(make_weights(shape))
      self.bias[i]=np.zeros(self.layer_sizes[i+1])

  def cross_entropy_loss(self, A, y):
    """Return the mean cross-entropy of probabilities ``A`` against one-hot ``y``.

    For every row the probability at the arg-max column of ``y`` is selected.
    """
    n = len(y)
    picked = A[np.arange(n), y.argmax(axis=1)]
    # Clip so a probability of exactly 0 gives a large finite loss, not inf.
    logp = - np.log(np.clip(picked, 1e-12, None))
    loss = np.sum(logp)/n
    return loss

  def gradient_decent(self,derivatives,X_sample,act,layer):
    """Apply one gradient-descent step to the weights and bias of ``layer``.

    ``act[layer-1]`` is used as the input of the layer, so for ``layer == 0``
    the caller must have stored the batch input under ``act[-1]``.
    Gradients are averaged over ``len(X_sample)``.
    """
    batch_len = len(X_sample)
    grad=act[layer-1].T.dot(derivatives[layer])/batch_len
    self.weights[layer]=self.weights[layer]-self.learning_rate*grad
    self.bias[layer]=self.bias[layer]-self.learning_rate*np.sum(derivatives[layer],axis=0)/batch_len

  def predict_x_test(self,x_test,act,preact,y_test,y_sample):
    """Append the current train and test losses and keep the latest activations.

    The test loss is computed from a fresh forward pass over ``x_test``; the
    train loss is computed from the output activations in ``act`` against
    ``y_sample``.
    """
    # Uses the ``x_test`` argument; the earlier code read an undefined ``X_test``.
    y_test_pred=self.predict_proba(x_test)
    test_cost=self.cross_entropy_loss(y_test_pred,y_test)
    train_cost = self.cross_entropy_loss(act[self.n_layers-2],y_sample)
    self.train_error.append(train_cost)
    self.test_error.append(test_cost)
    self.act=act
    self.preact=preact

  def fit(self, X, y,X_test=None,y_test=None):
    """Train the network with mini-batch gradient descent.

    Args:
      X: 2-D array-like of training inputs.
      y: 2-D array-like of one-hot training targets.
      X_test: optional held-out inputs; when given, one train loss and one
        test loss are recorded per epoch.
      y_test: one-hot targets for ``X_test``; required when ``X_test`` is given.

    Returns:
      self

    Raises:
      ValueError: if ``X`` has no rows, or ``X_test`` is given without ``y_test``.

    The per-epoch train loss is the loss of the last mini-batch, computed from
    the forward pass made before that batch's weight update.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.shape[0] == 0:
      raise ValueError("fit() requires at least one training sample")
    if X_test is not None:
      if y_test is None:
        raise ValueError("y_test must be provided together with X_test")
      X_test = np.asarray(X_test)
      y_test = np.asarray(y_test)

    epoch = 0
    while(epoch < self.num_epochs):
      for batch in range(0,X.shape[0],self.batch_size):
        # Plain slices are enough: nothing below writes into the batch arrays.
        X_sample=X[batch:batch+self.batch_size,:]
        y_sample=y[batch:batch+self.batch_size,:]

        act,preact = self.forward_propogation(X_sample)
        derivatives = self.backward_propogation(y_sample,act,preact)

        # gradient_decent reads act[layer-1]; key -1 supplies the input of layer 0.
        act[-1]=X_sample

        for layer in range(self.n_layers-1):
          self.gradient_decent(derivatives,X_sample,act,layer)

      if((epoch+1)%5==0):
        train_cost = self.cross_entropy_loss(act[self.n_layers-2],y_sample)
        print("epoch",epoch,"\t",train_cost)
      if(X_test is not None):
        self.predict_x_test(X_test,act,preact,y_test,y_sample)
      epoch += 1
    return self

  def forward_propogation(self,input_data):
    """Run a forward pass.

    Returns:
      ``(act, preact)``: dictionaries keyed by layer index holding the
      post-activation and pre-activation values. The last layer
      (key ``n_layers - 2``) is always activated with softmax.
    """
    preact={}
    act={}
    last = self.n_layers-2

    for layer in range(last):
      hidden_output=input_data.dot(self.weights[layer])+self.bias[layer]
      hidden_output_A=self.activation_functions.getActivation(hidden_output,self.activation)
      preact[layer]=hidden_output
      act[layer]=hidden_output_A
      input_data=hidden_output_A

    hidden_output=input_data.dot(self.weights[last])+self.bias[last]
    preact[last]=hidden_output
    act[last]=self.activation_functions.getActivation(hidden_output,'softmax')
    return act,preact

  def backward_propogation(self,y,act,preact):
    """Back-propagate the error and return the per-layer deltas.

    The output delta is ``prediction - y``. Each earlier delta is the next
    layer's delta multiplied by that layer's transposed weights and by the
    hidden activation's derivative at the stored pre-activation.
    """
    derivatives={}
    last = self.n_layers-2
    delta=act[last]-y
    derivatives[last]=delta
    for layer in range(self.n_layers-3,-1,-1):
      error=delta.dot(self.weights[layer+1].T)
      derv=self.activation_functions.getGradientActivation(preact[layer],self.activation)
      delta=error*derv
      derivatives[layer]=delta
    return derivatives

  def predict_proba(self, X):
    """Return the softmax output of the network for every row of ``X``."""
    y,_=self.forward_propogation(np.asarray(X))
    return y[self.n_layers-2]

  def predict(self, X):
    """Return the index of the most probable class for every row of ``X``."""
    return self.predict_proba(X).argmax(axis = 1)

  def getErrorReports(self):
    """Return ``(train_error, test_error)``, the per-epoch loss lists."""
    return self.train_error,self.test_error
    

In [116]:
class Evaluation:
  """Classification metrics computed from label sequences.

  ``confusionMatrix`` and the metrics taking its four counts assume binary
  labels encoded as 0 (negative) and 1 (positive). ``accuracy2`` works with
  any integer-convertible labels.
  """

  # To get Confusion matrix
  def confusionMatrix(self, actual_output, predicted_output):
    """Count the outcomes of a binary classifier.

    Returns:
      ``(true_positive, true_negatives, false_positives, false_negative)``.
      Pairs whose values are not 0/1 are not counted.
    """
    assert len(actual_output) == len(predicted_output)
    true_positive = 0
    true_negatives = 0
    false_positives = 0
    false_negative = 0

    for actual, predicted in zip(actual_output, predicted_output):
      if actual == 1 and predicted == 1:
        true_positive += 1
      elif actual == 1 and predicted == 0:
        # A positive sample predicted negative is a false negative.
        false_negative += 1
      elif actual == 0 and predicted == 1:
        false_positives += 1
      elif actual == 0 and predicted == 0:
        # A negative sample predicted negative is a true negative.
        true_negatives += 1

    return true_positive,true_negatives,false_positives,false_negative

  def _safe_ratio(self, numerator, denominator):
    """Return ``numerator / denominator``; print a notice and return 0 if the denominator is 0."""
    if denominator == 0:
      print("ZeroDivisionError")
      return 0
    return numerator / denominator

  # To get accuracy score
  def accuracyScore(self, true_positive, true_negatives, false_positives, false_negative):
    """Return ``(TP + TN) / (TP + TN + FP + FN)``.

    Raises:
      ZeroDivisionError: if all four counts are 0.
    """
    return (true_positive + true_negatives) / (true_positive + true_negatives + false_positives + false_negative)

  def accuracy2(self,actual_output,predicted_output):
    """Return the fraction of positions where both labels are equal after ``int()`` conversion.

    Raises:
      ZeroDivisionError: if the inputs are empty.
    """
    assert len(actual_output) == len(predicted_output)
    correctPred = 0
    totalPred = 0
    for actual, predicted in zip(actual_output, predicted_output):
      if int(actual) == int(predicted):
        correctPred += 1
      totalPred += 1
    return correctPred/totalPred

  # To get recall score
  def recallScore(self, true_positive, true_negatives, false_positives, false_negative):
    """Return ``TP / (TP + FN)``, or 0 (after printing a notice) when there are no positives."""
    return self._safe_ratio(true_positive, true_positive + false_negative)

  # To get precision score
  def precisionScore(self, true_positive, true_negatives, false_positives, false_negative):
    """Return ``TP / (TP + FP)``, or 0 (after printing a notice) when nothing was predicted positive."""
    return self._safe_ratio(true_positive, true_positive + false_positives)

  # To get F1 score
  def F1Score(self, true_positive, true_negatives, false_positives, false_negative):
    """Return ``2*TP / (2*TP + FP + FN)``, or 0 (after printing a notice) when that denominator is 0."""
    return self._safe_ratio(2 * true_positive, (2 * true_positive) + false_positives + false_negative)

In [71]:
def plot_graph(epochs,train_error,test_error,validate_error = None):
  """Plot per-epoch error curves on the current matplotlib figure and show it.

  Args:
    epochs: number of epochs; the x axis is ``range(epochs)``, so every error
      sequence must contain exactly ``epochs`` values.
    train_error: training error per epoch.
    test_error: test error per epoch.
    validate_error: optional validation error per epoch.
  """
  epoch_axis = range(epochs)
  plt.plot(epoch_axis,train_error,label = "Training error")
  plt.plot(epoch_axis,test_error,label = "Test error")
  if(validate_error is not None):
    # This curve used to be labelled "Test error", duplicating the legend entry above.
    plt.plot(epoch_axis,validate_error,label = "Validation error")
  plt.legend()
  plt.xlabel("Epochs")
  plt.ylabel("Error")
  plt.show()

In [118]:
def read_dataset(filename):
  """Read a CSV file and separate its first column from the rest.

  Returns:
    ``(features, labels)``: a DataFrame with every column except the first,
    and a Series holding the first column.
  """
  frame = pd.read_csv(filename)
  labels = frame.iloc[:, 0]
  features = frame.iloc[:, 1:]
  return features, labels

def dataset_split(df):
  """Split ``df`` row-wise, in order, into three consecutive partitions.

  The cut points are ``int(0.7 * len(df))`` and ``int(0.9 * len(df))``, giving
  roughly the first 70%, the next 20% and the final 10% of the rows. No
  shuffling is performed.

  Args:
    df: a pandas DataFrame/Series or anything ``np.asarray`` accepts.

  Returns:
    A 3-tuple of partitions. pandas inputs yield pandas objects, other inputs
    yield NumPy arrays.
  """
  n_rows = len(df)
  first_cut = int(0.7*n_rows)
  second_cut = int(0.9*n_rows)
  # Positional slicing avoids np.split on a DataFrame, which goes through the
  # deprecated DataFrame.swapaxes.
  rows = df.iloc if hasattr(df, "iloc") else np.asarray(df)
  return rows[:first_cut], rows[first_cut:second_cut], rows[second_cut:]

def binary_encoding(set_np, threshold=108):
  """Binarise a 2-D collection of numbers.

  Args:
    set_np: sequence of rows, each a sequence of numbers.
    threshold: values strictly below it map to 0, all other values map to 1.
      Defaults to 108, the cut-off this function previously hard-coded.

  Returns:
    A list of lists of 0/1 integers with the same row lengths as ``set_np``.
  """
  # Written as "0 if below, else 1" so values that are not below the threshold
  # (including NaN) keep mapping to 1.
  return [[0 if value < threshold else 1 for value in row] for row in set_np]

def preprocessing(filename):
  """Load a CSV through ``read_dataset`` and one-hot encode its label column.

  The one-hot matrix has one row per sample and ``labels.max() + 1`` columns,
  with a 1 in the column given by each sample's label.
  (Assumes the labels are non-negative integers - TODO confirm against the data.)

  Returns:
    ``(x_vals, y_vals)``: the feature columns as returned by ``read_dataset``
    and the one-hot label matrix as a NumPy array.
  """
  x_vals, labels = read_dataset(filename)
  n_samples = labels.size
  one_hot = np.zeros((n_samples, labels.max()+1))
  # Pair every row index with its label to set a single 1 per row.
  one_hot[np.arange(n_samples), labels] = 1
  return x_vals, one_hot

# Load the CSV and one-hot encode the labels.
x,y = preprocessing("mnist_train.csv")
# Ordered 70% / 20% / 10% split. The helper is named `dataset_split`; the
# previous calls to `split_dataset` referred to a name that is never defined.
x_tmp_train,x_tmp_test,x_tmp_validation = dataset_split(x)
y_train,y_test,y_validation = dataset_split(y)

# Threshold the raw feature values to 0/1 before scaling.
x_train = binary_encoding(x_tmp_train.to_numpy())
x_test = binary_encoding(x_tmp_test.to_numpy())
x_validation = binary_encoding(x_tmp_validation.to_numpy())
# Fit the scaler on the training partition only and reuse its statistics for
# the other two partitions.
scaler = StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)
x_validation = scaler.transform(x_validation)

In [None]:
# Experiment: hidden activation 'relu', learning rate 0.08.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'relu', 0.08, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Experiment: hidden activation 'sigmoid', learning rate 0.08.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'sigmoid', 0.08, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Experiment: 'softmax' as the hidden-layer activation, learning rate 0.08.
# (The output layer is activated with softmax regardless of this argument.)
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'softmax', 0.08, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Experiment: hidden activation 'leaky_relu', learning rate 0.08.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'leaky_relu', 0.08, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Experiment: hidden activation 'tanh', learning rate 0.08.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'tanh', 0.08, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Baseline: scikit-learn MLPClassifier with hidden-layer widths 256-128-64-32 and
# 'logistic' activation, fitted on integer class labels recovered from the
# one-hot targets via argmax.
# NOTE(review): `alpha` is MLPClassifier's L2 regularisation strength, not a
# learning rate - confirm 0.08 is intended here.
clf = MLPClassifier(solver='lbfgs', alpha=0.08, hidden_layer_sizes=(256,128,64,32), random_state=1, activation = 'logistic')
clf.fit(x_train,y_train.argmax(axis=1))
# score() returns the mean accuracy on the given test data and labels.
clf.score(x_test,y_test.argmax(axis=1))

In [None]:
# Learning-rate experiment: hidden activation 'relu', learning rate 0.001.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'relu', 0.001, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Learning-rate experiment: hidden activation 'relu', learning rate 0.01.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'relu', 0.01, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Learning-rate experiment: hidden activation 'relu', learning rate 0.1.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'relu', 0.1, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)

In [None]:
# Learning-rate experiment: hidden activation 'relu', learning rate 1.
# Positional arguments: n_layers, layer_sizes, activation, learning_rate,
# weight_init, batch_size, num_epochs.
evaluation_metric = Evaluation()
a = MultiLayerPerceptron(6, [784, 256, 128, 64, 32, 10], 'relu', 1, 'normal', 3000, 150)
# Passing the test split makes fit() record one train and one test loss per epoch.
a.fit(x_train, y_train, x_test, y_test)
training_error,test_error = a.getErrorReports()
y_predicted = a.predict(x_test)

# Test accuracy: predicted class index vs. arg-max of the one-hot targets.
print(evaluation_metric.accuracy2(y_test.argmax(axis = 1),y_predicted))
# The 150 here must equal num_epochs so the x axis matches the recorded losses.
plot_graph(150,training_error,test_error)