In [14]:
import numpy as np
from numpy import genfromtxt
np.random.seed(0)
import pandas as pd
from math import exp
from sklearn.model_selection import train_test_split

In [15]:
# read dataset
X = genfromtxt("train_data.csv", delimiter=',')
y = genfromtxt("train_labels.csv", delimiter=',')

In [16]:
#checking the balance the dataset
print('Number of samples in each class\n', y.sum(axis=0))

Number of samples in each class
 [5923. 6742. 5958. 6131.]


In [17]:
#Splitting the data into training and validation set in a ratio of 8:2
#Created validation set to test the model on unknown data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.2)

In [18]:
#checking the balance the dataset
print('Number of samples in each class in y_train', y_train.sum(axis=0))
print('Number of samples in each class in y_val', y_val.sum(axis=0))

Number of samples in each class in y_train [4760. 5382. 4770. 4891.]
Number of samples in each class in y_val [1163. 1360. 1188. 1240.]


In [19]:
# finding out the number of samples and features in train dataset
(samples,features) = X_train.shape

In [20]:
#declaring the number of hidden layer neurons, classes, learning rate and number of epoch for the training

hiddenlyr_nodes = 35 
num_classes = 4
learning_rate = 0.0001
num_epoch  = 100

$Sigmoid Function = \frac{1}{1+e^x}$

S(x)	=	sigmoid function
e	=	Euler's number

In [21]:
# sigmoid function
def sigmoid_func(x):
    return 1 / (1 + np.exp(-x))
# derivative of sigmoid function
def sigmoid_drv(x):
    return sigmoid_func(x) * (1 - sigmoid_func(x))

Sigmoid Fuunction
$\sigma(\vec{z})_{i}=\frac{e^{z_{i}}}{\sum_{j=1}^{K} e^{z_{j}}}$

$\sigma$ = softmax
$\vec{z}$ =	input vector
$e^{z_{i}}$	=	standard exponential function for input vector
K	=	number of classes in the multi-class classifier
$e^{z_{j}}$	=	standard exponential function for output vector
$e^{z_{j}}$	=	standard exponential function for output vector


In [22]:
# softmax function
def softmax_func(x):
    val = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)      
    return val

In [23]:
# Convert predicted data into one hot encoding
def one_hot_enc(x):
    for i in range(0,len(x)):
      x[i,x[i,:].argmax()]=1
    out = (x == 1).astype(float)
    return out

In [24]:
# predicting the accuracy of the model
def accuracy(y_true, y_pred):
    if not (len(y_true) == len(y_pred)):
        print('Size of predicted and true labels not equal.')
        return 0.0

    corr = 0
    for i in range(0,len(y_true)):
        corr += 1 if (y_true[i] == y_pred[i]).all() else 0

    return corr/len(y_true)

# feed forward function
def fwd(inp_data, wt_hidlyr, bias_hidlyr,wt_outlyr,bias_outlyr):
    net_hidden = np.dot(inp_data, wt_hidlyr) + bias_hidlyr
    act_hidden = sigmoid_func(net_hidden)
    net_output = np.dot(act_hidden, wt_outlyr) + bias_outlyr
    act_output = softmax_func(net_output)
    return act_output, act_hidden, net_hidden

# backpropagation function
def bkd(X_train, y_train, net_hidden, act_hidden, weight_output, act_output):
    cf_netHid = act_output - y_train 
    grad_bias_out = cf_netHid
    grad_wt_out = np.dot(act_hidden.T, cf_netHid)
    cf_actHid = np.dot(cf_netHid, weight_output.T)
    grad_wt_hid = np.dot(X_train.T, sigmoid_drv(net_hidden) * cf_actHid)
    grad_bias_hid = cf_actHid * sigmoid_drv(net_hidden)
    return grad_wt_out, grad_bias_out, grad_wt_hid, grad_bias_hid

# Updating Weight
def update_weight(weight, cost):
    if cost.shape == (features, hiddenlyr_nodes) or cost.shape == (hiddenlyr_nodes, num_classes):
        weight = weight - learning_rate * cost
    elif cost.shape == (samples, hiddenlyr_nodes) or cost.shape == (samples, num_classes):
        weight = weight - learning_rate * cost.sum(axis=0)
    return weight


### Loss Function 
$MSE = \frac{1}{n} + \sum \limits _{i=1} ^{n} (Y_{i}-\hat{Y}_{i})^2 $

MSE	=	mean squared error
n	=	number of data points
$Y_{i}$ =	observed values
$\hat{Y}_{i}$ = predicted values

#### The MSE is great for ensuring that our trained model has no outlier predictions with huge errors, since the MSE puts larger weight on theses errors due to the squaring part of the function.

In [25]:
def cross_ent(original_label,predict_label):
    mse = np.square(original_label - predict_label)
    val = np.mean(mse)
    return val

In [26]:
# Weight initialization
weight_hidden = np.random.randn(features, hiddenlyr_nodes)
bias_hidden = np.random.randn(hiddenlyr_nodes)
weight_output = np.random.randn(hiddenlyr_nodes, num_classes)
bias_output = np.random.randn(num_classes)

In [27]:
error_per_epoch = list()
epoch = 0
while epoch < num_epoch:
  epoch+=1
  #forward propagation
  act_output, act_hidden, net_hidden = fwd(X_train, weight_hidden, bias_hidden, weight_output, bias_output)
  #backward propagation
  cost_wo, cost_bo, cost_wh, cost_bh = bkd(X_train, y_train, net_hidden, act_hidden, weight_output, act_output)
  #weight updating
  weight_hidden = update_weight(weight_hidden, cost_wh)
  bias_hidden = update_weight(bias_hidden, cost_bh)
  weight_output = update_weight(weight_output, cost_wo)
  bias_output = update_weight(bias_output, cost_bo)
  
  cal_loss = cross_ent(y_train,act_output)
  error_per_epoch.append(cal_loss)
  
  y_pred, _, _ = fwd(X_val, weight_hidden, bias_hidden, weight_output, bias_output)
  # One hot encoding the prediction
  y_pred_enc = one_hot_enc(y_pred)
  # calculating the accuracy
  ACC = accuracy(y_val,y_pred_enc)
  if epoch%10==0:
    print('epoch = ',epoch,'   ','Loss function value: ', cal_loss,'accuracy = ',ACC)

epoch =  10     Loss function value:  0.07782015133535355 accuracy =  0.8123611391638053
epoch =  20     Loss function value:  0.038782371286173976 accuracy =  0.8889113310442335
epoch =  30     Loss function value:  0.0327373254722133 accuracy =  0.9030498889113311
epoch =  40     Loss function value:  0.029280660098058603 accuracy =  0.9115330236315896
epoch =  50     Loss function value:  0.026876144030628303 accuracy =  0.918400323167037
epoch =  60     Loss function value:  0.025069403111138894 accuracy =  0.9238537669157746
epoch =  70     Loss function value:  0.023643780827894978 accuracy =  0.9284992930721067
epoch =  80     Loss function value:  0.022476938312714093 accuracy =  0.9325388810341345
epoch =  90     Loss function value:  0.021493507960131226 accuracy =  0.9359725308018582
epoch =  100     Loss function value:  0.02064508910771253 accuracy =  0.9375883659866694


In [28]:
y_pred

array([[1.13965178e-01, 6.95177561e-05, 1.00000000e+00, 2.17209706e-01],
       [3.12621456e-05, 1.00000000e+00, 6.67724574e-03, 3.57372858e-03],
       [8.78363481e-05, 1.00000000e+00, 9.06675912e-03, 1.86532339e-02],
       ...,
       [1.00000000e+00, 2.02731193e-06, 2.90643006e-03, 4.86376825e-05],
       [2.91785129e-04, 3.58194542e-02, 1.00000000e+00, 4.25361585e-03],
       [5.55190771e-03, 1.48629551e-05, 1.00000000e+00, 5.97542651e-04]])

In [29]:
ACC= accuracy(y_val,y_pred_enc)
print('accuracy on the validation set is ', ACC*100)

accuracy on the validation set is  93.75883659866695


In [30]:
np.save('Updated Weights/weight_hidden.npy', weight_hidden)
np.save('Updated Weights/bias_hidden.npy', bias_hidden)
np.save('Updated Weights/weight_output.npy', weight_output)
np.save('Updated Weights/bias_output.npy', bias_output)