In [1]:
import numpy as np 
import math
from sklearn import datasets
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
import sys
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import f1_score

In [2]:
def sigmoid(z):
    return  1/(1+np.exp(-z))

def build_model(X,neural_layer,input_dim,hidden_nodes,output_dim):
    model = {}
    for i in range(neural_layer):
        if i==0:
            continue
        elif i==1:
            #print("1st elif loop",i)
            model['W'+str(i)] =  np.random.randn(input_dim, hidden_nodes) / np.sqrt(input_dim) 
            model['b'+str(i)] =  np.zeros((1, hidden_nodes))
        elif i==neural_layer-1:
            #print("2nd if loop",i)
            model['W'+str(i)] = np.random.randn(hidden_nodes, output_dim) / np.sqrt(hidden_nodes)
            model['b'+str(i)] =  np.zeros((1, output_dim))
        else:
            #print("else loop",i)
            model['W'+str(i)] =  np.random.randn(hidden_nodes, hidden_nodes) / np.sqrt(hidden_nodes) 
            model['b'+str(i)] =  np.zeros((1, hidden_nodes))
    
        
    return model

def feed_forward(neural_layer,model, x):
    z={}
    a={}
    for i in range(1,neural_layer+1):
        
        # Forward propagation for layer 1
        if i==1:
            continue
        # Forward propagation for layer 2
        elif i==2:
            z[i] = x.dot(model['W'+str(i-1)]) + model['b'+str(i-1)]
            a[i] = sigmoid(z[i])
            #print("z2:",z2)
        # Forward propagation for other layer  
        else:
            z[i] = a[i-1].dot(model['W'+str(i-1)]) + model['b'+str(i-1)]
            a[i] = sigmoid(z[i])
            #print("z3:",z3)
    return a

def backprop(neural_layer,x,y,model,a,tri_Delta):
    #Lower delta error
    delt = {}
    for i in range(neural_layer,1,-1):
        #output layer  error 
        if i==neural_layer:
            delt[i] = a[i] - y
            
            tri_Delta['b'+str(i-1)] += np.sum(delt[i], axis=0, keepdims=True)
            tri_Delta['W'+str(i-1)] += (a[i-1].T).dot(delt[i])
            #print("db2:",db2)
            #print("dW2:",dW2)
             
        #Error in second layer 
        elif i==2:
            delt[i] = np.multiply(delt[i+1].dot(model['W'+str(i)].T),(a[i]*(1-a[i])))
            #print("del2:",del2)
            tri_Delta['b'+str(i-1)] += np.sum(delt[i], axis=0, keepdims=True)
            tri_Delta['W'+str(i-1)] += np.dot(x.T, delt[i])
        #Hidden layer error
        else:
            delt[i] = np.multiply(delt[i+1].dot(model['W'+str(i)].T),(a[i]*(1-a[i])))
            #print("del2:",del2)
            tri_Delta['b'+str(i-1)] += np.sum(delt[i], axis=0, keepdims=True)
            tri_Delta['W'+str(i-1)] += np.dot(a[i-1].T, delt[i])
    #print("delt:",delt)
    return tri_Delta

def calculate_loss(neural_layer,N,model,a,y_true,sum_cost):
    #sum_cost += np.sum((a[neural_layer]-y_true)**2) 
    sum_cost +=np.sum((y_true*np.log(a[neural_layer]))+((1-y_true)*np.log(1-a[neural_layer])))
    #print("sum_cost",sum_cost)
    
    return sum_cost

def train(neural_layer,N,model, X_train, y_train, reg_lambda, learning_rate,hidden_nodes):
    # Batch gradient descent
    done = False
    previous_loss = float('inf')
    iterations = 0
    
    losses = []
    #while done == False:  #comment out while performance testing
    while iterations < 200:
        scost=0
        tri_Delta={}
        for i in range(neural_layer):
            if i==0:
                continue
            elif i==1:
                #print("1st elif loop",i)
                tri_Delta['W'+str(i)] =  np.zeros((input_dim, hidden_nodes)) 
                tri_Delta['b'+str(i)] =  np.zeros((1, hidden_nodes))
            elif i==neural_layer-1:
                #print("2nd if loop",i)
                tri_Delta['W'+str(i)] = np.zeros((hidden_nodes, output_dim))
                tri_Delta['b'+str(i)] =  np.zeros((1, output_dim))
            else:
                #print("else loop",i)
                tri_Delta['W'+str(i)] =  np.zeros((hidden_nodes, hidden_nodes)) 
                tri_Delta['b'+str(i)] =  np.zeros((1, hidden_nodes))
            
        
        for row in zip(X_train, y_train):
            #feed forward
            a = feed_forward(neural_layer,model, row[0][None,:])
           
            tri_Delta = backprop(neural_layer,row[0][None,:],row[1][None,:],model,a,tri_Delta)
            
            #cost
            cost = calculate_loss(neural_layer,N,model, a,row[1][None,:], scost)
            
        #update weights and biases
        for i in range(1,neural_layer):
            model['W'+str(i)]-= learning_rate*((tri_Delta['W'+str(i)]/N) + (reg_lambda/N)* model['W'+str(i)])
            model['b'+str(i)]-= learning_rate*(tri_Delta['b'+str(i)]/N) 
        loss = (-1/N)*cost
        #print("cost:",cost,"loss:",loss)
        losses.append(loss)
        if iterations%100==0:
            print ("Loss after iteration %i: %f" %(i, loss))  #uncomment once testing finished, return mod val to 1000
        if ( previous_loss-loss) < 0.000001:
            done = True
            #print("convergence i:",iterations,previous_loss-loss) 
            #break
        previous_loss = loss
        iterations += 1
    return model, losses


In [3]:
#np.random.seed(222)
sdataset = pd.read_csv('BSOM_DataSet_for_HW3.csv')
X = sdataset.loc[:,['all_mcqs_avg_n20','all_NBME_avg_n4','CBSE_01','CBSE_02']].values
y = sdataset.loc[:,['LEVEL']].values

#Feature Scaling using Mean normalization 
mean_norm_X = (X-np.mean(X,axis=0))/(np.max(X,axis=0)-np.min(X,axis=0))
#One vs all y_train
concat=[]
for i in np.unique(y):
    one_vs_all_y=np.where(y==i,1,0)
    concat.extend(list(zip(*one_vs_all_y)))
actual_y= np.asarray(concat).T 

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(mean_norm_X, actual_y, test_size = 1/3)

N,input_dim = X_train.shape 
# output layer dimensionality 
output_dim = len(np.unique(y)) 
# learning rate for gradient descent
learning_rate = 0.6
#hidden nodes for all layers 
hidden_nodes=5

In [4]:
def fscore(y_true, y_pred):
    f1_macro = f1_score(y_test, y_pred, average='macro')  
    print("F1 macro :" ,f1_macro)
    f1_micro  = f1_score(y_test, y_pred, average='micro')  
    print("F1 micro :" ,f1_micro)
    f1_weighted = f1_score(y_test, y_pred, average='weighted')  
    print("F1 weighted :" ,f1_weighted)
    f1 = f1_score(y_test, y_pred, average=None)
    print("F1 score :" ,f1)

In [5]:

n_layers = [3,4,5,6]
reg_lambda = 0 # regularization strength
for n_layer in n_layers:
    print("For NEURAL LAYER :",n_layer)
    model = build_model(X_train,n_layer,input_dim,hidden_nodes,output_dim)
    model, losses = train(n_layer,N,model,X_train, y_train, reg_lambda, learning_rate,hidden_nodes)
    y_true = []
    y_pred = []
    for row in zip(X_test, y_test):
        a = feed_forward(n_layer,model, row[0][None,:])
        y_pred.append(np.argmax(a[n_layer]))
        y_true.append(np.argmax(row[1][None,:]))
    print("y_true",y_true)
    print("y_pred",y_pred)
    print(classification_report(y_true, y_pred,target_names=['A', 'B', 'C','D']))   
    print("confusion matrix:",confusion_matrix(y_true, y_pred))
    print("Accuracy:",accuracy_score(y_true, y_pred))
    lb = LabelBinarizer()
    lb.fit(y_true)
    y_t= lb.transform(y_true)
    y_p = lb.transform(y_pred)

    print("ROC AUC score",metrics.roc_auc_score(y_t,y_p))
    fscore(y_t,y_p)

For NEURAL LAYER : 3
Loss after iteration 2: 0.040607
Loss after iteration 2: 0.019092
y_true [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 1, 3, 1, 2, 1, 3, 0, 0, 1, 2, 1, 1, 2, 0, 0, 3, 0]
y_pred [0, 1, 1, 0, 2, 0, 1, 1, 1, 1, 2, 1, 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 1, 0, 2, 2, 2, 1, 2, 0, 0, 1, 1]
             precision    recall  f1-score   support

          A       0.89      0.67      0.76        12
          B       0.59      0.67      0.62        15
          C       0.62      0.89      0.73         9
          D       0.00      0.00      0.00         3

avg / total       0.64      0.67      0.64        39

confusion matrix: [[ 8  4  0  0]
 [ 1 10  4  0]
 [ 0  1  8  0]
 [ 0  2  1  0]]
Accuracy: 0.6666666666666666
ROC AUC score 0.7158564814814815
F1 macro : 0.5285443722943723
F1 micro : 0.6666666666666666
F1 weighted : 0.6426490176490177
F1 score : [0.76190476 0.625      0.72727273 0.        ]
For NEURAL LAYER : 4
Loss after iteration 3: 0.0377

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Loss after iteration 3: 0.019512
y_true [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 1, 3, 1, 2, 1, 3, 0, 0, 1, 2, 1, 1, 2, 0, 0, 3, 0]
y_pred [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
             precision    recall  f1-score   support

          A       0.00      0.00      0.00        12
          B       0.38      1.00      0.56        15
          C       0.00      0.00      0.00         9
          D       0.00      0.00      0.00         3

avg / total       0.15      0.38      0.21        39

confusion matrix: [[ 0 12  0  0]
 [ 0 15  0  0]
 [ 0  9  0  0]
 [ 0  3  0  0]]
Accuracy: 0.38461538461538464
ROC AUC score 0.5
F1 macro : 0.1388888888888889
F1 micro : 0.38461538461538464
F1 weighted : 0.2136752136752137
F1 score : [0.         0.55555556 0.         0.        ]
For NEURAL LAYER : 5
Loss after iteration 4: 0.048274
Loss after iteration 4: 0.019007
y_true [0, 1, 1, 1, 1, 0, 1, 1,