In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

df = pd.read_csv('binary.csv')
df.head()


Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [46]:
#standardize columns
cols_to_scale = ['gre','gpa']
for col in cols_to_scale:
    mean,std = df[col].mean(),df[col].std()
    df[col] = (df[col] - mean)/std

df.head()   


Unnamed: 0,admit,gre,gpa,rank
0,0,-1.798011,0.578348,3
1,1,0.625884,0.736008,3
2,1,1.837832,1.603135,1
3,1,0.452749,-0.525269,4
4,0,-0.586063,-1.208461,4


In [47]:
# dummy variables for rank
rank_dummies = pd.get_dummies(df['rank'],prefix='rank')
df = pd.concat([df,rank_dummies],axis=1)
df = df.drop('rank',axis=1)
df.head()

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,-1.798011,0.578348,0,0,1,0
1,1,0.625884,0.736008,0,0,1,0
2,1,1.837832,1.603135,1,0,0,0
3,1,0.452749,-0.525269,0,0,0,1
4,0,-0.586063,-1.208461,0,0,0,1


In [48]:
from sklearn.model_selection import train_test_split

targets = df['admit']
features = df.drop('admit',axis = 1)

features,features_test,targets,targets_test = train_test_split(features,targets,test_size=0.1)

In [63]:
from sklearn.metrics import accuracy_score

def sigmoid(x):
    return 1/(1 + np.exp(-x))

#hyper parameters
learnrate = 0.05
epochs = 1000
last_loss = None

n_records,n_features = features.shape
weights = np.random.normal(scale=1/n_features**0.5,size=n_features)

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for record,target in zip(features.values,targets.values):
        h = np.dot(weights,record)
        output = sigmoid(h)
        error = target - output
        error_term = error*output*(1 - output)
        del_w += error_term*record
    
    weights += learnrate*del_w/float(n_records)
    
    #test the data to see the prediction accuracy vary across the epochs   

    if e% (epochs/10) == 0:
        outputs = []        
        for record,target in zip(features_test.values,targets_test.values):
            output = sigmoid(np.dot(weights,record))
            #Mean squared error
            loss = np.mean((target-output)**2)
            outputs.append(output > 0.5)
        
        if last_loss is not None:            
            if loss > last_loss:
                print('Loss increased from {0} -> {1}'.format(last_loss,loss))
            else:
                print('Loss reduced from {0} -> {1}'.format(last_loss,loss))
                
        last_loss = loss        
                                           
                
        accuracy = accuracy_score(list(targets_test.values),outputs)
        print(accuracy)
            
            
        


0.575
Loss reduced from 0.486373317568 -> 0.46006390272
0.55
Loss reduced from 0.46006390272 -> 0.431872081119
0.475
Loss reduced from 0.431872081119 -> 0.403851757211
0.425
Loss reduced from 0.403851757211 -> 0.377749199842
0.425
Loss reduced from 0.377749199842 -> 0.354056689856
0.425
Loss reduced from 0.354056689856 -> 0.332579301861
0.45
Loss reduced from 0.332579301861 -> 0.312998590875
0.45
Loss reduced from 0.312998590875 -> 0.295045021078
0.475
Loss reduced from 0.295045021078 -> 0.278515156701
0.5
