In [29]:
from subprocess import check_output
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import accuracy_score  
from sklearn.cross_validation import train_test_split
from pandas.tools.plotting import parallel_coordinates
import time

#Load Data

iris = pd.read_csv('dataset/iris.csv')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [30]:
#Data setup

Species = ['setosa', 'versicolor', 'virginica']

#Number of examples
m = iris.shape[0]

#Features
n = 4

#Number of classes
k = 3

#copying Iris data Column Wise to new matrix X of dimension m x (n+1)
X = np.ones((m,n + 1))
y = np.array((m,1))
X[:,1] = iris['petal_length'].values
X[:,2] = iris['petal_width'].values
X[:,3] = iris['sepal_length'].values
X[:,4] = iris['sepal_width'].values

#Labels
y = iris['species'].values

#Mean normalization
for j in range(n):
    X[:, j] = (X[:, j] - X[:,j].mean())

#Splitting dataset into training and test data and Targets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 11)
 

In [31]:
#Logistic Regression

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

#Regularized gradient function
def Gradient(theta, X, y):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    return ( X.T.dot(y-h))

def Hessian(theta, X, y):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    onesminush=1-h
    D=h.dot(onesminush.T)
    for i in range(D.shape[0]):
        for j in range(D.shape[1]):
            if(i!=j):
                D[i][j]=0
    XD=X.T.dot(D.T)
    #print("XD",XD.shape)
    XDXT= XD.dot(X)
    #print("XDXT.shape",XDXT.shape) 
    #print("hessian",XDXT)
    
    return (-1)*XDXT

#Optimal theta 
def logisticRegression(X, y, theta,iterations):
    for i in range(iterations):
        gradient=Gradient(theta,X,y)
        hessian=Hessian(theta,X,y)
   # print("hessian shape",hessian.shape)
   # print("gradient shape",gradient.shape)
        theta=theta-np.linalg.pinv(hessian).dot(gradient)
    #print("theta",theta)
    return theta

In [32]:
import time
#Training
iterations=8
all_theta = np.zeros((3, n + 1))
#One vs all
i = 0
start_time=time.clock()
for flower in Species:
    tmp_y = np.array(y_train == flower, dtype = int)
    optTheta = logisticRegression(X_train, tmp_y, np.zeros((n + 1,1)),iterations)
    optTheta=optTheta.reshape((5,))
    all_theta[i] = optTheta
    i += 1
end_time=time.clock()

In [33]:
#Predictions
P = sigmoid(X_test.dot(all_theta.T)) #probability for each flower
p = [Species[np.argmax(P[i, :])] for i in range(X_test.shape[0])]

print "Test Accuracy on iris dataset using Logistic Regresion(Newton's method) : ", accuracy_score(y_test, p) * 100 , '%'
print "Time taken to train data : ",(end_time-start_time)

Test Accuracy on iris dataset using Logistic Regresion(Newton's method) :  96.66666666666667 %
Time taken to train data :  0.108174
