In [39]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import cross_val_score as cvs

tc_raw = pd.read_csv("tic-tac-toe.txt")
for column in tc_raw:
    tc_raw[column] = tc_raw[column].astype('category')
    tc_raw[column] = tc_raw[column].cat.codes
    
trng_data = tc_raw.sample(frac=0.5, replace=False)
test_data = tc_raw.sample(frac=0.5, replace=False)

Y_train = trng_data['class'].values
X_train = trng_data[['top-left-square', 'top-middle-square', 'top-right-square',
 'middle-left-square', 'middle-middle-square', 'middle-right-square',
 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']].values

Y_test = test_data['class'].values
X_test = test_data[['top-left-square', 'top-middle-square', 'top-right-square',
 'middle-left-square', 'middle-middle-square', 'middle-right-square',
 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']].values



In [32]:
#print(trng_data.head(),test_data.head())
logReg = LogisticRegression(solver='newton-cg')
logReg.fit(X_train,Y_train)
predTest = logReg.predict(X_test)
#print(predTest)
score = logReg.score(X_test, Y_test)
print('Using sklearn functions : ' , score*100 ,'%')

cvsScore = cvs(logReg, X_test, Y_test, scoring='accuracy', cv=10)
print('Using Cross Validation with sklearn functions : ', cvsScore)

Using sklearn functions :  67.84968684759917 %
Using Cross Validation with sklearn functions :  [0.63265306 0.70833333 0.6875     0.77083333 0.625      0.72916667
 0.6875     0.64583333 0.63829787 0.74468085]


In [46]:
print('Using python and jupyter')
w = np.array([random.random() for x in range(X_train.shape[1])])
epochs = 10
I = np.eye(w.shape[0])
#print('Initiating Variables : ', w,epochs,I)
for e in range(0, epochs):
    trng_data = trng_data.sample(frac=1, replace=False)
    Y_train = trng_data['class'].values
    X_train = trng_data[['top-left-square', 'top-middle-square', 'top-right-square',
 'middle-left-square', 'middle-middle-square', 'middle-right-square',
 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']].values
    eta_initial = .5
    s_loc = 0
    for x_s in X_train:
        #find the gradient
        gradient = np.dot(x_s.transpose(), Y_train[s_loc]-(1/(1+np.exp(np.dot(x_s, -w)))))
        #Sigmoid function for Hessian
        P =(1/(1+np.exp(np.dot(x_s, -w))))
        #Hessian
        H = np.dot(np.dot(np.dot(x_s.transpose(), P), I-P), x_s)
        #adjust the eta rate
        eta = eta_initial/(s_loc + 1)
        #adjusting the weights using the Newton Raphson Method
        w = w + eta*gradient/H
        s_loc = s_loc + 1
    
print("Evaluated value of w:", w)

testPredAll = []
testPred = 1/(1+np.exp(np.dot(X_test, -w)))
#print(testPred)
for tp in testPred:
    if tp < .5:
        testPredAll.append(0)
    elif tp >= .5:
        testPredAll.append(1)


print("Newton Raphson Prediction Accuracy:", 100*sum(testPredAll == Y_train)/len(testPredAll))
    

Using python and jupyter
Evaluated value of w: [0.23454711 0.9013282  0.86214541 1.10653618 0.80001858 0.7119123
 0.53606383 1.13565012 0.94190123]
Newton Raphson Prediction Accuracy: 64.71816283924844


The starting values of ETA choosed and varied between .01 and 1. 
Finally settelled on .5 as it gave the maximum accuracy at 64.7%. The number of epochs varied between 1 - 100 
then setteled on 10 as it was not affecting the prediction accuracy.