In [1]:
from importlib import reload
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn import metrics
import fca_lazy_clf as fca
from datetime import datetime
import sklearn.tree as tree

# Scaling of dataset

In [2]:
train_data = pd.read_csv('tic-tac-toe/train4.csv')
train_data.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive


In [3]:
def scale(dataset):
    for i in range(9):
        str_i = str(i + 1)
        dataset['v' + str_i] = (dataset['V' + str_i] == 'x').astype(int)
    dataset['v10'] = (dataset['V10'] == 'positive').astype(int)
    dataset.drop(['V' + str(i+1) for i in range(10)], axis=1, inplace = True)
    return dataset

In [4]:
train_data = scale(pd.read_csv('tic-tac-toe/train9.csv')).iloc[:, :-1]
X_train = train_data.iloc[:, :-1]
y_train = train_data.iloc[:, -1]
train_data.head()

Unnamed: 0,v1,v2,v3,v4,v5,v6,v7,v8,v9
0,1,1,1,1,0,0,1,0,0
1,1,1,1,1,0,0,0,1,0
2,1,1,1,1,0,0,0,0,1
3,1,1,1,1,0,0,0,0,0
4,1,1,1,1,0,0,0,0,0


# Lazy classifying

In [5]:
results = {'accuracy': [], 'precision': [], 'recall': [], 'seconds': []}
reload(fca)
clf = fca.LazyClassifier(threshold=0.000001, bias='false')

for i in range(10):
    train_data = scale(pd.read_csv('tic-tac-toe/train{}.csv'.format(i+1)))
    X_train = train_data.iloc[:, :-1]
    y_train = train_data.iloc[:, -1]

    clf.fit(X_train, y_train)

    test_data = scale(pd.read_csv('tic-tac-toe/test{}.csv'.format(i+1)))
    X_test = test_data.iloc[:, :-1]
    y_test = test_data.iloc[:, -1]

    s = datetime.now()
    y_pred = clf.predict(X_test)
    f = datetime.now()

    results['accuracy'].append(metrics.accuracy_score(y_test, y_pred))
    results['precision'].append(metrics.precision_score(y_test, y_pred))
    results['recall'].append(metrics.recall_score(y_test, y_pred))
    results['seconds'].append((f - s).seconds)

pd.DataFrame(results)

Unnamed: 0,accuracy,precision,recall,seconds
0,1.0,1.0,1.0,66
1,1.0,1.0,1.0,52
2,1.0,1.0,1.0,64
3,1.0,1.0,1.0,51
4,1.0,1.0,1.0,66
5,1.0,1.0,1.0,50
6,1.0,1.0,1.0,82
7,1.0,1.0,1.0,84
8,1.0,1.0,1.0,81
9,1.0,1.0,1.0,69


In [6]:
results = {'accuracy': [], 'precision': [], 'recall': [], 'seconds': []}
reload(fca)
clf = fca.LazyClassifier(
    threshold=0.000001, bias='false', 
    random=True, sample_share=0.3, random_seed=1)

for i in range(10):
    train_data = scale(pd.read_csv('tic-tac-toe/train{}.csv'.format(i+1)))
    X_train = train_data.iloc[:, :-1]
    y_train = train_data.iloc[:, -1]

    clf.fit(X_train, y_train)

    test_data = scale(pd.read_csv('tic-tac-toe/test{}.csv'.format(i+1)))
    X_test = test_data.iloc[:, :-1]
    y_test = test_data.iloc[:, -1]

    s = datetime.now()
    y_pred = clf.predict(X_test)
    f = datetime.now()

    results['accuracy'].append(metrics.accuracy_score(y_test, y_pred))
    results['precision'].append(metrics.precision_score(y_test, y_pred))
    results['recall'].append(metrics.recall_score(y_test, y_pred))
    results['seconds'].append((f - s).seconds)

pd.DataFrame(results)

Unnamed: 0,accuracy,precision,recall,seconds
0,0.967742,0.953125,1.0,21
1,0.988506,0.980769,1.0,38
2,1.0,1.0,1.0,35
3,1.0,1.0,1.0,24
4,0.988764,0.984127,1.0,27
5,0.988235,0.982456,1.0,27
6,0.982456,0.972222,1.0,29
7,1.0,1.0,1.0,26
8,0.980583,0.972222,1.0,21
9,0.989011,0.983333,1.0,24


# Classical decision tree

In [7]:
results = {'accuracy': [], 'precision': [], 'recall': [], 'seconds': []}
clf = tree.DecisionTreeClassifier(criterion='entropy')

for i in range(10):
    train_data = scale(pd.read_csv('tic-tac-toe/train{}.csv'.format(i+1)))
    X_train = train_data.iloc[:, :-1]
    y_train = train_data.iloc[:, -1]

    clf.fit(X_train, y_train)

    test_data = scale(pd.read_csv('tic-tac-toe/test{}.csv'.format(i+1)))
    X_test = test_data.iloc[:, :-1]
    y_test = test_data.iloc[:, -1]

    s = datetime.now()
    y_pred = clf.predict(X_test)
    f = datetime.now()

    results['accuracy'].append(metrics.accuracy_score(y_test, y_pred))
    results['precision'].append(metrics.precision_score(y_test, y_pred))
    results['recall'].append(metrics.recall_score(y_test, y_pred))
    results['seconds'].append((f - s).seconds)

pd.DataFrame(results)

Unnamed: 0,accuracy,precision,recall,seconds
0,0.956989,0.967213,0.967213,0
1,0.954023,0.979592,0.941176,0
2,1.0,1.0,1.0,0
3,0.988764,1.0,0.983051,0
4,0.988764,1.0,0.983871,0
5,0.988235,0.982456,1.0,0
6,0.991228,0.985915,1.0,0
7,0.971963,0.972973,0.986301,0
8,0.990291,1.0,0.985714,0
9,0.978022,0.983051,0.983051,0
