In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [2]:
# Show the dataset description that ships next to the data file.
# Every line read from the file still carries its own trailing newline, so
# print's default newline is suppressed to avoid double-spaced output.
with open("data/tic-tac-toe.names") as names_file:
    for line in names_file:
        print(line, end="")

1. Title: Tic-Tac-Toe Endgame database



2. Source Information

   -- Creator: David W. Aha (aha@cs.jhu.edu)

   -- Donor: David W. Aha (aha@cs.jhu.edu)

   -- Date: 19 August 1991

 

3. Known Past Usage: 

   1. Matheus,~C.~J., \& Rendell,~L.~A. (1989).  Constructive

      induction on decision trees.  In {\it Proceedings of the

      Eleventh International Joint Conference on Artificial Intelligence} 

      (pp. 645--650).  Detroit, MI: Morgan Kaufmann.

      -- CITRE was applied to 100-instance training and 200-instance test

         sets.  In a study using various amounts of domain-specific

         knowledge, its highest average accuracy was 76.7% (using the

         final decision tree created for testing).



   2. Matheus,~C.~J. (1990). Adding domain knowledge to SBL through

      feature construction.  In {\it Proceedings of the Eighth National

      Conference on Artificial Intelligence} (pp. 803--808). 

      Boston, MA: AAAI Press.

      -- Similar experiments 

In [3]:
# Read the tic-tac-toe endgame table from disk into a DataFrame and display it.
df = pd.read_csv(filepath_or_buffer="data/tic-tac-toe.data")
df

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


In [4]:
# Encode the outcome labels as integers: "negative" -> 0, "positive" -> 1.
# The replacement is applied to the whole frame, so any cell holding one of
# these strings is affected, not only the Class column.
df = df.replace(to_replace={"negative": 0, "positive": 1})
df

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,1
1,x,x,x,x,o,o,o,x,o,1
2,x,x,x,x,o,o,o,o,x,1
3,x,x,x,x,o,o,o,b,b,1
4,x,x,x,x,o,o,b,o,b,1
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,0
954,o,x,o,x,x,o,x,o,x,0
955,o,x,o,x,o,x,x,o,x,0
956,o,x,o,o,x,x,x,o,x,0


In [5]:
# Encode the board-square symbols as integers: "x" -> 1, "o" -> 0, "b" -> -1
# ("b" presumably marks a blank square - confirm against the .names file).
# As with the label encoding, the replacement runs over the whole frame.
df = df.replace(to_replace={"x": 1, "o": 0, "b": -1})
df

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,1,1,1,1,0,0,1,0,0,1
1,1,1,1,1,0,0,0,1,0,1
2,1,1,1,1,0,0,0,0,1,1
3,1,1,1,1,0,0,0,-1,-1,1
4,1,1,1,1,0,0,-1,0,-1,1
...,...,...,...,...,...,...,...,...,...,...
953,0,1,1,1,0,0,0,1,1,0
954,0,1,0,1,1,0,1,0,1,0
955,0,1,0,1,0,1,1,0,1,0
956,0,1,0,0,1,1,1,0,1,0


In [6]:
# Separate the target column (Class) from the board-position feature columns.
y = df["Class"]

# drop() without inplace returns a new frame, so `df` keeps its Class column
# and this cell can be re-run safely. The earlier version bound X to the same
# object as df and dropped in place, which removed Class from df as well and
# made a second run fail on `df.Class`.
X = df.drop(columns=["Class"])
X.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square
0,1,1,1,1,0,0,1,0,0
1,1,1,1,1,0,0,0,1,0
2,1,1,1,1,0,0,0,0,1
3,1,1,1,1,0,0,0,-1,-1
4,1,1,1,1,0,0,-1,0,-1


In [10]:
# Hold out half of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

# Report the shapes of the training and test partitions, one item per line.
print()
print('Размеры X и y для обучения: ', X_train.shape, y_train.shape, sep='\n')
print('Размеры X и y для тестов: ', X_test.shape, y_test.shape, sep='\n')


Размеры X и y для обучения: 
(479, 9)
(479,)
Размеры X и y для тестов: 
(479, 9)
(479,)


In [13]:
# Fit a logistic-regression classifier with default settings (fit() returns
# the estimator itself) and report accuracy on the training and test halves.
logreg = LogisticRegression().fit(X_train, y_train)

print()
print('Логистическая регрессия: ')
print("training accuracy :", logreg.score(X=X_train, y=y_train))
print("testing accuracy :", logreg.score(X=X_test, y=y_test))


Логистическая регрессия: 
training accuracy : 0.6847599164926931
testing accuracy : 0.6492693110647182


In [17]:
# Decision tree
# random_state is fixed so the fitted tree, and therefore the reported
# accuracies, are reproducible across runs; it matches the seed used for the
# train/test split.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
print()
print('Decision Tree: ')
print("training accuracy :", tree.score(X_train, y_train))
print("testing accuracy :", tree.score(X_test, y_test))


Дерево решений: 
training accuracy : 1.0
testing accuracy : 0.906054279749478


In [21]:
# Random forest
# The forest is built from randomised trees, so random_state is fixed to make
# the reported accuracies reproducible across runs; it matches the seed used
# for the train/test split.
rndfrst = RandomForestClassifier(random_state=0)
rndfrst.fit(X_train, y_train)
print()
print('Random forest: ')
print("training accuracy :", rndfrst.score(X_train, y_train))
print("testing accuracy :", rndfrst.score(X_test, y_test))


Random forest: 
training accuracy : 1.0
testing accuracy : 0.9498956158663883


In [20]:
# AdaBoost
# random_state is fixed so that repeated runs report the same accuracies,
# consistent with the seeded train/test split.
ada = AdaBoostClassifier(random_state=0)
ada.fit(X_train, y_train)
print()
print('Ada boost: ')
print("training accuracy :", ada.score(X_train, y_train))
print("testing accuracy :", ada.score(X_test, y_test))


Ada boost: 
training accuracy : 0.8121085594989561
testing accuracy : 0.7807933194154488
