### Ссылка на датасет:https://archive.ics.uci.edu/ml/datasets/Tic-Tac-Toe+Endgame

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import sklearn.model_selection

Создание csv-файла и приведение выборки к бинарным параметрам

In [2]:
columns = ['top-left-square', 'top-middle-square', 'top-right-square', 'middle-left-square',
           'middle-middle-square', 'middle-right-square', 'bottom-left-square',
           'bottom-middle-square', 'bottom-right-square', 'Class']
data = pd.read_csv("tic-tac-toe.data", header=None)
data.columns = columns
data.Class = data.Class.map(lambda x: 1 if x == 'positive' else 0)
data.to_csv('tic-tac-toe.csv', index=False)

data = pd.get_dummies(data)
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

Разделение выьорки на обучаемую и тестовую (70/30) 

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

Метрики

In [4]:
def TP(y_true, y_pred):
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
    return confusion_matrix[0,0]

def TN(y_true, y_pred):
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
    return confusion_matrix[1,1]

def FP(y_true, y_pred):
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
    return confusion_matrix[1,0]

def FN(y_true, y_pred):
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
    return confusion_matrix[0,1]

def TPR(y_true, y_pred):
    return TP(y_true, y_pred)/(TP(y_true, y_pred) + FN(y_true, y_pred))

def TNR(y_true, y_pred):
    return TN(y_true, y_pred)/(TN(y_true, y_pred) + FP(y_true, y_pred))

def NPV(y_true, y_pred):
    return TN(y_true, y_pred)/(TN(y_true, y_pred) + FN(y_true, y_pred))

def FPR(y_true, y_pred):
    return FP(y_true, y_pred)/(FP(y_true, y_pred) + TN(y_true, y_pred))

def FDR(y_true, y_pred):
    return FP(y_true, y_pred)/(FP(y_true, y_pred) + TP(y_true, y_pred))

def TNR(y_true, y_pred):
    return TN(y_true, y_pred)/(TN(y_true, y_pred) + FP(y_true, y_pred))

metrics = [TP, TN, FP, FN, TPR, TNR, NPV, FPR, FDR, TNR,
           sklearn.metrics.accuracy_score, sklearn.metrics.precision_score,
           sklearn.metrics.recall_score, sklearn.metrics.roc_auc_score, sklearn.metrics.f1_score]
metrics_names = [func.__name__ for func in metrics]

### 1-й алгоритм классификации - Метод к-ближайших соседей

Вначале произведем маштабирование методом градиентного спуска

In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Обучение и предсказание

In [6]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

Получение результатов отностительно метрик

In [7]:
for metric_name, metric in zip(metrics_names, metrics):
    score = metric(y_test, y_pred)
    print(metric_name, '=', score)

TP = 57
TN = 165
FP = 19
FN = 47
TPR = 0.5480769230769231
TNR = 0.8967391304347826
NPV = 0.7783018867924528
FPR = 0.10326086956521739
FDR = 0.25
TNR = 0.8967391304347826
accuracy_score = 0.7708333333333334
precision_score = 0.7783018867924528
recall_score = 0.8967391304347826
roc_auc_score = 0.7224080267558529
f1_score = 0.8333333333333334


### 2-й алгоритм классификации - Random Forest

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.ensemble import RandomForestClassifier

regressor = RandomForestClassifier(n_estimators=20, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

for metric_name, metric in zip(metrics_names, metrics):
    score = metric(y_test, y_pred)
    print(metric_name, '=', score)

TP = 90
TN = 183
FP = 1
FN = 14
TPR = 0.8653846153846154
TNR = 0.9945652173913043
NPV = 0.9289340101522843
FPR = 0.005434782608695652
FDR = 0.01098901098901099
TNR = 0.9945652173913043
accuracy_score = 0.9479166666666666
precision_score = 0.9289340101522843
recall_score = 0.9945652173913043
roc_auc_score = 0.9299749163879598
f1_score = 0.9606299212598426
