In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np

## White Win/Loss Train-Test

In [28]:
# Load the pre-split White win/loss CSVs (X_* / y_* train and test files).
# Forward slashes replace the original backslashes: "\w" is not a valid
# string escape (SyntaxWarning on Python 3.12+), and backslash separators
# only resolve on Windows, whereas "/" is accepted on every platform.
X_train = pd.read_csv("data/train-test-split/white-win-loss/wwcX_train.csv")
X_test = pd.read_csv("data/train-test-split/white-win-loss/wwcX_test.csv")
y_train = pd.read_csv("data/train-test-split/white-win-loss/wwcY_train.csv")
y_test = pd.read_csv("data/train-test-split/white-win-loss/wwcY_test.csv")

In [29]:
# Preview the first three rows of the test labels.
y_test.head(n=3)

Unnamed: 0,W
0,0
1,0
2,1


In [33]:
from skopt import BayesSearchCV

# Convert y_train and y_test column vectors to 1-dimensional arrays.
# np.ravel is a no-op on an already-flat array, so re-running this cell is safe.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Search space for the Bayesian optimiser (skopt shorthand):
#   (low, high)                 -> integer range
#   (low, high, 'log-uniform')  -> real range sampled on a log scale
#   [a, b, ...]                 -> categorical choice
# NOTE(review): 'SAMME.R' was deprecated in scikit-learn 1.4 and removed in
# 1.6 - confirm the pinned scikit-learn version still accepts it.
hyperparameters = {
    'n_estimators': (50, 1000),
    'learning_rate': (0.01, 1.0, 'log-uniform'),
    'algorithm': ['SAMME', 'SAMME.R']
}

# Create AdaBoostClassifier object.
# Seeded with the same random_state (30) the fixed-parameter AdaBoost cells
# in this notebook use, so each candidate fit is repeatable.
adaboost_clf = AdaBoostClassifier(random_state=30)

# Define the search object with BayesSearchCV: 30 sampled configurations,
# each scored with 5-fold cross-validation on all available cores.
# random_state pins the sequence of sampled configurations so the whole
# tuning run can be reproduced.
search = BayesSearchCV(
    adaboost_clf,
    hyperparameters,
    n_iter=30,
    cv=5,
    n_jobs=-1,
    verbose=1,
    random_state=30
)

# Fit the search object on the training data
search.fit(X_train, y_train)

# Print the best hyperparameters and their score.
# best_score_ is the mean cross-validated score of the best configuration,
# not the held-out test accuracy (that is computed below).
print('Best hyperparameters:', search.best_params_)
print('Accuracy score:', search.best_score_)

# Make predictions on test data with the refitted best estimator
y_pred = search.predict(X_test)

# Evaluate the accuracy of the model on the held-out test split
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of AdaBoostClassifier:', accuracy)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

## White Termination Train-Test

In [34]:
# Load the pre-split White termination CSVs (X_* / y_* train and test files).
# Forward slashes replace the original backslashes: "\w" is not a valid
# string escape (SyntaxWarning on Python 3.12+), and backslash separators
# only resolve on Windows, whereas "/" is accepted on every platform.
X_train = pd.read_csv("data/train-test-split/white-termination/wtcX_train.csv")
X_test = pd.read_csv("data/train-test-split/white-termination/wtcX_test.csv")
y_train = pd.read_csv("data/train-test-split/white-termination/wtcY_train.csv")
y_test = pd.read_csv("data/train-test-split/white-termination/wtcY_test.csv")

In [None]:
# Flatten both label containers to 1-D arrays in one pass.
y_train, y_test = (np.ravel(labels) for labels in (y_train, y_test))

# AdaBoost with a fixed configuration and a fixed seed.
adaboost_clf = AdaBoostClassifier(
    algorithm='SAMME.R',
    n_estimators=1000,
    learning_rate=0.1,
    random_state=30,
)

# fit() returns the estimator itself, so training and test-set prediction
# can be chained; adaboost_clf still refers to the fitted model afterwards.
y_pred = adaboost_clf.fit(X_train, y_train).predict(X_test)

In [37]:
# Per-class precision / recall / F1 for the current predictions, to 4 decimals.
report = classification_report(y_true=y_test, y_pred=y_pred, digits=4)
print("Classification Report:\n", report)

Classification Report:
               precision    recall  f1-score   support

           0     0.3751    0.3838    0.3794     53123
           1     0.3832    0.2706    0.3172     53396
           2     0.4934    0.6204    0.5497     56191

    accuracy                         0.4283    162710
   macro avg     0.4172    0.4249    0.4154    162710
weighted avg     0.4186    0.4283    0.4178    162710



## Black Win/Loss Train-Test

In [38]:
# Load the pre-split Black win/loss CSVs (X_* / y_* train and test files).
# Forward slashes replace the hard-coded "\\" separators, which only resolve
# on Windows; "/" is accepted on every platform, Windows included.
X_train = pd.read_csv("data/train-test-split/black-win-loss/bwcX_train.csv")
X_test = pd.read_csv("data/train-test-split/black-win-loss/bwcX_test.csv")
y_train = pd.read_csv("data/train-test-split/black-win-loss/bwcY_train.csv")
y_test = pd.read_csv("data/train-test-split/black-win-loss/bwcY_test.csv")

In [39]:
# Flatten both label containers to 1-D arrays in one pass.
y_train, y_test = (np.ravel(labels) for labels in (y_train, y_test))

# AdaBoost with a fixed configuration and a fixed seed.
adaboost_clf = AdaBoostClassifier(
    algorithm='SAMME.R',
    n_estimators=1000,
    learning_rate=0.1,
    random_state=30,
)

# fit() returns the estimator itself, so training and test-set prediction
# can be chained; adaboost_clf still refers to the fitted model afterwards.
y_pred = adaboost_clf.fit(X_train, y_train).predict(X_test)

# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy of AdaBoostClassifier:', accuracy)

Accuracy of AdaBoostClassifier: 0.6853666031589946


## Black Termination Train-Test

In [40]:
# Load the pre-split Black termination CSVs (X_* / y_* train and test files).
# Forward slashes replace the hard-coded "\\" separators, which only resolve
# on Windows; "/" is accepted on every platform, Windows included.
X_train = pd.read_csv("data/train-test-split/black-termination/btcX_train.csv")
X_test = pd.read_csv("data/train-test-split/black-termination/btcX_test.csv")
y_train = pd.read_csv("data/train-test-split/black-termination/btcY_train.csv")
y_test = pd.read_csv("data/train-test-split/black-termination/btcY_test.csv")

In [41]:
# Flatten both label containers to 1-D arrays in one pass.
y_train, y_test = (np.ravel(labels) for labels in (y_train, y_test))

# AdaBoost with a fixed configuration and a fixed seed.
adaboost_clf = AdaBoostClassifier(
    algorithm='SAMME.R',
    n_estimators=1000,
    learning_rate=0.1,
    random_state=30,
)

# fit() returns the estimator itself, so training and test-set prediction
# can be chained; adaboost_clf still refers to the fitted model afterwards.
y_pred = adaboost_clf.fit(X_train, y_train).predict(X_test)

# Per-class precision / recall / F1 for the test predictions, to 4 decimals.
report = classification_report(y_true=y_test, y_pred=y_pred, digits=4)
print("Classification Report:\n", report)

Classification Report:
               precision    recall  f1-score   support

           0     0.3743    0.4000    0.3867     53123
           1     0.3835    0.2530    0.3049     53396
           2     0.4934    0.6209    0.5498     56191

    accuracy                         0.4280    162710
   macro avg     0.4171    0.4246    0.4138    162710
weighted avg     0.4185    0.4280    0.4162    162710

