In [1]:
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import operator 
import arff
import warnings
from statistics import mean
from sklearn import svm
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from collections import OrderedDict
%config InlineBackend.figure_format = 'retina'
pd.options.display.max_columns = None
warnings.filterwarnings('ignore')

## Data Cleaning and Preprocessing

In [2]:
#.arff file so using arff module to load
eeg_data = arff.load(open('datasets/EEG Eye State.arff'))
eeg_data = np.array(eeg_data['data'], dtype = np.float64) #shape is (14980, 15)
bank_data = np.genfromtxt('datasets/bank.csv', delimiter = ';') #shape is (4522, 17)
chess_data = np.genfromtxt('datasets/kr-vs-kp.data', delimiter = ',', dtype = np.int16) #shape is (3196, 37)
adult_data = np.genfromtxt('datasets/adult.data', delimiter = ',') #shape is (32561, 15)

In [3]:
#all data loaded into dataframe to view data types
#eeg data is consistently numeric so it is not loaded
bank_df = pd.read_csv('datasets/bank.csv') #shape is (4521, 17)
chess_df = pd.read_table('datasets/kr-vs-kp.data') #shape is (3195, 37)
adult_df = pd.read_table('datasets/adult.data') #shape is (32560, 15)

In [4]:
#bank_df, chess_df, and adult_df all have their attributes in one column
#function splits attributes into lists and returns nested list of elements

def split_df(attribute_column, split_string):
    overall_list = []

    for element in attribute_column:
        split_list = element.split(split_string) #each element in column split according commma 
        overall_list.append(split_list)
        
    return overall_list

In [5]:
#assigning column names associated with the various attributes
thisList = split_df(bank_df['age;"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";'\
                            + '"month";"duration";"campaign";"pdays";"previous";"poutcome";"y"'], ';')
bank_df = pd.DataFrame(thisList, columns = ['age','job','marital','education','default','balance','housing',\
                                            'loan','contact','day','month','duration','campaign','pdays','previous',\
                                            'poutcome','y'])

In [6]:
#assigning column names associated with the various attributes
thisList = split_df(chess_df['f,f,f,f,f,f,f,f,f,f,f,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won'], ',')
chess_df = pd.DataFrame(thisList, columns = ['cat1','cat2','cat3','cat4','cat5','cat6','cat7',\
                                            'cat8','cat9','cat10','cat11','cat12','cat13','cat14','cat15',\
                                            'cat16','cat17','cat18','cat19','cat20','cat21','cat22','cat23',\
                                            'cat24','cat25','cat26','cat27','cat28','cat29','cat30','cat31',\
                                            'cat32','cat33','cat34','cat35','cat36','won'])

In [7]:
#assigning column names associated with the various attributes
thisList = split_df(adult_df['39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family,'\
                             + ' White, Male, 2174, 0, 40, United-States, <=50K'], ',')
adult_df = pd.DataFrame(thisList, columns = ['age','workclass','fnlwg','education','education-num','marital-status',\
                                            'occupation','relationship','race','sex','capital-gain','capital-loss',\
                                            'hours-per-week','native-country', 'output'])

In [8]:
#randomly shuffle all dataframes in place
bank_df = bank_df.sample(frac = 1).reset_index(drop = True)
chess_df = chess_df.sample(frac = 1).reset_index(drop = True)
adult_df = adult_df.sample(frac = 1).reset_index(drop = True)

In [9]:
#randomly shuffle remaining eeg numpy array data
np.random.shuffle(eeg_data)

In [10]:
#one hot encoding of categorical attributes 
#new shape (4521, 49)
bank_df = pd.get_dummies(bank_df, columns = ['job','marital','education','contact','month','poutcome'])
#new shape (3195, 74)
chess_df = pd.get_dummies(chess_df, columns = ['cat1','cat2','cat3','cat4','cat5','cat6','cat7',\
                                               'cat8','cat9','cat10','cat11','cat12','cat13','cat14','cat15',\
                                               'cat16','cat17','cat18','cat19','cat20','cat21','cat22','cat23',\
                                               'cat24','cat25','cat26','cat27','cat28','cat29','cat30','cat31',\
                                               'cat32','cat33','cat34','cat35','cat36'])
#new shape (32560, 108)
adult_df = pd.get_dummies(adult_df, columns = ['workclass','education','marital-status','occupation','relationship',\
                                               'race','native-country'])

In [11]:
#Replacing binary elements of various types to uniform 0 and 1 
#All y values of datasets are binary classifications

#Numerical and boolean data 
bank_df.replace('"no"',0,inplace = True)
bank_df.replace('"yes"',1,inplace = True)

#Boolean data
chess_df.replace('nowin',0,inplace = True)
chess_df.replace('won',1,inplace = True)

#Numerical and boolean data 
adult_df.replace(' Male',0,inplace = True)
adult_df.replace(' Female',1,inplace = True)
adult_df.replace(' <=50K',0,inplace = True)
adult_df.replace(' >50K',1,inplace = True)

In [12]:
bank_data = bank_df.as_matrix(columns = None) #shape is (4521, 49)
chess_data = chess_df.as_matrix(columns = None) #shape is (3195, 74)
adult_data = adult_df.as_matrix(columns = None) #shape is (32560, 108)

In [13]:
bank_data = bank_data.astype(dtype = np.float64)
chess_data = chess_data.astype(dtype = np.float64)
adult_data = adult_data.astype(dtype = np.float64)

In [14]:
#delete last 27560 rows from adult dataset so it will have 5000 rows
adult_data = adult_data[0:5000,:]

In [15]:
#Scaling all relevant data (from 0 to 1) to save computationally 
scaler = MinMaxScaler(feature_range = (-1, 1))
eeg_data = scaler.fit_transform(eeg_data)
bank_data = scaler.fit_transform(bank_data)
adult_data = scaler.fit_transform(adult_data)

## Data Partitioning and Train-Test Splits

In [17]:
#Separating all X and Y values of datasets

X_eeg = eeg_data[:,:-1] #all X values of EEG dataset 
Y_eeg = eeg_data[:,14] #all Y values of EEG dataset 

X_bank = np.concatenate((bank_data[:,:10], bank_data[:,11:]), axis = 1) #all X values of bank dataset 
Y_bank = bank_data[:,10] #all Y values of bank dataset 

X_chess = chess_data[:,1:] #all X values of chess dataset 
Y_chess = chess_data[:,0] #all Y values of chess dataset 

X_adult = np.concatenate((adult_data[:,:7], adult_data[:,8:]), axis = 1) #all X values of adult dataset 
Y_adult = adult_data[:,7] #all Y values of adult dataset 

In [18]:
def partition_dataset(X, Y, X_and_Y, percent):
    """
    Randomly shuffles then partitions a dataset.
    
    X:          numpy array
                X values 
    percent:    int
                Percentage for partition (0.8 = 80/20 split)
    train_data: boolean
                True for training data, False for testing data
                
    Returns the partitioned dataset.
    """
    np.random.shuffle(X_and_Y)
    X_train = X[:int(len(X_and_Y)*percent)]
    Y_train = Y[:int(len(X_and_Y)*percent)]
    X_test = X[int(len(X_and_Y)*percent):]
    Y_test = Y[int(len(X_and_Y)*percent):]
    
    return X_train, Y_train, X_test, Y_test

In [19]:
#eeg_data partitioning

#80/20 partition with 3 trials
X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1, X_eeg_test_8020_1, Y_eeg_test_8020_1\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.8) 
X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2, X_eeg_test_8020_2, Y_eeg_test_8020_2\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.8)
X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3, X_eeg_test_8020_3, Y_eeg_test_8020_3\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.8)

#50/50 partition with 3 trials
X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1, X_eeg_test_5050_1, Y_eeg_test_5050_1\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.5) 
X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2, X_eeg_test_5050_2, Y_eeg_test_5050_2\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.5) 
X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3, X_eeg_test_5050_3, Y_eeg_test_5050_3\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.5) 

#80/20 partition with 3 trials
X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1, X_eeg_test_2080_1, Y_eeg_test_2080_1\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.2) 
X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2, X_eeg_test_2080_2, Y_eeg_test_2080_2\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.2)
X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3, X_eeg_test_2080_3, Y_eeg_test_2080_3\
= partition_dataset(X_eeg, Y_eeg, eeg_data, 0.2)

In [20]:
#bank_data partitioning

#80/20 partition with 3 trials
X_bank_train_val_8020_1, Y_bank_train_val_8020_1, X_bank_test_8020_1, Y_bank_test_8020_1\
= partition_dataset(X_bank, Y_bank, bank_data, 0.8) 
X_bank_train_val_8020_2, Y_bank_train_val_8020_2, X_bank_test_8020_2, Y_bank_test_8020_2\
= partition_dataset(X_bank, Y_bank, bank_data, 0.8) 
X_bank_train_val_8020_3, Y_bank_train_val_8020_3, X_bank_test_8020_3, Y_bank_test_8020_3\
= partition_dataset(X_bank, Y_bank, bank_data, 0.8) 

#50/50 partition with 3 trials
X_bank_train_val_5050_1, Y_bank_train_val_5050_1, X_bank_test_5050_1, Y_bank_test_5050_1\
= partition_dataset(X_bank, Y_bank, bank_data, 0.5) 
X_bank_train_val_5050_2, Y_bank_train_val_5050_2, X_bank_test_5050_2, Y_bank_test_5050_2\
= partition_dataset(X_bank, Y_bank, bank_data, 0.5)
X_bank_train_val_5050_3, Y_bank_train_val_5050_3, X_bank_test_5050_3, Y_bank_test_5050_3\
= partition_dataset(X_bank, Y_bank, bank_data, 0.5)

#80/20 partition with 3 trials
X_bank_train_val_2080_1, Y_bank_train_val_2080_1, X_bank_test_2080_1, Y_bank_test_2080_1\
= partition_dataset(X_bank, Y_bank, bank_data, 0.2) 
X_bank_train_val_2080_2, Y_bank_train_val_2080_2, X_bank_test_2080_2, Y_bank_test_2080_2\
= partition_dataset(X_bank, Y_bank, bank_data, 0.2) 
X_bank_train_val_2080_3, Y_bank_train_val_2080_3, X_bank_test_2080_3, Y_bank_test_2080_3\
= partition_dataset(X_bank, Y_bank, bank_data, 0.2) 

In [21]:
#chess_data partitioning

#80/20 partition with 3 trials
X_chess_train_val_8020_1, Y_chess_train_val_8020_1, X_chess_test_8020_1, Y_chess_test_8020_1\
= partition_dataset(X_chess, Y_chess, chess_data, 0.8) 
X_chess_train_val_8020_2, Y_chess_train_val_8020_2, X_chess_test_8020_2, Y_chess_test_8020_2\
= partition_dataset(X_chess, Y_chess, chess_data, 0.8) 
X_chess_train_val_8020_3, Y_chess_train_val_8020_3, X_chess_test_8020_3, Y_chess_test_8020_3\
= partition_dataset(X_chess, Y_chess, chess_data, 0.8) 

#50/50 partition with 3 trials
X_chess_train_val_5050_1, Y_chess_train_val_5050_1, X_chess_test_5050_1, Y_chess_test_5050_1\
= partition_dataset(X_chess, Y_chess, chess_data, 0.5) 
X_chess_train_val_5050_2, Y_chess_train_val_5050_2, X_chess_test_5050_2, Y_chess_test_5050_2\
= partition_dataset(X_chess, Y_chess, chess_data, 0.5) 
X_chess_train_val_5050_3, Y_chess_train_val_5050_3, X_chess_test_5050_3, Y_chess_test_5050_3\
= partition_dataset(X_chess, Y_chess, chess_data, 0.5) 

#80/20 partition with 3 trials
X_chess_train_val_2080_1, Y_chess_train_val_2080_1, X_chess_test_2080_1, Y_chess_test_2080_1\
= partition_dataset(X_chess, Y_chess, chess_data, 0.2) 
X_chess_train_val_2080_2, Y_chess_train_val_2080_2, X_chess_test_2080_2, Y_chess_test_2080_2\
= partition_dataset(X_chess, Y_chess, chess_data, 0.2) 
X_chess_train_val_2080_3, Y_chess_train_val_2080_3, X_chess_test_2080_3, Y_chess_test_2080_3\
= partition_dataset(X_chess, Y_chess, chess_data, 0.2) 

In [22]:
#adult_data partitioning

#80/20 partition with 3 trials
X_adult_train_val_8020_1, Y_adult_train_val_8020_1, X_adult_test_8020_1, Y_adult_test_8020_1\
= partition_dataset(X_adult, Y_adult, bank_data, 0.8) 
X_adult_train_val_8020_2, Y_adult_train_val_8020_2, X_adult_test_8020_2, Y_adult_test_8020_2\
= partition_dataset(X_adult, Y_adult, bank_data, 0.8)
X_adult_train_val_8020_3, Y_adult_train_val_8020_3, X_adult_test_8020_3, Y_adult_test_8020_3\
= partition_dataset(X_adult, Y_adult, bank_data, 0.8)

#50/50 partition with 3 trials
X_adult_train_val_5050_1, Y_adult_train_val_5050_1, X_adult_test_5050_1, Y_adult_test_5050_1\
= partition_dataset(X_adult, Y_adult, bank_data, 0.5) 
X_adult_train_val_5050_2, Y_adult_train_val_5050_2, X_adult_test_5050_2, Y_adult_test_5050_2\
= partition_dataset(X_adult, Y_adult, bank_data, 0.5) 
X_adult_train_val_5050_3, Y_adult_train_val_5050_3, X_adult_test_5050_3, Y_adult_test_5050_3\
= partition_dataset(X_adult, Y_adult, bank_data, 0.5) 

#80/20 partition with 3 trials
X_adult_train_val_2080_1, Y_adult_train_val_2080_1, X_adult_test_2080_1, Y_adult_test_2080_1\
= partition_dataset(X_adult, Y_adult, bank_data, 0.2)  
X_adult_train_val_2080_2, Y_adult_train_val_2080_2, X_adult_test_2080_2, Y_adult_test_2080_2\
= partition_dataset(X_adult, Y_adult, bank_data, 0.2)
X_adult_train_val_2080_3, Y_adult_train_val_2080_3, X_adult_test_2080_3, Y_adult_test_2080_3\
= partition_dataset(X_adult, Y_adult, bank_data, 0.2)

## Experiment: Training Algorithms

In [23]:
#report the training and validation errors during cross-validation w.r.t. the hyper-parameters
def clf_report(clf, key, X_test, Y_test, split, trial):
    """
    Reports performance on each hyperparameter, optimal hyperparameter,training accuracy, 
    validation accuracy, and test accuracy,
    
    clf:        GridSearch classifier
                
    key:        string
                The key in the hyperparameter dictionary
    X_test:     numpy array
                Testing data for X
    Y_test:     numpy array
                Testing data for Y 
    split:      string
                The type of split (e.g. "80/20")
    trial:      string
                The trial (e.g. "2")
    """
    print(split + ' Split, Trial ' + trial)
    print('--------------------------------------------------------------------------------------------------')
    
    for param,train_score,test_score in zip(clf.cv_results_['params'], \
                            clf.cv_results_['mean_train_score'],
                            clf.cv_results_['mean_test_score']):
        print(str(param) + ': training accuracy - ' + str(train_score) + ', validation accuracy - ' + str(test_score))

    print('--------------------------------------------------------------------------------------------------')
    
    #printing optimal hyperparameter
    best_para = clf.best_params_
    print('Optimal hyperparameter: ' + str(best_para.get(key)))
    
    #printing training accuracy 
    train_acc = clf.cv_results_['mean_train_score']
    print('Training accuracy: ' + str(mean(train_acc)))

    #printing validation accuracy 
    val_acc = clf.cv_results_['mean_test_score']
    print('Validation accuracy: ' + str(mean(val_acc)))

    #printing test accuracy 
    test_acc = clf.score(X_test, Y_test)
    print('Test accuracy: ' + str(test_acc) +'\n')

    train_list.append(np.mean(train_acc))
    val_list.append(np.mean(val_acc))
    test_list.append(np.mean(test_acc))
    test_final.append(np.mean(test_acc))

In [24]:
def fit_plot(clf, X_train, Y_train, X_test, Y_test, key, split, trial):
    clf.fit(X_train, Y_train)
    clf_report(clf, key, X_test, Y_test, split, trial)

In [25]:
def average_acc_report():
    """
    A report for the average accuracy for the same split.
    """
    print('Average Accuracies')
    print('--------------------------------------------------------------------------------------------------')
    print('Mean training accuracy: ' + str(mean(train_list)))
    print('Mean validation accuracy: ' + str(mean(val_list)))
    print('Mean test accuracy: ' + str(mean(test_list)) +'\n')

In [26]:
def test_acc_report():
    """
    A report for the overall accuracy for the same dataset.
    """
    print('Test Accuracies of all Splits')
    print('--------------------------------------------------------------------------------------------------')
    print('Overall test accuracy: ' + str(mean(test_final)) +'\n')

### Algorithm 1: Support Vector Machine (Linear Kernel)

#### EEG Dataset

In [27]:
train_list, val_list, test_list, test_final = [], [], [], []

C_dict = {'C' : [0.0001, 0.001, 0.01, 1, 10, 100]}

clf_svm_eeg_8020_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'C', '80/20', '1')

clf_svm_eeg_8020_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'C', '80/20', '2')

clf_svm_eeg_8020_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'C', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_eeg_5050_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'C', '50/50', '1')

clf_svm_eeg_5050_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'C', '50/50', '2')

clf_svm_eeg_5050_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'C', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_eeg_2080_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'C', '20/80', '1')

clf_svm_eeg_2080_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'C', '20/80', '2')

clf_svm_eeg_2080_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'C', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'C': 0.0001}: training accuracy - 0.5554072080650317, validation accuracy - 0.5554072096128171
{'C': 0.001}: training accuracy - 0.5554072080650317, validation accuracy - 0.5554072096128171
{'C': 0.01}: training accuracy - 0.5554072080650317, validation accuracy - 0.5554072096128171
{'C': 1}: training accuracy - 0.5554906509174353, validation accuracy - 0.5554072096128171
{'C': 10}: training accuracy - 0.5554906509174353, validation accuracy - 0.5553237650200267
{'C': 100}: training accuracy - 0.5655457731226493, validation accuracy - 0.5657543391188251
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 100
Training accuracy: 0.5571247831921026
Validation accuracy: 0.5571178237650201
Test accuracy: 0.5477303070761015

80/20 Split, Trial 2
----------------------------------------------------------

Test accuracy: 0.5507343124165555

Average Accuracies
--------------------------------------------------------------------------------------------------
Mean training accuracy: 0.5532376899277366
Mean validation accuracy: 0.5530707610146862
Mean test accuracy: 0.5507343124165555

Test Accuracies of all Splits
--------------------------------------------------------------------------------------------------
Overall test accuracy: 0.5526257231864709



#### Bank Dataset

In [28]:
train_list, val_list, test_list, test_final = [], [], [], []

C_dict = {'C' : [0.0001, 0.001, 0.01, 1, 10, 100]}

clf_svm_bank_8020_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'C', '80/20', '1')

clf_svm_bank_8020_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'C', '80/20', '2')

clf_svm_bank_8020_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'C', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_bank_5050_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'C', '50/50', '1')

clf_svm_bank_5050_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'C', '50/50', '2')

clf_svm_bank_5050_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'C', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_bank_2080_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'C', '20/80', '1')

clf_svm_bank_2080_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'C', '20/80', '2')

clf_svm_bank_2080_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'C', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'C': 0.0001}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'C': 0.001}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'C': 0.01}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'C': 1}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'C': 10}: training accuracy - 0.8845408733189745, validation accuracy - 0.8791482300884956
{'C': 100}: training accuracy - 0.9240880658945935, validation accuracy - 0.8445796460176991
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 0.0001
Training accuracy: 0.8880901126762071
Validation accuracy: 0.8739398967551623
Test accuracy: 0.9038674033149171

80/20 Split, Trial 2
-------------------------------------------------------

#### Chess Dataset

In [29]:
train_list, val_list, test_list, test_final = [], [], [], []

C_dict = {'C' : [0.0001, 0.001, 0.01, 1, 10, 100]}

clf_svm_chess_8020_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'C', '80/20', '1')

clf_svm_chess_8020_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'C', '80/20', '2')

clf_svm_chess_8020_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'C', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_chess_5050_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'C', '50/50', '1')

clf_svm_chess_5050_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'C', '50/50', '2')

clf_svm_chess_5050_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'C', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_chess_2080_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'C', '20/80', '1')

clf_svm_chess_2080_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'C', '20/80', '2')

clf_svm_chess_2080_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'C', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'C': 0.0001}: training accuracy - 0.5172144014709504, validation accuracy - 0.5172143974960877
{'C': 0.001}: training accuracy - 0.5172144014709504, validation accuracy - 0.5172143974960877
{'C': 0.01}: training accuracy - 0.5172144014709504, validation accuracy - 0.5172143974960877
{'C': 1}: training accuracy - 0.9338807035281746, validation accuracy - 0.931924882629108
{'C': 10}: training accuracy - 0.974569749270267, validation accuracy - 0.9647887323943662
{'C': 100}: training accuracy - 0.9990220236957494, validation accuracy - 0.986697965571205
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 100
Training accuracy: 0.7431859468178403
Validation accuracy: 0.7391757955138237
Test accuracy: 0.9921752738654147

80/20 Split, Trial 2
-------------------------------------------------------------

#### Adult Dataset

In [30]:
train_list, val_list, test_list, test_final = [], [], [], []

C_dict = {'C' : [0.0001, 0.001, 0.01, 1, 10, 100]}

clf_svm_adult_8020_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'C', '80/20', '1')

clf_svm_adult_8020_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'C', '80/20', '2')

clf_svm_adult_8020_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'C', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_adult_5050_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'C', '50/50', '1')

clf_svm_adult_5050_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'C', '50/50', '2')

clf_svm_adult_5050_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'C', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_svm_adult_2080_1 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'C', '20/80', '1')

clf_svm_adult_2080_2 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'C', '20/80', '2')

clf_svm_adult_2080_3 = GridSearchCV(svm.SVC(), C_dict, cv = 3)
fit_plot(clf_svm_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'C', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'C': 0.0001}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
{'C': 0.001}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
{'C': 0.01}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
{'C': 1}: training accuracy - 0.8304758675801837, validation accuracy - 0.8199668141592921
{'C': 10}: training accuracy - 0.8534291023220567, validation accuracy - 0.8227323008849557
{'C': 100}: training accuracy - 0.9050051831365348, validation accuracy - 0.8213495575221239
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 10
Training accuracy: 0.81021844707072
Validation accuracy: 0.789408185840708
Test accuracy: 0.8367052023121387

80/20 Split, Trial 2
--------------------------------------------------------------

Test accuracy: 0.821533203125

Average Accuracies
--------------------------------------------------------------------------------------------------
Mean training accuracy: 0.8120383072945839
Mean validation accuracy: 0.7745206489675516
Mean test accuracy: 0.821533203125

Test Accuracies of all Splits
--------------------------------------------------------------------------------------------------
Overall test accuracy: 0.8281719258999708



### Algorithm 2: Decision Tree

#### EEG Dataset

In [31]:
train_list, val_list, test_list, test_final = [], [], [], []

depth_dict = {'max_depth' : [1,2,3,4,5]}

clf_tree_eeg_8020_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'max_depth', '80/20', '1')

clf_tree_eeg_8020_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'max_depth', '80/20', '2')

clf_tree_eeg_8020_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'max_depth', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_eeg_5050_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'max_depth', '50/50', '1')

clf_tree_eeg_5050_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'max_depth', '50/50', '2')

clf_tree_eeg_5050_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'max_depth', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_eeg_2080_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'max_depth', '20/80', '1')

clf_tree_eeg_2080_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'max_depth', '20/80', '2')

clf_tree_eeg_2080_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'max_depth', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training accuracy - 0.6003838193661466, validation accuracy - 0.5962950600801068
{'max_depth': 2}: training accuracy - 0.6187416385473289, validation accuracy - 0.6143190921228304
{'max_depth': 3}: training accuracy - 0.660255405416908, validation accuracy - 0.6532877169559412
{'max_depth': 4}: training accuracy - 0.6997247310170382, validation accuracy - 0.6871662216288384
{'max_depth': 5}: training accuracy - 0.7230058299289599, validation accuracy - 0.7054405874499332
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 5
Training accuracy: 0.6604222848552763
Validation accuracy: 0.65130173564753
Test accuracy: 0.7239652870493992

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training acc

#### Bank Dataset

In [32]:
train_list, val_list, test_list, test_final = [], [], [], []

depth_dict = {'max_depth' : [1,2,3,4,5]}

clf_tree_bank_8020_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'max_depth', '80/20', '1')

clf_tree_bank_8020_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'max_depth', '80/20', '2')

clf_tree_bank_8020_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'max_depth', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_bank_5050_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'max_depth', '50/50', '1')

clf_tree_bank_5050_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'max_depth', '50/50', '2')

clf_tree_bank_5050_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'max_depth', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_bank_2080_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'max_depth', '20/80', '1')

clf_tree_bank_2080_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'max_depth', '20/80', '2')

clf_tree_bank_2080_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'max_depth', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'max_depth': 2}: training accuracy - 0.8805310698696468, validation accuracy - 0.8780420353982301
{'max_depth': 3}: training accuracy - 0.8820522789464388, validation accuracy - 0.8777654867256637
{'max_depth': 4}: training accuracy - 0.8838502278831752, validation accuracy - 0.8766592920353983
{'max_depth': 5}: training accuracy - 0.8853708635247002, validation accuracy - 0.875
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.8823564748869759
Validation accuracy: 0.8774889380530974
Test accuracy: 0.9038674033149171

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training accuracy - 0.

#### Chess Dataset

In [33]:
train_list, val_list, test_list, test_final = [], [], [], []

depth_dict = {'max_depth' : [1,2,3,4,5]}

clf_tree_chess_8020_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'max_depth', '80/20', '1')

clf_tree_chess_8020_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'max_depth', '80/20', '2')

clf_tree_chess_8020_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'max_depth', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_chess_5050_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'max_depth', '50/50', '1')

clf_tree_chess_5050_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'max_depth', '50/50', '2')

clf_tree_chess_5050_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'max_depth', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_chess_2080_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'max_depth', '20/80', '1')

clf_tree_chess_2080_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'max_depth', '20/80', '2')

clf_tree_chess_2080_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'max_depth', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training accuracy - 0.6611904291600544, validation accuracy - 0.661189358372457
{'max_depth': 2}: training accuracy - 0.7730862192029219, validation accuracy - 0.7609546165884195
{'max_depth': 3}: training accuracy - 0.9061036088878222, validation accuracy - 0.9061032863849765
{'max_depth': 4}: training accuracy - 0.9424897412228813, validation accuracy - 0.9424882629107981
{'max_depth': 5}: training accuracy - 0.9424897412228813, validation accuracy - 0.9424882629107981
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 4
Training accuracy: 0.8450719479393122
Validation accuracy: 0.8426447574334899
Test accuracy: 0.9342723004694836

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training a

#### Adult Dataset

In [34]:
train_list, val_list, test_list, test_final = [], [], [], []

depth_dict = {'max_depth' : [1,2,3,4,5]}

clf_tree_adult_8020_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'max_depth', '80/20', '1')

clf_tree_adult_8020_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'max_depth', '80/20', '2')

clf_tree_adult_8020_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'max_depth', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_adult_5050_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'max_depth', '50/50', '1')

clf_tree_adult_5050_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'max_depth', '50/50', '2')

clf_tree_adult_5050_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'max_depth', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_tree_adult_2080_1 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'max_depth', '20/80', '1')

clf_tree_adult_2080_2 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'max_depth', '20/80', '2')

clf_tree_adult_2080_3 = GridSearchCV(tree.DecisionTreeClassifier(criterion = 'gini'), depth_dict, cv = 3)
fit_plot(clf_tree_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'max_depth', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
{'max_depth': 2}: training accuracy - 0.8098737747059496, validation accuracy - 0.8003318584070797
{'max_depth': 3}: training accuracy - 0.8357287627649438, validation accuracy - 0.8230088495575221
{'max_depth': 4}: training accuracy - 0.8454086933275508, validation accuracy - 0.8324115044247787
{'max_depth': 5}: training accuracy - 0.849834122420694, validation accuracy - 0.8354535398230089
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 5
Training accuracy: 0.8196624392695306
Validation accuracy: 0.8097345132743363
Test accuracy: 0.8359826589595376

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'max_depth': 1}: training a

### Algorithm 3: K-Nearest Neighbors

#### EEG Dataset

In [35]:
train_list, val_list, test_list, test_final = [], [], [], []

neighbors_dict = {'n_neighbors' : [1,3,5,7,9]}

clf_knn_eeg_8020_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'n_neighbors', '80/20', '1')

clf_knn_eeg_8020_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'n_neighbors', '80/20', '2')

clf_knn_eeg_8020_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'n_neighbors', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_eeg_5050_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'n_neighbors', '50/50', '1')

clf_knn_eeg_5050_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'n_neighbors', '50/50', '2')

clf_knn_eeg_5050_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'n_neighbors', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_eeg_2080_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'n_neighbors', '20/80', '1')

clf_knn_eeg_2080_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'n_neighbors', '20/80', '2')

clf_knn_eeg_2080_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'n_neighbors', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accuracy - 1.0, validation accuracy - 0.8130006675567423
{'n_neighbors': 3}: training accuracy - 0.914844817130438, validation accuracy - 0.8166722296395194
{'n_neighbors': 5}: training accuracy - 0.885555806735304, validation accuracy - 0.8144192256341789
{'n_neighbors': 7}: training accuracy - 0.8646529413905739, validation accuracy - 0.8099966622162884
{'n_neighbors': 9}: training accuracy - 0.8558077013799691, validation accuracy - 0.8050734312416555
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 3
Training accuracy: 0.904172253327257
Validation accuracy: 0.8118324432576769
Test accuracy: 0.8264352469959947

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accura

#### Bank Dataset

In [36]:
train_list, val_list, test_list, test_final = [], [], [], []

neighbors_dict = {'n_neighbors' : [1,3,5,7,9]}

clf_knn_bank_8020_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'n_neighbors', '80/20', '1')

clf_knn_bank_8020_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'n_neighbors', '80/20', '2')

clf_knn_bank_8020_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'n_neighbors', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_bank_5050_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'n_neighbors', '50/50', '1')

clf_knn_bank_5050_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'n_neighbors', '50/50', '2')

clf_knn_bank_5050_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'n_neighbors', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_bank_2080_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'n_neighbors', '20/80', '1')

clf_knn_bank_2080_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'n_neighbors', '20/80', '2')

clf_knn_bank_2080_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'n_neighbors', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accuracy - 1.0, validation accuracy - 0.7806969026548672
{'n_neighbors': 3}: training accuracy - 0.8947737109748629, validation accuracy - 0.8459623893805309
{'n_neighbors': 5}: training accuracy - 0.8814987992265505, validation accuracy - 0.8680862831858407
{'n_neighbors': 7}: training accuracy - 0.8814986845394971, validation accuracy - 0.8744469026548672
{'n_neighbors': 9}: training accuracy - 0.8799778195238653, validation accuracy - 0.8788716814159292
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 9
Training accuracy: 0.9075498028529552
Validation accuracy: 0.8496128318584071
Test accuracy: 0.9038674033149171

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training acc

#### Chess Dataset

In [37]:
train_list, val_list, test_list, test_final = [], [], [], []

neighbors_dict = {'n_neighbors' : [1,3,5,7,9]}

clf_knn_chess_8020_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'n_neighbors', '80/20', '1')

clf_knn_chess_8020_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'n_neighbors', '80/20', '2')

clf_knn_chess_8020_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'n_neighbors', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_chess_5050_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'n_neighbors', '50/50', '1')

clf_knn_chess_5050_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'n_neighbors', '50/50', '2')

clf_knn_chess_5050_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'n_neighbors', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_chess_2080_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'n_neighbors', '20/80', '1')

clf_knn_chess_2080_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'n_neighbors', '20/80', '2')

clf_knn_chess_2080_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'n_neighbors', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accuracy - 1.0, validation accuracy - 0.8928012519561815
{'n_neighbors': 3}: training accuracy - 0.9747647933591347, validation accuracy - 0.9311424100156495
{'n_neighbors': 5}: training accuracy - 0.9671361426892396, validation accuracy - 0.9366197183098591
{'n_neighbors': 7}: training accuracy - 0.9591149928515664, validation accuracy - 0.937793427230047
{'n_neighbors': 9}: training accuracy - 0.9567672298718389, validation accuracy - 0.9330985915492958
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 7
Training accuracy: 0.9715566317543559
Validation accuracy: 0.9262910798122066
Test accuracy: 0.9374021909233177

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accu

#### Adult Dataset

In [38]:
train_list, val_list, test_list, test_final = [], [], [], []

neighbors_dict = {'n_neighbors' : [1,3,5,7,9]}

clf_knn_adult_8020_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'n_neighbors', '80/20', '1')

clf_knn_adult_8020_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'n_neighbors', '80/20', '2')

clf_knn_adult_8020_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'n_neighbors', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_adult_5050_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'n_neighbors', '50/50', '1')

clf_knn_adult_5050_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'n_neighbors', '50/50', '2')

clf_knn_adult_5050_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'n_neighbors', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_knn_adult_2080_1 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'n_neighbors', '20/80', '1')

clf_knn_adult_2080_2 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'n_neighbors', '20/80', '2')

clf_knn_adult_2080_3 = GridSearchCV(KNeighborsClassifier(), neighbors_dict, cv = 3)
fit_plot(clf_knn_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'n_neighbors', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accuracy - 1.0, validation accuracy - 0.7624446902654868
{'n_neighbors': 3}: training accuracy - 0.8853703604904446, validation accuracy - 0.7898230088495575
{'n_neighbors': 5}: training accuracy - 0.8631087460481094, validation accuracy - 0.7920353982300885
{'n_neighbors': 7}: training accuracy - 0.8517696954885774, validation accuracy - 0.8066924778761062
{'n_neighbors': 9}: training accuracy - 0.8436114902134236, validation accuracy - 0.8097345132743363
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 9
Training accuracy: 0.888772058448111
Validation accuracy: 0.7921460176991151
Test accuracy: 0.815028901734104

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'n_neighbors': 1}: training accur

### Algorithm 4: Logistic Regression

#### EEG Dataset

In [39]:
train_list, val_list, test_list, test_final = [], [], [], []

penalty_dict = {'penalty' : ['l1', 'l2']}

clf_loreg_eeg_8020_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'penalty', '80/20', '1')

clf_loreg_eeg_8020_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'penalty', '80/20', '2')

clf_loreg_eeg_8020_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'penalty', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_eeg_5050_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'penalty', '50/50', '1')

clf_loreg_eeg_5050_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'penalty', '50/50', '2')

clf_loreg_eeg_5050_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'penalty', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_eeg_2080_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'penalty', '20/80', '1')

clf_loreg_eeg_2080_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'penalty', '20/80', '2')

clf_loreg_eeg_2080_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'penalty', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.5786880563194083, validation accuracy - 0.5774365821094793
{'penalty': 'l2'}: training accuracy - 0.561874209286413, validation accuracy - 0.5611648865153538
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: l1
Training accuracy: 0.5702811328029107
Validation accuracy: 0.5693007343124166
Test accuracy: 0.5604138851802403

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.5788549472462475, validation accuracy - 0.5776869158878505
{'penalty': 'l2'}: training accuracy - 0.561874209286413, validation accuracy - 0.5611648865153538
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 

#### Bank Dataset

In [40]:
train_list, val_list, test_list, test_final = [], [], [], []

penalty_dict = {'penalty' : ['l1', 'l2']}

clf_loreg_bank_8020_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'penalty', '80/20', '1')

clf_loreg_bank_8020_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'penalty', '80/20', '2')

clf_loreg_bank_8020_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'penalty', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_bank_5050_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'penalty', '50/50', '1')

clf_loreg_bank_5050_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'penalty', '50/50', '2')

clf_loreg_bank_5050_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'penalty', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_bank_2080_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'penalty', '20/80', '1')

clf_loreg_bank_2080_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'penalty', '20/80', '2')

clf_loreg_bank_2080_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'penalty', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'penalty': 'l2'}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: l1
Training accuracy: 0.8799779342109186
Validation accuracy: 0.8799778761061947
Test accuracy: 0.9038674033149171

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'penalty': 'l2'}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
--------------------------------------------------------------------------------------------------
Optimal hyperparameter

#### Chess Dataset

In [41]:
train_list, val_list, test_list, test_final = [], [], [], []

penalty_dict = {'penalty' : ['l1', 'l2']}

clf_loreg_chess_8020_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'penalty', '80/20', '1')

clf_loreg_chess_8020_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'penalty', '80/20', '2')

clf_loreg_chess_8020_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'penalty', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_chess_5050_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'penalty', '50/50', '1')

clf_loreg_chess_5050_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'penalty', '50/50', '2')

clf_loreg_chess_5050_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'penalty', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_chess_2080_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'penalty', '20/80', '1')

clf_loreg_chess_2080_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'penalty', '20/80', '2')

clf_loreg_chess_2080_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'penalty', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.9739831238700593, validation accuracy - 0.9636150234741784
{'penalty': 'l2'}: training accuracy - 0.968505699631125, validation accuracy - 0.9616588419405321
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: l1
Training accuracy: 0.9712444117505921
Validation accuracy: 0.9626369327073552
Test accuracy: 0.9608763693270735

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.9739831238700593, validation accuracy - 0.9636150234741784
{'penalty': 'l2'}: training accuracy - 0.968505699631125, validation accuracy - 0.9616588419405321
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 

#### Adult Dataset

In [42]:
train_list, val_list, test_list, test_final = [], [], [], []

penalty_dict = {'penalty' : ['l1', 'l2']}

clf_loreg_adult_8020_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'penalty', '80/20', '1')

clf_loreg_adult_8020_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'penalty', '80/20', '2')

clf_loreg_adult_8020_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'penalty', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_adult_5050_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'penalty', '50/50', '1')

clf_loreg_adult_5050_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'penalty', '50/50', '2')

clf_loreg_adult_5050_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'penalty', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_loreg_adult_2080_1 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'penalty', '20/80', '1')

clf_loreg_adult_2080_2 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'penalty', '20/80', '2')

clf_loreg_adult_2080_3 = GridSearchCV(LogisticRegression(), penalty_dict, cv = 3)
fit_plot(clf_loreg_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'penalty', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.8532906750009897, validation accuracy - 0.8423672566371682
{'penalty': 'l2'}: training accuracy - 0.8492807573402908, validation accuracy - 0.8373893805309734
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: l1
Training accuracy: 0.8512857161706402
Validation accuracy: 0.8398783185840708
Test accuracy: 0.8533236994219653

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'penalty': 'l1'}: training accuracy - 0.8531523624145443, validation accuracy - 0.8420907079646017
{'penalty': 'l2'}: training accuracy - 0.8492807573402908, validation accuracy - 0.8373893805309734
--------------------------------------------------------------------------------------------------
Optimal hyperparameter

### Algorithm 5: Ensemble AdaBoost

#### EEG Dataset

In [43]:
train_list, val_list, test_list, test_final = [], [], [], []

rate_dict = {'learning_rate' : [0.001, 0.01, 0.1, 1, 10, 100]}

clf_ada_eeg_8020_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'learning_rate', '80/20', '1')

clf_ada_eeg_8020_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'learning_rate', '80/20', '2')

clf_ada_eeg_8020_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'learning_rate', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_eeg_5050_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'learning_rate', '50/50', '1')

clf_ada_eeg_5050_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'learning_rate', '50/50', '2')

clf_ada_eeg_5050_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'learning_rate', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_eeg_2080_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'learning_rate', '20/80', '1')

clf_ada_eeg_2080_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'learning_rate', '20/80', '2')

clf_ada_eeg_2080_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'learning_rate', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.6010096851464465, validation accuracy - 0.5972963951935915
{'learning_rate': 0.01}: training accuracy - 0.6147366009573134, validation accuracy - 0.6115654205607477
{'learning_rate': 0.1}: training accuracy - 0.6965116824954295, validation accuracy - 0.6894192256341789
{'learning_rate': 1}: training accuracy - 0.7548394635030342, validation accuracy - 0.7406542056074766
{'learning_rate': 10}: training accuracy - 0.5336670838548185, validation accuracy - 0.5323765020026703
{'learning_rate': 100}: training accuracy - 0.5554072080650317, validation accuracy - 0.5554072096128171
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.6260286206703456
Validation accuracy: 0.621119826435247
Test accuracy: 0.7439919893190922

80/20 Split, 

20/80 Split, Trial 3
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.632351623500588, validation accuracy - 0.6084779706275033
{'learning_rate': 0.01}: training accuracy - 0.6487068230555204, validation accuracy - 0.6255006675567423
{'learning_rate': 0.1}: training accuracy - 0.7169527577009207, validation accuracy - 0.6835781041388518
{'learning_rate': 1}: training accuracy - 0.7878829062863799, validation accuracy - 0.7379839786381842
{'learning_rate': 10}: training accuracy - 0.5150327348056138, validation accuracy - 0.5267022696929239
{'learning_rate': 100}: training accuracy - 0.5530707728436519, validation accuracy - 0.5530707610146862
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.6423329363654458
Validation accuracy: 0.6225522919448153
Test accuracy: 0.7169559412550067

Average Accur

#### Bank Dataset

In [44]:
train_list, val_list, test_list, test_final = [], [], [], []

rate_dict = {'learning_rate' : [0.001, 0.01, 0.1, 1, 10, 100]}

clf_ada_bank_8020_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'learning_rate', '80/20', '1')

clf_ada_bank_8020_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'learning_rate', '80/20', '2')

clf_ada_bank_8020_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'learning_rate', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_bank_5050_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'learning_rate', '50/50', '1')

clf_ada_bank_5050_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'learning_rate', '50/50', '2')

clf_ada_bank_5050_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'learning_rate', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_bank_2080_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'learning_rate', '20/80', '1')

clf_ada_bank_2080_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'learning_rate', '20/80', '2')

clf_ada_bank_2080_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'learning_rate', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'learning_rate': 0.01}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
{'learning_rate': 0.1}: training accuracy - 0.880116246797364, validation accuracy - 0.8797013274336283
{'learning_rate': 1}: training accuracy - 0.8819138516729401, validation accuracy - 0.8780420353982301
{'learning_rate': 10}: training accuracy - 0.12057531613486279, validation accuracy - 0.12002212389380532
{'learning_rate': 100}: training accuracy - 0.8799779342109186, validation accuracy - 0.8799778761061947
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 0.001
Training accuracy: 0.7537565362063204
Validation accuracy: 0.7529498525073747
Test accuracy: 0.9038674033149171

80/20 S

20/80 Split, Trial 3
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.9004424169297477, validation accuracy - 0.9004424778761062
{'learning_rate': 0.01}: training accuracy - 0.9004424169297477, validation accuracy - 0.9004424778761062
{'learning_rate': 0.1}: training accuracy - 0.9015480001248831, validation accuracy - 0.9004424778761062
{'learning_rate': 1}: training accuracy - 0.9120537952540729, validation accuracy - 0.8882743362831859
{'learning_rate': 10}: training accuracy - 0.10454097177457122, validation accuracy - 0.10730088495575221
{'learning_rate': 100}: training accuracy - 0.9004424169297477, validation accuracy - 0.9004424778761062
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 0.001
Training accuracy: 0.7699116696571284
Validation accuracy: 0.7662241887905605
Test accuracy: 0.8802875311031242

Averag

#### Chess Dataset

In [45]:
train_list, val_list, test_list, test_final = [], [], [], []

rate_dict = {'learning_rate' : [0.001, 0.01, 0.1, 1, 10, 100]}

clf_ada_chess_8020_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'learning_rate', '80/20', '1')

clf_ada_chess_8020_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'learning_rate', '80/20', '2')

clf_ada_chess_8020_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'learning_rate', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_chess_5050_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'learning_rate', '50/50', '1')

clf_ada_chess_5050_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'learning_rate', '50/50', '2')

clf_ada_chess_5050_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'learning_rate', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_chess_2080_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'learning_rate', '20/80', '1')

clf_ada_chess_2080_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'learning_rate', '20/80', '2')

clf_ada_chess_2080_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'learning_rate', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.8091041963159425, validation accuracy - 0.8004694835680751
{'learning_rate': 0.01}: training accuracy - 0.9061036088878222, validation accuracy - 0.9061032863849765
{'learning_rate': 0.1}: training accuracy - 0.9407302104310924, validation accuracy - 0.9385758998435054
{'learning_rate': 1}: training accuracy - 0.971831323644084, validation accuracy - 0.9616588419405321
{'learning_rate': 10}: training accuracy - 0.7562562808726073, validation accuracy - 0.7562597809076682
{'learning_rate': 100}: training accuracy - 0.4827855985290496, validation accuracy - 0.48278560250391234
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.811135203113433
Validation accuracy: 0.807642149191445
Test accuracy: 0.9608763693270735

80/20 Split, T

20/80 Split, Trial 3
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.7519561815336463, validation accuracy - 0.7543035993740219
{'learning_rate': 0.01}: training accuracy - 0.8646322378716745, validation accuracy - 0.8575899843505478
{'learning_rate': 0.1}: training accuracy - 0.9374021909233177, validation accuracy - 0.9311424100156495
{'learning_rate': 1}: training accuracy - 0.9741784037558685, validation accuracy - 0.9561815336463224
{'learning_rate': 10}: training accuracy - 0.6784037558685446, validation accuracy - 0.704225352112676
{'learning_rate': 100}: training accuracy - 0.4694835680751173, validation accuracy - 0.4694835680751174
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.7793427230046949
Validation accuracy: 0.7788210745957225
Test accuracy: 0.9608763693270735

Average Accur

#### Adult Dataset

In [46]:
train_list, val_list, test_list, test_final = [], [], [], []

rate_dict = {'learning_rate' : [0.001, 0.01, 0.1, 1, 10, 100]}

clf_ada_adult_8020_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'learning_rate', '80/20', '1')

clf_ada_adult_8020_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'learning_rate', '80/20', '2')

clf_ada_adult_8020_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'learning_rate', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_adult_5050_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'learning_rate', '50/50', '1')

clf_ada_adult_5050_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'learning_rate', '50/50', '2')

clf_ada_adult_5050_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'learning_rate', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_ada_adult_2080_1 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'learning_rate', '20/80', '1')

clf_ada_adult_2080_2 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'learning_rate', '20/80', '2')

clf_ada_adult_2080_3 = GridSearchCV(AdaBoostClassifier(), rate_dict, cv = 3)
fit_plot(clf_ada_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'learning_rate', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
{'learning_rate': 0.01}: training accuracy - 0.7945240607106777, validation accuracy - 0.7945243362831859
{'learning_rate': 0.1}: training accuracy - 0.8434731202596675, validation accuracy - 0.8431969026548672
{'learning_rate': 1}: training accuracy - 0.8679475639831958, validation accuracy - 0.8523230088495575
{'learning_rate': 10}: training accuracy - 0.24253315687148516, validation accuracy - 0.24253318584070796
{'learning_rate': 100}: training accuracy - 0.7574668431285149, validation accuracy - 0.7574668141592921
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.710568598013676
Validation accuracy: 0.7079185103244838
Test accuracy: 0.865606936416185

80/20 Split,

20/80 Split, Trial 3
--------------------------------------------------------------------------------------------------
{'learning_rate': 0.001}: training accuracy - 0.7444688701196859, validation accuracy - 0.7444690265486725
{'learning_rate': 0.01}: training accuracy - 0.7948096358370568, validation accuracy - 0.793141592920354
{'learning_rate': 0.1}: training accuracy - 0.8423708331726382, validation accuracy - 0.8362831858407079
{'learning_rate': 1}: training accuracy - 0.9032045384006508, validation accuracy - 0.8407079646017699
{'learning_rate': 10}: training accuracy - 0.2555311298803142, validation accuracy - 0.2555309734513274
{'learning_rate': 100}: training accuracy - 0.7444688701196859, validation accuracy - 0.7444690265486725
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 1
Training accuracy: 0.714142312921672
Validation accuracy: 0.702433628318584
Test accuracy: 0.835693359375

Average Accuracies


### Algorithm 6: Neural Networks: Multi-layer Perceptron 

#### EEG Dataset

In [47]:
train_list, val_list, test_list, test_final = [], [], [], []

layers_dict = {'hidden_layer_sizes' : [10, 50, 100, 200]}

clf_mlp_eeg_8020_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_8020_1, X_eeg_train_val_8020_1, Y_eeg_train_val_8020_1,\
         X_eeg_test_8020_1, Y_eeg_test_8020_1,'hidden_layer_sizes', '80/20', '1')

clf_mlp_eeg_8020_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_8020_2, X_eeg_train_val_8020_2, Y_eeg_train_val_8020_2,\
         X_eeg_test_8020_2, Y_eeg_test_8020_2,'hidden_layer_sizes', '80/20', '2')

clf_mlp_eeg_8020_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_8020_3, X_eeg_train_val_8020_3, Y_eeg_train_val_8020_3,\
         X_eeg_test_8020_3, Y_eeg_test_8020_3,'hidden_layer_sizes', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_eeg_5050_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_5050_1, X_eeg_train_val_5050_1, Y_eeg_train_val_5050_1,\
         X_eeg_test_5050_1, Y_eeg_test_5050_1,'hidden_layer_sizes', '50/50', '1')

clf_mlp_eeg_5050_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_5050_2, X_eeg_train_val_5050_2, Y_eeg_train_val_5050_2,\
         X_eeg_test_5050_2, Y_eeg_test_5050_2,'hidden_layer_sizes', '50/50', '2')

clf_mlp_eeg_5050_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_5050_3, X_eeg_train_val_5050_3, Y_eeg_train_val_5050_3,\
         X_eeg_test_5050_3, Y_eeg_test_5050_3,'hidden_layer_sizes', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_eeg_2080_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_2080_1, X_eeg_train_val_2080_1, Y_eeg_train_val_2080_1,\
         X_eeg_test_2080_1, Y_eeg_test_2080_1,'hidden_layer_sizes', '20/80', '1')

clf_mlp_eeg_2080_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_2080_2, X_eeg_train_val_2080_2, Y_eeg_train_val_2080_2,\
         X_eeg_test_2080_2, Y_eeg_test_2080_2,'hidden_layer_sizes', '20/80', '2')

clf_mlp_eeg_2080_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_eeg_2080_3, X_eeg_train_val_2080_3, Y_eeg_train_val_2080_3,\
         X_eeg_test_2080_3, Y_eeg_test_2080_3,'hidden_layer_sizes', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.5563668661430744, validation accuracy - 0.5560747663551402
{'hidden_layer_sizes': 50}: training accuracy - 0.5649620073658853, validation accuracy - 0.5650033377837116
{'hidden_layer_sizes': 100}: training accuracy - 0.5764352716315765, validation accuracy - 0.5770193591455274
{'hidden_layer_sizes': 200}: training accuracy - 0.5725554009311823, validation accuracy - 0.5739319092122831
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 100
Training accuracy: 0.5675798865179296
Validation accuracy: 0.5680073431241656
Test accuracy: 0.5463951935914553

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.571845439544455, validation accu

#### Bank Dataset

In [48]:
train_list, val_list, test_list, test_final = [], [], [], []

layers_dict = {'hidden_layer_sizes' : [10, 50, 100, 200]}

clf_mlp_bank_8020_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_8020_1, X_bank_train_val_8020_1, Y_bank_train_val_8020_1,\
         X_bank_test_8020_1, Y_bank_test_8020_1,'hidden_layer_sizes', '80/20', '1')

clf_mlp_bank_8020_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_8020_2, X_bank_train_val_8020_2, Y_bank_train_val_8020_2,\
         X_bank_test_8020_2, Y_bank_test_8020_2,'hidden_layer_sizes', '80/20', '2')

clf_mlp_bank_8020_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_8020_3, X_bank_train_val_8020_3, Y_bank_train_val_8020_3,\
         X_bank_test_8020_3, Y_bank_test_8020_3,'hidden_layer_sizes', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_bank_5050_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_5050_1, X_bank_train_val_5050_1, Y_bank_train_val_5050_1,\
         X_bank_test_5050_1, Y_bank_test_5050_1,'hidden_layer_sizes', '50/50', '1')

clf_mlp_bank_5050_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_5050_2, X_bank_train_val_5050_2, Y_bank_train_val_5050_2,\
         X_bank_test_5050_2, Y_bank_test_5050_2,'hidden_layer_sizes', '50/50', '2')

clf_mlp_bank_5050_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_5050_3, X_bank_train_val_5050_3, Y_bank_train_val_5050_3,\
         X_bank_test_5050_3, Y_bank_test_5050_3,'hidden_layer_sizes', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_bank_2080_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_2080_1, X_bank_train_val_2080_1, Y_bank_train_val_2080_1,\
         X_bank_test_2080_1, Y_bank_test_2080_1,'hidden_layer_sizes', '20/80', '1')

clf_mlp_bank_2080_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_2080_2, X_bank_train_val_2080_2, Y_bank_train_val_2080_2,\
         X_bank_test_2080_2, Y_bank_test_2080_2,'hidden_layer_sizes', '20/80', '2')

clf_mlp_bank_2080_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_bank_2080_3, X_bank_train_val_2080_3, Y_bank_train_val_2080_3,\
         X_bank_test_2080_3, Y_bank_test_2080_3,'hidden_layer_sizes', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.8802545593838094, validation accuracy - 0.8791482300884956
{'hidden_layer_sizes': 50}: training accuracy - 0.8899342613809699, validation accuracy - 0.875
{'hidden_layer_sizes': 100}: training accuracy - 0.9030700577334626, validation accuracy - 0.8675331858407079
{'hidden_layer_sizes': 200}: training accuracy - 0.9137193240803819, validation accuracy - 0.8606194690265486
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 10
Training accuracy: 0.896744550644656
Validation accuracy: 0.870575221238938
Test accuracy: 0.9027624309392265

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.8801161321103107, validation accuracy - 0.878871

#### Chess Dataset

In [49]:
train_list, val_list, test_list, test_final = [], [], [], []

layers_dict = {'hidden_layer_sizes' : [10, 50, 100, 200]}

clf_mlp_chess_8020_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_8020_1, X_chess_train_val_8020_1, Y_chess_train_val_8020_1,\
         X_chess_test_8020_1, Y_chess_test_8020_1,'hidden_layer_sizes', '80/20', '1')

clf_mlp_chess_8020_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_8020_2, X_chess_train_val_8020_2, Y_chess_train_val_8020_2,\
         X_chess_test_8020_2, Y_chess_test_8020_2,'hidden_layer_sizes', '80/20', '2')

clf_mlp_chess_8020_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_8020_3, X_chess_train_val_8020_3, Y_chess_train_val_8020_3,\
         X_chess_test_8020_3, Y_chess_test_8020_3,'hidden_layer_sizes', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_chess_5050_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_5050_1, X_chess_train_val_5050_1, Y_chess_train_val_5050_1,\
         X_chess_test_5050_1, Y_chess_test_5050_1,'hidden_layer_sizes', '50/50', '1')

clf_mlp_chess_5050_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_5050_2, X_chess_train_val_5050_2, Y_chess_train_val_5050_2,\
         X_chess_test_5050_2, Y_chess_test_5050_2,'hidden_layer_sizes', '50/50', '2')

clf_mlp_chess_5050_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_5050_3, X_chess_train_val_5050_3, Y_chess_train_val_5050_3,\
         X_chess_test_5050_3, Y_chess_test_5050_3,'hidden_layer_sizes', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_chess_2080_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_2080_1, X_chess_train_val_2080_1, Y_chess_train_val_2080_1,\
         X_chess_test_2080_1, Y_chess_test_2080_1,'hidden_layer_sizes', '20/80', '1')

clf_mlp_chess_2080_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_2080_2, X_chess_train_val_2080_2, Y_chess_train_val_2080_2,\
         X_chess_test_2080_2, Y_chess_test_2080_2,'hidden_layer_sizes', '20/80', '2')

clf_mlp_chess_2080_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_chess_2080_3, X_chess_train_val_2080_3, Y_chess_train_val_2080_3,\
         X_chess_test_2080_3, Y_chess_test_2080_3,'hidden_layer_sizes', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.9808272352673578, validation accuracy - 0.9726134585289515
{'hidden_layer_sizes': 50}: training accuracy - 0.9996087635585296, validation accuracy - 0.9890453834115805
{'hidden_layer_sizes': 100}: training accuracy - 0.9996087635585296, validation accuracy - 0.9890453834115805
{'hidden_layer_sizes': 200}: training accuracy - 1.0, validation accuracy - 0.9890453834115805
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 50
Training accuracy: 0.9950111905961042
Validation accuracy: 0.9849374021909233
Test accuracy: 0.9874804381846636

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.9863047747773116, validation accuracy - 0.971439

#### Adult Dataset

In [50]:
train_list, val_list, test_list, test_final = [], [], [], []

layers_dict = {'hidden_layer_sizes' : [10, 50, 100, 200]}

clf_mlp_adult_8020_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_8020_1, X_adult_train_val_8020_1, Y_adult_train_val_8020_1,\
         X_adult_test_8020_1, Y_adult_test_8020_1,'hidden_layer_sizes', '80/20', '1')

clf_mlp_adult_8020_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_8020_2, X_adult_train_val_8020_2, Y_adult_train_val_8020_2,\
         X_adult_test_8020_2, Y_adult_test_8020_2,'hidden_layer_sizes', '80/20', '2')

clf_mlp_adult_8020_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_8020_3, X_adult_train_val_8020_3, Y_adult_train_val_8020_3,\
         X_adult_test_8020_3, Y_adult_test_8020_3,'hidden_layer_sizes', '80/20', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_adult_5050_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_5050_1, X_adult_train_val_5050_1, Y_adult_train_val_5050_1,\
         X_adult_test_5050_1, Y_adult_test_5050_1,'hidden_layer_sizes', '50/50', '1')

clf_mlp_adult_5050_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_5050_2, X_adult_train_val_5050_2, Y_adult_train_val_5050_2,\
         X_adult_test_5050_2, Y_adult_test_5050_2,'hidden_layer_sizes', '50/50', '2')

clf_mlp_adult_5050_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_5050_3, X_adult_train_val_5050_3, Y_adult_train_val_5050_3,\
         X_adult_test_5050_3, Y_adult_test_5050_3,'hidden_layer_sizes', '50/50', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

clf_mlp_adult_2080_1 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_2080_1, X_adult_train_val_2080_1, Y_adult_train_val_2080_1,\
         X_adult_test_2080_1, Y_adult_test_2080_1,'hidden_layer_sizes', '20/80', '1')

clf_mlp_adult_2080_2 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_2080_2, X_adult_train_val_2080_2, Y_adult_train_val_2080_2,\
         X_adult_test_2080_2, Y_adult_test_2080_2,'hidden_layer_sizes', '20/80', '2')

clf_mlp_adult_2080_3 = GridSearchCV(MLPClassifier(), layers_dict, cv = 3)
fit_plot(clf_mlp_adult_2080_3, X_adult_train_val_2080_3, Y_adult_train_val_2080_3,\
         X_adult_test_2080_3, Y_adult_test_2080_3,'hidden_layer_sizes', '20/80', '3')

average_acc_report()
train_list, val_list, test_list = [], [], []

test_acc_report()

80/20 Split, Trial 1
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.8368360665988585, validation accuracy - 0.8249446902654868
{'hidden_layer_sizes': 50}: training accuracy - 0.863938105260984, validation accuracy - 0.8202433628318584
{'hidden_layer_sizes': 100}: training accuracy - 0.8708516119927511, validation accuracy - 0.8160951327433629
{'hidden_layer_sizes': 200}: training accuracy - 0.8687780705422874, validation accuracy - 0.8147123893805309
--------------------------------------------------------------------------------------------------
Optimal hyperparameter: 10
Training accuracy: 0.8601009635987202
Validation accuracy: 0.8189988938053098
Test accuracy: 0.8316473988439307

80/20 Split, Trial 2
--------------------------------------------------------------------------------------------------
{'hidden_layer_sizes': 10}: training accuracy - 0.8413999230130688, validation accur

## Ranking

In [58]:
svm = (0.5526257231864709 + 0.8891677007332084 + 0.9858645685824025 + 0.8281719258999708)/4
dt = (0.7120141670375315 + 0.8889833859235134 + 0.9413460552868969 + 0.8367407892570168)/4
knn = (0.8021584334668447 + 0.8889281159363515 + 0.9182903707882114 + 0.8091604997492634)/4
logreg = (0.5686248331108145 + 0.8891677007332084 + 0.9627229219971322 + 0.8454759537788343)/4
ada = (0.7297952825990209 + 0.8889833859235134 + 0.9626534994512572 + 0.8522582494486431)/4
mlp = (0.8273970470948249 + 0.98595140117113 + 0.8884306859167792 + 0.556306556890669)/4

In [59]:
print('Overall performance of SVM is: ' + str(svm))
print('Overall performance of DT is: ' + str(dt))
print('Overall performance of KNN is: ' + str(knn))
print('Overall performance of Logistic Regression is: ' + str(logreg))
print('Overall performance of Ensemble is: ' + str(ada))
print('Overall performance of MLP is: ' + str(mlp))

Overall performance of SVM is: 0.8139574796005131
Overall performance of DT is: 0.8447710993762396
Overall performance of KNN is: 0.8546343549851678
Overall performance of Logistic Regression is: 0.8164978524049973
Overall performance of Ensemble is: 0.8584226043556087
Overall performance of MLP is: 0.8145214227683508


## Bonus Points

Testing was thoroughly completed on 4 datasets (EEG, bank, adult, and chess) with 6 classifiers (Support Vector Machine, K-Nearest Neighbors, Decision Tree, Adaboost, MLP, and Logistic Regression). 