In [1]:
# Import common libraries

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV

from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")
from dnn_INSE_6180 import DNN
from dagmm_INSE_6180 import DAGMM
from sae_INSE_6180 import SAE

# Experiment 1

In [2]:
load_path = './datasets/chiller/df_dataset/'

def load_data(load_path, filename):
    csv_load_path = os.path.join(load_path, filename)
    return pd.read_csv(csv_load_path)

data = load_data(load_path, 'chiller10.csv') # Chiller data of Severity Level 1
data = data.drop('Unnamed: 0', axis='columns')
data_trn, data_tst = train_test_split(data, test_size=0.5, random_state=0)
X_trn, y_trn = data_trn.iloc[:, :-1], data_trn.iloc[:, -1]
X_tst, y_tst = data_tst.iloc[:, :-1], data_tst.iloc[:, -1]

# Scale data
scaler = StandardScaler().fit(X_trn)
X_trn_scaled, X_tst_scaled = scaler.transform(X_trn), scaler.transform(X_tst)

In [3]:
## Dataset 1 - 10% of Origninal data (2076 labeled data samples)
data_trn_sub, _ = train_test_split(data_trn, test_size=0.9, random_state=0)
X_trn_d1, y_trn_d1 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d1 = scaler.transform(X_trn_d1)

In [4]:
## Dataset 2 - 3% of Origninal training data (622 labeled data samples)
data_trn_sub, _ = train_test_split(data_trn, test_size=0.97, random_state=0)
X_trn_d2, y_trn_d2 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d2 = scaler.transform(X_trn_d2)

In [5]:
## Dataset 3 - 1% of Origninal training data (207 labeled data samples)
data_trn_sub, _ = train_test_split(data_trn, test_size=0.99, random_state=0)
X_trn_d3, y_trn_d3 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d3 = scaler.transform(X_trn_d3)

In [6]:
## Dataset 4 - 0.5% of Origninal data (103 labeled data samples)
data_trn_sub, _ = train_test_split(data_trn, test_size=0.995, random_state=0)
X_trn_d4, y_trn_d4 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d4 = scaler.transform(X_trn_d4)

# Choose dataset

In [7]:
### Select labeled datasets with different sizes to test model

# # Dataset 1 - 10% of Original dataset
# X_trn_, X_tst_ = X_trn_d1, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d1, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d1, y_tst

# # Dataset 2 - 3% of Origninal data
# X_trn_, X_tst_ = X_trn_d2, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d2, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d2, y_tst

## Dataset 3 - 1% of Origninal data
X_trn_, X_tst_ = X_trn_d3, X_tst
X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d3, X_tst_scaled
y_trn_, y_tst_ = y_trn_d3, y_tst

# ## Dataset 4 - 0.5% of Origninal data
# X_trn_, X_tst_ = X_trn_d4, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d4, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d4, y_tst

X_trn_.shape

(207, 65)

# DNN classifier without pretraining

In [8]:
# Without pretrain

out_dir = './results/'

# Create and train DNN_classifier
DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=None, normalize=True, rate=0, n_epochs=100, pretrain_sae=False,
                     pretrain_dagmm=False, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
DNN accuracy: 0.94
Training accuracry: 94.20%
DNN accuracy: 0.77
Test accuracry: 77.45%
Confusion matrix: 
 [[1497   10    1  838   14  207   11    0]
 [   8 2420    0   41   99   11    3    4]
 [   1   10 2515   23    9   16    2    0]
 [ 368    7    0 1008  517  650   10    1]
 [ 164   11    3  175 2039  248    7    7]
 [ 263   13    0  386  359 1567    2    0]
 [   3   13    2   34    7    0 2581    5]
 [   0    9    1   22   82    0    6 2454]]
Classification report: 
               precision    recall  f1-score   support

           0     0.6497    0.5807 

# DNN classifier using SAE as pretrained network

In [14]:
# Pretrain using SAE (Using SAE as pretrained model which is then used to pretrain DNN)

out_dir = './results/'
# create SAE model and train the model using unlabeled data
model_sae = SAE(sae_hiddens=[100, 50, 20], out_directory = out_dir, dropout_rate=0, n_epochs=100, normalize=True)
model_sae.build_model(X_trn) # X_trn is the unlabeled data ()
model_sae.fit(X_trn) # Fit and save the best SAE model to the out_dir path

Epoch 1/100
Consider rewriting this model with the Functional API.
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


In [15]:
# Using SAE to pretrain

sae_model = model_sae.restore() # load sae_mode saved in the previous step

# apply pretraining technique using sae_model as pretrained network
DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=sae_model, normalize=True, rate=0, n_epochs=100, pretrain_sae=True, 
                     pretrain_dagmm=False, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
DNN accuracy: 0.90
Training accuracry: 90.34%
DNN accuracy: 0.82
Test accuracry: 82.06%
Confusion matrix: 
 [[1736    0    2  441   68  322    9    0]
 [   1 2535    0   35    2    9    1    3]
 [   0    0 2529   39    0    7    1    0]
 [ 483    1    2  738  520  808    9    0]
 [  78    2    1   82 2325  164  

# DNN classifier using DAGMM as pretrained network

In [11]:
# Pretrain using DAGMM
out_dir = './results/'
# create dagmm model and train the model using unlabeled data
model_dagmm = DAGMM(comp_hiddens=[100, 50, 20], comp_activation="elu",
                    est_hiddens=[10, 3], est_activation="elu", est_dropout_ratio=0.1,
                    n_epochs=30, batch_size=1024, normalize=True)
model_dagmm.build(X_trn)
model_dagmm.fit(X_trn)

Epoch 1/30
Best Epoch: 1
Epoch 2/30
Best Epoch: 2
Epoch 3/30
Best Epoch: 3
Epoch 4/30
Best Epoch: 4
Epoch 5/30
Best Epoch: 5
Epoch 6/30
Best Epoch: 5
Epoch 7/30
Best Epoch: 7
Epoch 8/30
Best Epoch: 7
Epoch 9/30
Best Epoch: 7
Epoch 10/30
Best Epoch: 10
Epoch 11/30
Best Epoch: 11
Epoch 12/30
Best Epoch: 11
Epoch 13/30
Best Epoch: 13
Epoch 14/30
Best Epoch: 13
Epoch 15/30
Best Epoch: 15
Epoch 16/30
Best Epoch: 15
Epoch 17/30
Best Epoch: 17
Epoch 18/30
Best Epoch: 18
Epoch 19/30
Best Epoch: 18
Epoch 20/30
Best Epoch: 18
Epoch 21/30
Best Epoch: 21
Epoch 22/30
Best Epoch: 21
Epoch 23/30
Best Epoch: 23
Epoch 24/30
Best Epoch: 23
Epoch 25/30
Best Epoch: 23
Epoch 26/30
Best Epoch: 23
Epoch 27/30
Best Epoch: 27
Epoch 28/30
Best Epoch: 28
Epoch 29/30
Best Epoch: 28
Epoch 30/30
Best Epoch: 28


In [12]:
# Using DAGMM to pretrain DNN

dagmm_model = model_dagmm.restore() # load dagmm model saved in the previous step
out_dir = './results/'

# apply pretraining technique using dagmm as pretrained network
DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=dagmm_model, normalize=True, rate=0, n_epochs=100, pretrain_sae=False,
                     pretrain_dagmm=True, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
DNN accuracy: 0.98
Training accuracry: 97.58%
DNN accuracy: 0.84
Test accuracry: 84.45%
Confusion matrix: 
 [[1973   10    0  505   26   58    6    0]
 [   8 2547    0   12    2   17    0    0]
 [   7   10 2525   21    0   13    0    0]
 [ 425    8    0 1317  356  444    7    4]
 [  31   11    0  278 2171  151    4    8]
 [ 197   13    0  364  110 1906    0    0]
 [  24   12    1   30    2    1 2570    5]
 [   7    9    0   18    3    3    7 2527]]
Classification report: 
               precision    recall  f1-score   support

           0     0.7384    0.7653    0.7516      2578
           1     0.9721    0.9849    0.9785      2586
           2     0.9996    0.9802    0.9898      2576
           3     0.5175    0.5143    0.5159      2561
           4     0.8131    0.8180    0.8156      2654
           5   

# Baseline models

In [16]:
## 1. KNN MODEL

print("KNN MODEL:")

k = [i for i in range(2,10)]
p = [j for j in range(1,3)]
param_grid = [{'n_neighbors': k, 'p': p}]
knn_grid_search = GridSearchCV(KNeighborsClassifier(), param_grid=param_grid, cv=3,
                           scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                           verbose=1)

knn_grid_search.fit(X_trn_scaled_, y_trn_)
knn_clf = knn_grid_search.best_estimator_
print(knn_clf)

print("Train Accuracy:", 100*knn_clf.score(X_trn_scaled_, y_trn_), chr(37))
print("Test Accuracy:", 100*knn_clf.score(X_tst_scaled_, y_tst_), chr(37))

y_tst_pred_ = knn_clf.predict(X_tst_scaled_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

## 2. SVM MODEL

print("SVM MODEL:")

C = [2**i for i in range(0, 4)]
gamma = [2**j for j in range(-2,2)]
param_grid = [{'C': C, 'gamma': gamma}]
svm_grid_search = GridSearchCV(SVC(), param_grid=param_grid, cv=3,
                           scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                           verbose=1)
svm_grid_search.fit(X_trn_scaled_, y_trn_)
svm_clf = svm_grid_search.best_estimator_
print(svm_clf)

print("Train Accuracy:", 100*svm_clf.score(X_trn_scaled_, y_trn_), chr(37))
print("Test Accuracy:", 100*svm_clf.score(X_tst_scaled_, y_tst_), chr(37))

y_tst_pred_ = svm_clf.predict(X_tst_scaled_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

## 3. RANDOM FOREST MODEL

print("RANDOM FOREST MODEL:")

n_estimators_ = [int(x) for x in np.linspace(100, 550, 10)]

param_grid = {'n_estimators':n_estimators_}
rf_grid_search = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv=3,
                              scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                              verbose=1)
rf_grid_search.fit(X_trn_, y_trn_)

rf_clf = rf_grid_search.best_estimator_

print(rf_clf)

print("Train Accuracy:", 100*rf_clf.score(X_trn_, y_trn_), chr(37))
print("Test Accuracy:", 100*rf_clf.score(X_tst_, y_tst_), chr(37))

y_tst_pred_ = rf_clf.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")



# ## 4. LOGISTIC REGRESSION MODEL

# C= np.logspace(-4,4,9)
# solver=['newton-cg']

# param_grid = [{'solver': solver, 'C': C}]
# lr_grid_search = GridSearchCV(LogisticRegression(max_iter=100),
#                            param_grid=param_grid, cv=3, scoring='f1_weighted',
#                            n_jobs=-1, return_train_score=True, verbose=1)
# lr_grid_search.fit(X_trn_scaled_, y_trn_)
# lr_clf = lr_grid_search.best_estimator_
# print(lr_clf)

# print("Train Accuracy:", 100*lr_clf.score(X_trn_scaled_, y_trn_), chr(37))
# print("Test Accuracy:", 100*lr_clf.score(X_tst_scaled_, y_tst_), chr(37))

## 5. ADABOOST MODEL

print("ADABOOST MODEL:")

param_grid = {
    'n_estimators' : [100, 200, 300],
    'learning_rate' : [0.001, 0.01, 0.1]
}
ad_grid_search = GridSearchCV(AdaBoostClassifier(), param_grid = param_grid,
                                cv=3, scoring='f1_weighted', n_jobs=-1, return_train_score=True, verbose=1
                                )
ad_grid_search.fit(X_trn_, y_trn_)

ad_clf = ad_grid_search.best_estimator_

print(ad_clf)

print("Train Accuracy:", 100*ad_clf.score(X_trn_, y_trn_), chr(37))
print("Test Accuracy:", 100*ad_clf.score(X_tst_, y_tst_), chr(37))

y_tst_pred_ = ad_clf.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")


# ## 6. GAUSIAN NAIVE BAYSESSIAN

# params_NB = {'var_smoothing': np.logspace(0,-9, num=100)}
# gnb_grid_search = GridSearchCV(GaussianNB(), param_grid=params_NB, cv=3, scoring='accuracy',return_train_score=True)
# gnb_grid_search.fit(X_trn_scaled_, y_trn_)

# gnb_clf = gnb_grid_search.best_estimator_

# print(gnb_clf)

# print("Train Accuracy:", 100*gnb_clf.score(X_trn_scaled_, y_trn_), chr(37))
# print("Test Accuracy:", 100*gnb_clf.score(X_tst_scaled_, y_tst_), chr(37))


KNN MODEL:
Fitting 3 folds for each of 16 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.2s finished


KNeighborsClassifier(n_neighbors=7)
Train Accuracy: 53.62318840579711 %
Test Accuracy: 25.317857830861108 %
Confusion matrix: 
 [[ 770  255  199  318  579  208  241    8]
 [ 779  443   37  241  618  196  264    8]
 [ 443  153 1176  158  394  239   13    0]
 [ 792  270  162  428  453  201  229   26]
 [ 825  271  130  425  512  202  205   84]
 [ 870  273  160  362  550  252  108   15]
 [ 513  172  131  194  352   81 1147   55]
 [ 582  116   82  166  654   98  347  529]]
Classification report: 
               precision    recall  f1-score   support

           0     0.1381    0.2987    0.1889      2578
           1     0.2268    0.1713    0.1952      2586
           2     0.5662    0.4565    0.5055      2576
           3     0.1867    0.1671    0.1764      2561
           4     0.1245    0.1929    0.1513      2654
           5     0.1706    0.0973    0.1239      2590
           6     0.4491    0.4336    0.4412      2645
           7     0.7297    0.2055    0.3207      2574

    accuracy  

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    0.0s finished


Test Accuracy: 45.26584473126565 %
Confusion matrix: 
 [[ 971    1    0  418  392  207  501   88]
 [ 491  678    0   13  399   61  926   18]
 [   0    0 1237    0    0    0 1339    0]
 [ 427    0    0  717  732  203  348  134]
 [ 330    0    0  240 1283  157  400  244]
 [ 335    0    0  381  375  965  390  144]
 [  92    0    0   12  168    0 2134  239]
 [  48    0    0    0  516    0  596 1414]]
Classification report: 
               precision    recall  f1-score   support

           0     0.3604    0.3766    0.3684      2578
           1     0.9985    0.2622    0.4153      2586
           2     1.0000    0.4802    0.6488      2576
           3     0.4026    0.2800    0.3303      2561
           4     0.3320    0.4834    0.3936      2654
           5     0.6058    0.3726    0.4614      2590
           6     0.3217    0.8068    0.4600      2645
           7     0.6199    0.5493    0.5825      2574

    accuracy                         0.4527     20764
   macro avg     0.5801    0.4514

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.6s finished


RandomForestClassifier(n_estimators=250)
Train Accuracy: 100.0 %
Test Accuracy: 79.47408977075709 %
Confusion matrix: 
 [[1549    0   33  673   25  285   13    0]
 [   0 2579    7    0    0    0    0    0]
 [   0    0 2576    0    0    0    0    0]
 [ 170    0    8 1304  529  539   11    0]
 [  45    0   12  376 2067  144    8    2]
 [ 344    0   13  436  422 1373    1    1]
 [  20    0   13   33    0    0 2576    3]
 [   0    0   10   25   57    0    4 2478]]
Classification report: 
               precision    recall  f1-score   support

           0     0.7279    0.6009    0.6583      2578
           1     1.0000    0.9973    0.9986      2586
           2     0.9641    1.0000    0.9817      2576
           3     0.4580    0.5092    0.4822      2561
           4     0.6668    0.7788    0.7185      2654
           5     0.5865    0.5301    0.5569      2590
           6     0.9858    0.9739    0.9798      2645
           7     0.9976    0.9627    0.9798      2574

    accuracy          

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  24 out of  27 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.1s finished


AdaBoostClassifier(learning_rate=0.01, n_estimators=100)
Train Accuracy: 79.22705314009661 %
Test Accuracy: 72.30784049316124 %
Confusion matrix: 
 [[ 951    0   10  269  115 1191   42    0]
 [   0 2578    7    0    1    0    0    0]
 [   0    0 2576    0    0    0    0    0]
 [ 148    0    7  328  898 1168   11    1]
 [   0    0   12  154 2463   14   10    1]
 [  30    0   13  225 1174 1147    1    0]
 [   4    0   13   14   26    2 2570   16]
 [   0    0   10    4  116   10   33 2401]]
Classification report: 
               precision    recall  f1-score   support

           0     0.8394    0.3689    0.5125      2578
           1     1.0000    0.9969    0.9985      2586
           2     0.9728    1.0000    0.9862      2576
           3     0.3300    0.1281    0.1845      2561
           4     0.5139    0.9280    0.6615      2654
           5     0.3247    0.4429    0.3747      2590
           6     0.9636    0.9716    0.9676      2645
           7     0.9926    0.9328    0.9617      