In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV

from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")
from dnn_INSE_6180 import DNN
from dagmm_INSE_6180 import DAGMM
from sae_INSE_6180 import SAE

# Experiment 1

In [2]:
load_path = './datasets/chiller/df_dataset/'

def load_data(load_path, filename):
    csv_load_path = os.path.join(load_path, filename)
    return pd.read_csv(csv_load_path)

data = load_data(load_path, 'chiller10.csv') # Chiller data of Severity Level 1
data = data.drop('Unnamed: 0', axis='columns')
data_trn, data_tst = train_test_split(data, test_size=0.5, random_state=0)
X_trn, y_trn = data_trn.iloc[:, :-1], data_trn.iloc[:, -1]
X_tst, y_tst = data_tst.iloc[:, :-1], data_tst.iloc[:, -1]
scaler = StandardScaler().fit(X_trn)
X_trn_scaled, X_tst_scaled = scaler.transform(X_trn), scaler.transform(X_tst)

In [3]:
## Dataset 1 - 10% of Origninal data
data_trn_sub, _ = train_test_split(data_trn, test_size=0.9, random_state=0)
X_trn_d1, y_trn_d1 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d1 = scaler.transform(X_trn_d1)

In [4]:
## Dataset 2 - 3% of Origninal training data
data_trn_sub, _ = train_test_split(data_trn, test_size=0.97, random_state=0)
X_trn_d2, y_trn_d2 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d2 = scaler.transform(X_trn_d2)

In [5]:
## Dataset 3 - 1% of Origninal training data
data_trn_sub, _ = train_test_split(data_trn, test_size=0.99, random_state=0)
X_trn_d3, y_trn_d3 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d3 = scaler.transform(X_trn_d3)

In [6]:
## Dataset 4 - 0.5% of Origninal data
data_trn_sub, _ = train_test_split(data_trn, test_size=0.995, random_state=0)
X_trn_d4, y_trn_d4 = data_trn_sub.iloc[:, :-1], data_trn_sub.iloc[:, -1]

# Scaling data
X_trn_scaled_d4 = scaler.transform(X_trn_d4)

# Choose dataset

In [7]:

# # Dataset 1 - 10% of Original dataset
# X_trn_, X_tst_ = X_trn_d1, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d1, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d1, y_tst

# # Dataset 2 - 3% of Origninal data
# X_trn_, X_tst_ = X_trn_d2, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d2, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d2, y_tst

## Dataset 3 - 1% of Origninal data
X_trn_, X_tst_ = X_trn_d3, X_tst
X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d3, X_tst_scaled
y_trn_, y_tst_ = y_trn_d3, y_tst

# ## Dataset 4 - 0.5% of Origninal data
# X_trn_, X_tst_ = X_trn_d4, X_tst
# X_trn_scaled_, X_tst_scaled_ = X_trn_scaled_d4, X_tst_scaled
# y_trn_, y_tst_ = y_trn_d4, y_tst

X_trn_.shape

(207, 65)

# Pretrained model SAE

In [36]:
# Pretrain use SAE

out_dir = './results/'
model_sae = SAE(sae_hiddens=[100, 50, 20], out_directory = out_dir, dropout_rate=0, n_epochs=100, normalize=True)
model_sae.build_model(X_trn)
model_sae.fit(X_trn)

Epoch 1/100
Consider rewriting this model with the Functional API.
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


In [26]:
# Pretrain using DAGMM
out_dir = './results/'
model_dagmm = DAGMM(comp_hiddens=[100, 50, 20], comp_activation="elu",
                    est_hiddens=[10, 3], est_activation="elu", est_dropout_ratio=0.1,
                    n_epochs=30, batch_size=1024, normalize=True)
model_dagmm.build(X_trn)
model_dagmm.fit(X_trn)

Epoch 1/30
Best Epoch: 1
Epoch 2/30
Best Epoch: 2
Epoch 3/30
Best Epoch: 3
Epoch 4/30
Best Epoch: 4
Epoch 5/30
Best Epoch: 5
Epoch 6/30
Best Epoch: 6
Epoch 7/30
Best Epoch: 7
Epoch 8/30
Best Epoch: 7
Epoch 9/30
Best Epoch: 9
Epoch 10/30
Best Epoch: 9
Epoch 11/30
Best Epoch: 11
Epoch 12/30
Best Epoch: 11
Epoch 13/30
Best Epoch: 11
Epoch 14/30
Best Epoch: 14
Epoch 15/30
Best Epoch: 14
Epoch 16/30
Best Epoch: 16
Epoch 17/30
Best Epoch: 16
Epoch 18/30
Best Epoch: 16
Epoch 19/30
Best Epoch: 16
Epoch 20/30
Best Epoch: 20
Epoch 21/30
Best Epoch: 20
Epoch 22/30
Best Epoch: 22
Epoch 23/30
Best Epoch: 22
Epoch 24/30
Best Epoch: 24
Epoch 25/30
Best Epoch: 25
Epoch 26/30
Best Epoch: 25
Epoch 27/30
Best Epoch: 25
Epoch 28/30
Best Epoch: 28
Epoch 29/30
Best Epoch: 28
Epoch 30/30
Best Epoch: 28


# DNN models with and without pretrain

In [38]:
# Using SAE to pretrain

sae_model = model_sae.restore()

DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=sae_model, normalize=True, rate=0, n_epochs=100, pretrain_sae=True, 
                     pretrain_dagmm=False, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
DNN accuracy: 0.90
Training accuracry: 90.34%


DNN accuracy: 0.80
Test accuracry: 80.49%
Confusion matrix: 
 [[1766    0   26  609   81   49   44    3]
 [   7 2531   33   12    0    1    0    2]
 [   1    0 2574    1    0    0    0    0]
 [ 471    0   17 1014  486  550   23    0]
 [  40    0   24  313 2156  108    2   11]
 [ 371    0   26  532  100 1558    3    0]
 [   3    2   24   17    2    1 2580   16]
 [   2    0   17   12    2    0    7 2534]]
Classification report: 
               precision    recall  f1-score   support

           0     0.6637    0.6850    0.6742      2578
           1     0.9992    0.9787    0.9889      2586
           2     0.9391    0.9992    0.9682      2576
           3     0.4040    0.3959    0.3999      2561
           4     0.7626    0.8124    0.7867      2654
           5     0.6873    0.6015    0.6415      2590
           6     0.9703    0.9754    0.9729      2645
           7     0.9875    0.9845    0.9860      2574

    accuracy                         0.8049     20764
   macro avg     0.8017   

In [28]:
# Using DAGMM to pretrain

dagmm_model = model_dagmm.restore()
out_dir = './results/'
DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=dagmm_model, normalize=True, rate=0, n_epochs=100, pretrain_sae=False,
                     pretrain_dagmm=True, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
DNN accuracy: 0.95
Training accuracry: 94.69%


DNN accuracy: 0.84
Test accuracry: 84.24%
Confusion matrix: 
 [[1844    1   16  579   58   73    7    0]
 [   0 2546   12    7    0   21    0    0]
 [   0    0 2532   21    0   23    0    0]
 [ 463    0   10 1236  424  424    4    0]
 [  33    3   14  227 2207  152    4   14]
 [ 199    0   17  215  122 2037    0    0]
 [   2    1   17   18    1   15 2588    3]
 [   0    0   17   14   34    7    1 2501]]
Classification report: 
               precision    recall  f1-score   support

           0     0.7257    0.7153    0.7205      2578
           1     0.9980    0.9845    0.9912      2586
           2     0.9609    0.9829    0.9718      2576
           3     0.5334    0.4826    0.5068      2561
           4     0.7755    0.8316    0.8025      2654
           5     0.7402    0.7865    0.7626      2590
           6     0.9939    0.9784    0.9861      2645
           7     0.9932    0.9716    0.9823      2574

    accuracy                         0.8424     20764
   macro avg     0.8401   

In [45]:
# Without pretrain

out_dir = './results/'
DNN_classifier = DNN(dnn_hiddens=[100, 50, 20], output_size=8, out_directory=out_dir,
                     pretrained_model=None, normalize=True, rate=0, n_epochs=100, pretrain_sae=False,
                     pretrain_dagmm=False, monte_carlo=True)
DNN_classifier.build_model(X_trn_)
DNN_classifier.fit((X_trn_, y_trn_))

print("Training accuracry: %.2f%%" %(100*DNN_classifier.score((X_trn_, y_trn_))))
print("Test accuracry: %.2f%%" %(100*DNN_classifier.score((X_tst_, y_tst_))))

y_tst_pred_ = DNN_classifier.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
DNN accuracy: 0.96
Training accuracry: 95.65%
DNN accuracy: 0.78
Test accuracry: 77.91%
Confusion matrix: 
 [[1662   17    1  692   94  101   10    1]
 [   4 2558    1   14    6    3    0    0]
 [   4   20 2527   15    5    5    0    0]
 [ 328   15    1  997  543  671    6    0]
 [  16   18    2  343 2041  210    2   22]
 [ 310   20    4  442  241 1572    1    0]
 [  29   67   12   

# Baseline models

In [13]:
## 1. KNN MODEL

print("KNN MODEL:")

k = [i for i in range(2,10)]
p = [j for j in range(1,3)]
param_grid = [{'n_neighbors': k, 'p': p}]
knn_grid_search = GridSearchCV(KNeighborsClassifier(), param_grid=param_grid, cv=3,
                           scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                           verbose=1)

knn_grid_search.fit(X_trn_scaled_, y_trn_)
knn_clf = knn_grid_search.best_estimator_
print(knn_clf)

print("Train Accuracy:", 100*knn_clf.score(X_trn_scaled_, y_trn_), chr(37))
print("Test Accuracy:", 100*knn_clf.score(X_tst_scaled_, y_tst_), chr(37))

y_tst_pred_ = knn_clf.predict(X_tst_scaled_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

## 2. SVM MODEL

print("SVM MODEL:")

C = [2**i for i in range(0, 4)]
gamma = [2**j for j in range(-2,2)]
param_grid = [{'C': C, 'gamma': gamma}]
svm_grid_search = GridSearchCV(SVC(), param_grid=param_grid, cv=3,
                           scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                           verbose=1)
svm_grid_search.fit(X_trn_scaled_, y_trn_)
svm_clf = svm_grid_search.best_estimator_
print(svm_clf)

print("Train Accuracy:", 100*svm_clf.score(X_trn_scaled_, y_trn_), chr(37))
print("Test Accuracy:", 100*svm_clf.score(X_tst_scaled_, y_tst_), chr(37))

y_tst_pred_ = svm_clf.predict(X_tst_scaled_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")

## 3. RANDOM FOREST MODEL

print("RANDOM FOREST MODEL:")

n_estimators_ = [int(x) for x in np.linspace(100, 550, 10)]

param_grid = {'n_estimators':n_estimators_}
rf_grid_search = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv=3,
                              scoring='f1_weighted', n_jobs=-1, return_train_score=True,
                              verbose=1)
rf_grid_search.fit(X_trn_, y_trn_)

rf_clf = rf_grid_search.best_estimator_

print(rf_clf)

print("Train Accuracy:", 100*rf_clf.score(X_trn_, y_trn_), chr(37))
print("Test Accuracy:", 100*rf_clf.score(X_tst_, y_tst_), chr(37))

y_tst_pred_ = rf_clf.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")



# ## 4. LOGISTIC REGRESSION MODEL

# C= np.logspace(-4,4,9)
# solver=['newton-cg']

# param_grid = [{'solver': solver, 'C': C}]
# lr_grid_search = GridSearchCV(LogisticRegression(max_iter=100),
#                            param_grid=param_grid, cv=3, scoring='f1_weighted',
#                            n_jobs=-1, return_train_score=True, verbose=1)
# lr_grid_search.fit(X_trn_scaled_, y_trn_)
# lr_clf = lr_grid_search.best_estimator_
# print(lr_clf)

# print("Train Accuracy:", 100*lr_clf.score(X_trn_scaled_, y_trn_), chr(37))
# print("Test Accuracy:", 100*lr_clf.score(X_tst_scaled_, y_tst_), chr(37))

## 5. ADABOOST MODEL

print("ADABOOST MODEL:")

param_grid = {
    'n_estimators' : [100, 200, 300],
    'learning_rate' : [0.001, 0.01, 0.1]
}
ad_grid_search = GridSearchCV(AdaBoostClassifier(), param_grid = param_grid,
                                cv=3, scoring='f1_weighted', n_jobs=-1, return_train_score=True, verbose=1
                                )
ad_grid_search.fit(X_trn_, y_trn_)

ad_clf = ad_grid_search.best_estimator_

print(ad_clf)

print("Train Accuracy:", 100*ad_clf.score(X_trn_, y_trn_), chr(37))
print("Test Accuracy:", 100*ad_clf.score(X_tst_, y_tst_), chr(37))

y_tst_pred_ = ad_clf.predict(X_tst_)
print("====================================================================")
print("Confusion matrix: \n", confusion_matrix(y_tst_, y_tst_pred_))
print("====================================================================")
print("Classification report: \n", classification_report(y_tst_, y_tst_pred_, digits=4))
print("====================================================================")


# ## 6. GAUSIAN NAIVE BAYSESSIAN

# params_NB = {'var_smoothing': np.logspace(0,-9, num=100)}
# gnb_grid_search = GridSearchCV(GaussianNB(), param_grid=params_NB, cv=3, scoring='accuracy',return_train_score=True)
# gnb_grid_search.fit(X_trn_scaled_, y_trn_)

# gnb_clf = gnb_grid_search.best_estimator_

# print(gnb_clf)

# print("Train Accuracy:", 100*gnb_clf.score(X_trn_scaled_, y_trn_), chr(37))
# print("Test Accuracy:", 100*gnb_clf.score(X_tst_scaled_, y_tst_), chr(37))


KNN MODEL:
Fitting 3 folds for each of 16 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.9s finished


KNeighborsClassifier(n_neighbors=7)
Train Accuracy: 53.62318840579711 %
Test Accuracy: 25.317857830861108 %
Confusion matrix: 
 [[ 770  255  199  318  579  208  241    8]
 [ 779  443   37  241  618  196  264    8]
 [ 443  153 1176  158  394  239   13    0]
 [ 792  270  162  428  453  201  229   26]
 [ 825  271  130  425  512  202  205   84]
 [ 870  273  160  362  550  252  108   15]
 [ 513  172  131  194  352   81 1147   55]
 [ 582  116   82  166  654   98  347  529]]
Classification report: 
               precision    recall  f1-score   support

           0     0.1381    0.2987    0.1889      2578
           1     0.2268    0.1713    0.1952      2586
           2     0.5662    0.4565    0.5055      2576
           3     0.1867    0.1671    0.1764      2561
           4     0.1245    0.1929    0.1513      2654
           5     0.1706    0.0973    0.1239      2590
           6     0.4491    0.4336    0.4412      2645
           7     0.7297    0.2055    0.3207      2574

    accuracy  

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    0.0s finished


Test Accuracy: 45.26584473126565 %
Confusion matrix: 
 [[ 971    1    0  418  392  207  501   88]
 [ 491  678    0   13  399   61  926   18]
 [   0    0 1237    0    0    0 1339    0]
 [ 427    0    0  717  732  203  348  134]
 [ 330    0    0  240 1283  157  400  244]
 [ 335    0    0  381  375  965  390  144]
 [  92    0    0   12  168    0 2134  239]
 [  48    0    0    0  516    0  596 1414]]
Classification report: 
               precision    recall  f1-score   support

           0     0.3604    0.3766    0.3684      2578
           1     0.9985    0.2622    0.4153      2586
           2     1.0000    0.4802    0.6488      2576
           3     0.4026    0.2800    0.3303      2561
           4     0.3320    0.4834    0.3936      2654
           5     0.6058    0.3726    0.4614      2590
           6     0.3217    0.8068    0.4600      2645
           7     0.6199    0.5493    0.5825      2574

    accuracy                         0.4527     20764
   macro avg     0.5801    0.4514

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.9s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.9s finished


RandomForestClassifier(n_estimators=200)
Train Accuracy: 100.0 %
Test Accuracy: 79.23328838374108 %
Confusion matrix: 
 [[1540    0   28  657   41  299   13    0]
 [   0 2579    7    0    0    0    0    0]
 [   0    0 2576    0    0    0    0    0]
 [ 205    0    8 1270  543  524   11    0]
 [  48    0   12  366 2081  137    9    1]
 [ 348    0   13  454  428 1345    1    1]
 [  17    0   13   29    1    0 2583    2]
 [   0    0   10   22   59    1    4 2478]]
Classification report: 
               precision    recall  f1-score   support

           0     0.7136    0.5974    0.6503      2578
           1     1.0000    0.9973    0.9986      2586
           2     0.9659    1.0000    0.9826      2576
           3     0.4539    0.4959    0.4740      2561
           4     0.6600    0.7841    0.7167      2654
           5     0.5833    0.5193    0.5494      2590
           6     0.9855    0.9766    0.9810      2645
           7     0.9984    0.9627    0.9802      2574

    accuracy          

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  24 out of  27 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.2s finished


AdaBoostClassifier(learning_rate=0.1, n_estimators=200)
Train Accuracy: 76.32850241545893 %
Test Accuracy: 64.48661144288191 %
Confusion matrix: 
 [[   0    0    0 1267  191 1078   42    0]
 [   0 2476    0  109    1    0    0    0]
 [   0    0 2575    1    0    0    0    0]
 [   0    0    0  641 1043  865   11    1]
 [   0    0    0  322 2182  139   10    1]
 [   0    0    0  198 1362 1029    1    0]
 [   0    0    0   54    5    0 2311  275]
 [   0    0    0   28  112    0  258 2176]]
Classification report: 
               precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000      2578
           1     1.0000    0.9575    0.9783      2586
           2     1.0000    0.9996    0.9998      2576
           3     0.2447    0.2503    0.2474      2561
           4     0.4457    0.8222    0.5780      2654
           5     0.3308    0.3973    0.3610      2590
           6     0.8777    0.8737    0.8757      2645
           7     0.8871    0.8454    0.8657      2

  _warn_prf(average, modifier, msg_start, len(result))
