In [1]:
import sys
import numpy as np
import pickle
import logging

sys.path.append('../../analyse')
sys.path.append('../../')

from analyse.utils import download_db as ddb
from analyse.utils.global_config import CONFIG

from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV

from sklearn.neural_network import MLPClassifier


In [2]:
# Route all log records (DEBUG and above) to a fresh 'run-logs.log' file.
# force=True removes any handlers already attached to the root logger first;
# without it basicConfig() is silently ignored when handlers exist, which is
# what happens when this cell is re-run or when the kernel pre-configures logging.
logging.basicConfig(
    filename='run-logs.log', 
    encoding='utf-8', 
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.DEBUG, 
    filemode='w',  # truncate the log on every (re)configuration
    force=True
)

# Download data and preprocess it

In [3]:
# Presumably loads every available signal through the project's download_db helper.
# NOTE(review): `signals` is not referenced by any later cell shown in this notebook,
# and the saved run of this cell ended in KeyboardInterrupt — confirm it is still needed.
signals = ddb.get_all_signals()

KeyboardInterrupt: 

In [None]:
# Obtain the train/test partitions from the project helper. The split
# parameters are named so they are easy to find and tune; the fixed seed
# keeps the partitioning repeatable between runs.
TEST_FRACTION = 0.5
SPLIT_SEED = 42

X_train, y_train, X_test, y_test = ddb.split_dbs(
    test_size=TEST_FRACTION,
    seed=SPLIT_SEED,
)


In [None]:
# Show the training features returned by ddb.split_dbs (the display is truncated).
X_train

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,-0.002326,0.055419,0.099190,0.222344,0.923729,-0.420833,1.662562,4,0,2,...,0,2,0,0,1,4,0,0,0,0
1,-0.004695,0.038917,0.093349,0.207399,0.923729,-0.420833,1.167500,5,0,2,...,0,2,0,0,1,4,0,0,0,0
2,-0.004695,0.040613,0.091894,0.205703,0.923729,-0.420833,1.218393,5,0,3,...,0,2,0,0,1,3,0,0,0,0
3,-0.004695,0.038467,0.091732,0.203557,0.923729,-0.420833,1.154017,5,0,3,...,0,2,0,0,1,3,0,0,0,0
4,-0.004785,0.015771,0.082734,0.188534,0.923729,-0.420833,0.473120,5,0,3,...,0,2,0,1,1,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1218250,-0.306454,2.769789,127.350883,3.679040,53.666667,-1.000000,66.474932,0,0,1,...,2,1,1,0,1,0,4,1,2,3
1218251,-0.290503,2.933693,132.243131,3.795520,53.666667,-1.000000,67.474932,0,0,1,...,2,1,1,0,1,0,3,1,2,3
1218252,-0.306454,2.796087,127.168515,3.652742,53.666667,-1.000000,67.106080,0,0,1,...,3,1,1,0,1,0,3,1,2,3
1218253,-0.306454,2.710769,127.488025,3.579329,53.666667,-1.000000,65.058460,0,0,1,...,2,1,1,0,1,0,3,1,2,4


In [None]:
# Show the training labels that accompany X_train (boolean values in the saved output).
y_train

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
1218250,True
1218251,True
1218252,True
1218253,True


In [None]:
# Show the held-out test features returned by ddb.split_dbs (the display is truncated).
X_test

Unnamed: 0,median,mean,variance,mean_abs,max,min,sum,AAA,AAB,AAC,...,BCC,CAA,CAB,CAC,CBA,CBB,CBC,CCA,CCB,CCC
0,0.000000,0.033000,0.088570,0.118645,1.100000,-0.350000,0.990002,19,0,1,...,0,1,0,1,0,0,2,0,0,0
1,0.000000,0.033160,0.088560,0.118805,1.100000,-0.350000,0.994786,19,0,1,...,0,1,0,1,0,0,2,0,0,0
2,0.000000,0.033328,0.088548,0.118636,1.100000,-0.350000,0.999837,19,0,1,...,0,1,0,1,0,0,2,0,0,0
3,0.000000,0.032989,0.088567,0.118298,1.100000,-0.350000,0.989685,19,0,1,...,0,1,0,1,0,0,2,0,0,0
4,0.000000,0.033148,0.088557,0.118457,1.100000,-0.350000,0.994447,19,0,1,...,0,1,0,1,0,0,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1213960,0.012988,0.003562,0.001049,0.028512,0.059603,-0.055556,0.106875,28,0,0,...,0,0,0,0,0,0,0,0,0,0
1213961,0.012988,0.005110,0.001002,0.027792,0.059603,-0.055556,0.153311,28,0,0,...,0,0,0,0,0,0,0,0,0,0
1213962,0.012663,0.003437,0.000912,0.026119,0.059603,-0.055556,0.103108,28,0,0,...,0,0,0,0,0,0,0,0,0,0
1213963,0.009501,0.000534,0.001060,0.027688,0.059603,-0.067073,0.016035,28,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Show the held-out test labels that accompany X_test (boolean values in the saved output).
y_test

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
1213960,True
1213961,True
1213962,True
1213963,True


# Machine learning using a neural network (MLPClassifier)

In [None]:
# Exhaustive grid search over the MLP hyper-parameters stored under
# "nn_params" in the project config, ranked by ROC AUC with 3-fold
# cross-validation. refit=True retrains the best candidate on the whole
# training set, so the fitted search object can be used directly for
# predict() and score().
# NOTE(review): the features are passed to the MLP unscaled; MLPs are usually
# sensitive to feature scale — confirm whether ddb.split_dbs already normalises.
classifier = GridSearchCV(
    # A fixed random_state makes weight initialisation and mini-batch sampling
    # repeatable, so re-running the notebook reproduces the same search result
    # (the value matches the seed used for the train/test split).
    estimator=MLPClassifier(random_state=42),
    param_grid=CONFIG.get("nn_params"),
    n_jobs=-1,  # run the fits in parallel on all available cores
    scoring='roc_auc',
    verbose=3,
    refit=True,
    cv=3
)
# fit() returns the search object itself, so `model` and `classifier` are the
# same object. ravel() flattens the label values into the 1-D array that
# scikit-learn expects for y.
model = classifier.fit(X_train, y_train.values.ravel())

Fitting 3 folds for each of 144 candidates, totalling 432 fits




KeyboardInterrupt: 

In [None]:
# Persist the fitted grid-search object (the whole search, not only the best
# estimator) so it can be reloaded later without repeating the search.
model_file_name = "../../analyse/models/NNClassifier.pickle"

with open(model_file_name, mode='wb') as model_file:
    pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Hyper-parameter combination that achieved the best cross-validated score.
# NOTE(review): the saved output below lists criterion / max_depth / max_features /
# n_estimators, which are not MLPClassifier parameters — it looks like a stale
# output from a different estimator; re-run this cell after fitting.
model.best_params_

{'criterion': 'entropy',
 'max_depth': 8,
 'max_features': 'log2',
 'n_estimators': 100}

# TESTS

**Score the model on the test dataset (ROC AUC, the metric set by `scoring` in the grid search)**

In [None]:
# GridSearchCV.score applies the scorer chosen when the search was built, so
# with scoring='roc_auc' this is the held-out ROC AUC rather than accuracy.
test_score = model.score(X_test, y_test)
print(test_score)

0.9660291416670443


**Calculate F1 score**

In [None]:
# Predict labels for the held-out set with the refitted best estimator, then
# report the F1 score of those predictions against the true labels.
y_pred = model.predict(X_test)
test_f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(test_f1)

0.9304024263331266
