In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
import os, sys

from phm08ds.models import experiment

## Load Dataset

In [2]:
# Directory holding the interim CSV files; this relative path is resolved
# against the kernel's current working directory.
folderpath = '../../../data/interim/'

# Read 'data_op_01.csv' into a DataFrame. folderpath already ends with '/',
# so os.path.join produces the same path as plain string concatenation.
data_op_1 = pd.read_csv(os.path.join(folderpath, 'data_op_01.csv'))

# Preview the first rows to check which columns were read.
data_op_1.head()

Unnamed: 0,unit,time_step,operational_setting_1,operational_setting_2,operational_setting_3,Sensor_0,Sensor_1,Sensor_2,Sensor_3,Sensor_4,...,Sensor_13,Sensor_14,Sensor_15,Sensor_16,Sensor_17,Sensor_18,Sensor_19,Sensor_20,Health_state,Operational_condition
0,1,1,10.0047,0.2501,20.0,489.05,604.13,1499.45,1309.95,10.52,...,8120.83,8.6216,0.03,368,2319,100.0,28.58,17.1735,1,1
14,1,15,9.9988,0.25,20.0,489.05,604.63,1497.87,1302.51,10.52,...,8124.37,8.6496,0.03,369,2319,100.0,28.84,17.0805,1,1
15,1,16,9.9987,0.25,20.0,489.05,604.61,1500.47,1311.39,10.52,...,8127.07,8.6609,0.03,369,2319,100.0,28.71,17.1759,1,1
17,1,18,10.0066,0.2507,20.0,489.05,604.23,1502.57,1304.56,10.52,...,8119.39,8.6646,0.03,367,2319,100.0,28.6,17.1096,1,1
35,1,36,10.0024,0.2504,20.0,489.05,604.43,1499.85,1306.89,10.52,...,8121.73,8.626,0.03,369,2319,100.0,28.52,17.1036,2,1


## Data preprocessing

Select the subset of sensors that I prefer to work with.

In [3]:
from phm08ds.features.feature_selection import SelectSensors

# Keep only a hand-picked subset of sensors (indices 3, 6, 10 and 14); the
# preview shows Sensor_3, Sensor_6, Sensor_10 and Sensor_14 remaining.
tf_select_sensors = SelectSensors(kind='custom', sensors=[3,6,10,14])
# NOTE(review): data_op_1 is rebound to the reduced frame, so re-running this
# cell on its own feeds the already-filtered frame back into the transformer;
# re-run the loading cell first.
data_op_1 = tf_select_sensors.fit_transform(data_op_1)
data_op_1.head()

Unnamed: 0,unit,time_step,operational_setting_1,operational_setting_2,operational_setting_3,Sensor_3,Sensor_6,Sensor_10,Sensor_14,Operational_condition,Health_state
0,1,1,10.0047,0.2501,20.0,1309.95,394.88,45.4,8.6216,1,1
14,1,15,9.9988,0.25,20.0,1302.51,394.93,45.39,8.6496,1,1
15,1,16,9.9987,0.25,20.0,1311.39,394.44,45.52,8.6609,1,1
17,1,18,10.0066,0.2507,20.0,1304.56,394.31,45.1,8.6646,1,1
35,1,36,10.0024,0.2504,20.0,1306.89,394.36,45.36,8.626,1,2


Before feeding to the classifier, let's remove unwanted information, such as unit, time_step and operational settings.

In [4]:
from phm08ds.features.feature_selection import RemoveInfo

# Transformer that strips the bookkeeping columns; the preview shows only the
# selected sensor columns and Health_state remaining afterwards.
tf_remove_info = RemoveInfo()

# The reduced frame gets its own name; it still carries the Health_state label column.
data_with_features = tf_remove_info.fit_transform(data_op_1)
data_with_features.head()

Unnamed: 0,Sensor_3,Sensor_6,Sensor_10,Sensor_14,Health_state
0,1309.95,394.88,45.4,8.6216,1
14,1302.51,394.93,45.39,8.6496,1
15,1311.39,394.44,45.52,8.6609,1
17,1304.56,394.31,45.1,8.6646,1
35,1306.89,394.36,45.36,8.626,2


We need to normalize our data. Let's use Z-score standardization.

In [5]:
from sklearn.preprocessing import StandardScaler

# Z-score standardization of the sensor columns only: the Health_state label
# column is dropped before the scaler is fitted.
# NOTE(review): the scaler is fitted on every row before the cross-validation
# run further down, so its statistics also come from rows later used for
# testing. Confirm this is acceptable for the reported scores.
tf_std_scaller = StandardScaler()
data_with_features_std = tf_std_scaller.fit_transform(
    data_with_features.drop(labels=['Health_state'], axis='columns')
)
data_with_features_std

array([[-0.16893857,  0.85477079, -0.38167972, -1.17034624],
       [-1.06544604,  0.93149834, -0.42173461, -0.42029381],
       [ 0.00457901,  0.17956834,  0.09897901, -0.11759408],
       ...,
       [ 1.64094613, -2.29105881,  1.26057091,  2.10309687],
       [ 1.8867627 , -1.58516534,  1.9415041 ,  0.5708469 ],
       [ 1.865073  , -1.66189289,  2.26194325,  1.52448499]])

In [6]:
# Target vector for the classifiers: the Health_state column as a NumPy array.
labels_op_1 = np.array(data_with_features.loc[:, 'Health_state'])
labels_op_1

array([1, 1, 1, ..., 4, 4, 4])

# Classification steps

## Load Experiment model

In [7]:
from phm08ds.models import experiment

## Define classifiers and their specifications

In [8]:
from sklearn.neural_network import MLPClassifier

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone `joblib` package and fall back to the vendored
# copy only on old scikit-learn installations that do not ship it separately.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [9]:
# Load a persisted classifier from 'clf_mlp.pkl' (path relative to the kernel's
# working directory); presumably the tuned MLP from an earlier model-selection
# run. TODO confirm where this file is produced.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code, so only load artifacts that come from a trusted source.
best_mlp = joblib.load('clf_mlp.pkl')

## Put all classifiers in a dictionary:

In [10]:
# Map a display name to each classifier object; this dictionary is what gets
# passed to experiment.run_classifiers and experiment.save_models.
classifiers = {'MLP': best_mlp}

## Train Classifiers and test them

Stratified cross-validation is going to be used for model selection.

In [11]:
# Number of cross-validation folds handed to the experiment helper.
kfolds = 10
# Run every classifier in `classifiers` on the standardized features and labels.
# The result is later indexed as clf_outputs['train' | 'test']['true' | 'pred'].
# NOTE(review): no random seed is set anywhere in this notebook. Confirm whether
# run_classifiers fixes one internally; otherwise the fold splits and scores
# may differ between runs.
clf_outputs = experiment.run_classifiers(data_with_features_std, labels_op_1, classifiers, kfolds)

## Performance assessment

Saving variables in a dictionary:

In [12]:
# Build one results entry per split ('train' first, then 'test') from the true
# and predicted labels collected by the cross-validation run.
# NOTE(review): the literal 4 is presumably the number of health-state classes
# (labels shown above range from 1 to 4). Confirm against results_clf.
results = {
    split: experiment.results_clf(4, clf_outputs[split]['true'], clf_outputs[split]['pred'])
    for split in ('train', 'test')
}

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


## Save results, models and pipeline to a .pkl file

In [13]:
from sklearn.pipeline import Pipeline

# Chain the three already-fitted preprocessing transformers, in the order they
# were applied above, so they can be persisted as a single object.
# NOTE(review): tf_std_scaller was fitted on the frame with 'Health_state'
# dropped, while the displayed RemoveInfo output still contains 'Health_state'.
# Confirm that the scaler step receives the same columns it was fitted on when
# this pipeline is applied end to end.
data_preprocessing = Pipeline(
    steps=[
        ('select_sensors', tf_select_sensors),
        ('remove_info', tf_remove_info),
        ('std_scaler', tf_std_scaller),
    ]
)

In [14]:
# Persist the classifiers dictionary and the preprocessing pipeline through the
# project's experiment helpers.
# NOTE(review): file names and destination are decided inside save_models and
# save_pipeline. Presumably .pkl files in the working directory; confirm.
experiment.save_models(classifiers)
experiment.save_pipeline(data_preprocessing)

## Save results to CSVs and figures

In [15]:
# Export the computed results for each split; the second argument names the
# split being exported.
# NOTE(review): the output format and location are decided inside
# export_results. Presumably CSVs and figures, per the section heading; confirm.
experiment.export_results(results['test'], 'test')
experiment.export_results(results['train'], 'train')

/mnt/Work/Mestrado/Mestrado/MP-Safety_ITA/Analyses_and_Experiments/PHM08_data_science/notebooks/E09_PHM08-train_MLP/model_selection-OP_01-navarmn
MLP
/mnt/Work/Mestrado/Mestrado/MP-Safety_ITA/Analyses_and_Experiments/PHM08_data_science/notebooks/E09_PHM08-train_MLP/model_selection-OP_01-navarmn
MLP
