<a href="https://colab.research.google.com/github/vhrique/aerogerador_fault_diagnosis/blob/main/baseline_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pymoo

Collecting pymoo
  Downloading pymoo-0.6.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Collecting cma==3.2.2 (from pymoo)
  Downloading cma-3.2.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting alive-progress (from pymoo)
  Downloading alive_progress-3.1.5-py3-none-any.whl.metadata (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.4/68.4 kB[0m [31m769.8 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill (from pymoo)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting Deprecated (from pymoo)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting about-time==4.2.1 (from alive-progress->pymoo)
  Downloading about_time-4.2.1-py3-none-any.whl.metadata (13 kB)
Collecting grapheme==0.6.0 (from alive-progress->pymoo)
  Downloading grapheme-0.6.0.tar.gz (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?2

In [64]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import Problem
from pymoo.decomposition.asf import ASF
from pymoo.optimize import minimize
from pymoo.termination import get_termination

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import OrdinalEncoder

# Load Data

In [6]:
df_features = pd.read_csv('/content/drive/MyDrive/Estudos/Datasets/pas_aerogerador_helon/X10.csv', header=None)
df_labels = pd.read_csv('/content/drive/MyDrive/Estudos/Datasets/pas_aerogerador_helon/y_case2.csv', header=None)

In [10]:
X = df_features.to_numpy()
y = df_labels.to_numpy().squeeze()

# RandomForest Experiment

In [23]:
y_pred = y.copy()

for i, (train_index, test_index) in enumerate(KFold(n_splits=5, shuffle=True).split(X)):
    X_train, y_train = X[train_index], y[train_index]
    X_test = X[test_index]
    mdl = RandomForestClassifier().fit(X_train, y_train)
    y_pred[test_index] = mdl.predict(X_test)

In [24]:
print(
    classification_report(
        y, y_pred
    )
)

              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99       143
         1.0       1.00      1.00      1.00       144
         2.0       1.00      1.00      1.00       144
         3.0       0.99      0.98      0.98       144
         4.0       0.97      0.98      0.98       199
         5.0       0.98      0.96      0.97       168
         6.0       0.99      1.00      1.00       162
         7.0       1.00      0.99      1.00       150
         8.0       1.00      1.00      1.00       156
         9.0       1.00      1.00      1.00       155
        10.0       1.00      1.00      1.00       157
        11.0       1.00      1.00      1.00       152
        12.0       1.00      1.00      1.00       528

    accuracy                           0.99      2402
   macro avg       0.99      0.99      0.99      2402
weighted avg       0.99      0.99      0.99      2402



# Modified RandomForest with Multi-objective Ensemble Model Selection (RF-MOEMS)

In [89]:
class MOEMS(Problem):

    def __init__(self, y_all, y):
        super().__init__(n_var=len(y_all), n_obj=2, xl=0.0, xu=1.0)
        self.y_all = y_all
        self.y = y

    def _evaluate(self, x, out, *args, **kwargs):
        errors = []
        complexities = []
        for xx in x:
            x_bin = [1 if xi >= 0.5 else 0 for xi in xx]
            if sum(x_bin) == 0:
                errors.append(10)
                complexities.append(10)
                continue
            y_out = [out for out, xb in zip(self.y_all, x_bin) if xb]
            y_pred = pd.DataFrame(y_out).mode().to_numpy()[0,:]
            complexity = 1 - sum(x_bin)/len(x_bin)
            error = 1 - classification_report(self.y, y_pred, output_dict=True)['accuracy']
            complexities.append(complexity)
            errors.append(error)
        out['F'] = np.column_stack([complexities, errors])

In [96]:
class RFMOEMS():
    def __init__(self, n_classifiers=100, optimization_set_ratio=0.2):
        self.n_classifiers = n_classifiers
        self.opt_ratio = optimization_set_ratio
        self.rf = RandomForestClassifier(self.n_classifiers)
        self.moems = [True for _ in range(n_classifiers)]

    def _predict_all(self, X):
        y_out = []
        for i, dt in enumerate(self.rf.estimators_):
            if not self.moems[i]:
                continue
            y_out.append(dt.predict(X))
        return y_out

    def _moo(self, X, y):
        y_out_all = self._predict_all(X)
        prob = MOEMS(y_out_all, y)
        alg = NSGA2(pop_size=20, n_offprings=5, eliminate_duplicates=True)
        termination = get_termination("n_gen", 20)
        return minimize(prob, alg, termination)

    def _mcdm(self, res):
        weights = np.array([0.2, 0.8])
        decomp = ASF()
        return decomp.do(res.F, 1/weights).argmin()

    def _fit_moems(self, X, y):
        pareto = self._moo(X, y)
        best = self._mcdm(pareto)
        self.moems = [x >= 0.5 for x in pareto.X[best]]

    def fit(self, X, y):
        X_rf, X_opt, y_rf, y_opt = train_test_split(X, y, test_size=self.opt_ratio)
        self.rf.fit(X_rf, y_rf)
        self._fit_moems(X_opt, y_opt)
        return self

    def predict(self, X):
        y_out_all = self._predict_all(X)
        return pd.DataFrame(y_out_all).mode().to_numpy()[0,:]

In [97]:
y_pred = y.copy()

for i, (train_index, test_index) in enumerate(KFold(n_splits=5, shuffle=True).split(X)):
    X_train, y_train = X[train_index], y[train_index]
    X_test = X[test_index]
    mdl = RFMOEMS().fit(X_train, y_train)
    y_pred[test_index] = mdl.predict(X_test)
    print(f'Iteration {i+1} Done!')

Iteration 1 Done!
Iteration 2 Done!
Iteration 3 Done!
Iteration 4 Done!
Iteration 5 Done!


In [99]:
print(
    classification_report(
        y, y_pred
    )
)

              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98       143
         1.0       0.99      0.98      0.98       144
         2.0       1.00      0.98      0.99       144
         3.0       0.97      0.98      0.97       144
         4.0       0.95      0.97      0.96       199
         5.0       0.97      0.93      0.95       168
         6.0       0.99      1.00      0.99       162
         7.0       1.00      0.99      1.00       150
         8.0       1.00      1.00      1.00       156
         9.0       1.00      1.00      1.00       155
        10.0       1.00      1.00      1.00       157
        11.0       1.00      1.00      1.00       152
        12.0       0.98      0.99      0.99       528

    accuracy                           0.99      2402
   macro avg       0.99      0.98      0.99      2402
weighted avg       0.99      0.99      0.99      2402

