In [2]:
import pandas as pd
import numpy as np
import time
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [3]:
# Timing decorator: reports the execution time of the wrapped function
def timing(func):
    """Decorate ``func`` so that every call prints its wall-clock duration.

    Parameters
    ----------
    func : callable
        The function to time.

    Returns
    -------
    callable
        A wrapper that prints "Calculation start", calls ``func`` with the
        given arguments, prints "Finished!" and the elapsed time rounded to
        two decimals, then returns whatever ``func`` returned. Exceptions
        raised by ``func`` propagate unchanged.
    """
    from functools import wraps  # local import keeps this cell self-contained

    @wraps(func)  # preserve the wrapped function's __name__ and __doc__
    def wrapper(*args, **kwargs):
        print("Calculation start")
        start = time.perf_counter()
        data = func(*args, **kwargs)
        # Stop the clock before printing so console I/O is not part of the measurement.
        end = time.perf_counter()
        print("Finished!")
        print(f"Execution time: {round(end-start,2)} second(s)")
        return data
    return wrapper

In [4]:
@timing
def read_data(path, **read_csv_kwargs):
    """Load a CSV file into a DataFrame; the ``timing`` decorator prints the load time.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file, passed to ``pd.read_csv``.
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pd.read_csv``
        (for example ``index_col=0``). With no extra arguments the behavior
        is the same as a plain ``pd.read_csv(path)``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    return pd.read_csv(path, **read_csv_kwargs)

In [5]:
# Load the feature table; the decorator on read_data prints how long the read takes.
X = read_data(path="X.csv")

Calculation start
Finished!
Execution time: 18.79 second(s)


In [6]:
# Load the target table; the decorator on read_data prints how long the read takes.
y = read_data(path="y_1.csv")

Calculation start
Finished!
Execution time: 1.29 second(s)


In [7]:
# Preview the first five feature rows.
X.head(n=5)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x136,x137,x138,x139,x140,x141,x142,x143,x144,x145
0,NO,NO,rpoSk1aXo+6hZQxVMp/PAw8+w67/vzWkyQs/xGqFCnw=,hCXwO/JldK5zcd9ejOD1FwmEgCf96eTdEVy7OtY2Y2g=,0.837674,0.072627,0.869502,0.279803,0.297919,NO,...,7.0,0.885,4565,3456,YES,NO,YES,4,0.623843,0.285871
1,NO,NO,/VV6+dCb+in5lV7V/e9b5HoZ/BN34M+dPmKWY8BeHh4=,hCXwO/JldK5zcd9ejOD1FwmEgCf96eTdEVy7OtY2Y2g=,1.289474,0.082577,0.948578,0.069268,0.527802,NO,...,0.0,0.67,4676,3306,YES,NO,YES,0,0.932244,0.526946
2,YES,YES,MZZbXga8gvaCBqWpzrh2iKdOkcsz/bG/z4BVjUnqWT0=,YvZUuCDjLu9VvkCdBWgARWQrvm+FSXgxp0zIrMjcLBc=,0.653912,0.041257,0.941,0.090423,0.422868,YES,...,1.5,0.963333,3306,4678,YES,NO,YES,11,0.668876,0.412886
3,YES,NO,X/hdUOVR5KuExVGLzjhLcM2CyIqym9t0Nh+ZX05M+1w=,+yhSY//Hpg7u0bSA7NYmcmRFgv3bF4Tw3BMHrBqaTtA=,1.179921,0.051104,0.949501,0.270638,0.411161,YES,...,0.0,0.95,4677,3307,YES,NO,YES,4,0.738434,0.404105
4,NO,NO,4FIxS25OrBv/DHbmmVLtScptssXXAhNxD087PPzA9BU=,B+EJpnEbkYtLnwDQYN1dP1rcfnoCnxAjKLYwQZE07Ew=,0.706815,0.0,1.0,0.0,0.503363,YES,...,3.0,1.0,892,1262,NO,NO,YES,7,0.59588,0.48991


In [8]:
# Preview the first five target rows.
y.head(n=5)

Unnamed: 0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,...,y24,y25,y26,y27,y28,y29,y30,y31,y32,y33
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


# Réduction de dimensionnalité (PCA)

Nous allons d'abord normaliser les données.

In [None]:
# Build the numeric feature matrix (`df_features`) and the target matrix
# (`df_targets`) that the cells below rely on.
# NOTE(review): the original preparation step is not present in the notebook;
# this cell only held a bare `np.where()`, which raises TypeError. The encoding
# below is a reconstruction -- confirm it matches the intended preprocessing.
features_raw = X.drop(columns=["Unnamed: 0"], errors="ignore")
df_targets = y.drop(columns=["Unnamed: 0"], errors="ignore")

numeric_part = features_raw.select_dtypes(include="number")
text_part = features_raw.select_dtypes(exclude="number")

# Keep only the text columns that are pure YES/NO flags; any other text column
# (such as the encoded strings visible in X.head()) is dropped.
flag_cols = [
    col for col in text_part.columns
    if text_part[col].dropna().isin(["YES", "NO"]).all()
]
flags = pd.DataFrame(
    np.where(text_part[flag_cols] == "YES", 1.0, 0.0),  # YES -> 1, anything else -> 0
    index=features_raw.index,
    columns=flag_cols,
)

df_features = pd.concat([numeric_part, flags], axis=1)
# Restore the column order of the raw table.
df_features = df_features[[col for col in features_raw.columns if col in df_features.columns]]
# Fill missing numeric values with the column median so later steps receive no NaN.
# NOTE(review): imputation strategy is an assumption -- confirm.
df_features = df_features.fillna(df_features.median())

In [None]:
# Standardize the features before PCA.
# `scaler` is not created anywhere in the visible notebook (StandardScaler is
# only imported), so instantiate it here to make the cell run on a fresh kernel.
scaler = StandardScaler()
df_features1 = scaler.fit_transform(df_features)

In [None]:
# Wrap the scaled values in a DataFrame again, reusing the original column names.
feature_names = df_features.columns.tolist()
df_features2 = pd.DataFrame(df_features1, columns=feature_names)

In [None]:
# Preview the first five rows of the scaled features.
df_features2.head(n=5)

In [None]:
# PCA
from sklearn.decomposition import PCA

N_COMPONENTS = 33  # number of principal components to keep
PCA_SEED = 1236  # same seed value the notebook uses for its train/test splits

# Fixing random_state keeps the projection repeatable when scikit-learn
# selects a randomized SVD solver.
pca = PCA(n_components=N_COMPONENTS, random_state=PCA_SEED)

In [None]:
# Fit the PCA on the standardized features and project them onto its components.
# NOTE(review): despite the name `df`, the result is presumably a NumPy array
# rather than a DataFrame -- confirm.
df = pca.fit_transform(df_features2)

In [None]:
from sklearn.model_selection import train_test_split

SPLIT_SEED = 1236  # fixed seed so the three subsets are identical on every run

# First split: 70 % train / 30 % hold-out. The hold-out parts get their own
# names so the raw target frame `y` loaded earlier is not overwritten.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    df, df_targets, test_size=0.3, random_state=SPLIT_SEED
)
# Second split: the hold-out is halved into validation and test (15 % each).
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=SPLIT_SEED
)

In [None]:
# Report the shape of every subset; each row holds the heading, the label used
# in the messages, the feature/target pair and the text appended to the last line.
split_report = (
    ("Train set 70%", "Train", X_train, y_train, "\n"),
    ("Validation set 15%", "validation", X_val, y_val, ""),
    ("test set 15%", "test", X_test, y_test, ""),
)

for heading, label, features, targets, trailer in split_report:
    print(heading)
    print(f"X {label} set size: {features.shape}")
    print(f"y {label} set size: {targets.shape}{trailer}")

# Entraînement des modèles

## 1 - Label Transformation method
Il en existe trois méthodes :
* Binary Relevance
* Classifier Chain
* Label Powerset

### 1.a - Binary Relevance avec un réseau de neurones

In [None]:
from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain, LabelPowerset
from sklearn.metrics import accuracy_score, hamming_loss
from tensorflow import keras
from sklearn.neural_network import MLPClassifier

In [None]:
# MLP Classifier
mlp_clf = MLPClassifier()
br_mlp = BinaryRelevance(mlp_clf)

In [None]:
# Train the Binary Relevance model on the training split.
br_mlp.fit(X_train, y_train)

Le modèle avec tous ses paramètres :  
BinaryRelevance(classifier=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False),
        require_dense=[True, True])

In [None]:
# Predict the labels of the held-out test split.
y_pred = br_mlp.predict(X_test)

In [None]:
# Score the test-split predictions against the true labels.
# NOTE(review): for multilabel targets scikit-learn documents this metric as
# subset accuracy (every label of a sample must match) -- confirm that is intended.
accuracy_score(y_true=y_test, y_pred=y_pred)

On obtient une accuracy de 83.65 %.
