# Random Forest


In [12]:
# import
import pickle

import numpy as np
np.random.seed(42)

from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, root_mean_squared_error


In [13]:
def _load_pickle(path):
    """Load one pickled dataset bundle and close the file afterwards.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files produced by this project, never on untrusted input.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# One bundle per configuration; the file names encode <window>w_<stride>s
# (window size / stride, per the experiment notes at the end of the notebook).
# Each bundle is later indexed with the keys 'X_np_label' and 'y_np'.
data_50w_20s = _load_pickle("Data/donnees_50w_20s.pkl")
data_100w_20s = _load_pickle("Data/donnees_100w_20s.pkl")
data_150w_20s = _load_pickle("Data/donnees_150w_20s.pkl")

data_50w_10s = _load_pickle("Data/donnees_50w_10s.pkl")
data_100w_10s = _load_pickle("Data/donnees_100w_10s.pkl")

data_50w_5s = _load_pickle("Data/donnees_50w_5s.pkl")

data_30w_10s = _load_pickle("Data/donnees_30w_10s.pkl")
data_20w_10s = _load_pickle("Data/donnees_20w_10s.pkl")

data_20w_5s = _load_pickle("Data/donnees_20w_5s.pkl")


In [14]:
# Configurations to evaluate, keyed by "<window>w_<stride>s".
# Each value is the (features, targets) pair taken from the loaded bundle.
# Uncomment a pair below to include that configuration in the run.
datasets = {
    label: (bundle['X_np_label'], bundle['y_np'])
    for label, bundle in [
        # ("50w_20s", data_50w_20s),
        # ("100w_20s", data_100w_20s),
        # # ("150w_20s", data_150w_20s),

        # ("50w_10s", data_50w_10s),
        # ("100w_10s", data_100w_10s),

        # ("50w_5s", data_50w_5s),
        # ("30w_10s", data_30w_10s),
        # ("20w_10s", data_20w_10s),
        ("20w_5s", data_20w_5s),
    ]
}

In [15]:
# Single estimator object, re-fitted on every fold of every configuration.
# n_jobs=-1 asks scikit-learn to use all available CPU cores for fit/predict;
# with a fixed random_state the fitted trees are expected to be unchanged,
# only the wall-clock time drops (the notes below report runs of 5-30 min).
rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

In [16]:
# 5-fold cross-validation, shuffled with a fixed seed so folds are repeatable.
# NOTE(review): if consecutive windows overlap (stride smaller than the window
# length, as the dataset names suggest), shuffled folds can put near-identical
# windows in both train and test. Confirm whether a grouped or time-ordered
# split is needed before comparing strides.
n_splits = 5
kf = KFold(
    n_splits=n_splits,
    shuffle=True,
    random_state=42,
)

In [17]:
# results[metric][config_label] -> mean cross-validated score.
# Each metric gets its own independent, initially empty dict.
results = {metric_name: {} for metric_name in ('MSE', 'MAE', 'RMSE', 'MAPE')}

In [18]:
# Metric label -> scoring function. Insertion order is also the print order.
metric_fns = {
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'RMSE': root_mean_squared_error,
    'MAPE': mean_absolute_percentage_error,
}

# Cross-validate the forest on every selected configuration.
for window_size, (X, y) in datasets.items():
    # Per-fold scores for this configuration, one list per metric.
    fold_scores = {name: [] for name in metric_fns}

    for train_index, test_index in kf.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)

        for name, score_fn in metric_fns.items():
            fold_scores[name].append(score_fn(y_test, y_pred))

    # Store and report the mean of each metric over the folds.
    print(f"Window size {window_size} :")
    for name, scores in fold_scores.items():
        results[name][window_size] = np.mean(scores)
        print(f"{name} : {results[name][window_size]:.6f}")

# For each metric, report the configuration with the lowest mean score.
for metric, per_config in results.items():
    best_window_size = min(per_config, key=per_config.get)
    print(f"Best window size for {metric} : {best_window_size} ({per_config[best_window_size]:.6f})")

Window size 20w_5s :
MSE : 0.095717
MAE : 0.155772
RMSE : 0.308170
MAPE : 0.001688
Best window size for MSE : 20w_5s (0.095717)
Best window size for MAE : 20w_5s (0.155772)
Best window size for RMSE : 20w_5s (0.308170)
Best window size for MAPE : 20w_5s (0.001688)


In [19]:
# !! GARDER CETTE CELLULE COMME REFERENCE 
# On avait testé aussi avec des fenêtres de taille 150 : moins bonnes performances
# On avait commencé par tester 50, 100 et 150 pour des strides de 20 :
# output : 
# 
# Window size 50w_20s :
# MSE : 0.251766
# MAE : 0.281433
# RMSE : 0.489894
# MAPE : 0.003025
# 
# Window size 100w_20s :
# MSE : 0.195945
# MAE : 0.284542
# RMSE : 0.442379
# MAPE : 0.003062
# 
# Window size 150w_20s :
# MSE : 0.269007
# MAE : 0.300674
# RMSE : 0.511903
# MAPE : 0.003244
#
# Best window size for MSE : 100w_20s (0.195945)
# Best window size for MAE : 50w_20s (0.281433)
# Best window size for RMSE : 100w_20s (0.442379)
# Best window size for MAPE : 50w_20s (0.003025)

In [20]:
# Ensuite on a testé pour des strides de 10 et 20 (avec des fenêtres de taille 50 et 100 seulement)
# output :
# 
# Window size 50w_10s :
# MSE : 0.160286
# MAE : 0.220491
# RMSE : 0.397901
# MAPE : 0.002376
# 
# Window size 100w_10s :
# MSE : 0.164803
# MAE : 0.228040
# RMSE : 0.403471
# MAPE : 0.002460
#
# Best window size for MSE : 50w_10s (0.160286)
# Best window size for MAE : 50w_10s (0.220491)
# Best window size for RMSE : 50w_10s (0.397901)
# Best window size for MAPE : 50w_10s (0.002376)

# 32 minutes, c'est surtout 100w_10s qui était long 

In [21]:
# On a ensuite testé pour des strides de 5 et 10 (avec des fenêtres de taille 50 seulement pour ne pas que ce soit trop long)
# 
# Window size 50w_5s :
# MSE : 0.098205
# MAE : 0.162782
# RMSE : 0.310212
# MAPE : 0.001763

# Bien plus long (17 minutes 40) : un peu meilleur, mais le gain ne justifie pas ce temps de calcul.
# On décide donc de garder 50w_10s comme meilleur modèle


In [22]:
# Window size 30w_10s :
# MSE : 0.163054
# MAE : 0.217409
# RMSE : 0.398092
# MAPE : 0.002343

# Best window size for MSE : 30w_10s (0.163054)         # 50w_10s (0.160286), légèrement moins bon, 0.003 de plus
# Best window size for MAE : 30w_10s (0.217409)         # 50w_10s (0.220491), légèrement meilleur,  0.003 de moins
# Best window size for RMSE : 30w_10s (0.398092)        # 50w_10s (0.397901), légèrement moins bon, 0.0002 de plus -> identique
# Best window size for MAPE : 30w_10s (0.002343) # 50w_10s (0.002376), légèrement meilleur,     0.00003 de moins -> identique

# Valeurs très proches de 50w_10s, on garde donc le modèle le plus rapide 
# temps pour 30w_10s : 5 min 
# temps pour 50w_10s : 7 min 45 

In [23]:
# Window size 20w_10s :
# MSE : 0.162244
# MAE : 0.213713
# RMSE : 0.401487
# MAPE : 0.002303
# Best window size for MSE : 20w_10s (0.162244)     # 30w_10s (0.163054), légèrement meilleur, 0.0008 de moins
# Best window size for MAE : 20w_10s (0.213713)     # 30w_10s (0.217409), légèrement meilleur, 0.0037 de moins
# Best window size for RMSE : 20w_10s (0.401487)    # 30w_10s (0.398092), légèrement moins bon, 0.0034 de plus
# Best window size for MAPE : 20w_10s (0.002303)    # 30w_10s (0.002343), légèrement meilleur, 0.00004 de moins

# temps : 3 min 30s 

In [None]:
# Window size 20w_5s :
# MSE : 0.095717
# MAE : 0.155772
# RMSE : 0.308170
# MAPE : 0.001688
# Best window size for MSE : 20w_5s (0.095717)      # 20w_10s (0.162244), bien meilleur, 0.066 de moins
# Best window size for MAE : 20w_5s (0.155772)      # 20w_10s (0.213713), bien meilleur, 0.058 de moins
# Best window size for RMSE : 20w_5s (0.308170)     # 20w_10s (0.401487), bien meilleur, 0.093 de moins
# Best window size for MAPE : 20w_5s (0.001688)     # 20w_10s (0.002303), bien meilleur, 0.0006 de moins

# temps : 7 min 30, soit 4 minutes de plus que 20w_10s (3 min 30)

# On conserve donc ?? 