In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyRegressor
from sklearn.utils import all_estimators
from sklearn.base import RegressorMixin
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import KBinsDiscretizer
import time

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
from collections import Counter

cols_cible_type = ["TurnoutTimeSeconds", "TravelTimeSeconds", "PumpSecondsOnSite"]
cols_Data = [
    ["CalYear", "HourOfCall", "Postcode_district", "Month", "DayOfWeek"],
    ["CalYear", "HourOfCall", "Postcode_district", "Month", "DayOfWeek"],
    ["CalYear", "PropertyType", "StopCode"],
]
cols_cible = [
    ["TurnoutTimeSeconds_min", "TurnoutTimeSeconds_mean", "TurnoutTimeSeconds_max"],
    ["TravelTimeSeconds_min", "TravelTimeSeconds_mean", "TravelTimeSeconds_max"],
    ["PumpSecondsOnSite_min", "PumpSecondsOnSite_mean", "PumpSecondsOnSite_max"]
    # NumPumpsAttending??
]

def load_df(col_cible_type):
    df = pd.read_csv(f"../data/_df_ready_{col_cible_type}.csv", sep=";", low_memory=False)
    # Contrôle
    # display(df.head(3))
    # display(df.info())
    return df

def load_df_full(col_cible_type):
    df = pd.read_csv(
        f"../data/_df_ready_full_{col_cible_type}.csv", sep=";", low_memory=False
    )
    # Contrôle
    # display(df.head(3))
    # display(df.info())
    return df

def Create_X(df_limited, index):    
    print(df_limited.shape)
    # Crée X sans les target
    cols_to_remove = [item for sublist in cols_cible for item in sublist] + [
        "NumPumpsAttending"
    ]
    print("cols_to_remove", cols_to_remove)
    X = df_limited.drop(cols_to_remove, axis=1)
    # Ne conserve que certaines colonnes explicatives
    # Crée une liste des colonnes à conserver
    cols_to_keep = [
        col
        for col in X.columns
        if any(substring in col for substring in cols_Data[index])
    ]
    print("cols_to_keep", cols_to_keep)
    # Conserver uniquement ces colonnes
    X = X[cols_to_keep]

    display(X.head(2))
    print(X.shape)
    return X

# Simulation d'un RandomUnderSampler
def custom_random_undersampler(X_train, y_train, bins=25, random_state=42):
    print("Resampling")
    print(len(X_train))
    print(type(X_train), type(y_train))
    # Discrétisation de la cible continue
    y_binned, bin_edges = pd.cut(y_train, bins=bins, labels=False, retbins=True)
    print(bin_edges)
    print(y_binned.value_counts())
    # Application de RandomUnderSampler
    rus = RandomUnderSampler(sampling_strategy="majority", random_state=random_state)
    X_resampled, y_resampled_binned = rus.fit_resample(X_train, y_binned)
    print(y_resampled_binned.value_counts())
    # Récupérer les cibles originales à partir des bins
    bin_midpoints = (bin_edges[:-1] + bin_edges[1:]) / 2
    y_resampled = bin_midpoints[y_resampled_binned]
    print(y_resampled)
    print(len(X_resampled))
    return X_resampled, y_resampled

In [9]:
# Créez le cercle de corrélation
# def draw_correlation_circle(df_charges_factorielles, pca, arrow_length=0.1, label_rotation=0):
#     fig, ax = plt.subplots(figsize=(8, 8))
#     for i, var in enumerate(df_charges_factorielles.columns):
#         x = df_charges_factorielles.loc[0, var]
#         y = df_charges_factorielles.loc[1, var]
#         ax.arrow(0, 0, x, y, head_width=arrow_length, head_length=arrow_length, fc='gray', ec='gray')
#         ax.text(x*1.15, y*1.15, var, ha='center', va='center', rotation=label_rotation, fontsize=9)
#     circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='black')
#     ax.add_artist(circle)
#     ax.set_xlim(-1.1, 1.1)
#     ax.set_ylim(-1.1, 1.1)
#     ax.set_aspect('equal', adjustable='box')
#     ax.set_xlabel('Axe 1 (PC1)')
#     ax.set_ylabel('Axe 2 (PC2)')
#     ax.set_title('Cercle des Corrélations')
#     plt.grid()
#     plt.show()

cols_cible_filter = [
    "TurnoutTimeSeconds_mean",
    "TravelTimeSeconds_mean",
    "PumpSecondsOnSite_mean",
]

ignore_regressors = [
    "ExtraTreesRegressor",
    "NuSVR",
    "QuantileRegressor",
    "SVR",
    "CCA",
    "GammaRegressor",
    "GaussianProcessRegressor",
    "IsotonicRegression",
    "MultiOutputRegressor",
    "MultiTaskElasticNet",
    "MultiTaskElasticNetCV",
    "MultiTaskLasso",
    "MultiTaskLassoCV",
    "PLSCanonical",
    "RadiusNeighborsRegressor",
    "RegressorChain",
    "StackingRegressor",
    "KernelRidge",
    "ARDRegression",
    "TweedieRegressor",
]
REGRESSORS = [
    estimator
    for estimator in all_estimators()
    if (
        issubclass(estimator[1], RegressorMixin)
        and (not (estimator[0] in ignore_regressors))
    )
]

all_results = pd.DataFrame()

for index, name in enumerate(cols_cible_type):
    df = load_df(name)
    # X = df.drop(["PumpSecondsOnSite_min", "PumpSecondsOnSite_mean", "PumpSecondsOnSite_max", "TurnoutTimeSeconds_min", "TurnoutTimeSeconds_mean",
    #               "TurnoutTimeSeconds_max", "TravelTimeSeconds_min", "TravelTimeSeconds_mean", "TravelTimeSeconds_max", "NumPumpsAttending"], axis=1)
    # display(X.head(2))
    # print(X.shape)

    # pca = PCA()
    # data_2D = pca.fit_transform(X)
    # display(data_2D)
    # print("La part de variance expliquée est", round(pca.explained_variance_ratio_.sum(), 5))

    # plt.figure()
    # plt.xlim(0,40)
    # plt.xlabel('Nombre de composantes')
    # plt.ylabel('Part de variance expliquée')
    # plt.axhline(y = 0.95, color ='r', linestyle = '--')
    # plt.plot(pca.explained_variance_ratio_.cumsum());
    # plt.show()

    # variance_expliquee = pca.explained_variance_ratio_
    # charges_factorielles = pca.components_
    # # Créez un DataFrame pour afficher les corrélations de chaque variable avec chaque axe
    # df_charges_factorielles = pd.DataFrame(
    #     charges_factorielles,
    #     columns=X.columns,
    #     #index=["Axe 1", "Axe 2", "Axe 3", "Axe 4", "Axe 5", "Axe 6"],
    # )

    # # Affichez le DataFrame des charges factorielles
    # display(df_charges_factorielles.head(2))

    # charges_factorielles = pca.components_
    # # Appelez la fonction pour tracer le cercle de corrélation
    # draw_correlation_circle(df_charges_factorielles, pca)

    # Auto ML
    for year_floor in range(13, 16):
        print("-----------------------------------------------------")
        print("year", 2024 - 16 + year_floor)
        print("-----------------------------------------------------")
        df_limited = df[df.CalYear >= year_floor]  # >= 2021
        X = Create_X(df_limited, index)

        for col_cible in cols_cible[index]:
            if not(col_cible in cols_cible_filter):
                continue
            print("cible", col_cible)
            y = df_limited[col_cible]
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
            # Réchantillonne
            # X_train_samp, y_train_samp = custom_random_undersampler(X_train, y_train)
            X_train_samp, y_train_samp = X_train, y_train
            # LazyPredict
            myREGRESSORS = REGRESSORS
            print(myREGRESSORS)
            # myREGRESSORS = myREGRESSORS[4:6]
            reg = LazyRegressor(
                verbose=2,
                ignore_warnings=False,
                custom_metric=None,
                regressors=myREGRESSORS,
            )
            models, predictions = reg.fit(X_train_samp, X_test, y_train_samp, y_test)
            # models, predictions = reg.fit(X_train, X_test, y_train, y_test)
            # Ajoute le nom de la colonne
            models["Target"] = col_cible
            models["Year floor"] = 2024 - 16 + year_floor
            # Concaténe dans les résultats
            all_results = pd.concat([all_results, models], axis=0)
            display(models)
            models.to_csv(f"../data/_autoML_regressor_{col_cible}.csv", sep=";", index=True)
            # break
        # break
all_results = all_results.sort_values(by=["Target", "RMSE"], ascending=[False, True])
all_results.to_csv(f"../data/_autoML_regressor.csv", sep=";", index=True)

# tester automl avant et après PCA et aussi gridsearch pour comparer
# save du PCA

-----------------------------------------------------
year 2021
-----------------------------------------------------
(444247, 42)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1147280,13,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0
1147281,13,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0


(444247, 18)
cible TurnoutTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neig

  3%|▎         | 1/35 [00:05<02:54,  5.14s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.11130882084114446, 'Adjusted R-Squared': 0.11112874360206282, 'RMSE': np.float64(34.86080349122066), 'Time taken': 5.1385064125061035}


  6%|▌         | 2/35 [00:08<02:23,  4.35s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.12324017630819317, 'Adjusted R-Squared': 0.12306251674310376, 'RMSE': np.float64(34.62599630194863), 'Time taken': 3.7978832721710205}


  9%|▊         | 3/35 [00:09<01:25,  2.66s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.08492706856723042, 'Adjusted R-Squared': 0.08474164554186991, 'RMSE': np.float64(35.37445946255802), 'Time taken': 0.6442923545837402}


 14%|█▍        | 5/35 [00:10<00:38,  1.28s/it]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.12466085336273203, 'Adjusted R-Squared': 0.12448348167222445, 'RMSE': np.float64(34.59793143130328), 'Time taken': 0.7330455780029297}
{'Model': 'DummyRegressor', 'R-Squared': -4.337018878231014e-05, 'Adjusted R-Squared': -0.0002460109410356015, 'RMSE': np.float64(36.980380507705355), 'Time taken': 0.1907329559326172}


 17%|█▋        | 6/35 [00:10<00:27,  1.04it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.050915505223587565, 'Adjusted R-Squared': 0.05072319036834583, 'RMSE': np.float64(36.02586333427516), 'Time taken': 0.32783985137939453}


 20%|██        | 7/35 [00:12<00:31,  1.14s/it]

{'Model': 'ElasticNetCV', 'R-Squared': 0.08492323291755621, 'Adjusted R-Squared': 0.08473780911497053, 'RMSE': np.float64(35.374533600851855), 'Time taken': 1.5144808292388916}


 23%|██▎       | 8/35 [00:13<00:27,  1.01s/it]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.12466085336273203, 'Adjusted R-Squared': 0.12448348167222445, 'RMSE': np.float64(34.59793143130328), 'Time taken': 0.7156343460083008}


 26%|██▌       | 9/35 [00:34<03:15,  7.52s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.13328100861805303, 'Adjusted R-Squared': 0.13310538364653546, 'RMSE': np.float64(34.427153379507494), 'Time taken': 21.83571720123291}


 29%|██▊       | 10/35 [00:36<02:26,  5.84s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.13936000285173455, 'Adjusted R-Squared': 0.13918560967875804, 'RMSE': np.float64(34.306208339696155), 'Time taken': 2.0848283767700195}


 31%|███▏      | 11/35 [00:38<01:44,  4.37s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.08162038226895174, 'Adjusted R-Squared': 0.08143428920325213, 'RMSE': np.float64(35.4383159896464), 'Time taken': 1.045236349105835}


 34%|███▍      | 12/35 [02:06<11:25, 29.83s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.02041881576548432, 'Adjusted R-Squared': -0.020625585234293364, 'RMSE': np.float64(37.35521041251704), 'Time taken': 88.03714299201965}


 37%|███▋      | 13/35 [02:06<07:39, 20.87s/it]

{'Model': 'Lars', 'R-Squared': 0.08492677520984082, 'Adjusted R-Squared': 0.08474135212503675, 'RMSE': np.float64(35.374465132793645), 'Time taken': 0.2703068256378174}


 40%|████      | 14/35 [02:07<05:12, 14.86s/it]

{'Model': 'LarsCV', 'R-Squared': 0.08494159860520589, 'Adjusted R-Squared': 0.08475617852409556, 'RMSE': np.float64(35.374178613752385), 'Time taken': 0.96221923828125}


 43%|████▎     | 15/35 [02:07<03:30, 10.51s/it]

{'Model': 'Lasso', 'R-Squared': 0.077436872257581, 'Adjusted R-Squared': 0.07724993147903103, 'RMSE': np.float64(35.518940652613274), 'Time taken': 0.42487025260925293}


 46%|████▌     | 16/35 [02:09<02:27,  7.74s/it]

{'Model': 'LassoCV', 'R-Squared': 0.08494362790069232, 'Adjusted R-Squared': 0.08475820823078206, 'RMSE': np.float64(35.374139389643474), 'Time taken': 1.3226428031921387}


 49%|████▊     | 17/35 [02:09<01:38,  5.50s/it]

{'Model': 'LassoLars', 'R-Squared': 0.07743843201797751, 'Adjusted R-Squared': 0.07725149155548494, 'RMSE': np.float64(35.51891062699296), 'Time taken': 0.2710695266723633}


 51%|█████▏    | 18/35 [02:10<01:10,  4.13s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.08494159860520589, 'Adjusted R-Squared': 0.08475617852409556, 'RMSE': np.float64(35.374178613752385), 'Time taken': 0.9330968856811523}


 54%|█████▍    | 19/35 [02:10<00:48,  3.06s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.08493633271480994, 'Adjusted R-Squared': 0.08475091156666192, 'RMSE': np.float64(35.3742803975725), 'Time taken': 0.5797040462493896}


 57%|█████▋    | 20/35 [02:11<00:34,  2.27s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.08492677520983727, 'Adjusted R-Squared': 0.0847413521250332, 'RMSE': np.float64(35.37446513279371), 'Time taken': 0.4249231815338135}


 60%|██████    | 21/35 [02:16<00:43,  3.08s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.08086438545753616, 'Adjusted R-Squared': 0.08067813920271782, 'RMSE': np.float64(35.4528991435503), 'Time taken': 4.9836976528167725}


 63%|██████▎   | 22/35 [04:03<07:27, 34.39s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.1365277877940252, 'Adjusted R-Squared': 0.13635282072374888, 'RMSE': np.float64(34.36260982881066), 'Time taken': 107.38332200050354}


 66%|██████▌   | 23/35 [04:03<04:49, 24.15s/it]

{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.016516336350183014, 'Adjusted R-Squared': 0.016317051123790116, 'RMSE': np.float64(36.67292355513339), 'Time taken': 0.2801024913787842}


 69%|██████▊   | 24/35 [04:04<03:08, 17.17s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.08419413205076198, 'Adjusted R-Squared': 0.0840085605090356, 'RMSE': np.float64(35.38862338469532), 'Time taken': 0.8838710784912109}


 71%|███████▏  | 25/35 [04:05<02:01, 12.14s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.0848467890634601, 'Adjusted R-Squared': 0.08466134977090611, 'RMSE': np.float64(35.37601113219658), 'Time taken': 0.4066789150238037}


 74%|███████▍  | 26/35 [04:05<01:18,  8.70s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.05537072300959789, 'Adjusted R-Squared': -0.05558457485201984, 'RMSE': np.float64(37.989578858359266), 'Time taken': 0.6770024299621582}


 77%|███████▋  | 27/35 [04:06<00:49,  6.20s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.0924976071989897, 'Adjusted R-Squared': 0.09231371820674128, 'RMSE': np.float64(35.22782639039655), 'Time taken': 0.349346399307251}


 80%|████████  | 28/35 [04:08<00:34,  4.89s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.11744160495193867, 'Adjusted R-Squared': -0.11766803433907991, 'RMSE': np.float64(39.09078362799488), 'Time taken': 1.826141119003296}


 83%|████████▎ | 29/35 [04:42<01:22, 13.73s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.12458000664136326, 'Adjusted R-Squared': 0.12440261856872581, 'RMSE': np.float64(34.59952913474774), 'Time taken': 34.362831115722656}


 86%|████████▌ | 30/35 [04:42<00:48,  9.68s/it]

{'Model': 'Ridge', 'R-Squared': 0.08492678083055027, 'Adjusted R-Squared': 0.08474135774688518, 'RMSE': np.float64(35.374465024152286), 'Time taken': 0.24477887153625488}


 89%|████████▊ | 31/35 [04:43<00:28,  7.01s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.08492683095449671, 'Adjusted R-Squared': 0.08474140788098827, 'RMSE': np.float64(35.37446405531844), 'Time taken': 0.7776789665222168}


 91%|█████████▏| 32/35 [04:44<00:15,  5.09s/it]

{'Model': 'SGDRegressor', 'R-Squared': -1.7997106168339192e+18, 'Adjusted R-Squared': -1.80007529573096e+18, 'RMSE': np.float64(49609322644.50895), 'Time taken': 0.6164460182189941}


 94%|█████████▍| 33/35 [05:56<00:50, 25.18s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.0823124219815744, 'Adjusted R-Squared': 0.08212646914524091, 'RMSE': np.float64(35.42496130308906), 'Time taken': 72.03122019767761}


100%|██████████| 35/35 [05:56<00:00, 10.19s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.08492677520983727, 'Adjusted R-Squared': 0.0847413521250332, 'RMSE': np.float64(35.37446513279371), 'Time taken': 0.4392545223236084}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HistGradientBoostingRegressor,0.14,0.14,34.31,2.08,TurnoutTimeSeconds_mean,2021
MLPRegressor,0.14,0.14,34.36,107.38,TurnoutTimeSeconds_mean,2021
GradientBoostingRegressor,0.13,0.13,34.43,21.84,TurnoutTimeSeconds_mean,2021
DecisionTreeRegressor,0.12,0.12,34.6,0.73,TurnoutTimeSeconds_mean,2021
ExtraTreeRegressor,0.12,0.12,34.6,0.72,TurnoutTimeSeconds_mean,2021
RandomForestRegressor,0.12,0.12,34.6,34.36,TurnoutTimeSeconds_mean,2021
BaggingRegressor,0.12,0.12,34.63,3.8,TurnoutTimeSeconds_mean,2021
AdaBoostRegressor,0.11,0.11,34.86,5.14,TurnoutTimeSeconds_mean,2021
PoissonRegressor,0.09,0.09,35.23,0.35,TurnoutTimeSeconds_mean,2021
LassoCV,0.08,0.08,35.37,1.32,TurnoutTimeSeconds_mean,2021


-----------------------------------------------------
year 2022
-----------------------------------------------------
(342634, 42)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1248893,14,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1
1248894,14,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1


(342634, 18)
cible TurnoutTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neig

  3%|▎         | 1/35 [00:03<01:54,  3.38s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.12599650459486367, 'Adjusted R-Squared': 0.12576686626186184, 'RMSE': np.float64(34.8312319961197), 'Time taken': 3.379530668258667}


  6%|▌         | 2/35 [00:06<01:38,  3.00s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.1318485788834266, 'Adjusted R-Squared': 0.1316204781421979, 'RMSE': np.float64(34.714426222261274), 'Time taken': 2.7314248085021973}


  9%|▊         | 3/35 [00:06<00:59,  1.85s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.09237368217626019, 'Adjusted R-Squared': 0.09213520968077316, 'RMSE': np.float64(35.49488646216231), 'Time taken': 0.4876210689544678}


 14%|█▍        | 5/35 [00:07<00:27,  1.10it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.13344285956965074, 'Adjusted R-Squared': 0.13321517771457192, 'RMSE': np.float64(34.68253664052118), 'Time taken': 0.5508880615234375}
{'Model': 'DummyRegressor', 'R-Squared': -2.205498493657032e-05, 'Adjusted R-Squared': -0.0002848038170397693, 'RMSE': np.float64(37.2577856910285), 'Time taken': 0.14983057975769043}


 17%|█▋        | 6/35 [00:07<00:19,  1.46it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.05414804098238224, 'Adjusted R-Squared': 0.05389952496582484, 'RMSE': np.float64(36.234630333962656), 'Time taken': 0.2513315677642822}


 20%|██        | 7/35 [00:08<00:22,  1.22it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.09229566194847783, 'Adjusted R-Squared': 0.09205716895371918, 'RMSE': np.float64(35.49641201269004), 'Time taken': 1.0823402404785156}


 23%|██▎       | 8/35 [00:09<00:19,  1.37it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.13344285956965074, 'Adjusted R-Squared': 0.13321517771457192, 'RMSE': np.float64(34.68253664052118), 'Time taken': 0.5405144691467285}


 26%|██▌       | 9/35 [00:23<02:10,  5.02s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.14322833522960254, 'Adjusted R-Squared': 0.14300322444012015, 'RMSE': np.float64(34.48615681442885), 'Time taken': 14.443609714508057}


 29%|██▊       | 10/35 [00:25<01:37,  3.92s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.14791214920791973, 'Adjusted R-Squared': 0.14768826905794818, 'RMSE': np.float64(34.39176287332295), 'Time taken': 1.4544415473937988}


 31%|███▏      | 11/35 [00:26<01:12,  3.02s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.08852108165958827, 'Adjusted R-Squared': 0.08828159692014004, 'RMSE': np.float64(35.57013924477113), 'Time taken': 0.9779915809631348}


 34%|███▍      | 12/35 [01:19<07:01, 18.32s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.005384995466507059, 'Adjusted R-Squared': -0.005649153373881166, 'RMSE': np.float64(37.357555548082686), 'Time taken': 53.31371212005615}


 37%|███▋      | 13/35 [01:19<04:42, 12.84s/it]

{'Model': 'Lars', 'R-Squared': 0.09237527313425409, 'Adjusted R-Squared': 0.09213680105678024, 'RMSE': np.float64(35.494855353050006), 'Time taken': 0.21959209442138672}


 40%|████      | 14/35 [01:20<03:14,  9.25s/it]

{'Model': 'LarsCV', 'R-Squared': 0.09235298370961154, 'Adjusted R-Squared': 0.09211450577574654, 'RMSE': np.float64(35.49529119124553), 'Time taken': 0.9686892032623291}


 43%|████▎     | 15/35 [01:20<02:11,  6.57s/it]

{'Model': 'Lasso', 'R-Squared': 0.08306264612850067, 'Adjusted R-Squared': 0.08282172722312198, 'RMSE': np.float64(35.676487013776025), 'Time taken': 0.3395876884460449}


 46%|████▌     | 16/35 [01:22<01:33,  4.93s/it]

{'Model': 'LassoCV', 'R-Squared': 0.09235530205012621, 'Adjusted R-Squared': 0.09211682472538907, 'RMSE': np.float64(35.49524585962281), 'Time taken': 1.1392717361450195}


 49%|████▊     | 17/35 [01:22<01:03,  3.52s/it]

{'Model': 'LassoLars', 'R-Squared': 0.08306349096841703, 'Adjusted R-Squared': 0.0828225722850141, 'RMSE': np.float64(35.67647057812363), 'Time taken': 0.2373497486114502}


 51%|█████▏    | 18/35 [01:23<00:45,  2.70s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.09235298370961154, 'Adjusted R-Squared': 0.09211450577574654, 'RMSE': np.float64(35.49529119124553), 'Time taken': 0.7903144359588623}


 54%|█████▍    | 19/35 [01:23<00:32,  2.04s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.09236329060693171, 'Adjusted R-Squared': 0.09212481538113226, 'RMSE': np.float64(35.49508965510113), 'Time taken': 0.49103403091430664}


 57%|█████▋    | 20/35 [01:23<00:23,  1.54s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.09237604822603762, 'Adjusted R-Squared': 0.09213757635221376, 'RMSE': np.float64(35.4948401971294), 'Time taken': 0.3812229633331299}


 60%|██████    | 21/35 [01:27<00:29,  2.09s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.08760413780762388, 'Adjusted R-Squared': 0.08736441214756285, 'RMSE': np.float64(35.58802645045286), 'Time taken': 3.361271619796753}


 63%|██████▎   | 22/35 [04:37<12:40, 58.52s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.14309743683892318, 'Adjusted R-Squared': 0.14287229165679993, 'RMSE': np.float64(34.48879112775074), 'Time taken': 190.12294578552246}


 66%|██████▌   | 23/35 [04:37<08:12, 41.03s/it]

{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.01966724669862452, 'Adjusted R-Squared': 0.019409671093448155, 'RMSE': np.float64(36.88918051485514), 'Time taken': 0.21996831893920898}


 69%|██████▊   | 24/35 [04:38<05:18, 28.93s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.09133570053518092, 'Adjusted R-Squared': 0.09109695531724482, 'RMSE': np.float64(35.515177033383885), 'Time taken': 0.7184081077575684}


 71%|███████▏  | 25/35 [04:38<03:23, 20.36s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.09081895154622621, 'Adjusted R-Squared': 0.0905800705560913, 'RMSE': np.float64(35.525274174518636), 'Time taken': 0.36638545989990234}


 74%|███████▍  | 26/35 [04:39<02:09, 14.39s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.0532382541977805, 'Adjusted R-Squared': -0.05351498521569886, 'RMSE': np.float64(38.23627385410224), 'Time taken': 0.4648265838623047}


 77%|███████▋  | 27/35 [04:39<01:21, 10.18s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.10013670545769993, 'Adjusted R-Squared': 0.09990027264252865, 'RMSE': np.float64(35.342764738278255), 'Time taken': 0.35178709030151367}


 80%|████████  | 28/35 [04:41<00:52,  7.57s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.09020806979055473, 'Adjusted R-Squared': -0.0904945143701108, 'RMSE': np.float64(38.90155372426134), 'Time taken': 1.475822925567627}


 86%|████████▌ | 30/35 [05:07<00:46,  9.28s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.13335612958252208, 'Adjusted R-Squared': 0.13312842493974297, 'RMSE': np.float64(34.68427221025743), 'Time taken': 26.269768238067627}
{'Model': 'Ridge', 'R-Squared': 0.09237600354486908, 'Adjusted R-Squared': 0.0921375316593055, 'RMSE': np.float64(35.49484107081222), 'Time taken': 0.18750929832458496}


 89%|████████▊ | 31/35 [05:08<00:26,  6.68s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.09237560071405959, 'Adjusted R-Squared': 0.092137128722655, 'RMSE': np.float64(35.494848947650006), 'Time taken': 0.5974235534667969}


 91%|█████████▏| 32/35 [05:08<00:14,  4.82s/it]

{'Model': 'SGDRegressor', 'R-Squared': -1.8897816391627126e+20, 'Adjusted R-Squared': -1.8902781661304377e+20, 'RMSE': np.float64(512174631600.04016), 'Time taken': 0.4740917682647705}


 94%|█████████▍| 33/35 [06:03<00:39, 19.70s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.08899367247218182, 'Adjusted R-Squared': 0.08875431190267902, 'RMSE': np.float64(35.560916705336744), 'Time taken': 54.41700053215027}


100%|██████████| 35/35 [06:03<00:00, 10.38s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.09237604822603762, 'Adjusted R-Squared': 0.09213757635221376, 'RMSE': np.float64(35.4948401971294), 'Time taken': 0.34829068183898926}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HistGradientBoostingRegressor,0.15,0.15,34.39,1.45,TurnoutTimeSeconds_mean,2022
GradientBoostingRegressor,0.14,0.14,34.49,14.44,TurnoutTimeSeconds_mean,2022
MLPRegressor,0.14,0.14,34.49,190.12,TurnoutTimeSeconds_mean,2022
DecisionTreeRegressor,0.13,0.13,34.68,0.55,TurnoutTimeSeconds_mean,2022
ExtraTreeRegressor,0.13,0.13,34.68,0.54,TurnoutTimeSeconds_mean,2022
RandomForestRegressor,0.13,0.13,34.68,26.27,TurnoutTimeSeconds_mean,2022
BaggingRegressor,0.13,0.13,34.71,2.73,TurnoutTimeSeconds_mean,2022
AdaBoostRegressor,0.13,0.13,34.83,3.38,TurnoutTimeSeconds_mean,2022
PoissonRegressor,0.1,0.1,35.34,0.35,TurnoutTimeSeconds_mean,2022
LinearRegression,0.09,0.09,35.49,0.38,TurnoutTimeSeconds_mean,2022


-----------------------------------------------------
year 2023
-----------------------------------------------------
(225027, 42)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1366500,15,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0
1366501,15,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0


(225027, 18)
cible TurnoutTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neig

  3%|▎         | 1/35 [00:04<02:45,  4.87s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': -0.04960069525440902, 'Adjusted R-Squared': -0.05002065685475099, 'RMSE': np.float64(38.39995374737945), 'Time taken': 4.873361825942993}


  6%|▌         | 2/35 [00:06<01:37,  2.94s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.13198956912244242, 'Adjusted R-Squared': 0.13164226461767892, 'RMSE': np.float64(34.920553475764216), 'Time taken': 1.58567476272583}


  9%|▊         | 3/35 [00:06<00:56,  1.75s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.0871286394808981, 'Adjusted R-Squared': 0.08676338541885031, 'RMSE': np.float64(35.81157629571082), 'Time taken': 0.3426947593688965}


 14%|█▍        | 5/35 [00:07<00:24,  1.24it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.13319220691419165, 'Adjusted R-Squared': 0.13284538360355647, 'RMSE': np.float64(34.896353689300206), 'Time taken': 0.33929920196533203}
{'Model': 'DummyRegressor', 'R-Squared': -1.0532731738788215e-05, 'Adjusted R-Squared': -0.000410652534996947, 'RMSE': np.float64(37.481842684359655), 'Time taken': 0.12028694152832031}


 17%|█▋        | 6/35 [00:07<00:17,  1.67it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.05256179429984753, 'Adjusted R-Squared': 0.05218270950418202, 'RMSE': np.float64(36.48329822865079), 'Time taken': 0.18975186347961426}


 20%|██        | 7/35 [00:08<00:18,  1.52it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.08714147149215945, 'Adjusted R-Squared': 0.08677622256439943, 'RMSE': np.float64(35.81132459750607), 'Time taken': 0.773712158203125}


 23%|██▎       | 8/35 [00:08<00:14,  1.81it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.13319220691419165, 'Adjusted R-Squared': 0.13284538360355647, 'RMSE': np.float64(34.896353689300206), 'Time taken': 0.32767224311828613}


 26%|██▌       | 9/35 [00:16<01:17,  3.00s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.14313683911196495, 'Adjusted R-Squared': 0.14279399480369848, 'RMSE': np.float64(34.69559839866171), 'Time taken': 8.375473499298096}


 29%|██▊       | 10/35 [00:17<00:59,  2.37s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.14868415178052263, 'Adjusted R-Squared': 0.1483435270385316, 'RMSE': np.float64(34.583106777437855), 'Time taken': 0.9623157978057861}


 31%|███▏      | 11/35 [00:18<00:45,  1.91s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.08331955912063871, 'Adjusted R-Squared': 0.08295278098615921, 'RMSE': np.float64(35.88621286415991), 'Time taken': 0.8634343147277832}


 37%|███▋      | 13/35 [00:43<02:16,  6.22s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.02738788757966093, 'Adjusted R-Squared': -0.027798961489377882, 'RMSE': np.float64(37.99144979786126), 'Time taken': 24.719086170196533}
{'Model': 'Lars', 'R-Squared': 0.08712702392866167, 'Adjusted R-Squared': 0.08676176922020629, 'RMSE': np.float64(35.81160798442902), 'Time taken': 0.16025757789611816}


 40%|████      | 14/35 [00:44<01:34,  4.51s/it]

{'Model': 'LarsCV', 'R-Squared': 0.08718332181826083, 'Adjusted R-Squared': 0.08681808963546866, 'RMSE': np.float64(35.81050369659788), 'Time taken': 0.5794816017150879}


 43%|████▎     | 15/35 [00:44<01:04,  3.22s/it]

{'Model': 'Lasso', 'R-Squared': 0.08054593121604758, 'Adjusted R-Squared': 0.08017804330980549, 'RMSE': np.float64(35.94046286182815), 'Time taken': 0.23059535026550293}


 49%|████▊     | 17/35 [00:45<00:32,  1.79s/it]

{'Model': 'LassoCV', 'R-Squared': 0.08716891291377504, 'Adjusted R-Squared': 0.08680367496575558, 'RMSE': np.float64(35.81078633171151), 'Time taken': 0.7814342975616455}
{'Model': 'LassoLars', 'R-Squared': 0.0805466128116421, 'Adjusted R-Squared': 0.08017872517811708, 'RMSE': np.float64(35.94044954040921), 'Time taken': 0.17270898818969727}


 51%|█████▏    | 18/35 [00:45<00:24,  1.42s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.08718332181826083, 'Adjusted R-Squared': 0.08681808963546866, 'RMSE': np.float64(35.81050369659788), 'Time taken': 0.5473830699920654}


 54%|█████▍    | 19/35 [00:46<00:17,  1.10s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.08715601607877466, 'Adjusted R-Squared': 0.0867907729705304, 'RMSE': np.float64(35.81103930522316), 'Time taken': 0.3459935188293457}


 57%|█████▋    | 20/35 [00:46<00:12,  1.18it/s]

{'Model': 'LinearRegression', 'R-Squared': 0.08712702392866434, 'Adjusted R-Squared': 0.08676176922020895, 'RMSE': np.float64(35.81160798442896), 'Time taken': 0.2574136257171631}


 60%|██████    | 21/35 [00:48<00:16,  1.14s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.08288283225650439, 'Adjusted R-Squared': 0.08251587938079841, 'RMSE': np.float64(35.8947603395271), 'Time taken': 1.839017391204834}


 66%|██████▌   | 23/35 [02:27<04:17, 21.43s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.14513737354858813, 'Adjusted R-Squared': 0.14479532968533593, 'RMSE': np.float64(34.65507249967209), 'Time taken': 99.14758729934692}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.017155433725365432, 'Adjusted R-Squared': 0.016762182292886174, 'RMSE': np.float64(37.15874749804184), 'Time taken': 0.14978408813476562}


 69%|██████▊   | 24/35 [02:28<02:46, 15.14s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.08677481289877154, 'Adjusted R-Squared': 0.08640941726519247, 'RMSE': np.float64(35.81851586058651), 'Time taken': 0.4543886184692383}


 71%|███████▏  | 25/35 [02:28<01:46, 10.67s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.08609361644794922, 'Adjusted R-Squared': 0.08572794825705099, 'RMSE': np.float64(35.83187231301825), 'Time taken': 0.23243260383605957}


 74%|███████▍  | 26/35 [02:28<01:08,  7.57s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': 0.029918759639049886, 'Adjusted R-Squared': 0.029530615012235528, 'RMSE': np.float64(36.916685310174316), 'Time taken': 0.3563191890716553}
{'Model': 'PoissonRegressor', 'R-Squared': 0.09548528119221156, 'Adjusted R-Squared': 0.09512337075278376, 'RMSE': np.float64(35.64728559568091), 'Time taken': 0.20147085189819336}


 80%|████████  | 28/35 [02:29<00:28,  4.06s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.06868902726065151, 'Adjusted R-Squared': -0.06911662640019611, 'RMSE': np.float64(38.74755662050952), 'Time taken': 1.0227982997894287}


 86%|████████▌ | 30/35 [02:44<00:25,  5.04s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.13318652142140863, 'Adjusted R-Squared': 0.13283969583591915, 'RMSE': np.float64(34.89646813372681), 'Time taken': 14.31465744972229}
{'Model': 'Ridge', 'R-Squared': 0.08712705596633563, 'Adjusted R-Squared': 0.08676180127069899, 'RMSE': np.float64(35.811607356017035), 'Time taken': 0.13988375663757324}


 89%|████████▊ | 31/35 [02:44<00:14,  3.66s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.08712734228828922, 'Adjusted R-Squared': 0.0867620877072145, 'RMSE': np.float64(35.81160173987392), 'Time taken': 0.4542064666748047}


 91%|█████████▏| 32/35 [02:45<00:08,  2.68s/it]

{'Model': 'SGDRegressor', 'R-Squared': -9.605041198672398e+18, 'Adjusted R-Squared': -9.608884325388474e+18, 'RMSE': np.float64(116163118469.7985), 'Time taken': 0.376969575881958}


 94%|█████████▍| 33/35 [03:27<00:29, 14.51s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.08459855223537383, 'Adjusted R-Squared': 0.08423228584597775, 'RMSE': np.float64(35.86116910948555), 'Time taken': 42.121702671051025}


100%|██████████| 35/35 [03:27<00:00,  5.93s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.08712702392866434, 'Adjusted R-Squared': 0.08676176922020895, 'RMSE': np.float64(35.81160798442896), 'Time taken': 0.2899191379547119}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HistGradientBoostingRegressor,0.15,0.15,34.58,0.96,TurnoutTimeSeconds_mean,2023
MLPRegressor,0.14,0.15,34.66,99.15,TurnoutTimeSeconds_mean,2023
GradientBoostingRegressor,0.14,0.14,34.7,8.38,TurnoutTimeSeconds_mean,2023
DecisionTreeRegressor,0.13,0.13,34.9,0.34,TurnoutTimeSeconds_mean,2023
ExtraTreeRegressor,0.13,0.13,34.9,0.33,TurnoutTimeSeconds_mean,2023
RandomForestRegressor,0.13,0.13,34.9,14.31,TurnoutTimeSeconds_mean,2023
BaggingRegressor,0.13,0.13,34.92,1.59,TurnoutTimeSeconds_mean,2023
PoissonRegressor,0.1,0.1,35.65,0.2,TurnoutTimeSeconds_mean,2023
LarsCV,0.09,0.09,35.81,0.58,TurnoutTimeSeconds_mean,2023
LassoLarsCV,0.09,0.09,35.81,0.55,TurnoutTimeSeconds_mean,2023


-----------------------------------------------------
year 2021
-----------------------------------------------------
(444247, 45)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Postcode_district_5', 'Postcode_district_6', 'Postcode_district_7', 'Postcode_district_8', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1147280,13,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,0
1147281,13,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0


(444247, 22)
cible TravelTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:09<05:28,  9.65s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': -0.13706815777533254, 'Adjusted R-Squared': -0.1373497782226185, 'RMSE': np.float64(143.5583110000657), 'Time taken': 9.6482253074646}


  6%|▌         | 2/35 [00:21<06:00, 10.93s/it]

{'Model': 'BaggingRegressor', 'R-Squared': -0.12653105653915664, 'Adjusted R-Squared': -0.1268100672368484, 'RMSE': np.float64(142.8915922712637), 'Time taken': 11.833913326263428}


  9%|▊         | 3/35 [00:22<03:24,  6.38s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.013327806406410847, 'Adjusted R-Squared': 0.013083434894831436, 'RMSE': np.float64(133.72774943953982), 'Time taken': 0.9574871063232422}


 11%|█▏        | 4/35 [00:24<02:23,  4.62s/it]

{'Model': 'DecisionTreeRegressor', 'R-Squared': -0.8432904149664138, 'Adjusted R-Squared': -0.843746947204689, 'RMSE': np.float64(182.78138911795193), 'Time taken': 1.9331340789794922}


 14%|█▍        | 5/35 [00:24<01:31,  3.05s/it]

{'Model': 'DummyRegressor', 'R-Squared': -8.56828541095922e-06, 'Adjusted R-Squared': -0.0002562428494770064, 'RMSE': np.float64(134.6284828773335), 'Time taken': 0.2597684860229492}


 17%|█▋        | 6/35 [00:25<01:03,  2.18s/it]

{'Model': 'ElasticNet', 'R-Squared': 0.009338054081781788, 'Adjusted R-Squared': 0.009092694418501357, 'RMSE': np.float64(133.99785045595354), 'Time taken': 0.4893457889556885}


 20%|██        | 7/35 [00:26<00:58,  2.08s/it]

{'Model': 'ElasticNetCV', 'R-Squared': 0.013325512247449933, 'Adjusted R-Squared': 0.01308114016767048, 'RMSE': np.float64(133.72790490785968), 'Time taken': 1.8572185039520264}


 23%|██▎       | 8/35 [00:28<00:54,  2.02s/it]

{'Model': 'ExtraTreeRegressor', 'R-Squared': -0.8278733853946469, 'Adjusted R-Squared': -0.8283260992595607, 'RMSE': np.float64(182.01540480590586), 'Time taken': 1.9148643016815186}


 26%|██▌       | 9/35 [01:03<05:19, 12.28s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.046878685376884555, 'Adjusted R-Squared': 0.04664262349342885, 'RMSE': np.float64(131.4344409066989), 'Time taken': 34.8347544670105}


 29%|██▊       | 10/35 [01:06<03:51,  9.28s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.09416154721038605, 'Adjusted R-Squared': 0.09393719598878247, 'RMSE': np.float64(128.13284451021914), 'Time taken': 2.5496082305908203}


 31%|███▏      | 11/35 [01:07<02:45,  6.89s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.0003046327693331241, 'Adjusted R-Squared': 5.703577653715097e-05, 'RMSE': np.float64(134.60739851553205), 'Time taken': 1.4731979370117188}


 34%|███▍      | 12/35 [02:56<14:29, 37.80s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.07479002955833014, 'Adjusted R-Squared': -0.07505622542952128, 'RMSE': np.float64(139.57155133371376), 'Time taken': 108.51253652572632}


 37%|███▋      | 13/35 [02:56<09:41, 26.45s/it]

{'Model': 'Lars', 'R-Squared': 0.013329299062370237, 'Adjusted R-Squared': 0.013084927920480549, 'RMSE': np.float64(133.72764828659413), 'Time taken': 0.3296382427215576}


 40%|████      | 14/35 [02:57<06:35, 18.86s/it]

{'Model': 'LarsCV', 'R-Squared': 0.013329299062370237, 'Adjusted R-Squared': 0.013084927920480549, 'RMSE': np.float64(133.72764828659413), 'Time taken': 1.3022429943084717}


 43%|████▎     | 15/35 [02:58<04:26, 13.33s/it]

{'Model': 'Lasso', 'R-Squared': 0.011778808568376098, 'Adjusted R-Squared': 0.011534053412719514, 'RMSE': np.float64(133.83267930374208), 'Time taken': 0.5236828327178955}


 46%|████▌     | 16/35 [03:00<03:07,  9.85s/it]

{'Model': 'LassoCV', 'R-Squared': 0.013330147922376856, 'Adjusted R-Squared': 0.013085776990726439, 'RMSE': np.float64(133.72759076179042), 'Time taken': 1.755303144454956}


 49%|████▊     | 17/35 [03:00<02:05,  6.99s/it]

{'Model': 'LassoLars', 'R-Squared': 0.011778880363285449, 'Adjusted R-Squared': 0.01153412522541053, 'RMSE': np.float64(133.8326744422266), 'Time taken': 0.33197641372680664}


 51%|█████▏    | 18/35 [03:01<01:30,  5.31s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.013329299062370237, 'Adjusted R-Squared': 0.013084927920480549, 'RMSE': np.float64(133.72764828659413), 'Time taken': 1.3944005966186523}


 54%|█████▍    | 19/35 [03:02<01:02,  3.93s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.013329299062370237, 'Adjusted R-Squared': 0.013084927920480549, 'RMSE': np.float64(133.72764828659413), 'Time taken': 0.7274787425994873}


 57%|█████▋    | 20/35 [03:03<00:43,  2.93s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.01332929906237057, 'Adjusted R-Squared': 0.013084927920480882, 'RMSE': np.float64(133.7276482865941), 'Time taken': 0.5811483860015869}


 60%|██████    | 21/35 [03:04<00:34,  2.44s/it]

{'Model': 'LinearSVR', 'R-Squared': -0.006360279347285225, 'Adjusted R-Squared': -0.006609527055140285, 'RMSE': np.float64(135.05536304976215), 'Time taken': 1.30133056640625}


 63%|██████▎   | 22/35 [07:36<18:04, 83.44s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.11081598047651775, 'Adjusted R-Squared': 0.1105957540990703, 'RMSE': np.float64(126.94947689372721), 'Time taken': 272.3218688964844}


 66%|██████▌   | 23/35 [07:37<11:41, 58.50s/it]

{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.0034368174543716323, 'Adjusted R-Squared': 0.003189996217405233, 'RMSE': np.float64(134.39636122901572), 'Time taken': 0.3366250991821289}


 69%|██████▊   | 24/35 [07:38<07:34, 41.27s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.008546242904273638, 'Adjusted R-Squared': 0.00830068713118537, 'RMSE': np.float64(134.05139031568788), 'Time taken': 1.0877277851104736}


 71%|███████▏  | 25/35 [07:38<04:50, 29.10s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.012932140121959201, 'Adjusted R-Squared': 0.01268767061474485, 'RMSE': np.float64(133.7545598932182), 'Time taken': 0.68735671043396}


 74%|███████▍  | 26/35 [07:39<03:05, 20.59s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.061255481434038694, 'Adjusted R-Squared': -0.06151832517064526, 'RMSE': np.float64(138.68997325448532), 'Time taken': 0.7359216213226318}


 77%|███████▋  | 27/35 [07:40<01:56, 14.54s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.013079253470130281, 'Adjusted R-Squared': 0.01283482039883821, 'RMSE': np.float64(133.74459208088973), 'Time taken': 0.42414379119873047}


 80%|████████  | 28/35 [07:42<01:15, 10.84s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.43676719299132727, 'Adjusted R-Squared': -0.43712304063051155, 'RMSE': np.float64(161.37204125935082), 'Time taken': 2.2237038612365723}


 83%|████████▎ | 29/35 [09:27<03:55, 39.22s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': -0.052076797638889616, 'Adjusted R-Squared': -0.052337368068466894, 'RMSE': np.float64(138.088913523184), 'Time taken': 105.43989872932434}


 86%|████████▌ | 30/35 [09:28<02:17, 27.55s/it]

{'Model': 'Ridge', 'R-Squared': 0.013329297797645245, 'Adjusted R-Squared': 0.013084926655442253, 'RMSE': np.float64(133.72764837230088), 'Time taken': 0.301379919052124}


 89%|████████▊ | 31/35 [09:29<01:18, 19.62s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.013329286377982252, 'Adjusted R-Squared': 0.013084915232950967, 'RMSE': np.float64(133.72764914617846), 'Time taken': 1.137859582901001}


 91%|█████████▏| 32/35 [09:31<00:43, 14.41s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.011497231465150248, 'Adjusted R-Squared': 0.011252406570605, 'RMSE': np.float64(133.85174463774445), 'Time taken': 2.256321430206299}


 94%|█████████▍| 33/35 [10:50<01:07, 33.70s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': -0.043864914143246336, 'Adjusted R-Squared': -0.04412345071558543, 'RMSE': np.float64(137.5489379605774), 'Time taken': 78.69642782211304}


100%|██████████| 35/35 [10:50<00:00, 18.60s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.01332929906237057, 'Adjusted R-Squared': 0.013084927920480882, 'RMSE': np.float64(133.7276482865941), 'Time taken': 0.6677167415618896}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MLPRegressor,0.11,0.11,126.95,272.32,TravelTimeSeconds_mean,2021
HistGradientBoostingRegressor,0.09,0.09,128.13,2.55,TravelTimeSeconds_mean,2021
GradientBoostingRegressor,0.05,0.05,131.43,34.83,TravelTimeSeconds_mean,2021
LassoCV,0.01,0.01,133.73,1.76,TravelTimeSeconds_mean,2021
LinearRegression,0.01,0.01,133.73,0.58,TravelTimeSeconds_mean,2021
TransformedTargetRegressor,0.01,0.01,133.73,0.67,TravelTimeSeconds_mean,2021
LassoLarsIC,0.01,0.01,133.73,0.73,TravelTimeSeconds_mean,2021
LassoLarsCV,0.01,0.01,133.73,1.39,TravelTimeSeconds_mean,2021
Lars,0.01,0.01,133.73,0.33,TravelTimeSeconds_mean,2021
LarsCV,0.01,0.01,133.73,1.3,TravelTimeSeconds_mean,2021


-----------------------------------------------------
year 2022
-----------------------------------------------------
(342634, 45)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Postcode_district_5', 'Postcode_district_6', 'Postcode_district_7', 'Postcode_district_8', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1248893,14,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1
1248894,14,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1


(342634, 22)
cible TravelTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:05<02:54,  5.12s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': -0.09976261123865893, 'Adjusted R-Squared': -0.10011579904444035, 'RMSE': np.float64(142.1572342908685), 'Time taken': 5.124542236328125}


  6%|▌         | 2/35 [00:13<03:53,  7.07s/it]

{'Model': 'BaggingRegressor', 'R-Squared': -0.12746346109395645, 'Adjusted R-Squared': -0.12782554500356835, 'RMSE': np.float64(143.9364304513902), 'Time taken': 8.426851987838745}


  9%|▊         | 3/35 [00:14<02:14,  4.20s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.01295450042652746, 'Adjusted R-Squared': 0.012637511623091013, 'RMSE': np.float64(134.67533871285048), 'Time taken': 0.795184850692749}


 14%|█▍        | 5/35 [00:15<01:01,  2.03s/it]

{'Model': 'DecisionTreeRegressor', 'R-Squared': -0.8337291336508552, 'Adjusted R-Squared': -0.834318034166742, 'RMSE': np.float64(183.56381820291446), 'Time taken': 1.338803768157959}
{'Model': 'DummyRegressor', 'R-Squared': -4.352980250033056e-06, 'Adjusted R-Squared': -0.00032550350818350005, 'RMSE': np.float64(135.5565275700215), 'Time taken': 0.18893790245056152}


 17%|█▋        | 6/35 [00:16<00:42,  1.46s/it]

{'Model': 'ElasticNet', 'R-Squared': 0.009070930958758772, 'Adjusted R-Squared': 0.008752694950366569, 'RMSE': np.float64(134.94002133467066), 'Time taken': 0.3334999084472656}


 20%|██        | 7/35 [00:17<00:40,  1.43s/it]

{'Model': 'ElasticNetCV', 'R-Squared': 0.012952333366476276, 'Adjusted R-Squared': 0.01263534386709042, 'RMSE': np.float64(134.67548655273544), 'Time taken': 1.3890588283538818}


 23%|██▎       | 8/35 [00:18<00:38,  1.41s/it]

{'Model': 'ExtraTreeRegressor', 'R-Squared': -0.816608296540505, 'Adjusted R-Squared': -0.8171916987144494, 'RMSE': np.float64(182.7048755365297), 'Time taken': 1.3485069274902344}


 26%|██▌       | 9/35 [00:42<03:36,  8.31s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.046460402249899224, 'Adjusted R-Squared': 0.0461541738376825, 'RMSE': np.float64(132.36978288312542), 'Time taken': 23.48876714706421}


 29%|██▊       | 10/35 [00:44<02:39,  6.36s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.09376845473790096, 'Adjusted R-Squared': 0.09347741926558173, 'RMSE': np.float64(129.04437481751597), 'Time taken': 2.001251459121704}


 31%|███▏      | 11/35 [00:45<01:54,  4.76s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.0008774071656969529, 'Adjusted R-Squared': 0.0005565398142671762, 'RMSE': np.float64(135.49675047829257), 'Time taken': 1.113595962524414}


 34%|███▍      | 12/35 [01:50<08:49, 23.04s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.07372229760386251, 'Adjusted R-Squared': -0.07406712258557557, 'RMSE': np.float64(140.46414375812108), 'Time taken': 64.8598039150238}


 37%|███▋      | 13/35 [01:50<05:55, 16.15s/it]

{'Model': 'Lars', 'R-Squared': 0.012954764574578892, 'Adjusted R-Squared': 0.012637775855973366, 'RMSE': np.float64(134.67532069228776), 'Time taken': 0.27634668350219727}


 40%|████      | 14/35 [01:51<04:03, 11.60s/it]

{'Model': 'LarsCV', 'R-Squared': 0.012954764574578892, 'Adjusted R-Squared': 0.012637775855973366, 'RMSE': np.float64(134.67532069228776), 'Time taken': 1.09328031539917}


 43%|████▎     | 15/35 [01:52<02:44,  8.23s/it]

{'Model': 'Lasso', 'R-Squared': 0.011227976273173512, 'Adjusted R-Squared': 0.010910432998007313, 'RMSE': np.float64(134.79307322150228), 'Time taken': 0.41236305236816406}


 46%|████▌     | 16/35 [01:53<01:57,  6.20s/it]

{'Model': 'LassoCV', 'R-Squared': 0.012954061870758982, 'Adjusted R-Squared': 0.012637072926480752, 'RMSE': np.float64(134.675368631755), 'Time taken': 1.4993531703948975}


 49%|████▊     | 17/35 [01:53<01:19,  4.42s/it]

{'Model': 'LassoLars', 'R-Squared': 0.011228042329009869, 'Adjusted R-Squared': 0.010910499075057478, 'RMSE': np.float64(134.79306871901377), 'Time taken': 0.2809891700744629}


 51%|█████▏    | 18/35 [01:55<00:57,  3.40s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.012954764574578892, 'Adjusted R-Squared': 0.012637775855973366, 'RMSE': np.float64(134.67532069228776), 'Time taken': 1.0092275142669678}


 54%|█████▍    | 19/35 [01:55<00:41,  2.58s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.012954764574578892, 'Adjusted R-Squared': 0.012637775855973366, 'RMSE': np.float64(134.67532069228776), 'Time taken': 0.6691567897796631}


 57%|█████▋    | 20/35 [01:56<00:29,  1.94s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.012954764574578559, 'Adjusted R-Squared': 0.012637775855973032, 'RMSE': np.float64(134.67532069228778), 'Time taken': 0.46663761138916016}


 60%|██████    | 21/35 [01:57<00:23,  1.66s/it]

{'Model': 'LinearSVR', 'R-Squared': -0.0065263538622155615, 'Adjusted R-Squared': -0.006849598925058098, 'RMSE': np.float64(135.99785712663987), 'Time taken': 0.9789173603057861}


 63%|██████▎   | 22/35 [05:59<16:02, 74.03s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.10821285559284022, 'Adjusted R-Squared': 0.10792645892728858, 'RMSE': np.float64(128.011826395485), 'Time taken': 242.81023597717285}


 66%|██████▌   | 23/35 [06:00<10:22, 51.90s/it]

{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.0031963851873932914, 'Adjusted R-Squared': 0.0028762625737376313, 'RMSE': np.float64(135.33941416925242), 'Time taken': 0.27143192291259766}


 69%|██████▊   | 24/35 [06:01<06:42, 36.59s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.008202775447983202, 'Adjusted R-Squared': 0.007884260632204043, 'RMSE': np.float64(134.9991190437905), 'Time taken': 0.8738996982574463}


 71%|███████▏  | 25/35 [06:01<04:17, 25.73s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.012199396732609946, 'Adjusted R-Squared': 0.011882165428279134, 'RMSE': np.float64(134.72684312876436), 'Time taken': 0.39042234420776367}


 74%|███████▍  | 26/35 [06:02<02:43, 18.22s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.08753992684203893, 'Adjusted R-Squared': -0.08788918934336021, 'RMSE': np.float64(141.36506435359559), 'Time taken': 0.7066056728363037}


 77%|███████▋  | 27/35 [06:02<01:42, 12.86s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.012672300765965483, 'Adjusted R-Squared': 0.012355221334353628, 'RMSE': np.float64(134.69458940535247), 'Time taken': 0.348522424697876}


 80%|████████  | 28/35 [06:04<01:06,  9.50s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -2.2018692895773015, 'Adjusted R-Squared': -2.2028975671139515, 'RMSE': np.float64(242.56117596970725), 'Time taken': 1.6744086742401123}


 83%|████████▎ | 29/35 [07:19<02:55, 29.26s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': -0.05282613615402276, 'Adjusted R-Squared': -0.0531642503516665, 'RMSE': np.float64(139.09061230153523), 'Time taken': 75.36422085762024}


 86%|████████▌ | 30/35 [07:19<01:42, 20.56s/it]

{'Model': 'Ridge', 'R-Squared': 0.012954764923156614, 'Adjusted R-Squared': 0.012637776204662998, 'RMSE': np.float64(134.67532066850728), 'Time taken': 0.2379443645477295}


 89%|████████▊ | 31/35 [07:20<00:58, 14.65s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.012954767996503525, 'Adjusted R-Squared': 0.012637779278996897, 'RMSE': np.float64(134.67532045883908), 'Time taken': 0.8553192615509033}


 91%|█████████▏| 32/35 [07:22<00:32, 10.79s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.011717840302035931, 'Adjusted R-Squared': 0.011400454346276456, 'RMSE': np.float64(134.75967904334846), 'Time taken': 1.7886035442352295}


 94%|█████████▍| 33/35 [08:18<00:48, 24.31s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': -0.03998260568039669, 'Adjusted R-Squared': -0.04031659518940289, 'RMSE': np.float64(138.23961891097332), 'Time taken': 55.857672452926636}


100%|██████████| 35/35 [08:18<00:00, 14.26s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.012954764574578559, 'Adjusted R-Squared': 0.012637775855973032, 'RMSE': np.float64(134.67532069228778), 'Time taken': 0.6758930683135986}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MLPRegressor,0.11,0.11,128.01,242.81,TravelTimeSeconds_mean,2022
HistGradientBoostingRegressor,0.09,0.09,129.04,2.0,TravelTimeSeconds_mean,2022
GradientBoostingRegressor,0.05,0.05,132.37,23.49,TravelTimeSeconds_mean,2022
RidgeCV,0.01,0.01,134.68,0.86,TravelTimeSeconds_mean,2022
Ridge,0.01,0.01,134.68,0.24,TravelTimeSeconds_mean,2022
LassoLarsCV,0.01,0.01,134.68,1.01,TravelTimeSeconds_mean,2022
LassoLarsIC,0.01,0.01,134.68,0.67,TravelTimeSeconds_mean,2022
LarsCV,0.01,0.01,134.68,1.09,TravelTimeSeconds_mean,2022
Lars,0.01,0.01,134.68,0.28,TravelTimeSeconds_mean,2022
LinearRegression,0.01,0.01,134.68,0.47,TravelTimeSeconds_mean,2022


-----------------------------------------------------
year 2023
-----------------------------------------------------
(225027, 45)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Postcode_district_5', 'Postcode_district_6', 'Postcode_district_7', 'Postcode_district_8', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1366500,15,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0
1366501,15,0,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,1,1,0,0


(225027, 22)
cible TravelTimeSeconds_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:04<02:16,  4.00s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': -0.12367442260931893, 'Adjusted R-Squared': -0.12422398216064723, 'RMSE': np.float64(141.8381235124389), 'Time taken': 4.004483938217163}


  6%|▌         | 2/35 [00:08<02:28,  4.48s/it]

{'Model': 'BaggingRegressor', 'R-Squared': -0.13028314239413663, 'Adjusted R-Squared': -0.13083593409617222, 'RMSE': np.float64(142.254611680655), 'Time taken': 4.819693088531494}


  9%|▊         | 3/35 [00:09<01:24,  2.66s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.013822844614997831, 'Adjusted R-Squared': 0.013340531354022134, 'RMSE': np.float64(132.87711745725557), 'Time taken': 0.47617077827453613}


 14%|█▍        | 5/35 [00:10<00:38,  1.30s/it]

{'Model': 'DecisionTreeRegressor', 'R-Squared': -0.8549668654210252, 'Adjusted R-Squared': -0.8558740808366103, 'RMSE': np.float64(182.2387834085472), 'Time taken': 0.8557751178741455}
{'Model': 'DummyRegressor', 'R-Squared': -1.7971754026957498e-05, 'Adjusted R-Squared': -0.0005070541935838779, 'RMSE': np.float64(133.80632155935763), 'Time taken': 0.14236235618591309}


 17%|█▋        | 6/35 [00:10<00:27,  1.07it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.009986945099905498, 'Adjusted R-Squared': 0.009502755801552687, 'RMSE': np.float64(133.13529043632343), 'Time taken': 0.22540974617004395}


 20%|██        | 7/35 [00:11<00:26,  1.07it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.013823174716916187, 'Adjusted R-Squared': 0.01334086161738468, 'RMSE': np.float64(132.87709521835316), 'Time taken': 0.9431953430175781}


 23%|██▎       | 8/35 [00:12<00:24,  1.09it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': -0.8575034473501655, 'Adjusted R-Squared': -0.8584119033411333, 'RMSE': np.float64(182.3633424209739), 'Time taken': 0.8811681270599365}


 26%|██▌       | 9/35 [00:25<02:00,  4.62s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.04748228493112594, 'Adjusted R-Squared': 0.04701643361548413, 'RMSE': np.float64(130.5898009880598), 'Time taken': 12.754282236099243}


 29%|██▊       | 10/35 [00:26<01:30,  3.63s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.0961850174860912, 'Adjusted R-Squared': 0.09574298539362724, 'RMSE': np.float64(127.20743553559181), 'Time taken': 1.4034347534179688}


 31%|███▏      | 11/35 [00:27<01:05,  2.71s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.0024839597141601555, 'Adjusted R-Squared': 0.0019961009033585553, 'RMSE': np.float64(133.63883261908182), 'Time taken': 0.6317944526672363}


 37%|███▋      | 13/35 [00:55<02:39,  7.26s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.08234086608744651, 'Adjusted R-Squared': -0.08287021048541732, 'RMSE': np.float64(139.2049752773983), 'Time taken': 27.764297485351562}
{'Model': 'Lars', 'R-Squared': 0.013818913832822077, 'Adjusted R-Squared': 0.013336598649404396, 'RMSE': np.float64(132.87738227300474), 'Time taken': 0.18549513816833496}


 40%|████      | 14/35 [00:55<01:50,  5.27s/it]

{'Model': 'LarsCV', 'R-Squared': 0.013819476804246222, 'Adjusted R-Squared': 0.013337161896162963, 'RMSE': np.float64(132.87734434580207), 'Time taken': 0.6809659004211426}


 43%|████▎     | 15/35 [00:56<01:15,  3.77s/it]

{'Model': 'Lasso', 'R-Squared': 0.012216963613244092, 'Adjusted R-Squared': 0.011733864958185292, 'RMSE': np.float64(132.985261333612), 'Time taken': 0.29636144638061523}


 46%|████▌     | 16/35 [00:56<00:55,  2.91s/it]

{'Model': 'LassoCV', 'R-Squared': 0.01381913533166923, 'Adjusted R-Squared': 0.013336820256580784, 'RMSE': np.float64(132.87736735070038), 'Time taken': 0.9127323627471924}


 49%|████▊     | 17/35 [00:57<00:37,  2.10s/it]

{'Model': 'LassoLars', 'R-Squared': 0.012216999748581037, 'Adjusted R-Squared': 0.0117339011111951, 'RMSE': np.float64(132.9852589011612), 'Time taken': 0.21477746963500977}


 51%|█████▏    | 18/35 [00:57<00:28,  1.68s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.013819476804246222, 'Adjusted R-Squared': 0.013337161896162963, 'RMSE': np.float64(132.87734434580207), 'Time taken': 0.6944565773010254}


 54%|█████▍    | 19/35 [00:58<00:20,  1.31s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.013818913832822077, 'Adjusted R-Squared': 0.013336598649404396, 'RMSE': np.float64(132.87738227300474), 'Time taken': 0.434159517288208}


 57%|█████▋    | 20/35 [00:58<00:15,  1.01s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.013818913832822965, 'Adjusted R-Squared': 0.013336598649405285, 'RMSE': np.float64(132.87738227300466), 'Time taken': 0.31607985496520996}


 60%|██████    | 21/35 [00:59<00:12,  1.13it/s]

{'Model': 'LinearSVR', 'R-Squared': -0.00588417667556107, 'Adjusted R-Squared': -0.006376128121370783, 'RMSE': np.float64(134.19820828591767), 'Time taken': 0.5862205028533936}


 66%|██████▌   | 23/35 [03:15<05:47, 28.98s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.1054624273023429, 'Adjusted R-Squared': 0.10502493254656076, 'RMSE': np.float64(126.55287698658795), 'Time taken': 135.6068217754364}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.0031582828313471145, 'Adjusted R-Squared': 0.0026707538142137777, 'RMSE': np.float64(133.59365490498377), 'Time taken': 0.1908886432647705}


 69%|██████▊   | 24/35 [03:15<03:44, 20.45s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.009131513899872101, 'Adjusted R-Squared': 0.008646906232659957, 'RMSE': np.float64(133.19279649130686), 'Time taken': 0.5573906898498535}


 71%|███████▏  | 25/35 [03:15<02:23, 14.39s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.01299317019672852, 'Adjusted R-Squared': 0.01251045116385674, 'RMSE': np.float64(132.93300070650164), 'Time taken': 0.2691938877105713}


 74%|███████▍  | 26/35 [03:16<01:31, 10.20s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.15979297083577948, 'Adjusted R-Squared': -0.16036019501732324, 'RMSE': np.float64(144.09966309396128), 'Time taken': 0.41540074348449707}


 77%|███████▋  | 27/35 [03:16<00:57,  7.21s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.013497503051403514, 'Adjusted R-Squared': 0.013015030674441763, 'RMSE': np.float64(132.89903384624373), 'Time taken': 0.24283719062805176}


 80%|████████  | 28/35 [03:17<00:37,  5.41s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.9650369894058146, 'Adjusted R-Squared': -0.9659980372186978, 'RMSE': np.float64(187.56771778913318), 'Time taken': 1.184629201889038}


 86%|████████▌ | 30/35 [04:03<01:00, 12.17s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': -0.053842427762670786, 'Adjusted R-Squared': -0.05435783432538965, 'RMSE': np.float64(137.36009070692867), 'Time taken': 45.134113788604736}
{'Model': 'Ridge', 'R-Squared': 0.013818919663518336, 'Adjusted R-Squared': 0.013336604482952263, 'RMSE': np.float64(132.87738188019267), 'Time taken': 0.15484118461608887}


 89%|████████▊ | 31/35 [04:03<00:34,  8.69s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.013818971977954009, 'Adjusted R-Squared': 0.01333665682297358, 'RMSE': np.float64(132.8773783557865), 'Time taken': 0.5757279396057129}


 91%|█████████▏| 32/35 [04:04<00:19,  6.37s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.01158432074112914, 'Adjusted R-Squared': 0.011100912677111774, 'RMSE': np.float64(133.02784088190975), 'Time taken': 0.9390981197357178}


 94%|█████████▍| 33/35 [04:43<00:32, 16.29s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': -0.04003677432247521, 'Adjusted R-Squared': -0.04054542890387469, 'RMSE': np.float64(136.45739527014638), 'Time taken': 39.445831298828125}


100%|██████████| 35/35 [04:44<00:00,  8.12s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.013818913832822965, 'Adjusted R-Squared': 0.013336598649405285, 'RMSE': np.float64(132.87738227300466), 'Time taken': 0.3268747329711914}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MLPRegressor,0.11,0.11,126.55,135.61,TravelTimeSeconds_mean,2023
HistGradientBoostingRegressor,0.1,0.1,127.21,1.4,TravelTimeSeconds_mean,2023
GradientBoostingRegressor,0.05,0.05,130.59,12.75,TravelTimeSeconds_mean,2023
ElasticNetCV,0.01,0.01,132.88,0.94,TravelTimeSeconds_mean,2023
BayesianRidge,0.01,0.01,132.88,0.48,TravelTimeSeconds_mean,2023
LassoLarsCV,0.01,0.01,132.88,0.69,TravelTimeSeconds_mean,2023
LarsCV,0.01,0.01,132.88,0.68,TravelTimeSeconds_mean,2023
LassoCV,0.01,0.01,132.88,0.91,TravelTimeSeconds_mean,2023
RidgeCV,0.01,0.01,132.88,0.58,TravelTimeSeconds_mean,2023
Ridge,0.01,0.01,132.88,0.15,TravelTimeSeconds_mean,2023


-----------------------------------------------------
year 2021
-----------------------------------------------------
(444247, 40)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'PropertyType_0', 'PropertyType_1', 'PropertyType_2', 'PropertyType_3', 'PropertyType_4', 'PropertyType_5', 'PropertyType_6', 'StopCode_0', 'StopCode_1', 'StopCode_2', 'StopCode_3', 'StopCode_4']


Unnamed: 0,CalYear,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4
1147280,13,0,0,0,0,0,1,0,0,0,0,1,1
1147281,13,0,0,0,0,0,1,0,0,0,0,1,0


(444247, 13)
cible PumpSecondsOnSite_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:02<01:23,  2.46s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.15255295783128398, 'Adjusted R-Squared': 0.15242894491368086, 'RMSE': np.float64(1357.445862758935), 'Time taken': 2.4577600955963135}


  6%|▌         | 2/35 [00:04<01:14,  2.26s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.2217585139443552, 'Adjusted R-Squared': 0.2216446283650999, 'RMSE': np.float64(1300.8386235687547), 'Time taken': 2.1217217445373535}


  9%|▊         | 3/35 [00:05<00:45,  1.44s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.14086984733755425, 'Adjusted R-Squared': 0.14074412474778641, 'RMSE': np.float64(1366.7708725339023), 'Time taken': 0.45204591751098633}


 14%|█▍        | 5/35 [00:05<00:21,  1.38it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.22075163542184917, 'Adjusted R-Squared': 0.2206376024989405, 'RMSE': np.float64(1301.6798554501922), 'Time taken': 0.4674990177154541}
{'Model': 'DummyRegressor', 'R-Squared': -8.295197690966205e-07, 'Adjusted R-Squared': -0.00014716671171566276, 'RMSE': np.float64(1474.5734640912024), 'Time taken': 0.14567017555236816}


 17%|█▋        | 6/35 [00:05<00:16,  1.73it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.1284439072774073, 'Adjusted R-Squared': 0.12831636631197219, 'RMSE': np.float64(1376.6194638952686), 'Time taken': 0.29016613960266113}


 20%|██        | 7/35 [00:07<00:21,  1.30it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.12933971714284775, 'Adjusted R-Squared': 0.12921230726760413, 'RMSE': np.float64(1375.911817912051), 'Time taken': 1.1592648029327393}


 23%|██▎       | 8/35 [00:07<00:18,  1.49it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.22068210149543233, 'Adjusted R-Squared': 0.22056805839713256, 'RMSE': np.float64(1301.7379299334848), 'Time taken': 0.46701788902282715}


 26%|██▌       | 9/35 [00:18<01:39,  3.83s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.20882673834974552, 'Adjusted R-Squared': 0.2087109603723326, 'RMSE': np.float64(1311.6018931320532), 'Time taken': 10.755483627319336}


 29%|██▊       | 10/35 [00:19<01:16,  3.06s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.22165739971462795, 'Adjusted R-Squared': 0.2215434993386125, 'RMSE': np.float64(1300.923127559893), 'Time taken': 1.3307335376739502}


 31%|███▏      | 11/35 [00:20<00:59,  2.49s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.06493465152178912, 'Adjusted R-Squared': 0.06479781679791341, 'RMSE': np.float64(1425.893915215041), 'Time taken': 1.2087676525115967}


 37%|███▋      | 13/35 [09:45<44:22, 121.01s/it]  

{'Model': 'KNeighborsRegressor', 'R-Squared': 0.10965417625817009, 'Adjusted R-Squared': 0.1095238856585411, 'RMSE': np.float64(1391.3794894605912), 'Time taken': 564.7160489559174}
{'Model': 'Lars', 'R-Squared': 0.14086999772850328, 'Adjusted R-Squared': 0.14074427516074328, 'RMSE': np.float64(1366.7707529071047), 'Time taken': 0.18317365646362305}


 40%|████      | 14/35 [09:46<29:39, 84.72s/it] 

{'Model': 'LarsCV', 'R-Squared': 0.14086999772850328, 'Adjusted R-Squared': 0.14074427516074328, 'RMSE': np.float64(1366.7707529071047), 'Time taken': 0.846111536026001}


 43%|████▎     | 15/35 [09:47<19:46, 59.33s/it]

{'Model': 'Lasso', 'R-Squared': 0.14084106734378343, 'Adjusted R-Squared': 0.14071534054243562, 'RMSE': np.float64(1366.7937650651766), 'Time taken': 0.5057482719421387}


 46%|████▌     | 16/35 [09:48<13:16, 41.90s/it]

{'Model': 'LassoCV', 'R-Squared': 0.14086042929660125, 'Adjusted R-Squared': 0.14073470532862487, 'RMSE': np.float64(1366.7783639883305), 'Time taken': 1.4124085903167725}


 49%|████▊     | 17/35 [09:48<08:48, 29.37s/it]

{'Model': 'LassoLars', 'R-Squared': 0.14084139079792757, 'Adjusted R-Squared': 0.1407156640439131, 'RMSE': np.float64(1366.7935077814934), 'Time taken': 0.21632146835327148}


 51%|█████▏    | 18/35 [09:49<05:53, 20.80s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.14086999772850328, 'Adjusted R-Squared': 0.14074427516074328, 'RMSE': np.float64(1366.7707529071047), 'Time taken': 0.8576147556304932}


 54%|█████▍    | 19/35 [09:50<03:54, 14.68s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.14086999772850328, 'Adjusted R-Squared': 0.14074427516074328, 'RMSE': np.float64(1366.7707529071047), 'Time taken': 0.4194047451019287}


 57%|█████▋    | 20/35 [09:50<02:35, 10.38s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.1408699977285066, 'Adjusted R-Squared': 0.1407442751607466, 'RMSE': np.float64(1366.770752907102), 'Time taken': 0.3717663288116455}


 60%|██████    | 21/35 [09:51<01:44,  7.49s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.048986939010936426, 'Adjusted R-Squared': 0.0488477705455298, 'RMSE': np.float64(1438.0019495511888), 'Time taken': 0.7563960552215576}


 63%|██████▎   | 22/35 [14:28<19:11, 88.60s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.2154100475051265, 'Adjusted R-Squared': 0.21529523290988994, 'RMSE': np.float64(1306.133610476064), 'Time taken': 277.7389326095581}


 66%|██████▌   | 23/35 [14:29<12:24, 62.08s/it]

{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.09931313019477006, 'Adjusted R-Squared': 0.09918132631675358, 'RMSE': np.float64(1399.436348435213), 'Time taken': 0.20658183097839355}


 69%|██████▊   | 24/35 [14:29<08:00, 43.66s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.13466941764347917, 'Adjusted R-Squared': 0.1345427877009937, 'RMSE': np.float64(1371.6940679037316), 'Time taken': 0.7030088901519775}


 71%|███████▏  | 25/35 [14:30<05:06, 30.66s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.13421973997526948, 'Adjusted R-Squared': 0.1340930442282714, 'RMSE': np.float64(1372.0504288651666), 'Time taken': 0.3369407653808594}


 74%|███████▍  | 26/35 [14:30<03:15, 21.67s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': 0.045047912099487664, 'Adjusted R-Squared': 0.044908167208422056, 'RMSE': np.float64(1440.9769219341058), 'Time taken': 0.6899571418762207}


 77%|███████▋  | 27/35 [14:31<02:02, 15.27s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.12497775463487204, 'Adjusted R-Squared': 0.12484970644281312, 'RMSE': np.float64(1379.3541349860882), 'Time taken': 0.3473217487335205}


 80%|████████  | 28/35 [14:32<01:18, 11.17s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.16226854986490546, 'Adjusted R-Squared': -0.16243863283969318, 'RMSE': np.float64(1589.7157508632092), 'Time taken': 1.5951642990112305}


 86%|████████▌ | 30/35 [14:51<00:46,  9.37s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.22218890889965204, 'Adjusted R-Squared': 0.22207508630313366, 'RMSE': np.float64(1300.4788690322464), 'Time taken': 18.30207085609436}
{'Model': 'Ridge', 'R-Squared': 0.140869996169518, 'Adjusted R-Squared': 0.14074427360152986, 'RMSE': np.float64(1366.770754147182), 'Time taken': 0.18424582481384277}


 89%|████████▊ | 31/35 [14:51<00:26,  6.74s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.14086998193293876, 'Adjusted R-Squared': 0.14074425936286727, 'RMSE': np.float64(1366.7707654715102), 'Time taken': 0.5815150737762451}


 91%|█████████▏| 32/35 [14:53<00:15,  5.18s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.13920196322206202, 'Adjusted R-Squared': 0.1390759965590187, 'RMSE': np.float64(1368.0969289543011), 'Time taken': 1.532562017440796}


 94%|█████████▍| 33/35 [16:05<00:50, 25.30s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.0048736846123538236, 'Adjusted R-Squared': 0.0047280607425258525, 'RMSE': np.float64(1470.9751621311054), 'Time taken': 72.26560640335083}


100%|██████████| 35/35 [16:06<00:00, 27.60s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.1408699977285066, 'Adjusted R-Squared': 0.1407442751607466, 'RMSE': np.float64(1366.770752907102), 'Time taken': 0.3777914047241211}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RandomForestRegressor,0.22,0.22,1300.48,18.3,PumpSecondsOnSite_mean,2021
BaggingRegressor,0.22,0.22,1300.84,2.12,PumpSecondsOnSite_mean,2021
HistGradientBoostingRegressor,0.22,0.22,1300.92,1.33,PumpSecondsOnSite_mean,2021
DecisionTreeRegressor,0.22,0.22,1301.68,0.47,PumpSecondsOnSite_mean,2021
ExtraTreeRegressor,0.22,0.22,1301.74,0.47,PumpSecondsOnSite_mean,2021
MLPRegressor,0.22,0.22,1306.13,277.74,PumpSecondsOnSite_mean,2021
GradientBoostingRegressor,0.21,0.21,1311.6,10.76,PumpSecondsOnSite_mean,2021
AdaBoostRegressor,0.15,0.15,1357.45,2.46,PumpSecondsOnSite_mean,2021
LinearRegression,0.14,0.14,1366.77,0.37,PumpSecondsOnSite_mean,2021
TransformedTargetRegressor,0.14,0.14,1366.77,0.38,PumpSecondsOnSite_mean,2021


-----------------------------------------------------
year 2022
-----------------------------------------------------
(342634, 40)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'PropertyType_0', 'PropertyType_1', 'PropertyType_2', 'PropertyType_3', 'PropertyType_4', 'PropertyType_5', 'PropertyType_6', 'StopCode_0', 'StopCode_1', 'StopCode_2', 'StopCode_3', 'StopCode_4']


Unnamed: 0,CalYear,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4
1248893,14,0,0,0,0,0,1,0,0,0,0,1,1
1248894,14,0,1,1,1,1,0,0,0,0,0,1,0


(342634, 13)
cible PumpSecondsOnSite_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:02<01:09,  2.05s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.1588915035781029, 'Adjusted R-Squared': 0.15873190743644394, 'RMSE': np.float64(1348.2452316242898), 'Time taken': 2.0464797019958496}


  6%|▌         | 2/35 [00:03<01:01,  1.86s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.22264091508883121, 'Adjusted R-Squared': 0.22249341508001763, 'RMSE': np.float64(1296.1453980015692), 'Time taken': 1.722642183303833}


  9%|▊         | 3/35 [00:04<00:38,  1.21s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.1422404450268938, 'Adjusted R-Squared': 0.14207768942993193, 'RMSE': np.float64(1361.5251413796543), 'Time taken': 0.44046664237976074}


 14%|█▍        | 5/35 [00:04<00:17,  1.67it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.22080400258700617, 'Adjusted R-Squared': 0.22065615403320815, 'RMSE': np.float64(1297.6759009964915), 'Time taken': 0.3435971736907959}
{'Model': 'DummyRegressor', 'R-Squared': -2.0692507056718767e-06, 'Adjusted R-Squared': -0.00019181465523110397, 'RMSE': np.float64(1470.087999186315), 'Time taken': 0.12210440635681152}


 17%|█▋        | 6/35 [00:04<00:13,  2.10it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.12899729295562, 'Adjusted R-Squared': 0.1288320245366109, 'RMSE': np.float64(1371.9953371042852), 'Time taken': 0.23537421226501465}


 20%|██        | 7/35 [00:05<00:17,  1.56it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.13024101059993687, 'Adjusted R-Squared': 0.13007597817014693, 'RMSE': np.float64(1371.0154409629672), 'Time taken': 0.9732601642608643}


 23%|██▎       | 8/35 [00:06<00:15,  1.72it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.22097007187820938, 'Adjusted R-Squared': 0.220822254835231, 'RMSE': np.float64(1297.537607421078), 'Time taken': 0.45911169052124023}


 26%|██▌       | 9/35 [00:15<01:24,  3.27s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.21534017767071478, 'Adjusted R-Squared': 0.21519129238339296, 'RMSE': np.float64(1302.2176901288938), 'Time taken': 9.175774574279785}


 29%|██▊       | 10/35 [00:16<01:06,  2.66s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.2255884284505254, 'Adjusted R-Squared': 0.2254414877176697, 'RMSE': np.float64(1293.6857662207317), 'Time taken': 1.2831025123596191}


 31%|███▏      | 11/35 [00:17<00:52,  2.18s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.0675328496987494, 'Adjusted R-Squared': 0.06735591870822333, 'RMSE': np.float64(1419.5792855674608), 'Time taken': 1.0842540264129639}


 37%|███▋      | 13/35 [06:20<28:35, 77.97s/it] 

{'Model': 'KNeighborsRegressor', 'R-Squared': 0.14664308594263187, 'Adjusted R-Squared': 0.14648116572482295, 'RMSE': np.float64(1358.0264815045869), 'Time taken': 362.52173590660095}
{'Model': 'Lars', 'R-Squared': 0.14224116613061488, 'Adjusted R-Squared': 0.14207841067047888, 'RMSE': np.float64(1361.5245690741367), 'Time taken': 0.1411454677581787}


 40%|████      | 14/35 [06:21<19:06, 54.61s/it]

{'Model': 'LarsCV', 'R-Squared': 0.1422411614212139, 'Adjusted R-Squared': 0.14207840596018428, 'RMSE': np.float64(1361.5245728117636), 'Time taken': 0.6325938701629639}


 43%|████▎     | 15/35 [06:21<12:45, 38.26s/it]

{'Model': 'Lasso', 'R-Squared': 0.14221867620680495, 'Adjusted R-Squared': 0.14205591647931803, 'RMSE': np.float64(1361.5424181372439), 'Time taken': 0.3723766803741455}


 49%|████▊     | 17/35 [06:22<05:41, 18.96s/it]

{'Model': 'LassoCV', 'R-Squared': 0.14223434831409165, 'Adjusted R-Squared': 0.1420715915603089, 'RMSE': np.float64(1361.529980039254), 'Time taken': 1.0264215469360352}
{'Model': 'LassoLars', 'R-Squared': 0.14221900322058012, 'Adjusted R-Squared': 0.14205624355514246, 'RMSE': np.float64(1361.5421586053817), 'Time taken': 0.14633488655090332}


 51%|█████▏    | 18/35 [06:23<03:48, 13.46s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.1422411614212139, 'Adjusted R-Squared': 0.14207840596018428, 'RMSE': np.float64(1361.5245728117636), 'Time taken': 0.6512315273284912}


 54%|█████▍    | 19/35 [06:23<02:32,  9.52s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.14224116613061488, 'Adjusted R-Squared': 0.14207841067047888, 'RMSE': np.float64(1361.5245690741367), 'Time taken': 0.32898426055908203}


 57%|█████▋    | 20/35 [06:23<01:41,  6.74s/it]

{'Model': 'LinearRegression', 'R-Squared': 0.142241166130611, 'Adjusted R-Squared': 0.142078410670475, 'RMSE': np.float64(1361.5245690741397), 'Time taken': 0.25058579444885254}


 60%|██████    | 21/35 [06:24<01:08,  4.89s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.053057410026036145, 'Adjusted R-Squared': 0.05287773239303717, 'RMSE': np.float64(1430.5554886233797), 'Time taken': 0.5746488571166992}


 66%|██████▌   | 23/35 [09:47<09:01, 45.09s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.22119724620646897, 'Adjusted R-Squared': 0.2210494722686862, 'RMSE': np.float64(1297.3484049808048), 'Time taken': 203.02682733535767}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.10127284745058651, 'Adjusted R-Squared': 0.10110231845633522, 'RMSE': np.float64(1393.6599328249888), 'Time taken': 0.16283392906188965}


 69%|██████▊   | 24/35 [09:48<05:49, 31.73s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.13629838128710914, 'Adjusted R-Squared': 0.13613449821319235, 'RMSE': np.float64(1366.2329328976036), 'Time taken': 0.5756783485412598}


 71%|███████▏  | 25/35 [09:48<03:42, 22.29s/it]

{'Model': 'PLSRegression', 'R-Squared': 0.135183714265794, 'Adjusted R-Squared': 0.13501961968936993, 'RMSE': np.float64(1367.1142579565935), 'Time taken': 0.24036836624145508}


 74%|███████▍  | 26/35 [09:49<02:22, 15.78s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': 0.07582711386635654, 'Adjusted R-Squared': 0.07565175667108359, 'RMSE': np.float64(1413.2516278878745), 'Time taken': 0.6020081043243408}


 77%|███████▋  | 27/35 [09:49<01:29, 11.13s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.124922364895661, 'Adjusted R-Squared': 0.12475632327937869, 'RMSE': np.float64(1375.2009864543095), 'Time taken': 0.2730288505554199}


 80%|████████  | 28/35 [09:50<00:57,  8.17s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.068595107146187, 'Adjusted R-Squared': -0.06879786773750385, 'RMSE': np.float64(1519.6706422616091), 'Time taken': 1.2812309265136719}


 86%|████████▌ | 30/35 [10:03<00:33,  6.69s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.2238012610953195, 'Adjusted R-Squared': 0.2236539812563728, 'RMSE': np.float64(1295.1776735465023), 'Time taken': 12.574926376342773}
{'Model': 'Ridge', 'R-Squared': 0.14224115808666415, 'Adjusted R-Squared': 0.14207840262500182, 'RMSE': np.float64(1361.5245754582368), 'Time taken': 0.13673663139343262}


 89%|████████▊ | 31/35 [10:03<00:19,  4.81s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.14224108544099812, 'Adjusted R-Squared': 0.1420783299655517, 'RMSE': np.float64(1361.5246331136364), 'Time taken': 0.4421522617340088}


 91%|█████████▏| 32/35 [10:05<00:11,  3.77s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.14071476549614714, 'Adjusted R-Squared': 0.14055172040910457, 'RMSE': np.float64(1362.7354620086974), 'Time taken': 1.3377699851989746}


 94%|█████████▍| 33/35 [10:58<00:37, 18.72s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.0021585319171857353, 'Adjusted R-Squared': 0.0019691964759545044, 'RMSE': np.float64(1468.4990067915514), 'Time taken': 53.60888314247131}


100%|██████████| 35/35 [10:59<00:00, 18.83s/it]

{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.142241166130611, 'Adjusted R-Squared': 0.142078410670475, 'RMSE': np.float64(1361.5245690741397), 'Time taken': 0.2541477680206299}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HistGradientBoostingRegressor,0.23,0.23,1293.69,1.28,PumpSecondsOnSite_mean,2022
RandomForestRegressor,0.22,0.22,1295.18,12.57,PumpSecondsOnSite_mean,2022
BaggingRegressor,0.22,0.22,1296.15,1.72,PumpSecondsOnSite_mean,2022
MLPRegressor,0.22,0.22,1297.35,203.03,PumpSecondsOnSite_mean,2022
ExtraTreeRegressor,0.22,0.22,1297.54,0.46,PumpSecondsOnSite_mean,2022
DecisionTreeRegressor,0.22,0.22,1297.68,0.34,PumpSecondsOnSite_mean,2022
GradientBoostingRegressor,0.22,0.22,1302.22,9.18,PumpSecondsOnSite_mean,2022
AdaBoostRegressor,0.16,0.16,1348.25,2.05,PumpSecondsOnSite_mean,2022
KNeighborsRegressor,0.15,0.15,1358.03,362.52,PumpSecondsOnSite_mean,2022
Lars,0.14,0.14,1361.52,0.14,PumpSecondsOnSite_mean,2022


-----------------------------------------------------
year 2023
-----------------------------------------------------
(225027, 40)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'PropertyType_0', 'PropertyType_1', 'PropertyType_2', 'PropertyType_3', 'PropertyType_4', 'PropertyType_5', 'PropertyType_6', 'StopCode_0', 'StopCode_1', 'StopCode_2', 'StopCode_3', 'StopCode_4']


Unnamed: 0,CalYear,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4
1366500,15,0,0,0,0,0,1,0,0,0,1,1,0
1366501,15,0,0,0,0,0,1,0,0,0,0,1,1


(225027, 13)
cible PumpSecondsOnSite_mean
[('AdaBoostRegressor', <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'>), ('BaggingRegressor', <class 'sklearn.ensemble._bagging.BaggingRegressor'>), ('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>), ('DecisionTreeRegressor', <class 'sklearn.tree._classes.DecisionTreeRegressor'>), ('DummyRegressor', <class 'sklearn.dummy.DummyRegressor'>), ('ElasticNet', <class 'sklearn.linear_model._coordinate_descent.ElasticNet'>), ('ElasticNetCV', <class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'>), ('ExtraTreeRegressor', <class 'sklearn.tree._classes.ExtraTreeRegressor'>), ('GradientBoostingRegressor', <class 'sklearn.ensemble._gb.GradientBoostingRegressor'>), ('HistGradientBoostingRegressor', <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'>), ('HuberRegressor', <class 'sklearn.linear_model._huber.HuberRegressor'>), ('KNeighborsRegressor', <class 'sklearn.neigh

  3%|▎         | 1/35 [00:01<00:37,  1.10s/it]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.16244591820542986, 'Adjusted R-Squared': 0.1622039151145842, 'RMSE': np.float64(1335.0680932537296), 'Time taken': 1.094792366027832}


  6%|▌         | 2/35 [00:01<00:31,  1.05it/s]

{'Model': 'BaggingRegressor', 'R-Squared': 0.21937264430666858, 'Adjusted R-Squared': 0.21914708963863838, 'RMSE': np.float64(1288.8989510267875), 'Time taken': 0.8569839000701904}


  9%|▊         | 3/35 [00:02<00:19,  1.60it/s]

{'Model': 'BayesianRidge', 'R-Squared': 0.13962287438412335, 'Adjusted R-Squared': 0.1393742767971522, 'RMSE': np.float64(1353.1358950300043), 'Time taken': 0.22623038291931152}


 11%|█▏        | 4/35 [00:02<00:14,  2.16it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.21899719509986537, 'Adjusted R-Squared': 0.2187715319494452, 'RMSE': np.float64(1289.2088670919925), 'Time taken': 0.21859407424926758}
{'Model': 'DummyRegressor', 'R-Squared': -6.59179161428547e-05, 'Adjusted R-Squared': -0.0003548772185280047, 'RMSE': np.float64(1458.85221970003), 'Time taken': 0.08655047416687012}


 17%|█▋        | 6/35 [00:02<00:08,  3.59it/s]

{'Model': 'ElasticNet', 'R-Squared': 0.12756502954288673, 'Adjusted R-Squared': 0.12731294795913972, 'RMSE': np.float64(1362.5847367398562), 'Time taken': 0.13991832733154297}


 20%|██        | 7/35 [00:03<00:11,  2.48it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.12891906621722304, 'Adjusted R-Squared': 0.12866737586917942, 'RMSE': np.float64(1361.5269466527402), 'Time taken': 0.7303180694580078}


 23%|██▎       | 8/35 [00:03<00:09,  2.80it/s]

{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.21817444841116596, 'Adjusted R-Squared': 0.21794854753610693, 'RMSE': np.float64(1289.8877462973492), 'Time taken': 0.2400660514831543}


 26%|██▌       | 9/35 [00:08<00:40,  1.57s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.21375716793567712, 'Adjusted R-Squared': 0.21352999073046652, 'RMSE': np.float64(1293.5265187360064), 'Time taken': 4.575584411621094}


 29%|██▊       | 10/35 [00:08<00:31,  1.25s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.21983354092845064, 'Adjusted R-Squared': 0.21960811943200842, 'RMSE': np.float64(1288.518400128671), 'Time taken': 0.47515296936035156}


 31%|███▏      | 11/35 [00:09<00:25,  1.07s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.06441393519137706, 'Adjusted R-Squared': 0.06414360671425867, 'RMSE': np.float64(1411.0384862865778), 'Time taken': 0.6309945583343506}


 34%|███▍      | 12/35 [02:37<17:05, 44.58s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': 0.07447543986510397, 'Adjusted R-Squared': 0.07420801856172221, 'RMSE': np.float64(1403.4306630241995), 'Time taken': 148.3654248714447}
{'Model': 'Lars', 'R-Squared': 0.13962181023631315, 'Adjusted R-Squared': 0.13937321234186684, 'RMSE': np.float64(1353.136731835233), 'Time taken': 0.09563708305358887}


 40%|████      | 14/35 [02:38<08:30, 24.30s/it]

{'Model': 'LarsCV', 'R-Squared': 0.13962181023631315, 'Adjusted R-Squared': 0.13937321234186684, 'RMSE': np.float64(1353.136731835233), 'Time taken': 0.45537757873535156}


 43%|████▎     | 15/35 [02:38<06:07, 18.38s/it]

{'Model': 'Lasso', 'R-Squared': 0.13957690065302808, 'Adjusted R-Squared': 0.1393282897823953, 'RMSE': np.float64(1353.1720465459632), 'Time taken': 0.2770702838897705}


 49%|████▊     | 17/35 [02:39<03:01, 10.09s/it]

{'Model': 'LassoCV', 'R-Squared': 0.13960520512059948, 'Adjusted R-Squared': 0.13935660242826675, 'RMSE': np.float64(1353.149789397515), 'Time taken': 0.7610955238342285}
{'Model': 'LassoLars', 'R-Squared': 0.13957697748202713, 'Adjusted R-Squared': 0.13932836663359327, 'RMSE': np.float64(1353.1719861321642), 'Time taken': 0.1267542839050293}


 51%|█████▏    | 18/35 [02:39<02:05,  7.41s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.13962181023631315, 'Adjusted R-Squared': 0.13937321234186684, 'RMSE': np.float64(1353.136731835233), 'Time taken': 0.46846604347229004}


 57%|█████▋    | 20/35 [02:40<00:57,  3.86s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.13962181023631315, 'Adjusted R-Squared': 0.13937321234186684, 'RMSE': np.float64(1353.136731835233), 'Time taken': 0.2393951416015625}
{'Model': 'LinearRegression', 'R-Squared': 0.13962181023631026, 'Adjusted R-Squared': 0.13937321234186395, 'RMSE': np.float64(1353.1367318352352), 'Time taken': 0.15985870361328125}


 60%|██████    | 21/35 [02:40<00:39,  2.83s/it]

{'Model': 'LinearSVR', 'R-Squared': 0.046841759888629375, 'Adjusted R-Squared': 0.04656635410267973, 'RMSE': np.float64(1424.2279042703515), 'Time taken': 0.34386491775512695}


 66%|██████▌   | 23/35 [04:50<05:40, 28.36s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.21936117857504533, 'Adjusted R-Squared': 0.21913562059410374, 'RMSE': np.float64(1288.9084165644026), 'Time taken': 129.8018980026245}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.09677069435130514, 'Adjusted R-Squared': 0.09650971504446315, 'RMSE': np.float64(1386.4237745472267), 'Time taken': 0.10764002799987793}


 71%|███████▏  | 25/35 [04:51<02:21, 14.11s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.1333282495677428, 'Adjusted R-Squared': 0.13307783321026545, 'RMSE': np.float64(1358.0767288356776), 'Time taken': 0.3553617000579834}
{'Model': 'PLSRegression', 'R-Squared': 0.13451888605944495, 'Adjusted R-Squared': 0.1342688137247804, 'RMSE': np.float64(1357.1435430911847), 'Time taken': 0.16163086891174316}


 77%|███████▋  | 27/35 [04:51<00:56,  7.06s/it]

{'Model': 'PassiveAggressiveRegressor', 'R-Squared': 0.05136361324548899, 'Adjusted R-Squared': 0.051089514005006054, 'RMSE': np.float64(1420.8455666021098), 'Time taken': 0.34136080741882324}
{'Model': 'PoissonRegressor', 'R-Squared': 0.12130019573057738, 'Adjusted R-Squared': 0.12104630398414462, 'RMSE': np.float64(1367.4682509160718), 'Time taken': 0.1725935935974121}


 80%|████████  | 28/35 [04:52<00:36,  5.19s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -0.06786602463945113, 'Adjusted R-Squared': -0.06817457412203276, 'RMSE': np.float64(1507.4932342762552), 'Time taken': 0.8259117603302002}


 83%|████████▎ | 29/35 [04:58<00:33,  5.59s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.21997560355623214, 'Adjusted R-Squared': 0.2197502231074019, 'RMSE': np.float64(1288.4010798814793), 'Time taken': 6.531152248382568}
{'Model': 'Ridge', 'R-Squared': 0.13962182337596152, 'Adjusted R-Squared': 0.13937322548531172, 'RMSE': np.float64(1353.136721502718), 'Time taken': 0.09457850456237793}


 89%|████████▊ | 31/35 [04:59<00:12,  3.11s/it]

{'Model': 'RidgeCV', 'R-Squared': 0.13962194075834833, 'Adjusted R-Squared': 0.13937334290161507, 'RMSE': np.float64(1353.1366291977133), 'Time taken': 0.2986185550689697}


 91%|█████████▏| 32/35 [04:59<00:07,  2.49s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.13908706751844058, 'Adjusted R-Squared': 0.13883831511529643, 'RMSE': np.float64(1353.5571676547406), 'Time taken': 0.6104817390441895}


100%|██████████| 35/35 [05:37<00:00,  9.65s/it]

{'Model': 'TheilSenRegressor', 'R-Squared': 0.003514966166231037, 'Adjusted R-Squared': 0.0032270415254095353, 'RMSE': np.float64(1456.2380593240218), 'Time taken': 37.67497777938843}
{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.13962181023631026, 'Adjusted R-Squared': 0.13937321234186395, 'RMSE': np.float64(1353.1367318352352), 'Time taken': 0.18442201614379883}
VotingRegressor model failed to execute
VotingRegressor.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken,Target,Year floor
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RandomForestRegressor,0.22,0.22,1288.4,6.53,PumpSecondsOnSite_mean,2023
HistGradientBoostingRegressor,0.22,0.22,1288.52,0.48,PumpSecondsOnSite_mean,2023
BaggingRegressor,0.22,0.22,1288.9,0.86,PumpSecondsOnSite_mean,2023
MLPRegressor,0.22,0.22,1288.91,129.8,PumpSecondsOnSite_mean,2023
DecisionTreeRegressor,0.22,0.22,1289.21,0.22,PumpSecondsOnSite_mean,2023
ExtraTreeRegressor,0.22,0.22,1289.89,0.24,PumpSecondsOnSite_mean,2023
GradientBoostingRegressor,0.21,0.21,1293.53,4.58,PumpSecondsOnSite_mean,2023
AdaBoostRegressor,0.16,0.16,1335.07,1.09,PumpSecondsOnSite_mean,2023
BayesianRidge,0.14,0.14,1353.14,0.23,PumpSecondsOnSite_mean,2023
RidgeCV,0.14,0.14,1353.14,0.3,PumpSecondsOnSite_mean,2023


In [None]:
# ------------------------------------- Lazy predict par code postal


cols_cible_filter = [
    # "TurnoutTimeSeconds_mean",
    "TravelTimeSeconds_mean",
    # "PumpSecondsOnSite_mean",
]

ignore_regressors = [
    "ExtraTreesRegressor",
    "NuSVR",
    "QuantileRegressor",
    "SVR",
    "CCA",
    "GammaRegressor",
    "GaussianProcessRegressor",
    "IsotonicRegression",
    "MultiOutputRegressor",
    "MultiTaskElasticNet",
    "MultiTaskElasticNetCV",
    "MultiTaskLasso",
    "MultiTaskLassoCV",
    "PLSCanonical",
    "RadiusNeighborsRegressor",
    "RegressorChain",
    "StackingRegressor",
    "KernelRidge",
    "ARDRegression",
    "TweedieRegressor",
]
REGRESSORS = [
    estimator
    for estimator in all_estimators()
    if (
        issubclass(estimator[1], RegressorMixin)
        and (not (estimator[0] in ignore_regressors))
    )
]

all_results = pd.DataFrame()

for index, name in enumerate(cols_cible_type):
    df = load_df_full(name)
    # X = df.drop(["PumpSecondsOnSite_min", "PumpSecondsOnSite_mean", "PumpSecondsOnSite_max", "TurnoutTimeSeconds_min", "TurnoutTimeSeconds_mean",
    #               "TurnoutTimeSeconds_max", "TravelTimeSeconds_min", "TravelTimeSeconds_mean", "TravelTimeSeconds_max", "NumPumpsAttending"], axis=1)
    # display(X.head(2))
    # print(X.shape)

    # pca = PCA()
    # data_2D = pca.fit_transform(X)
    # display(data_2D)
    # print("La part de variance expliquée est", round(pca.explained_variance_ratio_.sum(), 5))

    # plt.figure()
    # plt.xlim(0,40)
    # plt.xlabel('Nombre de composantes')
    # plt.ylabel('Part de variance expliquée')
    # plt.axhline(y = 0.95, color ='r', linestyle = '--')
    # plt.plot(pca.explained_variance_ratio_.cumsum());
    # plt.show()

    # variance_expliquee = pca.explained_variance_ratio_
    # charges_factorielles = pca.components_
    # # Créez un DataFrame pour afficher les corrélations de chaque variable avec chaque axe
    # df_charges_factorielles = pd.DataFrame(
    #     charges_factorielles,
    #     columns=X.columns,
    #     #index=["Axe 1", "Axe 2", "Axe 3", "Axe 4", "Axe 5", "Axe 6"],
    # )

    # # Affichez le DataFrame des charges factorielles
    # display(df_charges_factorielles.head(2))

    # charges_factorielles = pca.components_
    # # Appelez la fonction pour tracer le cercle de corrélation
    # draw_correlation_circle(df_charges_factorielles, pca)

    # Auto ML
    for year_floor in range(13, 16): 
        print("-----------------------------------------------------")
        print("year", 2024 - 16 + year_floor)
        print("-----------------------------------------------------")
        df_limited = df[df.CalYear >= year_floor]  # >= 2021

        for col_cible in cols_cible[index]:
            if not (col_cible in cols_cible_filter):
                continue
            print("cible", col_cible)

            # boucle par CP
            # Obtenez les valeurs uniques des codes postaux
            unique_postcodes = df_limited["Postcode_district"].unique()
            # Parcours de chaque code postal
            limiter = 0
            for postcode in unique_postcodes:
                # Limite pour test
                limiter += 1 
                if limiter > 4:
                    break
                # Filtre par postcode et supprime la colonne
                df_postcode = df_limited[df_limited["Postcode_district"] == postcode]
                df_postcode = df_postcode.drop(columns=["Postcode_district"])
                if len(df_postcode) < 5:
                    continue
                # Crée X et y
                X = Create_X(df_postcode, index)
                print("Postcode_district", postcode, X.shape)
                display(X.head())
                y = df_postcode[col_cible]
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42
                )
                # Réchantillonne
                # X_train_samp, y_train_samp = custom_random_undersampler(X_train, y_train)
                X_train_samp, y_train_samp = X_train, y_train
                # LazyPredict
                myREGRESSORS = REGRESSORS
                print(myREGRESSORS)
                # myREGRESSORS = myREGRESSORS[4:6]
                reg = LazyRegressor(
                    verbose=2,
                    ignore_warnings=False,
                    custom_metric=None,
                    regressors=myREGRESSORS,
                )
                models, predictions = reg.fit(X_train_samp, X_test, y_train_samp, y_test)
                # models, predictions = reg.fit(X_train, X_test, y_train, y_test)
                # Ajoute le nom de la colonne
                models["Target"] = col_cible
                models["Postcode_district"] = postcode
                models["Year floor"] = 2024 - 16 + year_floor
                # Concaténe dans les résultats
                all_results = pd.concat([all_results, models], axis=0)
                display(models)
                # models.to_csv(
                #     f"../data/_autoML_regressor_{col_cible}.csv", sep=";", index=True
                # )
                # break
            # break
all_results = all_results.sort_values(by=["Target", "Postcode_district", "RMSE"], ascending=[False, True])
all_results.to_csv(f"../data/_autoML_regressor by postcode detailled.csv", sep=";", index=True)

# Calculer les statistiques par modèle
summary_df = (
    all_results.groupby(["Target", "Year floor", "Model"])
    .agg(
        RMSE_Mean=("RMSE", "mean"),
        RMSE_Min=("RMSE", "min"),
        RMSE_Max=("RMSE", "max"),
    )
    .reset_index()
)
summary_df = summary_df.sort_values(by=["Target", "RMSE_Mean"], ascending=False)

# all_results = all_results.sort_values(by=["Target", "F1 Score"], ascending=False)
summary_df.to_csv(f"../data/_autoML_regressor by postcode.csv", sep=";", index=True)

In [None]:
# Regressors retenus pour optimisation, prend le top 5 de chaque cible et les entraine tous
# pour voir si au final on peut limiter à certains communs

# Pour TurnoutTimeSeconds
# HistGradientBoostingRegressor
# MLPRegressor
# GradientBoostingRegressor
# DecisionTreeRegressor
# ExtraTreeRegressor

# Pour TravelTimeSeconds
# MLPRegressor
# HistGradientBoostingRegressor
# GradientBoostingRegressor
# LassoCV
# RidgeCV

# Pour PumpSecondsOnSite
# HistGradientBoostingRegressor
# MLPRegressor
# RandomForestRegressor
# BaggingRegressor
# DecisionTreeRegressor

# On va effectuer un GridSearchCV pour :
# HistGradientBoostingRegressor
# MLPRegressor
# GradientBoostingRegressor
# DecisionTreeRegressor
# ExtraTreeRegressor
# LassoCV
# RidgeCV
# RandomForestRegressor
# BaggingRegressor


import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
    BaggingRegressor,
    AdaBoostRegressor,
)
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import (
    PoissonRegressor,
    Lars,
    LarsCV,
    LinearRegression,
    Ridge,
    RidgeCV,
    BayesianRidge,
    LassoLarsIC,
    LassoCV,
    LassoLarsCV,
    ElasticNetCV,
    OrthogonalMatchingPursuitCV,
    PassiveAggressiveRegressor,
    RANSACRegressor,
    SGDRegressor,
    HuberRegressor,
    TheilSenRegressor,
    ElasticNet,
    OrthogonalMatchingPursuit,
    LassoLars,
    Lasso,
)
from sklearn.svm import LinearSVR
from sklearn.cross_decomposition import PLSRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.compose import TransformedTargetRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, KFold

from sklearn.metrics import (
    accuracy_score,
    root_mean_squared_error,
    mean_absolute_error,
    r2_score,
)


# Hyperparamètres larges pour chaque modèle
param_grid = {
    # "HistGradientBoostingRegressor": {
    #     "learning_rate": [0.01, 0.05, 0.1],
    #     "max_iter": [100, 200, 300],
    #     "max_depth": [None, 5, 10, 15, 20],
    # },
    "HistGradientBoostingRegressor": {
        "learning_rate": [0.05, 0.1],
        "max_iter": [100, 300],
        "max_depth": [None, 15, 20],
    },
    # "MLPRegressor": {
    #     "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100)],
    #     "activation": ["relu", "tanh", "logistic"],
    #     # "solver": ["adam", "sgd"],
    #     "learning_rate_init": [0.001, 0.01, 0.1],
    # },
    "MLPRegressor": {
        "hidden_layer_sizes": [(50, 50), (100, 100)],
        "activation": ["tanh", "logistic"],
        "learning_rate_init": [0.001, 0.01],
    },
    # "GradientBoostingRegressor": {
    #     "learning_rate": [0.01, 0.1, 0.2],
    #     "n_estimators": [100, 200, 300],
    #     "max_depth": [3, 5, 7],
    # },
    "GradientBoostingRegressor": {
        "learning_rate": [0.01, 0.1, 0.2],
        "n_estimators": [100, 200],
        "max_depth": [7, 10],
    },
    "DecisionTreeRegressor": {
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "ExtraTreeRegressor": {
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "RandomForestRegressor": {
        "n_estimators": [100, 200, 300],
        "max_depth": [None, 10, 20, 30],
        # "min_samples_split": [2, 5, 10],
    },
    "BaggingRegressor": {
        "n_estimators": [10, 50, 100],
        "max_samples": [0.5, 0.75, 1.0],
        "max_features": [0.5, 1.0],
    },
    "AdaBoostRegressor": {
        "n_estimators": [50, 100, 200],
        "learning_rate": [0.1, 0.5, 1.0],
    },
    "PoissonRegressor": {
        "alpha": [0.01, 0.1, 1.0],
    },
    "Lars": {
        "n_nonzero_coefs": [1, 5, 10, 20],
    },
    "LinearRegression": {},
    "TransformedTargetRegressor": {},
    "Ridge": {
        "alpha": [-5, 0.1, 1.0, 10.0, 50.0, 100.0],
        "fit_intercept": [True, False],
    },
    "BayesianRidge": {
        "alpha_1": [1e-6, 1e-5, 1e-4],
        "alpha_2": [1e-6, 1e-5, 1e-4],
    },
    "LassoLarsIC": {"criterion": ["aic", "bic"]},
    "OrthogonalMatchingPursuitCV": {},
    "PLSRegression": {
        "n_components": [2, 5, 10],
    },
    "TheilSenRegressor": {
        "max_subpopulation": [10000, 100000],
    },
    "HuberRegressor": {
        "alpha": [0.01, 0.1, 1.0],
    },
    "LinearSVR": {
        "C": [0.1, 1.0, 10.0],
        "epsilon": [0.1, 0.2, 0.3],
    },
    "Lasso": {
        "alpha": [0.1, 1.0, 5.0, 10.0],
    },
    "ElasticNet": {
        "alpha": [0.1, 1.0, 10.0],
        "l1_ratio": [0.1, 0.5, 0.7, 0.9],
    },
    "OrthogonalMatchingPursuit": {},
    "KNeighborsRegressor": {
        "n_neighbors": [3, 5, 10, 15],
        "weights": ["uniform", "distance"],
    },
    "PassiveAggressiveRegressor": {
        "C": [0.1, 1.0, 10.0],
    },
    "RANSACRegressor": {"min_samples": [0.1, 0.5, 0.75], "max_trials": [100, 200, 300]},
    "SGDRegressor": {
        "alpha": [0.0001, 0.001, 0.01],
        "penalty": ["l2", "l1", "elasticnet"],
    },
}

# Liste des modèles
regressors = {
    "HistGradientBoostingRegressor": HistGradientBoostingRegressor(),
    "MLPRegressor": MLPRegressor(),
    "GradientBoostingRegressor": GradientBoostingRegressor(),
    "DecisionTreeRegressor": DecisionTreeRegressor(),
    "ExtraTreeRegressor": ExtraTreeRegressor(),
    "RandomForestRegressor": RandomForestRegressor(),
    "BaggingRegressor": BaggingRegressor(),
    "AdaBoostRegressor": AdaBoostRegressor(),
    # "PoissonRegressor": PoissonRegressor(),
    # "Lars": Lars(),
    # "LinearRegression": LinearRegression(),
    # "TransformedTargetRegressor": TransformedTargetRegressor(
    #     regressor=LinearRegression()
    # ),
    # "Ridge": Ridge(),
    # "BayesianRidge": BayesianRidge(),
    # "LassoLarsIC": LassoLarsIC(),
    # "OrthogonalMatchingPursuitCV": OrthogonalMatchingPursuitCV(),
    # "PLSRegression": PLSRegression(),
    # "TheilSenRegressor": TheilSenRegressor(),
    # "HuberRegressor": HuberRegressor(),
    # "LinearSVR": LinearSVR(),
    # "LassoLars": LassoLars(),
    # "Lasso": Lasso(),
    # "ElasticNet": ElasticNet(),
    # "OrthogonalMatchingPursuit": OrthogonalMatchingPursuit(),
    # "KNeighborsRegressor": KNeighborsRegressor(),
    # "PassiveAggressiveRegressor": PassiveAggressiveRegressor(),
    # "RANSACRegressor": RANSACRegressor(),
    # "SGDRegressor": SGDRegressor(),
}

cols_cible_filter = [
    "TurnoutTimeSeconds_mean",
    "TravelTimeSeconds_mean",
    "PumpSecondsOnSite_mean",
]

results = []
crossval = KFold(n_splits=5, random_state=42, shuffle=True)

for index, name in enumerate(cols_cible_type):
    df = load_df(name)

    for year_floor in range(14, 15):  # >= 2022
        print("-----------------------------------------------------")
        print("year", 2024 - 16 + year_floor)
        print("-----------------------------------------------------")
        df_limited = df[df.CalYear >= year_floor]  
        X = Create_X(df_limited, index)
        for index_cible, col_cible in enumerate(cols_cible[index]):
            if not (col_cible in cols_cible_filter):
                continue
            print(
                "--------------------------------------------------------------------------------"
            )
            print("///////////////////////////", "cible", col_cible)
            y = df_limited[col_cible]
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )
            # Réchantillonne
            # X_train_smote, y_train_smote = under_sampling(X_train, y_train)
            X_train_samp = X_train
            y_train_samp = y_train

            # Exécuter GridSearchCV pour chaque modèle
            for model_name, model in regressors.items():
                # Controle run, car potentiellement long
                print("-------------------------------------------")
                print(f"GridSearchCV pour {model_name}")
                grid = GridSearchCV(
                    model,
                    param_grid[model_name],
                    scoring="neg_mean_squared_error",
                    cv=crossval,
                    n_jobs=2, #-1,
                    verbose=2,
                )
                # Fit et Predict
                grid.fit(X_train_samp, y_train_samp)
                y_pred = grid.best_estimator_.predict(X_test)
                y_train_pred = grid.best_estimator_.predict(X_train_samp)
                # Meilleur score et meilleurs hyperparamètres
                best_params = grid.best_params_
                best_score = grid.best_score_
                # affiche de suite les résultat
                print(f"Meilleurs paramètres {best_params}")
                print(f"Meilleur score {best_score}")
                # R²
                train_r2 = r2_score(y_train_samp, y_train_pred)
                print(f"R² train : {train_r2}")
                test_r2 = r2_score(y_test, y_pred)
                print(f"R² test : {test_r2}")
                # train et test RMSE
                score_train = grid.score(X_train_samp, y_train_samp)
                print(f"Score train : {score_train}")
                score_test = grid.score(X_test, y_test)
                print(f"Score test : {score_test}")
                # RMSE
                rmse_train = root_mean_squared_error(y_train_samp, y_train_pred)
                print(f"RMSE train : {rmse_train}")
                rmse_test = root_mean_squared_error(y_test, y_pred)
                print(f"RMSE test : {rmse_test}")
                # MAE
                mae_train = mean_absolute_error(y_train_samp, y_train_pred)
                print(f"MAE train : {mae_train}")
                mae_test = mean_absolute_error(y_test, y_pred)
                print(f"MAE test : {mae_test}")

                # Score de validation croisée
                #             scores = cross_validate(grid, X_train, y_train, cv=crossval, scoring=['accuracy', 'precision', 'recall', 'f1'])
                #             cv_test_accuracy = f" cv_test_accuracy : {scores['test_accuracy'].mean()} (+/-{scores['test_accuracy'].std()})"
                #             cv_test_precision
                #             cv_test_recall
                #             cv_test_f1

                # {'fit_time': array([1.7602632 , 1.3521595 , 1.53327417, 1.6628437 , 1.52980328]),
                #  'score_time': array([0.10070086, 0.10009599, 0.11566949, 0.09443331, 0.09455991]),
                #  'test_accuracy': array([0.87765306, 0.87768222, 0.8774344 , 0.87757839, 0.87759297]),
                #  'test_precision': array([0.36      , 0.43243243, 0.2745098 , 0.29032258, 0.38888889]),
                #  'test_recall': array([0.00107322, 0.00190794, 0.00166965, 0.00107335, 0.00250447]),
                #  'test_f1': array([0.00214005, 0.00379912, 0.00331911, 0.00213878, 0.00497689])}

                # print("[StackingClassifier]: \n Accuracy: %0.2f (+/- %0.2f)\n" % (scores['test_accuracy'].mean(), scores['test_accuracy'].std()),
                #       "F1 score: %0.2f (+/- %0.2f)" % (scores['test_f1'].mean(), scores['test_f1'].std()))
                # >>>
                # [StackingClassifier]:
                #  Accuracy: 0.76 (+/- 0.02)
                #  F1 score: 0.62 (+/- 0.04)

                # Ajouter au tableau des résultats
                results.append(
                    {
                        "Target": col_cible,
                        "Year floor": 2024 - 16 + year_floor,
                        "Model": model_name,
                        "Best Params": best_params,
                        "Best Score": best_score,
                        "R-Squared train": train_r2,
                        "R-Squared test": test_r2,
                        "Score Train": score_train,
                        "Score Test": score_test,
                        "RMSE train": rmse_train,
                        "RMSE test": rmse_test,
                        "MAE train": mae_train,
                        "MAE test": mae_test,
                    }
                )
                df_results = pd.DataFrame(results)
                df_results = df_results.sort_values(by=["Target", "MAE test"], ascending=[False, True])
                df_results.to_csv(f"../data/_GridSearch_regressor.csv", sep=";", index=False)

print(results)
df_results = pd.DataFrame(results)
df_results = df_results.sort_values(by=["Target", "MAE test"], ascending=[False, True])
df_results.to_csv(f"../data/_GridSearch_regressor.csv", sep=";", index=False)

# p = plt.figure(figsize=(18,12))
# p = sns.set_theme(style="white")
# p = models=models.sort_values(by='Model Accuracy with GridSearch',ascending=False)
# p = sns.barplot(y= 'Model', x= 'Model Accuracy with GridSearch', data= models)
# for container in p.containers:
#     p.bar_label(container,label_type = 'edge',padding = 8,size = 20,color = "black",rotation = 0,
#     bbox={"boxstyle": "round", "pad": 0.6, "facecolor": "pink", "edgecolor": "Blue", "alpha": 1})
# plt.title('COMPARE THE MODEL')
# plt.xlabel('MODEL')
# plt.ylabel('Model Accuracy')
# plt.show()

-----------------------------------------------------
year 2022
-----------------------------------------------------
(342634, 42)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1248893,14,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1
1248894,14,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,1


(342634, 18)
--------------------------------------------------------------------------------
/////////////////////////// cible TurnoutTimeSeconds_mean
-------------------------------------------
GridSearchCV pour HistGradientBoostingRegressor
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Meilleurs paramètres {'learning_rate': 0.1, 'max_depth': 20, 'max_iter': 100}
Meilleur score -1166.7342407438052
R² train : 0.14646706102948992
R² test : 0.14815032430313346
Score train : -1161.4649969009824
Score test : -1182.4627398319167
RMSE train : 34.08027284076497
RMSE test : 34.386955954720925
MAE train : 21.87042208144383
MAE test : 21.941782470690125
-------------------------------------------
GridSearchCV pour MLPRegressor
Fitting 5 folds for each of 8 candidates, totalling 40 fits
Meilleurs paramètres {'activation': 'logistic', 'hidden_layer_sizes': (50, 50), 'learning_rate_init': 0.001}
Meilleur score -1169.6960919361677
R² train : 0.14280193966901067
R² test : 0.1450065205

Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
1248893,14,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1
1248894,14,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1,1


(342634, 22)
--------------------------------------------------------------------------------
/////////////////////////// cible TravelTimeSeconds_mean
-------------------------------------------
GridSearchCV pour HistGradientBoostingRegressor
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Meilleurs paramètres {'learning_rate': 0.1, 'max_depth': 15, 'max_iter': 300}
Meilleur score -16454.018481412757
R² train : 0.12272014314634072
R² test : 0.11020176507988466
Score train : -16162.593937923179
Score test : -16350.480506384487
RMSE train : 127.13219080124112
RMSE test : 127.86899744028842
MAE train : 92.82680030437741
MAE test : 93.02890657947654
-------------------------------------------
GridSearchCV pour MLPRegressor
Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [None]:

StackingRegressor

from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Modèles de base
base_models = [
    ('linear', LinearRegression()),
    ('tree', DecisionTreeRegressor(max_depth=5)),
    ('forest', RandomForestRegressor(n_estimators=50))
]

# Modèle final
final_model = LinearRegression()

# Stacking Regressor
stacking = StackingRegressor(estimators=base_models, final_estimator=final_model, cv=5)

# Entraînement
stacking.fit(X_train, y_train)

# Prédictions
y_pred = stacking.predict(X_test)

Voting Regressor

from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Modèles
models = [
    ('ridge', Ridge(alpha=1.0)),
    ('tree', DecisionTreeRegressor(max_depth=5)),
    ('gboost', GradientBoostingRegressor(n_estimators=50))
]

# Voting Regressor
voting = VotingRegressor(estimators=models)

# Entraînement
voting.fit(X_train, y_train)

# Prédictions
y_pred = voting.predict(X_test)


XGBoostRegressor

In [None]:
lancer regressor et classifiers et faire voting dessus