In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier
from sklearn.utils import all_estimators 
from sklearn.base import ClassifierMixin

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [2]:
def load_df(col_cible_type):
    df = pd.read_csv(
        f"../data/_df_ready_{col_cible_type}.csv", sep=";", low_memory=False
    )
    # Contrôle
    # display(df.head(3))
    # display(df.info())
    return df

In [9]:
import copy

cols_cible_type = ["TurnoutTimeSeconds", "TravelTimeSeconds", "PumpSecondsOnSite"]
cols_Data = [
    ["CalYear", "HourOfCall", "Postcode_district", "Month", "DayOfWeek"],
    ["CalYear", "HourOfCall", "Postcode_district", "Month", "DayOfWeek"],
    ["CalYear", "PropertyType", "StopCode"],
]
# utilise -1 sinon crée bizarrement des Nan pour les valeurs à 0 aua lieu de mettre 0
cols_cible_bins = [
    {
        "bins": np.array([-1, 1, 2, 3, 5, 8, 10, 15, 20000]) * 60,
        "labels": [1, 2, 3, 5, 8, 10, 15, 30],
    },
    {
        "bins": np.array([-1, 1, 2, 3, 5, 8, 10, 15, 20000]) * 60,
        "labels": [1, 2, 3, 5, 8, 10, 15, 30],
    },
    {
        "bins": np.array([-1, 5, 10, 15, 30, 45, 60, 120, 180, 360, 1000000]) * 60,
        "labels": [5, 10, 15, 30, 45, 60, 120, 180, 360, 1000],
    },
]
cols_cible = [
    ["TurnoutTimeSeconds_min", "TurnoutTimeSeconds_mean", "TurnoutTimeSeconds_max"],
    ["TravelTimeSeconds_min", "TravelTimeSeconds_mean", "TravelTimeSeconds_max"],
    ["PumpSecondsOnSite_min", "PumpSecondsOnSite_mean", "PumpSecondsOnSite_max"],
    # NumPumpsAttending??
]
cols_cible_filter = [
    "TurnoutTimeSeconds_mean",
    "TravelTimeSeconds_mean",
    "PumpSecondsOnSite_mean",
]

# copie profonde, sinon la simple copie fait une copie des références des ss tableaux, et leur modif modifie l'original
cols_cible_minutes = copy.deepcopy(cols_cible)

ignore_classifiers = [
    "CalibratedClassifierCV",
    "CalibratedClassifierCV",
    "CategoricalNB",
    "ClassifierChain",
    "ComplementNB",
    "FixedThresholdClassifier",
    "GaussianProcessClassifier",
    "GradientBoostingClassifier",
    "HistGradientBoostingClassifier",
    "LabelPropagation",
    "LabelSpreading",
    "LinearSVC",
    "LogisticRegressionCV",
    "SVC",
]

CLASSIFIERS = [
    est
    for est in all_estimators()
    if (issubclass(est[1], ClassifierMixin) and (not (est[0] in ignore_classifiers)))
]

all_results = pd.DataFrame()


for index, name in enumerate(cols_cible_type):
    df = load_df(name)
    # Crée 3 nouvelles target pour chque min/mean/max, par tranche en minutes, pour la cible type actuelle
    for index_cible, col_cible in enumerate(cols_cible[index]):
        bins = cols_cible_bins[index]["bins"]
        print(bins)
        labels = cols_cible_bins[index]["labels"]
        print(labels)
        new_name = col_cible.replace("Seconds", "Minutes")
        print(new_name, col_cible)
        df[new_name] = pd.cut(x=df[col_cible], bins=bins, labels=labels)
        cols_cible_minutes[index][index_cible] = new_name
        print("cols_cible", cols_cible)
        print("cols_cible_minutes", cols_cible_minutes)
    display(df.head(10))

    # df = df[df.CalYear > 6]

    # pd.DataFrame(pd.cut(X['Age'], bins = [0, 12, 18, 30, 50, 65, np.max(df.Age)], labels=['Kid','Adolescent','Adult-','Adult','Adult+','Senior']))
    # df['age_cat'] = pd.cut(x = df['age'], bins = [19,24,28,32,36,40], labels = ['19-24', '25-28', '29-32', '33-36', '37-40'],
    #                    include_lowest = True)

    # X = df[["PumpSecondsOnSite_min", "PumpSecondsOnSite_mean", "PumpSecondsOnSite_max", "TurnoutTimeSeconds_min", "TurnoutTimeSeconds_mean",
    #               "TurnoutTimeSeconds_max", "TravelTimeSeconds_min", "TravelTimeSeconds_mean", "TravelTimeSeconds_max", "NumPumpsAttending"]]

    # plt.boxplot([df.TurnoutTimeSeconds_min, df.TurnoutTimeSeconds_mean, df.TurnoutTimeSeconds_max])
    # plt.show()
    # plt.boxplot([df.TravelTimeSeconds_min, df.TravelTimeSeconds_mean, df.TravelTimeSeconds_max])
    # plt.show()
    # plt.figure(figsize=(5, 10))
    # plt.boxplot([df.PumpSecondsOnSite_min, df.PumpSecondsOnSite_mean, df.PumpSecondsOnSite_max])
    # plt.show()

    # Auto ML
    df_limited = df[df.CalYear > 6]
    print(df_limited.shape)
    # Crée X sans les target
    cols_to_remove = (
        [item for sublist in cols_cible for item in sublist]
        + cols_cible_minutes[index]
        + ["NumPumpsAttending"]
    )
    print("cols_to_remove", cols_to_remove)
    X = df_limited.drop(cols_to_remove, axis=1)
    # Ne conserve que certaines colonnes explicatives
    # Crée une liste des colonnes à conserver
    cols_to_keep = [
        col
        for col in X.columns
        if any(substring in col for substring in cols_Data[index])
    ]
    print("cols_to_keep", cols_to_keep)
    # Conserver uniquement ces colonnes
    X = X[cols_to_keep]

    display(X.head(2))
    print(X.shape)

    for index_cible, col_cible in enumerate(cols_cible[index]):
        if not (col_cible in cols_cible_filter):
            continue
        minute_col = cols_cible_minutes[index][index_cible]
        print(col_cible, ">>", minute_col)
        y = df_limited[minute_col]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        myCLASSIFIERS = CLASSIFIERS
        print(myCLASSIFIERS)
        # myCLASSIFIERS = myCLASSIFIERS[4:6]
        reg = LazyClassifier(
            verbose=2,
            ignore_warnings=False,
            custom_metric=None,
            classifiers=myCLASSIFIERS,
        )
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        # Ajoute le nom de la colonne
        models["Target"] = minute_col
        # Concaténe dans les résultats
        all_results = pd.concat([all_results, models], axis=0)
        display(models)
        models.to_csv(f"../data/_autoML_classifier_{minute_col}.csv", sep=";", index=True)
        # break
    # break

all_results.to_csv(f"../data/_autoML_classifier.csv", sep=";", index=True)

# tester automl avant et après PCA et aussi gridsearch pour comparer
# save du PCA

[    -60      60     120     180     300     480     600     900 1200000]
[1, 2, 3, 5, 8, 10, 15, 30]
TurnoutTimeMinutes_min TurnoutTimeSeconds_min
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
cols_cible_minutes [['TurnoutTimeMinutes_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
[    -60      60     120     180     300     480     600     900 1200000]
[1, 2, 3, 5, 8, 10, 15, 30]
TurnoutTimeMinutes_mean TurnoutTimeSeconds_mean
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min',

Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,PropertyType_8,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max,TurnoutTimeMinutes_min,TurnoutTimeMinutes_mean,TurnoutTimeMinutes_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0,5,5,5
1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0,3,3,3
2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0,2,2,2
3,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0,2,3,3
4,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0,2,2,2
5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0,2,2,2
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0,3,5,5
7,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0,3,3,3
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0,2,2,2
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0,3,3,3


(1016665, 45)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'TurnoutTimeMinutes_min', 'TurnoutTimeMinutes_mean', 'TurnoutTimeMinutes_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
574862,7,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1
574863,7,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1


(1016665, 18)
TurnoutTimeSeconds_mean >> TurnoutTimeMinutes_mean
[('AdaBoostClassifier', <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>), ('BaggingClassifier', <class 'sklearn.ensemble._bagging.BaggingClassifier'>), ('BernoulliNB', <class 'sklearn.naive_bayes.BernoulliNB'>), ('DecisionTreeClassifier', <class 'sklearn.tree._classes.DecisionTreeClassifier'>), ('DummyClassifier', <class 'sklearn.dummy.DummyClassifier'>), ('ExtraTreeClassifier', <class 'sklearn.tree._classes.ExtraTreeClassifier'>), ('ExtraTreesClassifier', <class 'sklearn.ensemble._forest.ExtraTreesClassifier'>), ('GaussianNB', <class 'sklearn.naive_bayes.GaussianNB'>), ('KNeighborsClassifier', <class 'sklearn.neighbors._classification.KNeighborsClassifier'>), ('LinearDiscriminantAnalysis', <class 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis'>), ('LogisticRegression', <class 'sklearn.linear_model._logistic.LogisticRegression'>), ('MLPClassifier', <class 'sklearn.neural_network._multilayer_perce

  3%|▎         | 1/31 [00:26<13:26, 26.89s/it]

ROC AUC couldn't be calculated for AdaBoostClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'AdaBoostClassifier', 'Accuracy': 0.5668091259165998, 'Balanced Accuracy': np.float64(0.12257026673399839), 'ROC AUC': None, 'F1 Score': 0.4385301390884828, 'Time taken': 26.89079737663269}


  6%|▋         | 2/31 [00:48<11:27, 23.71s/it]

ROC AUC couldn't be calculated for BaggingClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'BaggingClassifier', 'Accuracy': 0.5751156969109785, 'Balanced Accuracy': np.float64(0.1363400634772959), 'ROC AUC': None, 'F1 Score': 0.5205440772916469, 'Time taken': 21.488060474395752}


 10%|▉         | 3/31 [00:49<06:14, 13.37s/it]

ROC AUC couldn't be calculated for BernoulliNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'BernoulliNB', 'Accuracy': 0.5845780075049304, 'Balanced Accuracy': np.float64(0.125), 'ROC AUC': None, 'F1 Score': 0.43132170866932984, 'Time taken': 1.0546727180480957}


 13%|█▎        | 4/31 [00:52<04:07,  9.17s/it]

ROC AUC couldn't be calculated for DecisionTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.5755337303831646, 'Balanced Accuracy': np.float64(0.1362583914763559), 'ROC AUC': None, 'F1 Score': 0.522021274599274, 'Time taken': 2.7248427867889404}


 16%|█▌        | 5/31 [00:52<02:39,  6.13s/it]

ROC AUC couldn't be calculated for DummyClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DummyClassifier', 'Accuracy': 0.5845780075049304, 'Balanced Accuracy': np.float64(0.125), 'ROC AUC': None, 'F1 Score': 0.43132170866932984, 'Time taken': 0.7378199100494385}


 19%|█▉        | 6/31 [00:54<01:53,  4.55s/it]

ROC AUC couldn't be calculated for ExtraTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.5755337303831646, 'Balanced Accuracy': np.float64(0.1362583914763559), 'ROC AUC': None, 'F1 Score': 0.522021274599274, 'Time taken': 1.4903266429901123}


 23%|██▎       | 7/31 [02:18<12:10, 30.43s/it]

ROC AUC couldn't be calculated for ExtraTreesClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.5755337303831646, 'Balanced Accuracy': np.float64(0.1362583914763559), 'ROC AUC': None, 'F1 Score': 0.522021274599274, 'Time taken': 83.70667767524719}


 26%|██▌       | 8/31 [02:19<08:04, 21.08s/it]

ROC AUC couldn't be calculated for GaussianNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'GaussianNB', 'Accuracy': 0.010042639414162974, 'Balanced Accuracy': np.float64(0.125), 'ROC AUC': None, 'F1 Score': 0.00019970366094919823, 'Time taken': 1.0599181652069092}


 29%|██▉       | 9/31 [14:01<1:25:50, 234.12s/it]

ROC AUC couldn't be calculated for KNeighborsClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.5432566282895546, 'Balanced Accuracy': np.float64(0.14084722762339338), 'ROC AUC': None, 'F1 Score': 0.5235709580712199, 'Time taken': 702.5690569877625}


 32%|███▏      | 10/31 [14:04<56:54, 162.58s/it] 

ROC AUC couldn't be calculated for LinearDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.5668091259165998, 'Balanced Accuracy': np.float64(0.12257026673399839), 'ROC AUC': None, 'F1 Score': 0.4385301390884828, 'Time taken': 2.3750998973846436}


 35%|███▌      | 11/31 [14:27<40:00, 120.02s/it]

ROC AUC couldn't be calculated for LogisticRegression
multi_class must be in ('ovo', 'ovr')
{'Model': 'LogisticRegression', 'Accuracy': 0.5668091259165998, 'Balanced Accuracy': np.float64(0.12257026673399839), 'ROC AUC': None, 'F1 Score': 0.4385301390884828, 'Time taken': 23.53259563446045}


 39%|███▊      | 12/31 [18:10<47:54, 151.27s/it]

ROC AUC couldn't be calculated for MLPClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'MLPClassifier', 'Accuracy': 0.586618994457368, 'Balanced Accuracy': np.float64(0.12984919513022444), 'ROC AUC': None, 'F1 Score': 0.478567968413555, 'Time taken': 222.72608518600464}
MultiOutputClassifier model failed to execute
MultiOutputClassifier.__init__() missing 1 required positional argument: 'estimator'


 45%|████▌     | 14/31 [18:10<22:57, 81.06s/it] 

MultinomialNB model failed to execute
Negative values in data passed to MultinomialNB (input X).


 48%|████▊     | 15/31 [18:12<16:18, 61.13s/it]

ROC AUC couldn't be calculated for NearestCentroid
multi_class must be in ('ovo', 'ovr')
{'Model': 'NearestCentroid', 'Accuracy': 0.1725297910324443, 'Balanced Accuracy': np.float64(0.24037909410201566), 'ROC AUC': None, 'F1 Score': 0.23940000757315752, 'Time taken': 1.1417865753173828}


 52%|█████▏    | 16/31 [18:12<11:18, 45.27s/it]

NuSVC model failed to execute
specified nu is infeasible
OneVsOneClassifier model failed to execute
OneVsOneClassifier.__init__() missing 1 required positional argument: 'estimator'
OneVsRestClassifier model failed to execute
OneVsRestClassifier.__init__() missing 1 required positional argument: 'estimator'
OutputCodeClassifier model failed to execute
OutputCodeClassifier.__init__() missing 1 required positional argument: 'estimator'


 65%|██████▍   | 20/31 [18:20<03:32, 19.29s/it]

ROC AUC couldn't be calculated for PassiveAggressiveClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.4701302789020966, 'Balanced Accuracy': np.float64(0.14382828931321695), 'ROC AUC': None, 'F1 Score': 0.4731792109033544, 'Time taken': 7.9119181632995605}


 68%|██████▊   | 21/31 [18:28<02:52, 17.26s/it]

ROC AUC couldn't be calculated for Perceptron
multi_class must be in ('ovo', 'ovr')
{'Model': 'Perceptron', 'Accuracy': 0.46079091932937594, 'Balanced Accuracy': np.float64(0.13182679318308824), 'ROC AUC': None, 'F1 Score': 0.46314507204851024, 'Time taken': 7.778050661087036}


 71%|███████   | 22/31 [18:30<02:07, 14.13s/it]

ROC AUC couldn't be calculated for QuadraticDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.1818986588502604, 'Balanced Accuracy': np.float64(0.12077654677895396), 'ROC AUC': None, 'F1 Score': 0.25267343319443564, 'Time taken': 1.7496211528778076}


 74%|███████▍  | 23/31 [26:55<16:30, 123.83s/it]

ROC AUC couldn't be calculated for RadiusNeighborsClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RadiusNeighborsClassifier', 'Accuracy': 0.5828911194936385, 'Balanced Accuracy': np.float64(0.13189968479969394), 'ROC AUC': None, 'F1 Score': 0.49803019463057413, 'Time taken': 505.14306569099426}


 77%|███████▋  | 24/31 [28:07<12:59, 111.34s/it]

ROC AUC couldn't be calculated for RandomForestClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RandomForestClassifier', 'Accuracy': 0.5763747153683858, 'Balanced Accuracy': np.float64(0.13616119292470238), 'ROC AUC': None, 'F1 Score': 0.5196344528692846, 'Time taken': 72.22383236885071}


 81%|████████  | 25/31 [28:08<08:18, 83.01s/it] 

ROC AUC couldn't be calculated for RidgeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifier', 'Accuracy': 0.5668091259165998, 'Balanced Accuracy': np.float64(0.12257026673399839), 'ROC AUC': None, 'F1 Score': 0.4385301390884828, 'Time taken': 1.024339199066162}


 84%|████████▍ | 26/31 [28:11<05:07, 61.47s/it]

ROC AUC couldn't be calculated for RidgeClassifierCV
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.5668091259165998, 'Balanced Accuracy': np.float64(0.12257026673399839), 'ROC AUC': None, 'F1 Score': 0.4385301390884828, 'Time taken': 2.7836172580718994}


 87%|████████▋ | 27/31 [28:21<03:08, 47.16s/it]

ROC AUC couldn't be calculated for SGDClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'SGDClassifier', 'Accuracy': 0.5845780075049304, 'Balanced Accuracy': np.float64(0.125), 'ROC AUC': None, 'F1 Score': 0.43132170866932984, 'Time taken': 9.847229242324829}


100%|██████████| 31/31 [28:21<00:00, 54.89s/it]

SelfTrainingClassifier model failed to execute
You must pass an estimator to SelfTrainingClassifier. Use `estimator`.
StackingClassifier model failed to execute
StackingClassifier.__init__() missing 1 required positional argument: 'estimators'
TunedThresholdClassifierCV model failed to execute
TunedThresholdClassifierCV.__init__() missing 1 required positional argument: 'estimator'
VotingClassifier model failed to execute
VotingClassifier.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken,Target
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
NearestCentroid,0.17,0.24,,0.24,1.14,TurnoutTimeMinutes_mean
PassiveAggressiveClassifier,0.47,0.14,,0.47,7.91,TurnoutTimeMinutes_mean
KNeighborsClassifier,0.54,0.14,,0.52,702.57,TurnoutTimeMinutes_mean
BaggingClassifier,0.58,0.14,,0.52,21.49,TurnoutTimeMinutes_mean
DecisionTreeClassifier,0.58,0.14,,0.52,2.72,TurnoutTimeMinutes_mean
ExtraTreeClassifier,0.58,0.14,,0.52,1.49,TurnoutTimeMinutes_mean
ExtraTreesClassifier,0.58,0.14,,0.52,83.71,TurnoutTimeMinutes_mean
RandomForestClassifier,0.58,0.14,,0.52,72.22,TurnoutTimeMinutes_mean
RadiusNeighborsClassifier,0.58,0.13,,0.5,505.14,TurnoutTimeMinutes_mean
Perceptron,0.46,0.13,,0.46,7.78,TurnoutTimeMinutes_mean


[    -60      60     120     180     300     480     600     900 1200000]
[1, 2, 3, 5, 8, 10, 15, 30]
TravelTimeMinutes_min TravelTimeSeconds_min
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
cols_cible_minutes [['TurnoutTimeMinutes_min', 'TurnoutTimeMinutes_mean', 'TurnoutTimeMinutes_max'], ['TravelTimeMinutes_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
[    -60      60     120     180     300     480     600     900 1200000]
[1, 2, 3, 5, 8, 10, 15, 30]
TravelTimeMinutes_mean TravelTimeSeconds_mean
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'Pu

Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max,TravelTimeMinutes_min,TravelTimeMinutes_mean,TravelTimeMinutes_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0,2,2,2
1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0,3,3,3
2,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0,2,2,2
3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0,2,2,2
4,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0,2,2,3
5,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0,8,8,8
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0,3,3,3
7,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0,3,3,3
8,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0,5,5,5
9,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0,10,10,10


(1016665, 48)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'TravelTimeMinutes_min', 'TravelTimeMinutes_mean', 'TravelTimeMinutes_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'HourOfCall_0', 'HourOfCall_1', 'HourOfCall_2', 'HourOfCall_3', 'HourOfCall_4', 'Postcode_district_0', 'Postcode_district_1', 'Postcode_district_2', 'Postcode_district_3', 'Postcode_district_4', 'Postcode_district_5', 'Postcode_district_6', 'Postcode_district_7', 'Postcode_district_8', 'Month_0', 'Month_1', 'Month_2', 'Month_3', 'DayOfWeek_0', 'DayOfWeek_1', 'DayOfWeek_2']


Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2
574862,7,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,1
574863,7,0,0,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,1


(1016665, 22)
TravelTimeSeconds_mean >> TravelTimeMinutes_mean
[('AdaBoostClassifier', <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>), ('BaggingClassifier', <class 'sklearn.ensemble._bagging.BaggingClassifier'>), ('BernoulliNB', <class 'sklearn.naive_bayes.BernoulliNB'>), ('DecisionTreeClassifier', <class 'sklearn.tree._classes.DecisionTreeClassifier'>), ('DummyClassifier', <class 'sklearn.dummy.DummyClassifier'>), ('ExtraTreeClassifier', <class 'sklearn.tree._classes.ExtraTreeClassifier'>), ('ExtraTreesClassifier', <class 'sklearn.ensemble._forest.ExtraTreesClassifier'>), ('GaussianNB', <class 'sklearn.naive_bayes.GaussianNB'>), ('KNeighborsClassifier', <class 'sklearn.neighbors._classification.KNeighborsClassifier'>), ('LinearDiscriminantAnalysis', <class 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis'>), ('LogisticRegression', <class 'sklearn.linear_model._logistic.LogisticRegression'>), ('MLPClassifier', <class 'sklearn.neural_network._multilayer_percept

  3%|▎         | 1/31 [00:31<15:52, 31.76s/it]

ROC AUC couldn't be calculated for AdaBoostClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'AdaBoostClassifier', 'Accuracy': 0.42180560951739265, 'Balanced Accuracy': np.float64(0.12500789107796872), 'ROC AUC': None, 'F1 Score': 0.2503571366210246, 'Time taken': 31.755970001220703}


  6%|▋         | 2/31 [01:11<17:39, 36.53s/it]

ROC AUC couldn't be calculated for BaggingClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'BaggingClassifier', 'Accuracy': 0.34746450404017054, 'Balanced Accuracy': np.float64(0.1567787257350723), 'ROC AUC': None, 'F1 Score': 0.3344893232758532, 'Time taken': 39.87187838554382}


 10%|▉         | 3/31 [01:12<09:30, 20.39s/it]

ROC AUC couldn't be calculated for BernoulliNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'BernoulliNB', 'Accuracy': 0.4218006914765434, 'Balanced Accuracy': np.float64(0.12501870865113274), 'ROC AUC': None, 'F1 Score': 0.25049063302153235, 'Time taken': 1.1737699508666992}


 13%|█▎        | 4/31 [01:17<06:25, 14.28s/it]

ROC AUC couldn't be calculated for DecisionTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.3056759109441163, 'Balanced Accuracy': np.float64(0.15461626139645798), 'ROC AUC': None, 'F1 Score': 0.3095096575005681, 'Time taken': 4.914384365081787}


 16%|█▌        | 5/31 [01:18<04:03,  9.37s/it]

ROC AUC couldn't be calculated for DummyClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DummyClassifier', 'Accuracy': 0.42180560951739265, 'Balanced Accuracy': np.float64(0.125), 'ROC AUC': None, 'F1 Score': 0.25027327368715474, 'Time taken': 0.6648950576782227}


 19%|█▉        | 6/31 [01:20<02:55,  7.00s/it]

ROC AUC couldn't be calculated for ExtraTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.3071562412397397, 'Balanced Accuracy': np.float64(0.1500657243424323), 'ROC AUC': None, 'F1 Score': 0.3081041136488067, 'Time taken': 2.400496006011963}


 23%|██▎       | 7/31 [04:15<24:41, 61.74s/it]

ROC AUC couldn't be calculated for ExtraTreesClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.34860548951719594, 'Balanced Accuracy': np.float64(0.1530521494229723), 'ROC AUC': None, 'F1 Score': 0.3324778960400186, 'Time taken': 174.42461037635803}


 26%|██▌       | 8/31 [04:16<16:16, 42.47s/it]

ROC AUC couldn't be calculated for GaussianNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'GaussianNB', 'Accuracy': 0.4158547800898034, 'Balanced Accuracy': np.float64(0.1288494617541881), 'ROC AUC': None, 'F1 Score': 0.2496591200325738, 'Time taken': 1.2208411693572998}


 29%|██▉       | 9/31 [18:17<1:47:05, 292.07s/it]

ROC AUC couldn't be calculated for KNeighborsClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.3520038557440258, 'Balanced Accuracy': np.float64(0.1462322595615131), 'ROC AUC': None, 'F1 Score': 0.3267026991714275, 'Time taken': 840.8956048488617}


 32%|███▏      | 10/31 [18:20<1:11:00, 202.87s/it]

ROC AUC couldn't be calculated for LinearDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.42161872396512123, 'Balanced Accuracy': np.float64(0.12509717823436503), 'ROC AUC': None, 'F1 Score': 0.25184380552317515, 'Time taken': 3.117302656173706}


 35%|███▌      | 11/31 [18:28<47:47, 143.38s/it]  

ROC AUC couldn't be calculated for LogisticRegression
multi_class must be in ('ovo', 'ovr')
{'Model': 'LogisticRegression', 'Accuracy': 0.421613805924272, 'Balanced Accuracy': np.float64(0.12511588688549777), 'ROC AUC': None, 'F1 Score': 0.2520631654088738, 'Time taken': 8.51157259941101}


 39%|███▊      | 12/31 [23:06<58:19, 184.17s/it]

ROC AUC couldn't be calculated for MLPClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'MLPClassifier', 'Accuracy': 0.44076465699124096, 'Balanced Accuracy': np.float64(0.14870624493553147), 'ROC AUC': None, 'F1 Score': 0.3472166018349888, 'Time taken': 277.45316338539124}
MultiOutputClassifier model failed to execute
MultiOutputClassifier.__init__() missing 1 required positional argument: 'estimator'


 45%|████▌     | 14/31 [23:07<27:57, 98.68s/it] 

MultinomialNB model failed to execute
Negative values in data passed to MultinomialNB (input X).


 48%|████▊     | 15/31 [23:08<19:50, 74.40s/it]

ROC AUC couldn't be calculated for NearestCentroid
multi_class must be in ('ovo', 'ovr')
{'Model': 'NearestCentroid', 'Accuracy': 0.11111821494789335, 'Balanced Accuracy': np.float64(0.15452079990105233), 'ROC AUC': None, 'F1 Score': 0.14281196890314554, 'Time taken': 1.2952983379364014}


 52%|█████▏    | 16/31 [23:09<13:46, 55.09s/it]

NuSVC model failed to execute
specified nu is infeasible
OneVsOneClassifier model failed to execute
OneVsOneClassifier.__init__() missing 1 required positional argument: 'estimator'
OneVsRestClassifier model failed to execute
OneVsRestClassifier.__init__() missing 1 required positional argument: 'estimator'
OutputCodeClassifier model failed to execute
OutputCodeClassifier.__init__() missing 1 required positional argument: 'estimator'


 65%|██████▍   | 20/31 [23:18<04:16, 23.34s/it]

ROC AUC couldn't be calculated for PassiveAggressiveClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.3285939813016087, 'Balanced Accuracy': np.float64(0.12833682772083072), 'ROC AUC': None, 'F1 Score': 0.2947752294013006, 'Time taken': 8.75307297706604}


 68%|██████▊   | 21/31 [23:28<03:31, 21.10s/it]

ROC AUC couldn't be calculated for Perceptron
multi_class must be in ('ovo', 'ovr')
{'Model': 'Perceptron', 'Accuracy': 0.2954021236100387, 'Balanced Accuracy': np.float64(0.12853741990129797), 'ROC AUC': None, 'F1 Score': 0.28008813677755645, 'Time taken': 10.655189752578735}


 71%|███████   | 22/31 [23:30<02:35, 17.25s/it]

ROC AUC couldn't be calculated for QuadraticDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.41721707740504493, 'Balanced Accuracy': np.float64(0.1328646551561952), 'ROC AUC': None, 'F1 Score': 0.28155505265741554, 'Time taken': 1.986968994140625}


 74%|███████▍  | 23/31 [35:22<22:59, 172.45s/it]

RadiusNeighborsClassifier model failed to execute
No neighbors found for test samples array([     0,      1,      4, ..., 203327, 203330, 203331]), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.


 77%|███████▋  | 24/31 [37:49<19:23, 166.21s/it]

ROC AUC couldn't be calculated for RandomForestClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RandomForestClassifier', 'Accuracy': 0.3722956922880201, 'Balanced Accuracy': np.float64(0.15151895303643959), 'ROC AUC': None, 'F1 Score': 0.34094896164024663, 'Time taken': 146.67066764831543}


 81%|████████  | 25/31 [37:50<12:22, 123.81s/it]

ROC AUC couldn't be calculated for RidgeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifier', 'Accuracy': 0.42173183890465393, 'Balanced Accuracy': np.float64(0.1250982581872693), 'ROC AUC': None, 'F1 Score': 0.2515487856274505, 'Time taken': 1.103358507156372}


 84%|████████▍ | 26/31 [37:53<07:37, 91.45s/it] 

ROC AUC couldn't be calculated for RidgeClassifierCV
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.42173183890465393, 'Balanced Accuracy': np.float64(0.1250982581872693), 'ROC AUC': None, 'F1 Score': 0.2515487856274505, 'Time taken': 3.278994083404541}


 87%|████████▋ | 27/31 [38:05<04:37, 69.27s/it]

ROC AUC couldn't be calculated for SGDClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'SGDClassifier', 'Accuracy': 0.27882832594807533, 'Balanced Accuracy': np.float64(0.1276499578320098), 'ROC AUC': None, 'F1 Score': 0.28097025264033976, 'Time taken': 11.429014921188354}


100%|██████████| 31/31 [38:05<00:00, 73.73s/it]

SelfTrainingClassifier model failed to execute
You must pass an estimator to SelfTrainingClassifier. Use `estimator`.
StackingClassifier model failed to execute
StackingClassifier.__init__() missing 1 required positional argument: 'estimators'
TunedThresholdClassifierCV model failed to execute
TunedThresholdClassifierCV.__init__() missing 1 required positional argument: 'estimator'
VotingClassifier model failed to execute
VotingClassifier.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken,Target
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BaggingClassifier,0.35,0.16,,0.33,39.87,TravelTimeMinutes_mean
DecisionTreeClassifier,0.31,0.15,,0.31,4.91,TravelTimeMinutes_mean
NearestCentroid,0.11,0.15,,0.14,1.3,TravelTimeMinutes_mean
ExtraTreesClassifier,0.35,0.15,,0.33,174.42,TravelTimeMinutes_mean
RandomForestClassifier,0.37,0.15,,0.34,146.67,TravelTimeMinutes_mean
ExtraTreeClassifier,0.31,0.15,,0.31,2.4,TravelTimeMinutes_mean
MLPClassifier,0.44,0.15,,0.35,277.45,TravelTimeMinutes_mean
KNeighborsClassifier,0.35,0.15,,0.33,840.9,TravelTimeMinutes_mean
QuadraticDiscriminantAnalysis,0.42,0.13,,0.28,1.99,TravelTimeMinutes_mean
GaussianNB,0.42,0.13,,0.25,1.22,TravelTimeMinutes_mean


[     -60      300      600      900     1800     2700     3600     7200
    10800    21600 60000000]
[5, 10, 15, 30, 45, 60, 120, 180, 360, 1000]
PumpMinutesOnSite_min PumpSecondsOnSite_min
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max'], ['PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
cols_cible_minutes [['TurnoutTimeMinutes_min', 'TurnoutTimeMinutes_mean', 'TurnoutTimeMinutes_max'], ['TravelTimeMinutes_min', 'TravelTimeMinutes_mean', 'TravelTimeMinutes_max'], ['PumpMinutesOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max']]
[     -60      300      600      900     1800     2700     3600     7200
    10800    21600 60000000]
[5, 10, 15, 30, 45, 60, 120, 180, 360, 1000]
PumpMinutesOnSite_mean PumpSecondsOnSite_mean
cols_cible [['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max'], ['TravelTimeSecon

Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max,PumpMinutesOnSite_min,PumpMinutesOnSite_mean,PumpMinutesOnSite_max
0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0,5,10,10
1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0,10,10,10
2,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0,15,15,15
3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0,5,5,5
4,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0,10,10,10
5,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0,10,10,10
6,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0,30,30,30
7,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0,10,10,10
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0,15,15,15
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0,10,10,10


(1016665, 43)
cols_to_remove ['TurnoutTimeSeconds_min', 'TurnoutTimeSeconds_mean', 'TurnoutTimeSeconds_max', 'TravelTimeSeconds_min', 'TravelTimeSeconds_mean', 'TravelTimeSeconds_max', 'PumpSecondsOnSite_min', 'PumpSecondsOnSite_mean', 'PumpSecondsOnSite_max', 'PumpMinutesOnSite_min', 'PumpMinutesOnSite_mean', 'PumpMinutesOnSite_max', 'NumPumpsAttending']
cols_to_keep ['CalYear', 'PropertyType_0', 'PropertyType_1', 'PropertyType_2', 'PropertyType_3', 'PropertyType_4', 'PropertyType_5', 'PropertyType_6', 'StopCode_0', 'StopCode_1', 'StopCode_2', 'StopCode_3', 'StopCode_4']


Unnamed: 0,CalYear,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4
574862,7,0,0,0,0,0,0,1,0,0,0,0,1
574863,7,0,0,0,0,0,1,0,0,0,0,1,0


(1016665, 13)
PumpSecondsOnSite_mean >> PumpMinutesOnSite_mean
[('AdaBoostClassifier', <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>), ('BaggingClassifier', <class 'sklearn.ensemble._bagging.BaggingClassifier'>), ('BernoulliNB', <class 'sklearn.naive_bayes.BernoulliNB'>), ('DecisionTreeClassifier', <class 'sklearn.tree._classes.DecisionTreeClassifier'>), ('DummyClassifier', <class 'sklearn.dummy.DummyClassifier'>), ('ExtraTreeClassifier', <class 'sklearn.tree._classes.ExtraTreeClassifier'>), ('ExtraTreesClassifier', <class 'sklearn.ensemble._forest.ExtraTreesClassifier'>), ('GaussianNB', <class 'sklearn.naive_bayes.GaussianNB'>), ('KNeighborsClassifier', <class 'sklearn.neighbors._classification.KNeighborsClassifier'>), ('LinearDiscriminantAnalysis', <class 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis'>), ('LogisticRegression', <class 'sklearn.linear_model._logistic.LogisticRegression'>), ('MLPClassifier', <class 'sklearn.neural_network._multilayer_percept

  3%|▎         | 1/31 [00:14<07:27, 14.91s/it]

ROC AUC couldn't be calculated for AdaBoostClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'AdaBoostClassifier', 'Accuracy': 0.31089887032601693, 'Balanced Accuracy': np.float64(0.13127454118370113), 'ROC AUC': None, 'F1 Score': 0.23717126976325392, 'Time taken': 14.907055616378784}


  6%|▋         | 2/31 [00:24<05:43, 11.83s/it]

ROC AUC couldn't be calculated for BaggingClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'BaggingClassifier', 'Accuracy': 0.3288546374666188, 'Balanced Accuracy': np.float64(0.15175289079957505), 'ROC AUC': None, 'F1 Score': 0.25589412249815163, 'Time taken': 9.67459487915039}


 10%|▉         | 3/31 [00:25<03:10,  6.81s/it]

ROC AUC couldn't be calculated for BernoulliNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'BernoulliNB', 'Accuracy': 0.30978739309408704, 'Balanced Accuracy': np.float64(0.13661991363729256), 'ROC AUC': None, 'F1 Score': 0.23541787645734644, 'Time taken': 0.8373727798461914}


 13%|█▎        | 4/31 [00:26<02:06,  4.68s/it]

ROC AUC couldn't be calculated for DecisionTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.32904644105973946, 'Balanced Accuracy': np.float64(0.15133114123448782), 'ROC AUC': None, 'F1 Score': 0.255414336030987, 'Time taken': 1.4149971008300781}


 16%|█▌        | 5/31 [00:27<01:22,  3.17s/it]

ROC AUC couldn't be calculated for DummyClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'DummyClassifier', 'Accuracy': 0.26701027378733405, 'Balanced Accuracy': np.float64(0.1), 'ROC AUC': None, 'F1 Score': 0.11253971302833142, 'Time taken': 0.4865133762359619}


 19%|█▉        | 6/31 [00:28<00:59,  2.37s/it]

ROC AUC couldn't be calculated for ExtraTreeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.3290710312639857, 'Balanced Accuracy': np.float64(0.15141209496775504), 'ROC AUC': None, 'F1 Score': 0.25545655288802765, 'Time taken': 0.8258917331695557}


 23%|██▎       | 7/31 [01:05<05:29, 13.73s/it]

ROC AUC couldn't be calculated for ExtraTreesClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.32904152301889017, 'Balanced Accuracy': np.float64(0.15148863169580493), 'ROC AUC': None, 'F1 Score': 0.25543564895037585, 'Time taken': 37.12124705314636}


 26%|██▌       | 8/31 [01:06<03:41,  9.63s/it]

ROC AUC couldn't be calculated for GaussianNB
multi_class must be in ('ovo', 'ovr')
{'Model': 'GaussianNB', 'Accuracy': 0.01199510163131415, 'Balanced Accuracy': np.float64(0.11446730795054694), 'ROC AUC': None, 'F1 Score': 0.017480432790946115, 'Time taken': 0.8530476093292236}


 29%|██▉       | 9/31 [48:51<5:30:52, 902.40s/it]

ROC AUC couldn't be calculated for KNeighborsClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.27416110518213965, 'Balanced Accuracy': np.float64(0.13495817526950776), 'ROC AUC': None, 'F1 Score': 0.2422399114216648, 'Time taken': 2865.421546936035}


 32%|███▏      | 10/31 [48:53<3:38:29, 624.27s/it]

ROC AUC couldn't be calculated for LinearDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.3115087073913236, 'Balanced Accuracy': np.float64(0.14264382384774033), 'ROC AUC': None, 'F1 Score': 0.2562283514376061, 'Time taken': 1.4896924495697021}


 35%|███▌      | 11/31 [49:09<2:26:04, 438.23s/it]

ROC AUC couldn't be calculated for LogisticRegression
multi_class must be in ('ovo', 'ovr')
{'Model': 'LogisticRegression', 'Accuracy': 0.3176316682486365, 'Balanced Accuracy': np.float64(0.1392669493637383), 'ROC AUC': None, 'F1 Score': 0.22857815825092287, 'Time taken': 16.392553329467773}


 39%|███▊      | 12/31 [51:00<1:47:17, 338.80s/it]

ROC AUC couldn't be calculated for MLPClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'MLPClassifier', 'Accuracy': 0.32956775338975963, 'Balanced Accuracy': np.float64(0.15200220123971248), 'ROC AUC': None, 'F1 Score': 0.24950431948274604, 'Time taken': 111.38345646858215}
MultiOutputClassifier model failed to execute
MultiOutputClassifier.__init__() missing 1 required positional argument: 'estimator'


 45%|████▌     | 14/31 [51:01<51:22, 181.34s/it]  

MultinomialNB model failed to execute
Negative values in data passed to MultinomialNB (input X).


 48%|████▊     | 15/31 [51:01<36:20, 136.31s/it]

ROC AUC couldn't be calculated for NearestCentroid
multi_class must be in ('ovo', 'ovr')
{'Model': 'NearestCentroid', 'Accuracy': 0.22887578504227057, 'Balanced Accuracy': np.float64(0.1842639283442105), 'ROC AUC': None, 'F1 Score': 0.17393575978295217, 'Time taken': 0.7323276996612549}


 52%|█████▏    | 16/31 [51:02<25:09, 100.63s/it]

NuSVC model failed to execute
specified nu is infeasible
OneVsOneClassifier model failed to execute
OneVsOneClassifier.__init__() missing 1 required positional argument: 'estimator'
OneVsRestClassifier model failed to execute
OneVsRestClassifier.__init__() missing 1 required positional argument: 'estimator'
OutputCodeClassifier model failed to execute
OutputCodeClassifier.__init__() missing 1 required positional argument: 'estimator'


 65%|██████▍   | 20/31 [51:10<07:35, 41.39s/it] 

ROC AUC couldn't be calculated for PassiveAggressiveClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.2852758775014385, 'Balanced Accuracy': np.float64(0.12259247814465546), 'ROC AUC': None, 'F1 Score': 0.2550528971830087, 'Time taken': 7.685028314590454}


 68%|██████▊   | 21/31 [51:18<05:56, 35.64s/it]

ROC AUC couldn't be calculated for Perceptron
multi_class must be in ('ovo', 'ovr')
{'Model': 'Perceptron', 'Accuracy': 0.20098557538618916, 'Balanced Accuracy': np.float64(0.11540530148305113), 'ROC AUC': None, 'F1 Score': 0.18691969996480431, 'Time taken': 8.818305730819702}


 71%|███████   | 22/31 [51:19<04:18, 28.68s/it]

ROC AUC couldn't be calculated for QuadraticDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.2260085672271594, 'Balanced Accuracy': np.float64(0.19745772027060543), 'ROC AUC': None, 'F1 Score': 0.1711475913956689, 'Time taken': 1.0771543979644775}


 74%|███████▍  | 23/31 [1:55:07<1:56:58, 877.30s/it]

RadiusNeighborsClassifier model failed to execute



 77%|███████▋  | 24/31 [1:55:49<1:18:47, 675.37s/it]

ROC AUC couldn't be calculated for RandomForestClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RandomForestClassifier', 'Accuracy': 0.32908578538653344, 'Balanced Accuracy': np.float64(0.1516137043202958), 'ROC AUC': None, 'F1 Score': 0.2558053141150616, 'Time taken': 42.306461334228516}


 81%|████████  | 25/31 [1:55:50<50:12, 502.14s/it]  

ROC AUC couldn't be calculated for RidgeClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifier', 'Accuracy': 0.3159300261147969, 'Balanced Accuracy': np.float64(0.13687507840002597), 'ROC AUC': None, 'F1 Score': 0.2241552519407199, 'Time taken': 0.915116548538208}


 84%|████████▍ | 26/31 [1:55:52<30:39, 367.88s/it]

ROC AUC couldn't be calculated for RidgeClassifierCV
multi_class must be in ('ovo', 'ovr')
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.3159300261147969, 'Balanced Accuracy': np.float64(0.13687507840002597), 'ROC AUC': None, 'F1 Score': 0.2241552519407199, 'Time taken': 1.9727146625518799}


 87%|████████▋ | 27/31 [1:56:03<17:55, 268.76s/it]

ROC AUC couldn't be calculated for SGDClassifier
multi_class must be in ('ovo', 'ovr')
{'Model': 'SGDClassifier', 'Accuracy': 0.26284961122887085, 'Balanced Accuracy': np.float64(0.1500007628003304), 'ROC AUC': None, 'F1 Score': 0.21619771426333512, 'Time taken': 10.280339002609253}


100%|██████████| 31/31 [1:56:03<00:00, 224.62s/it]

SelfTrainingClassifier model failed to execute
You must pass an estimator to SelfTrainingClassifier. Use `estimator`.
StackingClassifier model failed to execute
StackingClassifier.__init__() missing 1 required positional argument: 'estimators'
TunedThresholdClassifierCV model failed to execute
TunedThresholdClassifierCV.__init__() missing 1 required positional argument: 'estimator'
VotingClassifier model failed to execute
VotingClassifier.__init__() missing 1 required positional argument: 'estimators'





Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken,Target
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
QuadraticDiscriminantAnalysis,0.23,0.2,,0.17,1.08,PumpMinutesOnSite_mean
NearestCentroid,0.23,0.18,,0.17,0.73,PumpMinutesOnSite_mean
MLPClassifier,0.33,0.15,,0.25,111.38,PumpMinutesOnSite_mean
BaggingClassifier,0.33,0.15,,0.26,9.67,PumpMinutesOnSite_mean
RandomForestClassifier,0.33,0.15,,0.26,42.31,PumpMinutesOnSite_mean
ExtraTreesClassifier,0.33,0.15,,0.26,37.12,PumpMinutesOnSite_mean
ExtraTreeClassifier,0.33,0.15,,0.26,0.83,PumpMinutesOnSite_mean
DecisionTreeClassifier,0.33,0.15,,0.26,1.41,PumpMinutesOnSite_mean
SGDClassifier,0.26,0.15,,0.22,10.28,PumpMinutesOnSite_mean
LinearDiscriminantAnalysis,0.31,0.14,,0.26,1.49,PumpMinutesOnSite_mean
