In [7]:
# ------------------------- Creation des 3 datasets par cible -----------------------------

import pandas as pd
import category_encoders as ce
import joblib  # Pour sauvegarder l'encodeur

# Lift pandas' display limits so that displayed DataFrames are never truncated
# (None = no maximum number of rows / columns).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [5]:
def load_df(path="../data/PreProcess.csv"):
    """
    Load the pre-processed incident data and prepare it for the encoders.

    The function performs the following steps:
    - reads the semicolon-separated CSV file at ``path``;
    - drops the 'IncidentNumber' column, which is no longer needed and must
      not be used as a training feature;
    - casts the categorical columns to text for the category encoders;
    - shows the first rows and the DataFrame summary as a sanity check.

    Args:
        path (str): Location of the CSV file to load. Defaults to
            "../data/PreProcess.csv".

    Returns:
        pd.DataFrame: The pre-processed DataFrame.
    """

    def ToTextCatgories(df):
        """
        Cast the categorical columns of the DataFrame to text.

        The columns 'CalYear', 'HourOfCall', 'NumPumpsAttending', 'Month'
        and 'DayOfWeek' are converted to ``str``.

        Args:
            df (pd.DataFrame): The DataFrame holding the columns to convert.

        Returns:
            pd.DataFrame: A DataFrame with these columns converted to text.
        """
        text_columns = ["CalYear", "HourOfCall", "NumPumpsAttending", "Month", "DayOfWeek"]
        # One astype call with a per-column mapping replaces five separate casts.
        return df.astype(dict.fromkeys(text_columns, "str"))

    df = pd.read_csv(path, sep=";", low_memory=False)
    # IncidentNumber is no longer needed and must not be a training feature.
    df = df.drop(columns=["IncidentNumber"])
    # Categorical columns are turned into text for the encoders.
    df = ToTextCatgories(df)
    # Sanity check. `display` is the notebook-provided helper.
    display(df.head(3))
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly instead of being wrapped in display() (which showed "None").
    df.info()
    return df


def ReduceCategValues(df_new, df_keep, col_cible_type):
    """
    Collapse the categorical values that are not kept into 'OTHER'.

    The keep/replace decisions come from ``df_keep`` (per the original notes,
    computed by ANOVA and stored on disk by "PreProcessing Reduce categories").

    The function performs the following steps:
    - collects, per column, the values whose ``Keep`` flag is false;
    - replaces those values by 'OTHER' in ``df_new``.

    For the 'TravelTimeSeconds' target, 'Postcode_district' is left untouched
    because it is the base data of that computation.

    Args:
        df_new (pd.DataFrame): The DataFrame to reduce. It is modified in
            place (and also returned).
        df_keep (pd.DataFrame): One row per categorical value, with the
            columns 'Keep', 'Column' and 'Value'.
        col_cible_type (str): The target type (e.g. 'TravelTimeSeconds').

    Returns:
        pd.DataFrame: ``df_new`` with the reduced categorical values.
    """
    display(df_keep.head())
    # Maps each column name to the list of values that must not be kept.
    replaces = {}
    for keep_info in df_keep.itertuples():
        if keep_info.Keep:
            continue
        if keep_info.Column not in replaces:
            # First discarded value seen for this column: announce it and
            # start its list.
            print(keep_info.Column)
            replaces[keep_info.Column] = []
        replaces[keep_info.Column].append(keep_info.Value)
    # Sanity check
    print("replaces", replaces)
    # For every column, replace the values that are not kept by "OTHER".
    for col, values in replaces.items():
        # For the travel-time target, keep every Postcode_district since it is
        # the base data of the computation.
        if col_cible_type == "TravelTimeSeconds" and col == "Postcode_district":
            print("Ignore", col_cible_type, col)
            continue
        print(col)
        before = df_new[col].unique()
        print("Avant remplacement", len(before), before)
        print("A remplacer", len(values), values)
        df_new[col] = df_new[col].replace(to_replace=values, value="OTHER")
        after = df_new[col].unique()
        print("Après remplacement", len(after), after)
        print()
    return df_new

def create_df_by_target(df, reduceCategValues=True):
    """
    Build and save one encoded dataset per target type
    ["PumpSecondsOnSite", "TurnoutTimeSeconds", "TravelTimeSeconds"].

    For each target the function:
    - copies ``df`` and, when requested, reduces its categorical values using
      the "../data/keep <target>.csv" file;
    - encodes 'CalYear' with an ordinal encoder and the other categorical
      columns with a binary encoder;
    - saves both fitted encoders (joblib) and the resulting DataFrame (CSV)
      under "../data/".

    Output files carry an extra "full_" tag when the categorical values are
    not reduced, e.g. "_df_ready_full_<target>.csv" instead of
    "_df_ready_<target>.csv".

    Args:
        df (pd.DataFrame): The DataFrame holding the data to process. It is
            not modified; each target works on its own copy.
        reduceCategValues (bool): Whether the categorical values must be
            reduced before encoding.

    Returns:
        None
    """
    cols_cible_type = ["PumpSecondsOnSite", "TurnoutTimeSeconds", "TravelTimeSeconds"]

    # Columns that are binary-encoded. Postcode_district is only encoded in
    # the reduced variant.
    # TODO: choose the columns according to the target and to the data the
    # model is filtered on for training.
    binary_cols = ["HourOfCall", "Month", "DayOfWeek", "StopCode", "PropertyType"]
    if reduceCategValues:
        binary_cols.append("Postcode_district")

    # File-name tag distinguishing the non-reduced ("full") outputs.
    file_tag = "" if reduceCategValues else "full_"

    for name in cols_cible_type:
        print(name)
        # Work on a copy so every target starts from the same untouched data.
        df_prepare = df.copy()
        if reduceCategValues:
            # The keep file is only needed, and therefore only read, when the
            # categorical values are actually reduced.
            df_keep = pd.read_csv(f"../data/keep {name}.csv", sep=";")
            df_prepare = ReduceCategValues(df_prepare, df_keep, name)
        display(df_prepare.head(10))

        # CalYear: ordinal encoding.
        encoder = ce.OrdinalEncoder(cols=["CalYear"])
        print("OrdinalEncoder")
        df_prepare = encoder.fit_transform(df_prepare)
        joblib.dump(encoder, f"../data/_ce_{file_tag}{name}_OrdinalEncoder.pkl")

        # Other categorical columns: binary encoding.
        encoder = ce.BinaryEncoder(cols=list(binary_cols))
        print("BinaryEncoder")
        df_prepare = encoder.fit_transform(df_prepare)
        joblib.dump(encoder, f"../data/_ce_{file_tag}{name}_BinaryEncoder.pkl")
        display(df_prepare.head(10))

        # Save the dataset prepared for this target.
        df_prepare.to_csv(f"../data/_df_ready_{file_tag}{name}.csv", sep=";", index=False)

In [6]:
# Load the pre-processed data once, then build the per-target datasets for
# both variants: reduced categorical values first, then the full values.
df = load_df()
for reduce_categories in (True, False):
    create_df_by_target(df, reduce_categories)

Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,SW11,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,N9,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,DOMESTIC GARDEN (VEGETATION NOT EQUIPMENT),UB10,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1591527 entries, 0 to 1591526
Data columns (total 17 columns):
 #   Column                   Non-Null Count    Dtype  
---  ------                   --------------    -----  
 0   CalYear                  1591527 non-null  object 
 1   HourOfCall               1591527 non-null  object 
 2   PropertyType             1591527 non-null  object 
 3   Postcode_district        1591527 non-null  object 
 4   NumPumpsAttending        1591527 non-null  object 
 5   StopCode                 1591527 non-null  object 
 6   Month                    1591527 non-null  object 
 7   DayOfWeek                1591527 non-null  object 
 8   PumpSecondsOnSite_min    1591527 non-null  float64
 9   PumpSecondsOnSite_mean   1591527 non-null  float64
 10  PumpSecondsOnSite_max    1591527 non-null  float64
 11  TurnoutTimeSeconds_min   1591527 non-null  float64
 12  TurnoutTimeSeconds_mean  1591527 non-null  float64
 13  TurnoutTimeSeconds_max   1591527 non-null 

None

PumpSecondsOnSite


Unnamed: 0,Keep,Column,Value
0,False,Postcode_district,BR1
1,False,Postcode_district,BR2
2,False,Postcode_district,BR3
3,False,Postcode_district,BR4
4,False,Postcode_district,BR5


Postcode_district
PropertyType
replaces {'Postcode_district': ['BR1', 'BR2', 'BR3', 'BR4', 'BR5', 'BR6', 'BR7', 'CM13', 'CM14', 'CR0', 'CR2', 'CR3', 'CR4', 'CR5', 'CR7', 'CR8', 'CR9', 'DA14', 'DA15', 'DA16', 'DA17', 'DA18', 'DA1', 'DA5', 'DA6', 'DA7', 'DA8', 'E10', 'E11', 'E12', 'E13', 'E14', 'E15', 'E16', 'E17', 'E18', 'E1W', 'E1', 'E20', 'E22', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'EC1A', 'EC1M', 'EC1N', 'EC1R', 'EC1V', 'EC1Y', 'EC2A', 'EC2M', 'EC2N', 'EC2P', 'EC2R', 'EC2V', 'EC2Y', 'EC3A', 'EC3M', 'EC3N', 'EC3R', 'EC3V', 'EC4A', 'EC4M', 'EC4N', 'EC4R', 'EC4V', 'EC4Y', 'EN1', 'EN2', 'EN3', 'EN4', 'EN5', 'EN6', 'EN7', 'EN8', 'HA0', 'HA1', 'HA2', 'HA3', 'HA4', 'HA5', 'HA6', 'HA7', 'HA8', 'HA9', 'IG11', 'IG1', 'IG2', 'IG3', 'IG4', 'IG5', 'IG6', 'IG7', 'IG8', 'IG9', 'KT17', 'KT18', 'KT19', 'KT1', 'KT22', 'KT2', 'KT3', 'KT4', 'KT5', 'KT6', 'KT8', 'KT9', 'N10', 'N11', 'N12', 'N13', 'N14', 'N15', 'N16', 'N17', 'N18', 'N19', 'N1C', 'N1', 'N20', 'N21', 'N22', 'N2', 'N3', 'N4', 'N5'

Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,OTHER,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,OTHER,OTHER,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,OTHER,OTHER,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,OTHER,OTHER,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,OTHER,OTHER,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,OTHER,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,OTHER,OTHER,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,OTHER,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,OTHER,OTHER,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,OTHER,OTHER,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


TurnoutTimeSeconds


Unnamed: 0,Keep,Column,Value
0,False,Postcode_district,BR1
1,False,Postcode_district,BR2
2,False,Postcode_district,BR3
3,False,Postcode_district,BR4
4,False,Postcode_district,BR5


Postcode_district
PropertyType
StopCode
replaces {'Postcode_district': ['BR1', 'BR2', 'BR3', 'BR4', 'BR5', 'BR6', 'BR7', 'BR8', 'CM13', 'CM14', 'CR0', 'CR2', 'CR3', 'CR4', 'CR5', 'CR6', 'CR7', 'CR8', 'CR9', 'DA14', 'DA15', 'DA16', 'DA17', 'DA18', 'DA1', 'DA5', 'DA6', 'DA7', 'DA8', 'E10', 'E11', 'E12', 'E13', 'E14', 'E15', 'E16', 'E17', 'E18', 'E1W', 'E1', 'E20', 'E22', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'EC1A', 'EC1M', 'EC1N', 'EC1R', 'EC1V', 'EC1Y', 'EC2A', 'EC2M', 'EC2N', 'EC2P', 'EC2R', 'EC2V', 'EC2Y', 'EC3A', 'EC3M', 'EC3N', 'EC3P', 'EC3R', 'EC3V', 'EC4A', 'EC4M', 'EC4N', 'EC4R', 'EC4V', 'EC4Y', 'EN1', 'EN2', 'EN3', 'EN4', 'EN5', 'EN6', 'EN7', 'EN8', 'EN9', 'HA0', 'HA1', 'HA2', 'HA3', 'HA4', 'HA5', 'HA6', 'HA7', 'HA8', 'HA9', 'IG11', 'IG1', 'IG2', 'IG3', 'IG4', 'IG5', 'IG6', 'IG7', 'IG8', 'IG9', 'KT17', 'KT18', 'KT19', 'KT1', 'KT22', 'KT2', 'KT3', 'KT4', 'KT5', 'KT6', 'KT8', 'KT9', 'N10', 'N11', 'N12', 'N13', 'N14', 'N15', 'N16', 'N17', 'N18', 'N19', 'N1C', 'N1', 'N20'

Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,OTHER,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,OTHER,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,DOMESTIC GARDEN (VEGETATION NOT EQUIPMENT),OTHER,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,CYCLE PATH/PUBLIC FOOTPATH/BRIDLEWAY,OTHER,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,PURPOSE BUILT FLATS/MAISONETTES - UP TO 3 STOREYS,OTHER,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,OTHER,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,REFUSE/RUBBISH TIP,OTHER,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,OTHER,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,STUDENT HALL OF RESIDENCE,OTHER,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,PURPOSE BUILT FLATS/MAISONETTES - 4 TO 9 STOREYS,OTHER,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,PropertyType_8,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


TravelTimeSeconds


Unnamed: 0,Keep,Column,Value
0,True,Postcode_district,BR1
1,True,Postcode_district,BR2
2,True,Postcode_district,BR3
3,False,Postcode_district,BR4
4,True,Postcode_district,BR5


Postcode_district
PropertyType
StopCode
replaces {'Postcode_district': ['BR4', 'BR7', 'BR8', 'CM14', 'CR3', 'CR5', 'CR6', 'DA16', 'DA18', 'DA1', 'DA5', 'EC2P', 'EN2', 'EN8', 'EN9', 'HA3', 'HA6', 'IG4', 'IG5', 'KT19', 'KT8', 'KT9', 'OX9', 'RM13', 'RM14', 'RM15', 'RM19', 'RM4', 'SE1P', 'SE28', 'SL0', 'SL3', 'SM7', 'SW13', 'TW15', 'TW16', 'TW19', 'TW5', 'TW6', 'UB6', 'UB9', 'W1K', 'WD23', 'WD6'], 'PropertyType': ['AGRICULTURAL VEHICLE', 'AIRFIELD/RUNWAY', 'AIRPORT - FUEL STORAGE', 'AIRPORT - TERMINAL', 'AIRPORT BUILDING (NOT TERMINAL OR HANGAR)', 'ANIMAL BOARDING/BREEDING ESTABLISHMENT - CATS', 'ANIMAL BOARDING/BREEDING ESTABLISHMENT - DOGS', 'ANIMAL HARM OUTDOORS', 'ATHLETICS STADIUM', 'BARBECUE', 'BARGE', 'BARN', 'BEACH', 'BOARDING SCHOOL ACCOMMODATION', 'BOAT - TANKER', 'BRIDGE', 'BULK GAS STORAGE', 'BULK HAZARDOUS MATERIALS STORAGE', 'BULK WASTE STORAGE', 'BUNGALOW - SINGLE OCCUPANCY', 'CABLES', 'CAMPING TENT', 'CANAL/RIVERBANK VEGETATION', 'CARAVAN ON TOW', 'CARAVAN/MOBILE HOME (PERM

Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,SW11,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,N9,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,OTHER,UB10,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,CYCLE PATH/PUBLIC FOOTPATH/BRIDLEWAY,N7,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,PURPOSE BUILT FLATS/MAISONETTES - UP TO 3 STOREYS,NW5,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,SE3,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,OTHER,TW3,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,EC1V,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,STUDENT HALL OF RESIDENCE,WC1B,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,OTHER,EC1A,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


PumpSecondsOnSite


Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,SW11,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,N9,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,DOMESTIC GARDEN (VEGETATION NOT EQUIPMENT),UB10,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,CYCLE PATH/PUBLIC FOOTPATH/BRIDLEWAY,N7,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,PURPOSE BUILT FLATS/MAISONETTES - UP TO 3 STOREYS,NW5,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,SE3,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,REFUSE/RUBBISH TIP,TW3,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,EC1V,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,STUDENT HALL OF RESIDENCE,WC1B,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,PURPOSE BUILT FLATS/MAISONETTES - 4 TO 9 STOREYS,EC1A,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,PropertyType_8,Postcode_district,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SW11,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,N9,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,UB10,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,N7,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,NW5,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SE3,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,TW3,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,EC1V,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,WC1B,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,EC1A,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


TurnoutTimeSeconds


Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,SW11,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,N9,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,DOMESTIC GARDEN (VEGETATION NOT EQUIPMENT),UB10,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,CYCLE PATH/PUBLIC FOOTPATH/BRIDLEWAY,N7,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,PURPOSE BUILT FLATS/MAISONETTES - UP TO 3 STOREYS,NW5,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,SE3,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,REFUSE/RUBBISH TIP,TW3,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,EC1V,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,STUDENT HALL OF RESIDENCE,WC1B,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,PURPOSE BUILT FLATS/MAISONETTES - 4 TO 9 STOREYS,EC1A,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,PropertyType_8,Postcode_district,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SW11,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,N9,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,UB10,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,N7,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,NW5,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SE3,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,TW3,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,EC1V,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,WC1B,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,EC1A,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


TravelTimeSeconds


Unnamed: 0,CalYear,HourOfCall,PropertyType,Postcode_district,NumPumpsAttending,StopCode,Month,DayOfWeek,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,2009,0,CAR,SW11,2.0,SST-RTC,1,4,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,2009,0,ROAD SURFACE/PAVEMENT,N9,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,2009,0,DOMESTIC GARDEN (VEGETATION NOT EQUIPMENT),UB10,1.0,SECONDARY FIRE,1,4,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,2009,0,CYCLE PATH/PUBLIC FOOTPATH/BRIDLEWAY,N7,2.0,SECONDARY FIRE,1,4,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,2009,0,PURPOSE BUILT FLATS/MAISONETTES - UP TO 3 STOREYS,NW5,2.0,ALARM,1,4,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,2009,0,CAR,SE3,1.0,SST-RTC,1,4,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,2009,0,REFUSE/RUBBISH TIP,TW3,2.0,SECONDARY FIRE,1,4,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,2009,0,SMALL REFUSE/RUBBISH CONTAINER,EC1V,1.0,SECONDARY FIRE,1,4,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,2009,0,STUDENT HALL OF RESIDENCE,WC1B,1.0,ALARM,1,4,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,2009,0,PURPOSE BUILT FLATS/MAISONETTES - 4 TO 9 STOREYS,EC1A,1.0,SST-LIFT RELEASE,1,4,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


OrdinalEncoder




BinaryEncoder




Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,PropertyType_8,Postcode_district,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SW11,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0
1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,N9,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0
2,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,UB10,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0
3,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,N7,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0
4,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,NW5,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0
5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,SE3,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,TW3,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0
7,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,EC1V,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0
8,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,WC1B,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0
9,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,EC1A,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0


In [None]:
# Concaténer les bits dans l'ordre des colonnes initiales
def combine_binary_columns(row):
    """
    Pack the binary-encoded columns of one row into a single integer.

    The bit columns are read most-significant first, in this fixed order:
    ``Postcode_district_0..8``, ``Month_0..3``, ``DayOfWeek_0..2`` and
    ``HourOfCall_0..4`` (21 columns in total). Each value is cast to ``int``
    and then to ``str``, the digits are joined into one string, and that
    string is parsed as a base-2 number.

    Args:
        row (pd.Series): A DataFrame row holding the binary-encoded columns
            listed above.

    Returns:
        int: The integer obtained by parsing the concatenated bit string.
    """
    postcode_bits = [
        "Postcode_district_0",
        "Postcode_district_1",
        "Postcode_district_2",
        "Postcode_district_3",
        "Postcode_district_4",
        "Postcode_district_5",
        "Postcode_district_6",
        "Postcode_district_7",
        "Postcode_district_8",
    ]
    month_bits = ["Month_0", "Month_1", "Month_2", "Month_3"]
    weekday_bits = ["DayOfWeek_0", "DayOfWeek_1", "DayOfWeek_2"]
    hour_bits = [
        "HourOfCall_0",
        "HourOfCall_1",
        "HourOfCall_2",
        "HourOfCall_3",
        "HourOfCall_4",
    ]

    # The concatenation order fixes the significance of each bit.
    ordered_columns = postcode_bits + month_bits + weekday_bits + hour_bits

    # int -> str turns values such as 1.0 into the single digit "1".
    digits = row[ordered_columns].astype(int).astype(str)
    return int("".join(digits), 2)

df = pd.read_csv("../data/_df_ready_TravelTimeSeconds.csv", sep=";", low_memory=False)

# Bit columns, most-significant first. The order matches the one used by
# combine_binary_columns, so the resulting integers are the same.
weight_bit_columns = [
    "Postcode_district_0", "Postcode_district_1", "Postcode_district_2",
    "Postcode_district_3", "Postcode_district_4", "Postcode_district_5",
    "Postcode_district_6", "Postcode_district_7", "Postcode_district_8",
    "Month_0", "Month_1", "Month_2", "Month_3",
    "DayOfWeek_0", "DayOfWeek_1", "DayOfWeek_2",
    "HourOfCall_0", "HourOfCall_1", "HourOfCall_2", "HourOfCall_3", "HourOfCall_4",
]

# Parsing a base-2 string rejects any digit other than 0/1; keep that
# guarantee explicit now that the value is computed arithmetically.
if not df[weight_bit_columns].isin([0, 1]).all().all():
    raise ValueError("Binary-encoded columns must contain only 0 or 1")

# Accumulate column by column (weight = weight * 2 + bit) instead of calling a
# Python function on every row with df.apply(..., axis=1); the row-wise
# version of this cell was annotated as taking "25 minutes".
weight = pd.Series(0, index=df.index, dtype="int64")
for bit_column in weight_bit_columns:
    weight = weight * 2 + df[bit_column].astype(int)
df["Weight"] = weight

display(df.head(30))
df.to_csv("../data/_df_ready_TravelTimeSeconds with Weight.csv", sep=";", index=False)

Unnamed: 0,CalYear,HourOfCall_0,HourOfCall_1,HourOfCall_2,HourOfCall_3,HourOfCall_4,PropertyType_0,PropertyType_1,PropertyType_2,PropertyType_3,PropertyType_4,PropertyType_5,PropertyType_6,PropertyType_7,Postcode_district_0,Postcode_district_1,Postcode_district_2,Postcode_district_3,Postcode_district_4,Postcode_district_5,Postcode_district_6,Postcode_district_7,Postcode_district_8,NumPumpsAttending,StopCode_0,StopCode_1,StopCode_2,StopCode_3,StopCode_4,Month_0,Month_1,Month_2,Month_3,DayOfWeek_0,DayOfWeek_1,DayOfWeek_2,PumpSecondsOnSite_min,PumpSecondsOnSite_mean,PumpSecondsOnSite_max,TurnoutTimeSeconds_min,TurnoutTimeSeconds_mean,TurnoutTimeSeconds_max,TravelTimeSeconds_min,TravelTimeSeconds_mean,TravelTimeSeconds_max,Weight
0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,2.0,0,0,0,0,1,0,0,0,1,0,0,1,240.0,390.0,540.0,253.0,253.0,253.0,89.0,89.0,89.0,4385
1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,151.0,151.0,151.0,157.0,157.0,157.0,8481
2,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1.0,0,0,0,1,0,0,0,0,1,0,0,1,720.0,720.0,720.0,108.0,108.0,108.0,102.0,102.0,102.0,12577
3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,2.0,0,0,0,1,0,0,0,0,1,0,0,1,120.0,120.0,120.0,114.0,128.0,142.0,108.0,113.5,119.0,16673
4,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,2.0,0,0,0,1,1,0,0,0,1,0,0,1,360.0,360.0,360.0,83.0,89.0,95.0,89.0,108.0,127.0,20769
5,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1.0,0,0,0,0,1,0,0,0,1,0,0,1,420.0,420.0,420.0,119.0,119.0,119.0,403.0,403.0,403.0,24865
6,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,1,2.0,0,0,0,1,0,0,0,0,1,0,0,1,1440.0,1440.0,1440.0,178.0,180.5,183.0,164.0,165.0,166.0,28961
7,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1.0,0,0,0,1,0,0,0,0,1,0,0,1,420.0,420.0,420.0,121.0,121.0,121.0,134.0,134.0,134.0,33057
8,1,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1.0,0,0,0,1,1,0,0,0,1,0,0,1,780.0,780.0,780.0,110.0,110.0,110.0,187.0,187.0,187.0,37153
9,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1,0,1.0,0,0,1,0,0,0,0,0,1,0,0,1,600.0,600.0,600.0,129.0,129.0,129.0,567.0,567.0,567.0,41249
