In [11]:
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns

# --- 1. CHARGER LES "MASTER FILES" ---
# (Créés par tes scripts)
try:
    df_sirene = pl.read_parquet("../Data/processed/sirene_infos.parquet")
    df_bilan = pl.read_parquet("../Data/processed/sirene_bilan.parquet")
except Exception as e:
    print(f"ERREUR: Fichiers 'processed' non trouvés. As-tu lancé 'make process'?")
    print(e)

print(f"Fichier SIRENE (infos) chargé: {df_sirene.shape}")
print(f"Fichier INPI (bilans) chargé: {df_bilan.shape}")



# # --- 2. DÉFINIR LA COHORTE (Itération 1) ---
# # C'est ici qu'on applique le filtre qu'on a gardé pour la fin
# print("Filtrage: Cohorte 2018 & Bilans 2019...")

# A. Cohorte 2018 (SIRENE)
df_demo = df_sirene.filter(
    pl.col("dateCreationUniteLegale").dt.year() == 2018
)

# B. Bilans 2019 (INPI)
df_bilan_2019 = df_bilan.filter(
    pl.col("AnneeClotureExercice") == 2019
)

# --- 3. LE GRAND MARIAGE (LEFT JOIN) ---
# On garde toutes les entreprises de 2018, même si elles n'ont pas de bilan
df_final_ml = df_demo.join(
    df_bilan_2019, 
    on="siren", 
    how="left"
)

# --- 4. CRÉATION DE LA CIBLE (Y) ---
# (On la crée ici, dans le notebook)
print("Création de la Cible (is_failed_in_3y)...")

df_final_ml = df_final_ml.with_columns(
    # Date limite = 3 ans après la création
    (pl.col("dateCreationUniteLegale") + pl.duration(years=3)).alias("date_limite_3_ans")
).with_columns(
    # 1 = Faillite si Fermé AVANT la date limite
    pl.when(
        (pl.col("dateFermeture").is_not_null()) & # dateFermeture n'est pas null
        (pl.col("dateFermeture") < pl.col("date_limite_3_ans"))
    ).then(1)
    .otherwise(0)
    .alias("is_failed_in_3y")
)

# # --- 5. NETTOYAGE FINAL (TRÈS IMPORTANT) ---
# # Remplir les 'null' des entreprises sans bilan par 0
# # (Ne pas avoir de bilan est une feature !)
# cols_financieres = [col for col in df_bilan.columns if col not in ['siren', 'date_cloture_exercice', 'AnneeCloture']]
# df_final_ml = df_final_ml.with_columns(
#     pl.col(cols_financieres).fill_null(0)
# )

# print("---")
# print("DATASET ML PRÊT !")
# print(f"Shape finale : {df_final_ml.shape}")
# print(df_final_ml.head())

Fichier SIRENE (infos) chargé: (28882409, 10)
Fichier INPI (bilans) chargé: (3706645, 21)
Création de la Cible (is_failed_in_3y)...


TypeError: duration() got an unexpected keyword argument 'years'

In [4]:
df_sirene

siren,dateCreationUniteLegale,dateFermeture,categorieJuridiqueUniteLegale,trancheEffectifsUniteLegale,activitePrincipaleUniteLegale,categorieEntreprise,economieSocialeSolidaireUniteLegale,societeMissionUniteLegale,departement
str,date,date,i64,str,str,str,str,str,str
"""000325175""",2000-09-26,,1000,"""NN""","""32.12Z""","""PME""",,,"""13"""
"""001807254""",1972-05-01,,1000,"""NN""","""85.59A""",,,,"""02"""
"""005410220""",1954-12-25,,1000,"""NN""","""22.02""",,,,"""80"""
"""005410345""",,,1000,"""NN""","""79.06""",,,,"""80"""
"""005410394""",1954-12-25,,1000,"""NN""","""64.42""",,,,"""80"""
…,…,…,…,…,…,…,…,…,…
"""999990625""",1983-11-01,,5599,"""NN""","""27.3C""",,,,"""57"""
"""999990641""",1983-11-01,,5499,"""NN""","""65.01""",,,,"""75"""
"""999990666""",1986-05-15,,5599,"""NN""","""65.11Z""","""PME""","""N""",,"""75"""
"""999990682""",1985-09-01,,5599,"""NN""","""65.2E""",,,,"""75"""


In [5]:
df_bilan

DateClotureExercice,AnneeClotureExercice,siren,HN_RésultatNet,FA_ChiffreAffairesVentes,FB_AchatsMarchandises,CJCK_TotalActifBrut,DL_DettesCourtTerme,DM_DettesLongTerme,DA_TresorerieActive,FJ_ResultatFinancier,FR_ResultatExceptionnel,DF_CapitauxPropres,EG_ImpotsTaxes,ratio_rentabilite_nette,ratio_endettement,ratio_marge_brute,ratio_capitaux_propres,ratio_tresorerie,ratio_resultat_financier,ratio_resultat_exceptionnel
date,i32,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,f64,f64,f64,f64,f64,f64,f64
2016-12-31,2016,"""005420120""",-261053,11836,0,31933093,92013428,0,711840,104225,781843,0,586967,-22.055847,2.881444,1.0,0.0,0.022292,8.805762,66.056353
2017-12-31,2017,"""005420120""",-376691,26192,0,22684824,90919571,0,711840,98112,450623,0,441247,-14.381911,4.007947,1.0,0.0,0.03138,3.745877,17.204604
2018-12-31,2018,"""005420120""",-289131,4623,0,15117606,90269342,0,711840,135797,470896,0,841098,-62.541856,5.97114,1.0,0.0,0.047087,29.374216,101.859399
2019-12-31,2019,"""005420120""",-970147,48370,0,12736527,89288445,0,711840,217792,520363,0,0,-20.056791,7.010423,1.0,0.0,0.05589,4.502626,10.75797
2020-12-31,2020,"""005420120""",-807683,72481,0,12006568,88446360,0,711840,342381,725921,0,885730,-11.143376,7.366498,1.0,0.0,0.059288,4.723734,10.015328
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2020-12-31,2020,"""999990369""",389310,0,0,22017428,14843972,0,7111836,20677807,22224228,0,0,3.8931e11,0.674192,0.0,0.0,0.323009,2.0678e13,2.2224e13
2021-12-31,2021,"""999990369""",254808,0,0,23802015,15159901,0,7111836,22429094,24658118,0,0,2.5481e11,0.636917,0.0,0.0,0.298791,2.2429e13,2.4658e13
2022-12-31,2022,"""999990369""",-672325,0,0,25326821,14487576,0,7111836,20220489,23350653,0,0,-6.7232e11,0.572025,0.0,0.0,0.280803,2.0220e13,2.3351e13
2016-12-31,2016,"""999990542""",318053,0,0,1318894,598610,0,225000,1780080,1780080,3674,0,3.1805e11,0.453873,0.0,0.002786,0.170597,1.7801e12,1.7801e12


In [7]:
df_demo

siren,dateCreationUniteLegale,dateFermeture,categorieJuridiqueUniteLegale,trancheEffectifsUniteLegale,activitePrincipaleUniteLegale,categorieEntreprise,economieSocialeSolidaireUniteLegale,societeMissionUniteLegale,departement
str,date,date,i64,str,str,str,str,str,str
"""130023385""",2018-01-01,,7383,"""52""","""85.42Z""","""GE""","""N""",,"""75"""
"""130023583""",2018-01-01,,7383,"""NN""","""85.42Z""",,"""N""",,"""59"""
"""130023740""",2018-01-01,,7381,"""42""","""94.11Z""","""ETI""","""N""",,"""59"""
"""130023799""",2018-01-01,,7410,"""21""","""84.12Z""","""PME""",,,"""20"""
"""130023807""",2018-01-01,,7410,"""22""","""71.12B""","""PME""","""N""",,"""31"""
…,…,…,…,…,…,…,…,…,…
"""993302033""",2018-11-26,,9220,"""NN""","""88.99B""",,"""O""","""N""","""04"""
"""993322098""",2018-07-05,,8420,"""NN""","""94.11Z""",,"""N""",,"""39"""
"""993348952""",2018-11-09,,9220,"""NN""","""94.99Z""",,"""O""","""N""","""28"""
"""993351584""",2018-07-30,,9220,"""NN""","""93.12Z""",,"""O""","""N""","""22"""
