In [1]:
import pandas as pd
import mlflow
from pathlib import Path
from mlflow.tracking import MlflowClient

CWD = Path.cwd()
PROJECT_ROOT = CWD.parent.parent
DB_PATH = PROJECT_ROOT / "mlflow.db"

mlflow.set_tracking_uri(f"sqlite:///{DB_PATH.as_posix()}")
mlflow.set_experiment("home_credit_benchmarking")

print("Tracking URI :", mlflow.get_tracking_uri())
print("DB_PATH      :", DB_PATH)

Tracking URI : sqlite:///c:/Users/yoann/Documents/open classrooms/projet 8/livrables/pret a dépenser/mlflow.db
DB_PATH      : c:\Users\yoann\Documents\open classrooms\projet 8\livrables\pret a dépenser\mlflow.db


In [2]:
client = MlflowClient()
exp = client.get_experiment_by_name("home_credit_benchmarking")

if exp is None:
    raise ValueError("Experiment 'home_credit_benchmarking' introuvable")

runs = mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    filter_string='tags.phase = "benchmark_baseline"',
)

print("Nombre de runs benchmark_baseline :", len(runs))
display(runs.head())

Nombre de runs benchmark_baseline : 6


Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.fold_pred_time_sec,metrics.f1_mean_fixed_threshold,metrics.auc_mean,metrics.fbeta_3_std_fixed_threshold,...,tags.dataset,tags.mlflow.source.git.commit,tags.threshold_fixed,tags.fbeta_beta,tags.cost_fp,tags.n_features,tags.mlflow.runName,tags.model_type,tags.n_rows,tags.lgb_categorical_cols_n
0,5730e10f985d4bb28946737d903061bc,1,FINISHED,file:///C:/Users/yoann/Documents/open%20classr...,2026-02-16 09:14:37.143000+00:00,2026-02-16 09:21:29.975000+00:00,0.284682,0.276256,0.769348,0.00896,...,train_split,c469ec1d4ccf08b5e4814b550568779d418adbf5,0.5,3.0,1,1656,CatBoost,boosting,215257,
1,b9ec0a1ffa7e40ad923d5f78edd6a315,1,FINISHED,file:///C:/Users/yoann/Documents/open%20classr...,2026-02-16 08:54:30.017000+00:00,2026-02-16 09:03:46.476000+00:00,0.734307,0.303637,0.782161,0.011238,...,train_split,c469ec1d4ccf08b5e4814b550568779d418adbf5,0.5,3.0,1,1656,XGBoost,boosting,215257,
2,835f4e494920416797ac8122cf3003ec,1,FINISHED,file:///C:/Users/yoann/Documents/open%20classr...,2026-02-16 08:43:32.736000+00:00,2026-02-16 08:52:14.326000+00:00,1.269399,0.302392,0.782745,0.00992,...,train_split,c469ec1d4ccf08b5e4814b550568779d418adbf5,0.5,3.0,1,1656,LightGBM,boosting,215257,14.0
3,80afb0aae0fb4789b0adabb421178a64,1,FINISHED,file:///C:/Users/yoann/Documents/open%20classr...,2026-02-16 07:36:08.281000+00:00,2026-02-16 08:20:02.899000+00:00,5.2177,0.035242,0.757326,0.009455,...,train_split,c469ec1d4ccf08b5e4814b550568779d418adbf5,0.5,3.0,1,1656,MLP,sklearn,215257,
4,632478405a6147a6b9f9a1c819009a2f,1,FINISHED,file:///C:/Users/yoann/Documents/open%20classr...,2026-02-16 07:10:42.136000+00:00,2026-02-16 07:34:39.505000+00:00,5.220648,0.0,0.743083,0.0,...,train_split,c469ec1d4ccf08b5e4814b550568779d418adbf5,0.5,3.0,1,1656,RandomForest,sklearn,215257,


In [3]:
cols = {
    "tags.model_name": "model",
    "tags.model_type": "model_type",

    "metrics.auc_mean": "auc",
    "metrics.auc_std": "auc_std",

    "metrics.recall_mean_fixed_threshold": "recall",
    "metrics.recall_std_fixed_threshold": "recall_std",

    "metrics.f1_mean_fixed_threshold": "f1",
    "metrics.f1_std_fixed_threshold": "f1_std",

    "metrics.fbeta_3_mean_fixed_threshold": "f3",
    "metrics.fbeta_3_std_fixed_threshold": "f3_std",

    "metrics.business_cost_mean_fixed_threshold": "business_cost",
    "metrics.business_cost_std_fixed_threshold": "business_cost_std",

    "metrics.train_time_sec": "time_sec",

    "tags.threshold_fixed": "threshold_fixed",
    "run_id": "run_id",
}
use_cols = [c for c in cols if c in runs.columns]
df_final = runs[use_cols].rename(columns={k: cols[k] for k in use_cols}).copy()
if "threshold_fixed" in df_final.columns:
    df_final["threshold_fixed"] = pd.to_numeric(df_final["threshold_fixed"], errors="coerce")
display(df_final)

Unnamed: 0,model,model_type,auc,auc_std,recall,recall_std,f1,f1_std,f3,f3_std,business_cost,business_cost_std,time_sec,threshold_fixed,run_id
0,CatBoost,boosting,0.769348,0.006078,0.696668,0.01229,0.276256,0.004383,0.534102,0.00896,22173.8,435.234374,414.024441,0.5,5730e10f985d4bb28946737d903061bc
1,XGBoost,boosting,0.782161,0.005322,0.642746,0.013949,0.303637,0.006421,0.525389,0.011238,21420.4,521.001958,557.52576,0.5,b9ec0a1ffa7e40ad923d5f78edd6a315
2,LightGBM,boosting,0.782745,0.005089,0.653852,0.012666,0.302392,0.005379,0.530527,0.00992,21311.4,456.538542,522.616138,0.5,835f4e494920416797ac8122cf3003ec
3,MLP,sklearn,0.757326,0.010308,0.018414,0.008562,0.035242,0.016231,0.020358,0.009455,34178.2,263.05087,2635.282503,0.5,80afb0aae0fb4789b0adabb421178a64
4,RandomForest,sklearn,0.743083,0.005828,0.0,0.0,0.0,0.0,0.0,0.0,34754.0,4.898979,1438.236524,0.5,632478405a6147a6b9f9a1c819009a2f
5,LogisticRegression,sklearn,0.771029,0.003856,0.694941,0.00797,0.279162,0.001735,0.535436,0.004773,22014.4,197.338896,17677.128699,0.5,e5785bf834d14c189d43e4be84d9f509


In [4]:
if "model" in df_final.columns and "start_time" in runs.columns:
    tmp = df_final.copy()
    tmp["start_time"] = runs["start_time"].values

    df_final = (
        tmp.sort_values("start_time")
           .groupby("model", as_index=False)
           .tail(1)
           .drop(columns=["start_time"])
           .reset_index(drop=True)
    )

print("Runs après dédoublonnage par modèle :", len(df_final))
display(df_final)

Runs après dédoublonnage par modèle : 6


Unnamed: 0,model,model_type,auc,auc_std,recall,recall_std,f1,f1_std,f3,f3_std,business_cost,business_cost_std,time_sec,threshold_fixed,run_id
0,LogisticRegression,sklearn,0.771029,0.003856,0.694941,0.00797,0.279162,0.001735,0.535436,0.004773,22014.4,197.338896,17677.128699,0.5,e5785bf834d14c189d43e4be84d9f509
1,RandomForest,sklearn,0.743083,0.005828,0.0,0.0,0.0,0.0,0.0,0.0,34754.0,4.898979,1438.236524,0.5,632478405a6147a6b9f9a1c819009a2f
2,MLP,sklearn,0.757326,0.010308,0.018414,0.008562,0.035242,0.016231,0.020358,0.009455,34178.2,263.05087,2635.282503,0.5,80afb0aae0fb4789b0adabb421178a64
3,LightGBM,boosting,0.782745,0.005089,0.653852,0.012666,0.302392,0.005379,0.530527,0.00992,21311.4,456.538542,522.616138,0.5,835f4e494920416797ac8122cf3003ec
4,XGBoost,boosting,0.782161,0.005322,0.642746,0.013949,0.303637,0.006421,0.525389,0.011238,21420.4,521.001958,557.52576,0.5,b9ec0a1ffa7e40ad923d5f78edd6a315
5,CatBoost,boosting,0.769348,0.006078,0.696668,0.01229,0.276256,0.004383,0.534102,0.00896,22173.8,435.234374,414.024441,0.5,5730e10f985d4bb28946737d903061bc


In [5]:
if "business_cost" in df_final.columns:
    ranking_business = (
        df_final.sort_values(
            by=["business_cost", "recall", "auc", "time_sec"],
            ascending=[True, False, False, True],
        )
        .reset_index(drop=True)
    )

    print("=== Ranking BUSINESS (min coût) ===")
    display(ranking_business)
else:
    print("Pas de colonne business_cost dans les runs -> impossible de classer par coût.")

=== Ranking BUSINESS (min coût) ===


Unnamed: 0,model,model_type,auc,auc_std,recall,recall_std,f1,f1_std,f3,f3_std,business_cost,business_cost_std,time_sec,threshold_fixed,run_id
0,LightGBM,boosting,0.782745,0.005089,0.653852,0.012666,0.302392,0.005379,0.530527,0.00992,21311.4,456.538542,522.616138,0.5,835f4e494920416797ac8122cf3003ec
1,XGBoost,boosting,0.782161,0.005322,0.642746,0.013949,0.303637,0.006421,0.525389,0.011238,21420.4,521.001958,557.52576,0.5,b9ec0a1ffa7e40ad923d5f78edd6a315
2,LogisticRegression,sklearn,0.771029,0.003856,0.694941,0.00797,0.279162,0.001735,0.535436,0.004773,22014.4,197.338896,17677.128699,0.5,e5785bf834d14c189d43e4be84d9f509
3,CatBoost,boosting,0.769348,0.006078,0.696668,0.01229,0.276256,0.004383,0.534102,0.00896,22173.8,435.234374,414.024441,0.5,5730e10f985d4bb28946737d903061bc
4,MLP,sklearn,0.757326,0.010308,0.018414,0.008562,0.035242,0.016231,0.020358,0.009455,34178.2,263.05087,2635.282503,0.5,80afb0aae0fb4789b0adabb421178a64
5,RandomForest,sklearn,0.743083,0.005828,0.0,0.0,0.0,0.0,0.0,0.0,34754.0,4.898979,1438.236524,0.5,632478405a6147a6b9f9a1c819009a2f


In [6]:
ranking_auc = (
    df_final.sort_values(
        by=["auc", "recall", "f3", "time_sec"],
        ascending=[False, False, False, True],
    )
    .reset_index(drop=True)
)

print("=== Ranking TECHNIQUE (max AUC) ===")
display(ranking_auc)

=== Ranking TECHNIQUE (max AUC) ===


Unnamed: 0,model,model_type,auc,auc_std,recall,recall_std,f1,f1_std,f3,f3_std,business_cost,business_cost_std,time_sec,threshold_fixed,run_id
0,LightGBM,boosting,0.782745,0.005089,0.653852,0.012666,0.302392,0.005379,0.530527,0.00992,21311.4,456.538542,522.616138,0.5,835f4e494920416797ac8122cf3003ec
1,XGBoost,boosting,0.782161,0.005322,0.642746,0.013949,0.303637,0.006421,0.525389,0.011238,21420.4,521.001958,557.52576,0.5,b9ec0a1ffa7e40ad923d5f78edd6a315
2,LogisticRegression,sklearn,0.771029,0.003856,0.694941,0.00797,0.279162,0.001735,0.535436,0.004773,22014.4,197.338896,17677.128699,0.5,e5785bf834d14c189d43e4be84d9f509
3,CatBoost,boosting,0.769348,0.006078,0.696668,0.01229,0.276256,0.004383,0.534102,0.00896,22173.8,435.234374,414.024441,0.5,5730e10f985d4bb28946737d903061bc
4,MLP,sklearn,0.757326,0.010308,0.018414,0.008562,0.035242,0.016231,0.020358,0.009455,34178.2,263.05087,2635.282503,0.5,80afb0aae0fb4789b0adabb421178a64
5,RandomForest,sklearn,0.743083,0.005828,0.0,0.0,0.0,0.0,0.0,0.0,34754.0,4.898979,1438.236524,0.5,632478405a6147a6b9f9a1c819009a2f


In [7]:

REPORTS_DIR = PROJECT_ROOT / "reports" / "benchmark_summary"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

out_csv = REPORTS_DIR / "benchmark_models_comparison.csv"
df_final.to_csv(out_csv, index=False)

ranking_business.to_csv(REPORTS_DIR / "benchmark_ranking_business.csv", index=False)
ranking_auc.to_csv(REPORTS_DIR / "benchmark_ranking_auc.csv", index=False)

print("Saved:", out_csv)
print("Saved ranking_business + ranking_auc")

Saved: c:\Users\yoann\Documents\open classrooms\projet 8\livrables\pret a dépenser\reports\benchmark_summary\benchmark_models_comparison.csv
Saved ranking_business + ranking_auc


## Synthèse du benchmark des modèles


Les modèles ont été comparés par validation croisée selon plusieurs critères complémentaires :
AUC (discrimination globale), recall et F-scores à seuil fixe (0.5), ainsi qu’un **coût métier personnalisé** (FN = 10 × FP), indicateur prioritaire pour le cas d’usage bancaire.

In [8]:

best = ranking_business.iloc[0]

print("=== Synthèse automatique (benchmark brut) ===")
print(f"Modèle retenu (coût minimal) : {best['model']}")
print(f"AUC : {best['auc']:.3f}")
print(f"Coût métier : {best['business_cost']:.0f}")
print(f"Recall@{best.get('threshold_fixed', 0.5)} : {best['recall']:.3f}")
print(f"F3 : {best['f3']:.3f}")
print(f"Temps entraînement (s) : {best['time_sec']:.1f}")

best_auc_row = ranking_auc.iloc[0]
print("\nChallenger technique (AUC max) :", best_auc_row["model"])

=== Synthèse automatique (benchmark brut) ===
Modèle retenu (coût minimal) : LightGBM
AUC : 0.783
Coût métier : 21311
Recall@0.5 : 0.654
F3 : 0.531
Temps entraînement (s) : 522.6

Challenger technique (AUC max) : LightGBM


- **XGBoost** est un excellent *challenger* :
  - AUC très proche (0.782),
  - coût métier légèrement supérieur (21 420),
  - comportement stable et temps d’entraînement plus court (≈ 636 s).

Il sera conservé pour comparaison lors des phases d’optimisation.

- **Régression logistique** et **CatBoost** présentent un **recall élevé** (≈ 0.695) et un **F3 compétitif** (~0.534–0.535).  
  Ces modèles restent intéressants dans une logique métier orientée réduction des faux négatifs,  
  la régression logistique apportant en plus une forte **interprétabilité**.

### Modèles écartés

- **MLP** et **Random Forest** sont écartés à ce stade :
  - performances globales inférieures,
  - recall quasi nul pour Random Forest,
  - **coût métier très défavorable** (≥ 34 000).


La suite du projet se concentre sur **LightGBM**, avec :

- analyse d’importance des variables,  
- réduction du périmètre de features,  
- optimisation des hyperparamètres,  
- choix d’un seuil métier optimal,  

tout en conservant **XGBoost, CatBoost et la régression logistique** comme modèles de référence pour valider la robustesse des choix.