In [1]:

import os
from pathlib import Path
import pandas as pd
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, recall_score, f1_score
from lightgbm import LGBMClassifier
import sys

sys.path.append("../src")
from modeling.prepare_for_model import prepare_application_for_model




In [2]:


# Racine projet
PROJECT_ROOT = Path(r"C:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2")
DB_PATH = PROJECT_ROOT / "mlflow.db"
ARTIFACT_ROOT = PROJECT_ROOT / "artifacts"

print("PROJECT_ROOT =", PROJECT_ROOT)
print("DB exists ?", DB_PATH.exists())
print("ARTIFACT_ROOT exists ?", ARTIFACT_ROOT.exists())

PROJECT_ROOT = C:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2
DB exists ? False
ARTIFACT_ROOT exists ? True


In [3]:
mlflow.set_tracking_uri(f"sqlite:///{DB_PATH}")

print("Tracking URI =", mlflow.get_tracking_uri())

Tracking URI = sqlite:///C:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2\mlflow.db


In [4]:

EXPERIMENT_NAME = "home_credit_tracking"

mlflow.set_experiment(EXPERIMENT_NAME)

print("Experiment actif =", mlflow.get_experiment_by_name(EXPERIMENT_NAME).name)

2026/01/19 11:06:56 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/19 11:06:56 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

Experiment actif = home_credit_tracking


In [5]:

df = pd.read_csv(PROJECT_ROOT / "data" / "processed" / "train_final.csv")

from modeling.prepare_for_model import prepare_application_for_model

X, y = prepare_application_for_model(df, model_type="boosting")

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print("Train:", X_train.shape)
print("Test :", X_test.shape)

Train: (215257, 1769)
Test : (46127, 1769)


In [6]:

model = LGBMClassifier(
    n_estimators=200,
    learning_rate=0.05,
    num_leaves=64,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1
)

In [7]:

with mlflow.start_run(run_name="tracking_test_lgbm"):

    # Entraînement
    model.fit(X_train, y_train)

    # Prédictions
    preds_proba = model.predict_proba(X_test)[:, 1]
    preds_class = (preds_proba >= 0.5).astype(int)

    # Métriques
    auc = roc_auc_score(y_test, preds_proba)
    recall = recall_score(y_test, preds_class)
    f1 = f1_score(y_test, preds_class)

    # Logs
    mlflow.log_metric("auc", auc)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)

    mlflow.log_params({
        "model": "LightGBM",
        "n_estimators": 200,
        "learning_rate": 0.05,
        "num_leaves": 64
    })

    mlflow.set_tag("stage", "tracking_test")
    mlflow.set_tag("dataset", "train_final")

    # Sauvegarde modèle
    mlflow.sklearn.log_model(model, "model")

    print("Run terminé")
    print("AUC   =", auc)
    print("Recall=", recall)
    print("F1    =", f1)

[LightGBM] [Info] Number of positive: 17377, number of negative: 197880
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.469100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 243802
[LightGBM] [Info] Number of data points in the train set: 215257, number of used features: 1764
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000




Run terminé
AUC   = 0.7851806358167359
Recall= 0.6501074113856069
F1    = 0.30945229117402695


In [8]:

from mlflow.tracking import MlflowClient

client = MlflowClient()

exp = client.get_experiment_by_name(EXPERIMENT_NAME)
print("Experiment id =", exp.experiment_id)
print("Artifact location =", exp.artifact_location)

runs = client.search_runs([exp.experiment_id])
print("Nombre de runs =", len(runs))

Experiment id = 1
Artifact location = file:///c:/Users/yoann/Documents/open classrooms/projet 6/pret a depenser v2/notebooks/mlruns/1
Nombre de runs = 1
