In [1]:
import mlflow
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, classification_report
)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="whitegrid")


In [2]:
mlflow.set_tracking_uri("http://localhost:5000")  # adjust if needed
experiment_name = f"Credit_Risk_Modeling_{datetime.now():%Y%m%d_%H%M%S}"
mlflow.set_experiment(experiment_name)

2025/07/01 12:22:42 INFO mlflow.tracking.fluent: Experiment with name 'Credit_Risk_Modeling_20250701_122239' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/tsega/OneDrive/Documents/credit-scoring/artifacts/7', creation_time=1751361762459, experiment_id='7', last_update_time=1751361762459, lifecycle_stage='active', name='Credit_Risk_Modeling_20250701_122239', tags={}>

In [3]:
data_path = Path("../data/processed/modeling_data.csv")  # <-- change if necessary
df = pd.read_csv(data_path)
display(df.head())
assert "is_high_risk" in df.columns, "Column 'is_high_risk' missing"

Unnamed: 0,CustomerId,recency,frequency,monetary_total,is_high_risk
0,CustomerId_1,84,1,-10000.0,0
1,CustomerId_10,84,1,-10000.0,0
2,CustomerId_1001,90,5,20000.0,0
3,CustomerId_1002,26,11,4225.0,0
4,CustomerId_1003,12,6,20000.0,0


In [4]:
# 2. Preprocessing
df = df.copy()
df.drop(columns=[c for c in ["CustomerId", "TransactionId"] if c in df.columns], inplace=True, errors="ignore")

y = df["is_high_risk"].astype(int)
X = df.drop(columns=["is_high_risk"]).select_dtypes(include=["number"])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(X_train.shape, X_test.shape)

(2993, 3) (749, 3)


In [5]:
models_params = {
    "RandomForest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [100, 200],
            "max_depth": [None, 10, 20],
            "class_weight": [None, "balanced"],
        },
    },
    "GradientBoosting": {
        "model": GradientBoostingClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.01, 0.1],
            "max_depth": [3, 5],
        },
    },
    "LogisticRegression": {
        "model": LogisticRegression(max_iter=1000, random_state=42),
        "params": {
            "C": [0.1, 1, 10],
            "penalty": ["l2"],
            "class_weight": [None, "balanced"],
        },
    },
}

In [8]:
import mlflow
import time
import numpy as np
from datetime import datetime
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

best_models = {}
best_scores = {}
all_metrics = {}  # To store metrics for all models

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

with mlflow.start_run(run_name="Main_Experiment") as parent_run:
    mlflow.log_param("train_size", len(X_train))
    mlflow.log_param("test_size", len(X_test))
    mlflow.log_param("positive_class_ratio", y.mean())

    for name, cfg in models_params.items():
        # Create unique run ID with timestamp and random suffix
        run_id = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S%f')}_{np.random.randint(100,999)}"
        
        with mlflow.start_run(nested=True, run_name=run_id):
            # Small delay to prevent timestamp collisions
            time.sleep(0.1)
            
            gs = GridSearchCV(
                cfg["model"],
                cfg["params"],
                cv=cv,
                scoring="roc_auc",
                n_jobs=-1,
                verbose=0,
            )
            gs.fit(X_train, y_train)

            preds = gs.predict(X_test)
            probs = gs.predict_proba(X_test)[:, 1]

            metrics = {
                "accuracy": accuracy_score(y_test, preds),
                "precision": precision_score(y_test, preds),
                "recall": recall_score(y_test, preds),
                "f1": f1_score(y_test, preds),
                "roc_auc": roc_auc_score(y_test, probs),
                "best_cv_score": gs.best_score_,
            }
            
            # Store metrics for display
            all_metrics[name] = metrics

            # Generate unique metric prefix
            metric_prefix = f"{datetime.now().strftime('%f')}_"

            # Log model FIRST with simple artifact_path
            mlflow.sklearn.log_model(
                gs.best_estimator_,
                artifact_path=name.lower(),  # No slashes in artifact_path
                registered_model_name=f"CreditRisk_{name}",
            )

            # Then log parameters and metrics with unique prefixes
            mlflow.log_params({f"{name}_{k}": v for k, v in gs.best_params_.items()})
            
            # Log metrics with unique prefixes and retries
            for metric, value in metrics.items():
                for attempt in range(3):
                    try:
                        mlflow.log_metric(f"{metric_prefix}{name}_{metric}", value)
                        time.sleep(0.05)
                        break
                    except Exception:
                        if attempt == 2:
                            raise
                        time.sleep(1)

            best_models[name] = gs.best_estimator_
            best_scores[name] = metrics["roc_auc"]

            # Display metrics in a readable format
            print(f"\n{name} Evaluation Metrics:")
            print(f"• Accuracy:    {metrics['accuracy']:.4f}")
            print(f"• Precision:   {metrics['precision']:.4f}")
            print(f"• Recall:      {metrics['recall']:.4f}")
            print(f"• F1 Score:    {metrics['f1']:.4f}")
            print(f"• ROC-AUC:     {metrics['roc_auc']:.4f}")
            print(f"• CV Score:    {metrics['best_cv_score']:.4f}")

    # Identify and log best model
    best_name = max(best_scores, key=best_scores.get)
    mlflow.set_tag("best_model", best_name)
    mlflow.log_metric("best_overall_roc_auc", best_scores[best_name])
    
    # Display final comparison
    print("\n=== Model Performance Comparison ===")
    for name, metrics in all_metrics.items():
        print(f"\n{name}:")
        print(f"  ROC-AUC:   {metrics['roc_auc']:.4f}")
        print(f"  Accuracy:  {metrics['accuracy']:.4f}")
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall:    {metrics['recall']:.4f}")
        print(f"  F1:        {metrics['f1']:.4f}")
    
    print(f"\nBest model overall: {best_name} (ROC-AUC: {best_scores[best_name]:.4f})")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
Registered model 'CreditRisk_RandomForest' already exists. Creating a new version of this model...
2025/07/01 12:29:12 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditRisk_RandomForest, version 2
Created version '2' of model 'CreditRisk_RandomForest'.



RandomForest Evaluation Metrics:
• Accuracy:    0.9987
• Precision:   0.0000
• Recall:      0.0000
• F1 Score:    0.0000
• ROC-AUC:     1.0000
• CV Score:    nan
🏃 View run RandomForest_20250701_122900221200_388 at: http://localhost:5000/#/experiments/7/runs/6754b2244236426e9dd7c8197fe59529
🧪 View experiment at: http://localhost:5000/#/experiments/7


Registered model 'CreditRisk_GradientBoosting' already exists. Creating a new version of this model...
2025/07/01 12:29:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditRisk_GradientBoosting, version 2
Created version '2' of model 'CreditRisk_GradientBoosting'.



GradientBoosting Evaluation Metrics:
• Accuracy:    0.9987
• Precision:   0.5000
• Recall:      1.0000
• F1 Score:    0.6667
• ROC-AUC:     0.9993
• CV Score:    nan
🏃 View run GradientBoosting_20250701_122913581239_475 at: http://localhost:5000/#/experiments/7/runs/2c9ebd829eb94a51b1afe0148e1a030a
🧪 View experiment at: http://localhost:5000/#/experiments/7


Registered model 'CreditRisk_LogisticRegression' already exists. Creating a new version of this model...
2025/07/01 12:29:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditRisk_LogisticRegression, version 2
Created version '2' of model 'CreditRisk_LogisticRegression'.



LogisticRegression Evaluation Metrics:
• Accuracy:    0.9973
• Precision:   0.3333
• Recall:      1.0000
• F1 Score:    0.5000
• ROC-AUC:     1.0000
• CV Score:    nan
🏃 View run LogisticRegression_20250701_122919866094_738 at: http://localhost:5000/#/experiments/7/runs/39704e328845496480c2c7493889efc1
🧪 View experiment at: http://localhost:5000/#/experiments/7

=== Model Performance Comparison ===

RandomForest:
  ROC-AUC:   1.0000
  Accuracy:  0.9987
  Precision: 0.0000
  Recall:    0.0000
  F1:        0.0000

GradientBoosting:
  ROC-AUC:   0.9993
  Accuracy:  0.9987
  Precision: 0.5000
  Recall:    1.0000
  F1:        0.6667

LogisticRegression:
  ROC-AUC:   1.0000
  Accuracy:  0.9973
  Precision: 0.3333
  Recall:    1.0000
  F1:        0.5000

Best model overall: RandomForest (ROC-AUC: 1.0000)
🏃 View run Main_Experiment at: http://localhost:5000/#/experiments/7/runs/a76764eb20e94e3bb0c8c77448734598
🧪 View experiment at: http://localhost:5000/#/experiments/7
