# 03 — Churn Modeling (Leakage‑safe)

In [5]:

import pandas as pd, duckdb, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report
from xgboost import XGBClassifier

# --- Load the feature table ---------------------------------------------------
# Open the DuckDB warehouse and make `analytics` the default schema for the
# feature query.
con = duckdb.connect("../data/processed/warehouse.duckdb")
con.execute("SET schema='analytics';")

# Read the feature SQL through a context manager so the file handle is closed
# deterministically (a bare open(...).read() leaves that to the garbage
# collector), and pin the encoding so the query text does not depend on the
# platform default.
with open("../sql/churn_features.sql", encoding="utf-8") as sql_file:
    features = con.execute(sql_file.read()).fetchdf()

# --- Build target and design matrix -------------------------------------------
y = features['churn_label'].astype(int).values

# The label itself and `last_order_date` are excluded from the predictors;
# remaining non-numeric columns are one-hot encoded (first level dropped).
# NOTE(review): presumably `last_order_date` is dropped because it would leak
# the label -- confirm against churn_features.sql.
# NOTE(review): any identifier-like column returned by the query stays in X --
# verify that none is present.
X = pd.get_dummies(features.drop(columns=['churn_label','last_order_date']), drop_first=True)

# --- Train / evaluate ---------------------------------------------------------
# Stratified 70/30 hold-out split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

clf = XGBClassifier(
    n_estimators=400,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='logloss',
    random_state=42,
)
clf.fit(X_train, y_train)

# Probability of the positive class (column 1) on the held-out rows.
proba = clf.predict_proba(X_test)[:,1]

print("ROC AUC:", roc_auc_score(y_test, proba))
print("PR AUC :", average_precision_score(y_test, proba))


ROC AUC: 0.6719427361629196
PR AUC : 0.743657961812564


In [6]:
# Close the DuckDB connection `con` now that the features have been fetched.
con.close()