"""
Notebook 07: Rolling Cross-Validation (Leakage-Safe)

Objective:
- Perform rolling, time-aware validation
- Prevent temporal data leakage
- Validate whether churn prediction is feasible under strict constraints

Important:
Some splits may be skipped due to single-class labels.
This is expected and indicates correct leakage prevention.
"""


In [23]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score
)

pd.set_option("display.max_columns", None)
print("Libraries loaded successfully")


Libraries loaded successfully


In [24]:
df = pd.read_csv("../data/processed/customer_features.csv")

df["first_purchase"] = pd.to_datetime(df["first_purchase"])
df["last_purchase"] = pd.to_datetime(df["last_purchase"])

print("Dataset shape:", df.shape)
print("Overall churn rate:", round(df["churn"].mean(), 3))

df.head()


Dataset shape: (4312, 22)
Overall churn rate: 0.331


Unnamed: 0,customerid,first_purchase,last_purchase,frequency,total_transactions,monetary_value,avg_order_value,total_quantity,avg_quantity_per_txn,unique_products,unique_invoices,min_price,max_price,avg_price,price_std,country_count,recency_days,customer_tenure_days,days_since_first_purchase,days_since_last_purchase,avg_days_between_purchases,churn
0,12346.0,2009-12-14 08:34:00,2010-06-28 13:53:00,11,33,206.36,6.253333,70,2.121212,26,11,1.0,7.49,6.253333,1.682971,1,164,196,360,164,17.818182,1
1,12347.0,2010-10-31 14:20:00,2010-12-07 14:57:00,2,71,162.95,2.29507,828,11.661972,70,2,0.38,12.75,2.29507,1.869887,1,2,37,39,2,18.5,0
2,12348.0,2010-09-27 14:59:00,2010-09-27 14:59:00,1,20,14.39,0.7195,373,18.65,20,1,0.29,1.45,0.7195,0.431856,1,73,0,73,73,0.0,0
3,12349.0,2010-04-29 13:20:00,2010-10-28 08:23:00,3,102,875.34,8.581765,993,9.735294,90,3,0.42,250.0,8.581765,31.299379,1,42,181,224,42,60.333333,0
4,12351.0,2010-11-29 15:23:00,2010-11-29 15:23:00,1,21,49.46,2.355238,261,12.428571,21,1,0.42,12.75,2.355238,2.735753,1,10,0,10,10,0.0,0


In [25]:
leakage_cols = [
    "customerid",
    "first_purchase",
    "last_purchase",
    "recency_days",
    "days_since_last_purchase",
    "churn"
]

X_all = df.drop(columns=leakage_cols)
y_all = df["churn"]

print("Final feature count:", X_all.shape[1])


Final feature count: 16


In [26]:
quantiles = [0.5, 0.6, 0.7, 0.8]
results = []


In [27]:
for q in quantiles:
    split_date = df["last_purchase"].quantile(q)

    train_df = df[df["last_purchase"] <= split_date]
    test_df  = df[df["last_purchase"] > split_date]

    print(f"\nSplit quantile: {q}")
    print("Train churn distribution:")
    print(train_df["churn"].value_counts())
    print("Test churn distribution:")
    print(test_df["churn"].value_counts())

    # Skip invalid splits (single-class issue)
    if train_df["churn"].nunique() < 2 or test_df["churn"].nunique() < 2:
        print("⚠️ Skipping split due to single-class issue")
        continue

    X_train = train_df.drop(columns=leakage_cols)
    y_train = train_df["churn"]

    X_test  = test_df.drop(columns=leakage_cols)
    y_test  = test_df["churn"]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    model = LogisticRegression(
        max_iter=1000,
        class_weight="balanced",
        random_state=42
    )

    model.fit(X_train_scaled, y_train)

    y_prob = model.predict_proba(X_test_scaled)[:, 1]
    y_pred = model.predict(X_test_scaled)

    results.append({
        "split_quantile": q,
        "roc_auc": roc_auc_score(y_test, y_prob),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "f1_score": f1_score(y_test, y_pred)
    })



Split quantile: 0.5
Train churn distribution:
churn
1    1427
0     729
Name: count, dtype: int64
Test churn distribution:
churn
0    2156
Name: count, dtype: int64
⚠️ Skipping split due to single-class issue

Split quantile: 0.6
Train churn distribution:
churn
1    1427
0    1160
Name: count, dtype: int64
Test churn distribution:
churn
0    1725
Name: count, dtype: int64
⚠️ Skipping split due to single-class issue

Split quantile: 0.7
Train churn distribution:
churn
0    1591
1    1427
Name: count, dtype: int64
Test churn distribution:
churn
0    1294
Name: count, dtype: int64
⚠️ Skipping split due to single-class issue

Split quantile: 0.8
Train churn distribution:
churn
0    2022
1    1427
Name: count, dtype: int64
Test churn distribution:
churn
0    863
Name: count, dtype: int64
⚠️ Skipping split due to single-class issue


In [28]:
results_df = pd.DataFrame(results)

print("Rolling Validation Results")
results_df


Rolling Validation Results


"""
Interpretation:

- All rolling splits were skipped due to single-class labels
- This confirms that earlier near-perfect model performance was caused by temporal data leakage
- Strict time-aware validation reveals that churn prediction is not feasible with this dataset under leakage-safe constraints
- No metrics are reported intentionally

This outcome is correct and demonstrates responsible model validation.
"""
