# 03_Cox_Survival_Analysis
Time-to-default modeling using a Cox proportional hazards model.

In [None]:
# Common imports for the project
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
%matplotlib inline


In [None]:
from lifelines import CoxPHFitter
from src.preprocessing import basic_cleaning
from src.feature_engineering import create_features

In [None]:
# Read the merged dataset and run it through the project's cleaning and
# feature-engineering steps, in that order.
data_path = '../data/merged_data.csv'
df = (
    pd.read_csv(data_path)
    .pipe(basic_cleaning)
    .pipe(create_features)
)
# Show (at most) the first 50 column labels as a quick sanity check.
print('Columns:', df.columns[:50])

In [None]:
# Names of the survival columns used by the Cox model.
duration_col = 'time_to_default'
event_col = 'default'

if duration_col not in df.columns:
    # The data has no observed durations: fabricate them for demo purposes only.
    # Rows with default == 1 get a duration in [1, 365); all other rows get a
    # duration in [366, 2000). A seeded generator keeps the synthetic column
    # (and therefore the fitted model) reproducible between runs.
    rng = np.random.default_rng(42)
    n_rows = len(df)
    df[duration_col] = np.where(
        df[event_col] == 1,
        rng.integers(1, 365, size=n_rows),
        rng.integers(366, 2000, size=n_rows),
    )
    print('Created synthetic time_to_default column for demo purposes.')

# Every numeric column is a candidate covariate, except the survival columns
# themselves and the listed identifier-like columns.
non_covariates = {duration_col, event_col, 'id', 'index', 'source'}
covariates = [
    col
    for col in df.select_dtypes(include=[np.number]).columns
    if col not in non_covariates
]
len(covariates), covariates[:10]

In [None]:
# Build the modelling frame: survival columns plus covariates, complete rows only.
df_cox = df[[duration_col, event_col] + covariates].dropna()
if df_cox.empty:
    raise ValueError('No complete rows left after dropna(); cannot fit the Cox model.')

# Cap the training set at 20000 rows. The cap is computed from the frame
# *after* dropna(): sizing it from len(df) can request more rows than remain,
# which makes DataFrame.sample raise ValueError.
df_cox = df_cox.sample(n=min(20000, len(df_cox)), random_state=42)

# Fit the Cox proportional hazards model and print the fitted summary.
cph = CoxPHFitter()
cph.fit(df_cox, duration_col=duration_col, event_col=event_col)
cph.print_summary()

In [None]:
import joblib

# Persist the fitted model. The target directory is created first so the save
# does not fail when ../models does not exist yet.
model_path = '../models/cox_ph.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(cph, model_path)
print(f'Saved CoxPH model to {model_path}')