# Survival Analysis: Kaplan–Meier & Cox PH
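This notebook is a complete, runnable analysis: it loads `dataset.csv`, plots Kaplan–Meier survival curves by contract type, and fits a Cox proportional-hazards model. To use your own data, replace `dataset.csv` with a file that has the same columns (`tenure`, `churn`, `monthly_charges`, `contract_type`, `internet_service`, `gender`).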

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter, CoxPHFitter
from lifelines.statistics import proportional_hazard_test
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# which would also hide any fitting/convergence warnings raised later --
# confirm that is intended.
warnings.simplefilter('ignore')

# Load the input data; the last expression previews the first five rows
# when run as a notebook cell.
df = pd.read_csv(filepath_or_buffer='dataset.csv')
df.head(5)

In [None]:
# EDA: dataset dimensions, per-column summary statistics, and missing-value counts.
dataset_shape = df.shape
print(dataset_shape)

# include='all' summarises every column, not just the numeric ones.
column_summary = df.describe(include='all')
print(column_summary)

# Number of missing entries in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)


In [None]:
# Kaplan-Meier by contract_type
# KaplanMeierFitter.fit rejects NaN durations/events, and groupby drops rows
# whose group key is NaN without saying so. Restrict to complete rows up
# front and report how many were excluded so the omission is visible.
km_df = df.dropna(subset=['tenure', 'churn', 'contract_type'])
n_km_dropped = len(df) - len(km_df)
if n_km_dropped:
    print(f'Kaplan-Meier: excluded {n_km_dropped} of {len(df)} rows with '
          'missing tenure, churn, or contract_type.')

kmf = KaplanMeierFitter()
# Draw every group's curve on one explicit Axes rather than relying on
# pyplot's implicit "current axes" state.
fig, ax = plt.subplots(figsize=(8, 6))
for grp, gdf in km_df.groupby('contract_type'):
    # The fitter is reused: each fit overwrites the previous one, so the
    # curve must be plotted before moving on to the next group.
    kmf.fit(gdf['tenure'], gdf['churn'], label=grp)
    kmf.plot_survival_function(ax=ax, ci_show=True)
ax.set_title('Kaplan-Meier by contract_type')
ax.set_xlabel('Time')
ax.set_ylabel('Survival probability')
ax.grid(True)
plt.show()


In [None]:
# Cox PH model - prepare and fit
# Covariates that are categorical and must be one-hot encoded.
categorical_cols = ['contract_type', 'internet_service', 'gender']
model_df = df[['tenure', 'churn', 'monthly_charges'] + categorical_cols].copy()

# CoxPHFitter.fit raises on NaN values, so fit on complete cases only and
# report how many rows that removes instead of failing (or dropping silently).
n_model_rows = len(model_df)
model_df = model_df.dropna()
n_model_dropped = n_model_rows - len(model_df)
if n_model_dropped:
    print(f'Cox PH: excluded {n_model_dropped} of {n_model_rows} rows with '
          'missing values before fitting.')

# Encode only the categorical covariates. Without `columns=`, get_dummies
# encodes every object-typed column, which could expand tenure/churn into
# dummies and break the rename and fit below. drop_first avoids perfectly
# collinear dummies; dtype=float gives numeric (not bool) indicator columns.
model_df = pd.get_dummies(
    model_df, columns=categorical_cols, drop_first=True, dtype=float
)
model_df = model_df.rename(columns={'tenure': 'duration', 'churn': 'event'})

cph = CoxPHFitter()
cph.fit(model_df, duration_col='duration', event_col='event')
cph.print_summary()
