## Imports

In [None]:
import numpy as np
import pandas as pd

from lifelines import KaplanMeierFitter
from lifelines import CoxPHFitter
from lifelines.statistics import logrank_test

import matplotlib.pyplot as plt

import seaborn as sns

In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

%matplotlib inline
plt.rcParams['figure.figsize'] = [10, 5]
plt.rc('xtick', labelsize=10) 
plt.rc('ytick', labelsize=10)

## Input

In [None]:
# Three independent Kaplan-Meier estimators: one for the whole cohort
# (kmf_gen) and one each for the emergency (kmf_ep) and non-emergency
# (kmf_nep) subgroups fitted further down.
kmf_gen, kmf_ep, kmf_nep = (KaplanMeierFitter() for _ in range(3))

In [None]:
# Load the analysis table from 'ma_crc.csv' (path is relative to the
# notebook's working directory).
data = pd.read_csv('ma_crc.csv')

In [None]:
# Display the column labels of the loaded table.
data.columns

In [None]:
# Remove the 90- and 180-day duration/status columns in a single call;
# only the 365-day pair is kept for the analysis below.
data = data.drop(
    ['Time90', 'Time180', 'DeathStatus90', 'DeathStatus180'],
    axis=1,
)

In [None]:
# Give the remaining 365-day columns the generic names used by every
# later cell ('Time' and 'DeathStatus').
data = data.rename(
    columns={
        'Time365': 'Time',
        'DeathStatus365': 'DeathStatus',
    }
)

## EDA

## Data Prep

In [None]:
# Split the cohort on the EmergencyStatus flag:
#   ep  -> rows where EmergencyStatus equals 1
#   nep -> rows where EmergencyStatus equals 0
ep = data[data["EmergencyStatus"] == 1]
nep = data[data["EmergencyStatus"] == 0]

In [None]:
kmf.fit(durations = data["Time"], event_observed = data["DeathStatus"])

In [None]:
kmf.plot()
plt.title("Cancer Mortality", fontsize=20)
plt.xlabel ("Days", fontsize=12)
plt.ylabel ("Survival", fontsize=12)
plt.figure(figsize=(8, 4), dpi=80)
plt.show()

## Analysis

In [None]:
# Fit one Kaplan-Meier curve per subgroup; the labels are what appears in
# the plot legend.
kmf_ep.fit(
    durations=ep['Time'],
    event_observed=ep['DeathStatus'],
    label='Emergency Presentation',
)
kmf_nep.fit(
    durations=nep['Time'],
    event_observed=nep['DeathStatus'],
    label='Routine Presentation',
)

In [None]:
fig, ax = plt.subplots(figsize=(10,5))

# Draw both curves on the axes created above. Passing `ax` explicitly
# avoids relying on whichever axes happens to be "current".
kmf_ep.plot(ax=ax, color='r')
kmf_nep.plot(ax=ax, color='b')

# NOTE(review): the title previously said "Lung Cancer", but `data` is read
# from 'ma_crc.csv' and this notebook keeps separate "CRC" and "LC" tables,
# so this looked like a copy-paste leftover. Confirm the cohort name.
plt.title("1-Year Mortality of Colorectal Cancer by Route of Presentation", fontsize=20)
plt.xlabel ("Days from diagnosis", fontsize=12)
plt.ylabel ("Survival", fontsize=12)

# Restrict the x-axis to the first 365 days.
plt.xlim([0,365])
plt.show()

In [None]:
# Re-read the source file and rebuild `data` from scratch: drop the 90- and
# 180-day columns, then rename the 365-day pair to 'Time' / 'DeathStatus'.
data = (
    pd.read_csv('ma_crc.csv')
    .drop(['Time180', 'Time90', 'DeathStatus180', 'DeathStatus90'], axis=1)
    .rename(columns={'Time365': 'Time', 'DeathStatus365': 'DeathStatus'})
)

In [None]:
# Log-rank test comparing the emergency (ep) and non-emergency (nep) groups.

# Durations and event indicators for group A (ep) ...
T, E = ep['Time'], ep['DeathStatus']
# ... and for group B (nep).
T1, E1 = nep['Time'], nep['DeathStatus']

results = logrank_test(
    durations_A=T,
    durations_B=T1,
    event_observed_A=E,
    event_observed_B=E1,
)
results.print_summary()

In [None]:
# Cox proportional-hazards model. Every column of `data` other than the
# duration and event columns is passed to the fitter as a covariate.
cph = CoxPHFitter()
cph.fit(
    df=data,
    duration_col='Time',
    event_col='DeathStatus',
)

In [None]:
# Plot the fitted Cox model using the fitter's built-in plot method.
cph.plot()

In [None]:
# Load 'MM2 CRC.csv' using pandas' pure-Python parser engine.
mm2_crc = pd.read_csv('MM2 CRC.csv', engine='python')

In [None]:
# Load 'MM2 LC.csv' using pandas' pure-Python parser engine.
mm2_lc = pd.read_csv('MM2 LC.csv', engine='python')

In [None]:
# Load 'MM3 CRC.csv' using pandas' pure-Python parser engine.
mm3_crc = pd.read_csv('MM3 CRC.csv', engine='python')

In [None]:
# Load 'MM3 LC.csv' using pandas' pure-Python parser engine.
mm3_lc = pd.read_csv('MM3 LC.csv', engine='python')

#### MM2 CRC

In [None]:
# Working subset of the MM2 CRC table: the ID column plus the six fields
# examined in the cells below.
a = mm2_crc.loc[:, [
    'StudyID',
    'UnplanVal',
    'DxVal',
    'TypeOfEP',
    'DxMOD',
    'DxDelayRFFUReq',
    'DxDelayRFFUComp',
]]

In [None]:
# Keep only the rows where both UnplanVal and DxVal are 'Yes'.
both_yes = a['UnplanVal'].eq('Yes') & a['DxVal'].eq('Yes')
a = a[both_yes]

In [None]:
# Count of non-null StudyID values for each TypeOfEP category.
a.groupby('TypeOfEP')[['StudyID']].count()

In [None]:
# Count of non-null StudyID values for each DxMOD category.
a.groupby('DxMOD')[['StudyID']].count()

In [None]:
# Narrow further to the rows whose DxMOD value is 'Yes'.
b = a.loc[a['DxMOD'].eq('Yes')]

In [None]:
# Count of non-null StudyID values for each DxDelayRFFUReq category
# (within the DxMOD == 'Yes' subset).
b.groupby('DxDelayRFFUReq')[['StudyID']].count()

In [None]:
# Count of non-null StudyID values for each DxDelayRFFUComp category
# (within the DxMOD == 'Yes' subset).
b.groupby('DxDelayRFFUComp')[['StudyID']].count()

#### MM2 LC

In [None]:
# Working subset of the MM2 LC table: the ID column plus the six fields
# examined in the cells below.
a = mm2_lc.loc[:, [
    'StudyID',
    'UnplanVal',
    'DxVal',
    'TypeOfEP',
    'DxMOD',
    'DxDelayRFFUReq',
    'DxDelayRFFUComp',
]]

In [None]:
# Keep only the rows where both UnplanVal and DxVal are 'Yes'.
both_yes = a['UnplanVal'].eq('Yes') & a['DxVal'].eq('Yes')
a = a[both_yes]

In [None]:
# Count of non-null StudyID values for each TypeOfEP category.
a.groupby('TypeOfEP')[['StudyID']].count()

In [None]:
# Count of non-null StudyID values for each DxMOD category.
a.groupby('DxMOD')[['StudyID']].count()

In [None]:
# Narrow further to the rows whose DxMOD value is 'Yes'.
b = a.loc[a['DxMOD'].eq('Yes')]

In [None]:
# Count of non-null StudyID values for each DxDelayRFFUReq category
# (within the DxMOD == 'Yes' subset).
b.groupby('DxDelayRFFUReq')[['StudyID']].count()

In [None]:
# Count of non-null StudyID values for each DxDelayRFFUComp category
# (within the DxMOD == 'Yes' subset).
b.groupby('DxDelayRFFUComp')[['StudyID']].count()

In [None]:
# Display the current contents of `b` (the DxMOD == 'Yes' subset built in
# the preceding cells).
b

In [None]:
# Separate Kaplan-Meier estimators for the two PatientRace groups compared
# below.
kmf_w, kmf_b = KaplanMeierFitter(), KaplanMeierFitter()

In [None]:
# Split the cohort on PatientRace: code 1 -> w, code 2 -> b.
# Note that this rebinds `b`, which earlier held a filtered MM2 subset.
w = data[data["PatientRace"] == 1]
b = data[data["PatientRace"] == 2]

In [None]:
# Fit one Kaplan-Meier curve per race group; the labels are what appears in
# the plot legend.
kmf_w.fit(
    durations=w['Time'],
    event_observed=w['DeathStatus'],
    label='White',
)
kmf_b.fit(
    durations=b['Time'],
    event_observed=b['DeathStatus'],
    label='Black',
)

In [None]:
fig, ax = plt.subplots(figsize=(10,5))

# Draw both curves on the axes created above. Passing `ax` explicitly
# avoids relying on whichever axes happens to be "current".
kmf_w.plot(ax=ax, color='r')
kmf_b.plot(ax=ax, color='b')

# The curves here are split on PatientRace (labels 'White' / 'Black'), not on
# route of presentation, so the title copied from the earlier plot was wrong.
# NOTE(review): "Lung Cancer" was also replaced because `data` is read from
# 'ma_crc.csv' -- confirm the cohort name.
plt.title("1-Year Mortality of Colorectal Cancer by Patient Race", fontsize=20)
plt.xlabel ("Days from diagnosis", fontsize=12)
plt.ylabel ("Survival", fontsize=12)

# Restrict the x-axis to the first 365 days.
plt.xlim([0,365])
plt.show()

In [None]:
# Number of rows with EmergencyStatus == 1 among PatientRace == 1.
len(data.query("EmergencyStatus == 1 and PatientRace == 1"))

In [None]:
# Total number of rows with PatientRace == 1.
len(data.query("PatientRace == 1"))

In [None]:
# Number of rows with EmergencyStatus == 1 among PatientRace == 2.
len(data.query("EmergencyStatus == 1 and PatientRace == 2"))

In [None]:
# Total number of rows with PatientRace == 2.
len(data.query("PatientRace == 2"))