# Flatiron Health mCRC: Survival metrics for strict elgibility criteria
**Background: Calculate survival metrics for emulated trials involving patients meeting strict elgibliity criteria. Hazard ratio for the full cohort is calculated from a Cox-IPTW model. Restricted mean survival time and median overall survival are calculated for phenotypes using an IPTW-adjusted KM curve.** 

## Part 1: Identify patients with exclusion criteria

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Function that returns number of rows and count of unique PatientIDs for a dataframe. 
def row_ID(dataframe):
    row = dataframe.shape[0]
    ID = dataframe['PatientID'].nunique()
    return row, ID

In [3]:
train = pd.read_csv('train_full.csv')
row_ID(train)

(27452, 27452)

In [4]:
test = pd.read_csv('test_full.csv')
row_ID(test)

(6863, 6863)

In [5]:
df = pd.concat([train, test], ignore_index = True)
row_ID(df)

(34315, 34315)

### 1. Relevant comorbidities in the year preceding metastatic diagnosis 
* MI
* Severe psychiatric history 
* Drug use disorder 
* Inflammatory bowel disease
* Nephrotic syndrome 

In [6]:
diagnosis = pd.read_csv('Diagnosis.csv')

In [7]:
diagnosis = diagnosis[diagnosis['PatientID'].isin(df['PatientID'])]       

In [8]:
diagnosis.loc[:, 'DiagnosisDate'] = pd.to_datetime(diagnosis['DiagnosisDate'])

In [9]:
enhanced_met = pd.read_csv('Enhanced_MetastaticCRC.csv')

In [10]:
enhanced_met.loc[:, 'MetDiagnosisDate'] = pd.to_datetime(enhanced_met['MetDiagnosisDate'])

In [11]:
row_ID(diagnosis)

(1088706, 34315)

In [12]:
diagnosis = pd.merge(diagnosis, enhanced_met[['PatientID', 'MetDiagnosisDate']], on = 'PatientID', how = 'left')

In [13]:
row_ID(diagnosis)

(1088706, 34315)

In [14]:
diagnosis.loc[:, 'date_diff'] = (diagnosis['DiagnosisDate'] - diagnosis['MetDiagnosisDate']).dt.days

In [15]:
diagnosis.loc[:, 'diagnosis_code'] = diagnosis['DiagnosisCode'].replace('\.', '', regex = True)

In [16]:
# ICD-9 dataframe with unique codes for each patient. 
diagnosis_9 = (
    diagnosis
    .query('date_diff <= 0 and date_diff > -365')
    .query('DiagnosisCodeSystem == "ICD-9-CM"')
    .drop_duplicates(subset = (['PatientID', 'DiagnosisCode']), keep = 'first')
    .filter(items = ['PatientID', 'DiagnosisCode', 'diagnosis_code'])
)

In [17]:
comorb_9_IDs = (
    diagnosis_9[diagnosis_9['diagnosis_code'].str.match('410|'
                                                        '29[5789]|'
                                                        '30[34]|'
                                                        '55[56]|'
                                                        '581|'
                                                        '4010|'
                                                        '286|'
                                                        '444|'
                                                        '4151|'
                                                        '451[1289]|'
                                                        '431')].PatientID.unique())

In [18]:
len(comorb_9_IDs)

190

In [19]:
# ICD-10 dataframe with unique codes for each patient. 
diagnosis_10 = (
    diagnosis
    .query('date_diff <= 0 and date_diff > -365')
    .query('DiagnosisCodeSystem == "ICD-10-CM"')
    .drop_duplicates(subset = (['PatientID', 'DiagnosisCode']), keep = 'first')
    .filter(items = ['PatientID', 'DiagnosisCode', 'diagnosis_code'])
)

In [20]:
comorb_10_IDs = (
    diagnosis_10[diagnosis_10['diagnosis_code'].str.match('I21|'
                                                          'F2[0234589]|'
                                                          'F1[0145]|'
                                                          'K5[01]|'
                                                          'N04|'
                                                          'I16|'
                                                          'D6[567]|'
                                                          'I74|'
                                                          'I82|'
                                                          'I6[01]')].PatientID.unique())

In [21]:
len(comorb_10_IDs)

418

In [22]:
comorb_IDs = np.unique(np.concatenate([comorb_9_IDs, comorb_10_IDs]))

In [23]:
len(comorb_IDs)

603

### 2. CNS metastasis at start of treatment 

In [24]:
line_therapy = pd.read_csv('LineOfTherapy.csv')

In [25]:
line_therapy = line_therapy[line_therapy['PatientID'].isin(df['PatientID'])]       

In [26]:
line_therapy.loc[:, 'StartDate'] = pd.to_datetime(line_therapy['StartDate'])

In [27]:
therapy_fl = line_therapy.query('LineNumber == 1').query('IsMaintenanceTherapy == False')[['PatientID', 'StartDate']]

In [28]:
row_ID(diagnosis)

(1088706, 34315)

In [29]:
cns_fl = pd.merge(diagnosis, therapy_fl, on = 'PatientID', how = 'left')

In [30]:
row_ID(diagnosis)

(1088706, 34315)

In [31]:
cns_fl.loc[:, 'fl_date_diff'] = (cns_fl['DiagnosisDate'] - cns_fl['StartDate']).dt.days

In [32]:
# ICD-9 dataframe with unique codes for each patient. 
cns_fl_9 = (
    cns_fl
    .query('fl_date_diff <= 0 and fl_date_diff > -90')
    .query('DiagnosisCodeSystem == "ICD-9-CM"')
    .drop_duplicates(subset = (['PatientID', 'DiagnosisCode']), keep = 'first')
    .filter(items = ['PatientID', 'DiagnosisCode', 'diagnosis_code'])
)

In [33]:
cns_fl_9_ids = (
    cns_fl_9[cns_fl_9['diagnosis_code'].str.match('198[34]')].PatientID.unique()
)

In [34]:
len(cns_fl_9_ids)

34

In [35]:
# ICD-9 dataframe with unique codes for each patient. 
cns_fl_10 = (
    cns_fl
    .query('fl_date_diff <= 0 and fl_date_diff > -90')
    .query('DiagnosisCodeSystem == "ICD-10-CM"')
    .drop_duplicates(subset = (['PatientID', 'DiagnosisCode']), keep = 'first')
    .filter(items = ['PatientID', 'DiagnosisCode', 'diagnosis_code'])
)

In [36]:
cns_fl_10_ids = (
    cns_fl_10[cns_fl_10['diagnosis_code'].str.match('C79[34]')].PatientID.unique()
)

In [37]:
len(cns_fl_10_ids)

134

In [38]:
cns_fl_IDs = np.unique(np.concatenate([cns_fl_9_ids, cns_fl_10_ids]))

In [39]:
len(cns_fl_IDs)

165

### 3. ECOG 3 or 4 at start of treatment 

In [40]:
base_ecog = pd.read_csv('BaselineECOG.csv')

In [41]:
base_ecog = base_ecog[base_ecog['PatientID'].isin(df['PatientID'])]       

In [42]:
ecog_fl_IDs = (
    base_ecog
    .query('LineNumber == 1')
    .query('ECOGValue == "3" or ECOGValue == "4"')
    .PatientID.unique())

In [43]:
len(ecog_fl_IDs)

599

### 4. Abnormal organ function at start of treatment 
* Hemoglobin >9
* Creatinine <2
* Total bilirubin <3

In [44]:
lab = pd.read_csv('Lab.csv')

In [45]:
lab = lab[lab['PatientID'].isin(df['PatientID'])]

In [46]:
lab.loc[:, 'ResultDate'] = pd.to_datetime(lab['ResultDate']) 

In [47]:
row_ID(lab)

(30620401, 33014)

In [48]:
lab = pd.merge(lab, therapy_fl[['PatientID', 'StartDate']], on = 'PatientID', how = 'left')

In [49]:
row_ID(lab)

(31709973, 33014)

In [50]:
# Select rows with clinically relevant labs.
lab_core = (
    lab[
    (lab['LOINC'] == "2160-0") |
    (lab['LOINC'] == "38483-4") | 
    (lab['LOINC'] == "718-7") |
    (lab['LOINC'] == "20509-6") |
    (lab['LOINC'] == "42719-5") |
    (lab['LOINC'] == "1975-2")]
    .filter(items = ['PatientID', 
                     'ResultDate', 
                     'LOINC', 
                     'LabComponent', 
                     'TestUnits', 
                     'TestUnitsCleaned', 
                     'TestResult', 
                     'TestResultCleaned', 
                     'StartDate'])
)

In [51]:
conditions = [
    ((lab_core['LOINC'] == '2160-0') | (lab_core['LOINC'] == '38483-4')),
    ((lab_core['LOINC'] == '718-7') | (lab_core['LOINC'] == '20509-6')),
    ((lab_core['LOINC'] == '42719-5') | (lab_core['LOINC'] == '1975-2'))]

choices = ['creatinine', 
           'hemoglobin', 
           'total_bilirubin']

lab_core.loc[:, 'lab_name'] = np.select(conditions, choices)

In [52]:
row_ID(lab_core)

(3022972, 32769)

In [53]:
conditions = [
    (lab_core['lab_name'] == 'hemoglobin') & (lab_core['TestUnits'] == 'g/uL')]

choices = [lab_core['TestResultCleaned'] / 100000]

lab_core.loc[:, 'test_result_cleaned'] = np.select(conditions, choices, default = lab_core['TestResultCleaned'])

In [54]:
lab_f = (
    lab_core
    .assign(lab_date_diff = (lab_core['ResultDate'] - lab_core['StartDate']).dt.days)
    .query('lab_date_diff <= 0 and lab_date_diff > -90')
    .sort_values(by = ['PatientID', 'lab_name', 'lab_date_diff'], ascending = [True, True, False])
    .drop_duplicates(subset = ['PatientID', 'lab_name'], keep = 'first' )
)

In [55]:
# Select lab closest to date of advanced diagnosis and pivot to a wide table. 
lab_wide = (
    lab_f
    .pivot(index = 'PatientID', columns = 'lab_name', values = 'test_result_cleaned')
    .reset_index())

lab_wide.columns.name = None

In [56]:
lab_wide.sample(3)

Unnamed: 0,PatientID,creatinine,hemoglobin,total_bilirubin
19192,FC908DAC89074,,12.1,
910,F09A18DBB9738,0.9,12.3,0.5
23355,FF4245588A6A1,,9.4,


In [57]:
ab_organ_IDs = lab_wide.query('creatinine > 2 or hemoglobin < 9 or total_bilirubin > 3').PatientID

In [58]:
len(ab_organ_IDs)

2363

In [59]:
ab_organ_IDs.to_csv('ab_organ_IDs.csv', index = False)

In [60]:
del base_ecog
del cns_fl_10
del cns_fl_9
del diagnosis
del diagnosis_10
del diagnosis_9
del enhanced_met
del lab
del lab_core
del lab_f
del lab_wide 
del line_therapy
del therapy_fl

## Part 2: In-silico trials 

### Import packages and create necessary functions

In [61]:
import numpy as np
import pandas as pd

from scipy import stats

from sksurv.nonparametric import kaplan_meier_estimator
from survive import KaplanMeier, SurvivalData

from lifelines import KaplanMeierFitter, CoxPHFitter
from lifelines.plotting import add_at_risk_counts
from lifelines.utils import median_survival_times, restricted_mean_survival_time
from lifelines.statistics import logrank_test

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer 
from sklearn.linear_model import LogisticRegression
from sklearn.utils import resample

import warnings

In [62]:
# Function that returns number of rows and count of unique PatientIDs for a dataframe. 
def row_ID(dataframe):
    row = dataframe.shape[0]
    ID = dataframe['PatientID'].nunique()
    return row, ID

In [63]:
# Find index for value closest to input value. 
def find_nearest(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx]

In [64]:
# Calculates median overeall suvival for risk groups. 
def mos(low, med, high, comp):
    low_os = low.median_survival_time_
    med_os = med.median_survival_time_
    high_os = high.median_survival_time_
    comp_os = comp.median_survival_time_
    mos = [low_os, med_os, high_os, comp_os]
    return (mos)

In [65]:
def rmst_mos_95ci(df, num_samples, drug, event, items_list, numerical_features, rmst_time):
    
    """
    Estimate the 95% confidence interval for RMST and mOS using bootstrap resampling.

    Parameters:
    - df: DataFrame containing survival data
    - num_samples: Number of bootstrap samples
    - drug: Treatment indicator variable
    - event: Event type ('death' or 'progression')
    - items_list: Feature list for IPTW 
    - numerical_features: List of numerical features
    - rmst_time: Time to calculate RMST 

    Returns:
    - mos_A_95: mOS 95% CI for treatment
    - mos_B_95: mOS 95% CI for control
    - rmst_A_95: RMST 95% CI for treatment
    - rmst_B_95: RMST 95% CI for control
    - difference_rmst_95: RMST 95% CI for difference between treatment and control 
    """
    
    np.random.seed(42)
    mos_A = []
    mos_B = []
    rmst_A_list = []
    rmst_B_list = []
    differences_rmst = []
    
    # Define variables based on the event type
    if event == 'death':
        time_column = 'timerisk_treatment'
        status_column = 'death_status'
        
    else:
        time_column = 'time_prog_treatment'
        status_column = 'pfs_status'
        
    # Set up preprocessor for logistical regression which will be for IPTW  
    numerical_transformer = Pipeline(steps = [
        ('imputer', SimpleImputer(strategy = 'median')),
        ('std_scaler', StandardScaler())])
        
    categorical_transformer = OneHotEncoder(handle_unknown = 'ignore')
    categorical_features = list(df.select_dtypes(include = ['category']).columns)
        
    preprocessor = ColumnTransformer(
        transformers = [
            ('num', numerical_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)],
        remainder = 'passthrough')
    
    # Boostrap HR 
    for _ in range(num_samples):
        
        # Resample data with replacement
        resampled_df = resample(df).drop(columns = ['ps', 'weight'])
        
        # Calculated IPTW for the resampled group 
        df_x = preprocessor.fit_transform(resampled_df.filter(items = items_list))
                                           
        df_lr = LogisticRegression(max_iter = 1000)
        df_lr.fit(df_x, resampled_df[drug])
        warnings.filterwarnings("ignore")
                                           
        pred = df_lr.predict_proba(df_x)        
        resampled_df['ps'] = pred[:, 1]                          
        resampled_df['weight'] = (
                np.where(resampled_df[drug] == 1, 1/resampled_df['ps'], 1/(1 - resampled_df['ps'])))
    
        # mOS from IPTW-KM
        kmf_A = KaplanMeierFitter()
        kmf_A.fit(resampled_df.query(f'{drug} == 1')[time_column]/30,
                  resampled_df.query(f'{drug} == 1')[status_column], 
                  weights = resampled_df.query(f'{drug} == 1')['weight'])

        kmf_B = KaplanMeierFitter()
        kmf_B.fit(resampled_df.query(f'{drug} == 0')[time_column]/30,
                  resampled_df.query(f'{drug} == 0')[status_column], 
                  weights = resampled_df.query(f'{drug} == 0')['weight'])
    
        mos_A.append(kmf_A.median_survival_time_)
        mos_B.append(kmf_B.median_survival_time_)
        
        # RMST from IPTW-KM
        rmst_A = restricted_mean_survival_time(kmf_A, rmst_time)
        rmst_B = restricted_mean_survival_time(kmf_B, rmst_time)
        
        rmst_A_list.append(rmst_A)
        rmst_B_list.append(rmst_B)
        differences_rmst.append(rmst_A - rmst_B)

    # Calculate the 95% confidence interval
    results = pd.Series({
    'mos_A_95': np.percentile(mos_A, [2.5, 97.5]),
    'mos_B_95': np.percentile(mos_B, [2.5, 97.5]),
    'rmst_A_95': np.percentile(rmst_A_list, [2.5, 97.5]),
    'rmst_B_95': np.percentile(rmst_B_list, [2.5, 97.5]),
    'difference_rmst_95': np.percentile(differences_rmst, [2.5, 97.5])
    })
    
    return results

In [66]:
cutoff = pd.read_csv('risk_cutoff_colorectal.csv', index_col = 0)

### FIRE-3: FOLFIRI plus cetuximab vs. FOLFIRI plus bevacizumab in KRAS wild-type 

**INCLUSION**
* Untreated metastatic colorectal cancer
* First-line recepit of FOLFIRI/FOLFOX plus cetuximab or FOLFIRI/FOLFOX plus bevacizumab 
* KRAS wild-type 
* No relevant comorbidities in the year preceding metastatic diagnosis 
* No CNS metastasis at time of treatment
* No ECOG 3 or 4 at time of treatment 
* Adequate organ function at time of treatment 

#### FOLFIRI plus Cetuximab

In [67]:
df_full = pd.read_csv('df_risk_crude.csv', index_col = 'PatientID', dtype = {'death_status': bool})
df_full.index.nunique()

34315

In [68]:
line_therapy = pd.read_csv('LineOfTherapy.csv')

In [69]:
line_therapy_fl = (
    line_therapy[line_therapy['PatientID'].isin(df_full.index)]
    .query('LineNumber == 1')
    .query('IsMaintenanceTherapy == False'))

In [70]:
line_therapy_fl[line_therapy_fl['LineName'].str.contains('FOLFOX|Cetuximab')].LineName.value_counts().head(10)

FOLFOX,Bevacizumab            6338
FOLFOX                        4140
FOLFOX,Bevacizumab-Awwb       1356
FOLFOX,Bevacizumab-Bvzr        462
FOLFOX,Panitumumab             456
FOLFIRI,Cetuximab              364
FOLFOXIRI                      253
FOLFOX,Cetuximab               245
FOLFOXIRI,Bevacizumab          228
FOLFOXIRI,Bevacizumab-Awwb     140
Name: LineName, dtype: int64

In [71]:
line_therapy_fl[line_therapy_fl['LineName'].str.contains('FOLFIRI|Cetuximab')].LineName.value_counts().head(10)

FOLFIRI,Bevacizumab         2240
FOLFIRI                      914
FOLFIRI,Bevacizumab-Awwb     557
FOLFIRI,Cetuximab            364
FOLFIRI,Panitumumab          261
FOLFOX,Cetuximab             245
FOLFIRI,Bevacizumab-Bvzr     165
Irinotecan,Cetuximab          90
Cetuximab                     70
FOLFIRI,Ziv-Aflibercept       56
Name: LineName, dtype: int64

In [72]:
fxi_cet = (
    line_therapy_fl
    .query('LineName == "FOLFIRI,Cetuximab" or LineName == "FOLFOX,Cetuximab"')
    [['PatientID', 'StartDate']]
)

In [73]:
fxi_cet.loc[:,'fxi_cet'] = 1

In [74]:
row_ID(fxi_cet)

(609, 568)

In [75]:
fxi_cet['StartDate'] = pd.to_datetime(fxi_cet['StartDate'])

In [76]:
fxi_cet = (
    fxi_cet
    .sort_values(['PatientID', 'StartDate'], ascending = [True, True])
    .drop_duplicates(subset = 'PatientID', keep = 'first')
)

In [77]:
row_ID(fxi_cet)

(568, 568)

#### FOLFIRI plus Bevacizumab

In [78]:
line_therapy_fl[line_therapy_fl['LineName'].str.contains('FOLFIRI|Bevacizumab')].LineName.value_counts().head(10)

FOLFOX,Bevacizumab                     6338
FOLFIRI,Bevacizumab                    2240
FOLFOX,Bevacizumab-Awwb                1356
FOLFIRI                                 914
CAPEOX,Bevacizumab                      764
FOLFIRI,Bevacizumab-Awwb                557
FOLFOX,Bevacizumab-Bvzr                 462
Fluorouracil,Leucovorin,Bevacizumab     423
Capecitabine,Bevacizumab                405
FOLFIRI,Cetuximab                       364
Name: LineName, dtype: int64

In [79]:
line_therapy_fl[line_therapy_fl['LineName'].str.contains('FOLFOX|Bevacizumab')].LineName.value_counts().head(10)

FOLFOX,Bevacizumab                     6338
FOLFOX                                 4140
FOLFIRI,Bevacizumab                    2240
FOLFOX,Bevacizumab-Awwb                1356
CAPEOX,Bevacizumab                      764
FOLFIRI,Bevacizumab-Awwb                557
FOLFOX,Bevacizumab-Bvzr                 462
FOLFOX,Panitumumab                      456
Fluorouracil,Leucovorin,Bevacizumab     423
Capecitabine,Bevacizumab                405
Name: LineName, dtype: int64

In [80]:
fxi_bev_comb = [
    'FOLFIRI,Bevacizumab',
    'FOLFIRI,Bevacizumab-Awwb',
    'FOLFIRI,Bevacizumab-Bvzr',
    'FOLFOX,Bevacizumab',
    'FOLFOX,Bevacizumab-Awwb',
    'FOLFOX,Bevacizumab-Bvzr'
]

fxi_bev = (
    line_therapy_fl
    .query('LineName == @fxi_bev_comb')
    [['PatientID', 'StartDate']]
)

In [81]:
fxi_bev.loc[:,'fxi_cet'] = 0

In [82]:
row_ID(fxi_bev)

(11118, 10785)

In [83]:
fxi_bev['StartDate'] = pd.to_datetime(fxi_bev['StartDate'])

In [84]:
fxi_bev = (
    fxi_bev
    .sort_values(['PatientID', 'StartDate'], ascending = [True, True])
    .drop_duplicates(subset = 'PatientID', keep = 'first')
)

In [85]:
row_ID(fxi_bev)

(10785, 10785)

In [86]:
fire = pd.concat([fxi_cet, fxi_bev])

In [87]:
row_ID(fire)

(11353, 11353)

In [88]:
fire = pd.merge(fire, df_full, on = 'PatientID', how = 'left')

In [89]:
row_ID(fire)

(11353, 11353)

#### KRAS wild type 

In [90]:
biomarkers = pd.read_csv('Enhanced_MetCRCBiomarkers.csv')

In [91]:
biomarkers = biomarkers[biomarkers['PatientID'].isin(fire['PatientID'])]

In [92]:
row_ID(biomarkers)

(46879, 10576)

In [93]:
biomarkers = pd.merge(biomarkers, fire[['PatientID', 'StartDate']], on = 'PatientID', how = 'left')

In [94]:
row_ID(biomarkers)

(46879, 10576)

In [95]:
biomarkers['ResultDate'] = pd.to_datetime(biomarkers['ResultDate'])

In [96]:
biomarkers['SpecimenReceivedDate'] = pd.to_datetime(biomarkers['SpecimenReceivedDate'])

In [97]:
biomarkers.loc[:, 'result_date'] = (
    np.where(biomarkers['ResultDate'].isna(), biomarkers['SpecimenReceivedDate'], biomarkers['ResultDate'])
)

In [98]:
biomarkers.loc[:, 'date_diff'] = (biomarkers['result_date'] - biomarkers['StartDate']).dt.days

In [99]:
kras = (
    biomarkers
    .query('BiomarkerName == "KRAS"')
    .query('date_diff <=30')
    .query('BiomarkerStatus == "Mutation negative"')
    [['PatientID', 'BiomarkerStatus']]
    .rename(columns = {'BiomarkerStatus': 'kras_n'})
    .drop_duplicates(subset = 'PatientID', keep = 'first')
)

In [100]:
row_ID(kras)

(3645, 3645)

In [101]:
fire = pd.merge(fire, kras, on  = 'PatientID', how = 'left')

In [102]:
row_ID(fire)

(11353, 11353)

#### Time from treatment to death or censor 

In [103]:
mortality_tr = pd.read_csv('mortality_cleaned_tr.csv')

In [104]:
mortality_te = pd.read_csv('mortality_cleaned_te.csv')

In [105]:
mortality_tr = mortality_tr[['PatientID', 'death_date', 'last_activity']]

In [106]:
mortality_te = mortality_te[['PatientID', 'death_date', 'last_activity']]

In [107]:
mortality = pd.concat([mortality_tr, mortality_te], ignore_index = True)
print(len(mortality), mortality.PatientID.is_unique)

34315 True


In [108]:
mortality.loc[:, 'last_activity'] = pd.to_datetime(mortality['last_activity'])

In [109]:
mortality.loc[:, 'death_date'] = pd.to_datetime(mortality['death_date'])

In [110]:
len(mortality)

34315

In [111]:
fire = pd.merge(fire, mortality, on = 'PatientID', how = 'left')

In [112]:
len(fire)

11353

In [113]:
conditions = [
    (fire['death_status'] == 1),
    (fire['death_status'] == 0)]

choices = [
    (fire['death_date'] - fire['StartDate']).dt.days,
    (fire['last_activity'] - fire['StartDate']).dt.days]

fire.loc[:, 'timerisk_treatment'] = np.select(conditions, choices)

In [114]:
fire = fire.query('timerisk_treatment >= 0')

In [115]:
row_ID(fire)

(11343, 11343)

#### Patient count 

In [116]:
fire = fire.query('kras_n == "Mutation negative"')

In [117]:
row_ID(fire)

(3641, 3641)

In [118]:
# Exclude those with relevant comorbidities in the year preceding metastatic diagnosis 
fire = fire[~fire['PatientID'].isin(comorb_IDs)]

In [119]:
# Exlcude those with CNS metastasis at time of treatment 
fire = fire[~fire['PatientID'].isin(cns_fl_IDs)]

In [120]:
# Exclude those with ECOG 3 or 4 at time of treatment 
fire = fire[~fire['PatientID'].isin(ecog_fl_IDs)]

In [121]:
# Exclude those with abnormal organ function at time of treatment 
fire = fire[~fire['PatientID'].isin(ab_organ_IDs)]

In [122]:
row_ID(fire)

(3263, 3263)

In [123]:
low_cutoff_fire = cutoff.loc['fire3'].low

In [124]:
high_cutoff_fire = cutoff.loc['fire3'].high

In [125]:
print('FOLFIRI + Cetuximab total:',  fire.query('fxi_cet == 1').shape[0])
print('High risk:', fire.query('fxi_cet == 1').query('risk_score >= @high_cutoff_fire').shape[0])
print('Med risk:', fire.query('fxi_cet == 1').query('risk_score < @high_cutoff_fire and risk_score > @low_cutoff_fire').shape[0])
print('Low risk:', fire.query('fxi_cet == 1').query('risk_score <= @low_cutoff_fire').shape[0])

FOLFIRI + Cetuximab total: 450
High risk: 112
Med risk: 173
Low risk: 165


In [126]:
print('FOLFIRI + Bevacizumab:',  fire.query('fxi_cet == 0').shape[0])
print('High risk:', fire.query('fxi_cet == 0').query('risk_score >= @high_cutoff_fire').shape[0])
print('Med risk:', fire.query('fxi_cet == 0').query('risk_score < @high_cutoff_fire and risk_score > @low_cutoff_fire').shape[0])
print('Low risk:', fire.query('fxi_cet == 0').query('risk_score <= @low_cutoff_fire').shape[0])

FOLFIRI + Bevacizumab: 2813
High risk: 860
Med risk: 960
Low risk: 993


#### Survival curves with covariate balancing 

In [127]:
fire = fire.set_index('PatientID')

In [128]:
fire['met_cat'] = pd.cut(fire['met_year'],
                         bins = [2010, 2015, float('inf')],
                         labels = ['11-15', '16-22'])

In [129]:
conditions = [
    ((fire['ecog_diagnosis'] == "1.0") | (fire['ecog_diagnosis'] == "0.0")),  
    ((fire['ecog_diagnosis'] == "2.0") | (fire['ecog_diagnosis'] == "3.0"))
]

choices = ['lt_2', 'gte_2']

fire['ecog_2'] = np.select(conditions, choices, default = 'unknown')

In [130]:
fire_iptw = fire.filter(items = ['death_status',
                                 'timerisk_treatment',
                                 'fxi_cet',
                                 'age',
                                 'gender',
                                 'race',
                                 'p_type',
                                 'crc_site',
                                 'met_cat',
                                 'delta_met_diagnosis',
                                 'commercial',
                                 'medicare',
                                 'medicaid',
                                 'ecog_2', 
                                 'ses',
                                 'albumin_diag',
                                 'weight_pct_change',
                                 'risk_score'])

In [131]:
fire_iptw.dtypes

death_status               bool
timerisk_treatment      float64
fxi_cet                   int64
age                       int64
gender                   object
race                     object
p_type                   object
crc_site                 object
met_cat                category
delta_met_diagnosis       int64
commercial              float64
medicare                float64
medicaid                float64
ecog_2                   object
ses                     float64
albumin_diag            float64
weight_pct_change       float64
risk_score              float64
dtype: object

In [132]:
to_be_categorical = list(fire_iptw.select_dtypes(include = ['object']).columns)

In [133]:
to_be_categorical

['gender', 'race', 'p_type', 'crc_site', 'ecog_2']

In [134]:
to_be_categorical.append('met_cat')

In [135]:
to_be_categorical.append('ses')

In [136]:
# Convert variables in list to categorical.
for x in list(to_be_categorical):
    fire_iptw[x] = fire_iptw[x].astype('category')

In [137]:
# List of numeric variables, excluding binary variables. 
numerical_features = ['age', 'delta_met_diagnosis', 'albumin_diag', 'weight_pct_change', 'risk_score']

# Transformer will first calculate column median and impute, and then apply a standard scaler. 
numerical_transformer = Pipeline(steps = [
    ('imputer', SimpleImputer(strategy = 'median')),
    ('std_scaler', StandardScaler())])

In [138]:
# List of categorical features.
categorical_features = list(fire_iptw.select_dtypes(include = ['category']).columns)

# One-hot-encode categorical features.
categorical_transformer = OneHotEncoder(handle_unknown = 'ignore')

In [139]:
preprocessor = ColumnTransformer(
    transformers = [
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)],
    remainder = 'passthrough')

In [140]:
fire_iptw_low = (
    fire_iptw
    .query('risk_score <= @low_cutoff_fire'))

fire_iptw_med = (
    fire_iptw
    .query('risk_score < @high_cutoff_fire and risk_score > @low_cutoff_fire'))

fire_iptw_high = (
    fire_iptw
    .query('risk_score >= @high_cutoff_fire'))

fire_iptw_all = fire_iptw

In [141]:
fire_low_x = preprocessor.fit_transform(fire_iptw_low.filter(items = ['age',
                                                                      'gender',
                                                                      'race',
                                                                      'p_type',
                                                                      'crc_site',
                                                                      'met_cat',
                                                                      'delta_met_diagnosis',
                                                                      'commercial',
                                                                      'medicare',
                                                                      'medicaid',
                                                                      'ecog_2', 
                                                                      'ses', 
                                                                      'albumin_diag', 
                                                                      'weight_pct_change',
                                                                      'risk_score']))

fire_med_x = preprocessor.fit_transform(fire_iptw_med.filter(items = ['age',
                                                                      'gender',
                                                                      'race',
                                                                      'p_type',
                                                                      'crc_site',
                                                                      'met_cat',
                                                                      'delta_met_diagnosis',
                                                                      'commercial',
                                                                      'medicare',
                                                                      'medicaid',
                                                                      'ecog_2', 
                                                                      'ses', 
                                                                      'albumin_diag', 
                                                                      'weight_pct_change',
                                                                      'risk_score']))

fire_high_x = preprocessor.fit_transform(fire_iptw_high.filter(items = ['age',
                                                                        'gender',
                                                                        'race',
                                                                        'p_type',
                                                                        'crc_site',
                                                                        'met_cat',
                                                                        'delta_met_diagnosis',
                                                                        'commercial',
                                                                        'medicare',
                                                                        'medicaid',
                                                                        'ecog_2', 
                                                                        'ses', 
                                                                        'albumin_diag', 
                                                                        'weight_pct_change',
                                                                        'risk_score']))

fire_all_x = preprocessor.fit_transform(fire_iptw_all.filter(items = ['age',
                                                                      'gender',
                                                                      'race',
                                                                      'p_type',
                                                                      'crc_site',
                                                                      'met_cat',
                                                                      'delta_met_diagnosis',
                                                                      'commercial',
                                                                      'medicare',
                                                                      'medicaid',
                                                                      'ecog_2', 
                                                                      'ses', 
                                                                      'albumin_diag', 
                                                                      'weight_pct_change',
                                                                      'risk_score']))

In [142]:
lr_fire_low = LogisticRegression(max_iter = 1000)
lr_fire_low.fit(fire_low_x, fire_iptw_low['fxi_cet'])

LogisticRegression(max_iter=1000)

In [143]:
lr_fire_med = LogisticRegression(max_iter = 1000)
lr_fire_med.fit(fire_med_x, fire_iptw_med['fxi_cet'])

LogisticRegression(max_iter=1000)

In [144]:
lr_fire_high = LogisticRegression(max_iter = 1000)
lr_fire_high.fit(fire_high_x, fire_iptw_high['fxi_cet'])

LogisticRegression(max_iter=1000)

In [145]:
lr_fire_all = LogisticRegression(max_iter = 1000)
lr_fire_all.fit(fire_all_x, fire_iptw_all['fxi_cet'])

LogisticRegression(max_iter=1000)

In [146]:
pred_low = lr_fire_low.predict_proba(fire_low_x)
pred_med = lr_fire_med.predict_proba(fire_med_x)
pred_high = lr_fire_high.predict_proba(fire_high_x)
pred_all = lr_fire_all.predict_proba(fire_all_x)

In [147]:
fire_iptw_low['ps'] = pred_low[:, 1]
fire_iptw_med['ps'] = pred_med[:, 1]
fire_iptw_high['ps'] = pred_high[:, 1]
fire_iptw_all['ps'] = pred_all[:, 1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [148]:
fire_iptw_low['weight'] = (
    np.where(fire_iptw_low['fxi_cet'] == 1, 1/fire_iptw_low['ps'], 1/(1 - fire_iptw_low['ps'])))

fire_iptw_med['weight'] = (
    np.where(fire_iptw_med['fxi_cet'] == 1, 1/fire_iptw_med['ps'], 1/(1 - fire_iptw_med['ps'])))

fire_iptw_high['weight'] = (
    np.where(fire_iptw_high['fxi_cet'] == 1, 1/fire_iptw_high['ps'], 1/(1 - fire_iptw_high['ps'])))

fire_iptw_all['weight'] = (
    np.where(fire_iptw_all['fxi_cet'] == 1, 1/fire_iptw_all['ps'], 1/(1 - fire_iptw_all['ps'])))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [149]:
# Low KM curves
kmf_low_cet_f_iptw = KaplanMeierFitter()
kmf_low_bev_f_iptw = KaplanMeierFitter()

kmf_low_cet_f_iptw.fit(
    fire_iptw_low.query('fxi_cet == 1').timerisk_treatment/30,
    fire_iptw_low.query('fxi_cet == 1').death_status,
    weights = fire_iptw_low.query('fxi_cet == 1')['weight'])

kmf_low_bev_f_iptw.fit(
    fire_iptw_low.query('fxi_cet == 0').timerisk_treatment/30,
    fire_iptw_low.query('fxi_cet == 0').death_status,
    weights = fire_iptw_low.query('fxi_cet == 0')['weight'])

# Med KM curves
kmf_med_cet_f_iptw = KaplanMeierFitter()
kmf_med_bev_f_iptw = KaplanMeierFitter()

kmf_med_cet_f_iptw.fit(
    fire_iptw_med.query('fxi_cet == 1').timerisk_treatment/30,
    fire_iptw_med.query('fxi_cet == 1').death_status,
    weights = fire_iptw_med.query('fxi_cet == 1')['weight'])

kmf_med_bev_f_iptw.fit(
    fire_iptw_med.query('fxi_cet == 0').timerisk_treatment/30,
    fire_iptw_med.query('fxi_cet == 0').death_status,
    weights = fire_iptw_med.query('fxi_cet == 0')['weight'])

# High KM curves 
kmf_high_cet_f_iptw = KaplanMeierFitter()
kmf_high_bev_f_iptw = KaplanMeierFitter()

kmf_high_cet_f_iptw.fit(
    fire_iptw_high.query('fxi_cet == 1').timerisk_treatment/30,
    fire_iptw_high.query('fxi_cet == 1').death_status,
    weights = fire_iptw_high.query('fxi_cet == 1')['weight'])

kmf_high_bev_f_iptw.fit(
    fire_iptw_high.query('fxi_cet == 0').timerisk_treatment/30,
    fire_iptw_high.query('fxi_cet == 0').death_status,
    weights = fire_iptw_high.query('fxi_cet == 0')['weight'])

# All KM curves 
kmf_all_cet_f_iptw = KaplanMeierFitter()
kmf_all_bev_f_iptw = KaplanMeierFitter()

kmf_all_cet_f_iptw.fit(
    fire_iptw_all.query('fxi_cet == 1').timerisk_treatment/30,
    fire_iptw_all.query('fxi_cet == 1').death_status,
    weights = fire_iptw_all.query('fxi_cet == 1')['weight'])

kmf_all_bev_f_iptw.fit(
    fire_iptw_all.query('fxi_cet == 0').timerisk_treatment/30,
    fire_iptw_all.query('fxi_cet == 0').death_status,
    weights = fire_iptw_all.query('fxi_cet == 0')['weight'])

  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  


<lifelines.KaplanMeierFitter:"KM_estimate", fitted with 3262.87 total observations, 1279.17 right-censored observations>

#### Calculating survival metrics 

In [150]:
cet_fire_median_os = mos(kmf_low_cet_f_iptw,
                         kmf_med_cet_f_iptw,
                         kmf_high_cet_f_iptw,
                         kmf_all_cet_f_iptw)

bev_fire_median_os = mos(kmf_low_bev_f_iptw,
                         kmf_med_bev_f_iptw,
                         kmf_high_bev_f_iptw, 
                         kmf_all_bev_f_iptw)

In [151]:
fire_iptw_all_imputed = fire_iptw_all.copy()
fire_iptw_all_imputed['albumin_diag'] = fire_iptw_all_imputed['albumin_diag'].fillna(fire_iptw_all_imputed['albumin_diag'].median())
fire_iptw_all_imputed['weight_pct_change'] = fire_iptw_all_imputed['weight_pct_change'].fillna(fire_iptw_all_imputed['weight_pct_change'].median())
fire_iptw_all_imputed['ses'] = fire_iptw_all_imputed['ses'].cat.add_categories('unknown')
fire_iptw_all_imputed['ses'] = fire_iptw_all_imputed['ses'].fillna('unknown')

In [152]:
fire_hr_all = CoxPHFitter()
fire_hr_all.fit(fire_iptw_all_imputed,
                duration_col = 'timerisk_treatment', 
                event_col = 'death_status', 
                formula = 'fxi_cet + age + gender + race + p_type + crc_site + met_cat + delta_met_diagnosis + commercial + medicare + medicaid + ecog_2 + ses + albumin_diag + weight_pct_change + risk_score',
                weights_col = 'weight',
                robust = True)

<lifelines.CoxPHFitter: fitted with 6508.19 total observations, 2691.5 right-censored observations>

In [153]:
fire_all_rmst_mos_95 = rmst_mos_95ci(fire_iptw_all,
                                     1000,
                                     'fxi_cet',
                                     'death',
                                     ['age',
                                      'gender',
                                      'race',
                                      'p_type',
                                      'crc_site',
                                      'met_cat',
                                      'delta_met_diagnosis',
                                      'commercial',
                                      'medicare',
                                      'medicaid',
                                      'ecog_2', 
                                      'ses', 
                                      'albumin_diag', 
                                      'weight_pct_change',
                                      'risk_score'],
                                     ['age', 'delta_met_diagnosis', 'albumin_diag', 'weight_pct_change', 'risk_score'],
                                     48)

In [154]:
fire_low_rmst_mos_95 = rmst_mos_95ci(fire_iptw_low,
                                     1000,
                                     'fxi_cet',
                                     'death',
                                     ['age',
                                      'gender',
                                      'race',
                                      'p_type',
                                      'crc_site',
                                      'met_cat',
                                      'delta_met_diagnosis',
                                      'commercial',
                                      'medicare',
                                      'medicaid',
                                      'ecog_2', 
                                      'ses', 
                                      'albumin_diag', 
                                      'weight_pct_change',
                                      'risk_score'],
                                     ['age', 'delta_met_diagnosis', 'albumin_diag', 'weight_pct_change', 'risk_score'],
                                     48)

In [155]:
fire_med_rmst_mos_95 = rmst_mos_95ci(fire_iptw_med,
                                     1000,
                                     'fxi_cet',
                                     'death',
                                     ['age',
                                      'gender',
                                      'race',
                                      'p_type',
                                      'crc_site',
                                      'met_cat',
                                      'delta_met_diagnosis',
                                      'commercial',
                                      'medicare',
                                      'medicaid',
                                      'ecog_2', 
                                      'ses', 
                                      'albumin_diag', 
                                      'weight_pct_change',
                                      'risk_score'],
                                     ['age', 'delta_met_diagnosis', 'albumin_diag', 'weight_pct_change', 'risk_score'],
                                     48)

In [156]:
fire_high_rmst_mos_95 = rmst_mos_95ci(fire_iptw_high,
                                     1000,
                                     'fxi_cet',
                                     'death',
                                     ['age',
                                      'gender',
                                      'race',
                                      'p_type',
                                      'crc_site',
                                      'met_cat',
                                      'delta_met_diagnosis',
                                      'commercial',
                                      'medicare',
                                      'medicaid',
                                      'ecog_2', 
                                      'ses', 
                                      'albumin_diag', 
                                      'weight_pct_change',
                                      'risk_score'],
                                     ['age', 'delta_met_diagnosis', 'albumin_diag', 'weight_pct_change', 'risk_score'],
                                     48)

In [157]:
fire3_data = [
    {'trial_name': 'FIRE-3', 
     'risk_group': 'low', 
     's_trt_mos': cet_fire_median_os[0],
     's_trt_mos_95': fire_low_rmst_mos_95.mos_A_95,
     's_cont_mos': bev_fire_median_os[0],
     's_cont_mos_95': fire_low_rmst_mos_95.mos_B_95,
     's_mos_diff': cet_fire_median_os[0] - bev_fire_median_os[0], 
     'rct_trt_arm': 33.1, 
     'rct_cont_arm': 25.6,
     'rct_mos_diff': 33.1-25.6,
     's_trt_rmst': restricted_mean_survival_time(kmf_low_cet_f_iptw, 48),
     's_trt_rmst_95': fire_low_rmst_mos_95.rmst_A_95,
     's_cont_rmst': restricted_mean_survival_time(kmf_low_bev_f_iptw, 48),
     's_cont_rmst_95': fire_low_rmst_mos_95.rmst_B_95,
     's_diff_rmst': restricted_mean_survival_time(kmf_low_cet_f_iptw, 48) - restricted_mean_survival_time(kmf_low_bev_f_iptw, 48),
     's_diff_rmst_95': fire_low_rmst_mos_95.difference_rmst_95,
     'scount': fire.query('risk_score <= @low_cutoff_fire').shape[0]},
    
    {'trial_name': 'FIRE-3', 
     'risk_group': 'medium', 
     's_trt_mos': cet_fire_median_os[1],
     's_trt_mos_95': fire_med_rmst_mos_95.mos_A_95,
     's_cont_mos': bev_fire_median_os[1],
     's_cont_mos_95': fire_med_rmst_mos_95.mos_B_95,
     's_mos_diff': cet_fire_median_os[1] - bev_fire_median_os[1], 
     'rct_trt_arm': 33.1, 
     'rct_cont_arm': 25.6,
     'rct_mos_diff': 33.1-25.6,
     's_trt_rmst': restricted_mean_survival_time(kmf_med_cet_f_iptw, 48),
     's_trt_rmst_95': fire_med_rmst_mos_95.rmst_A_95,
     's_cont_rmst': restricted_mean_survival_time(kmf_med_bev_f_iptw, 48),
     's_cont_rmst_95': fire_med_rmst_mos_95.rmst_B_95,
     's_diff_rmst': restricted_mean_survival_time(kmf_med_cet_f_iptw, 48) - restricted_mean_survival_time(kmf_med_bev_f_iptw, 48),
     's_diff_rmst_95': fire_med_rmst_mos_95.difference_rmst_95,
     'scount': fire.query('risk_score < @high_cutoff_fire and risk_score > @low_cutoff_fire').shape[0]},
    
    {'trial_name': 'FIRE-3', 
     'risk_group': 'high', 
     's_trt_mos': cet_fire_median_os[2],
     's_trt_mos_95': fire_high_rmst_mos_95.mos_A_95,
     's_cont_mos': bev_fire_median_os[2],
     's_cont_mos_95': fire_high_rmst_mos_95.mos_B_95,
     's_mos_diff': cet_fire_median_os[2] - bev_fire_median_os[2], 
     'rct_trt_arm': 33.1, 
     'rct_cont_arm': 25.6,
     'rct_mos_diff': 33.1-25.6,
     's_trt_rmst': restricted_mean_survival_time(kmf_high_cet_f_iptw, 48),
     's_trt_rmst_95': fire_high_rmst_mos_95.rmst_A_95,
     's_cont_rmst': restricted_mean_survival_time(kmf_high_bev_f_iptw, 48),
     's_cont_rmst_95': fire_high_rmst_mos_95.rmst_B_95,
     's_diff_rmst': restricted_mean_survival_time(kmf_high_cet_f_iptw, 48) - restricted_mean_survival_time(kmf_high_bev_f_iptw, 48),
     's_diff_rmst_95': fire_high_rmst_mos_95.difference_rmst_95,
     'scount': fire.query('risk_score >= @high_cutoff_fire').shape[0]},
    
    {'trial_name': 'FIRE-3', 
     'risk_group': 'all', 
     's_hr': fire_hr_all.hazard_ratios_['fxi_cet'],
     's_hr_95': [fire_hr_all.summary.loc['fxi_cet']['exp(coef) lower 95%'], fire_hr_all.summary.loc['fxi_cet']['exp(coef) upper 95%']],
     's_trt_mos': cet_fire_median_os[3],
     's_trt_mos_95': fire_all_rmst_mos_95.mos_A_95,
     's_cont_mos': bev_fire_median_os[3],
     's_cont_mos_95': fire_all_rmst_mos_95.mos_B_95,
     's_mos_diff': cet_fire_median_os[3] - bev_fire_median_os[3], 
     'rct_trt_arm': 33.1, 
     'rct_cont_arm': 25.6,
     'rct_mos_diff': 33.1-25.6,
     'scount': fire.shape[0]}
]

In [158]:
strials_mos_rmst_boot = pd.DataFrame(fire3_data)

In [159]:
strials_mos_rmst_boot

Unnamed: 0,trial_name,risk_group,s_trt_mos,s_trt_mos_95,s_cont_mos,s_cont_mos_95,s_mos_diff,rct_trt_arm,rct_cont_arm,rct_mos_diff,s_trt_rmst,s_trt_rmst_95,s_cont_rmst,s_cont_rmst_95,s_diff_rmst,s_diff_rmst_95,scount,s_hr,s_hr_95
0,FIRE-3,low,53.9,"[45.152499999999996, 79.33333333333333]",46.733333,"[42.994166666666665, 50.57749999999999]",7.166667,33.1,25.6,7.5,38.447485,"[36.128892458046465, 40.84760648082756]",36.753086,"[35.810911645959685, 37.721640153783305]",1.694399,"[-0.8533096315383679, 4.309099944690377]",1158,,
1,FIRE-3,medium,31.366667,"[25.766666666666666, 43.5]",26.933333,"[25.6, 28.8]",4.433333,33.1,25.6,7.5,30.194797,"[27.19359859006796, 33.24377074874571]",28.209363,"[27.188743742795022, 29.186498700492457]",1.985434,"[-1.104208624517873, 5.063939746429041]",1133,,
2,FIRE-3,high,12.9,"[9.366666666666667, 16.533333333333335]",13.033333,"[12.233333333333333, 14.033333333333333]",-0.133333,33.1,25.6,7.5,18.199232,"[14.713033707312153, 21.92632228070997]",16.298619,"[15.395196539753334, 17.214082317474574]",1.900613,"[-1.7935344067459034, 5.816671226035976]",972,,
3,FIRE-3,all,32.4,"[28.366666666666667, 36.266666666666666]",26.433333,"[25.166666666666668, 27.868333333333332]",5.966667,33.1,25.6,7.5,,,,,,,3263,0.893247,"[0.770938971358007, 1.0349585661346778]"


In [160]:
strials_mos_rmst_boot.to_csv('strials_mos_rmst_boot.csv', index = False)