# Assess ensemble model performance on validation data for los

In [46]:
import pickle
import pandas as pd
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score

#import logR los model
logR_los = pickle.load(open('ensb_logreg_los.pkl', 'rb'))

In [47]:
#load validation los probs from text and RF

validation_los_RF = pd.read_csv('los_probability_sd_rf_validation 1.csv')
validation_los_RF = validation_los_RF.rename(columns={"1": "RF_val_los"})

validation_los_text = pd.read_csv('text_los_probability_validation_18042024.csv')
validation_los_text.loc[validation_los_text['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text.loc[validation_los_text['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text['los_icu_class'] = pd.to_numeric(validation_los_text['los_icu_class'])

validation_los_text = validation_los_text[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_combined = pd.concat([validation_los_text, validation_los_RF],join='outer', axis=1)

validation_los_combined

Unnamed: 0,los_icu_class,greater than or equal to 3 days,0,RF_val_los,id
0,1,0.490637,0.650981,0.349019,20003425
1,0,0.461720,0.627598,0.372402,20008724
2,1,0.553635,0.466449,0.533551,20009330
3,1,0.352949,0.390995,0.609005,20014219
4,0,0.507628,0.716538,0.283462,20015722
...,...,...,...,...,...
4076,0,0.496118,0.535644,0.464356,29985535
4077,1,0.756874,0.496586,0.503414,29987115
4078,1,0.679929,0.469366,0.530634,29988601
4079,0,0.278434,0.562977,0.437023,29988615


In [48]:
#assess model with validation data

los_predictors = validation_los_combined[['greater than or equal to 3 days',
                                 "RF_val_los"]]
predicted_los = logR_los.predict(los_predictors)
print('AUROC:', roc_auc_score(validation_los_combined['los_icu_class'], predicted_los))
print('Precision', precision_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))

AUROC: 0.6408551644314044
Precision 0.6472874996544505
Recall 0.6408551644314044
F1 score: 0.6384259342987194


Feature names unseen at fit time:
- RF_val_los
- greater than or equal to 3 days
Feature names seen at fit time, yet now missing:
- RF_los
- long_stay_text



# Next best models - assume all alive, assume all dead and get worse case scenarios and best case scenarios

In [49]:
#assume all alive data
validation_los_RF_all_alive = pd.read_csv('los_probability_sd_rf_valid_assumed_alive.csv')
validation_los_RF_all_alive = validation_los_RF_all_alive.rename(columns={"1": "RF_val_los_alive"})


validation_los_text_all_alive = pd.read_csv('text_los_probability_validation_assumed_survive.csv')
validation_los_text_all_alive.loc[validation_los_text_all_alive['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text_all_alive.loc[validation_los_text_all_alive['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text_all_alive['los_icu_class'] = pd.to_numeric(validation_los_text_all_alive['los_icu_class'])

validation_los_text_all_alive = validation_los_text_all_alive[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_all_alive_combined = pd.concat([validation_los_text_all_alive, validation_los_RF_all_alive],join='outer', axis=1)
validation_los_all_alive_combined

Unnamed: 0,los_icu_class,greater than or equal to 3 days,0,RF_val_los_alive,id
0,1,0.490637,0.650981,0.349019,20003425
1,0,0.461720,0.627598,0.372402,20008724
2,1,0.553635,0.466449,0.533551,20009330
3,1,0.352949,0.390995,0.609005,20014219
4,0,0.507628,0.716538,0.283462,20015722
...,...,...,...,...,...
4076,0,0.496118,0.535644,0.464356,29985535
4077,1,0.756874,0.496586,0.503414,29987115
4078,1,0.679929,0.469366,0.530634,29988601
4079,0,0.278434,0.562977,0.437023,29988615


In [50]:
#assume all dead data
validation_los_RF_all_dead = pd.read_csv('los_probability_sd_rf_valid_assumed_dead.csv')
validation_los_RF_all_dead = validation_los_RF_all_dead.rename(columns={"1": "RF_val_los_dead"})

validation_los_text_all_dead = pd.read_csv('text_los_probability_validation_assumed_died.csv')
validation_los_text_all_dead.loc[validation_los_text_all_dead['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text_all_dead.loc[validation_los_text_all_dead['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text_all_dead['los_icu_class'] = pd.to_numeric(validation_los_text_all_dead['los_icu_class'])

validation_los_text_all_dead = validation_los_text_all_dead[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_all_dead_combined = pd.concat([validation_los_text_all_dead, validation_los_RF_all_dead],join='outer', axis=1)
validation_los_all_dead_combined

Unnamed: 0,los_icu_class,greater than or equal to 3 days,0,RF_val_los_dead,id
0,1,0.544160,0.539321,0.460679,20003425
1,0,0.515280,0.509387,0.490613,20008724
2,1,0.605857,0.405394,0.594606,20009330
3,1,0.403346,0.325375,0.674625,20014219
4,0,0.560964,0.537366,0.462634,20015722
...,...,...,...,...,...
4076,0,0.549594,0.482511,0.517489,29985535
4077,1,0.794159,0.425601,0.574399,29987115
4078,1,0.724721,0.389852,0.610148,29988601
4079,0,0.323511,0.404805,0.595195,29988615


In [51]:
#all alive los prediction
los_predictors_all_alive = validation_los_all_alive_combined[['greater than or equal to 3 days',
                                 "RF_val_los_alive"]] 
predicted_los_all_alive = logR_los.predict(los_predictors_all_alive)
print('AUROC:', roc_auc_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive))
print('Precision', precision_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))

los_predictors_all_alive

AUROC: 0.6406036553770784
Precision 0.6470562877201627
Recall 0.6406036553770784
F1 score: 0.6381561671958177


Feature names unseen at fit time:
- RF_val_los_alive
- greater than or equal to 3 days
Feature names seen at fit time, yet now missing:
- RF_los
- long_stay_text



Unnamed: 0,greater than or equal to 3 days,RF_val_los_alive
0,0.490637,0.349019
1,0.461720,0.372402
2,0.553635,0.533551
3,0.352949,0.609005
4,0.507628,0.283462
...,...,...
4076,0.496118,0.464356
4077,0.756874,0.503414
4078,0.679929,0.530634
4079,0.278434,0.437023


In [52]:
#all dead los prediction
los_predictors_all_dead = validation_los_all_dead_combined[['greater than or equal to 3 days',
                                 "RF_val_los_dead"]]
predicted_los_all_dead = logR_los.predict(los_predictors_all_dead)
print('AUROC:', roc_auc_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead))
print('Precision', precision_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))

los_predictors_all_dead

AUROC: 0.619825138119688
Precision 0.6265608190813003
Recall 0.619825138119688
F1 score: 0.612638220293106


Feature names unseen at fit time:
- RF_val_los_dead
- greater than or equal to 3 days
Feature names seen at fit time, yet now missing:
- RF_los
- long_stay_text



Unnamed: 0,greater than or equal to 3 days,RF_val_los_dead
0,0.544160,0.460679
1,0.515280,0.490613
2,0.605857,0.594606
3,0.403346,0.674625
4,0.560964,0.462634
...,...,...
4076,0.549594,0.517489
4077,0.794159,0.574399
4078,0.724721,0.610148
4079,0.323511,0.595195


# Conservative predictions, i.e. take long stay if one predicts long and one predicts short

In [53]:
combined_predictions = pd.DataFrame({
                               'All alive':predicted_los_all_alive,
                               'All dead':predicted_los_all_dead,
                                     'Actual': validation_los_combined['los_icu_class'],
                                    })
combined_predictions['Most conservative'] = combined_predictions[[
                                                                  "All alive",
                                                                  "All dead"]].max(axis=1)


print('AUROC:', roc_auc_score(combined_predictions['Actual'], combined_predictions['Most conservative']))
print('Precision', precision_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))
print('Recall', recall_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))
print('F1 score:', f1_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))

combined_predictions

AUROC: 0.619825138119688
Precision 0.6265608190813003
Recall 0.619825138119688
F1 score: 0.612638220293106


Unnamed: 0,All alive,All dead,Actual,Most conservative
0,0,0,1,0
1,0,0,0,0
2,1,1,1,1
3,0,0,1,0
4,0,1,0,1
...,...,...,...,...
4076,0,1,0,1
4077,1,1,1,1
4078,1,1,1,1
4079,0,0,0,0


# Lax predictions, i.e. take short stay over long when there is disagreement

In [54]:
combined_predictions['Most Lax'] = combined_predictions[[
                                                        "All alive",
                                                        "All dead"]].min(axis=1)

print('AUROC:', roc_auc_score(combined_predictions['Actual'], combined_predictions['Most Lax']))
print('Precision', precision_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))
print('Recall', recall_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))
print('F1 score:', f1_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))

AUROC: 0.6406036553770784
Precision 0.6470562877201627
Recall 0.6406036553770784
F1 score: 0.6381561671958177
