# Assess ensemble model performance on validation data for los

In [1]:
import pickle
import pandas as pd
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score

#import logR los model
logR_los = pickle.load(open('ensb_logreg_los.pkl', 'rb'))



In [2]:
#load validation los probs from text and LSTM

validation_los_LSTM = pd.read_csv('lstm_probs_of_los_valid 1.csv')

validation_los_text = pd.read_csv('text_los_probability_validation_170424.csv')
validation_los_text.loc[validation_los_text['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text.loc[validation_los_text['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text['los_icu_class'] = pd.to_numeric(validation_los_text['los_icu_class'])

validation_los_text = validation_los_text[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_combined = pd.concat([validation_los_text, validation_los_LSTM],join='outer', axis=1)

In [5]:
#assess model with validation data

los_predictors = validation_los_combined[['greater than or equal to 3 days',
                                 "probs_of_los"]]
predicted_los = logR_los.predict(los_predictors)
print('AUROC:', roc_auc_score(validation_los_combined['los_icu_class'], predicted_los))
print('Precision', precision_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_combined['los_icu_class'], predicted_los, average='macro', zero_division=0))

AUROC: 0.5061236987140233
Precision 0.5062462462462463
Recall 0.5061236987140233
F1 score: 0.5025964402641608


Feature names unseen at fit time:
- greater than or equal to 3 days
- probs_of_los
Feature names seen at fit time, yet now missing:
- LSTM_los
- long_stay_text



# Next best models - assume all alive, assume all dead and get worse case scenarios and best case scenarios

In [7]:
#assume all alive data
validation_los_LSTM_all_alive = pd.read_csv('lstm_probs_of_los_assumed_alive 1.csv')

validation_los_text_all_alive = pd.read_csv('text_los_probability_validation_assumed_survive.csv')
validation_los_text_all_alive.loc[validation_los_text_all_alive['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text_all_alive.loc[validation_los_text_all_alive['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text_all_alive['los_icu_class'] = pd.to_numeric(validation_los_text_all_alive['los_icu_class'])

validation_los_text_all_alive = validation_los_text_all_alive[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_all_alive_combined = pd.concat([validation_los_text_all_alive, validation_los_LSTM_all_alive],join='outer', axis=1)
validation_los_all_alive_combined

Unnamed: 0,los_icu_class,greater than or equal to 3 days,probs_of_los_assumed_alive
0,1,0.490637,0.405983
1,0,0.461720,0.274383
2,1,0.553635,0.162815
3,1,0.352949,0.330573
4,0,0.507628,0.213674
...,...,...,...
4076,0,0.496118,0.298234
4077,1,0.756874,0.889351
4078,1,0.679929,0.402790
4079,0,0.278434,0.388519


In [8]:
#assume all dead data
validation_los_LSTM_all_dead = pd.read_csv('lstm_probs_of_los_assumed_dead 1.csv')

validation_los_text_all_dead = pd.read_csv('text_los_probability_validation_assumed_died.csv')
validation_los_text_all_dead.loc[validation_los_text_all_dead['los_icu_class'] == 'less than 3 days', 'los_icu_class'] = 0
validation_los_text_all_dead.loc[validation_los_text_all_dead['los_icu_class'] == 'greater than or equal to 3 days', 'los_icu_class'] = 1
validation_los_text_all_dead['los_icu_class'] = pd.to_numeric(validation_los_text_all_dead['los_icu_class'])

validation_los_text_all_dead = validation_los_text_all_dead[['los_icu_class', 'greater than or equal to 3 days']]
validation_los_all_dead_combined = pd.concat([validation_los_text_all_dead, validation_los_LSTM_all_dead],join='outer', axis=1)
validation_los_all_dead_combined

Unnamed: 0,los_icu_class,greater than or equal to 3 days,probs_of_los_assumed_dead
0,1,0.544160,0.662695
1,0,0.515280,0.928441
2,1,0.605857,0.221589
3,1,0.403346,0.654000
4,0,0.560964,0.857901
...,...,...,...
4076,0,0.549594,0.902852
4077,1,0.794159,0.884625
4078,1,0.724721,0.822771
4079,0,0.323511,0.986257


In [9]:
#all alive los prediction
los_predictors_all_alive = validation_los_all_alive_combined[['greater than or equal to 3 days',
                                 "probs_of_los_assumed_alive"]]
predicted_los_all_alive = logR_los.predict(los_predictors_all_alive)
print('AUROC:', roc_auc_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive))
print('Precision', precision_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_all_alive_combined['los_icu_class'], predicted_los_all_alive, average='macro', zero_division=0))

AUROC: 0.5994873925829223
Precision 0.6022693134372606
Recall 0.5994873925829223
F1 score: 0.5978387298219552


Feature names unseen at fit time:
- greater than or equal to 3 days
- probs_of_los_assumed_alive
Feature names seen at fit time, yet now missing:
- LSTM_los
- long_stay_text



In [10]:
#all dead los prediction
los_predictors_all_dead = validation_los_all_dead_combined[['greater than or equal to 3 days',
                                 "probs_of_los_assumed_dead"]]
predicted_los_all_dead = logR_los.predict(los_predictors_all_dead)
print('AUROC:', roc_auc_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead))
print('Precision', precision_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))
print('Recall', recall_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))
print('F1 score:', f1_score(validation_los_all_dead_combined['los_icu_class'], predicted_los_all_dead, average='macro', zero_division=0))

AUROC: 0.535957383094554
Precision 0.5554470728191141
Recall 0.535957383094554
F1 score: 0.4869967519714669


Feature names unseen at fit time:
- greater than or equal to 3 days
- probs_of_los_assumed_dead
Feature names seen at fit time, yet now missing:
- LSTM_los
- long_stay_text



# Conservative predictions, i.e. take long stay if one predicts long and one predicts short

In [14]:
combined_predictions = pd.DataFrame({
                               'All alive':predicted_los_all_alive,
                               'All dead':predicted_los_all_dead,
                                     'Actual': validation_los_combined['los_icu_class'],
                                    })
combined_predictions['Most conservative'] = combined_predictions[[
                                                                  "All alive",
                                                                  "All dead"]].max(axis=1)


print('AUROC:', roc_auc_score(combined_predictions['Actual'], combined_predictions['Most conservative']))
print('Precision', precision_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))
print('Recall', recall_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))
print('F1 score:', f1_score(combined_predictions['Actual'], combined_predictions['Most conservative'], average='macro', zero_division=0))

AUROC: 0.5437062172365296
Precision 0.5777234281709043
Recall 0.5437062172365296
F1 score: 0.48284684915059073


# Lax predictions, i.e. take short stay over long when there is disagreement

In [15]:
combined_predictions['Most Lax'] = combined_predictions[[#"Using predicted death", 
                                                                  "All alive",
                                                                  "All dead"]].min(axis=1)

print('AUROC:', roc_auc_score(combined_predictions['Actual'], combined_predictions['Most Lax']))
print('Precision', precision_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))
print('Recall', recall_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))
print('F1 score:', f1_score(combined_predictions['Actual'], combined_predictions['Most Lax'], average='macro', zero_division=0))

AUROC: 0.5917385584409467
Precision 0.5970599528066802
Recall 0.5917385584409467
F1 score: 0.5876738312737715
