# Predict aggregate demand for beds

At set times of day, we want to know how many beds will be needed for the patients who are in the ED at that time.

For aggregation
- evaluation is done for times of day agreed with users

## Set up the notebook environment

In [None]:
# Reload functions every time
%load_ext autoreload 
%autoreload 2

In [1]:
from pathlib import Path
import sys
import json
import pickle


# Root folders, both resolved relative to the current user's home directory
home_dir = Path.home()
PROJECT_ROOT = home_dir / 'HyMind'
USER_ROOT = home_dir / 'work'

# Patient flow package: make its source and helper-function folders importable
patientflow_root = USER_ROOT / 'patientflow'
for import_dir in (
    patientflow_root / 'src' / 'patientflow',
    patientflow_root / 'functions',
):
    sys.path.append(str(import_dir))




In [None]:
# Folder passed to prepare_for_inference() when loading the trained models
model_file_path = PROJECT_ROOT / 'data' / 'ed-predictor' / 'trained-models'

data_file_path = USER_ROOT / 'ed-predictor' / 'data-raw'

# Output folders used by later cells when saving the probability
# distributions (pickles) and the Q-Q plots (png). They were previously never
# defined in this notebook, so a fresh kernel failed with NameError.
# NOTE(review): the probability-distributions location is inferred from the
# pickle paths loaded in the comparison cell, and the media location is an
# assumption - confirm both against the intended project layout.
prob_dist_file_path = PROJECT_ROOT / 'dissemination' / 'model-output' / 'probability-distributions'
media_file_path = PROJECT_ROOT / 'dissemination' / 'media'

# Make sure the output folders exist before anything is written to them
for output_dir in (prob_dist_file_path, media_file_path):
    output_dir.mkdir(parents=True, exist_ok=True)

# Only the last expression of a cell is displayed, so show all paths together
{
    'model_file_path': model_file_path,
    'data_file_path': data_file_path,
    'prob_dist_file_path': prob_dist_file_path,
    'media_file_path': media_file_path,
}

## Load parameters

These are set in config.yaml. You can change these for your own purposes, but the times of day will need to match those in the provided dataset if you want to run this notebook successfully.

In [2]:
# Read the notebook settings, including the times of day, from config.yaml
import yaml

config_path = USER_ROOT / 'patientflow'

with (config_path / 'config.yaml').open('r') as config_file:
    config = yaml.safe_load(config_file)

# The prediction times are stored in the config as lists; turn each one into a
# tuple so that we end up with a list of tuples
prediction_times = list(map(tuple, config['prediction_times']))

# Show the times of day at which predictions will be made
prediction_times

FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/patientflow/config.yaml'

## Load data

In [None]:
from ed_admissions_data_retrieval import ed_admissions_get_data
# Location of the ED visits CSV.
# NOTE(review): this is a relative path, so it is resolved against the
# notebook's current working directory - confirm that is the intended location.
PATH_ED = 'HyMind/dissemination/data-raw/ED_visits.csv'

# Load the ED visits; the result is referred to as df in the rest of the notebook
df = ed_admissions_get_data(PATH_ED)

In [None]:
# Preview the first rows of the loaded ED visits data
df.head()

In [None]:
# Print the index labels of the rows that belong to the test set
test_set_rows = df[df.training_validation_test == 'test']
print(test_set_rows.index)


## Set an index column in df

Setting episode_slice_id as the index before subsetting means that we retain the same values of episode_slice_id throughout the entire process, so that they stay consistent across the original dataset df and the subsets train_df_single and test_df_single.

After executing the code below, the episode_slice_id has been set as the index column.

In [None]:
# Only set the index when it is not already episode_slice_id, so that the cell
# can be re-run without error
index_already_set = df.index.name == 'episode_slice_id'
if not index_already_set:
    df = df.set_index('episode_slice_id')


In [None]:
# Order the rows by horizon date, then by prediction time (sorted in place)
sort_columns = ['horizon_dt', 'prediction_time']
df.sort_values(by=sort_columns, inplace=True)

In [None]:
# Display df after it has been indexed by episode_slice_id and sorted
df

## Generate aggregate predictions for each time of day

In [None]:
from ed_admissions_helper_functions import (
    prepare_for_inference,
    get_model_name,
    prepare_episode_slices_dict,
)
PATH_ED = 'HyMind/dissemination/data-raw/ED_visits.csv'
from predict.emergency_demand.from_individual_probs import get_prob_dist

# Probability distributions, keyed by the name of the model for each time of day
prob_dist_dict_all = {}

# Process each time of day
for prediction_time_ in prediction_times:

    print("\nProcessing :" + str(prediction_time_))

    MODEL__ED_ADMISSIONS__NAME = get_model_name('ed_admission', prediction_time_)

    # prepare data
    X_test, y_test, model = prepare_for_inference(
        model_file_path,
        'ed_admission',
        prediction_time=prediction_time_,
        data_path=PATH_ED,
        single_episode_slice_per_visit=False,
    )

    # rows of df in the test set for this time of day (used for the check and
    # for building the episode slices dictionary below)
    test_rows = df[(df.training_validation_test == 'test') & (df.prediction_time == prediction_time_)]

    # Consistency check: the number of admissions in df should match y_test.
    # The comparison used to be evaluated and thrown away; a mismatch is now
    # reported, without stopping the run.
    admitted_in_df = test_rows['is_admitted'].sum()
    admitted_in_y_test = sum(y_test)
    if admitted_in_df != admitted_in_y_test:
        print(
            f"WARNING: admitted count differs for {prediction_time_}: "
            f"{admitted_in_df} in df vs {admitted_in_y_test} in y_test"
        )

    # select the horizon dates to include in the probability distribution, with the episode slices associated with them
    episode_slices_dict = prepare_episode_slices_dict(test_rows)

    # get probability distribution for this time of day
    prob_dist_dict_all[MODEL__ED_ADMISSIONS__NAME] = get_prob_dist(
        episode_slices_dict, X_test, y_test, model
    )

    # use model name in the path for saving the prob dist
    full_path = prob_dist_file_path / MODEL__ED_ADMISSIONS__NAME
    full_path = full_path.with_suffix('.pickle')

    with open(full_path, 'wb') as f:  # 'wb': pickle writes binary data
        pickle.dump(prob_dist_dict_all[MODEL__ED_ADMISSIONS__NAME], f)
    
    
    
    
    

## Plot one horizon date as an example

In [None]:
from viz.prob_dist_plot import prob_dist_plot

# episode_slices_dict and MODEL__ED_ADMISSIONS__NAME still hold the values from
# the final iteration of the loop above, so this plots the first horizon date
# for the last time of day that was processed.
dt = list(episode_slices_dict.keys())[0]

# No time window has been applied to this distribution, so the title must not
# mention one (the last four characters of the model name are the time of day).
title_ = f'Probability distribution for beds needed for patients in ED at {dt} {MODEL__ED_ADMISSIONS__NAME[-4:]}'
prob_dist_plot(prob_dist_data=prob_dist_dict_all[MODEL__ED_ADMISSIONS__NAME][dt]['pred_demand'], title_=title_,  include_titles=True)

## Plot qq plot for each time of day

In [None]:
from viz.qq_plot import qq_plot
from ed_admissions_helper_functions import get_model_name

# media_file_path (the folder the plots are saved to) must be defined before
# this cell is run.
for prediction_time_ in prediction_times:

    print("\nProcessing :" + str(prediction_time_))

    # Use the same naming helper as the cell that filled prob_dist_dict_all, so
    # that the key looked up here always matches the key that was stored.
    MODEL__ED_ADMISSIONS__NAME = get_model_name('ed_admission', prediction_time_)
    print(MODEL__ED_ADMISSIONS__NAME)

    # Zero-pad both parts so that e.g. (9, 5) is shown as 0905
    hour_, min_ = prediction_time_
    title_ = f'Q-Q Plot for emergency demand predictions at {hour_:02}{min_:02}'

    prob_dist_dict = prob_dist_dict_all[MODEL__ED_ADMISSIONS__NAME]

    # horizon_dts was never defined in this notebook. The distributions are
    # looked up by horizon date elsewhere, so take the dates from the keys.
    # NOTE(review): assumes qq_plot expects exactly these keys - confirm.
    horizon_dts = list(prob_dist_dict.keys())

    # Not bound to the name plt, to avoid shadowing the usual matplotlib alias
    qq_figure = qq_plot(horizon_dts, prob_dist_dict, title_)

    # use model name in the path for saving the plot
    qq_plot_path = media_file_path / MODEL__ED_ADMISSIONS__NAME
    qq_plot_path = qq_plot_path.with_suffix('.png')

    qq_figure.savefig(qq_plot_path)
        


## Applying a time window

So far, we have created a probability distribution for the number of beds needed at some point, by the set of patients currently in ED. Now consider that we want to predict bed demand within a time window of x hours. 

At the time of prediction, a patient might have just arrived, or might have already been in the ED for some hours. Their probability of being admitted within x hours differs depending on how long they have already been in the department.

In [None]:
from ed_admissions_helper_functions import (
    prepare_for_inference,
    get_model_name,
    prepare_episode_slices_dict,
)
PATH_ED = 'HyMind/dissemination/data-raw/ED_visits.csv'
from predict.emergency_demand.from_individual_probs import get_prob_dist

from predict.emergency_demand.admission_in_time_window_using_aspirational_curve import create_curve, calculate_probability


# Time-window-weighted probability distributions, keyed by model name
prob_dist_dict_all_in_window = {}

# The window length does not depend on the time of day, so compute it once.
# NOTE(review): dividing by 60 presumes config['time_window'] is in minutes - confirm.
time_window_hrs = config['time_window']/60

# Process each time of day
for prediction_time_ in prediction_times:

    print("\nProcessing :" + str(prediction_time_))

    MODEL__ED_ADMISSIONS__NAME = get_model_name('ed_admission', prediction_time_)

    # prepare data
    X_test, y_test, model = prepare_for_inference(
        model_file_path,
        'ed_admission',
        prediction_time=prediction_time_,
        data_path=PATH_ED,
        single_episode_slice_per_visit=False,
    )

    # rows of df in the test set for this time of day (used for the check and
    # for building the episode slices dictionary below)
    test_rows = df[(df.training_validation_test == 'test') & (df.prediction_time == prediction_time_)]

    # Consistency check: the number of admissions in df should match y_test.
    # The comparison used to be evaluated and thrown away; a mismatch is now
    # reported, without stopping the run.
    admitted_in_df = test_rows['is_admitted'].sum()
    admitted_in_y_test = sum(y_test)
    if admitted_in_df != admitted_in_y_test:
        print(
            f"WARNING: admitted count differs for {prediction_time_}: "
            f"{admitted_in_df} in df vs {admitted_in_y_test} in y_test"
        )

    X_test_admission_in_window_prob = X_test[['elapsed_los_td']].copy()

    # NOTE(review): dividing by 3600 presumes elapsed_los_td is in seconds - confirm.
    X_test_admission_in_window_prob['elapsed_los_td_hrs'] = X_test_admission_in_window_prob['elapsed_los_td']/3600

    # get probability of admission within time window
    weights = X_test_admission_in_window_prob.apply(lambda row: calculate_probability(row['elapsed_los_td_hrs'], time_window_hrs, x1 = 4, y1 = 0.76, x2 = 12, y2 = .99), axis=1)

    # select the horizon dates to include in the probability distribution, with the episode slices associated with them
    episode_slices_dict = prepare_episode_slices_dict(test_rows)

    # get probability distribution for this time of day
    prob_dist_dict_all_in_window[MODEL__ED_ADMISSIONS__NAME] = get_prob_dist(
        episode_slices_dict, X_test, y_test, model, weights = weights
    )

    # use model name in the path for saving the prob dist
    full_path = prob_dist_file_path / str(MODEL__ED_ADMISSIONS__NAME + '_in_time_window')
    full_path = full_path.with_suffix('.pickle')

    with open(full_path, 'wb') as f:  # 'wb': pickle writes binary data
        pickle.dump(prob_dist_dict_all_in_window[MODEL__ED_ADMISSIONS__NAME], f)
    
    
    

## Compare one horizon date with and without admission in time window

In [None]:
# Path of the most recently written pickle (set in the last iteration of the loop above)
full_path

In [None]:
from viz.prob_dist_plot import prob_dist_plot

# episode_slices_dict was last filled for the final prediction time, so the
# distributions loaded below must be the ones for that same time of day.
prediction_time_ = prediction_times[-1]
MODEL__ED_ADMISSIONS__NAME = get_model_name('ed_admission', prediction_time_)
dt = list(episode_slices_dict.keys())[0]

# Build the pickle paths in the same way as the cells that saved them, instead
# of hard-coding one user's home directory and one time of day.
prob_dict_path = (prob_dist_file_path / MODEL__ED_ADMISSIONS__NAME).with_suffix('.pickle')
prob_dict_in_window_path = (
    prob_dist_file_path / str(MODEL__ED_ADMISSIONS__NAME + '_in_time_window')
).with_suffix('.pickle')

# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(prob_dict_path, 'rb') as f:  # 'rb': pickle reads binary data
    prob_dict = pickle.load(f)

with open(prob_dict_in_window_path, 'rb') as f:
    prob_dict_in_window = pickle.load(f)

title_ = f'Probability distribution for beds needed for patients in ED at {dt} {MODEL__ED_ADMISSIONS__NAME[-4:]}'
prob_dist_plot(prob_dict[dt]['pred_demand'], title_=title_,  include_titles=True)

title_ = f'Probability distribution for beds needed in time window for patients in ED at {dt} {MODEL__ED_ADMISSIONS__NAME[-4:]}'
prob_dist_plot(prob_dict_in_window[dt]['pred_demand'], title_=title_,  include_titles=True)

In [None]:
# Admission-in-time-window weights from the last iteration of the time-window loop
weights