In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell is executed, so edits to the helper .py files are picked up
# without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [None]:
from pathlib import Path
import sys
import json
import pandas as pd
import numpy as np
import os


# Everything is anchored on the user's home directory
USER_ROOT = Path.home()
PROJECT_ROOT = USER_ROOT / 'HyMind'

# Make two folders of the patientflow checkout importable, in this order:
#   'patientflow' - the patient flow package itself
#   'functions'   - functions that sit outside the package
for code_dir in ('patientflow', 'functions'):
    sys.path.append(str(USER_ROOT / 'patientflow' / code_dir))


In [None]:


# data_file_path = PROJECT_ROOT / 'dissemination' / 'data-raw'
# data_file_path

# Folder the trained models are read from
model_file_path = PROJECT_ROOT.joinpath('dissemination', 'model-output', 'trained-models')

# Folder the figures are written to; created (including parents) if it does not exist yet
media_file_path = PROJECT_ROOT.joinpath('dissemination', 'media')
media_file_path.mkdir(parents=True, exist_ok=True)

## Load parameters

These are set in `config.yaml`. You can change them for your own purposes, but the times of day must match those in the provided dataset if you want this notebook to run successfully.

In [None]:
# Load the notebook parameters from config.yaml
import yaml

config_path = PROJECT_ROOT / 'dissemination'

with open(config_path / 'config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Times of day at which predictions will be made: stored in the config as lists,
# converted here to a list of tuples (unpacked later as hour, minute)
tod = [tuple(item) for item in config['tod']]

# Curve parameters; passed to the yet-to-arrive models' predict() further down
x1 = float(config['x1'])
y1 = float(config['y1'])
x2 = float(config['x2'])
y2 = float(config['y2'])

# Prediction window; divided by 60 later to get hours.
# Hard-coded here rather than read from config['time_window'].
time_window = 480

# See the times of day at which predictions will be made.
# This must be the last expression in the cell, otherwise nothing is displayed.
tod

# Load data and select snapshot to plot

In [None]:
from ed_admissions_data_retrieval import ed_admissions_get_data

# Raw ED visits file. NOTE(review): this is a relative path, unlike the
# PROJECT_ROOT-based paths above - confirm how ed_admissions_get_data resolves it.
PATH_ED = 'HyMind/dissemination/data-raw/ED_visits.csv'
df = ed_admissions_get_data(PATH_ED)

# Index the visits by episode_slice_id unless that is already the index
df = df.set_index('episode_slice_id') if df.index.name != 'episode_slice_id' else df

In [None]:

# Raw specialty file, loaded with the same helper as the ED visits
PATH_SPEC = 'HyMind/dissemination/data-raw/specialty.csv'
df_spec = ed_admissions_get_data(PATH_SPEC)

# Index by episode_slice_id unless that is already the index, so it lines up with df
df_spec = df_spec.set_index('episode_slice_id') if df_spec.index.name != 'episode_slice_id' else df_spec

In [None]:
# Pick one time of day and a random horizon_dt from the test set
# (random_state is fixed, so the same date is chosen on every run)
tod_ = tod[3]
random_horizon_dt = df.loc[df.training_validation_test == 'test', 'horizon_dt'].sample(n=1, random_state=42).iloc[0]

# Build the label used in titles and folder names, e.g. '<date> 15-30'.
# Minutes are zero-padded to two digits (0 -> '00', 5 -> '05', 30 -> '30');
# min_ is kept as a string because later cells interpolate it into titles.
hour_, min_ = tod_
min_ = f"{int(min_):02d}"
horizon_datetime = random_horizon_dt + ' ' + str(hour_) + '-' + min_

# create folders to save the plots in
full_file_path = media_file_path / horizon_datetime / 'full'
pipeline_file_path = media_file_path / horizon_datetime / 'pipeline'
os.makedirs(pipeline_file_path, exist_ok=True)
os.makedirs(full_file_path, exist_ok=True)

# get the data for this random time, excluding patients whose location is 'OTF'
ex = df[(df.horizon_dt == random_horizon_dt) & (df.time_of_day == tod_) & (df.current_ed_location != 'OTF')].copy()

# reduce number of locations for presentation purposes
# (locations missing from this mapping become NaN in loc_new)
mapping = {'UTC': 'Minors',
           'MAJORS': 'Majors/Resus',
           'SDEC': 'Other',
           'PAEDS': 'Other',
           'Waiting': 'Other',
           'RAT': 'Majors/Resus',
           'SDEC_Waiting': 'Other',
           'RESUS': 'Majors/Resus',
           'OTF': 'Other'}

categories = ['Majors/Resus', 'Minors', 'Other']  # Define order

ex['loc_new'] = ex['current_ed_location'].map(mapping)
ex['loc_new'] = pd.Categorical(ex['loc_new'], categories=categories, ordered=True)


In [None]:
# Display the 'full' output folder created above for this snapshot
full_file_path

## Set up plot

In [None]:
from plot_pipeline import create_colour_dict

# Settings shared by every figure below
full_output = True        # True: later cells take the 'full' branch and save to full_file_path
figsize_global = (6, 3)

# Colour lookup; later cells read its 'single' and 'spectrum' entries
spec_colour_dict = create_colour_dict()

## Figure a

In [None]:
from plot_pipeline import in_ED_now_plot

# Figure a: the selected snapshot, saved to whichever output folder is active
title_ = f'Patients in ED at {horizon_datetime}'
if not full_output:
    in_ED_now_plot(pipeline_file_path, 'Figure a', ex, horizon_datetime, figsize_global, title_,
                   text_size=20)
else:
    in_ED_now_plot(full_file_path, 'Full Figure a', ex, horizon_datetime, figsize_global, title_,
                   include_titles=True, size=20)

## Figure b

In [None]:
from ed_admissions_helper_functions import prepare_for_inference
from predict.emergency_demand.from_individual_probs import model_input_to_pred_proba

# load saved model (model_only=True: only the model is returned, no data)
model = prepare_for_inference(model_file_path = model_file_path, model_name = 'ed_admission', time_of_day = tod_, data_path = None, single_episode_slice_per_visit = False, model_only = True)

# get probabilities
# NOTE(review): episode_slices_in_ex is not used anywhere else in this cell
episode_slices_in_ex = ex.index[(ex.horizon_dt == random_horizon_dt)]
preds = model_input_to_pred_proba(ex, model)
# .values assigns by position, so preds is assumed to be in the same row order as ex
ex['preds'] = preds['pred_proba'].values

# Set title_ here (the original assigned `title`, which the calls below never read),
# so the cell no longer depends on a title_ left behind by an earlier cell
title_ = f'Patients in ED at {horizon_datetime}'
if full_output:
    in_ED_now_plot(full_file_path, 'Full Figure b', ex, horizon_datetime, figsize_global, title_, include_titles = True, size = 20, colour = True, colour_map = 'Spectral_r')# spec_colour_dict['spectrum']['all'])
else:
    in_ED_now_plot(pipeline_file_path, 'Figure b', ex, horizon_datetime, figsize_global, title_, text_size= 20, colour = True, colour_map = 'Spectral_r')


## Figure c

In [None]:
from predict.emergency_demand.from_individual_probs import get_prob_dist
from viz.prob_dist_plot import prob_dist_plot

from ed_admissions_helper_functions import prepare_for_inference, prepare_episode_slices_dict


# prepare data: with a data_path, prepare_for_inference returns test features, labels and the model
X_test, y_test, model = prepare_for_inference(model_file_path, 'ed_admission', time_of_day = tod_, data_path = PATH_ED, single_episode_slice_per_visit = False)

# episode slices in the test set for the chosen date and time of day
episode_slices_dict = prepare_episode_slices_dict(df[(df.training_validation_test == 'test') & (df.time_of_day == tod_) & (df.horizon_dt == random_horizon_dt)])
prob_dist = get_prob_dist(
    episode_slices_dict, X_test, y_test, model
)

# Figure c: distribution of the number of beds needed for the snapshot
if full_output:

    # wording aligned with the Figure i title ("beds needed for patients")
    title_ = f'Probability distribution for number of beds needed for patients in ED at {random_horizon_dt} {hour_}-{min_}'
    prob_dist_plot(prob_dist_data=prob_dist[random_horizon_dt]['pred_demand'], title_=title_, directory_path=full_file_path, figsize=figsize_global, include_titles=True, 
                   file_name = 'Full Figure c',
                  bar_colour = spec_colour_dict['single']['all'])
else:
    title_ = 'Figure c'
    prob_dist_plot(prob_dist_data=prob_dist[random_horizon_dt]['pred_demand'], title_=title_, directory_path=pipeline_file_path, figsize=figsize_global, text_size=20, file_name = 'Figure c',
                  bar_colour = spec_colour_dict['single']['all'])


### Figure d - currently done in powerpoint

In [None]:
# Load the specialty model on its own (model_only=True)
specialty_model = prepare_for_inference(model_file_path, 'ed_specialty', model_only=True)

# Show the stored weights for the key ('surgical',)
weights = specialty_model.weights
weights[('surgical',)]

In [None]:
# df_admitted_with_spec = df[(df.is_admitted)].merge(df_spec[['episode_slice_id','consultation_sequence', 'observed_specialty']] , on='episode_slice_id', how='left')
# df_admitted_with_spec.loc[df_admitted_with_spec.consultation_sequence.isnull(), 'consultation_sequence'] = 'None'

# spec_dict = {}
# spec_dict['-'] = spec_dict = {'-': {k[0].upper(): v for k, v in df_admitted_with_spec[df_admitted_with_spec.consultation_sequence == 'None']['observed_specialty'].value_counts(normalize=True).to_dict().items()}}


# spec_dict['-']['-m'] = {k[0].upper(): v for k, v in df_admitted_with_spec[df_admitted_with_spec.consultation_sequence == "('medical',)"]['observed_specialty'].value_counts(normalize=True).to_dict().items()}
# spec_dict['-']['-s'] = {k[0].upper(): v for k, v in df_admitted_with_spec[df_admitted_with_spec.consultation_sequence == "('surgical',)"]['observed_specialty'].value_counts(normalize=True).to_dict().items()}
# spec_dict['-']['-h'] = {k[0].upper(): v for k, v in df_admitted_with_spec[df_admitted_with_spec.consultation_sequence == "('haem_onc',)"]['observed_specialty'].value_counts(normalize=True).to_dict().items()}
# spec_dict#['-']['-m'].keys()


### Figure e - NOTE currently (I think) there is no data on consults for discharged patients

In [None]:
# episode_slice_ids of snapshot patients who had a consult and were admitted
admitted_with_consult = ex.index[ex.has_consult & ex.is_admitted]

# Select the matching ROWS of df_spec. Plain df_spec[ids] would look the ids up
# as column labels and raise KeyError; isin() also tolerates ids absent from df_spec.
df_spec[df_spec.index.isin(admitted_with_consult)]

In [None]:
from ed_admissions_data_retrieval import ed_admissions_get_data
from ed_admissions_helper_functions import get_specialty_probs, prepare_episode_slices_dict


# Special category passed to get_specialty_probs: patients in the child age group
# are given this fixed distribution (paediatric = 1.0, everything else 0.0)
child_age_group = '0-17'
child_dict = dict(medical=0.0, surgical=0.0, haem_onc=0.0, paediatric=1.0)

X_test, y_test, model = prepare_for_inference(model_file_path, 'ed_admission', time_of_day = tod_, data_path = PATH_ED, single_episode_slice_per_visit = False)

in_snapshot = (df.training_validation_test == 'test') & (df.time_of_day == tod_) & (df.horizon_dt == random_horizon_dt)
episode_slices_dict = prepare_episode_slices_dict(df[in_snapshot])

is_child_func = lambda row: row['age_group'] == child_age_group # or row['age'] <= 17

# Attach each snapshot patient's consultation sequence and observed specialty (index-on-index left join)
spec = ex[['age_group']].merge(df_spec[['consultation_sequence', 'observed_specialty']],
                               left_index=True, right_index=True, how='left')
spec['specialty_prob'] = get_specialty_probs(model_file_path, spec, special_category_func=is_child_func, special_category_dict=child_dict)

# Unpack the per-patient dict of specialty probabilities into one '<specialty>_prob' column each
specs = ['medical', 'surgical', 'haem_onc', 'paediatric']
prob_columns = {f'{key}_prob': spec['specialty_prob'].apply(lambda d, key=key: d.get(key)) for key in specs}
spec = spec.assign(**prob_columns)

# spec = pd.merge(ex['prob_admission_in_window'], spec, left_index=True, right_index=True, how='left')
# Both frames carry age_group, so the merged frame has age_group_x / age_group_y;
# later cells rely on age_group_x
ex_spec = ex.merge(spec, left_index=True, right_index=True, how='left')

# Figure e: one plot per specialty, coloured by that specialty's probability column
for spec_ in specs:

    if not full_output:
        title_ = 'Figure e'
        in_ED_now_plot(pipeline_file_path, f'Figure e {spec_}', ex_spec, horizon_datetime, figsize_global, title_,
                       text_size=20, colour=True, colour_map=spec_colour_dict['spectrum'][spec_],
                       preds_col=f'{spec_}_prob')
    else:
        title_ = 'Patients in ED at'
        in_ED_now_plot(full_file_path, f'Full Figure e {spec_}', ex_spec, horizon_datetime, figsize_global, title_,
                       include_titles=True, colour=True, colour_map=spec_colour_dict['spectrum'][spec_],
                       size=20, preds_col=f'{spec_}_prob', title_suffix=f'admission to {spec_} specialty')

## Figure f

In [None]:
prob_dist_spec = {}

# Specialties to process, defined once for both loops below
specialties = ['medical', 'surgical', 'haem_onc', 'paediatric']

for spec_ in specialties:
    print("\nProcessing probability of admission to:" + spec_ )

    # get the probability of admission to this specialty for all patients
    weights = spec['specialty_prob'].apply(lambda x: x.get(spec_))

    # select only the episode slices that pertain to children or adults, as appropriate
    if spec_ == 'paediatric':
        episode_slices_dict = prepare_episode_slices_dict(ex_spec[ex_spec.age_group_x == '0-17'])
    else:
        episode_slices_dict = prepare_episode_slices_dict(ex_spec[ex_spec.age_group_x != '0-17'])
        
    # get probability distribution for this time of day, weighted by specialty probability
    prob_dist_spec[spec_] = get_prob_dist(
        episode_slices_dict, X_test, y_test, model, weights
    )

# Figure f: one bed-demand distribution per specialty
for spec_ in specialties:

    if full_output:

        # wording aligned with the Figure i title ("beds needed for patients")
        title_ = f'Probability distribution for number of beds needed for patients in ED at {random_horizon_dt} {hour_}-{min_} in {spec_} specialties'
        prob_dist_plot(prob_dist_data=prob_dist_spec[spec_][random_horizon_dt]['pred_demand'], title_=title_, directory_path=full_file_path, figsize=figsize_global, include_titles=True, 
                       bar_colour= spec_colour_dict['single'][spec_], file_name = f'Full Figure f {spec_}')
    else:
        title_ = 'Figure f'
        prob_dist_plot(prob_dist_data=prob_dist_spec[spec_][random_horizon_dt]['pred_demand'], title_=title_, directory_path=pipeline_file_path, figsize=figsize_global, text_size=20, 
                       bar_colour= spec_colour_dict['single'][spec_], file_name = f'Figure f {spec_}')

## Figure g 

In [None]:

from predict.emergency_demand.admission_in_time_window_using_aspirational_curve import calculate_probability
from viz.aspirational_curve_plot import plot_curve

time_window_hrs = time_window/60
# NOTE(review): dividing by 3600 presumes elapsed_los_td is in seconds - confirm
ex_spec['elapsed_los_td_hrs'] = ex_spec['elapsed_los_td']/3600

# Use the curve parameters loaded from the config (x1, y1, x2, y2) rather than
# hard-coded copies, so this cell uses the same curve as the yet-to-arrive
# predictions in Figure j, which are given the same four values.
ex_spec['prob_adm_in_window'] = ex_spec.apply(lambda row: calculate_probability(row['elapsed_los_td_hrs'], time_window_hrs, x1 = x1, y1 = y1, x2 = x2, y2 = y2), axis=1)


# NOTE(review): directory_path is not used anywhere else in this cell
directory_path = media_file_path / horizon_datetime

if full_output:

    # Title built from the parameters; x1=4, y1=0.76, x2=12, y2=0.99 renders as
    # '... a 4 hour target for 76% of patients\nand a 12 hour target for 99% of patients'
    title_ = (f'Aspirational curve reflecting a {x1:g} hour target for {y1:.0%} of patients\n'
              f'and a {x2:g} hour target for {y2:.0%} of patients')
    plot_curve(full_file_path, figsize_global, title_, x1 = x1, y1 = y1, x2 = x2, y2 = y2, include_titles = True, file_name = 'Full Figure g')
    
else:
    
    title_ = 'Figure g'
    plot_curve(pipeline_file_path, figsize_global, title_, x1 = x1, y1 = y1, x2 = x2, y2 = y2,  text_size = 20, file_name = 'Figure g')

## Figure h

In [None]:
# Figure h: snapshot coloured by each patient's probability of admission within the window
if not full_output:
    title_ = 'Figure h'
    in_ED_now_plot(pipeline_file_path, 'Figure h', ex_spec, horizon_datetime, figsize_global, title_,
                   text_size=20, colour=True, colour_map=spec_colour_dict['spectrum']['window'],
                   preds_col='prob_adm_in_window')
else:
    title_ = 'Patients in ED at'
    in_ED_now_plot(full_file_path, 'Full figure h', ex_spec, horizon_datetime, figsize_global, title_,
                   include_titles=True, colour=True, colour_map=spec_colour_dict['spectrum']['window'],
                   size=20, preds_col='prob_adm_in_window',
                   title_suffix='admission within ' + str(int(time_window_hrs)) + ' hours')

## Figure i


In [None]:
prob_dist_spec_in_window = {}

for spec_ in ['medical', 'surgical', 'haem_onc', 'paediatric']:
    print("\nProcessing probability of admission to:" + spec_ )

    # per-patient probability of admission to this specialty ...
    weights = ex_spec['specialty_prob'].apply(lambda probs: probs.get(spec_))

    # ... multiplied by the probability of being admitted within the window
    weights_in_window = weights * ex_spec['prob_adm_in_window']

    # paediatric demand is computed over children only, every other specialty over adults only
    if spec_ != 'paediatric':
        cohort = ex_spec[ex_spec.age_group_x != '0-17']
    else:
        cohort = ex_spec[ex_spec.age_group_x == '0-17']
    episode_slices_dict = prepare_episode_slices_dict(cohort)

    # probability distribution for this time of day, using the combined weights
    prob_dist_spec_in_window[spec_] = get_prob_dist(
        episode_slices_dict, X_test, y_test, model, weights_in_window
    )


# Figure i: one bed-demand distribution per specialty, restricted to the time window
for spec_ in ['medical', 'surgical', 'haem_onc', 'paediatric']:

    demand = prob_dist_spec_in_window[spec_][random_horizon_dt]['pred_demand']

    if not full_output:
        title_ = f'Figure i {spec_}'
        prob_dist_plot(prob_dist_data=demand, title_=title_, directory_path=pipeline_file_path,
                       figsize=figsize_global, text_size=20,
                       bar_colour=spec_colour_dict['single'][spec_], file_name=f'Figure i {spec_}')
    else:
        title_ = f'Probability distribution for beds needed for patients in ED at {random_horizon_dt} {hour_}-{min_} in {spec_} specialties within {str(int(time_window_hrs))} hours'
        prob_dist_plot(prob_dist_data=demand, title_=title_, directory_path=full_file_path,
                       figsize=figsize_global, include_titles=True,
                       bar_colour=spec_colour_dict['single'][spec_], file_name=f'Full Figure i {spec_}')

## Figure j

In [None]:
from joblib import load

# Saved yet-to-arrive model for all patients; the file name encodes the window length in hours
time_window = 480
MODEL__ED_YETTOARRIVE__NAME = 'ed_yet_to_arrive_all_' + str(int(time_window/60)) + '_hours'
full_path = (model_file_path / MODEL__ED_YETTOARRIVE__NAME).with_suffix('.joblib')

# A single 'default' context (not split by specialty) for the chosen time of day
prediction_context = {'default': {'time_of_day': tod_}}

# NOTE: this rebinds `model`, replacing the admission model loaded in earlier cells
model = load(full_path)
prob_dist_yta = model.predict(prediction_context, x1, y1, x2, y2)

# Figure j: bed-demand distribution for patients yet to arrive
if not full_output:

    title_ = 'Figure j all'
    prob_dist_plot(prob_dist_yta['default'], figsize=figsize_global, title_=title_,
                   directory_path=pipeline_file_path, text_size=20, truncate_at_beds=30,
                   bar_colour=spec_colour_dict['single']['all'], file_name='Figure j')

else:

    title_ = f'Probability distribution for beds needed in next {int(time_window/60)} hours\nfor arrivals after {horizon_datetime}'
    prob_dist_plot(prob_dist_yta['default'], figsize=figsize_global, title_=title_,
                   directory_path=full_file_path, include_titles=True, truncate_at_beds=30,
                   bar_colour=spec_colour_dict['single']['all'], file_name='Full Figure j')

    


In [None]:

# Saved yet-to-arrive model split by specialty; the file name encodes the window length in hours
MODEL__ED_YETTOARRIVE__NAME = 'ed_yet_to_arrive_by_spec_' + str(int(time_window/60)) + '_hours'
full_path = model_file_path / MODEL__ED_YETTOARRIVE__NAME 
full_path = full_path.with_suffix('.joblib')

# Load the model once and reuse it for every specialty (the original re-read
# the same file from disk on each loop iteration and never used model_by_spec).
model_by_spec = load(full_path)
# Keep `model` bound to the by-specialty model, as the original cell left it
model = model_by_spec

for spec_ in ['medical', 'surgical', 'haem_onc', 'paediatric']:

    # one context per specialty, for the chosen time of day
    prediction_context = {
        spec_: {
            'time_of_day': tod_ 
        }
    }

    prob_dist_yta_spec = model_by_spec.predict(prediction_context, x1, y1, x2, y2)
    
    if full_output:

        title_ = f'Probability distribution for beds needed in {spec_} specialties\nin next {int(time_window/60)} hours for arrivals after {horizon_datetime}'
        prob_dist_plot(prob_dist_yta_spec[spec_],figsize=figsize_global, title_=title_, directory_path=full_file_path, include_titles = True, truncate_at_beds=20, 
                    bar_colour = spec_colour_dict['single'][spec_], file_name = f'Full Figure j {spec_}')
        
    else:

        # was 'Figure g' (copy-paste slip); now matches the file name written below
        title_ = f'Figure j {spec_}'
        prob_dist_plot(prob_dist_yta_spec[spec_],figsize=figsize_global, title_=title_, directory_path=pipeline_file_path, text_size = 20,  truncate_at_beds=20, 
                       bar_colour = spec_colour_dict['single'][spec_], file_name = f'Figure j {spec_}')
