# Predict yet to arrive 

This notebook prepares a model that predicts the number of patients yet to arrive.

Inputs
- A series of times in the day at which we want to make these predictions
- A series of dates on which we want to make these predictions
- A time window after the prediction time, within which we are interested in predicting a number of patients (e.g. 8 hours)

## Set up the notebook environment

In [None]:
# Load IPython's autoreload extension and enable mode 2, so that imported
# modules are reloaded before each cell is executed and edits to the package
# source are picked up without restarting the kernel
%load_ext autoreload 
%autoreload 2

In [None]:
from pathlib import Path
import sys
import json
import pandas as pd
import numpy as np
# import joblib
from joblib import load, dump

# Home directory of the current user; every location below hangs off it
USER_ROOT = Path.home()

# Root folder of the HyMind project
PROJECT_ROOT = USER_ROOT / 'HyMind'

# Make importable, in this order: the patientflow package itself, then the
# helper functions that sit outside the package
sys.path.extend(
    str(USER_ROOT / 'patientflow' / name) for name in ('patientflow', 'functions')
)


In [None]:
# Folder that the trained models are written to and read from
model_file_path = PROJECT_ROOT / 'dissemination' / 'model-output' / 'trained-models'

# Folder containing the raw input data
data_file_path = PROJECT_ROOT / 'dissemination' / 'data-raw'

# A notebook only echoes the last bare expression of a cell, so print both
# paths explicitly to make sure each one is actually shown
print(model_file_path)
print(data_file_path)

## Load parameters

These are set in config.yaml. You can change them for your own purposes, but the times of day will need to match those in the provided dataset if you want to run this notebook successfully.

In [None]:
# Load the notebook parameters (times of day, prediction window, time interval,
# reference year, ...) from the project's YAML configuration file
import yaml

config_path = PROJECT_ROOT / 'dissemination'

# Explicit encoding so the file is read the same way on every platform
with open(config_path / 'config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Convert list of times of day at which predictions will be made (currently stored as lists) to list of tuples
prediction_times = [tuple(item) for item in config['prediction_times']]

# NOTE(review): eval() executes whatever expression the config file contains.
# It is kept because epsilon appears to be stored as an expression string, but
# it must only ever be used with a trusted config file. A value that YAML has
# already parsed as a number is converted directly, since eval() rejects
# non-string input.
raw_epsilon = config['epsilon']
epsilon = float(eval(raw_epsilon)) if isinstance(raw_epsilon, str) else float(raw_epsilon)

# presumably expressed in minutes (the model file names divide by 60 to get hours) — confirm
prediction_window = int(config['prediction_window'])
time_interval = int(config['yta_time_interval'])
reference_year = config['reference_year']

# json_file_path = '../patientflow/predict/emergency_demand/nhse_urls.json'


## Load data

In [None]:
from ed_admissions_data_retrieval import ed_admissions_get_data
# NOTE(review): this is a relative path string rather than one built from
# data_file_path; how it is resolved depends on ed_admissions_get_data — confirm
PATH_ED = 'HyMind/dissemination/data-raw/yet_to_arrive.csv'

# Read the data at PATH_ED through the project helper
df = ed_admissions_get_data(PATH_ED)

In [None]:
# Preview the first rows of the loaded data
df.head()

## Separate into training, validation and test sets

As part of preparing the data, each visit has already been allocated to one of three sets: training, validation and test.

In [None]:
# Count the rows carrying each value of the training_validation_test label
df.training_validation_test.value_counts()

In [None]:
# Split the rows using the pre-assigned training_validation_test label.
# train_df is modified below, so take an explicit copy; assigning a column on
# a filtered frame otherwise raises pandas' SettingWithCopyWarning
train_df = df[df.training_validation_test == 'train'].copy()
valid_df = df[df.training_validation_test == 'valid']
test_df = df[df.training_validation_test == 'test']

# Parse the visit start time as timezone-aware (UTC) datetimes and use it as
# the index of the training set
train_df['ed_visit_start_dttm'] = pd.to_datetime(train_df['ed_visit_start_dttm'], utc=True)
train_df = train_df.set_index('ed_visit_start_dttm')

In [None]:
# Check that the training set is indexed by a DatetimeIndex
isinstance(train_df.index, pd.DatetimeIndex)

## Train the Poisson-Binomial model

In [None]:
# Display the parameters loaded from the configuration file
config

In [None]:
from predict.emergency_demand.poisson_binomial_predictor import PoissonBinomialPredictor

### Train a model for all admissions, irrespective of specialty of admission

In [None]:
from predict.emergency_demand.poisson_binomial_predictor import PoissonBinomialPredictor
from joblib import dump, load

# Fit one predictor on the training set, without any specialty filters
model = PoissonBinomialPredictor()
model.fit(train_df, prediction_window, time_interval, prediction_times)

# The file name records the prediction window divided by 60, labelled as hours
# (presumably prediction_window is in minutes — confirm)
MODEL__ED_YETTOARRIVE__NAME = f'ed_yet_to_arrive_all_{int(prediction_window / 60)}_hours'
full_path = (model_file_path / MODEL__ED_YETTOARRIVE__NAME).with_suffix('.joblib')

# Create the output folder first: dump() raises FileNotFoundError when the
# parent directory does not exist (e.g. on a fresh checkout)
full_path.parent.mkdir(parents=True, exist_ok=True)

dump(model, full_path)

In [None]:
# Retrieve the weights held by the fitted model (their structure is defined by
# PoissonBinomialPredictor.get_weights)
weights = model.get_weights()


In [None]:


# Define the inputs in this cell: they are otherwise only assigned by a later
# cell, so running the notebook top to bottom would raise a NameError here
prediction_context = {
    'default': {
        'prediction_time': (7, 0)
    }
}

# Values read from the configuration and passed straight to predict()
x1 = float(config['x1'])
y1 = float(config['y1'])
x2 = float(config['x2'])
y2 = float(config['y2'])

print(x1)
print(x2)

preds = model.predict(prediction_context, x1, y1, x2, y2)

# Show the first ten rows of the prediction for the 'default' context
preds['default'].head(10)

In [None]:
# Predict with the saved all-admissions model for the (7, 0) prediction time
prediction_context = {'default': {'prediction_time': (7, 0)}}

# Values read from the configuration and passed straight to predict()
x1, y1, x2, y2 = (float(config[key]) for key in ('x1', 'y1', 'x2', 'y2'))

# Rebuild the file name used when the model was saved and load it from disk
MODEL__ED_YETTOARRIVE__NAME = f'ed_yet_to_arrive_all_{int(prediction_window / 60)}_hours'
full_path = (model_file_path / MODEL__ED_YETTOARRIVE__NAME).with_suffix('.joblib')
model = load(full_path)

preds = model.predict(prediction_context, x1, y1, x2, y2)


### Predict within specialty

In [None]:
from predict.emergency_demand.poisson_binomial_predictor import PoissonBinomialPredictor

# One filter per specialty; each maps column names to the value a row must have
# (presumably applied to train_df inside PoissonBinomialPredictor — confirm)
specialty_filters = {
    'medical': {'observed_specialty': 'medical', 'is_child': False},
    'surgical': {'observed_specialty': 'surgical', 'is_child': False},
    'haem_onc': {'observed_specialty': 'haem_onc', 'is_child': False},
    'paediatric': {'is_child': True},  # Paediatric doesn't filter by observed_specialty
}

# Fit a predictor that is given the specialty filters
model_by_spec = PoissonBinomialPredictor(filters=specialty_filters)
model_by_spec.fit(train_df, prediction_window, time_interval, prediction_times)

# The file name records the prediction window divided by 60, labelled as hours
# (presumably prediction_window is in minutes — confirm)
MODEL__ED_YETTOARRIVE__NAME = f'ed_yet_to_arrive_by_spec_{int(prediction_window / 60)}_hours'
full_path = (model_file_path / MODEL__ED_YETTOARRIVE__NAME).with_suffix('.joblib')

# Create the output folder first: dump() raises FileNotFoundError when the
# parent directory does not exist (e.g. on a fresh checkout)
full_path.parent.mkdir(parents=True, exist_ok=True)

dump(model_by_spec, full_path)

In [None]:
# Rebuild the file name used when the by-specialty model was saved and load it
MODEL__ED_YETTOARRIVE__NAME = f'ed_yet_to_arrive_by_spec_{int(prediction_window / 60)}_hours'
full_path = (model_file_path / MODEL__ED_YETTOARRIVE__NAME).with_suffix('.joblib')
model_by_spec = load(full_path)

# Values read from the configuration and passed straight to predict()
x1, y1, x2, y2 = (float(config[key]) for key in ('x1', 'y1', 'x2', 'y2'))

# Request a prediction for the 'medical' filter at the (7, 0) prediction time
prediction_context = {'medical': {'prediction_time': (7, 0)}}

preds = model_by_spec.predict(prediction_context, x1, y1, x2, y2)
preds['medical']