In [1]:
# Filter tensorflow version warnings
import os
# https://stackoverflow.com/questions/40426502/is-there-a-way-to-suppress-the-messages-tensorflow-prints/40426709
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
import warnings
# https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)
import tensorflow as tf
tf.get_logger().setLevel('INFO')
tf.autograph.set_verbosity(0)
import logging
tf.get_logger().setLevel(logging.ERROR)

import re
import altair as alt
from stable_baselines.deepq import DQN, MlpPolicy as DQN_MlpPolicy, LnMlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
import pandas as pd
import numpy as np
from tqdm import tqdm
from bayes_opt import BayesianOptimization


from env.SepsisEnv import SepsisEnv
from load_data import load_data
from add_reward import add_reward_df, add_end_episode_df



In [2]:
df = load_data()
df = add_reward_df(df)
df = add_end_episode_df(df)

In [3]:
df = df.reset_index()

In [8]:
total_timesteps = 10_000
iterations = 50_000

In [9]:
def train_model(env, model, total_timesteps, iterations):
    model.learn(total_timesteps=total_timesteps)
    reward_list = []
    obs = env.reset()
    patient_count = 0
    for _ in tqdm(range(iterations)):
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        reward_list.append(rewards[0])
        if done:
            patient_count += 1           
            obs = env.reset()
    model_name = re.sub(r'\W+', '', str(model.__class__).split('.')[-1])
    policy_name = re.sub(r'\W+', '', str(model.policy).split('.')[-1])
#     print('Model: ', model_name)
#     print('Policy: ', policy_name)
#     print('Total patients: ', patient_count)
#     print('Total reward:', sum(reward_list))
    
    return sum(reward_list)

Bayesian Optimization code from:
https://colab.research.google.com/gist/iyaja/bf1d35a09ea5e0559900cc9136f96e36/hyperparameter-optimization-fastai.ipynb#scrollTo=gGZm73Txs9PS

In [10]:
def fit_with(lr, bs, eps, final_eps):
    env = DummyVecEnv([lambda: SepsisEnv(df)])
    model = DQN(env=env,
    policy=DQN_MlpPolicy,
    learning_rate=lr,
    buffer_size=bs,
    exploration_fraction=eps,
    exploration_final_eps=final_eps,
    )
    total_reward = train_model(env=env, model=model, total_timesteps=total_timesteps, iterations=iterations)
    return total_reward

In [11]:
# Bounded region of parameter space
pbounds = {'lr': (1e-2, 1e-4), 'bs':(5_000, 100_000), 'eps':(0.01, 0.2), 'final_eps': (0.01, 0.02)}
optimizer = BayesianOptimization(
    f=fit_with,
    pbounds=pbounds,
    verbose=2 

)

optimizer.maximize(init_points=2, n_iter=5,)



for i, res in enumerate(optimizer.res):
    print("Iteration {}: \n\t{}".format(i, res))

print('Max', optimizer.max)    

|   iter    |  target   |    bs     |    eps    | final_eps |    lr     |
-------------------------------------------------------------------------


100%|██████████| 50000/50000 [01:48<00:00, 459.70it/s]


| [0m 1       [0m | [0m-883.0   [0m | [0m 1.265e+0[0m | [0m 0.0376  [0m | [0m 0.01658 [0m | [0m 0.003183[0m |


100%|██████████| 50000/50000 [01:48<00:00, 460.89it/s]


| [95m 2       [0m | [95m-765.1   [0m | [95m 3.432e+0[0m | [95m 0.03907 [0m | [95m 0.01284 [0m | [95m 0.00233 [0m |


100%|██████████| 50000/50000 [01:48<00:00, 459.75it/s]


| [0m 3       [0m | [0m-1.036e+0[0m | [0m 3.432e+0[0m | [0m 0.1372  [0m | [0m 0.01463 [0m | [0m 0.0001  [0m |


100%|██████████| 50000/50000 [01:50<00:00, 454.25it/s]


| [0m 4       [0m | [0m-1.163e+0[0m | [0m 8.192e+0[0m | [0m 0.1276  [0m | [0m 0.01956 [0m | [0m 0.0001  [0m |


100%|██████████| 50000/50000 [01:47<00:00, 464.04it/s]


| [0m 5       [0m | [0m-919.4   [0m | [0m 3.146e+0[0m | [0m 0.1056  [0m | [0m 0.01801 [0m | [0m 0.0001  [0m |


100%|██████████| 50000/50000 [01:49<00:00, 457.60it/s]


| [0m 6       [0m | [0m-1.154e+0[0m | [0m 3.707e+0[0m | [0m 0.1979  [0m | [0m 0.01131 [0m | [0m 0.0001  [0m |


100%|██████████| 50000/50000 [01:48<00:00, 459.49it/s]

| [0m 7       [0m | [0m-1.37e+03[0m | [0m 8.73e+04[0m | [0m 0.1744  [0m | [0m 0.01805 [0m | [0m 0.0001  [0m |
Iteration 0: 
	{'target': -882.9500152952969, 'params': {'bs': 12652.586464441809, 'eps': 0.03759859529433134, 'final_eps': 0.016576522954824646, 'lr': 0.0031825364793057883}}
Iteration 1: 
	{'target': -765.133347325027, 'params': {'bs': 34315.248936763615, 'eps': 0.039067682414731714, 'final_eps': 0.012844403027561272, 'lr': 0.0023302383142856323}}
Iteration 2: 
	{'target': -1036.4166837446392, 'params': {'bs': 34321.727982842254, 'eps': 0.13717585018667033, 'final_eps': 0.014630310695071915, 'lr': 0.0001}}
Iteration 3: 
	{'target': -1163.2333531156182, 'params': {'bs': 81916.57958022068, 'eps': 0.12758175509872266, 'final_eps': 0.019563716561210746, 'lr': 0.0001}}
Iteration 4: 
	{'target': -919.3666823580861, 'params': {'bs': 31464.259446718886, 'eps': 0.10556288295448141, 'final_eps': 0.018011535356845222, 'lr': 0.0001}}
Iteration 5: 
	{'target': -1153.6277967281




In [12]:
print('Max', optimizer.max) 

Max {'target': -765.133347325027, 'params': {'bs': 34315.248936763615, 'eps': 0.039067682414731714, 'final_eps': 0.012844403027561272, 'lr': 0.0023302383142856323}}
