In [48]:
import gym
from stable_baselines3 import PPO, DQN, A2C, SAC, DDPG
from stable_baselines3.common.env_util import make_vec_env
import hydra
from scipy.integrate import odeint
import numpy as np

# --- Epidemic model parameters ---------------------------------------------
beta = 0.002      # coefficient of the S*I infection term in `sir`
gamma = 0.5       # coefficient of the gamma*I removal term in `sir`
tf = 30           # number of steps ("days" on the plots) in each evaluation rollout
S0, I0 = 990, 10  # default initial susceptible / infected values of the environment

# --- Exploration schedule --------------------------------------------------
# NOTE(review): none of the values below are referenced by the visible cells;
# presumably leftovers from an epsilon-greedy training loop -- confirm before
# removing.
n_episodes = 2000
eps_start, eps_end, eps_decay = 1.0, 0.001, 0.995


def sir(y, t, beta, gamma, u):
    """Right-hand side of the controlled S/I system, in the form `odeint` expects.

    Parameters
    ----------
    y : sequence of two numbers
        Current state ``(S, I)``.
    t : float
        Time at which the derivative is evaluated. Unused: the right-hand
        side does not depend on time.
    beta : float
        Coefficient of the ``S * I`` infection term.
    gamma : float
        Coefficient of the ``I`` removal term.
    u : float
        Control; removes susceptibles at rate ``u * S``.

    Returns
    -------
    numpy.ndarray
        ``[dS/dt, dI/dt]``.
    """
    susceptible, infected = y
    # The same flow leaves S and enters I, so compute it once.
    new_infections = beta * susceptible * infected
    return np.array([
        -new_infections - u * susceptible,
        new_infections - gamma * infected,
    ])

class SirEnvironment(gym.Env):
    """Gym environment for the controlled ``(S, I)`` system defined by ``sir``.

    * Observation: ``[S, I]`` as float32, declared as a Box in ``[0, 1000]^2``.
    * Action: a single control value ``u``, declared as a Box in ``[0, 1]``.
    * Each ``step`` integrates the ODE over one unit of time.
    * Reward: ``-I - 10 * u`` evaluated at the end of the step.
    * The episode is reported as done once ``I`` drops below 1.
    """

    def __init__(self, S0=S0, I0=I0):
        """Create the environment with the given initial ``S`` and ``I`` values."""
        # Stored as float64 so the state has the same dtype before and after
        # the first integration step (odeint returns float64).
        self.state = np.array([S0, I0], dtype=np.float64)
        self.beta = beta
        self.gamma = gamma
        self.observation_space = gym.spaces.Box(low=np.array([0.0, 0.0]), high=np.array([1000.0, 1000.0]), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)

    def reset(self, S0=S0, I0=I0):
        """Restore the initial state and model parameters; return the first observation."""
        self.state = np.array([S0, I0], dtype=np.float64)
        self.beta = beta
        self.gamma = gamma
        return np.array(self.state, dtype=np.float32)

    def step(self, action):
        """Apply control ``action[0]`` for one unit of time.

        Parameters
        ----------
        action : array-like
            Its first element is the control ``u``; values outside the
            declared action space are clipped to it.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)`` where ``observation`` is the
            float32 state after the step, ``reward`` is a Python float,
            ``done`` is a Python bool and ``info`` is an empty dict.
        """
        # Keep the control inside the declared action space: a negative value
        # would turn the ``-u * S`` term in `sir` into a source of susceptibles.
        u = float(np.clip(np.ravel(action)[0],
                          self.action_space.low[0],
                          self.action_space.high[0]))

        # Integrate over [0, 1] on a 101-point grid and keep only the end point.
        sol = odeint(sir, self.state, np.linspace(0, 1, 101), args=(self.beta, self.gamma, u))
        new_state = sol[-1, :]
        self.state = new_state

        infected = new_state[1]
        # Plain Python scalars, independent of the dtype the action arrived in.
        reward = float(-infected - 10 * u)
        done = bool(infected < 1.0)
        return (np.array(new_state, dtype=np.float32), reward, done, {})

In [49]:
from stable_baselines3.common.env_checker import check_env

# Sanity-check the custom environment before any training: build it with its
# default initial state and run Stable-Baselines3's environment checker on it.
env = SirEnvironment()
check_env(env)

# PPO

In [50]:
# Train PPO on a freshly checked environment and save the resulting model.
env = SirEnvironment()
check_env(env)
model = PPO("MlpPolicy", env, verbose=0, tensorboard_log="./ppo_sir_tensorboard/")
model.learn(total_timesteps=100000)
model.save("sir")

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 3. Visualize Controlled SIR Dynamics
# Roll the trained policy out on a fresh environment for `tf` steps.
# NOTE(review): `done` is not checked, so the rollout keeps stepping for all
# `max_t` steps even after the environment reports termination -- confirm this
# fixed-horizon behaviour is intended.
env = SirEnvironment()
state = env.reset()
max_t = tf
reward_sum = 0.
# Collect in Python lists and convert once after the loop instead of
# re-allocating a NumPy array on every iteration.
state_history = [state]
action_history = []
for t in range(max_t):
    action, _states = model.predict(state)
    action_history.append(action[0])
    next_state, reward, done, _ = env.step(action)
    reward_sum += reward
    state_history.append(next_state)
    state = next_state
states = np.vstack(state_history)                  # shape (max_t + 1, 2)
actions = np.asarray(action_history, dtype=float)  # shape (max_t,)

# There is one more state than there are actions (the initial state), so the
# two kinds of trace need x-axes of different lengths.
state_days = list(range(len(states)))
action_days = list(range(len(actions)))

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,0].flatten(), name="susceptible",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,1].flatten(), name="infected",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=action_days, y=actions, name="vaccine",
        mode='lines+markers'),
    secondary_y=True,
)
# Add figure title
fig.update_layout(
    title_text=f'{reward_sum:.2f}: SIR model with control'
)
# Set x-axis title
fig.update_xaxes(title_text="day")
# Set y-axes titles
fig.update_yaxes(title_text="Population", secondary_y=False)
fig.update_yaxes(title_text="Vaccine", secondary_y=True)

# A2C

In [51]:
# Train A2C on a freshly checked environment.
env = SirEnvironment()
check_env(env)
model = A2C("MlpPolicy", env, verbose=0, tensorboard_log="./a2c_sir_tensorboard2/")
model.learn(total_timesteps=4000)

# 3. Visualize Controlled SIR Dynamics
# Roll the trained policy out on a fresh environment for `tf` steps.
# NOTE(review): `done` is not checked, so the rollout keeps stepping for all
# `max_t` steps even after the environment reports termination -- confirm this
# fixed-horizon behaviour is intended.
env = SirEnvironment()
state = env.reset()
max_t = tf
reward_sum = 0.
# Collect in Python lists and convert once after the loop instead of
# re-allocating a NumPy array on every iteration.
state_history = [state]
action_history = []
for t in range(max_t):
    action, _states = model.predict(state)
    action_history.append(action[0])
    next_state, reward, done, _ = env.step(action)
    reward_sum += reward
    state_history.append(next_state)
    state = next_state
states = np.vstack(state_history)                  # shape (max_t + 1, 2)
actions = np.asarray(action_history, dtype=float)  # shape (max_t,)

# There is one more state than there are actions (the initial state), so the
# two kinds of trace need x-axes of different lengths.
state_days = list(range(len(states)))
action_days = list(range(len(actions)))

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,0].flatten(), name="susceptible",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,1].flatten(), name="infected",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=action_days, y=actions, name="vaccine",
        mode='lines+markers'),
    secondary_y=True,
)
# Add figure title
fig.update_layout(
    title_text=f'{reward_sum:.2f}: SIR model with control'
)
# Set x-axis title
fig.update_xaxes(title_text="day")
# Set y-axes titles
fig.update_yaxes(title_text="Population", secondary_y=False)
fig.update_yaxes(title_text="Vaccine", secondary_y=True)

# SAC

In [52]:
# Train SAC on a freshly checked environment.
env = SirEnvironment()
check_env(env)
model = SAC("MlpPolicy", env, verbose=0, tensorboard_log="./sac_sir_tensorboard/")
model.learn(total_timesteps=10000)

# 3. Visualize Controlled SIR Dynamics
# Roll the trained policy out on a fresh environment for `tf` steps.
# NOTE(review): `done` is not checked, so the rollout keeps stepping for all
# `max_t` steps even after the environment reports termination -- confirm this
# fixed-horizon behaviour is intended.
env = SirEnvironment()
state = env.reset()
max_t = tf
reward_sum = 0.
# Collect in Python lists and convert once after the loop instead of
# re-allocating a NumPy array on every iteration.
state_history = [state]
action_history = []
for t in range(max_t):
    action, _states = model.predict(state)
    action_history.append(action[0])
    next_state, reward, done, _ = env.step(action)
    reward_sum += reward
    state_history.append(next_state)
    state = next_state
states = np.vstack(state_history)                  # shape (max_t + 1, 2)
actions = np.asarray(action_history, dtype=float)  # shape (max_t,)

# There is one more state than there are actions (the initial state), so the
# two kinds of trace need x-axes of different lengths.
state_days = list(range(len(states)))
action_days = list(range(len(actions)))

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,0].flatten(), name="susceptible",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,1].flatten(), name="infected",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=action_days, y=actions, name="vaccine",
        mode='lines+markers'),
    secondary_y=True,
)
# Add figure title
fig.update_layout(
    title_text=f'{reward_sum:.2f}: SIR model with control'
)
# Set x-axis title
fig.update_xaxes(title_text="day")
# Set y-axes titles
fig.update_yaxes(title_text="Population", secondary_y=False)
fig.update_yaxes(title_text="Vaccine", secondary_y=True)

# DDPG

In [53]:
# Train DDPG on a freshly checked environment.
env = SirEnvironment()
check_env(env)
model = DDPG("MlpPolicy", env, verbose=0, tensorboard_log="./ddpg_sir_tensorboard/")
model.learn(total_timesteps=150)

# 3. Visualize Controlled SIR Dynamics
# Roll the trained policy out on a fresh environment for `tf` steps.
# NOTE(review): `done` is not checked, so the rollout keeps stepping for all
# `max_t` steps even after the environment reports termination -- confirm this
# fixed-horizon behaviour is intended.
env = SirEnvironment()
state = env.reset()
max_t = tf
reward_sum = 0.
# Collect in Python lists and convert once after the loop instead of
# re-allocating a NumPy array on every iteration.
state_history = [state]
action_history = []
for t in range(max_t):
    action, _states = model.predict(state)
    action_history.append(action[0])
    next_state, reward, done, _ = env.step(action)
    reward_sum += reward
    state_history.append(next_state)
    state = next_state
states = np.vstack(state_history)                  # shape (max_t + 1, 2)
actions = np.asarray(action_history, dtype=float)  # shape (max_t,)

# There is one more state than there are actions (the initial state), so the
# two kinds of trace need x-axes of different lengths.
state_days = list(range(len(states)))
action_days = list(range(len(actions)))

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,0].flatten(), name="susceptible",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=state_days, y=states[:,1].flatten(), name="infected",
        mode='lines+markers'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=action_days, y=actions, name="vaccine",
        mode='lines+markers'),
    secondary_y=True,
)
# Add figure title
fig.update_layout(
    title_text=f'{reward_sum:.2f}: SIR model with control'
)
# Set x-axis title
fig.update_xaxes(title_text="day")
# Set y-axes titles
fig.update_yaxes(title_text="Population", secondary_y=False)
fig.update_yaxes(title_text="Vaccine", secondary_y=True)