In [1]:
import sys
sys.path.append("../../src/models")

In [2]:
import numpy as np
from actions import city_restrictions, costs

In [3]:
from simulate_pandemic import init_infection, spread_infection, lambda_leak_expose, update_population
from simulate_pandemic import main

In [4]:
from numpy.random import default_rng
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [5]:
from joblib import Parallel, delayed

In [6]:
def make_df(week, sim, action, data):
    df = pd.DataFrame(pd.Series(data[:, 1]).value_counts()).T
    df['simulation'] = sim
    df['week'] = week
    df['action'] = action
    df = df.rename(columns = {
                              -1 : 'removed',
                               0 : 'susceptible',
                               1 : 'exposed',
                               2 : 'infected',
                               3 : 'hospitalized'
                              }
                  )
    
    return df

In [7]:
def simulate_and_save(policy_number, policy, n_sims, step_size=7, folder='generated_sims/'):
    dfs = []
    for sim in range(1, n_sims+1):
        
        sim_name = f'{policy_number}_{sim}'
                
        data, pop_matrix = main(gpickle_path=gpickle_path,
                                p_r=p_r,
                                policy=policy,
                                disable_tqdm=True,
                                days=step_size*len(policy),
                                step_size=step_size)
        
        
        weeks = (step_size/7) * len(policy)
        
        weekly_data = [data[i*7] for i in range(int(weeks))]
        df = pd.concat([make_df(week, sim_name, action, data)
                             for week, (data, action) in enumerate(zip(weekly_data, policy))])
        #df.to_parquet(folder + f'{sim_name}.parquet')
        dfs += [df]
    return dfs

In [8]:
rng = default_rng(None)

gpickle_path = "../../data/processed/SP_multiGraph_Job_Edu_Level.gpickle"

prhome = 0.06
p_r = {
    'home'    :  prhome,
    'neighbor':  .1*prhome,
    'work'    :  .1*prhome,
    'school'  :  .15*prhome,
}

In [None]:
costs_keys = list(costs.keys())
costs_values = np.array(list(costs.values()))


### Cost Weighted

In [13]:
p = (1 - costs_values) / (1 - costs_values).sum()
policy = [rng.choice(costs_keys, size=int(364/14),
                     replace=True, p=p)
          for i in range(120)]

assert len(set([tuple(p) for p in policy])) == len(policy)

In [14]:
n_sims=4
res = Parallel(n_jobs=-1)(delayed(simulate_and_save)(i, pol, n_sims) 
                              for i, pol in tqdm(enumerate(policy),
                                                 total=len(policy)))

100%|██████████| 120/120 [22:13<00:00, 11.11s/it]


### At Random

In [15]:
policy = [rng.choice(costs_keys, size=int(364/14), replace=True)
          for i in range(120)]

In [16]:
n_sims=4
res2 = Parallel(n_jobs=-1)(delayed(simulate_and_save)(i, pol, n_sims) 
                              for i, pol in tqdm(enumerate(policy),
                                                 total=len(policy)))

100%|██████████| 120/120 [17:32<00:00,  8.77s/it]


In [25]:
np.mean([r['hospitalized'].mean() for rs in res for r in rs]),\
np.max([r['hospitalized'].max() for rs in res for r in rs])

(84.92006840140536, 441.0)

In [26]:
np.mean([r['hospitalized'].mean() for rs in res2 for r in rs]),\
np.max([r['hospitalized'].max() for rs in res2 for r in rs])

(58.20080728480457, 417.0)

In [31]:
final_res = []
for i, sims_array in enumerate(res+res2):
    for j, sim_df in enumerate(sims_array):
        sim_df['simulation'] = f'{i}_{j}'
        final_res.append(sim_df)

In [34]:
pd.concat(final_res).to_parquet('sim_dataset.parquet')