In [1]:
import sys
sys.path.append("../../src/models")

In [2]:
import numpy as np
from actions import city_restrictions, costs

In [3]:
from simulate_pandemic import init_infection, spread_infection, lambda_leak_expose, update_population
from simulate_pandemic import main

In [4]:
from numpy.random import default_rng
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [5]:
from joblib import Parallel, delayed

In [6]:
def make_df(week, sim, action, data):
    df = pd.DataFrame(pd.Series(data[:, 1]).value_counts()).T
    df['simulation'] = sim
    df['week'] = week
    df['action'] = action
    df = df.rename(columns = {
                              -1 : 'removed',
                               0 : 'susceptible',
                               1 : 'exposed',
                               2 : 'infected',
                               3 : 'hospitalized'
                              }
                  )
    
    return df

In [7]:
def simulate_and_save(policy_number, policy, n_sims, step_size=7, folder='generated_sims/'):
    dfs = []
    for sim in range(1, n_sims+1):
        
        sim_name = f'{policy_number}_{sim}'
                
        data, pop_matrix = main(gpickle_path=gpickle_path,
                                p_r=p_r,
                                policy=policy,
                                disable_tqdm=True,
                                days=step_size*len(policy),
                                step_size=step_size)
        
        
        weeks = (step_size/7) * len(policy)
        
        weekly_data = [data[i*7] for i in range(int(weeks))]
        df = pd.concat([make_df(week, sim_name, action, data)
                             for week, (data, action) in enumerate(zip(weekly_data, policy))])
        #df.to_parquet(folder + f'{sim_name}.parquet')
        dfs += [df]
    return dfs

In [8]:
rng = default_rng(None)

gpickle_path = "../../data/processed/SP_multiGraph_Job_Edu_Level.gpickle"

prhome = 0.06
p_r = {
    'home'    :  prhome,
    'neighbor':  .1*prhome,
    'work'    :  .1*prhome,
    'school'  :  .15*prhome,
}

In [9]:
costs_keys = list(costs.keys())
costs_values = np.array(list(costs.values()))


### Cost Weighted

In [10]:
p = (1 - costs_values) / (1 - costs_values).sum()
policy = [rng.choice(costs_keys, size=int(364/14),
                     replace=True, p=p)
          for i in range(120)]

assert len(set([tuple(p) for p in policy])) == len(policy)

In [12]:
n_sims=1
res = Parallel(n_jobs=-1)(delayed(simulate_and_save)(i, pol, n_sims) 
                              for i, pol in tqdm(enumerate(policy),
                                                 total=len(policy)))


  0%|          | 0/120 [00:00<?, ?it/s][A
 10%|█         | 12/120 [00:08<01:19,  1.36it/s][A
 15%|█▌        | 18/120 [00:17<01:44,  1.03s/it][A
 20%|██        | 24/120 [00:25<01:49,  1.14s/it][A
 25%|██▌       | 30/120 [00:33<01:51,  1.23s/it][A
 30%|███       | 36/120 [00:42<01:50,  1.32s/it][A
 35%|███▌      | 42/120 [00:50<01:42,  1.31s/it][A
 40%|████      | 48/120 [00:58<01:35,  1.32s/it][A
 45%|████▌     | 54/120 [01:08<01:32,  1.40s/it][A
 50%|█████     | 60/120 [01:16<01:23,  1.40s/it][A
 55%|█████▌    | 66/120 [01:24<01:15,  1.40s/it][A
 60%|██████    | 72/120 [01:33<01:07,  1.40s/it][A
 65%|██████▌   | 78/120 [01:42<00:59,  1.42s/it][A
 70%|███████   | 84/120 [01:50<00:51,  1.42s/it][A
 75%|███████▌  | 90/120 [01:58<00:42,  1.41s/it][A
 80%|████████  | 96/120 [02:07<00:34,  1.43s/it][A
 85%|████████▌ | 102/120 [02:16<00:25,  1.42s/it][A
 90%|█████████ | 108/120 [02:24<00:17,  1.43s/it][A
 95%|█████████▌| 114/120 [02:32<00:08,  1.40s/it][A
100%|██████████| 

### At Random

In [13]:
policy = [rng.choice(costs_keys, size=int(364/14), replace=True)
          for i in range(120)]

In [14]:
n_sims=1
res2 = Parallel(n_jobs=-1)(delayed(simulate_and_save)(i, pol, n_sims) 
                              for i, pol in tqdm(enumerate(policy),
                                                 total=len(policy)))


  0%|          | 0/120 [00:00<?, ?it/s][A
 10%|█         | 12/120 [00:07<01:11,  1.52it/s][A
 15%|█▌        | 18/120 [00:15<01:29,  1.14it/s][A
 20%|██        | 24/120 [00:21<01:32,  1.04it/s][A
 25%|██▌       | 30/120 [00:30<01:40,  1.11s/it][A
 30%|███       | 36/120 [00:38<01:41,  1.21s/it][A
 35%|███▌      | 42/120 [00:46<01:36,  1.24s/it][A
 40%|████      | 48/120 [00:53<01:29,  1.24s/it][A
 45%|████▌     | 54/120 [01:01<01:24,  1.28s/it][A
 50%|█████     | 60/120 [01:10<01:18,  1.31s/it][A
 55%|█████▌    | 66/120 [01:17<01:09,  1.28s/it][A
 60%|██████    | 72/120 [01:25<01:03,  1.32s/it][A
 65%|██████▌   | 78/120 [01:33<00:54,  1.29s/it][A
 70%|███████   | 84/120 [01:39<00:44,  1.24s/it][A
 75%|███████▌  | 90/120 [01:48<00:38,  1.30s/it][A
 80%|████████  | 96/120 [01:56<00:31,  1.31s/it][A
 85%|████████▌ | 102/120 [02:04<00:23,  1.30s/it][A
 90%|█████████ | 108/120 [02:11<00:15,  1.27s/it][A
 95%|█████████▌| 114/120 [02:19<00:07,  1.28s/it][A
100%|██████████| 

In [15]:
np.mean([r['hospitalized'].mean() for rs in res for r in rs]),\
np.max([r['hospitalized'].max() for rs in res for r in rs])

(87.28224214975846, 487.0)

In [16]:
np.mean([r['hospitalized'].mean() for rs in res2 for r in rs]),\
np.max([r['hospitalized'].max() for rs in res2 for r in rs])

(54.05884859464207, 384.0)

In [17]:
final_res = []
for i, sims_array in enumerate(res+res2):
    for j, sim_df in enumerate(sims_array):
        sim_df['simulation'] = f'{i}_{j}'
        final_res.append(sim_df)

In [None]:
pd.concat(final_res).to_parquet('sim_dataset.parquet')