In [1]:
import os
# Move the working directory to '../../src/' before importing project code;
# presumably this is what makes the `visualization` package importable from
# the notebook (TODO confirm). The path is relative, so re-running this cell
# in the same kernel changes directory again from wherever the kernel is now.
os.chdir('../../src/')
from visualization.SIR_Plot import make_SIR_graph, make_beds_graph

In [2]:
# Step into the `models` subdirectory of the current working directory
# (src/ if the first cell ran exactly once); presumably so that `policies`
# and `simulate_pandemic` can be imported as top-level modules in the next
# cell — TODO confirm. Relative path: not idempotent, run once per kernel.
os.chdir('models')

In [82]:
from copy import deepcopy
from policies import policies, costs, closest_actions
import simulate_pandemic as simp
import numpy as np
import pandas as pd
from scipy.special import expit
import random

def normallize_to_range(x, x_min, x_max, scale=1, a=-4, b=4):
    """Linearly map ``x`` from the interval ``[x_min, x_max]`` onto ``[a, b]``.

    Values outside ``[x_min, x_max]`` are extrapolated, not clipped.
    ``scale`` is accepted but plays no part in the computation.
    """
    # Position of x inside the source interval (0 at x_min, 1 at x_max).
    unit_position = (x - x_min) / (x_max - x_min)
    # Stretch to the width of the target interval and shift to its start.
    return (unit_position * (b - a)) + a

def exposed_cost(h, limit = 0.1, scale=1):
    """Sigmoid-shaped cost for the value ``h`` (callers pass an exposed fraction).

    ``h`` is rescaled with ``normallize_to_range`` so that 0 and ``limit`` map
    to that helper's default target bounds, passed through the logistic
    function ``expit`` and multiplied by ``scale``.
    """
    logit = normallize_to_range(h, 0, limit, scale)
    return expit(logit) * scale
 

class CovidState():
    """Search state for the MCTS planner: a sequence of policies from a start day.

    The state holds no population of its own; ``takeAction`` receives the
    population matrix and simulates the stored policy sequence on a copy.

    Attributes:
        day: day index at which the simulated sequence starts.
        actions: list of policy names (keys of ``policies`` / ``costs``),
            applied in order.
        cost: value returned by the most recent ``takeAction`` call, or
            ``None`` if it has not been called on this instance.
    """

    # The cost of the i-th action in the sequence is weighted by
    # DISCOUNT_FACTOR ** i when the total is computed.
    DISCOUNT_FACTOR = 0.8

    def __init__(self, actions, day):
        self.day = day
        self.actions = actions
        self.cost = None

    def getPossibleActions(self):
        """Return every policy name in ``policies``, in random order."""
        possible_actions = list(policies)
        random.shuffle(possible_actions)
        return possible_actions

    def getPossibleRangeActions(self):
        """Return the ``closest_actions`` entry for the current policy.

        The current policy is ``self.policy`` when a caller has set it,
        otherwise the last action of the sequence. With neither available,
        all actions are returned.
        """
        # Nothing in this class assigns ``self.policy``, so reading it
        # unconditionally raised AttributeError.
        # NOTE(review): falling back to the last action is an assumption
        # about the intended meaning of "current policy" — confirm.
        policy = getattr(self, 'policy', None)
        if policy is None and self.actions:
            policy = self.actions[-1]
        if policy is None:
            return self.getPossibleActions()
        return closest_actions[policy]

    def takeAction(self, pop_matrix, step_size):
        """Simulate the action sequence and return its (negative) discounted cost.

        Args:
            pop_matrix: population matrix; it is deep-copied, so the caller's
                object is not modified here.
            step_size: number of simulated days per action.

        Returns:
            ``-sum(cost_i * DISCOUNT_FACTOR ** i)`` where ``cost_i`` is, for the
            i-th action, the larger of the policy cost
            (``costs[action] * step_size``) and the accumulated daily
            ``exposed_cost``. The value is also stored in ``self.cost``.
        """
        local_pop_matrix = deepcopy(pop_matrix)
        pop = local_pop_matrix.shape[0]

        day = self.day
        sims_costs = []

        for action in self.actions:
            cost_action = costs[action] * step_size
            cost_exposed = 0

            # Spread the disease for step_size days under this policy.
            for _ in range(step_size):
                day += 1
                local_pop_matrix = simp.spread_infection(local_pop_matrix,
                                                         policies[action],
                                                         day)
                local_pop_matrix = simp.lambda_leak_expose(local_pop_matrix,
                                                           day)
                local_pop_matrix = simp.update_population(local_pop_matrix)

                # Fraction of rows whose column 1 equals 1 (presumably the
                # "exposed" status code — confirm in simulate_pandemic).
                e = np.count_nonzero(local_pop_matrix[:, 1] == 1) / pop
                cost_exposed += exposed_cost(e)

            # An action costs whichever is worse: the policy or the exposure.
            sims_costs.append(max(cost_action, cost_exposed))

        self.cost = -1 * np.sum([c * self.DISCOUNT_FACTOR ** i
                                 for i, c in enumerate(sims_costs)])

        return self.cost

    def isTerminal(self):
        """Always ``False``: the search is bounded by its limits, not by the state."""
        return False

    def getReward(self):
        """Return the cost stored by the last ``takeAction`` call (``None`` before it)."""
        return self.cost

    def __eq__(self, other):
        """Two states are equal when their action sequences are equal."""
        try:
            return self.actions == other.actions
        except AttributeError:
            # Not a state-like object: let Python fall back to its default.
            return NotImplemented

    def __hash__(self):
        # Must agree with __eq__, which compares the action sequences. The
        # sequence is a list (unhashable), so an immutable copy is hashed;
        # mutating ``actions`` afterwards changes the hash.
        return hash(tuple(self.actions))


class ActionInterface():
    """Abstract action type: subclasses must provide equality and hashing."""

    def __hash__(self):
        raise NotImplementedError

    def __eq__(self, other):
        raise NotImplementedError()


In [83]:
import time
import random
from joblib import Parallel, delayed
import numpy as np

def rolloutPolicy(state, pop_matrix, step_size):
    """Default rollout: evaluate ``state`` by simulating it on ``pop_matrix``.

    Delegates to ``state.takeAction`` and returns its result unchanged.
    """
    return state.takeAction(pop_matrix, step_size)


def getRolloutPolicy(policy_name='rolloutPolicy'):
    """Look up a rollout function by name.

    Only ``'rolloutPolicy'`` is known; any other name raises ``ValueError``.
    """
    if policy_name != 'rolloutPolicy':
        raise ValueError('Unknown Rollout Policy')
    return rolloutPolicy


class treeNode():
    """One node of the search tree, wrapping a state and its visit statistics."""

    def __init__(self, state, parent):
        terminal = state.isTerminal()

        # Position in the tree.
        self.parent = parent
        self.state = state
        self.children = {}  # filled by mcts.expand: action -> child node

        # Statistics accumulated during backpropagation.
        self.totalReward = 0
        self.numVisits = 0

        self.isTerminal = terminal
        # A terminal node has nothing to expand, so it starts fully expanded.
        self.isFullyExpanded = terminal


class mcts():
    """Monte Carlo Tree Search over sequences of policies.

    Every round selects or expands one node, evaluates its state with
    ``sim_rounds`` rollouts dispatched through joblib, and backpropagates the
    summed reward up to the root.
    """

    def __init__(self, pop_matrix, 
                 sim_rounds, step_size, n_jobs=6,
                 timeLimit=None, iterationLimit=None,
                 explorationConstant=2, 
                 rolloutPolicy='rolloutPolicy'):
        """Configure a search.

        Args:
            pop_matrix: population matrix handed to every rollout.
            sim_rounds: number of rollouts per round.
            step_size: simulated days per action, forwarded to the rollout.
            n_jobs: joblib worker count.
            timeLimit: search budget in milliseconds (exclusive with
                ``iterationLimit``).
            iterationLimit: number of rounds (exclusive with ``timeLimit``).
            explorationConstant: weight of the exploration term used when
                choosing among the children of a fully expanded node.
            rolloutPolicy: name resolved through ``getRolloutPolicy``.

        Raises:
            ValueError: if both or neither limit is given, if
                ``iterationLimit`` is below 1, or if the rollout name is unknown.
        """
        self.explorationConstant = explorationConstant
        self.rollout = getRolloutPolicy(rolloutPolicy)
        self.sim_rounds = sim_rounds
        self.step_size = step_size
        self.n_jobs = n_jobs
        self.pop_matrix = pop_matrix

        if timeLimit is not None:
            if iterationLimit is not None:
                raise ValueError("Cannot have both time and iteration limit")
            # time taken for each MCTS search in milliseconds
            self.timeLimit = timeLimit
            self.limitType = 'time'
        else:
            if iterationLimit is None:
                raise ValueError("Must have either time or iteration limit")
            # number of iterations of the search
            if iterationLimit < 1:
                # The check accepts 1, so the message must say "at least one".
                raise ValueError("Iteration limit must be at least one")
            self.searchLimit = iterationLimit
            self.limitType = 'iterations'

    def search(self, current_state):
        """Run the search and return ``(best_action, best_node)``.

        Despite its name, ``current_state`` must be the root ``treeNode`` of
        the tree (it needs ``children``, ``parent`` and visit counters), not
        a bare state object.
        """
        self.root = current_state

        if self.limitType == 'time':
            deadline = time.time() + self.timeLimit / 1000
            while time.time() < deadline:
                self.executeRound()
        else:
            for _ in range(self.searchLimit):
                self.executeRound()

        return self.getAction(self.root)

    def executeRound(self):
        """Select a node, run ``sim_rounds`` rollouts on it and backpropagate."""
        node = self.selectNode(self.root)
        rewards = Parallel(n_jobs=self.n_jobs)(
            delayed(self.rollout)(node.state, self.pop_matrix, self.step_size)
            for _ in range(self.sim_rounds))
        # The rewards are summed; backpropogate adds sim_rounds visits to match.
        reward = np.sum(rewards)
        self.backpropogate(node, reward)

    def selectNode(self, node):
        """Descend through fully expanded nodes and expand the first that is not."""
        while not node.isTerminal:
            if node.isFullyExpanded:
                node = self.getBestChild(node, self.explorationConstant)
            else:
                return self.expand(node)
        return node

    def expand(self, node):
        """Create and return one not-yet-existing child of ``node``."""
        actions = node.state.getPossibleActions()
        for action in actions:
            if action not in node.children:
                # The child state extends the parent's action sequence by one.
                new_actions = node.state.actions + [action]
                new_state = CovidState(new_actions, node.state.day)
                newNode = treeNode(new_state, node)
                node.children[action] = newNode
                if len(actions) == len(node.children):
                    node.isFullyExpanded = True
                return newNode

        raise Exception("Should never reach here")

    def backpropogate(self, node, reward):
        """Add ``reward`` and ``sim_rounds`` visits to ``node`` and its ancestors up to the root."""
        stop = self.root.parent
        # Identity comparison: nodes are positions in the tree, not values.
        # The None guard ends the walk cleanly if ``node`` is not below root.
        while node is not None and node is not stop:
            node.numVisits += self.sim_rounds
            node.totalReward += reward
            node = node.parent

    def getBestChild(self, node, explorationValue):
        """Return the child maximising mean reward plus the exploration bonus.

        The score is ``totalReward / numVisits +
        explorationValue * sqrt(log(parent visits) / numVisits)``; ties are
        broken at random.
        """
        bestValue = float("-inf")
        bestNodes = []
        for child in node.children.values():
            nodeValue = child.totalReward / child.numVisits + (
                explorationValue * np.sqrt(np.log(node.numVisits)
                                           / child.numVisits))
            if nodeValue > bestValue:
                bestValue = nodeValue
                bestNodes = [child]
            elif nodeValue == bestValue:
                bestNodes.append(child)
        return random.choice(bestNodes)

    def getAction(self, root):
        """Return ``(action, child)`` for the most-visited child of ``root``.

        Raises:
            ValueError: if ``root`` has no children, i.e. no search round ran
                (possible with a very small ``timeLimit``).
        """
        c = root.children
        if not c:
            raise ValueError(
                "Search produced no child of the root; "
                "increase the time or iteration limit")
        # Chosen by visit count rather than by mean reward.
        best_action = max(c, key=lambda key: c[key].numVisits)
        best_node = c[best_action]
        return best_action, best_node

In [84]:
def run_full_mcst(rolloutPolicy='rolloutPolicy', iterationLimit=100, sim_rounds=10, n_jobs = 6, step_size = 7, days = 210):
    """Simulate an epidemic for ``days`` days, choosing policies with MCTS.

    A fresh search is run on the first day of every ``step_size``-day block;
    the chosen policy is applied until the next decision.

    Args:
        rolloutPolicy: rollout name forwarded to ``mcts``.
        iterationLimit: rounds per search, forwarded to ``mcts``.
        sim_rounds: rollouts per round, forwarded to ``mcts``.
        n_jobs: joblib worker count, forwarded to ``mcts``.
        step_size: number of days a chosen policy stays in force.
        days: maximum number of simulated days.

    Returns:
        ``(data, actions, tree)``: ``data`` has one entry per simulated day
        (column 1 of the population matrix, rows ordered by column 0);
        ``actions`` lists the chosen policy names; ``tree`` is the ``mcts``
        object of the last search, or ``None`` if no search was run.
    """
    pop_matrix = simp.init_infection(.01)
    data = []
    actions = []
    # Defined up front so the return below works even when the loop body
    # never reaches a decision (days == 0, or the stop condition holds on day 1).
    tree = None

    for day in tqdm(range(1, days+1)):
        # Stop once more than 90% of the rows have value -1 in column 1
        # (NOTE(review): meaning of status -1 is defined in simulate_pandemic — confirm).
        if np.count_nonzero(pop_matrix[:, 1] == -1) > pop_matrix.shape[0]*.9:
            break

        # Choose a new policy on the first day of each step_size-day block.
        # ``(day - 1) % step_size == 0`` equals the former ``day % step_size == 1``
        # for step_size >= 2 and, unlike it, also fires when step_size == 1
        # (previously no policy was ever chosen and ``restrictions`` was unbound).
        if (day - 1) % step_size == 0:
            tree = mcts(iterationLimit=iterationLimit, sim_rounds=sim_rounds, step_size=step_size, 
                        n_jobs=n_jobs, rolloutPolicy=rolloutPolicy, pop_matrix=pop_matrix)

            root = treeNode(CovidState(actions=[], day=day), parent=None)
            action, _ = tree.search(root)

            actions.append(action)
            restrictions = policies[action]

        # Advance the real population by one day under the current policy.
        pop_matrix = simp.spread_infection(pop_matrix, restrictions, day)
        pop_matrix = simp.lambda_leak_expose(pop_matrix, day)
        pop_matrix = simp.update_population(pop_matrix)

        # Record column 1 with rows ordered by column 0.
        data.append(np.array(sorted(pop_matrix,key=lambda x: x[0]))[:,1]) 

    return data, actions, tree

In [85]:
from tqdm import tqdm

In [86]:
# Full 210-day run with a policy decision every 7 days. The progress bar
# recorded below shows this cell took about 2 h 35 min to finish.
data, actions, tree = run_full_mcst(
    iterationLimit=155,
    sim_rounds=6,
    n_jobs=6,
    step_size=7,
    days=210,
)

100%|██████████████████████████████████████████████████████████████████████████████| 210/210 [2:35:04<00:00, 44.31s/it]


In [87]:
# Presumably renders the beds graph for the run above (see
# visualization.SIR_Plot). The third argument (7) equals the step_size used
# for the run; the last argument looks like an output name/label encoding the
# run settings — TODO confirm both against make_beds_graph's signature.
make_beds_graph(data, actions, 7, '155_6_sum_no_norm')

In [88]:
# Summarise the root's children from the last search: mean reward per rollout
# and visit count. A plain loop is used so the cell does not also emit a
# meaningless list of None values as its result.
for k, v in tree.root.children.items():
    print(f'Nó {k} com {v.totalReward/v.numVisits} avg Reward e {v.numVisits} visitas')

Nó Social Distancing com -0.7080163260220035 avg Reward e 192 visitas
Nó Lockdown com -0.7532596332851792 avg Reward e 156 visitas
Nó Hard Quarantine com -0.7049670355742101 avg Reward e 192 visitas
Nó Light Quarantine com -0.6804879063670284 avg Reward e 222 visitas
Nó Unrestricted com -0.7400445279174248 avg Reward e 168 visitas


[None, None, None, None, None]