In [None]:
import random
import numpy as np

In [8]:
class Datagram:
    """A packet identified only by the addresses of its two endpoints.

    @param source: address of the router where the packet enters the network
    @param destination: address of the router the packet must reach
    """

    def __init__(self, source: int, destination: int):
        self.source, self.destination = source, destination

In [10]:
class Queue:
    """Bounded first-in/first-out buffer of datagrams.

    @param size: maximum number of datagrams the queue may hold at once
    """

    def __init__(self, size: int):
        self.queue = []  # stored datagrams, oldest first
        self.size = size

    def is_empty(self):
        """Return True when no datagram is waiting in the queue."""
        return not self.queue

    def enqueue(self, datagram: Datagram):
        """Store ``datagram`` at the back of the queue if there is room.

        @return: True if the datagram was stored, False if the queue was
                 already full (the datagram is then not stored)
        """
        if len(self.queue) < self.size:
            self.queue.append(datagram)
            return True
        return False

    def dequeue(self):
        """Remove and return the oldest datagram, or None if the queue is empty."""
        # The method is named ``is_empty``; calling ``isEmpty`` raised
        # AttributeError on every dequeue.
        if self.is_empty():
            return None
        return self.queue.pop(0)

In [11]:
class Event:
    """A datagram that is in transit towards a router.

    @param delay: remaining number of scheduler steps before delivery
    @param destination: address of the router that will receive the datagram
    @param datagram: the datagram being carried
    """

    def __init__(self, delay: int, destination: int, datagram: Datagram):
        self.datagram = datagram
        self.destination = destination
        self.delay = delay

In [12]:
class Router:
    """Network node that buffers datagrams and forwards them to neighbours.

    @param address: address identifying this router
    @param size: capacity of the router's datagram queue
    """

    def __init__(self, address: int, size: int):
        self.address = address
        # Maps neighbour address -> (neighbour Router, link delay).
        self.adjacents = {}
        self.size = size
        self.queue = Queue(self.size)

    # mapping method
    def link(self, router, delay: int):
        """Register a one-way link from this router to ``router``.

        Per the original note, the return value follows the ``mapear`` method
        of ``TabelaDeRepasse``. This implementation has no table-capacity
        limit, so only an already-mapped address yields False.

        @param router: neighbour reachable through the new link
        @param delay: delay stored with the link
        @return: True if the mapping was created, False if the neighbour's
                 address already has a mapping
        """
        if router.address in self.adjacents:
            return False
        self.adjacents[router.address] = (router, delay)
        return True

    def receive(self, datagram: Datagram):
        """Take in a datagram that arrived at this router.

        A datagram addressed to this router is simply dropped. Any other
        datagram is queued for forwarding; if the queue is full it is
        discarded and an overflow message is printed.
        """
        if datagram.destination == self.address:
            return
        # Let the queue decide whether there is room. Comparing
        # ``queue.size`` with ``self.size`` compared the capacity with
        # itself, so every datagram was reported as an overflow.
        if not self.queue.enqueue(datagram):
            print(f'Fila em {self.address} estourou ')

    def process(self, action: int):
        """Forward the oldest queued datagram to the neighbour ``action``.

        @param action: address of the neighbour chosen as next hop; it must
                       be a key of ``adjacents`` (KeyError otherwise)
        @return: an Event carrying the datagram with the delay of the chosen
                 link, or None when the queue is empty or the datagram was
                 addressed to this router
        """
        if self.queue.is_empty():
            return None
        datagram = self.queue.dequeue()
        if datagram.destination == self.address:
            return None
        link_delay = self.adjacents[action][1]
        return Event(link_delay, action, datagram)

In [13]:
class Network:
    """Collection of routers plus a random traffic generator."""

    def __init__(self, size: int, gen_prob: float, max_data: int, generate: bool=True) -> None:
        """
        @param size: size of the network (maximum number of routers)
        @param gen_prob: probability of generating one packet
        @param max_data: max number of datagrams that can be generated in one instant
        @param generate: when False, generate_random_data() does nothing
        """
        self.size = size
        self.quantity = 0  # number of routers registered so far
        self.routers = {}  # maps router address -> router
        self.gen_prob = gen_prob
        self.max_data = max_data
        self.generate = generate

    def add_router(self, router: Router):
        """Register ``router`` under its address.

        @return: True if the router was added, False if the network is full
                 or the address is already registered
        """
        if self.quantity >= self.size or router.address in self.routers:
            return False
        self.routers[router.address] = router
        self.quantity += 1
        return True

    def generate_random_data(self):
        """Inject up to ``max_data`` random datagrams into the network.

        Each of the ``max_data`` attempts creates a datagram with probability
        ``gen_prob``. Sender and receiver are drawn independently from the
        registered addresses, and the datagram is handed to the sender.
        """
        if not self.generate or not self.routers:
            # Generation disabled, or no router to pick from.
            return
        # Materialise the addresses once: random.choice needs a sequence of
        # addresses, and wrapping the keys view in a list returned the view.
        addresses = list(self.routers)
        for _ in range(self.max_data):
            if np.random.random() < self.gen_prob:
                sender = random.choice(addresses)
                # !!!!! Assuming the whole network is connected !!!!!
                receiver = random.choice(addresses)
                self.routers[sender].receive(Datagram(sender, receiver))

In [14]:
class Scheduler:
    """Advances the simulation one instant at a time.

    @param inicial_instant: instant at which the simulation starts
    @param network: network whose routers are driven by this scheduler
    @param size: stored as ``self.size``; not used by this class
    """

    def __init__(self, inicial_instant: int, network: Network, size: int):
        self.instant = inicial_instant
        self.network = network
        self.size = size
        self.events = []  # datagrams currently in transit

    def process(self, action: list[int]):
        """Run one simulation step.

        In order: deliver every event whose delay reached 0, decrement the
        delay of the remaining events, let each router forward one datagram,
        generate new random traffic, and advance the instant.

        @param action: next-hop choice per router, indexed by router address
        """
        routers = self.network.routers

        still_travelling = []
        for event in self.events:
            if event.delay == 0:
                # ``destination`` holds an address, so the router object has
                # to be looked up before delivering.
                routers[event.destination].receive(event.datagram)
            else:
                event.delay -= 1
                still_travelling.append(event)
        self.events = still_travelling

        # Iterate over the router objects; iterating the dict itself yields
        # only their addresses.
        for router in routers.values():
            event = router.process(action[router.address])
            if event is not None:
                self.events.append(event)

        self.network.generate_random_data()
        self.instant += 1


In [None]:
class Environment:
    """Reinforcement-learning view of a network driven by a scheduler.

    @param network: object exposing ``routers`` (address -> router)
    @param scheduler: object exposing ``events`` and ``process(action)``
    """

    def __init__(self, network, scheduler):
        self.network = network
        self.scheduler = scheduler

    def get_all_states(self):
        """Placeholder: not implemented yet (returns None)."""
        ...

    def get_state(self):
        """Describe the current situation of the network.

        @return: a tuple ``(events_state, routers_state)``.
                 ``events_state`` lists, per event in transit,
                 ``(remaining delay, next-hop address, datagram destination)``.
                 ``routers_state`` lists, per router, the destinations queued
                 on that router followed by the destinations queued on each
                 of its neighbours.
        """
        # NOTE(review): the result contains lists, so it is not hashable and
        # cannot be used directly as a dictionary key.
        events_state = [
            # Events carry a remaining ``delay``; they have no ``instant``.
            (event.delay, event.destination, event.datagram.destination)
            for event in self.scheduler.events
        ]

        routers_state = []
        # ``routers`` is keyed by address; the router objects are the values.
        for router in self.network.routers.values():
            router_state = [self._queued_destinations(router)]
            for neighbour, _delay in router.adjacents.values():
                router_state.append(self._queued_destinations(neighbour))
            routers_state.append(router_state)

        return (events_state, routers_state)

    @staticmethod
    def _queued_destinations(router):
        """Return the destination of every datagram waiting on ``router``."""
        # The datagrams live in the list held by the router's Queue object.
        return [datagram.destination for datagram in router.queue.queue]

    def take_action(self, action):
        """Apply ``action`` for one scheduler step.

        The reward is minus the number of datagrams still in the network
        (queued on routers or in transit), counted before the step runs.

        @param action: passed unchanged to ``scheduler.process``
        @return: ``(reward, new_state)``
        """
        reward = -len(self.scheduler.events)
        for router in self.network.routers.values():
            reward -= len(router.queue.queue)
        self.scheduler.process(action)
        new_state = self.get_state()
        return reward, new_state

    def start(self, state = None):
        """Placeholder: not implemented yet (returns None)."""
        ...

    def possible_actions(self, current=True):
        """Placeholder: not implemented yet (returns None)."""
        ...

# Cria Rede Genérica
![rede 1](./img/rede1.png)

In [None]:
# Build the four-router example network.
network = Network(4, 1.0, 1)

# Routers 1..4, each with a queue capacity of 2.
r1, r2, r3, r4 = (Router(_address, 2) for _address in (1, 2, 3, 4))

# One-way links as (from, to, delay). The two directions of a pair of routers
# are declared separately and do not always use the same delay.
for _origin, _neighbour, _delay in (
    (r1, r2, 2),
    (r1, r3, 5),
    (r2, r1, 2),
    (r2, r4, 3),
    (r3, r1, 2),
    (r3, r4, 4),
    (r4, r2, 2),
    (r4, r3, 5),
):
    _origin.link(_neighbour, _delay)

for _router in (r1, r2, r3, r4):
    network.add_router(_router)

# Do not leave the loop helpers behind as extra module-level names.
del _origin, _neighbour, _delay, _router

scheduler = Scheduler(0, network, 5)

environment = Environment(network, scheduler)

Obs.: se não houver criação de eventos AGENDADOS, o método schedule() parece pouco útil; podemos inserir um novo datagrama diretamente em `datagrams` do roteador de origem.

Onde está deletando os eventos?

A ordem dos elementos nos estados não deve importar, ex.: [1, 2] = [2, 1].

In [None]:
class Agent_Q_Learning:
    """Tabular Q-learning agent.

    ``values`` maps each state to a dict of action -> estimated value.

    @param env: environment exposing ``start()``, ``get_state()`` and
                ``take_action(action)``
    """

    def __init__(self, env):
        self.env = env
        self.policy = {}
        self.values = {}

    def q_learning(self, alpha, epsilon, time_steps, gama):
        """Run ``time_steps`` epsilon-greedy Q-learning updates.

        @param alpha: learning rate applied to each update
        @param epsilon: probability of picking a random action
        @param time_steps: number of environment steps to perform
        @param gama: discount factor applied to the best next-state value
        """
        self.initialize()  # Set up the value table
        self.env.start()

        for _ in range(time_steps):
            state = self.env.get_state()
            action_values = self.values[state]

            greedy = random.random() >= epsilon
            if greedy:
                action = max(action_values, key=action_values.get)
            else:
                # or use possible_actions instead
                action = random.choice(list(action_values.keys()))

            reward, next_state = self.env.take_action(action)
            best_next = max(self.values[next_state].values())
            old_value = action_values[action]
            action_values[action] = old_value + alpha * (reward + gama * best_next - old_value)

    def initialize(self):
        """Placeholder: not implemented yet (returns None)."""

In [None]:
class StateActionArray:
    """Placeholder for a table of values indexed by (state, action).

    Nothing is implemented yet: the constructor stores nothing and every
    method returns None.
    """

    def __init__(self, env: Environment):
        """Not implemented yet; ``env`` is currently ignored."""

    def get(self, state, action):
        """Not implemented yet; currently returns None."""

    def set(self, state, action, value):
        """Not implemented yet; currently does nothing."""

class Policy:
    """Placeholder for a mapping from states to actions.

    Nothing is implemented yet: the constructor stores nothing and every
    method returns None.
    """

    def __init__(self, env:Environment, eps: float=0):
        """Not implemented yet; ``env`` and ``eps`` are currently ignored."""

    def get(self, state):
        """Not implemented yet; currently returns None."""

    def set(self, state, action):
        """Not implemented yet; currently does nothing."""
        
def sarsa(env: Environment, returns: StateActionArray, policy: Policy, gamma: float, alpha: float):
    """Run one SARSA episode, updating ``returns`` and ``policy`` in place.

    @param env: environment to interact with
    @param returns: table of estimated returns per (state, action)
    @param policy: policy used to pick actions; made greedy on each visited state
    @param gamma: discount factor applied to the next state's estimate
    @param alpha: learning rate
    """
    # The environment's entry point is ``start()`` (as used by the other
    # agents in this file); it has no ``reset()``.
    env.start()
    state = env.get_state()
    action = policy.get(state)
    while True:
        reward, next_state = env.take_action(action)
        next_action = policy.get(next_state)
        # NOTE(review): ``terminal()`` is not defined on Environment in this
        # file; confirm where it is meant to come from.
        finished = env.terminal()

        current_return = returns.get(state, action)
        # A terminal state contributes no future return.
        next_return = 0 if finished else returns.get(next_state, next_action)

        new_return = current_return + alpha * (gamma * next_return - current_return + reward)
        returns.set(state, action, new_return)

        # Make the policy greedy on the state that was just updated.
        # NOTE(review): possible_actions() is queried after the step, i.e.
        # for the environment's current state; confirm that this matches the
        # actions available in ``state``.
        actions = env.possible_actions()
        values = [returns.get(state, act) for act in actions]
        policy.set(state, actions[np.argmax(values)])

        if finished:
            break
        state, action = next_state, next_action

In [None]:
class Episode:
    """One rollout of a policy, recording visited pairs and rewards.

    Constructing an episode starts the environment in ``initialState``.

    @param env: environment exposing ``start(state)`` and ``take_action(action)``
    @param initialState: state in which the episode begins
    @param initialAction: first action taken, regardless of the policy
    @param policy: mapping state -> action used after the first step
    @param max_steps: number of recorded steps at which the episode is cut off
    """

    def __init__(self, env : Environment, initialState, initialAction, policy, max_steps: int = 400):
        self.env = env
        env.start(initialState)
        self.initialState = initialState
        self.initialAction = initialAction
        self.pairs = []    # visited (state, action) pairs, in order
        self.rewards = []  # reward obtained after each pair
        self.policy = policy
        self.steps = 0     # number of recorded steps; equals len(self.rewards)
        self.max_steps = max_steps

    def genEpisode(self):
        """Play the episode until a terminal state or the step limit.

        A returned state equal to -1 is treated as terminal.
        """
        state = self.initialState
        action = self.initialAction
        while True:
            self.pairs.append((state, action))
            # The environment's method is ``take_action``; ``takeAction``
            # does not exist and raised AttributeError.
            reward, state = self.env.take_action(action)
            self.rewards.append(reward)
            self.steps += 1
            # ``>=`` so the loop still ends if the counter is already past
            # the limit when the method is called.
            if state == -1 or self.steps >= self.max_steps:
                return
            action = self.policy[state]


In [None]:
class Agent_MC:
    """Monte Carlo control agent with exploring starts.

    @param env: environment exposing ``get_all_states()`` and
                ``possible_actions(state)``
    """

    def __init__(self, env: Environment):
        self.env = env
        # state -> list of actions available in that state
        self.actions = {
            state: list(env.possible_actions(state))
            for state in env.get_all_states()
        }
        # Every (state, action) pair. Kept as a list so that a random start
        # pair can be drawn with random.choice.
        self.pairs = [
            (state, action)
            for state, actions in self.actions.items()
            for action in actions
        ]
        self.Q_values = {}  # pair -> [mean return, number of samples]
        self.policy = {}    # state -> currently preferred action
        self.initialize()

    def initialize(self):
        """Reset all estimates to zero and draw a random initial policy."""
        for pair in self.pairs:
            self.Q_values[pair] = [0, 0]
        for state, actions in self.actions.items():
            if actions:  # a state without actions gets no policy entry
                self.policy[state] = random.choice(actions)

    def MCES(self, numEpisodes, gamma):
        """Improve the policy with first-visit Monte Carlo, exploring starts.

        @param numEpisodes: number of episodes to generate
        @param gamma: discount factor used when accumulating returns
        """
        for _ in range(numEpisodes):
            start_state, start_action = random.choice(self.pairs)
            episode = Episode(self.env, start_state, start_action, self.policy)
            episode.genEpisode()

            # Index of the first occurrence of each pair, computed once
            # instead of rescanning the episode prefix at every step.
            first_visit = {}
            for index, pair in enumerate(episode.pairs):
                first_visit.setdefault(pair, index)

            g = 0
            for step in range(episode.steps - 1, -1, -1):
                g = gamma * g + episode.rewards[step]
                pair = episode.pairs[step]
                if first_visit[pair] != step:
                    continue
                # Fold the new return into the running mean.
                estimate = self.Q_values[pair]
                estimate[0] = (estimate[0] * estimate[1] + g) / (estimate[1] + 1)
                estimate[1] += 1
                # Make the policy greedy for the visited state.
                state = pair[0]
                candidates = self.actions[state]
                best = np.argmax([self.Q_values[(state, action)][0] for action in candidates])
                self.policy[state] = candidates[best]
