# Multi-Agent Deep RL in Practice
## MARL Neural Networks in PyTorch

In [4]:
import torch
from torch import nn
from typing import List

class MultiAgentFCNetwork(nn.Module):
    """A bundle of independent fully connected networks, one per agent.

    Agent ``i`` owns an MLP mapping ``in_sizes[i]`` features to
    ``out_sizes[i]`` outputs through two hidden layers of 64 units with
    ReLU activations. No parameters are shared between agents.
    """

    def __init__(self, in_sizes: List[int], out_sizes: List[int]):
        """Build one network per ``(in_size, out_size)`` pair.

        Args:
            in_sizes: input width of each agent's network.
            out_sizes: output width of each agent's network; must have the
                same length as ``in_sizes``.
        """
        super().__init__()

        # ReLU is the non-linearity placed between the linear layers
        activ = nn.ReLU
        # widths of the two hidden layers
        hidden_dims = (64, 64)

        # both size lists carry one entry per agent
        n_agents = len(in_sizes)
        assert n_agents == len(out_sizes)

        # container for the `n_agents` independent networks
        self.networks = nn.ModuleList()

        for obs_dim, act_dim in zip(in_sizes, out_sizes):
            # consecutive pairs of `widths` are the (in, out) sizes of each
            # linear layer; an activation precedes every layer but the first
            widths = (obs_dim, *hidden_dims, act_dim)
            layers = [nn.Linear(widths[0], widths[1])]
            for fan_in, fan_out in zip(widths[1:], widths[2:]):
                layers.extend((activ(), nn.Linear(fan_in, fan_out)))
            self.networks.append(nn.Sequential(*layers))

    def forward(self, inputs: List[torch.Tensor]):
        """Run agent ``i``'s network on ``inputs[i]``; return the outputs in agent order."""
        # Launch every agent's forward pass first, then collect the results.
        # NOTE(review): per the PyTorch docs, torch.jit.fork only executes
        # asynchronously under TorchScript; in plain eager Python the calls
        # run one after another.
        pending = []
        for agent_idx, net in enumerate(self.networks):
            pending.append(torch.jit.fork(net, inputs[agent_idx]))

        outputs = []
        for handle in pending:
            outputs.append(torch.jit.wait(handle))
        return outputs

## Seamless Parameter Sharing Implementation

In [5]:
class MultiAgentFCNetwork_SharedParameters(nn.Module):
    """A single fully connected network whose parameters are shared by all agents.

    The network has two hidden layers of 64 units with ReLU activations.
    Because every agent is served by the same layers, all agents must use
    the same input size and the same output size. If they do not, pad the
    inputs and outputs to a common size before using this module.
    """

    def __init__(self, in_sizes: List[int], out_sizes: List[int]):
        """Build the shared network.

        Args:
            in_sizes: input width for each agent; all entries must be equal.
            out_sizes: output width for each agent; all entries must be equal.

        Raises:
            ValueError: if the lists are empty, differ in length, or contain
                differing sizes.
        """
        super().__init__()

        # we use the ReLU activation function
        activ = nn.ReLU
        # we use two hidden layers of 64 units each
        hidden_dims = (64, 64)

        # the number of agents is the length of the input and output vector
        n_agents = len(in_sizes)
        if n_agents != len(out_sizes):
            raise ValueError(
                f"in_sizes has {n_agents} entries but out_sizes has {len(out_sizes)}"
            )
        if n_agents == 0:
            raise ValueError("at least one agent is required")
        if len(set(in_sizes)) != 1 or len(set(out_sizes)) != 1:
            raise ValueError(
                "parameter sharing requires identical input and output sizes "
                "across agents; pad the inputs and outputs first"
            )

        # nn.Linear needs a single integer width, not the per-agent list;
        # the sizes were just checked to be uniform, so take the first entry
        in_size = in_sizes[0]
        out_size = out_sizes[0]

        # we will create one (shared) network
        network = [
            nn.Linear(in_size, hidden_dims[0]),
            activ(),
            nn.Linear(hidden_dims[0], hidden_dims[1]),
            activ(),
            nn.Linear(hidden_dims[1], out_size),
        ]

        self.network = nn.Sequential(*network)

    def forward(self, inputs: List[torch.Tensor]):
        """Apply the shared network to each agent's input; return the outputs in order."""
        # One forward pass of the same network per agent input.
        # NOTE(review): per the PyTorch docs, torch.jit.fork only executes
        # asynchronously under TorchScript; in plain eager Python the calls
        # run one after another.
        futures = [torch.jit.fork(self.network, inp) for inp in inputs]
        results = [torch.jit.wait(fut) for fut in futures]
        return results

In [11]:
# Example usage: one observation per agent, fed to independent per-agent networks.
# The observations must be floating-point tensors: nn.Linear weights are
# float32, and integer (Long) inputs raise
# "RuntimeError: mat1 and mat2 must have the same dtype".

# example of observation of agent 1
obs1 = torch.tensor([1, 0, 2, 3, 0], dtype=torch.float32)
# example of observation of agent 2
obs2 = torch.tensor([0, 0, 0, 3, 0], dtype=torch.float32)
obs_sizes = (5, 5)

# example of action of agent 1
act1 = [0, 0, 1]  # one-hot encoded
# example of action of agent 2
act2 = [1, 0, 0]  # one-hot encoded
action_sizes = (3, 3)

# one output vector of length 3 per agent
model = MultiAgentFCNetwork(obs_sizes, action_sizes)
model([obs1, obs2])

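RuntimeError: mat1 and mat2 must have the same dtype, but got Long and Float
# Note: this error occurs when integer (Long) observations are fed to nn.Linear, whose weights are Float; creating the observations as float tensors (e.g. dtype=torch.float32) avoids it.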