In [6]:
import torch 
import torch.nn as nn

import torch 
import torch.nn as nn

class Attention(nn.Module): 
    def __init__(self, dmodel, dk): 
        super(Attention, self).__init__() 
        self.dk = dk # 键的维度
        self.W = nn.Linear(dmodel, dk) #查询的线性层
        self.V = nn.Linear(dk, dk)  # 调整输入维度 值的线性层
        self.a = nn.Linear(dmodel, 1) #注意力的线性层

    def forward(self, Q, K, V):
        a = self.a(Q)
        a = torch.tanh(a + self.W(Q) + K)  # 确保维度匹配
        a = self.V(a)
        a = torch.softmax(a, dim=-1)
        return a * V
    
# 使用 Attention 模块
attention = Attention(dmodel=64, dk=32) 
Q = torch.randn(1, 1, 64)  #(1,1,64) 1个样本 1个查询 64个维度 1*1*64
K = torch.randn(1, 32, 32)  #(1,32,32) 1个样本 32个键 32个维度 1*32*32
V = torch.randn(1, 32, 32)

output = attention(Q, K, V) 
print(output.shape) 
#print(output) 
print(Q)
print(K[0])


# torch.Size([1, 1, 32]) ```


torch.Size([1, 32, 32])
tensor([[[-0.7812,  0.1937,  0.6831,  0.3020, -1.2855,  0.0128,  0.4752,
          -0.6468, -1.7165, -0.4823,  2.3699, -2.2589, -1.0785,  1.7208,
           0.0644, -0.3116,  0.4442, -0.9582,  0.9313,  0.1171, -0.9809,
           0.9163, -0.3754,  0.0775, -0.4352,  0.1542, -0.5990, -1.6279,
          -0.2050, -1.2688,  0.2543,  1.2175,  0.2484, -1.2445,  1.2138,
          -2.5827,  2.4274, -0.3891,  0.6663,  0.2754, -2.1325, -1.4978,
          -0.3794, -1.5681,  0.9710,  0.7007,  0.4299,  0.1714, -1.0776,
          -0.9400, -0.4812, -0.7455,  1.3695, -0.4955, -0.8966,  0.1539,
           1.9773, -1.5391,  1.3548, -1.4211,  0.0226,  1.5352, -1.3459,
           0.1566]]])
tensor([[-0.5906, -2.0971, -0.1380,  ..., -1.6176, -0.3098, -0.2979],
        [ 0.3817, -0.4182,  0.0311,  ..., -0.1159, -0.1287, -0.0273],
        [ 0.0532,  0.0124,  1.1253,  ..., -1.0223,  0.0213,  1.3701],
        ...,
        [-1.0162, -0.8913,  1.8390,  ...,  1.0211,  0.5831,  1.8436],
    

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    def __init__(self, in_dim, hidden_dim):
        super(Attention, self).__init__()
        self.query = nn.Linear(in_dim, hidden_dim) #查询
        self.key = nn.Linear(in_dim, hidden_dim)
        self.value = nn.Linear(in_dim, hidden_dim)
    
    def forward(self, x):
        '''
        公式为：Attention(Q, K, V) = softmax(Q*K^T/sqrt(d_k)) * V
        Q: 查询
        K: 键
        V: 值
        d_k: 键的维度 这里用hidden_dim表示 即:K.size(-1)
        对张量 K 进行转置（transpose）。具体来说，这个操作会将张量 K 的第0维和第1维进行交换
        '''
        Q = self.query(x) 
        K = self.key(x)
        V = self.value(x)
        attn_scores = torch.matmul(Q, K.transpose(0, 1)) / torch.sqrt(torch.tensor(K.size(-1), dtype=torch.float32))
        attn_probs = F.softmax(attn_scores, dim=-1)
        attended_values = torch.matmul(attn_probs, V)
        return attended_values

class MLPNetworkWithAttention(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128, attention_dim=64):
        super(MLPNetworkWithAttention, self).__init__()
        self.attention = Attention(in_dim, attention_dim)
        self.fc1 = torch.nn.Linear(attention_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        # 根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        # Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        # 初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
    
    def forward(self, x): #这里的x是状态加动作
        x = self.attention(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# 创建带有注意力机制的critic
global_obs_dim = 128  # 示例输入维度
critic = MLPNetworkWithAttention(global_obs_dim, 1)
#print(critic)
query = nn.Linear(128, 1)
x = torch.randn(12, 128)
Q = query(x)
print(Q.transpose(0, 1).size())
print(Q.size()) #.size表示维度
print(Q.size(-1))
an = Attention(128, 64)
output = an(x)
print(output)

torch.Size([1, 12])
torch.Size([12, 1])
1
tensor([[ 3.7421e-01,  4.7305e-02,  2.4503e-03, -1.7902e-03,  6.8952e-02,
          1.5911e-01, -1.1148e-01,  3.2580e-01,  2.3623e-02,  1.6507e-01,
          2.8500e-02,  1.7523e-02,  1.3803e-01,  2.0690e-01,  2.4201e-02,
         -1.1153e-01,  8.0610e-03,  1.5245e-01, -7.2237e-02,  8.8497e-02,
         -1.5001e-01, -1.4662e-01,  4.8354e-01, -6.8436e-02,  2.4036e-01,
          1.2587e-01, -2.9741e-01, -1.9121e-02, -3.4182e-01, -3.6344e-02,
         -1.1900e-01,  1.6724e-01,  3.6068e-01, -7.9976e-02,  1.6924e-01,
         -5.1975e-02, -1.9620e-01, -2.6547e-03,  1.1183e-01, -3.5658e-02,
          4.1525e-01,  2.4581e-01, -4.7811e-01,  1.6771e-01,  1.8765e-01,
          3.5561e-02,  2.0565e-01,  1.7503e-01,  6.9023e-02, -2.1510e-01,
         -3.2191e-01, -2.7941e-01,  1.9586e-01,  1.6022e-01,  9.0823e-02,
         -1.2894e-01, -2.6922e-01,  9.0844e-02,  1.4083e-01, -2.0077e-01,
          5.7030e-02,  8.1929e-02, -6.1835e-02,  1.0544e-01],
        

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiAgentAttention(nn.Module):
    def __init__(self, in_dim, hidden_dim):
        super(MultiAgentAttention, self).__init__()
        self.query = nn.Linear(in_dim, hidden_dim)
        self.key = nn.Linear(in_dim, hidden_dim)
        self.value = nn.Linear(in_dim, hidden_dim)
    
    def forward(self, agent_states):
        """
        Inputs:
            agent_states (list of tensors): List of states for each agent
        Outputs:
            attended_values (tensor): Attention-weighted values for each agent
        """
        Q = [self.query(state) for state in agent_states]
        K = [self.key(state) for state in agent_states]
        V = [self.value(state) for state in agent_states]
        
        Q = torch.stack(Q)
        K = torch.stack(K)
        V = torch.stack(V)
        
        attn_scores = torch.matmul(Q, K.transpose(0, 1)) / torch.sqrt(torch.tensor(K.size(-1), dtype=torch.float32))
        attn_probs = F.softmax(attn_scores, dim=-1)
        attended_values = torch.matmul(attn_probs, V)
        
        return attended_values

class MultiAgentMLPNetworkWithAttention(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128, attention_dim=64, ):
        super(MultiAgentMLPNetworkWithAttention, self).__init__()
        self.attention = MultiAgentAttention(in_dim, attention_dim)
        self.fc1 = torch.nn.Linear(attention_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        # 根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        # Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        # 初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
    
    def forward(self, agent_states):
        """
        Inputs:
            agent_states (list of tensors): List of states for each agent
        Outputs:
            outputs (list of tensors): Outputs for each agent
        """
        attended_values = self.attention(agent_states)
        outputs = []
        for value in attended_values:
            x = F.relu(self.fc1(value))
            x = F.relu(self.fc2(x))
            output = self.fc3(x)
            outputs.append(output)
        return outputs


In [21]:
import torch

# 假设 state_list 包含两个张量，每个张量的形状为 (batch_size, state_dim)
state_list = [
    torch.tensor([[1, 2, 3], [4, 5, 6]]),  # 形状为 (2, 3)
    torch.tensor([[7, 8, 9], [10, 11, 12]])  # 形状为 (2, 3)
]

# 假设 act_list 包含两个张量，每个张量的形状为 (batch_size, action_dim)
act_list = [
    torch.tensor([[13, 14], [15, 16]]),  # 形状为 (2, 2)
    torch.tensor([[17, 18], [19, 20]])  # 形状为 (2, 2)
]
x = torch.cat(state_list + act_list, 1) #按列拼接
print(x)


tensor([[ 1,  2,  3,  7,  8,  9, 13, 14, 17, 18],
        [ 4,  5,  6, 10, 11, 12, 15, 16, 19, 20]])


In [33]:
import torch
import torch.nn as nn
import numpy as np
class Attention_block(nn.Module):
    def __init__(self, embedding_size, hidden_dim):
        super(Attention_block,self).__init__()

        self.key_extractors=nn.Linear(embedding_size, hidden_dim, bias=False)
        self.selector_extractors=nn.Linear(embedding_size, hidden_dim, bias=False)
        self.value_extractors=nn.Sequential(nn.Linear(embedding_size,hidden_dim),nn.LeakyReLU())

        self.context_size = hidden_dim
    '''
        self.W_k = nn.Linear(embedding_size, hidden_dim)  # 所有智能体共享

        self.W_q = nn.Linear(embedding_size, hidden_dim)

        self.V = nn.Linear(embedding_size, hidden_dim)  # 转换为信息向量（关键信息）

        self.ReLU = nn.ReLU()
        self.context_size = hidden_dim
    '''
    #input: 当前agent状态编码后E_q,其余agent状态编码后的e_k(e_k 总共5个智能体)
    # output：其余agent状态对当前agent决策贡献的信息，是一个向量xi
    def forward(self, e_q, e_k):
        query = self.selector_extractors(e_q)  # 转换为索引
        print('que',query.shape,query.shape[0],query.shape[1])
        keys = self.key_extractors(e_k)  # 转换为关键字，为了比较重要性
        values=self.value_extractors(e_k)
        print('key',keys.shape)
        print('val',values.shape)
        #print(query.view(query.shape[0], 1, self.context_size).shape) #.view是维度变化 本身不变
        #print(query.shape)
        print(query.view(query.shape[0], 1, -1).shape)
        #print(keys.view(keys.shape[1], self.context_size, -1).shape)
        print(keys.permute(0, 2, 1).shape)
        # 矩阵相乘，3维的。第一维不变（batchsize），第二维和第三维是要相乘的矩阵。
        # view是把维度变化下，（batchsize）*1*（context_size）
        weight_log =torch.matmul(query.view(query.shape[0], 1, -1),keys.permute(0, 2, 1))
        '''
        weight_log = torch.bmm(query.view(query.shape[0], 1, self.context_size),keys.view(keys.shape[0], self.context_size, -1).T)
        '''
        print(weight_log.shape)
        scaled_weight_log=weight_log/np.sqrt(keys[0].shape[1])
        print(scaled_weight_log.shape)
        print(keys[0].shape[1])
        attend_weights = torch.softmax(scaled_weight_log, dim=2) # 对第二维度归一化，0到1，每一个的重要程度 2*1*5
        print(attend_weights.shape)
        print(values.permute(0, 2, 1).shape)
        attention_values = (values.permute(0, 2, 1) * attend_weights).sum(dim=2) # 2*4*5 * 2*1*5 -> 2*4*5
        print(attention_values.shape)
        '''
        weight = torch.softmax(scaled_weight_log, dim=2)  # batchsize*1*5的矩阵，对第二维度归一化，0到1，每一个的重要程度
        
        # print(weight.shape)
        # print(self.ReLU(self.V(e_k)).shape)

        # 每一个状态对我当前状态的重要性，做一个加权和
        attention_context = torch.bmm(weight, self.ReLU(self.V(e_k)).view(query.shape[0], -1, self.context_size))
        '''
        return attention_values   # batchsize*1*context_size
    
# 例子
e_q = torch.randn(2, 3)  # 当前agent状态编码后E_q # 2个样本，3个维度
e_k = torch.randn(2, 5, 3)  # 其余agent状态编码后的e_k # 2个样本，5个智能体，3个维度
attention = Attention_block(3, 4) # 3是输入维度，4是隐藏层维度
output = attention(e_q, e_k)


que torch.Size([2, 4]) 2 4
key torch.Size([2, 5, 4])
val torch.Size([2, 5, 4])
torch.Size([2, 1, 4])
torch.Size([5, 4, 2])


RuntimeError: The size of tensor a (2) must match the size of tensor b (5) at non-singleton dimension 0

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse

torch.manual_seed(1)
class CriticBase(nn.Module):
    def __init__(self, args):
        super(CriticBase, self).__init__()
        self.args = args
        self._define_parameters()

    def _define_parameters_for_hidden_layers(self, parameters_dict, agent_index=None):
        pass

    def _define_parameters(self):
        self.parameters_all_agent = nn.ModuleList()  # do not use python list []
        for i in range(self.args.agent_count):
            parameters_dict = nn.ModuleDict()  # do not use python dict {}
            # parameters for pre-processing observations and actions
            parameters_dict["fc_obs"] = nn.Linear(self.args.observation_dim_list[i], self.args.hidden_dim)
            parameters_dict["fc_action"] = nn.Linear(self.args.action_dim_list[i], self.args.hidden_dim)

            # parameters for hidden layers
            self._define_parameters_for_hidden_layers(parameters_dict, i)

            # parameters for generating Qvalues
            parameters_dict["Qvalue"] = nn.Linear(self.args.hidden_dim, 1)
            self.parameters_all_agent.append(parameters_dict)

    def _forward_of_hidden_layers(self, out_obs_list, out_action_list):
        pass

    def forward(self, observation_batch_list, action_batch_list):
        # pre-process
        out_obs_list, out_action_list = [], []
        for i in range(self.args.agent_count):
            out_obs = F.relu(self.parameters_all_agent[i]["fc_obs"](observation_batch_list[i]))
            out_action = F.relu(self.parameters_all_agent[i]["fc_action"](action_batch_list[i]))
            out_obs_list.append(out_obs)
            out_action_list.append(out_action)

        # key part of difference MARL methods #
        out_hidden_list = self._forward_of_hidden_layers(out_obs_list, out_action_list)
        # if self.args.agent_name == "NCC_AC":
        #     out_hidden_list, C_hat_list, obs_hat_list, action_hat_list = out_hidden_list
        # elif self.args.agent_name == "Contrastive":
        #     out_hidden_list, C_hat_list = out_hidden_list

        # post-process
        Qvalue_list = []
        for i in range(self.args.agent_count):
            Qvalue = self.parameters_all_agent[i]["Qvalue"](out_hidden_list[i])  # linear activation for Q-value
            Qvalue_list.append(Qvalue)

        # if self.args.agent_name == "NCC_AC":
        #     return (Qvalue_list, C_hat_list, obs_hat_list, action_hat_list)
        # elif self.args.agent_name == "Contrastive":
        #     return (Qvalue_list, C_hat_list)
        # else:
        #     return Qvalue_list
        return Qvalue_list
        

class CriticAttentionalMADDPG(CriticBase):
    def __init__(self, args):
        super(CriticAttentionalMADDPG, self).__init__(args)

    def _define_parameters_for_hidden_layers(self, parameters_dict, agent_index=None):
        hidden_dim = self.args.hidden_dim
        head_dim = hidden_dim
        encoder_input_dim = hidden_dim * (self.args.agent_count + 1) # 1 is for the action of the current agent
        decoder_input_dim = hidden_dim * (self.args.agent_count - 1) #

        parameters_dict["fc_encoder_input"] = nn.Linear(encoder_input_dim, hidden_dim)
        for k in range(self.args.head_count):
            parameters_dict["fc_encoder_head" + str(k)] = nn.Linear(hidden_dim, head_dim)

        parameters_dict["fc_decoder_input"] = nn.Linear(decoder_input_dim, head_dim)

    def _global_attention(self, encoder_H, decoder_H):
        # encoder_H 用作键值对，decoder_H 用作查询 也是当前目标向量
        # encoder_H has a shape (source_vector_count, batch_size, hidden_dim)
        # decoder_H has a shape (batch_size, hidden_dim)
        # scores is based on "dot-product" function, it works well for the global attention #zh-cn: 基于“点积”函数的分数，对于全局注意力效果很好
        temp_scores = torch.mul(encoder_H, decoder_H)  # (source_vector_count, batch_size, hidden_dim)
        scores = torch.sum(temp_scores, dim=2)  # (source_vector_count, batch_size)
        attention_weights = F.softmax(scores.permute(1, 0), dim=1)  # (batch_size, source_vector_count)
        attention_weights = torch.unsqueeze(attention_weights, dim=2)  # (batch_size, source_vector_count, 1)
        contextual_vector = torch.matmul(encoder_H.permute(1, 2, 0), attention_weights)  # (batch_size, hidden_dim, 1)
        contextual_vector = torch.squeeze(contextual_vector)  # (batch_size, hidden_dim)
        return contextual_vector

    # in fact, K-head module and attention module are integrated into one module
    def _attention_module(self, obs_list, action_list, agent_index):
        encoder_input_list = obs_list + [action_list[agent_index]] # batch_size * (agent_count + 1) * obs_dim
        decoder_input_list = action_list[:agent_index] + action_list[agent_index + 1:]

        # generating a temp hidden layer "h" (the encoder part, refer the figure in our paper)
        encoder_input = torch.cat(encoder_input_list, dim=1) 
        encoder_h = F.relu(self.parameters_all_agent[agent_index]["fc_encoder_input"](encoder_input))

        # generating action-conditional Q-value heads (i.e., the encoder part)
        encoder_head_list = []
        for k in range(self.args.head_count):
            encoder_head = F.relu(self.parameters_all_agent[agent_index]["fc_encoder_head" + str(k)](encoder_h))
            encoder_head_list.append(encoder_head)
        encoder_heads = torch.stack(encoder_head_list, dim=0)  # (head_count, batch_size, head_dim)

        # generating a temp hidden layer "H" (the decoder part, refer the figure in our paper)
        decoder_input = torch.cat(decoder_input_list, dim=1)
        decoder_H = F.relu(self.parameters_all_agent[agent_index]["fc_decoder_input"](decoder_input))

        # generating content vector (i.e., the decoder part)
        contextual_vector = self._global_attention(encoder_heads, decoder_H)  # (batch_size, head_dim)   ###!!!

        # contextual_vector need to be further transformed into 1-dimension Q-value
        # this will be done by the forward() function in CriticBase()

        return contextual_vector

    def _forward_of_hidden_layers(self, out_obs_list, out_action_list):
        out_hidden_list = []
        for i in range(self.args.agent_count):
            out = self._attention_module(out_obs_list, out_action_list, i)
            out_hidden_list.append(out)
        return out_hidden_list
    
# 例子
# 将字典转换为对象
args_dict = {'agent_count': 2, 'observation_dim_list': [3, 3], 'hidden_dim': 4, 'action_dim_list': [2, 2], 'head_count': 3, 'agent_name': 'qita'}
args = argparse.Namespace(**args_dict)
critic = CriticAttentionalMADDPG(args)
observation_batch_list = [torch.randn(2, 3), torch.randn(2, 3)]  # 2个智能体，每个智能体的状态维度为3 形状: 2(智能体个数) * batch_size(2) * 3
action_batch_list = [torch.randn(2, 2), torch.randn(2, 2)]  # 2个智能体，每个智能体的动作维度为2 形状: 2(智能体个数) * batch_size(2) * 2
output = critic(observation_batch_list, action_batch_list) 
print(output)
print(output[0].shape)  # torch.Size([2, 1]) batch_size * 1


[tensor([[-0.3392],
        [-0.3593]], grad_fn=<AddmmBackward0>), tensor([[-0.1796],
        [-0.1811]], grad_fn=<AddmmBackward0>)]
torch.Size([2, 1])


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
torch.manual_seed(1)

class AttentionModule(nn.Module):
    def __init__(self, encoder_input_dim, decoder_input_dim, hidden_dim, head_count):
        super(AttentionModule, self).__init__()
        self.fc_encoder_input = nn.Linear(encoder_input_dim, hidden_dim)
        self.fc_encoder_heads = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(head_count)])
        self.fc_decoder_input = nn.Linear(decoder_input_dim, hidden_dim)

    def forward(self, encoder_input, decoder_input):
        # encoder_input shape: (batch_size, input_dim)
        encoder_h = F.relu(self.fc_encoder_input(encoder_input))
        # encoder_h shape: (batch_size, hidden_dim)

        encoder_heads = torch.stack([F.relu(head(encoder_h)) for head in self.fc_encoder_heads], dim=0)
        # encoder_heads shape: (head_count, batch_size, hidden_dim)

        # decoder_input shape: (batch_size, input_dim)
        decoder_H = F.relu(self.fc_decoder_input(decoder_input))
        # decoder_H shape: (batch_size, hidden_dim)

        scores = torch.sum(torch.mul(encoder_heads, decoder_H), dim=2)
        # scores shape: (head_count, batch_size)

        attention_weights = F.softmax(scores.permute(1, 0), dim=1).unsqueeze(2)
        # attention_weights shape: (batch_size, head_count, 1)

        contextual_vector = torch.matmul(encoder_heads.permute(1, 2, 0), attention_weights).squeeze()
        # contextual_vector shape: (batch_size, hidden_dim)

        return contextual_vector

class CriticAttentionalMADDPG(nn.Module):
    def __init__(self, args):
        super(CriticAttentionalMADDPG, self).__init__()
        self.args = args
        self.fc_obs = nn.ModuleList([nn.Linear(obs_dim, args.hidden_dim) for obs_dim in args.observation_dim_list])
        self.fc_action = nn.ModuleList([nn.Linear(action_dim, args.hidden_dim) for action_dim in args.action_dim_list])
        self.attention_modules = nn.ModuleList([AttentionModule(args.hidden_dim * (args.agent_count + 1), args.hidden_dim * (args.agent_count - 1),args.hidden_dim, args.head_count) for _ in range(args.agent_count)])
        self.fc_qvalue = nn.ModuleList([nn.Linear(args.hidden_dim, 1) for _ in range(args.agent_count)])

    def forward(self, observation_batch_list, action_batch_list):
        out_obs_list = [F.relu(fc_obs(obs)) for fc_obs, obs in zip(self.fc_obs, observation_batch_list)]
        # out_obs_list shape: [(batch_size, hidden_dim), ...] #即 batch_size * hidden_dim * agent_count

        out_action_list = [F.relu(fc_action(action)) for fc_action, action in zip(self.fc_action, action_batch_list)]
        # out_action_list shape: [(batch_size, hidden_dim), ...]

        qvalue_list = []
        for i in range(self.args.agent_count):
            encoder_input = torch.cat(out_obs_list + [out_action_list[i]], dim=1)
            # encoder_input shape: (batch_size, hidden_dim * (agent_count + 1))

            decoder_input = torch.cat(out_action_list[:i] + out_action_list[i+1:], dim=1)
            # decoder_input shape: (batch_size, hidden_dim * (agent_count - 1))

            contextual_vector = self.attention_modules[i](encoder_input, decoder_input)
            # contextual_vector shape: (batch_size, hidden_dim)

            qvalue = self.fc_qvalue[i](contextual_vector)
            # qvalue shape: (batch_size, 1)

            qvalue_list.append(qvalue)

        return qvalue_list

# 例子
args_dict = {'agent_count': 2, 'observation_dim_list': [3, 3], 'hidden_dim': 4, 'action_dim_list': [2, 2], 'head_count': 3, 'agent_name': 'qita'}
args = argparse.Namespace(**args_dict)
critic = CriticAttentionalMADDPG(args)
observation_batch_list = [torch.randn(2, 3), torch.randn(2, 3)]
action_batch_list = [torch.randn(2, 2), torch.randn(2, 2)]
output = critic(observation_batch_list, action_batch_list)
print(output)



[tensor([[0.1795],
        [0.1644]], grad_fn=<AddmmBackward0>), tensor([[-0.1597],
        [-0.1720]], grad_fn=<AddmmBackward0>)]


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
torch.manual_seed(1)

# 改成 只输出当前agent的Q值

class AttentionModule(nn.Module):
    def __init__(self, encoder_input_dim, decoder_input_dim, hidden_dim, head_count):
        super(AttentionModule, self).__init__()
        self.fc_encoder_input = nn.Linear(encoder_input_dim, hidden_dim)
        self.fc_encoder_heads = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(head_count)])
        self.fc_decoder_input = nn.Linear(decoder_input_dim, hidden_dim)

    def forward(self, encoder_input, decoder_input):
        # encoder_input shape: (batch_size, input_dim)
        encoder_h = F.relu(self.fc_encoder_input(encoder_input))
        # encoder_h shape: (batch_size, hidden_dim)

        encoder_heads = torch.stack([F.relu(head(encoder_h)) for head in self.fc_encoder_heads], dim=0)
        # encoder_heads shape: (head_count, batch_size, hidden_dim)

        # decoder_input shape: (batch_size, input_dim)
        decoder_H = F.relu(self.fc_decoder_input(decoder_input))
        # decoder_H shape: (batch_size, hidden_dim)

        scores = torch.sum(torch.mul(encoder_heads, decoder_H), dim=2)
        # scores shape: (head_count, batch_size)

        attention_weights = F.softmax(scores.permute(1, 0), dim=1).unsqueeze(2)
        # attention_weights shape: (batch_size, head_count, 1)

        contextual_vector = torch.matmul(encoder_heads.permute(1, 2, 0), attention_weights).squeeze()
        # contextual_vector shape: (batch_size, hidden_dim)

        return contextual_vector

class CriticAttentionalMADDPG(nn.Module):
    def __init__(self, hidden_dim,head_count):
        super(CriticAttentionalMADDPG, self).__init__()
        #self.args = args # 3为智能体个数 12为状态维度 1为动作维度 
        self.fc_obs = nn.Linear(12, hidden_dim) #nn.ModuleList([nn.Linear(obs_dim, args.hidden_dim) for obs_dim in args.observation_dim_list])
        self.fc_action = nn.Linear(1, hidden_dim)#nn.ModuleList([nn.Linear(action_dim, args.hidden_dim) for action_dim in args.action_dim_list])
        self.attention_modules = AttentionModule(hidden_dim * (3 + 1), hidden_dim * (3 - 1),hidden_dim, head_count) #nn.ModuleList([AttentionModule(args.hidden_dim * (args.agent_count + 1), args.hidden_dim * (args.agent_count - 1),args.hidden_dim, args.head_count) for _ in range(args.agent_count)])
        self.fc_qvalue = nn.Linear(hidden_dim, 1) #nn.ModuleList([nn.Linear(args.hidden_dim, 1) for _ in range(args.agent_count)])

    def forward(self, x,agent_id,agents):
        agent_id_list = list(agents.keys())
        agent_id_index = agent_id_list.index(agent_id) #获取agent_id在agents中的索引 按照顺序排
        agent_n = len(agent_id_list) #智能体数量 #12为state_dim #3*12=36
        #out_obs_list = [F.relu(fc_obs(obs)) for fc_obs, obs in zip(self.fc_obs, observation_batch_list)]
        out_obs_list = [F.relu(self.fc_obs(x[:,:12])) , F.relu(self.fc_obs(x[:,12:24])) , F.relu(self.fc_obs(x[:,24:36]))]               
        # out_obs_list shape: [(batch_size, hidden_dim), ...] #即 batch_size * hidden_dim * agent_count

        #out_action_list = [F.relu(fc_action(action)) for fc_action, action in zip(self.fc_action, action_batch_list)]
        out_action_list = [F.relu(self.fc_action(x[:,36:37])) , F.relu(self.fc_action(x[:,37:38])) , F.relu(self.fc_action(x[:,38:39]))]
        # out_action_list shape: [(batch_size, hidden_dim), ...]

        #qvalue_list = []
        #for i in range(self.args.agent_count):
        encoder_input = torch.cat(out_obs_list + [out_action_list[agent_id_index]], dim=1)
        # encoder_input shape: (batch_size, hidden_dim * (agent_count + 1))

        decoder_input = torch.cat(out_action_list[:agent_id_index] + out_action_list[agent_id_index+1:], dim=1)
        # decoder_input shape: (batch_size, hidden_dim * (agent_count - 1))

        contextual_vector = self.attention_modules(encoder_input, decoder_input)
        # contextual_vector shape: (batch_size, hidden_dim)

        qvalue = self.fc_qvalue(contextual_vector)
            # qvalue shape: (batch_size, 1)

            #qvalue_list.append(qvalue)

        return qvalue

# 例子
args_dict = {'agent_count': 2, 'observation_dim_list': [3, 3], 'hidden_dim': 4, 'action_dim_list': [2, 2], 'head_count': 3, 'agent_name': 'qita'}
args = argparse.Namespace(**args_dict)
critic = CriticAttentionalMADDPG(args)
observation_batch_list = [torch.randn(2, 3), torch.randn(2, 3)]
action_batch_list = [torch.randn(2, 2), torch.randn(2, 2)]
output = critic(observation_batch_list, action_batch_list)
print(output)

In [5]:
class MLPNetwork1(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128,non_linear=nn.ReLU()):
        super(MLPNetwork1, self).__init__()
        self.fc1 = torch.nn.Linear(in_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        #根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        #Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        #初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
        self.fc3.bias.data.fill_(0.01)
    def forward(self, x): #输入维度：batch_size * in_dim 输出维度：batch_size * out_dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
# 例子
global_obs_dim = 128  # 示例输入维度
critic = MLPNetwork1(global_obs_dim, 1)
#print(critic)
x = torch.randn(12, 128) # 12个样本，128个维度
output = critic(x)
print(output) #输出
print(output.shape) #输出维度 torch.Size([12, 1]) BATCH_SIZE * 1


tensor([[-2.1130],
        [ 1.0746],
        [-1.1199],
        [ 2.1931],
        [ 1.7309],
        [ 2.0937],
        [ 1.9863],
        [-1.7342],
        [-1.5999],
        [-1.7113],
        [-3.7277],
        [-1.9056]], grad_fn=<AddmmBackward0>)
torch.Size([12, 1])


In [10]:
class MLPNetwork(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128,non_linear=nn.ReLU()):
        super(MLPNetwork, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim_1),
            non_linear,
            nn.Linear(hidden_dim_1, hidden_dim_2),
            non_linear,
            nn.Linear(hidden_dim_2, out_dim),
        ).apply(self.init) #apply(self.init)是在初始化模块的权重和偏置时调用init方法

    @staticmethod
    def init(m):
        """init parameter of the module"""
        gain = nn.init.calculate_gain('relu') #zh-cn:计算增益
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight, gain=gain)#这行代码使用 Xavier 均匀分布初始化方法来初始化模块的权重（m.weight）。Xavier 初始化方法旨在使得网络各层的激活值和梯度的方差在传播过程中保持一致，有助于加速网络的收敛。gain 参数是根据 ReLU 激活函数的特性调整的。
            m.bias.data.fill_(0.01) #zh-cn:这行代码使用常数 0.01 来初始化模块的偏置（m.bias）。

    def forward(self, x):
        return self.net(x)
torch.manual_seed(1)
# 例子
global_obs_dim = 128  # 示例输入维度
critic = MLPNetwork(global_obs_dim, 1)
#print(critic)
x = torch.randn(12, 128) # 12个样本，128个维度
output = critic(x)
print(output) #输出


tensor([[ 3.4956],
        [ 0.8701],
        [-1.1959],
        [ 2.1783],
        [-2.8783],
        [-1.7185],
        [ 3.4321],
        [-0.6630],
        [-0.8066],
        [ 1.5690],
        [-2.6908],
        [-0.2538]], grad_fn=<AddmmBackward0>)


In [12]:
# 和上面一样 但不同实现
class MLPNetwork1(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128,non_linear=nn.ReLU()):
        super(MLPNetwork1, self).__init__()
        self.fc1 = torch.nn.Linear(in_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        #根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        #Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        #初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
        self.fc3.bias.data.fill_(0.01)
    def forward(self, x): #输入维度：batch_size * in_dim 输出维度：batch_size * out_dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

torch.manual_seed(1)
# 例子
global_obs_dim = 128  # 示例输入维度
critic = MLPNetwork(global_obs_dim, 1)
#print(critic)
x = torch.randn(12, 128) # 12个样本，128个维度
output = critic(x)
print(output) #输出

tensor([[ 3.4956],
        [ 0.8701],
        [-1.1959],
        [ 2.1783],
        [-2.8783],
        [-1.7185],
        [ 3.4321],
        [-0.6630],
        [-0.8066],
        [ 1.5690],
        [-2.6908],
        [-0.2538]], grad_fn=<AddmmBackward0>)


In [None]:
# 和上面一样 但不同实现
class MLPNetwork1(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128,non_linear=nn.ReLU()):
        super(MLPNetwork1, self).__init__()
        self.fc1 = torch.nn.Linear(in_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        #根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        #Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        #初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
        self.fc3.bias.data.fill_(0.01)
    def forward(self, x): #输入维度：batch_size * in_dim 输出维度：batch_size * out_dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
## 注意力机制改1
class Attention1(nn.Module):
    def __init__(self, in_dim, hidden_dim):
        super(Attention1, self).__init__()
        self.query = nn.Linear(in_dim, hidden_dim, bias = False) #查询
        self.key = nn.Linear(in_dim, hidden_dim, bias = False) #false 好
        #self.value = nn.Linear(in_dim, hidden_dim)
        self.value = nn.Sequential(nn.Linear(in_dim,hidden_dim),nn.LeakyReLU()) # 输出经过激活函数处理
    
    
    def forward(self, e_q, e_k):  
        '''
        公式为：Attention(Q, K, V) = softmax(Q*K^T/sqrt(d_k)) * V 输出为当前智能体的注意力值
        Q: 查询
        K: 键
        V: 值
        d_k: 键的维度 这里用hidden_dim表示 即:K[0].shape[1]
        e_q: 为batch_size * 1 * in_dim #e_q 为当前状态编码 或者输入batch_size  * in_dim 也可以 view有调整维度的功能
        e_k: 为batch_size * n * in_dim n为【其余】智能体数量 #e_k为其余智能体状态编码
        本质：在其余智能体中找到与当前智能体最相关的智能体
        '''
        Q = self.query(e_q)  #查询当前智能体价值 Q: batch_size * hidden_dim
        K = self.key(e_k)  #其余智能体的键 K: batch_size * n * hidden_dim
        V = self.value(e_k) #其余智能体的值 V: batch_size * n * hidden_dim
        d_k = K[0].shape[1] #键的维度
        '''
        Q -> batch_size * 1 * hidden_dim 
        K -> batch_size * hidden_dim * n
        Q*K^T -> batch_size * 1 * n
        '''
        fenzi = torch.matmul(Q.view(Q.shape[0], 1, -1),K.permute(0, 2, 1)) #Q*K^T
        atten_scores = fenzi/np.sqrt(d_k) # 维度为 batch_size * 1 * n
        atten_weights = torch.softmax(atten_scores, dim=2) # 维度为 batch_size * 1 * n
        '''
        V -> batch_size * hidden_dim * n
        atten_weights : batch_size * 1 * n  #其余智能体的权重值
        V.permute(0, 2, 1) * atten_weights) : batch_size * hidden_dim * n
        atten_values : batch_size * hidden_dim  #加权求和表示当前智能体的注意力值
        '''
        atten_values = (V.permute(0, 2, 1) * atten_weights).sum(dim=2)

        return atten_values #当前智能体的注意力值

class MLPNetworkWithAttention1(nn.Module):
    def __init__(self, in_dim, out_dim, hidden_dim_1=256, hidden_dim_2=128, attention_dim=256, non_linear=nn.ReLU()):
        '''
        # in_dim: 为所有智能体状态和动作维度之和 这里是13*3=39 #这里似乎没用到
        # 输入维度为 batch_size * in_dim 输出维度为 batch_size * out_dim
        注意力机制作用：改善了MADDPG中critic输入随智能体数目增大而指数增加的扩展性问题
        '''
        super(MLPNetworkWithAttention1, self).__init__()
        self.attention = Attention1(attention_dim, attention_dim)
        self.fc1 = torch.nn.Linear(2*attention_dim, hidden_dim_1)
        self.fc2 = torch.nn.Linear(hidden_dim_1, hidden_dim_2)
        self.fc3 = torch.nn.Linear(hidden_dim_2, out_dim)
        
        # 根据计算增益
        gain1 = nn.init.calculate_gain('relu')
        # Xavier均匀分布初始化
        torch.nn.init.xavier_uniform_(self.fc1.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain1)
        torch.nn.init.xavier_uniform_(self.fc3.weight, gain=gain1)
        # 初始化参数
        self.fc1.bias.data.fill_(0.01)
        self.fc2.bias.data.fill_(0.01)
        self.fc3.bias.data.fill_(0.01)

        # 注意力相关
        self.embedding = nn.Linear(13, attention_dim) #13 为状态维度12+动作维度1
        self.in_fn = nn.BatchNorm1d(2*attention_dim) # BatchNorm1d 只是对每个样本的特征维度归一化 输出和输入维度一样
        self.in_fn.weight.data.fill_(1) #确保在训练开始时，批归一化层不会对输入数据进行任何不必要的缩放和平移，从而保持输入数据的原始分布。这有助于稳定训练过程。
        self.in_fn.bias.data.fill_(0) 
    
    def forward(self, x ,agent_id,agents): # x本来为cat后的张量,增加 x,agent_i,agents #agents=Agent() 
        agent_id_list = list(agents.keys())
        agent_id_index = agent_id_list.index(agent_id) #获取agent_id在agents中的索引 按照顺序排
        agent_n = len(agent_id_list) #智能体数量 #12为state_dim #3*12=36
        '''
        temp1 : permute前 :   1 * batch_size * 12 permute后 : batch_size * 1 * 12
        temp2 : x[:, 36 + agent_id_index] : batch_size
        permute前 :  1 * 1 * batch_size   permute后 : batch_size * 1 * 1
        torch.cat((temp1,temp2),2) : batch_size * 1 * 13
        e_q : batch_size * 1 * attention_dim
        【注】我这里动作为列表不是离散值
        '''
        #print('x:',x[:,12:24].shape)
        temp1 = torch.unsqueeze(x[:,12 * agent_id_index:12 * agent_id_index + 12],0).permute(1, 0, 2) #.permute(1, 0, 2)将第0维和第1维进行交换
        temp2 = torch.unsqueeze(torch.unsqueeze(x[:, 36 + agent_id_index], 0),0).permute(2, 1, 0)  ######
        e_q = self.embedding(torch.cat((temp1,temp2),2))
        '''
        n :【其余】智能体数量
        torch.cat((temp3,temp4),2) : batch_size * 1 * 13
        embedding(torch.cat((temp3,temp4),2)) : batch_size * 1 * attention_dim
        stack: n * batch_size * 1 * attention_dim  #堆叠 : 将多个张量堆叠在一起
        squeeze: n * batch_size * attention_dim #压缩 : 去掉维度为1的维度      
        e_k : batch_size * n * attention_dim 
        '''
        e_k = []
        for j in range(agent_n): # 其余智能体
            if j!=agent_id_index:
                temp3 = torch.unsqueeze(x[:,12 * j:12 * j + 12],0).permute(1, 0, 2)
                temp4 = torch.unsqueeze(torch.unsqueeze(x[:, 36 + j], 0), 0).permute(2, 1, 0)
                e_k.append(agents[agent_id_list[j]].critic.embedding(torch.cat((temp3,temp4),2)))   #agents[j].critic.embedding 在这里使用集中式训练的critic,所以其实这里embedding是一样的

        e_k_s  = torch.squeeze(torch.stack(e_k))  
        e_k = e_k_s.permute(1,0,2)

        atten_values = self.attention(e_q, e_k) #输出 batch_size * attention_dim
        X_in=torch.cat([torch.squeeze(e_q), atten_values], dim=1) # 输出 batch_size * (attention_dim*2)
        h1 = F.relu(self.fc1(self.in_fn(X_in)))
        h2 = F.relu(self.fc2(h1))      
        out = (self.fc3(h2))

        return out #输出 batch_size * out_dim
