In [1]:
# agent.py
import torch
import torch.nn as nn
import torch.nn.functional as F

class CircuitOptimizerAgent(nn.Module):
    """Deep convolutional reinforcement-learning agent for quantum circuit optimization.

    The network is a stack of four 3x3 convolutions followed by batch
    normalization, whose flattened features feed two linear heads:

    * a policy head producing, for each rule, a probability distribution
      over the ``n_qubits * n_moments`` positions, and
    * a value head producing one scalar per sample.
    """

    def __init__(self, n_qubits, n_moments, n_gate_classes, n_rules):
        """
        Build the convolutional trunk and the policy / value heads.

        Args:
            n_qubits (int): Number of qubits (input height).
            n_moments (int): Number of time steps / moments (input width).
            n_gate_classes (int): Number of gate types (input channels).
            n_rules (int): Number of rules.
        """
        super(CircuitOptimizerAgent, self).__init__()
        self.n_qubits = n_qubits
        self.n_moments = n_moments
        self.n_rules = n_rules  # kept as an attribute; forward() needs it to reshape the policy

        # Convolutional trunk
        self.conv1 = nn.Conv2d(n_gate_classes, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        # Batch normalization over the 256 channels produced by conv4.
        # It must be created here, not in forward(): a layer built inside
        # forward() is replaced on every call, so its affine parameters are
        # never trained, its running statistics are discarded, and it ignores
        # both .eval() and .to(device) applied to the agent beforehand.
        self.bn = nn.BatchNorm2d(256)

        # Size of the flattened feature map after the four convolutions
        def conv_output_size(size, kernel_size=3, stride=1, padding=1):
            return ((size - kernel_size + 2 * padding) // stride) + 1

        qubits_out = self.n_qubits
        moments_out = self.n_moments
        for _ in range(4):  # one pass per convolution layer
            qubits_out = conv_output_size(qubits_out)
            moments_out = conv_output_size(moments_out)

        flattened_size = 256 * qubits_out * moments_out

        # Fully connected layer of the policy head
        self.policy_linear = nn.Linear(flattened_size, n_rules * n_qubits * n_moments)

        # Fully connected layer of the value head
        self.value_linear = nn.Linear(flattened_size, 1)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x (torch.Tensor): Input of shape
                ``(batch, n_gate_classes, n_qubits, n_moments)``.

        Returns:
            tuple: ``(policy, value)`` where ``policy`` has shape
            ``(batch, n_rules, n_qubits * n_moments)`` and is softmax-normalized
            along the last dimension (each rule's row sums to 1), and ``value``
            has shape ``(batch, 1)``.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        # Normalize the conv features before the linear heads to improve
        # numerical stability ahead of the softmax and to speed up convergence.
        x = self.bn(x)

        # Flatten everything except the batch dimension
        x_flat = torch.flatten(x, start_dim=1)

        # Policy head: one distribution over positions per rule
        policy = self.policy_linear(x_flat)
        policy = policy.view(-1, self.n_rules, self.n_qubits * self.n_moments)
        policy = F.softmax(policy, dim=-1)

        # Value head
        value = self.value_linear(x_flat)

        return policy, value


In [2]:
# test_agent.py
# Smoke test: build the agent, run one forward pass on random input and
# verify the shapes (and normalization) of the policy and value outputs.

# Model hyperparameters
n_qubits = 5
n_moments = 10
n_gate_classes = 3
n_rules = 4

# Build the agent
agent = CircuitOptimizerAgent(n_qubits, n_moments, n_gate_classes, n_rules)

# Simulated input batch. A locally seeded generator makes the input
# reproducible across runs without touching the global RNG state.
batch_size = 8
input_generator = torch.Generator().manual_seed(0)
input_tensor = torch.randn(
    batch_size, n_gate_classes, n_qubits, n_moments, generator=input_generator
)

# Forward pass: obtain the policy and value outputs
policy, value = agent(input_tensor)

# Check the outputs explicitly instead of relying on reading the printout
expected_policy_shape = (batch_size, n_rules, n_qubits * n_moments)
expected_value_shape = (batch_size, 1)
assert tuple(policy.shape) == expected_policy_shape, (
    f"unexpected policy shape {tuple(policy.shape)}, expected {expected_policy_shape}"
)
assert tuple(value.shape) == expected_value_shape, (
    f"unexpected value shape {tuple(value.shape)}, expected {expected_value_shape}"
)
# Softmax is taken over the last dimension, so each rule's row must sum to 1
assert torch.allclose(
    policy.sum(dim=-1), torch.ones(batch_size, n_rules), atol=1e-5
), "policy rows are not normalized"

# Print the output dimensions
print("Policy output shape:", policy.shape)  # expected: [batch_size, n_rules, n_qubits * n_moments]
print("Value output shape:", value.shape)    # expected: [batch_size, 1]


Policy output shape: torch.Size([8, 4, 50])
Value output shape: torch.Size([8, 1])
