# A3C算法
A3C（Asynchronous Advantage Actor-Critic）算法是A2C（Advantage Actor-Critic）算法的异步分布式版本：由多个工作节点并行地计算梯度，再由一个中心节点负责汇总这些梯度并执行梯度下降更新。

## 网络模型
我们首先来看网络模型，其由两部分组成：策略网络作为Actor，值函数网络作为Critic。
网络模型的定义如下：

In [None]:
# 
class A2cConv1dModel(nn.Module):
    """Actor-critic network: a shared 1-D convolutional trunk with two heads.

    The trunk is three ``Conv1d`` + ``ReLU`` stages (kernel size 3, stride 1).
    Its flattened output feeds two independent fully connected heads:

    * ``policy`` -- emits ``n_actions`` raw scores per sample (no softmax is
      applied inside the model).
    * ``value`` -- emits one scalar per sample.

    Args:
        input_shape: ``(channels, length)`` of a single observation.
        n_actions: Number of outputs of the policy head.
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()
        in_channels = input_shape[0]
        # Shared feature extractor used by both heads.
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels, 32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Conv1d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )
        conv_out_size = self._get_conv_out(input_shape)
        # Policy head (actor).
        self.policy = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions)
        )
        # Value head (critic).
        self.value = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )

    def _get_conv_out(self, shape):
        """Return the flattened size of the trunk output for one sample.

        Args:
            shape: ``(channels, length)`` of a single observation.

        Returns:
            Number of features produced by ``self.conv`` for a batch of one.
        """
        # Only the output size is needed, so skip building an autograd graph.
        with torch.no_grad():
            o = self.conv(torch.zeros(1, *shape))
        return int(o.numel())

    def forward(self, x):
        """Compute policy scores and the state value for a batch.

        Args:
            x: Either ``(batch, length)`` -- a channel axis of size 1 is
                inserted -- or ``(batch, channels, length)`` where
                ``channels`` matches the first convolution's input channels.

        Returns:
            Tuple ``(policy_out, value_out)`` with shapes
            ``(batch, n_actions)`` and ``(batch, 1)``.
        """
        has_channel_axis = (
            x.dim() == 3 and x.shape[1] == self.conv[0].in_channels
        )
        if not has_channel_axis:
            # Legacy path: treat the input as (batch, length) and insert a
            # single channel axis, exactly as the model always did.
            x = x.reshape(x.shape[0], 1, x.shape[1])
        # NOTE(review): the 1/256 scale resembles 8-bit image normalisation;
        # confirm it is intended for the observations fed to this model.
        fx = x.float() / 256
        conv_out = self.conv(fx).view(fx.size(0), -1)
        return self.policy(conv_out), self.value(conv_out)
    
# Unit test case
class TA2cConv1dModel(unittest.TestCase):
    """Smoke test for ``A2cConv1dModel`` on a single random observation."""

    def test_exp(self):
        """Run one forward pass and check the shape of both outputs."""
        obs_n = 42  # length of one flat observation vector
        action_n = 3
        net = A2cConv1dModel((1, obs_n), action_n)
        # prepare input: one observation sampled from a normal distribution
        mu = 0.0
        std = 1.0
        x_raw = np.random.normal(mu, std, obs_n)
        # Reshape with obs_n rather than a literal so the test follows it.
        x = torch.from_numpy(x_raw.reshape((1, obs_n))).float()
        print('x: {0};'.format(x.shape))
        actions, v_pi = net(x)
        print('actions: {0}; {1};'.format(actions.shape, actions))
        print('v_pi: {0};'.format(v_pi))
        # Without assertions the test could only fail by raising.
        self.assertEqual(tuple(x.shape), (1, obs_n))
        self.assertEqual(tuple(actions.shape), (1, action_n))
        self.assertEqual(tuple(v_pi.shape), (1, 1))

网络拓扑结构图如下所示：
![网络拓扑结构图](./images/chp_02_04_001.png)