# 7.1 卷积神经网络基础

### nn.Conv2d

In [1]:
import torch
from torch import nn

$$Output\_size = \left \lfloor \frac{n + 2p - f}{s} + 1\right \rfloor \times \left \lfloor \frac{n + 2p - f}{s} + 1\right \rfloor$$



In [15]:
# Three Conv2d configurations applied to the same input, to show how padding
# and stride change the output size: floor((n + 2p - f) / s + 1) per spatial dim.
# The layers are invoked by calling the module (layer(x)) rather than
# layer.forward(x): calling the module runs any registered hooks, while a direct
# forward() call silently skips them.
layer = nn.Conv2d(1,3,kernel_size=3,stride=1,padding=0)
x = torch.rand(1,1,28,28)

out = layer(x) # convolution; no padding: floor((28 + 0 - 3) / 1 + 1) = 26
print('out1.shape:',out.shape)

# padding=1 with a 3x3 kernel and stride 1 keeps the spatial size at 28.
layer = nn.Conv2d(1,3,kernel_size=3,stride=1,padding=1)
out = layer(x)
print('out2.shape:',out.shape)

# stride=2 halves the spatial size: floor((28 + 2 - 3) / 2 + 1) = 14.
layer = nn.Conv2d(1,4,kernel_size=3,stride=2,padding=1)
out = layer(x)
print('out3.shape:',out.shape)

out1.shape: torch.Size([1, 3, 26, 26])
out2.shape: torch.Size([1, 3, 28, 28])
out3.shape: torch.Size([1, 4, 14, 14])


### Inner weight & bias

In [16]:
# Display the learnable kernel tensor of the most recently created Conv2d layer.
layer.weight

Parameter containing:
tensor([[[[-0.1704, -0.0875,  0.0579],
          [ 0.2762,  0.2394, -0.0993],
          [ 0.1446,  0.0253, -0.1044]]],


        [[[-0.1198, -0.3039, -0.3194],
          [ 0.2971, -0.2700, -0.0318],
          [ 0.1160,  0.1986, -0.3255]]],


        [[[-0.1016,  0.0956, -0.0222],
          [ 0.1155,  0.1237,  0.1385],
          [ 0.2375, -0.2012,  0.3073]]],


        [[[-0.3036,  0.2053,  0.2781],
          [-0.0271, -0.0822, -0.0675],
          [ 0.2384, -0.2601,  0.0878]]]], requires_grad=True)

In [17]:
# Weight layout is (out_channels, in_channels, kernel_height, kernel_width).
layer.weight.shape

torch.Size([4, 1, 3, 3])

In [18]:
# The bias holds one value per output channel.
layer.bias.shape

torch.Size([4])

### F.conv2d

In [23]:
from torch.nn import functional as F
# Tensors for the functional convolution demo. The weight's in-channel
# dimension (dim 1) must equal the input's channel count, so it is read from x;
# the bias needs one entry per output channel, so its length is read from w.
x = torch.randn(1, 3, 28, 28)
w = torch.rand(16, x.size(1), 5, 5)
b = torch.rand(w.size(0))

In [27]:
# Functional convolution: the weight and bias are passed in explicitly instead
# of being owned by a module.
out = F.conv2d(x, weight=w, bias=b, stride=2, padding=2)
print('x.shape: ', x.shape)
print('out.shape: ', out.shape)

x.shape:  torch.Size([1, 3, 28, 28])
out.shape:  torch.Size([1, 16, 14, 14])


# 7.2 Down/Up Sample 池化层与采样 ReLU层

### Max Pooling

### Avg Pooling


In [28]:
# Downsample the current feature map with a 2x2 window and stride 2, once via
# the module API (max pooling) and once via the functional API (average pooling).
x = out
print('x.shape: ', x.shape)

layer = nn.MaxPool2d(kernel_size=2, stride=2)
out1 = layer(x)
print('out1.shape: ', out1.shape)

out2 = F.avg_pool2d(x, kernel_size=2, stride=2)
print('out2.shape: ', out2.shape)

x.shape:  torch.Size([1, 16, 14, 14])
out1.shape:  torch.Size([1, 16, 7, 7])
out2.shape:  torch.Size([1, 16, 7, 7])


### UpSample 

#### F.interpolate( )

In [30]:
# Nearest-neighbour upsampling: scale_factor=2 doubles the trailing (spatial)
# dimensions of x.
# The result is bound to its own name instead of being written back to `out`.
# Because this cell starts with `x = out`, overwriting `out` would make every
# re-run of the cell upsample the previous result again.
x = out
print('x.shape: ',x.shape)
upsampled = F.interpolate(x,scale_factor=2,mode='nearest')
print('out.shape: ',upsampled.shape)

x.shape:  torch.Size([1, 16, 14, 14])
out.shape:  torch.Size([1, 16, 28, 28])


### ReLU层

In [33]:
# ReLU through the module API. With inplace=True the activation is written
# into the input tensor rather than into a newly allocated one, so x itself
# is modified by the call below.
layer = nn.ReLU(inplace=True)

print('x.shape: ', x.shape)
print('layer = nn.ReLU(inplace=True), inplace=True可以节省内存一半空间')
out = layer(x)
print('out.shape: ', out.shape)

x.shape:  torch.Size([1, 16, 14, 14])
layer = nn.ReLU(inplace=True), inplace=True可以节省内存一半空间
out.shape:  torch.Size([1, 16, 14, 14])


In [34]:
# Same activation through the functional API; inplace=False is the default
# and is spelled out here for contrast with the module example.
out = F.relu(x, inplace=False)
print('out.shape: ', out.shape)

out.shape:  torch.Size([1, 16, 14, 14])


# 7.3 Batch Norm

### Feature Scaling
- Image Normalization:
```Python
normalize = transforms.Normalize(mean=[0.485,0.456,0.406],
                                 std=[0.229,0.224,0.225])
# RGB 各通道的均值和标准差
```
- Batch Normalization:

$ Algorithm : Batch Normalizing Transform, applied to activation x over a mini-batch.$

$Input: Values of x over a mini-batch: \mathcal{B} = \{ x_{1...m}\}; $
$Output: \{ y_i = BN_{\gamma , \beta}(x_i)\} $

$$mini-batch \space mean:  \mu_{\mathcal{B}} \leftarrow \frac{1}{m}\sum_{i=1}^m x_i $$

$$mini-batch \space variance: \sigma_{\mathcal{B}}^2 \leftarrow \frac{1}{m} \sum_{i=1}^m (x_i-\mu_\mathcal{B})^2 $$

$$normalize: \hat{x_i} \leftarrow \frac{x_i - \mu_{\mathcal{B}}}{\sqrt{\sigma_\mathcal{B}^2 + \varepsilon }}$$

$$scale \space and \space shift: y_i \leftarrow \gamma \hat{x_i} + \beta \equiv BN_{\gamma , \beta}(x_i)$$



### nn.BatchNorm1d

In [38]:
# Input of shape (100, 16, 784), where 784 = 28*28. BatchNorm1d is constructed
# with the size of dim 1 of the input as its num_features.
x = torch.randn(100, 16, 784)
print('x.shape: ', x.shape)
layer = nn.BatchNorm1d(num_features=x.size(1))
out = layer(x)

x.shape:  torch.Size([100, 16, 784])


**nn.BatchNorm1d(16)的参数16为channel通道数。BatchNorm的统计数据是逐通道(per-channel)计算的:对每个channel,在batch维和其余维度上求均值和方差。有多少个channel,就计算多少个channel上的均值和方差,16个channel就生成长度为16的统计信息,每个元素代表对应channel的均值或方差**

In [39]:
# Show the running statistics buffers of the layer. Each label and its tensor
# are emitted by one print call, separated by a newline, which produces the
# same text as printing them with two consecutive calls.
print('layer.running_mean: ', layer.running_mean, sep='\n')
print('layer.running_var: ', layer.running_var, sep='\n')

layer.running_mean: 
tensor([-4.5907e-04,  8.6306e-05, -7.4280e-04, -6.2997e-04, -6.4749e-04,
         1.4273e-04, -3.9751e-05, -8.0233e-05,  2.4080e-04, -3.0221e-04,
         8.5041e-05,  1.8740e-04, -1.6602e-04, -6.1695e-05, -3.7739e-04,
         1.0307e-04])
layer.running_var: 
tensor([1.0003, 0.9996, 1.0000, 1.0000, 1.0006, 1.0003, 1.0000, 1.0004, 1.0000,
        0.9994, 0.9999, 0.9996, 0.9997, 1.0008, 0.9995, 0.9999])


### nn.BatchNorm2d

layer.weight $ \rightarrow \space \gamma \ $

layer.bias $ \rightarrow \space \beta$

In [41]:
# Input of shape (6, 16, 28, 28). BatchNorm2d is constructed with the size of
# dim 1 of the input as its num_features; the output shape equals the input shape.
x = torch.rand(6, 16, 28, 28)
print('x.shape: ', x.shape)
layer = nn.BatchNorm2d(num_features=x.size(1))
out = layer(x)
print('After BatchNorm2d(16), out.shape: ', out.shape)

# weight and bias are the learnable scale and shift, one entry per feature.
print('layer.weight.shape: ', layer.weight.shape)
print('layer.bias.shape: ', layer.bias.shape)

x.shape:  torch.Size([6, 16, 28, 28])
After BatchNorm2d(16), out.shape:  torch.Size([6, 16, 28, 28])
layer.weight.shape:  torch.Size([16])
layer.bias.shape:  torch.Size([16])


In [43]:
# vars(layer) returns the module's attribute dictionary; the output below shows
# its buffers (running_mean, running_var, num_batches_tracked) and its
# parameters (weight, bias) alongside the hook registries.
print('vars()方法打印所有参数:')
vars(layer)

vars()方法打印所有参数:


{'_backward_hooks': OrderedDict(),
 '_buffers': OrderedDict([('running_mean',
               tensor([0.0501, 0.0498, 0.0502, 0.0494, 0.0505, 0.0503, 0.0507, 0.0497, 0.0508,
                       0.0509, 0.0500, 0.0496, 0.0496, 0.0510, 0.0501, 0.0502])),
              ('running_var',
               tensor([0.9084, 0.9082, 0.9082, 0.9082, 0.9084, 0.9082, 0.9084, 0.9083, 0.9083,
                       0.9084, 0.9082, 0.9083, 0.9085, 0.9083, 0.9083, 0.9084])),
              ('num_batches_tracked', tensor(1))]),
 '_forward_hooks': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_modules': OrderedDict(),
 '_parameters': OrderedDict([('weight', Parameter containing:
               tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
                      requires_grad=True)), ('bias', Parameter containing:
               tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
                      requi

## Test 时注意事项:
test时,均值和方差取的不是当前batch的统计值,而是训练过程中累积的全局值(running_mean / running_var);$\gamma$和$\beta$不需要backward,即不需要更新。只需切换到test模式,使用 **layer.eval()**

```Python
layer.eval()
# 输出: BatchNorm1d(16,eps=1e-05,momentum=0.1,affine=True,track_running_stats=True)
out = layer(x)
```