## 基本配置　　
### 导入包和版本查询

In [1]:
# Libraries used throughout this notebook.
import PIL
import torch
import torch.nn as nn
import torchvision
# Print the versions of the installed stack, one value per line.
print(torch.__version__)               # PyTorch version
print(torch.version.cuda)              # CUDA version reported by torch.version
print(torch.backends.cudnn.version())  # cuDNN version
print(torch.cuda.get_device_name(0))   # name of CUDA device 0

1.5.0
10.1
7603
GeForce RTX 2080 Ti


### 更新PyTorch

`PyTorch`将被安装在`anaconda3/lib/python3.7/site-packages/torch/`目录下  
`conda update pytorch torchvision -c pytorch`  

### 固定随机种子

`torch.manual_seed(0)`

`torch.cuda.manual_seed_all(0)`  

### 指定程序运行在特定GPU卡上

在命令行指定环境变量

`CUDA_VISIBLE_DEVICES=0,1 python train.py`
或在代码中指定

`os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'`   

### 判断是否有CUDA支持

`torch.cuda.is_available()`

### 设置为cuDNN benchmark模式

`Benchmark`模式会提升计算速度，但是由于计算中有随机性，每次网络前馈结果略有差异。

`torch.backends.cudnn.benchmark = True`
如果想要避免这种结果波动，设置

`torch.backends.cudnn.deterministic = True`

### 清除GPU存储

有时`Control-C`中止运行后`GPU`存储没有及时释放，需要手动清空。在`PyTorch`内部可以

`torch.cuda.empty_cache()`  
或在命令行可以先使用`ps`找到程序的`PID`，再使用`kill`结束该进程

`ps aux | grep python`   
`ps aux:see every process on the system`  
`kill -9 [pid]`
或者直接重置没有被清空的`GPU`

`nvidia-smi --gpu-reset -i [gpu_id]`

In [2]:
# Device configuration: use 'cuda' when torch.cuda.is_available() is true, otherwise 'cpu'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### 张量(Tensor)处理

In [3]:
# Build a random 3x4x5 tensor and inspect its basic properties.
tensor = torch.randn(3,4,5)
print(tensor.type())  # data type of the tensor
print(tensor.size())  # shape of the tensor (printed as torch.Size)
print(tensor.dim())   # number of dimensions

torch.FloatTensor
torch.Size([3, 4, 5])
3


In [4]:
# Tensor[N, C, H, W]
images = torch.randn(32, 3, 56, 56)

In [5]:
images.sum(dim=1).shape

torch.Size([32, 56, 56])

In [6]:
NCHW = ['N', 'C', 'H', 'W']
images = torch.randn(32, 3, 56, 56, names=NCHW)



In [7]:
images.sum('C').shape

torch.Size([32, 56, 56])

In [8]:
images.select('C', index=0)

tensor([[[ 5.6744e-01,  4.1752e-01,  2.8830e-01,  ..., -8.4218e-01,
          -2.1659e-01, -9.8932e-01],
         [ 1.1841e+00, -1.5200e+00, -1.2679e-02,  ..., -1.4709e+00,
           2.9981e-01, -1.3500e+00],
         [ 7.9807e-01,  1.0632e+00, -1.2566e+00,  ...,  6.1915e-01,
          -1.3127e+00,  1.3306e-01],
         ...,
         [ 2.1473e+00, -8.3112e-01,  1.4068e+00,  ..., -7.4190e-01,
           2.0996e+00, -1.2519e+00],
         [-3.0205e-01,  3.9233e-03, -4.0610e-01,  ...,  4.1151e-01,
           3.7060e-01,  1.5014e-01],
         [ 1.3275e-01, -1.1107e+00,  1.2884e+00,  ...,  1.1835e-01,
           1.3068e+00,  1.6116e-01]],

        [[ 3.6937e-01,  2.1532e-01, -1.0256e+00,  ..., -2.1193e-01,
           9.5672e-02,  1.5446e+00],
         [ 5.0710e-01, -4.4327e-01,  4.4714e-01,  ..., -1.2731e+00,
          -4.8913e-01,  8.1772e-01],
         [-3.6150e-01,  7.9346e-01,  4.3908e-01,  ..., -1.7832e-01,
          -3.3333e-01, -1.1776e+00],
         ...,
         [-8.9571e-01,  2

In [9]:
tensor = torch.rand(3,4,1,2,names=('C', 'N', 'H', 'W'))

In [10]:
tensor.shape

torch.Size([3, 4, 1, 2])

In [11]:
tensor = tensor.align_to('N', 'C', 'H', 'W')

In [12]:
tensor.shape

torch.Size([4, 3, 1, 2])

### torch.Tensor与np.ndarray转换
除了CharTensor，其他所有CPU上的tensor都支持转换为numpy格式然后再转换回来。

In [15]:
import numpy as np
img_1 = np.random.randn(3,25,25)
img_2 = torch.from_numpy(img_1).float()

In [16]:
img_1.dtype

dtype('float64')

In [17]:
img_1.shape

(3, 25, 25)

In [18]:
img_2.type

<function Tensor.type>

In [19]:
img_3 = img_2.cpu().numpy()

In [20]:
img_3.dtype

dtype('float32')

In [21]:
img_3.shape

(3, 25, 25)

### torch.Tensor 与 PIL.Image的转换
`PyTorch`中的张量默认采用`N×D×H×W`的顺序，并且数据范围在[0, 1]，需要进行转置和规范化。

In [25]:
# torch.Tensor -> PIL.Image.  
torch.clamp(img_2 * 255, min=0, max=255).byte()

tensor([[[  0,   0, 255,  ...,   0, 255,   0],
         [  0, 242,   0,  ...,   0,   0,   0],
         [255,   0,   0,  ...,   0,   0,   0],
         ...,
         [  0,   0,   0,  ..., 110,   0, 139],
         [  0, 147,   0,  ..., 153,  15, 185],
         [  0,  11,   0,  ...,   0, 146, 174]],

        [[230,   0, 255,  ..., 140,   0, 255],
         [  0, 177, 229,  ...,   0, 236,   0],
         [  0, 255, 184,  ...,   0,   0,   0],
         ...,
         [  0,   0,  57,  ..., 255,   0,  62],
         [ 86,   0,   0,  ...,   0,   0, 213],
         [255, 255,   0,  ...,   0,  78,   0]],

        [[  0,   0,   0,  ...,  54,   0, 242],
         [  0, 113, 114,  ..., 255, 154, 128],
         [114,   0, 255,  ..., 244, 255,   0],
         ...,
         [ 87,   0,   0,  ...,   0,   0,  76],
         [  0,   0,   0,  ...,   0,   0, 255],
         [  0, 177,   0,  ...,  72, 255, 136]]], dtype=torch.uint8)

In [29]:
# torch.Tensor -> PIL.Image.
# Scale to [0, 255], cast to uint8, move the channel axis last with permute(1, 2, 0),
# convert to a NumPy array, and finally wrap that array in a PIL image.
image = PIL.Image.fromarray(
    torch.clamp(img_2 * 255, min=0, max=255).byte().permute(1, 2, 0).cpu().numpy()
)
# image = torchvision.transforms.functional.to_pil_image(img_2)  # Equivalent way (presumably only for inputs already in [0, 1] -- confirm)


In [41]:
# PIL.Image -> torch.Tensor.
# Open the image, view it as a NumPy array, reorder the axes with permute(2, 0, 1),
# convert to float and divide by 255.
# NOTE(review): the path below is an absolute, machine-specific location -- point it at a local image before running.
# NOTE(review): permute(2, 0, 1) needs a 3-D array; presumably the file is a colour (H x W x C) image -- confirm.
tensor = torch.from_numpy(np.asarray(PIL.Image.open('/home/weiweia92/Downloads/kobe.jpeg'))).permute(2, 0, 1).float()/255
#tensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path))  # Equivalent way

### np.ndarray与PIL.Image转换

`# np.ndarray -> PIL.Image.`
`image = PIL.Image.fromarray(ndarray.astype(np.uint8))`

`# PIL.Image -> np.ndarray.`
`ndarray = np.asarray(PIL.Image.open(path))`

In [42]:
value = torch.rand(1).item() # extract the value of a one-element tensor

In [43]:
value

0.2026575207710266

### tensor 变形  
在将卷积层输入全连接层的情况下通常需要对张量做形变处理，
相比`torch.view`，`torch.reshape`可以自动处理输入张量不连续的情况。

In [44]:
tensor = torch.rand(2,3,4)
shape = (6, 4)
tensor = torch.reshape(tensor, shape)

In [45]:
tensor.shape

torch.Size([6, 4])

### 打乱顺序

`tensor = tensor[torch.randperm(tensor.size(0))]  # Shuffle the first dimension`  

### 水平翻转

`PyTorch`不支持`tensor[::-1]`这样的负步长操作，水平翻转可以用张量索引实现。

`# Assume tensor has shape N*D*H*W.`
`tensor = tensor[:, :, :, torch.arange(tensor.size(3) - 1, -1, -1).long()]`  

### 复制张量

有三种复制的方式，对应不同的需求。   
|`tensor.clone()`         |    `New/Shared memory      New `        | `Still in computation graph         Yes` |   
|`tensor.detach()`        |    `New/Shared memory      Shared`      | `Still in computation graph          No` |  
|`tensor.detach().clone()`|    `New/Shared memory      New`         | `Still in computation graph          No` |  

### 拼接张量

注意`torch.cat`和`torch.stack`的区别在于`torch.cat`沿着给定的维度拼接，而`torch.stack`会新增一维。例如当参数是3个10×5的张量，`torch.cat`的结果是30×5的张量，而`torch.stack`的结果是3×10×5的张量。

In [59]:
tensor1 = torch.rand(10, 5)
tensor2 = torch.rand(10, 5)
tensor3 = torch.rand(10, 5)
tensor_cat = torch.cat([tensor1, tensor2, tensor3], dim=0)

In [60]:
tensor_cat.shape

torch.Size([30, 5])

In [74]:
tensor_cat.size()

torch.Size([30, 5])

In [63]:
tensor1 = torch.rand(10, 5)
tensor2 = torch.rand(10, 5)
tensor3 = torch.rand(10, 5)
tensor_stack = torch.stack([tensor1, tensor2, tensor3], dim=0)

In [64]:
tensor_stack.shape

torch.Size([3, 10, 5])

In [53]:
tensor = torch.rand(3, 2, 2)

In [66]:
tensor

tensor([[[0.0219, 0.6251],
         [0.0798, 0.6325]],

        [[0.9203, 0.3982],
         [0.0589, 0.8266]],

        [[0.8809, 0.1143],
         [0.9980, 0.5072]]])

In [67]:
N = tensor.size(0)
N

3

### 判断两个tensor相等

In [73]:
torch.allclose(tensor1, tensor2)  # float tensor
torch.equal(tensor1, tensor2)     # int tensor

False

### 数据类型转换

In [56]:
# Set the default tensor type; per the original author's note, FloatTensor is much faster than DoubleTensor in PyTorch.
torch.set_default_tensor_type(torch.FloatTensor)

In [47]:
# Type conversion: move the tensor with .cuda(), then display its dtype.
tensor = tensor.cuda()
tensor.dtype

torch.float32

In [48]:
tensor = tensor.cpu()
tensor.dtype

torch.float32

In [49]:
tensor = tensor.float()
tensor.dtype

torch.float32

In [50]:
tensor = tensor.long()
tensor.dtype

torch.int64

## Tensor

本质上来说，PyTorch 是一个处理张量的库。一个张量是一个数字、向量、矩阵或任何 n 维数组。

In [1]:
import torch

# number
t1 = torch.tensor(4.)

In [2]:
t1

tensor(4.)

In [3]:
t1.dtype

torch.float32

In [4]:
# vector
t2 = torch.tensor([1, 2, 3, 4])
t2

tensor([1, 2, 3, 4])

In [None]:
# Matrix
t3 = torch.tensor([[5., 6]])

### 矩阵乘法

`# Matrix multiplication: (m*n) * (n*p) -> (m*p).`  
`result = torch.mm(tensor1, tensor2)`  

`# Batch matrix multiplication: (b*m*n) * (b*n*p) -> (b*m*p).`  
`result = torch.bmm(tensor1, tensor2)`  

`# Element-wise multiplication.`  
`result = tensor1 * tensor2`  
计算两组数据之间的两两欧式距离  

`# X1 is of shape m*d, X2 is of shape n*d.`  
`dist = torch.sqrt(torch.sum((X1[:,None,:] - X2) ** 2, dim=2))`   

## 模型定义  

### 卷积层

最常用的卷积层配置是

`conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)`  
`conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True)`  

### GAP（Global average pooling）层

`gap = torch.nn.AdaptiveAvgPool2d(output_size=1)`  

### 双线性汇合（bilinear pooling)  
`
X = torch.reshape(X, (N, D, H * W))                   # Assume X has shape N*D*H*W
X = torch.bmm(X, torch.transpose(X, 1, 2)) / (H * W)  # Bilinear pooling
assert X.size() == (N, D, D)
X = torch.reshape(X, (N, D * D))
X = torch.sign(X) * torch.sqrt(torch.abs(X) + 1e-5)   # Signed-sqrt normalization
X = torch.nn.functional.normalize(X)                  # L2 normalization`  

### 多卡同步BN（Batch normalization）

当使用`torch.nn.DataParallel`将代码运行在多张GPU卡上时，PyTorch的BN层默认操作是各卡上数据独立地计算均值和标准差，同步BN使用所有卡上的数据一起计算BN层的均值和标准差，缓解了当批量大小`（batch size）`比较小时对均值和标准差估计不准的情况，是在目标检测等任务中一个有效的提升性能的技巧。

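现在PyTorch官方已经支持同步BN操作（注意：`torch.nn.SyncBatchNorm`目前只支持`torch.nn.parallel.DistributedDataParallel`，在`torch.nn.DataParallel`下不起作用）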

`sync_bn = torch.nn.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, 
                                 track_running_stats=True)`  

将已有网络的所有BN层改为同步BN层

In [76]:
def convertBNtoSyncBN(module, process_group=None):
    '''Recursively replace all BN layers with SyncBN layers.

    A BN layer is swapped for a newly built ``torch.nn.SyncBatchNorm`` that
    carries over its hyper-parameters, affine parameters and running
    statistics. Any other module is kept and its children are converted in
    place.

    Args:
        module (torch.nn.Module): Network or sub-module to convert.
        process_group: Passed through to ``torch.nn.SyncBatchNorm``.

    Returns:
        torch.nn.Module: The new SyncBN layer when ``module`` is a BN layer,
        otherwise ``module`` itself (with converted children).
    '''
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        sync_bn = torch.nn.SyncBatchNorm(module.num_features, module.eps, module.momentum,
                                         module.affine, module.track_running_stats, process_group)
        # Running statistics are plain tensors (or None), so they can be assigned directly.
        sync_bn.running_mean = module.running_mean
        sync_bn.running_var = module.running_var
        sync_bn.num_batches_tracked = module.num_batches_tracked
        if module.affine:
            # weight / bias are registered parameters: nn.Module rejects a bare
            # tensor here, so wrap the detached copies in nn.Parameter.
            sync_bn.weight = torch.nn.Parameter(module.weight.clone().detach(),
                                                requires_grad=module.weight.requires_grad)
            sync_bn.bias = torch.nn.Parameter(module.bias.clone().detach(),
                                              requires_grad=module.bias.requires_grad)
        return sync_bn

    # Snapshot the children first because they are re-assigned while looping.
    for name, child_module in list(module.named_children()):
        setattr(module, name, convertBNtoSyncBN(child_module, process_group=process_group))
    return module

### 计算模型整体参数量


`# torch.numel:返回输入张量中元素的总数   
num_parameters = sum(torch.numel(parameter) for parameter in model.parameters())`  

### 模型权值初始化

注意`model.modules()`和`model.children()`的区别：`model.modules()`会迭代地遍历模型的所有子层，而`model.children()`只会返回模型最外层的子层


`# Common practice for initialization.
for layer in model.modules():
    if isinstance(layer, torch.nn.Conv2d):
        torch.nn.init.kaiming_normal_(layer.weight, mode='fan_out',
                                      nonlinearity='relu')
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.BatchNorm2d):
        torch.nn.init.constant_(layer.weight, val=1.0)
        torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.Linear):
        torch.nn.init.xavier_normal_(layer.weight)
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)`

`# Initialization with given tensor.
layer.weight = torch.nn.Parameter(tensor)`

## PyTorch其他注意事项

### 模型定义

建议有参数的层和汇合`（pooling）`层使用`torch.nn`模块定义，激活函数直接使用`torch.nn.functional`。`torch.nn`模块和`torch.nn.functional`的区别在于，`torch.nn`模块在计算时底层调用了`torch.nn.functional`，但`torch.nn`模块包括该层参数，还可以应对训练和测试两种网络状态。使用`torch.nn.functional`时要注意网络状态，如
`def forward(self, x):
    ...
    x = torch.nn.functional.dropout(x, p=0.5, training=self.training)`
`model(x)`前用`model.train()`和`model.eval()`切换网络状态。
不需要计算梯度的代码块用`with torch.no_grad()`包含起来。`model.eval()`和`torch.no_grad()`的区别在于，`model.eval()`是将网络切换为测试状态，例如BN和dropout在训练和测试阶段使用不同的计算方法。`torch.no_grad()`是关闭`PyTorch`张量的自动求导机制，以减少存储使用和加速计算，得到的结果无法进行`loss.backward()`。
`torch.nn.CrossEntropyLoss`的输入不需要经过`Softmax`。`torch.nn.CrossEntropyLoss`等价于`torch.nn.functional.log_softmax + torch.nn.NLLLoss`。
`loss.backward()`前用`optimizer.zero_grad()`清除累积梯度。当优化器包含模型的全部参数时，`optimizer.zero_grad()`和`model.zero_grad()`效果一样。