#### parameter

In [2]:
import os

import torch
import torch.nn as nn

# Wrap a 3x3 zero tensor as a learnable parameter, then display the parameter,
# its underlying tensor, its requires_grad flag and its gradient
# (None here, because no backward pass has been run).
param = nn.Parameter(data=torch.zeros(size=(3, 3)))

(param, param.data, param.requires_grad, param.grad)

(Parameter containing:
 tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]], requires_grad=True),
 tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 True,
 None)

一个`nn.Sequential`会自动把上一个Module的输出当作下一个Module的输入，就像我们自己写的那样   
否则就自己去接：
```python
def forward(self, x):
        x = self.conv1(x)
        return self.conv2(x)
```

### 初始化

线性层（`nn.Linear`）默认会自己用 Kaiming（何恺明）[初始化](https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py#L81)

如果要自己初始化：

In [3]:
# Overwrite the parameter's values in place with random normal samples.
param.data.normal_()  # most PyTorch methods with a trailing underscore operate in place

tensor([[-0.2936,  1.0011,  0.4703],
        [ 0.2000, -0.1920, -0.8351],
        [-1.8805, -0.0365,  0.8782]])

如果对nn的Module里的参数初始化：

```python
conv1 = torch.nn.Conv2d(...)
torch.nn.init.xavier_uniform_(conv1.weight)
```

或者手动调整：
```python
conv1.weight.data.fill_(0.01)
conv1.bias.data.fill_(0.01)
```

In [3]:
# Overwrite the parameter's values in place with random normal samples.
param.data.normal_()  # most PyTorch methods with a trailing underscore operate in place

tensor([[-0.2936,  1.0011,  0.4703],
        [ 0.2000, -0.1920, -0.8351],
        [-1.8805, -0.0365,  0.8782]])

### 通过torch.save(obj, f)，保存state_dict为文件

- 注意存的是state_dict()
- 注意cpu()，下次加载就会在cpu，如果要改变加载位置，用`map_location`

In [7]:
class Model(nn.Module):
    """Two stacked 2-D convolutions: 1 -> 3 channels, then 3 -> 2 channels."""

    def __init__(self):
        super(Model, self).__init__()
        # The attribute names become the key prefixes of state_dict()
        # ("conv1.weight", "conv1.bias", "conv2.weight", "conv2.bias").
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=(2, 3))
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3)

    def forward(self, x):
        """Feed ``x`` through ``conv1`` and then ``conv2``; return the result."""
        hidden = self.conv1(x)
        out = self.conv2(hidden)
        return out

model = Model()
model.cpu()  # save CPU tensors so the checkpoint loads on the CPU by default

# torch.save does not create missing parent directories, so make sure
# "data/" exists before writing (keeps Restart & Run All working).
os.makedirs("data", exist_ok=True)
torch.save(model.state_dict(), "data/model.pth")

# torch.load returns what was saved: the state_dict (an OrderedDict of
# tensors), not a Model instance.
loaded_model = torch.load("data/model.pth")
# To place the tensors on another device while loading, pass map_location:
# loaded_model = torch.load("data/model.pth", map_location='cuda:3')
loaded_model

OrderedDict([('conv1.weight',
              tensor([[[[ 0.1695, -0.1040,  0.3645],
                        [-0.3489, -0.0117,  0.1016]]],
              
              
                      [[[-0.2587,  0.0651,  0.3172],
                        [-0.3226,  0.2848,  0.3283]]],
              
              
                      [[[-0.0018, -0.3298, -0.3942],
                        [-0.1472, -0.2767, -0.1312]]]])),
             ('conv1.bias', tensor([ 0.2073, -0.1674,  0.3022])),
             ('conv2.weight',
              tensor([[[[ 0.0219, -0.0274, -0.0278],
                        [ 0.1360,  0.1432,  0.0516],
                        [-0.0298,  0.1339,  0.1325]],
              
                       [[-0.0822,  0.1861, -0.1133],
                        [ 0.1399, -0.0471,  0.0366],
                        [-0.0168,  0.1499,  0.1609]],
              
                       [[ 0.0304, -0.0666, -0.1714],
                        [-0.0512,  0.0941,  0.1615],
                        [-0.152

### 最大池化

```python
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
```

* ceil_mode指定对于输出的计算是否向上取整
* 输出的大小：
- Input: $ (N, C, H_{in}, W_{in}) $
- Output: $ (N, C, H_{out}, W_{out}) $
    $$
      H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
            \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
    $$

    $$
      W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
            \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
    $$
- 形象就用小窗口在图片上移动，根据步长等信息来计算pool之后的大小
- 2D的pool的输入至少要有3维。为什么？因为池化窗口只在最后两维 $(H, W)$ 上滑动，而前面还需要通道维 $C$：输入形状是 $(C, H, W)$ 或 $(N, C, H, W)$。

In [13]:
# 28x28 input, 3x3 window, stride 3:
# floor((28 - 3) / 3 + 1) = 9 along each spatial dimension.
x = torch.zeros((2, 1, 28, 28))
m = torch.nn.MaxPool2d(kernel_size=3, stride=3)
m(x).shape

torch.Size([2, 1, 9, 9])

### 交叉熵损失：

```python
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
```

* 多元交叉熵公式：$ Loss = -\sum_i y_i \log(p_i) $ 通常配合Softmax实现多分类
* 二元交叉熵：$ Loss = -\sum_i \left( y_i \log(p_i) + (1 - y_i) \log(1 - p_i) \right) $  通常配合Sigmoid实现二分类

In [88]:
# Toy classification batch: `x` holds one row of predicted scores per sample,
# `y` holds the target class index of each sample.
torch.manual_seed(3)  # fixed seed so the random scores are reproducible
batch, classes = 2, 3
x = torch.rand((batch, classes))  # predictions, shape (batch, classes) = (2, 3)
y = torch.tensor([1, 2])          # target class index per sample
entloss = torch.nn.CrossEntropyLoss()
(x, entloss(x, y))

(tensor([[0.0043, 0.1056, 0.2858],
         [0.0270, 0.4716, 0.0601]]),
 tensor(1.1889))

In [85]:
# One-hot encode the integer class labels: row i gets a 1 in column y[i].
one_hot = torch.nn.functional.one_hot(y, num_classes=classes)

In [94]:
# Manual cross entropy, averaged over the batch, to compare with CrossEntropyLoss.
# dim=1 is the class axis of the (batch, classes) scores, so the softmax is
# taken per row (per sample) -- this is the row-wise softmax.
# log_softmax replaces log(softmax(...)): same value, but it avoids
# 0 * log(0) = nan when a probability underflows to zero.
loss = -(one_hot.float() * torch.log_softmax(x, dim=1)).sum() / batch
loss

tensor(1.1889)

In [91]:
# NOTE(review): bare introspection of torch.softmax (no computation); the output
# below is help text, so this looks like a leftover lookup -- consider removing.
torch.softmax

Docstring: <no docstring>
Type:      builtin_function_or_method
