## backward求导原理

In [1]:
import torch

### 标量对向量求导

In [2]:
# 求出y在x处的导数
x = torch.ones(2, requires_grad=True)  # x = [1,1]
y = 2 * x[0] ** 3+ 2 * x[1] ** 2 
y.backward()  # y'= ∂(2*x^2)/∂x = 6x0 + 4x1
print(x.grad)  # x在x=[1,1]时候的导数值

tensor([6., 4.])


### 向量对向量求导

In [3]:
# 在向量对向量的求导的时候如果没有传入gradients会报错
# gradients表示各个维度上导函数前的权重
x = torch.arange(0, 6.0, 1, requires_grad=True)  # x = [1,1,1]
y = 2 * x ** 2
gradients = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float)  # [0.1, 1.0, 0.0001] 表示各个维度上导函数前的权重

In [4]:
# retain_graph=True表示链式求导的计算图暂时不被释放，链式求导可以重复运行，否则不能重复运行。并且导函数前的权重会累加，但是下面用的权重每次都会更新成1，所以结果不变
y.backward(torch.ones_like(x), retain_graph=True)  # y'= ∂(2*x^2)/∂x = 4x
# 各维度的权重分别是[0, 1, 2, 3, 4, 5]，所以结果4 * [0, 1, 2, 3, 4, 5] * [1, 1, 1, 1, 1]
print(x.grad)

tensor([ 0.,  4.,  8., 12., 16., 20.])


In [5]:
# 这里由于权重在累加，每次运行结果都会变
y.backward(x, retain_graph=True)
print(x.grad)

tensor([  0.,   8.,  24.,  48.,  80., 120.])


## 读取数据集

In [6]:
import torchvision
from torchvision import transforms

### 直接下载数据集

In [None]:
# mnist_train: [(img, label), (), ...]
trans = transforms.Compose([
    transforms.ToTensor()
])
mnist_train = torchvision.datasets.FashionMNIST("../Data/FashionMNIST", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST("../Data/FashionMNIST", train=False, transform=trans, download=True)

### 本地读取数据集

In [None]:
mnist_train = torchvision.datasets.ImageFolder("../Data/FashionMNIST/train", transform=trans)
mnist_test = torchvision.datasets.ImageFolder("../Data/FashionMNIST/test", transform=trans)

## TensorDataset和DataLoader

In [5]:
# 数据打包
from torch.utils.data import TensorDataset  # 相当于zip，将features和Label一一对应，传入数据的第一维度要相等，可索引
from torch.utils.data import DataLoader  # 数据batch封装，不可索引
import torch

a = torch.tensor([[11, 22, 33], [44, 55, 66], [77, 88, 99], [11, 22, 33], [44, 55, 66], [77, 88, 99], [11, 22, 33], [44, 55, 66], [77, 88, 99], [11, 22, 33], [44, 55, 66], [77, 88, 99]])
b = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2])
train_ids = TensorDataset(a, b)  # 要传tensor
# 切片输出
print(train_ids[0:2])
print('#' * 30)
# 循环取数据
for x_train, y_label in train_ids:
    print(x_train, y_label)
# DataLoader进行数据封装,num_workers可以设置读取数据集的进程
print('#' * 30)
train_loader = DataLoader(dataset=train_ids, batch_size=4, shuffle=True, num_workers=0)
for i, data in enumerate(train_loader, 1):  # 注意enumerate返回值有两个,一个是序号，一个是数据（包含训练数据和标签）
    x_data, label = data
    print(' batch:{0} x_data:{1}  label: {2}'.format(i, x_data, label))   # y data (torch tensor)
    
print('#' * 30)
print(train_ids[0])
print(next(iter(train_loader)))  # 查看第一个batch
print(next(iter(train_loader)))  # 查看第二个batch

(tensor([[11, 22, 33],
        [44, 55, 66]]), tensor([0, 1]))
##############################
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
tensor([11, 22, 33]) tensor(0)
tensor([44, 55, 66]) tensor(1)
tensor([77, 88, 99]) tensor(2)
##############################
 batch:1 x_data:tensor([[11, 22, 33],
        [77, 88, 99],
        [44, 55, 66],
        [44, 55, 66]])  label: tensor([0, 2, 1, 1])
 batch:2 x_data:tensor([[11, 22, 33],
        [77, 88, 99],
        [77, 88, 99],
        [44, 55, 66]])  label: tensor([0, 2, 2, 1])
 batch:3 x_data:tensor([[77, 88, 99],
        [44, 55, 66],
        [11, 22, 33],
        [11, 22, 33]])  label: tensor([2, 1, 0, 0])
##############################
(tensor([11, 22, 33]), tensor(0))
[tensor([[11, 22, 33],
        [77, 

## 网络模型

### 定义一

In [10]:
import torch.nn as nn
class Net(nn.Module):
    def __init__(self, input_num, output):
        super().__init__()  # 继承父类的init
        self.hidden = nn.Linear(input_num, output)
    
    def forward(self, x):
        x = self.hidden(x)
        return x
    
input_num = 2
output_num = 1
net = Net(input_num, output_num)
# 自定义权重和偏置
net.hidden.weight.data = torch.tensor([[-0.0017,  0.0118]])
net.hidden.bias.data = torch.tensor([[0.0]])

### 定义二

In [11]:
net1 = nn.Sequential(
    nn.Linear(input_num, output_num)
)
net1[0].weight.data = torch.tensor([[-0.0017,  0.0118]])
net1[0].bias.data = torch.tensor([[0.0]])

### 定义三

In [22]:
class Net2(nn.Module):
    def __init__(self, input_num, output):
        super().__init__()
        self.hidden = nn.Sequential(nn.Linear(input_num, output_num))
    
    def forward(self, x):
        x = self.hidden(x)
        return x

In [23]:
net2 = Net2(input_num, output_num)
net2.hidden[0].weight.data = torch.tensor([[-0.0017,  0.0118]])
net2.hidden[0].bias.data = torch.tensor([[0.0]])

In [24]:
# 三种定义结果一样
print(net(torch.tensor([[1.0, 2.0]])))
print(net1(torch.tensor([[1.0, 2.0]])))
print(net2(torch.tensor([[1.0, 2.0]])))

tensor([[0.0219]], grad_fn=<AddmmBackward0>)
tensor([[0.0219]], grad_fn=<AddmmBackward0>)
tensor([[0.0219]], grad_fn=<AddmmBackward0>)


### 权重初始化（防止梯度爆炸和梯度消失）

#### 常用的十种权重初始化方法

../Reference/PyTorch_tutorial.pdf

#### 方法一

In [92]:
import torch.nn as nn

# 直接展平后过全连接
class SoftMaxNet(nn.Module):
    def __init__(self, input_num, output_num):
        super().__init__()
        self.hidden = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_num, output_num)
        )
        
    def forward(self, x):
        x = self.hidden(x)
        return x

input_num = 784
output_num = 10
net = SoftMaxNet(input_num, output_num)
for name, model in net.named_children():
    if type(model) == nn.Sequential:
        for m in model:
            if type(m) == nn.Linear:
                print("原始权重", m.weight.data)
                print("原始偏置", m.bias.data)
                nn.init.normal_(m.weight, mean=0, std=0.01)
                nn.init.normal_(m.bias, mean=0, std=0.01)
                print("修改权重", m.weight.data)
                print("修改偏置", m.bias.data)
    elif type[model] == nn.Linear:
        nn.init.normal_(model.weight, mean=0, std=0.01)
        nn.init.normal_(m.bias, mean=0, std=0.01)

原始权重 tensor([[-0.0186,  0.0120,  0.0103,  ..., -0.0094,  0.0315,  0.0306],
        [-0.0265,  0.0132, -0.0228,  ...,  0.0148,  0.0185, -0.0122],
        [-0.0320,  0.0315, -0.0338,  ...,  0.0009,  0.0048, -0.0152],
        ...,
        [-0.0266, -0.0224, -0.0284,  ..., -0.0194,  0.0224,  0.0136],
        [-0.0242, -0.0357, -0.0025,  ..., -0.0176, -0.0037, -0.0011],
        [ 0.0303, -0.0028, -0.0330,  ...,  0.0028, -0.0047,  0.0177]])
原始偏置 tensor([0.0065, 0.0262, 0.0049, 0.0251, 0.0071, 0.0131, 0.0164, 0.0283, 0.0192,
        0.0208])
修改权重 tensor([[ 0.0154, -0.0081,  0.0118,  ...,  0.0107, -0.0119, -0.0126],
        [ 0.0067,  0.0034, -0.0047,  ...,  0.0162, -0.0002, -0.0014],
        [-0.0070, -0.0125,  0.0061,  ...,  0.0016, -0.0030,  0.0077],
        ...,
        [ 0.0005,  0.0084,  0.0037,  ...,  0.0094,  0.0052, -0.0177],
        [-0.0359,  0.0020, -0.0019,  ...,  0.0095, -0.0184,  0.0090],
        [ 0.0098, -0.0172, -0.0097,  ...,  0.0157,  0.0094,  0.0133]])
修改偏置 tensor([-0.0033

#### 方法二

In [90]:
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
        nn.init.normal_(m.bias, std=0.01)

net = nn.Sequential(nn.Flatten(),
                    nn.Linear(784, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))
print("原始权重", net[1].weight.data)
print("原始偏置", net[1].bias.data)

net.apply(init_weights)  # apply递归初始化
print("修改权重", net[1].weight.data)
print("修改偏置", net[1].bias.data)

原始权重 tensor([[ 0.0320, -0.0031, -0.0327,  ..., -0.0319, -0.0268,  0.0236],
        [ 0.0008,  0.0235, -0.0347,  ...,  0.0255,  0.0301,  0.0339],
        [-0.0079, -0.0001,  0.0022,  ..., -0.0182,  0.0296,  0.0336],
        ...,
        [ 0.0220,  0.0310,  0.0162,  ..., -0.0164, -0.0243, -0.0292],
        [ 0.0160,  0.0005, -0.0268,  ...,  0.0113, -0.0352, -0.0064],
        [-0.0157,  0.0231,  0.0017,  ...,  0.0352,  0.0288, -0.0298]])
原始偏置 tensor([-8.0495e-03, -2.0037e-02, -3.4692e-03, -2.2001e-02, -1.5363e-02,
         3.2640e-02, -2.7605e-02,  3.4867e-02,  2.1547e-02,  1.2764e-02,
        -3.4017e-02, -1.6642e-02,  1.3973e-02, -1.4633e-02,  3.4857e-02,
         1.4940e-02,  3.9604e-03,  3.1490e-02,  1.9545e-02,  1.1835e-02,
         3.1879e-02,  2.8525e-02, -8.6612e-03,  8.8350e-03,  6.3228e-03,
         1.4076e-02, -2.3187e-02, -2.5201e-03,  2.7289e-02,  5.9613e-03,
        -4.4714e-03,  1.1179e-02, -2.3965e-04,  1.6895e-02,  1.8803e-02,
         3.3039e-02, -1.5504e-02,  3.3519e-02

## 损失函数

### 交叉熵损失函数

In [None]:
# 网络模型最后一层全连接层，将输出经过 softmax 激活函数之后，再计算其与 target 的交叉熵损失
# 'none'代表的是batch内的每个元素都会计算一个损失，返回的结果还是一个batch；
# 'mean’代表的是是否进行平均，一个batch只返回一个；
# 'sum’代表的是将batch内的loss相加，一个batch也是只返回一个；
loss = nn.CrossEntropyLoss(reduction="none")

## 加载本地pth模型

### 方式一

In [1]:
# 存整个神经网络的结构信息和模型的参数信息，save的对象是网络net
#保存模型
torch.save(model_object,'resnet.pth')
#加载模型
model = torch.load('resnet.pth')

NameError: name 'torch' is not defined

### 方式二

In [2]:
#将my_resnet模型存储为my_resnet.pth
torch.save(my_resnet.state_dict(),"my_resnet.pth")
#加载resnet，模型存放在my_resnet.pth
my_resnet.load_state_dict(torch.load("my_resnet.pth"))
#其中my_resnet是my_resnet.pth对应的网络结构；

NameError: name 'torch' is not defined

In [4]:
import numpy as np
li = (1, 2, 3)
li.apply(lambda x: x + 1)

AttributeError: 'tuple' object has no attribute 'apply'