# 神经网络基础

视频：https://www.bilibili.com/video/BV1AK4y1P7vs

5章：https://zh-v2.d2l.ai/chapter_deep-learning-computation/index.html



# 一、模型构造

## 1.1多层感知机定义

```
网络 = nn.Sequential(层, 激活函数, 层)
```

In [1]:
import torch
from torch import nn
from torch.nn import functional as F #定义了没有参数的函数

# Two-layer perceptron: 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))

# Random batch of 2 samples with 20 features each.
X = torch.rand(2,20)
# Forward pass; as the last expression of the cell, its value is displayed below.
net(X)

tensor([[-0.0711, -0.0966,  0.3948, -0.0431, -0.0378, -0.1735,  0.0194, -0.1121,
         -0.2351, -0.1535],
        [-0.0433, -0.0640,  0.3070, -0.0714,  0.0027, -0.3321, -0.1279, -0.0256,
         -0.1638, -0.0984]], grad_fn=<AddmmBackward>)

## 1.2自定义块
```
class 网络(nn.Module):
    def __init__(self):
        super().__init__()
        self.层名 = 层()
    
    def forward(self, X):
        X = self.层名(X)
        X = 激活函数(X)
    
        return X
```

In [2]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear(20 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self):
        super().__init__()
        # Hidden layer first, then output layer (creation order is kept as-is).
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer result for the input batch ``X``."""
        return self.out(F.relu(self.hidden(X)))

In [3]:
# Instantiate the custom block and run the batch X through it.
net = MLP()
net(X)

tensor([[ 0.1092,  0.0554,  0.0318, -0.1181, -0.0251,  0.1486, -0.0087,  0.0135,
          0.1106,  0.2357],
        [ 0.0996, -0.0487,  0.0395, -0.1800,  0.0966,  0.2124,  0.0480, -0.3080,
          0.1353,  0.1662]], grad_fn=<AddmmBackward>)

## 1.3顺序块

In [4]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The modules passed to the constructor are stored in order and applied
    one after another in ``forward``.
    """

    def __init__(self, *args):
        """Register each positional argument as a child module.

        Args:
            *args: ``nn.Module`` instances, in the order they should run.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Child names must be strings: name-based APIs such as
            # state_dict() build dotted keys ("0.weight") from them, so the
            # position in the chain is used as the name.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed ``X`` through every registered child in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X
        

In [5]:
# Build the same 20 -> 256 -> 10 network with the hand-written sequential container.
net = MySequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))
net(X)

tensor([[-0.0132,  0.0638,  0.0390,  0.0499,  0.1299,  0.0576, -0.1571,  0.0073,
         -0.1448, -0.0544],
        [ 0.0395,  0.2598, -0.0891,  0.1879,  0.2393,  0.0194, -0.1408,  0.0040,
         -0.1866, -0.0431]], grad_fn=<AddmmBackward>)

# 二、参数管理

In [10]:
import torch
from torch import nn

# Small network used for the parameter-management examples: 4 -> 8 (ReLU) -> 1.
net = nn.Sequential(nn.Linear(4,8), nn.ReLU(), nn.Linear(8,1))
# Random batch of 2 samples with 4 features each.
X = torch.rand(size=(2,4))
net(X)

tensor([[0.5972],
        [0.4275]], grad_fn=<AddmmBackward>)

## 2.1访问参数

In [11]:
# Inspect the parameters (weight and bias) of the last layer, nn.Linear(8,1).

print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.2188, -0.2106, -0.2614,  0.1184, -0.0556, -0.2155,  0.3012, -0.0851]])), ('bias', tensor([0.1198]))])


In [12]:
print(net[2].bias)  # the Parameter object itself
print(net[2].bias.data) # the underlying tensor value
print(net[2].bias.grad) # the gradient; None here because no backward pass has run yet

Parameter containing:
tensor([0.1198], requires_grad=True)
tensor([0.1198])
None


## 2.2参数初始化

In [18]:
# Use the built-in initializers: draw every weight of a Linear layer from a
# Gaussian with mean 0 and standard deviation 0.01, and set its bias to 0.
def init_normal(m):
    """Initialise ``m`` in place if it is an ``nn.Linear`` layer.

    Intended to be passed to ``nn.Module.apply``, which calls it once per
    submodule. Modules of any other type are left untouched.

    Args:
        m: The module to (possibly) initialise.
    """
    # Exact-type match, as before: subclasses of nn.Linear are not touched.
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False stores its bias as None; nothing to zero.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
        
# Run init_normal on every submodule of net.
net.apply(init_normal)

print(net[2].state_dict()) # weight and bias of the last layer

print(net[2].bias) # the bias Parameter of the last layer

OrderedDict([('weight', tensor([[ 0.0042, -0.0042, -0.0145, -0.0134, -0.0028,  0.0024,  0.0099, -0.0116]])), ('bias', tensor([0.]))])
Parameter containing:
tensor([0.], requires_grad=True)


In [19]:
# Set parameter values directly through .data
net[2].weight.data[:] += 1  # add 1 to every weight of the last layer
net[2].weight.data[0, 0] = 42  # overwrite a single entry

# Show the first (only) row of the modified weight matrix.
net[2].weight.data[0]

tensor([42.0000,  0.9958,  0.9855,  0.9866,  0.9972,  1.0024,  1.0099,  0.9884])

# 三、加载保存模型参数

In [20]:
class MLP(nn.Module):
    """Perceptron with one hidden layer, used for the save/load example."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # 20 inputs -> 256 hidden units
        self.output = nn.Linear(256, 10)  # 256 hidden units -> 10 outputs

    def forward(self, x):
        """Apply the hidden layer, ReLU, then the output layer to ``x``."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)

# Build the model and compute its output Y for a random batch X (2 samples, 20 features).
net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)

In [None]:
# Save the model parameters (the state_dict) to the file 'mlp.params'.
torch.save(net.state_dict(), 'mlp.params')

In [None]:
# Load the parameters: create a fresh MLP and copy the saved state_dict into it.
# NOTE: torch.load unpickles the file, so only load files from a trusted source.
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
# Switch the restored model to evaluation mode.
clone.eval()

# 四、训练模型

## 4.1定义损失函数和优化器

In [None]:
# Optimizer that updates the parameters from their gradients (e.g. SGD, Adam); Adam with lr=0.01 here.
# NOTE(review): `model` is not defined in the visible part of this file; it must exist before this cell runs.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Mean-squared-error loss
criterion =	torch.nn.MSELoss()

## 4.2训练

In [None]:
from torch.autograd import Variable

EPOCH = 3  # number of training epochs

# Per-epoch average losses, recorded for plotting later.
losses = []       # training loss after each epoch
eval_losses = []  # test loss after each epoch

# Custom training procedure.
# `model`, `optimizer` and `criterion` are the objects set up in section 4.1:
# the optimizer was built from `model.parameters()`, so the loop must train
# that same `model` for the updates to take effect.
# NOTE(review): `train_loader` / `test_loader` are not defined in the visible
# part of this file; they are assumed to yield (inputs, targets) batches.

for epoch in range(EPOCH):
    # ---- Training pass: one sweep over the training batches ----
    model.train()
    running_loss = 0.0
    for x, y in train_loader:
        # Tensors are passed to the model directly; the Variable wrapper has
        # been a no-op since PyTorch 0.4.
        output = model(x)            # forward pass
        loss = criterion(output, y)  # batch loss
        running_loss += loss.item()  # accumulate as a Python float

        optimizer.zero_grad()  # clear gradients left from the previous step
        loss.backward()        # back-propagate the loss
        optimizer.step()       # update the parameters

    # Average of the per-batch losses over this epoch.
    train_loss = running_loss / len(train_loader)
    losses.append(train_loss)
    print('训练轮数：', epoch, ' 训练平均损失loss：', train_loss)

    # ---- Evaluation pass: loss on the test set after this epoch ----
    # eval() puts layers such as dropout / batch-norm into inference mode and
    # no_grad() skips building the autograd graph, which is not needed here.
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            output = model(x)
            loss = criterion(output, y)
            running_loss += loss.item()

    # Average of the per-batch losses on the test set.
    val_loss = running_loss / len(test_loader)
    eval_losses.append(val_loss)
    print('训练轮数：', epoch, ' 测试平均损失val_loss：', val_loss)

print('end')
    