In [1]:
import numpy as np
import torch
from torch.utils import data # 加载数据
from torch import nn # 定义网络结构

In [2]:
# Ground-truth parameters of the synthetic linear model y = x @ w + b.
w = torch.tensor([2.0, -3.4])  # true weight vector (two input features)
b = 4.2  # true bias
n = 1_000  # number of samples to generate

## 生成数据

In [3]:
def get_data(w, b, sample_number, noise_std=0.01):
    """Generate a synthetic linear-regression dataset ``y = x @ w + b + noise``.

    Args:
        w: 1-D tensor of true weights; its length sets the number of features.
        b: True bias (a scalar added to every label).
        sample_number: Number of samples (rows) to draw.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value previously hard-coded.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(sample_number, len(w))`` and
        is drawn from a standard normal distribution, and ``y`` is a column
        tensor of shape ``(sample_number, 1)``.
    """
    # matmul needs both operands in the same floating dtype; promote integer
    # weights such as torch.tensor([2, -3]) instead of failing later.
    if not w.is_floating_point():
        w = w.to(torch.get_default_dtype())
    # Draw features and noise with w's dtype/device so non-float32 or
    # non-CPU weights work as well.
    x = torch.normal(0, 1, (sample_number, w.shape[0]), dtype=w.dtype, device=w.device)
    noise = torch.normal(0, noise_std, (sample_number,), dtype=w.dtype, device=w.device)
    y = torch.matmul(x, w) + b + noise
    # Return labels as a column so they line up with the network output shape.
    return x, y.reshape(-1, 1)

In [4]:
# Draw the synthetic dataset from the ground-truth parameters.
features, labels = get_data(w, b, sample_number=n)

## 加载数据

In [5]:
def load_data(my_data, batch_size, is_train=True):
    """Build a mini-batch iterator over the given tensors.

    Args:
        my_data: Iterable of tensors (e.g. ``(features, labels)``) that is
            unpacked into a ``TensorDataset``.
        batch_size: Number of samples per mini-batch.
        is_train: When True the data is reshuffled at every epoch.

    Returns:
        A ``DataLoader`` yielding one tuple of tensors per mini-batch.
    """
    tensor_dataset = data.TensorDataset(*my_data)
    return data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,  # shuffle controls whether each epoch sees a new order
    )

In [6]:
# Number of samples per mini-batch handed to the data loader.
batch_size = 10

In [7]:
# Sanity check: one row per sample, one column per input feature.
features.shape

torch.Size([1000, 2])

In [8]:
# Sanity check: labels are a column tensor (get_data reshapes them to (-1, 1)).
labels.shape

torch.Size([1000, 1])

In [9]:
# Debug aid: uncomment to peek at the first mini-batch produced by the loader.
# next(iter(load_data((features,labels),batch_size)))

In [10]:
# Shuffled mini-batch iterator over the training set (is_train defaults to True).
data_loader = load_data(my_data=(features, labels), batch_size=batch_size)

## 定义模型

In [11]:
# Single fully connected layer: 2 input features -> 1 output value.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

## 模型参数初始化

In [12]:
net[0].weight # net[0] is layer 0 of the network; show its default-initialized weight

Parameter containing:
tensor([[0.6055, 0.2091]], requires_grad=True)

In [13]:
# Overwrite the weight in place with samples from N(0, 0.01^2).
net[0].weight.data.normal_(mean=0, std=0.01)

tensor([[-0.0029,  0.0034]])

In [14]:
# Inspect the weight after the in-place normal initialization.
net[0].weight

Parameter containing:
tensor([[-0.0029,  0.0034]], requires_grad=True)

In [15]:
# Inspect the bias of layer 0 before it is reset.
net[0].bias

Parameter containing:
tensor([-0.3525], requires_grad=True)

In [16]:
# Reset the bias of layer 0 to zero in place.
net[0].bias.data.fill_(0.0)

tensor([0.])

In [17]:
# Inspect the bias after it has been filled with zeros.
net[0].bias

Parameter containing:
tensor([0.], requires_grad=True)

## 定义损失函数

In [18]:
# Mean squared error, averaged over all elements, is the training criterion.
loss = nn.MSELoss(reduction="mean")

## 定义优化算法

In [19]:
# Optimizer: stochastic gradient descent.
# It is handed every parameter of `net`, i.e. the tensors that are corrected
# from their gradients on each step; lr is the learning rate.
trainer = torch.optim.SGD(net.parameters(), lr=0.03)

## 迭代过程

In [20]:
# Number of full passes over the training data.
epochs = 30

In [21]:
# Train with mini-batch SGD and report the full-dataset loss after every epoch.
for epoch in range(epochs):  # one full pass over the data per epoch
    for x, y in data_loader:  # fetch one mini-batch of training data
        y_hat = net(x)  # forward pass: predictions for this batch
        l = loss(y_hat, y)  # loss between predictions and observed labels
        trainer.zero_grad()  # clear gradients left over from the previous step
        l.backward()  # back-propagate to fill in the parameter gradients
        trainer.step()  # apply one optimization step (parameter update)
    # Measure the error on the whole dataset after this epoch. This is
    # evaluation only, so autograd is disabled to avoid building a graph that
    # is never back-propagated through.
    with torch.no_grad():
        labels_hat = net(features)
        l = loss(labels_hat, labels)
    print(f"epoch {epoch}, loss {l}, w {net[0].weight.data}, b {net[0].bias.data}")

epoch 0, loss 0.0002098849945468828, w tensor([[ 1.9984, -3.3942]]), b tensor([4.1919])
epoch 1, loss 0.00011101487325504422, w tensor([[ 2.0001, -3.3998]]), b tensor([4.1991])
epoch 2, loss 0.00011117455142084509, w tensor([[ 2.0006, -3.3996]]), b tensor([4.1998])
epoch 3, loss 0.00011076741066062823, w tensor([[ 1.9995, -3.4006]]), b tensor([4.1999])
epoch 4, loss 0.0001122849207604304, w tensor([[ 2.0002, -3.3998]]), b tensor([4.2011])
epoch 5, loss 0.00011071480548707768, w tensor([[ 1.9991, -3.4001]]), b tensor([4.2000])
epoch 6, loss 0.00011186307528987527, w tensor([[ 1.9989, -3.4000]]), b tensor([4.1989])
epoch 7, loss 0.000110965920612216, w tensor([[ 2.0002, -3.4003]]), b tensor([4.2003])
epoch 8, loss 0.00011054243805119768, w tensor([[ 1.9999, -3.4003]]), b tensor([4.2000])
epoch 9, loss 0.00011166548210894689, w tensor([[ 2.0007, -3.4004]]), b tensor([4.2003])
epoch 10, loss 0.00011042208643630147, w tensor([[ 1.9995, -3.4000]]), b tensor([4.1996])
epoch 11, loss 0.0001104