In [7]:
# 1. Linear Regresion是对n-维的加权，加上一个偏差
# 2. 用平方差来估量预测值和真实值的差异
# 3. LR 有显示解
# 4. LR 可以看做单层神经网络
# 有 X = [x1,x2,...,xn]^T, y = [y1,y2,...,yn]^T, n个数据
# Loss = 1/2n |y-Xw-b|^2, 用 w*, b* = argmin loss来学习参数
# 将加入权重为 X = [X, 1], w = [w, b]^T
# dLoss/dw = 1/n (y - Xw)^T X； 当=0时，得到最小值，为 w* = (X^T X)^-1 X^T y

In [8]:
# 梯度下降
# 将 w0 作为随机初始值，迭代t=1,2,3
# wt = wt-1 - n*(dLoss/dw), 因为dLoss/dw是增加最快的方向，-dLoss/dw是减少最快的方向
# n作为步长，可理解为为走多远
# 批量下降，随机采样b个样本为一个batch

In [63]:
#将图嵌入到notebook
%matplotlib inline 
from matplotlib import pyplot as plt
import random
import torch
import d2l

In [64]:
#构造一个数据集，用 w=[2,-3.4]^T, b=4.2, y = Xw + b + 噪声
def synthetic_data(w, b, num_examples):
    X = torch.normal(0, 1, (num_examples, len(w))) # 随机数, 有n个样本，列数为w个
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape) #加入随机噪声
    return X, y.reshape((-1, 1))
                     
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000) # feature为因素，label为预测

In [65]:
# features中每一行是一个2-d vector, labels中每一行是一个标量
print('features:', features[0], '\nlabels:', labels[0])

features: tensor([-0.4982, -0.4757]) 
labels: tensor([4.8117])


In [66]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    #随机读取样本
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        # 得到一个batch的indices
        batch_indices = torch.tensor(
        indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices] #相当于return

batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break

tensor([[ 0.1511,  0.4517],
        [-1.3180, -1.0271],
        [-1.1956, -0.6494],
        [ 2.0913,  1.8373],
        [-1.0065, -0.8676],
        [ 0.1223,  0.1020],
        [-0.3477,  1.8908],
        [ 1.5729, -1.6319],
        [-0.2688, -0.5206],
        [-0.7008,  1.6389]]) 
 tensor([[ 2.9897],
        [ 5.0553],
        [ 4.0398],
        [ 2.1305],
        [ 5.1201],
        [ 4.0943],
        [-2.9217],
        [12.9057],
        [ 5.4162],
        [-2.7718]])


In [67]:
#定义初始化参数
w = torch.normal(0, 0.01, size=(2,1), requires_grad = True)
b = torch.zeros(1, requires_grad = True)

In [68]:
#定义模型
def linreg(X, w, b):
    return torch.matmul(X, w) + b

In [69]:
#定义损失函数
def square_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape))**2 /2 / batch_size

In [74]:
#定义优化算法
def sgd(params, lr, batch_size):
    with torch.no_grad(): #更新时不要参与梯度计算
        for param in params:
            param -= lr * param.grad 
            param.grad.zero_()

In [79]:
#训练过程
lr = 0.03
num_epochs = 3
net = linreg
loss = square_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y) # 算小批量损失
        # 因为'l'的形状时('batch_size', 1), 要sum所有的元素
        l.sum().backward()
        sgd([w,b], lr, batch_size)
    with torch.no_grad():
        #扫完之后，评价一下进度, 与真实的值做对比
        train_l = loss(net(features, w, b), labels)
        print(f'epoch{epoch+1}, loss{float(train_l.mean()):f}')

epoch1, loss0.000006
epoch2, loss0.000006
epoch3, loss0.000006


In [80]:
#比较真实参数与训练参数的区别
print(f'w的估计误差:{true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差:{true_b - b}')

w的估计误差:tensor([0.0001, 0.0002], grad_fn=<SubBackward0>)
b的估计误差:tensor([-0.0003], grad_fn=<RsubBackward1>)


In [None]:
#简单实现

In [None]:
import numpy as np
import torch
from torch.utils import data

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

In [86]:
#调用现有框架中的API来读取数据
def load_array(data_arrays, batch_size, is_train = True):
    #构造一个PyTorch数据迭代器
    dataset = data.TensorDataset(*data_arrays) 
    #这里的*data_arrays是解包操作符，允许从一个列表或原组中解包并传递多个参数，这样是允许TensorDataset接受一对或多对特征和标签张量
    return data.DataLoader(dataset, batch_size, shuffle = is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)

next(iter(data_iter)) #转成python的iterator
#这行是用迭代器换取下一个元素，这里iter()是确保这里是一个迭代器，next()是从中获取第一个批次的数据

NameError: name 'data' is not defined

In [82]:
#对模型的定义
from torch import nn
net = nn.Sequential(nn.Linear(2,1)) #这里2是输入的维度，1是输出的维度
#这里Sequential是list of layers


In [83]:
#初始化模型参数
net[0].weight.data.normal_(0, 0.01) 
#这里的 net[0]访问到具体的layer, weight来访问到w，
#data是真实data，normal是正太分布来替换掉具体的值
net[0].bias.data.fill_(0)
#fill是把这里全部设置成0

tensor([0.])

In [84]:
#使用均方误差
loss = nn.MSELoss()

In [85]:
#实例化SGD
trainer = torch.optim.SGD(net.parameters(), lr =0.03)

In [87]:
#训练过程
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter: #这里X是feature, y是label
        l = loss(net(X), y) #net(X)表示feature X通过net得到的预测结果
        trainer.zero_grad() #告诉tranier先把梯度清零
        l.backward()
        trainer.step() #对模型的更新
    l = loss(net(features), labels) #这行代码通过计算整个数据集上的损失来评估模型的性能
    print(f'epoch {epoch + 1}, loss{l:f}')

TypeError: 'function' object is not iterable