In [1]:
import random
import torch

In [2]:
def generate_dataset(w, b, num_example):
    """Sample a synthetic linear-regression dataset ``Y = X @ w + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` fixes the number of feature columns.
            The notebook passes it with shape ``[num_features, 1]``.
        b: Bias added to every target.
        num_example: Number of rows (examples) to draw.

    Returns:
        A pair ``(X, Y)``. ``X`` has shape ``[num_example, len(w)]`` and is
        drawn from a normal distribution with mean 0 and std 1. ``Y`` is
        ``X @ w + b`` perturbed by normal noise with mean 0 and std 0.01;
        for a column-vector ``w`` its shape is ``[num_example, 1]``.
    """
    num_features = len(w)
    features = torch.normal(0, 1, (num_example, num_features))
    targets = torch.matmul(features, w) + b
    # Add the observation noise in place so the targets keep their dtype.
    targets.add_(torch.normal(0, 0.01, targets.shape))
    return features, targets

# Ground-truth parameters of the linear model that the training below should recover.
num_example = 1000
true_b = 4.2
true_w = torch.tensor([[2.0], [-3.4]])
X, Y = generate_dataset(true_w, true_b, num_example)
print(X, Y, sep=' \n ')

tensor([[ 1.1645,  0.5092],
        [ 0.1971, -0.0974],
        [ 1.6466, -0.5593],
        ...,
        [ 0.4769, -1.5845],
        [-1.7759,  0.2886],
        [-0.8553, -0.7828]]) 
 tensor([[ 4.7880e+00],
        [ 4.9147e+00],
        [ 9.4062e+00],
        [ 1.0534e-01],
        [ 4.6455e+00],
        [ 6.6164e+00],
        [ 6.3818e+00],
        [ 2.4496e+00],
        [ 2.7479e+00],
        [ 2.7346e+00],
        [ 5.0391e+00],
        [ 8.7001e+00],
        [ 8.6768e+00],
        [-1.0413e+00],
        [ 5.7793e+00],
        [ 3.4310e+00],
        [ 2.2096e+00],
        [ 8.8355e+00],
        [ 8.9766e+00],
        [ 4.8281e+00],
        [ 6.3403e+00],
        [ 9.2217e+00],
        [ 4.5299e+00],
        [ 9.5851e+00],
        [ 5.5952e+00],
        [ 1.0275e+01],
        [ 1.4452e+01],
        [-3.0317e-01],
        [ 9.8448e-01],
        [ 8.6595e-01],
        [-3.7008e+00],
        [ 4.3019e+00],
        [ 8.0296e+00],
        [-1.6785e+00],
        [ 2.8357e+00],
        [ 6

In [3]:
def data_iterator(X, Y, batchsize, shuffle=False):
    """Yield mini-batches ``(X_batch, Y_batch)`` covering the whole dataset.

    Args:
        X: Feature tensor whose first dimension indexes the examples.
        Y: Label tensor indexed along its first dimension in step with ``X``.
        batchsize: Maximum number of examples per batch; the final batch is
            smaller when the example count is not a multiple of it.
        shuffle: When True, the examples are visited in a fresh random order
            on every call. Defaults to False, which keeps the original
            sequential order.

    Yields:
        Tuples ``(X_batch, Y_batch)`` whose rows correspond to each other.

    Raises:
        ValueError: If ``batchsize`` is not positive.
    """
    if batchsize <= 0:
        raise ValueError(f'batchsize must be positive, got {batchsize}')
    num_examples = X.shape[0]
    if not shuffle:
        for start in range(0, num_examples, batchsize):
            stop = start + batchsize
            yield X[start:stop], Y[start:stop]
        return
    # One shared permutation keeps every feature row paired with its label.
    order = torch.randperm(num_examples)
    for start in range(0, num_examples, batchsize):
        picked = order[start:start + batchsize]
        yield X[picked], Y[picked]

batchsize = 100
# Peek at the first mini-batch only, then stop iterating.
for X_batch, Y_batch in data_iterator(X, Y, batchsize):
    print(X_batch, Y_batch, sep=' \n ')
    break

tensor([[ 1.1645,  0.5092],
        [ 0.1971, -0.0974],
        [ 1.6466, -0.5593],
        [-0.9115,  0.6691],
        [ 0.0227, -0.1156],
        [ 0.1930, -0.6015],
        [ 0.0035, -0.6394],
        [-0.7366,  0.0839],
        [-0.5411,  0.1047],
        [ 0.5618,  0.7612],
        [ 0.9071,  0.2830],
        [ 0.9377, -0.7741],
        [ 0.1226, -1.2433],
        [-0.8222,  1.0616],
        [ 1.1017,  0.1862],
        [ 1.0992,  0.8749],
        [-0.1451,  0.5001],
        [ 0.8434, -0.8621],
        [ 0.4664, -1.1305],
        [-0.6658, -0.5790],
        [-0.2431, -0.7717],
        [ 0.0156, -1.4703],
        [-0.8604, -0.6083],
        [ 0.2785, -1.4193],
        [ 0.1340, -0.3337],
        [ 0.8185, -1.3100],
        [ 1.1724, -2.3286],
        [-0.6930,  0.9192],
        [-0.8226,  0.4588],
        [-0.3510,  0.7742],
        [-1.7318,  1.3002],
        [-0.1460, -0.1193],
        [-0.3676, -1.3410],
        [ 0.2139,  1.8563],
        [-0.4428,  0.1426],
        [ 0.4944, -0

In [4]:
# Note: the trainable parameters (w and b) must be tensors that record gradients.
w = torch.normal(0, 2, size=(X.shape[1], 1), requires_grad=True)
b = torch.zeros(1).requires_grad_(True)
# The model, loss function, optimizer step and training loop are defined below.
epoch = 100
def model(X, w, b):
    """Linear model: return the matrix product of ``X`` and ``w`` plus ``b``."""
    projected = X @ w
    return projected + b

def loss_fn(Y, Y_pred):
    """Halved mean squared error between labels and predictions.

    Sums the squared differences ``(Y - Y_pred) ** 2`` over all elements,
    then divides by 2 and by ``Y.shape[0]``. The result is a scalar tensor.
    """
    residual = Y - Y_pred
    squared_error = residual.pow(2).sum()
    return squared_error / 2 / Y.shape[0]

def SGD(w, b, learn_rate):
    """Apply one gradient-descent step to ``w`` and ``b`` in place.

    Each parameter is decreased by ``learn_rate`` times its ``.grad``, after
    which both gradients are reset to zero for the next backward pass.
    """
    params = (w, b)
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            param -= learn_rate * param.grad
        for param in params:
            param.grad.zero_()

# Train with mini-batch gradient descent and report the full-dataset loss after every epoch.
for i in range(epoch):
    for X_batch, Y_batch_labels in data_iterator(X, Y, batchsize):
        Y_batch_pred = model(X_batch, w, b)
        # backward() is called on the loss because it is the final node of the
        # training computation graph; gradients flow from it back to w and b.
        loss = loss_fn(Y_batch_labels, Y_batch_pred)

        # loss_fn already reduces the batch to a scalar, so backward() can be
        # called on it directly; an extra .sum() would be a no-op.
        loss.backward()
        SGD(w, b, learn_rate=0.02)
    # Evaluation only: no gradients are needed for the reported loss.
    with torch.no_grad():
        train_loss = loss_fn(Y, model(X, w, b))
        print(f'epoch {i + 1}, loss {train_loss.item()}')
        # TODO: stop early once the total gradient magnitude falls below a threshold.
print(f'true_w = {true_w}, true_b = {true_b}, w = {w}, b = {b}')




epoch 1, loss 10.210000991821289
epoch 2, loss 6.712548732757568
epoch 3, loss 4.413288593292236
epoch 4, loss 2.9016900062561035
epoch 5, loss 1.9078940153121948
epoch 6, loss 1.2545058727264404
epoch 7, loss 0.8249126672744751
epoch 8, loss 0.5424532890319824
epoch 9, loss 0.3567292094230652
epoch 10, loss 0.23460696637630463
epoch 11, loss 0.15430323779582977
epoch 12, loss 0.10149632394313812
epoch 13, loss 0.06676993519067764
epoch 14, loss 0.04393268749117851
epoch 15, loss 0.028913576155900955
epoch 16, loss 0.019035691395401955
epoch 17, loss 0.012539023533463478
epoch 18, loss 0.008266014978289604
epoch 19, loss 0.005455361679196358
epoch 20, loss 0.003606583923101425
epoch 21, loss 0.0023904405534267426
epoch 22, loss 0.0015903858002275229
epoch 23, loss 0.0010640667751431465
epoch 24, loss 0.0007177875377237797
epoch 25, loss 0.000489983765874058
epoch 26, loss 0.00034008873626589775
epoch 27, loss 0.0002414575283182785
epoch 28, loss 0.00017655061674304307
epoch 29, loss 0.