In [1]:
%matplotlib inline
import math, torch, random
from torch import nn, tensor, arange, randn, zeros, tanh, mm
from torch.nn import functional as F
from d2l import torch as d2l
from tqdm import tqdm

In [80]:
class MLP:
    """Single affine layer followed by ``tanh``, with hand-written SGD helpers.

    Despite the class name and the ``num_hiddens`` argument, no hidden layer
    is built: the only trainable parameters are ``W`` with shape
    ``(num_inputs, num_outputs)`` and ``b`` with shape ``(num_outputs,)``.

    Args:
        num_inputs: Number of input features per example.
        num_hiddens: Stored on the instance but not used by the model.
        num_outputs: Number of outputs per example.
        device: Device on which the parameters are created. When omitted, the
            module-level ``device`` variable is used if one is defined
            (the notebook's original behavior), otherwise ``'cpu'``.
    """

    def __init__(self, num_inputs, num_hiddens, num_outputs, device=None):
        self.num_inputs, self.num_hiddens, self.num_outputs = num_inputs, num_hiddens, num_outputs
        if device is None:
            # Keep working for callers that rely on a notebook-level `device`
            # global instead of passing the device explicitly.
            device = globals().get('device', 'cpu')
        self.device = device
        self.params = self.init_params()

    def init_params(self):
        """Create the trainable parameters.

        Returns:
            ``[W, b]`` where ``W`` is drawn from N(0, 0.01^2) and ``b`` is
            zero; both have ``requires_grad`` enabled.
        """
        W = torch.randn(size=(self.num_inputs, self.num_outputs), device=self.device) * 0.01
        b = torch.zeros(self.num_outputs, device=self.device)
        params = [W, b]
        for param in params:
            param.requires_grad_(True)
        return params

    def forward(self, inputs):
        """Apply ``tanh(X @ W + b)`` to every item of ``inputs``.

        Args:
            inputs: An iterable of tensors (for example a 2-D batch, which is
                iterated row by row). Each item must be matmul-compatible
                with ``W``.

        Returns:
            The per-item results concatenated along dimension 0.
        """
        W, b = self.params
        # tanh keeps every prediction inside (-1, 1), so targets outside that
        # interval cannot be matched exactly by this model.
        outputs = [torch.tanh(X @ W + b) for X in inputs]
        return torch.cat(outputs, dim=0)

    def sgd(self, lr, batch_size):
        """Take one SGD step and reset the gradients.

        Each parameter is updated as ``param -= lr * grad / batch_size``.
        Parameters that have no gradient yet are left unchanged.
        """
        with torch.no_grad():
            for param in self.params:
                if param.grad is None:
                    continue
                param -= lr * param.grad / batch_size
                param.grad.zero_()

    def __call__(self, X):
        """Alias for :meth:`forward`."""
        return self.forward(X)

    def grad_clipping(self, theta):
        """Rescale the gradients in place so their global L2 norm is <= ``theta``.

        The norm is taken over the gradients of all parameters jointly.
        Parameters without a gradient are ignored; if none has a gradient the
        call is a no-op.
        """
        grads = [p.grad for p in self.params if p.grad is not None]
        if not grads:
            return
        # The norm must be computed from the squared gradients, not from the
        # parameter values themselves.
        norm = torch.sqrt(sum(torch.sum(g ** 2) for g in grads))
        if norm > theta:
            for g in grads:
                g.mul_(theta / norm)
                


In [81]:
def synthetic_data(w, b, num_examples):  #@save
    """Generate y = Xw + b + noise.

    Args:
        w: 1-D weight tensor; its length sets the number of feature columns.
        b: Bias added to every target.
        num_examples: Number of rows to generate.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(num_examples, len(w))`` with
        standard-normal entries and ``y`` is a column of shape
        ``(num_examples, 1)`` perturbed by N(0, 0.01^2) noise.
    """
    num_features = len(w)
    X = torch.normal(0, 1, (num_examples, num_features))
    clean_targets = torch.matmul(X, w) + b
    noise = torch.normal(0, 0.01, clean_targets.shape)
    noisy_targets = clean_targets + noise
    return X, noisy_targets.reshape((-1, 1))

# Ground-truth weights and bias of the linear model the dataset is drawn from.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# 1000 noisy examples generated from those parameters.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

def data_iter(batch_size, features, labels):
    """Yield ``(features, labels)`` minibatches in a freshly shuffled order.

    Every example is visited exactly once per call; the final batch is
    smaller when ``len(features)`` is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    # Examples are read in random order, with no particular sequence.
    random.shuffle(order)
    order = torch.tensor(order)
    for start in range(0, total, batch_size):
        # Slicing clamps at the end, so the last batch may be short.
        picked = order[start:start + batch_size]
        yield features[picked], labels[picked]

In [88]:
# Run configuration.
batch_size = 8
device = 'cpu'
num_epochs = 100
num_inputs, num_hiddens, num_outputs = 2, 256, 1

# Model, loss function, and a torch SGD optimizer over the model parameters.
net = MLP(num_inputs, num_hiddens, num_outputs)
loss = torch.nn.MSELoss()
updater = torch.optim.SGD(net.params, lr=1)

In [89]:
# Train for `num_epochs` passes over the data and print the example-weighted
# mean loss of each epoch.
for epoch in range(num_epochs):
    state = None
    # metrics[0]: running sum of (batch loss * batch size); metrics[1]: examples seen.
    metrics = [0.0, 0]
    for X, Y in data_iter(batch_size, features, labels):
        y_hat = net(X)
        # Flatten the label column so it has the same 1-D shape as y_hat.
        y = Y.T.reshape(-1)
        l = loss(y_hat, y)
        l.backward()
        # NOTE(review): `loss` is created as MSELoss() with its default
        # reduction and net.sgd divides the gradient by batch_size again —
        # confirm this extra scaling of the step is intended.
        net.sgd(1, batch_size)
        # Accumulate a plain Python float: adding the tensor `l` itself would
        # chain every step's autograd history into the running total.
        metrics[0] += l.item() * y.numel()
        metrics[1] += y.numel()
    print('epoch %d loss %f' % (epoch + 1, metrics[0] / metrics[1]))

epoch 1 loss 26.730768
epoch 2 loss 26.557814
epoch 3 loss 26.540184
epoch 4 loss 26.532017
epoch 5 loss 26.527061
epoch 6 loss 26.523800
epoch 7 loss 26.521450
epoch 8 loss 26.519712
epoch 9 loss 26.518322
epoch 10 loss 26.517250
epoch 11 loss 26.516363
epoch 12 loss 26.515581
epoch 13 loss 26.514967
epoch 14 loss 26.514442
epoch 15 loss 26.513983
epoch 16 loss 26.513567
epoch 17 loss 26.513229
epoch 18 loss 26.512897
epoch 19 loss 26.512598
epoch 20 loss 26.512379
epoch 21 loss 26.512144
epoch 22 loss 26.511950
epoch 23 loss 26.511763
epoch 24 loss 26.511602
epoch 25 loss 26.511440
epoch 26 loss 26.511314
epoch 27 loss 26.511168
epoch 28 loss 26.511049
epoch 29 loss 26.510954
epoch 30 loss 26.510853
epoch 31 loss 26.510759
epoch 32 loss 26.510668
epoch 33 loss 26.510586
epoch 34 loss 26.510508
epoch 35 loss 26.510429
epoch 36 loss 26.510378
epoch 37 loss 26.510311
epoch 38 loss 26.510254
epoch 39 loss 26.510201
epoch 40 loss 26.510149
epoch 41 loss 26.510099
epoch 42 loss 26.510061
e

In [34]:
# Peek at the first minibatch only and report the tensor shapes.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, Y = first_batch
    print(X.shape, Y.shape)

torch.Size([32, 2]) torch.Size([32, 1])


In [93]:
# Predict for the single example x = [1.0, 2.0]. With true_w = [2, -3.4] and
# true_b = 4.2 the noise-free target is 2*1.0 - 3.4*2.0 + 4.2 = -0.6.
net(tensor([[1.0, 2.0]]))

tensor([-0.7154], grad_fn=<CatBackward0>)

In [84]:
# Bind the model's weight and bias tensors to short names for inspection.
W, b = net.params

In [90]:
# Display the weight matrix, for comparison with true_w.
W

tensor([[ 2.1949],
        [-3.7460]], requires_grad=True)

In [91]:
# Display the bias, for comparison with true_b.
b

tensor([4.6577], requires_grad=True)

In [92]:
# Ground-truth parameters again (same values used to generate the dataset),
# kept next to the learned W and b for comparison.
true_w, true_b = torch.tensor([2, -3.4]), 4.2