In [263]:
import random

import torch

In [258]:
class MLP:
    """A single affine layer ``y = X @ W + b`` trained with minibatch SGD.

    Despite the name there is no hidden layer or non-linearity: the only
    parameters are a weight matrix ``W`` of shape ``(num_inputs,
    num_outputs)`` and a bias ``b`` of shape ``(num_outputs,)``, stored in
    ``self.params`` as ``[W, b]``.
    """

    def __init__(self, num_inputs, num_outputs, device=None):
        """Create the layer and initialise its parameters.

        Args:
            num_inputs: Number of input features.
            num_outputs: Number of output units.
            device: Device on which the parameters are allocated. When
                ``None``, the module-level ``device`` variable is used if
                one exists (the behaviour of earlier versions of this
                class), otherwise ``'cpu'``.
        """
        if device is None:
            # Backward-compatible fallback: older callers configured the
            # device through a module-level variable instead of an argument.
            device = globals().get('device', 'cpu')
        self.num_inputs, self.num_outputs = num_inputs, num_outputs
        self.device = device
        self.params = self._init_params()

    def _init_params(self):
        """Return freshly initialised ``[W, b]`` with gradient tracking on.

        ``W`` is sampled from a standard normal and scaled by 0.01; ``b``
        starts at zero.
        """
        W = torch.randn(size=(self.num_inputs, self.num_outputs),
                        device=self.device) * 0.01
        b = torch.zeros(self.num_outputs, device=self.device)
        params = [W, b]
        for param in params:
            param.requires_grad_(True)
        return params

    def _forward(self, inputs):
        """Apply the affine map to ``inputs`` and merge its first two axes.

        Args:
            inputs: Tensor with at least two dimensions whose last dimension
                has size ``num_inputs``.

        Returns:
            ``inputs @ W + b`` with dimensions 0 and 1 flattened together.
            For a ``(batch, num_inputs)`` input this is a 1-D tensor of
            length ``batch * num_outputs``.
        """
        W, b = self.params
        # One batched matmul replaces a Python loop over the first axis.
        Y = inputs @ W + b
        # Flattening dims 0 and 1 gives the same layout as concatenating the
        # per-slice results along dim 0.
        return Y.flatten(0, 1)

    def update(self, X, y, lr):
        """Run one SGD step on the batch ``(X, y)``.

        Args:
            X: Input batch; ``X.shape[0]`` is used as the batch size.
            y: Targets holding as many elements as the forward output.
            lr: Learning rate.

        Returns:
            The element-wise losses computed before the step, detached from
            the autograd graph so callers can accumulate them freely.

        Raises:
            ValueError: If ``X`` contains no examples.
        """
        batch_size = X.shape[0]
        if batch_size == 0:
            # Dividing the gradient by zero would write NaN into the params.
            raise ValueError('update() requires a non-empty batch')
        y_hat = self._forward(X)
        # For a 2-D ``y`` the transpose makes the flattening column-major.
        l = self._loss(y_hat, y.T.reshape(-1))
        l.sum().backward()
        # Plain SGD: step along the batch-averaged gradient, then clear it.
        with torch.no_grad():
            for param in self.params:
                param -= lr * param.grad / batch_size
                param.grad.zero_()
        return l.detach()

    def _loss(self, y_hat, y):
        """Element-wise halved squared error; ``y`` is reshaped to match ``y_hat``."""
        return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

    def __call__(self, X):
        """Shorthand for the forward pass."""
        return self._forward(X)

In [259]:
def synthetic_data(w, b, num_examples):  #@save
    """Generate a noisy linear dataset ``y = Xw + b + noise``.

    Args:
        w: 1-D weight tensor; its length sets the number of feature columns.
        b: Bias added to every target.
        num_examples: Number of rows to generate.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` with
        entries drawn from a normal distribution (mean 0, std 1), and ``y``
        is the noisy target reshaped to a single column.
    """
    num_features = len(w)
    X = torch.normal(0, 1, (num_examples, num_features))
    clean_targets = X @ w + b
    # Gaussian observation noise with standard deviation 0.01.
    noise = torch.normal(0, 0.01, clean_targets.shape)
    return X, (clean_targets + noise).reshape((-1, 1))

# Ground-truth parameters of the linear model that training should recover.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# Draw 1000 noisy samples from y = X @ true_w + true_b.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches ``(features[idx], labels[idx])``.

    The example indices are shuffled once per call with ``random.shuffle``
    and then consumed in consecutive chunks of ``batch_size``. The final
    batch is smaller when the number of examples is not a multiple of
    ``batch_size``.

    This is a generator, so the argument checks below run when iteration
    starts rather than when the function is called.

    Args:
        batch_size: Positive number of examples per batch.
        features: Tensor indexed along its first dimension.
        labels: Tensor with the same first-dimension length as ``features``.

    Yields:
        ``(features_batch, labels_batch)`` tuples.

    Raises:
        ValueError: If ``batch_size`` is not positive, or ``features`` and
            ``labels`` differ in length.
    """
    if batch_size <= 0:
        raise ValueError(f'batch_size must be positive, got {batch_size}')
    num_examples = len(features)
    if len(labels) != num_examples:
        raise ValueError(
            f'features and labels differ in length: '
            f'{num_examples} vs {len(labels)}')
    indices = list(range(num_examples))
    # Examples are visited in random order.
    random.shuffle(indices)
    for start in range(0, num_examples, batch_size):
        # List slicing clamps at the end, so the last batch needs no min().
        batch_indices = torch.tensor(indices[start:start + batch_size])
        yield features[batch_indices], labels[batch_indices]

In [260]:
# Training configuration and model dimensions.
batch_size = 32
# `device` is read by MLP when it allocates its parameters.
device = 'cpu'
num_epochs = 10
num_inputs = 2
num_outputs = 1
net = MLP(num_inputs, num_outputs)

In [261]:
for epoch in range(num_epochs):
    # metrics[0]: summed loss over the epoch, metrics[1]: number of targets.
    metrics = [0.0, 0]
    for X, y in data_iter(batch_size, features, labels):
        # update() divides the gradient by the batch size before stepping.
        l = net.update(X, y, lr=1)
        # .item() yields a Python float, so no autograd history is carried
        # from one batch to the next while accumulating.
        metrics[0] += l.sum().item()
        metrics[1] += y.numel()
    print('epoch %d loss %f' % (epoch + 1, metrics[0] / metrics[1]))

epoch 1 loss 0.754812
epoch 2 loss 0.000058
epoch 3 loss 0.000056
epoch 4 loss 0.000055
epoch 5 loss 0.000061
epoch 6 loss 0.000062
epoch 7 loss 0.000056
epoch 8 loss 0.000056
epoch 9 loss 0.000055
epoch 10 loss 0.000054


In [262]:
# Compare the learned parameters with the ground truth used to build the data.
W, b = net.params
w_error = true_w - W.reshape(true_w.shape)
b_error = true_b - b
print(f'w的估计误差: {w_error}')
print(f'b的估计误差: {b_error}')

w的估计误差: tensor([-0.0015,  0.0018], grad_fn=<SubBackward0>)
b的估计误差: tensor([-0.0003], grad_fn=<RsubBackward1>)
