# 多项式函数拟合实验

In [4]:
import collections
import math
import os
import random
import sys
import tarfile
import time
import json
import zipfile
from tqdm import tqdm
from PIL import Image
from collections import namedtuple

from IPython import display
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch.nn import init
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchtext
import torchtext.vocab as Vocab
import numpy as np


## 生成数据集
在这里我们生成一个人工数据集，我们使用如下的三阶多项式函数来生成样本的标签：
$$
y=1.2x-3.4x^2+5.6x^3+5+\epsilon
$$
其中噪声项 $\epsilon$ 服从均值为 0、标准差为 0.01 的正态分布。训练数据集和测试数据集的样本数都设为 100。

In [5]:
# Dataset sizes and the ground-truth parameters of the cubic generating function
# y = 1.2*x - 3.4*x^2 + 5.6*x^3 + 5 + noise.
n_train = n_test = 100
true_w = [1.2, -3.4, 5.6]
true_b = 5

# One scalar input x per sample, drawn from a standard normal distribution.
features = torch.randn((n_train + n_test, 1))
# Each row of poly_features is (x, x^2, x^3); the three columns together form the model input.
poly_features = torch.cat([features, features.pow(2), features.pow(3)], dim=1)

# Noise-free labels: weighted sum of the three polynomial columns plus the bias.
x_pow1, x_pow2, x_pow3 = poly_features.unbind(dim=1)
labels = true_w[0] * x_pow1 + true_w[1] * x_pow2 + true_w[2] * x_pow3 + true_b
# Add Gaussian observation noise (mean 0, standard deviation 0.01), sampled with NumPy.
labels += torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=labels.size()),
    dtype=torch.float,
)

看看数据集的前两个样本：

In [6]:
# Show the first two samples: raw inputs, their polynomial expansion, and the labels.
(
    features[0:2],
    poly_features[0:2],
    labels[0:2],
)

(tensor([[0.1441],
         [0.2979]]),
 tensor([[0.1441, 0.0208, 0.0030],
         [0.2979, 0.0888, 0.0264]]),
 tensor([5.1137, 5.1887]))

写一个作图函数`semilogy`

In [7]:
def set_figsize(figsize=(3.5, 2.5)):
    """Switch figure output to SVG and set matplotlib's default figure size.

    Args:
        figsize: Value stored in ``plt.rcParams['figure.figsize']``; a
            ``(width, height)`` pair.
    """
    use_svg_display()
    # Apply the size through rcParams so every figure created afterwards picks it up.
    plt.rcParams.update({'figure.figsize': figsize})

def use_svg_display():
    """Ask IPython to render matplotlib figures in SVG format inside Jupyter."""
    # NOTE(review): newer IPython releases reportedly deprecate this helper in favour of
    # matplotlib_inline.backend_inline.set_matplotlib_formats — confirm before upgrading.
    svg_format = 'svg'
    display.set_matplotlib_formats(svg_format)

def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None,
             legend=None, figsize=(3.5, 2.5)):
    """Plot one or two curves with a logarithmic y axis.

    Args:
        x_vals: x coordinates of the first curve.
        y_vals: y coordinates of the first curve.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        x2_vals: Optional x coordinates of a second curve (drawn dotted).
        y2_vals: Optional y coordinates of the second curve.
        legend: Optional sequence of legend labels; only used when the
            second curve is drawn.
        figsize: Figure size forwarded to ``set_figsize``.
    """
    set_figsize(figsize)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.semilogy(x_vals, y_vals)
    # Compare against None explicitly: evaluating the truthiness of a NumPy array or
    # tensor with more than one element raises instead of answering "was it provided?".
    if x2_vals is not None and y2_vals is not None:
        plt.semilogy(x2_vals, y2_vals, linestyle=':')
        # Only draw a legend when labels were actually supplied.
        if legend is not None:
            plt.legend(legend)


## 训练模型

In [8]:
# Training configuration shared by every train() call: epoch count and loss criterion.
num_epochs, loss = 1000, torch.nn.MSELoss()

def train(train_features, test_features, train_labels, test_labels,
          batch_size=10, lr=0.01):
    """Fit a single linear layer with mini-batch SGD and print the results.

    A fresh ``torch.nn.Linear(in_features, 1)`` is trained for the module-level
    ``num_epochs`` epochs using the module-level ``loss``. Afterwards the loss
    on the full training and test sets is printed together with the true
    parameters (module-level ``true_w`` / ``true_b``) and the learned ones.

    Args:
        train_features: Training inputs; the size of the last dimension is
            used as the layer's input width.
        test_features: Test inputs with the same feature width.
        train_labels: Training targets; reshaped to a column before the loss.
        test_labels: Test targets; reshaped to a column before the loss.
        batch_size: Mini-batch size for the DataLoader (default 10).
        lr: SGD learning rate (default 0.01).

    Returns:
        None. Results are reported via ``print``.
    """
    net = torch.nn.Linear(train_features.shape[-1], 1)
    data_set = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(data_set, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    for _ in range(num_epochs):
        for X, y in train_iter:
            # Reshape targets to one column so they line up with the layer's
            # single output unit instead of broadcasting inside the loss.
            l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

    # Final evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        train_loss = loss(net(train_features), train_labels.view(-1, 1)).item()
        test_loss = loss(net(test_features), test_labels.view(-1, 1)).item()
    print('train loss', train_loss, '\ntest loss', test_loss)
    print('\ntrue_w',true_w, '\ntrue_b',true_b)
    print('\nout_w:',net.weight.data, '\nout_b',net.bias.data)

正常拟合

In [9]:
# Standard split: the first n_train rows are used for training, the remainder for testing.
# All three polynomial columns (x, x^2, x^3) are passed to the model.
train_features, test_features = poly_features[:n_train], poly_features[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train(train_features, test_features, train_labels, test_labels)

train loss 8.197781426133588e-05 
test loss 9.133313869824633e-05

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[ 1.1976, -3.4008,  5.6010]]) 
out_b tensor([4.9987])


训练样本过少导致过拟合：只用前 10 个样本训练，其余 190 个样本用于测试

In [10]:
# Deliberately tiny training set: only the first 10 samples are used for training,
# and every remaining sample is held out for testing.
train_features, test_features = poly_features[:10], poly_features[10:]
train_labels, test_labels = labels[:10], labels[10:]
train(train_features, test_features, train_labels, test_labels)

train loss 0.053821295499801636 
test loss 1.8260118961334229

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[ 1.9386, -3.3896,  4.7793]]) 
out_b tensor([5.0379])


In [11]:
# Linear-model case: train on the raw input x only (no x^2 / x^3 columns).
# `features` is already 2-D with a single column, so it can be fed to the network directly.
# The same values could be taken from poly_features[:, 0], but that slice is 1-D and would
# need .view(-1, 1) to restore the column dimension before being passed to the network.
train_features, test_features = features[:n_train], features[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train(train_features, test_features, train_labels, test_labels)

train loss 29.963733673095703 
test loss 100.70545959472656

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[10.9593]]) 
out_b tensor([2.5417])
