# 使用torch.nn实现softmax回归

In [1]:
import torch
import numpy as np
import random
from torch import nn
from IPython import display
from matplotlib import pyplot as plt
import torchvision
import torchvision.transforms as transforms
from torch.nn import init

## 1 获取和读取数据

In [2]:
#下载Fashion-MINIST数据
mnist_train = torchvision.datasets.FashionMNIST(root='./Datasets/FashionMNIST', train=True,
download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='./Datasets/FashionMNIST', train=False,
download=True, transform=transforms.ToTensor())
#读取数据
batch_size = 256
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True,
num_workers=0)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False,
num_workers=0)
print(test_iter)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./Datasets/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./Datasets/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./Datasets/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./Datasets/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./Datasets/FashionMNIST\FashionMNIST\raw

<torch.utils.data.dataloader.DataLoader object at 0x000001ED6EC99670>


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## 2 定义和初始化模型

In [3]:
#定义输入和输出
num_inputs = 784
num_outputs = 10
#定义网络模型
class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x): # x shape: (batch, 1, 28, 28)
        y = self.linear(x.view(x.shape[0], -1))
        return y

net = LinearNet(num_inputs, num_outputs)

In [4]:
# 初始化参数w和b
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

## 3 softmax和交叉熵损失函数

In [5]:
#nn模块实现交叉熵损失函数--包含了softmax函数
cross_entropy = nn.CrossEntropyLoss()

## 4 定义优化算法

In [6]:
#优化函数SGD
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

## 5 计算分类准确率

In [7]:
def evaluate_accuracy(data_iter,net):
    acc_sum,n = 0.0,0
    for X,y in data_iter:
        #print(len(X)) 小批量数据集 每个X中有 256个图像
        #print((net(X).argmax(dim=1)==y).float().sum().item())
        acc_sum += (net(X).argmax(dim=1)==y).float().sum().item()
        n+=y.shape[0]
    return acc_sum/n

## 6 训练模型

In [8]:
def train(net, train_iter, test_iter, loss, num_epochs, batch_size,params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            optimizer.zero_grad() # 梯度清零
            l.backward() # 计算梯度
            optimizer.step()  # 随机梯度下降算法, 更新参数
            train_l_sum += l.item()
            #训练集准确率
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'% (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


In [9]:
num_epochs = 30
train(net, train_iter, test_iter, cross_entropy, num_epochs,batch_size,None,None, optimizer)

epoch 1, loss 0.0031, train acc 0.748, test acc 0.795
epoch 2, loss 0.0022, train acc 0.813, test acc 0.800
epoch 3, loss 0.0021, train acc 0.825, test acc 0.819
epoch 4, loss 0.0020, train acc 0.831, test acc 0.824
epoch 5, loss 0.0019, train acc 0.836, test acc 0.809
epoch 6, loss 0.0019, train acc 0.839, test acc 0.799
epoch 7, loss 0.0018, train acc 0.843, test acc 0.831
epoch 8, loss 0.0018, train acc 0.844, test acc 0.833
epoch 9, loss 0.0018, train acc 0.846, test acc 0.827
epoch 10, loss 0.0018, train acc 0.848, test acc 0.832
epoch 11, loss 0.0017, train acc 0.849, test acc 0.835
epoch 12, loss 0.0017, train acc 0.850, test acc 0.835
epoch 13, loss 0.0017, train acc 0.851, test acc 0.833
epoch 14, loss 0.0017, train acc 0.852, test acc 0.837
epoch 15, loss 0.0017, train acc 0.853, test acc 0.837
epoch 16, loss 0.0017, train acc 0.853, test acc 0.831
epoch 17, loss 0.0017, train acc 0.854, test acc 0.840
epoch 18, loss 0.0017, train acc 0.855, test acc 0.837
epoch 19, loss 0.00