## softmax回归从零开始实现

In [194]:
import torch
from torchvision import transforms
import torchvision
from torch.utils import data
import matplotlib.pyplot as plt
from torch import nn

### 1、加载数据集

In [195]:
def load_data_fashion_mnist(batch_size,resize=None,root='./',num_workers=4):
    """Download Fashion-MNIST if needed and return train/test DataLoaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size handed to ``transforms.Resize``; when falsy the
            images keep their original size.
        root: Directory the dataset is downloaded to and read from.
        num_workers: Number of worker processes used by each DataLoader.
            Pass 0 to load data in the main process.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    # Convert each image to a tensor.
    steps = [transforms.ToTensor()]
    if resize:
        # Resizing is inserted first so it runs before the tensor conversion.
        steps.insert(0,transforms.Resize(resize))
    trans = transforms.Compose(steps) # chain the steps into a single transform
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root,train=True,transform=trans,download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root,train=False,transform=trans,download=True)
    mnist_train_loader = data.DataLoader(
        mnist_train,batch_size,shuffle=True,num_workers=num_workers)
    mnist_test_loader = data.DataLoader(
        mnist_test,batch_size,shuffle=False,num_workers=num_workers)
    return mnist_train_loader,mnist_test_loader


def get_fashion_minist_labels(labels):
    """Translate numeric Fashion-MNIST class indices into their text names.

    Args:
        labels: Iterable of class indices; each element is converted with
            ``int`` before the lookup.

    Returns:
        A list with one class name per element of ``labels``.
    """
    names = ('t-shirt','trouser','pullover','dress','coat',
             'sandal','shirt','sneaker','bag','ankle boot')
    result = []
    for label in labels:
        result.append(names[int(label)])
    return result

In [196]:
# Mini-batch size used for both the training and the test loader.
batch_size = 256
# Build the Fashion-MNIST training and test DataLoaders.
train_iter,test_iter = load_data_fashion_mnist(batch_size)

In [197]:
# Peek at the first training mini-batch to check the feature and label shapes.
for x,y in train_iter:
    print(x.shape)
    print(y.shape)
    break  # only the first batch is inspected

torch.Size([256, 1, 28, 28])
torch.Size([256])


### 2、初始化模型参数

In [198]:
# Each 1x28x28 image is flattened into 784 features; there are 10 classes.
num_inputs = 784
num_outputs = 10

# Weights start as small Gaussian noise (mean 0, std 0.01) and the bias as zeros;
# both track gradients so they can be updated by the hand-written SGD step.
W = torch.normal(0,0.01,size=(num_inputs,num_outputs),requires_grad=True) # shape [784,10]
b = torch.zeros(num_outputs,requires_grad=True) # shape [10]; broadcast over the batch when added

### 3、定义softmax操作

In [199]:
def softmax(X):
    """Apply a numerically stable softmax along dim 1.

    Args:
        X: 2-D tensor of scores with one row per sample.

    Returns:
        A tensor of the same shape whose rows are non-negative and sum to 1.
    """
    # exp() overflows to inf for large scores, which turns the ratio into NaN.
    # Softmax is invariant to a per-row shift, so subtracting the row maximum
    # keeps every exponent <= 0 without changing the result. The shift is
    # detached because it contributes nothing to the gradient.
    row_max = X.max(dim=1,keepdim=True).values.detach()
    x_exp = torch.exp(X-row_max)
    return x_exp/x_exp.sum(dim=1,keepdim=True)

### 4、定义模型

In [200]:
def net(X):
    """Softmax-regression forward pass using the module-level ``W`` and ``b``.

    Args:
        X: Input batch; it is reshaped to ``(-1, W.shape[0])`` so each sample
            becomes one flat row.

    Returns:
        The result of ``softmax`` applied to the linear scores ``X @ W + b``.
    """
    flat = X.reshape(-1,W.shape[0])
    scores = torch.matmul(flat,W)+b
    return softmax(scores)

### 5、定义损失函数

In [201]:
def cross_entropy(y_hat,y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: Predicted class probabilities, one row per sample.
        y: Class index of the correct label for each row.

    Returns:
        A tensor holding ``-log(p)`` for each sample, where ``p`` is the
        probability ``y_hat`` assigns to that sample's correct class.
    """
    rows = range(len(y_hat))
    # Pair every row index with its label to pick the correct-class probability.
    true_class_prob = y_hat[rows,y]
    return -torch.log(true_class_prob)

### 6、分类精度

In [202]:
def accuracy(y_hat,y):
    """Count how many samples are classified correctly.

    Args:
        y_hat: Per-class scores or probabilities, one row per sample.
        y: True class index for each row.

    Returns:
        The number of correct predictions as a Python float (a count, not a
        ratio).
    """
    # The predicted class is the highest-scoring column, cast to the label dtype.
    predicted = y_hat.argmax(dim=1).type(y.dtype)
    hits = (predicted==y).type(y.dtype)
    return float(hits.sum())

In [203]:
# Toy example: two samples with true classes 0 and 2, and predicted
# probabilities over three classes for each of them.
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])

In [204]:
# Both rows predict class 2, so only the second sample (label 2) is correct.
accuracy(y_hat,y)/len(y)

0.5

In [205]:
class Accumulator:
    """Keep running float sums for ``n`` separate quantities."""
    def __init__(self,n):
        # One running total per tracked quantity, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self,*args):
        """Add one increment to each running sum, matched by position."""
        updated = []
        for total,increment in zip(self.data,args):
            updated.append(total+float(increment))
        self.data = updated

    def reset(self):
        """Set every running sum back to zero."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self,idx):
        """Return the running sum stored at position ``idx``."""
        return self.data[idx]

In [206]:
def evaluate_accuracy(net,data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        net: Callable mapping a feature batch to per-class scores. If it is a
            ``torch.nn.Module`` it is put into evaluation mode first.
        data_iter: Iterable yielding ``(features, labels)`` batches.

    Returns:
        Correct predictions divided by the total number of labels seen.
    """
    if isinstance(net,torch.nn.Module):
        net.eval() # switch the module to evaluation mode
    correct = 0.0
    seen = 0.0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for x,y in data_iter:
            correct = correct+float(accuracy(net(x),y))
            seen = seen+float(y.numel())
    return correct/seen

In [207]:
evaluate_accuracy(net,test_iter) # with 10 classes, randomly initialised parameters should score roughly 10%

0.1248

### 6、定义优化算法

In [208]:
def sgd(params,lr,batch_size):
    """Apply one mini-batch SGD step in place, then clear the gradients.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient, which is expected to be
            the gradient of a loss summed over the batch.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            step = lr * p.grad / batch_size
            p.sub_(step)
            p.grad.zero_() # reset so the next backward pass starts from zero

### 7、训练

In [209]:
def train_epoch(net,data_iter,loss_fn,lr):
    """Train the from-scratch model for one pass over ``data_iter``.

    The parameters that get updated are the module-level ``W`` and ``b``,
    whatever ``net`` is passed in.

    Args:
        net: Callable mapping a feature batch to class probabilities.
        data_iter: Iterable yielding ``(features, labels)`` batches.
        loss_fn: Callable returning one loss value per sample.
        lr: Learning rate handed to ``sgd``.

    Returns:
        A ``(mean_loss, accuracy)`` tuple averaged over all samples seen.
    """
    metric = Accumulator(3) # summed loss, number of correct predictions, sample count
    for x,y in data_iter:
        y_hat = net(x) # forward pass
        batch_loss = loss_fn(y_hat,y).sum()
        batch_loss.backward() # backward pass on the summed loss
        # Divide by the size of THIS batch rather than the global batch_size:
        # the final batch of an epoch is usually smaller, and dividing it by
        # the nominal size would silently shrink its update.
        sgd([W,b],lr,y.numel()) # update the parameters and zero their gradients
        metric.add(float(batch_loss),accuracy(y_hat,y),y.numel())
    return metric[0]/metric[2],metric[1]/metric[2] # per-sample mean loss, accuracy
            

In [210]:
def train(net,train_iter,test_iter,loss_fn,lr,num_epochs):
    """Run ``num_epochs`` training epochs and print the metrics of each one.

    After every epoch this prints the 1-based epoch number, the mean training
    loss, the training accuracy and the test accuracy.
    """
    for epoch_no in range(1,num_epochs+1):
        epoch_stats = train_epoch(net,train_iter,loss_fn,lr)
        train_loss,train_acc = epoch_stats
        test_acc = evaluate_accuracy(net,test_iter)
        print(epoch_no,train_loss,train_acc,test_acc)

In [211]:
# Hyper-parameters for the from-scratch model: number of epochs and learning rate.
num_epochs = 10
lr = 0.03
# Train the hand-written softmax regression; one line of metrics is printed per epoch.
train(net,train_iter,test_iter,cross_entropy,lr,num_epochs)

1 1.033520164489746 0.6965666666666667 0.7454
2 0.7084728014628092 0.7767 0.7757
3 0.636662836265564 0.79825 0.791
4 0.5972671895345052 0.8091666666666667 0.8011
5 0.5714250760396321 0.8168833333333333 0.8051
6 0.5528819741566976 0.8205333333333333 0.8106
7 0.5385389862696329 0.82455 0.8137
8 0.5271430640538534 0.8272666666666667 0.8171
9 0.5173635903040568 0.8296333333333333 0.8188
10 0.5094965869903565 0.8321 0.8203


### 8、预测

In [None]:
def show_images(imgs,num_rows,num_cols,titles,scale=1.5):
    """Draw images in a ``num_rows`` x ``num_cols`` grid and show the figure.

    Args:
        imgs: Iterable of images, either tensors or anything ``imshow`` accepts.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        titles: Sequence of per-image titles indexed by image position; a
            falsy value draws no titles.
        scale: Size of each grid cell in the figure-size units matplotlib uses.
    """
    figsize = (num_cols*scale,num_rows*scale)
    # squeeze=False always returns a 2-D array of Axes. Without it a 1x1 grid
    # returns a bare Axes object, which has no flatten() method.
    _,axes = plt.subplots(num_rows,num_cols,figsize=figsize,squeeze=False)
    axes = axes.flatten() # flatten the 2-D grid so it can be paired with imgs
    for i,(ax,img) in enumerate(zip(axes,imgs)):
        if torch.is_tensor(img):
            # detach()/cpu() let tensors that require grad or live on another
            # device be converted; plain CPU tensors are unaffected.
            ax.imshow(img.detach().cpu().numpy())
        else:
            ax.imshow(img) # e.g. a PIL image
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    plt.show()

In [None]:
def predict(net, test_iter, n=6):
    """Show the first ``n`` images of the first batch with true/predicted labels.

    Each title has the true label on its first line and the predicted label
    on its second.

    Args:
        net: Callable mapping a feature batch to per-class scores.
        test_iter: Iterable yielding ``(features, labels)`` batches; only the
            first batch is used.
        n: Number of images to display.
    """
    x,y = next(iter(test_iter))
    # Only the samples that will be displayed are kept, so the model is not
    # run on the rest of the batch.
    x,y = x[:n],y[:n]
    # Predictions are only displayed, so no autograd graph is needed.
    with torch.no_grad():
        predicted = net(x).argmax(1)
    trues = get_fashion_minist_labels(y)
    preds = get_fashion_minist_labels(predicted)
    titles = [true+'\n'+pred for true,pred in zip(trues,preds)]
    # Use the real sample count and image size instead of assuming n images
    # of 28x28, so a short first batch or resized images no longer crash.
    images = x.reshape(len(x),*x.shape[-2:])
    show_images(images,1,n,titles)


# Visualise the trained model's predictions on the first 6 test images.
predict(net, test_iter, n=6)

## softmax回归的简洁实现

In [186]:
# Load the dataset
batch_size = 256
train_iter,test_iter = load_data_fashion_mnist(batch_size)

In [187]:
# Flatten each image into a vector, then map its 784 features to 10 class scores.
# No softmax layer is included; the network outputs the raw linear scores.
net = nn.Sequential(nn.Flatten(),nn.Linear(784,10))

In [188]:
def init_weights(m):
    """Re-initialise the weight of an ``nn.Linear`` layer from N(0, 0.01^2).

    Modules whose type is not exactly ``nn.Linear`` are left untouched, and
    the bias is not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight,std=0.01)

# Run init_weights on net and each of its sub-modules.
net.apply(init_weights)


Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

In [189]:
# reduction='none' keeps one loss value per sample; the training loop below
# reduces it itself (mean for the backward pass, sum for reporting).
loss = nn.CrossEntropyLoss(reduction='none')

In [190]:
# Plain SGD over all parameters of net, with learning rate 0.1.
trainer = torch.optim.SGD(net.parameters(),lr=0.1)

In [191]:
def train_epoch(net,train_iter,loss,trainer):
    """Train ``net`` for one pass over ``train_iter`` using an optimizer.

    Args:
        net: Model mapping a feature batch to per-class scores.
        train_iter: Iterable yielding ``(features, labels)`` batches.
        loss: Callable returning one loss value per sample.
        trainer: Optimizer exposing ``zero_grad()`` and ``step()``.

    Returns:
        A ``(mean_loss, accuracy)`` tuple averaged over all samples seen.
    """
    # evaluate_accuracy puts a Module into eval mode after every epoch, so
    # training mode has to be restored here. Otherwise layers that behave
    # differently in eval mode would train in the wrong mode from the second
    # epoch onwards.
    if isinstance(net,torch.nn.Module):
        net.train()
    metric = Accumulator(3) # summed loss, number of correct predictions, sample count
    for x,y in train_iter:
        y_hat = net(x)
        l = loss(y_hat,y)
        trainer.zero_grad() # clear gradients left over from the previous step
        l.mean().backward() # the optimizer step uses the mean-loss gradient
        trainer.step()
        metric.add(float(l.sum()),accuracy(y_hat,y),y.numel())
    return metric[0]/metric[2],metric[1]/metric[2]


In [192]:
def train(net,train_iter,test_iter,loss,trainer,num_epochs):
    """Run ``num_epochs`` optimizer-driven epochs and print per-epoch metrics.

    After every epoch this prints the 1-based epoch number, the mean training
    loss, the training accuracy and the test accuracy.
    """
    for epoch_no in range(1,num_epochs+1):
        epoch_stats = train_epoch(net,train_iter,loss,trainer)
        train_loss,train_acc = epoch_stats
        test_acc = evaluate_accuracy(net,test_iter)
        print(epoch_no,train_loss,train_acc,test_acc)

In [193]:
# Train the nn.Sequential model for 20 epochs; one line of metrics is printed per epoch.
num_epochs = 20
train(net,train_iter,test_iter,loss,trainer,num_epochs)

1 0.7865897769927979 0.7478666666666667 0.7852
2 0.5703130973815917 0.8132166666666667 0.8074
3 0.5249946798960368 0.8270833333333333 0.8172
4 0.5008768747965495 0.8326666666666667 0.8143
5 0.48550347595214843 0.8359833333333333 0.791
6 0.47450652389526365 0.8402666666666667 0.8269
7 0.46455805123647054 0.8426833333333333 0.8292
8 0.45832816429138185 0.8442 0.8291
9 0.45243869819641114 0.8467166666666667 0.8321
10 0.4478655403137207 0.8482833333333333 0.833
11 0.4424949731826782 0.8490166666666666 0.8356
12 0.4396551072438558 0.85 0.8322
13 0.4356987876256307 0.85185 0.8368
14 0.4329678873697917 0.8513333333333334 0.8361
15 0.4309636246363322 0.8526333333333334 0.8338
16 0.4272518388748169 0.8542666666666666 0.8302
17 0.42634141572316486 0.8542 0.8292
18 0.42420314067204795 0.8550666666666666 0.8366
19 0.4224140298207601 0.8544666666666667 0.8355
20 0.4202173875172933 0.8565666666666667 0.8371
