In [2]:
import torch
import torch.nn as nn
import torchvision
from sklearn.model_selection import train_test_split

In [3]:
data=torchvision.datasets.FashionMNIST(root='./data',train=True,download=True,transform=torchvision.transforms.ToTensor())
train_data,test_data=train_test_split(data,test_size=0.2,random_state=42)
train_set=torch.utils.data.DataLoader(train_data,batch_size=64,shuffle=True)
test_set=torch.utils.data.DataLoader(test_data,batch_size=64,shuffle=True)
del data,train_data,test_data

In [4]:
LeNet=nn.Sequential(nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
        nn.Linear(120, 84), nn.Sigmoid(),
        nn.Linear(84, 10))

In [5]:
def init_weights(m):
    if type(m)==nn.Linear or type(m)==nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight) # 使用Xavier均匀分布初始化权重

LeNet.apply(init_weights) # 初始化权重

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [6]:
def evaluate_accuracy(data_iter,net,device=None):
    if device is None and isinstance(net,torch.nn.Module):
        device=list(net.parameters())[0].device
    acc_sum,n=0.0,0
    with torch.no_grad():
        for X,y in data_iter:
            if isinstance(net,torch.nn.Module):
                net.eval()
                acc_sum+=(net(X.to(device)).argmax(dim=1)==y.to(device)).float().sum().cpu().item()
                net.train()
            else:
                if('is_training' in net.__code__.co_varnames):
                    acc_sum+=(net(X,is_training=False).argmax(dim=1)==y).float().sum().item()
                else:
                    acc_sum+=(net(X).argmax(dim=1)==y).float().sum().item()
            n+=y.shape[0]
    return acc_sum/n

def train(net,train_iter,test_iter,loss,epochs,lr,device):
    net=net.to(device)
    optimizer=torch.optim.Adam(net.parameters(),lr=lr)
    for epoch in range(epochs):
        train_loss,train_acc,n=0.0,0.0,0
        for X,y in train_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_loss+=l.cpu().item()
            train_acc+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
        test_acc=evaluate_accuracy(test_iter,net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'%(epoch+1,train_loss/n,train_acc/n,test_acc))

In [7]:
train(LeNet,train_set,test_set,torch.nn.CrossEntropyLoss(),10,0.001,torch.device('cuda:0'))

epoch 1, loss 0.0192, train acc 0.551, test acc 0.721
epoch 2, loss 0.0107, train acc 0.737, test acc 0.756
epoch 3, loss 0.0094, train acc 0.768, test acc 0.785
epoch 4, loss 0.0084, train acc 0.794, test acc 0.809
epoch 5, loss 0.0077, train acc 0.815, test acc 0.822
epoch 6, loss 0.0072, train acc 0.830, test acc 0.827
epoch 7, loss 0.0068, train acc 0.839, test acc 0.844
epoch 8, loss 0.0065, train acc 0.845, test acc 0.846
epoch 9, loss 0.0063, train acc 0.853, test acc 0.850
epoch 10, loss 0.0061, train acc 0.857, test acc 0.853


In [9]:
LeNet

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [11]:
print(LeNet[0].weight.data)

tensor([[[[-0.1765,  0.6679,  0.7620,  0.1233, -0.3778],
          [-0.4203,  0.9429,  0.9815,  0.1715, -0.4026],
          [-0.5020,  0.9010,  1.1045, -0.1239, -0.7185],
          [-0.6585,  0.7684,  0.9329, -0.1525, -0.9039],
          [-0.5508,  0.6134,  0.6255, -0.1812, -1.1147]]],


        [[[-0.7014,  0.1782,  0.5097, -0.1563, -0.4662],
          [-0.2249,  1.2198,  1.4461,  0.0456, -0.7614],
          [-0.3935,  1.4770,  1.8339,  0.3682, -0.4790],
          [-0.8663,  0.5880,  1.0173, -0.0251, -0.7535],
          [-0.9659,  0.3668,  0.5634,  0.0945, -0.8074]]],


        [[[ 0.1466,  0.3485,  0.2595,  0.5414, -0.0305],
          [ 0.9193,  2.4310,  2.3361,  1.2598,  0.2747],
          [ 0.7885,  2.7417,  2.8556,  1.3662,  0.2222],
          [ 0.1595,  1.4333,  1.9664,  0.9787, -0.0576],
          [-0.6857,  0.1164,  0.5180,  0.1032, -0.0194]]],


        [[[ 0.0046, -0.4949, -0.5754, -0.1067,  0.2487],
          [ 0.1352, -0.4158, -0.5282, -0.2789,  0.3486],
          [-0.0577,