# 1.Setup

In [30]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [1]:
# standard library
import os

# utils
import matplotlib.pyplot as plt
# torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
# torchvision
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose

# Report CUDA availability and the installed torch version, one per line.
print(torch.cuda.is_available(), torch.__version__, sep="\n")

False
1.9.1


# 2.数据、模型全局变量

In [25]:
# Hyperparameters
learning_rate = 1e-3
batch_size = 64
epochs = 5

# Fix the random seed so weight initialisation and DataLoader shuffling
# are reproducible across "Restart & Run All".
seed = 42
torch.manual_seed(seed)

# Get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


# 3.数据下载、加载

## 3.1 数据下载

In [18]:
# Both splits use download=True so the first statement also works on a machine
# that does not have the files yet (previously only the test split downloaded,
# so the training split failed first on a fresh setup).
training_data = datasets.FashionMNIST(
    root = "~/Datasets/FashionMNIST",
    train = True,
    download = True,
    transform = ToTensor(),
)
test_data = datasets.FashionMNIST(
    root = "~/Datasets/FashionMNIST",
    train = False,
    download = True,
    transform = ToTensor(),
)

## 3.2 数据加载--创建 data loader

In [19]:
# Batch both splits with the shared batch size; only the training split is shuffled.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [21]:
# Pull a single batch from the training loader to inspect tensor shapes and dtypes.
X_train, y_train = next(iter(train_dataloader))
print(f"Shape of X_train [N, C, H, W]: {X_train.shape}")
print(f"Shape of y_train: {y_train.shape}, {y_train.dtype}")

Shape of X_train [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y_train: torch.Size([64]), torch.int64


# 4.模型构建

## 4.1 模型构建

In [35]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then three linear layers with ReLU in between.

    The layer sizes are constructor arguments with defaults instead of being
    read from a notebook-level batch variable, so the class can be built
    (e.g. when reloading a checkpoint) without running the data cells first.

    Args:
        in_features: Number of values per flattened sample (default 28 * 28 = 784).
        hidden_features: Width of the two hidden layers (default 512).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Attribute names are kept so existing state_dict keys still match.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten every dimension after the first and return the raw logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)

        return logits

In [36]:
# Build the network, place it on the selected device, and show its layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


## 4.2 模型训练

In [44]:
# Loss function: cross-entropy over the model's logits
loss_fn = nn.CrossEntropyLoss()

# Optimization algorithm: SGD over all model parameters
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)


def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    For each batch the model predicts, the loss is computed, gradients are
    back-propagated, and the optimizer takes one step. The loss is printed
    every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset`` attribute.
        model: The ``nn.Module`` to train (switched to training mode).
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-less model), so the
            function no longer depends on a notebook-level global.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute the prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Keep the scalar in its own name instead of rebinding the loss tensor.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
    """
    check the model’s performance against 
    the test dataset to ensure it is learning
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            # 将数据移动到设备上
            X, y = X.to(device), y.to(device)
            # 计算累计测试误差
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [45]:
# Alternate one training pass and one evaluation pass per epoch.
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number} ---------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1 ---------------------------
loss: 2.307063 [    0/60000]
loss: 2.283336 [ 6400/60000]
loss: 2.277623 [12800/60000]
loss: 2.270706 [19200/60000]
loss: 2.242836 [25600/60000]
loss: 2.227749 [32000/60000]
loss: 2.226338 [38400/60000]
loss: 2.223214 [44800/60000]
loss: 2.196446 [51200/60000]
loss: 2.187925 [57600/60000]
Test Error: Accuracy: 47.4%, Avg loss: 2.155148 

Epoch 2 ---------------------------
loss: 2.144556 [    0/60000]
loss: 2.127807 [ 6400/60000]
loss: 2.104260 [12800/60000]
loss: 2.061341 [19200/60000]
loss: 2.081207 [25600/60000]
loss: 2.046627 [32000/60000]
loss: 2.015819 [38400/60000]
loss: 1.973440 [44800/60000]
loss: 1.945907 [51200/60000]
loss: 1.937383 [57600/60000]
Test Error: Accuracy: 57.2%, Avg loss: 1.889818 

Epoch 3 ---------------------------
loss: 1.885061 [    0/60000]
loss: 1.821127 [ 6400/60000]
loss: 1.811453 [12800/60000]
loss: 1.711759 [19200/60000]
loss: 1.652731 [25600/60000]
loss: 1.650425 [32000/60000]
loss: 1.621689 [38400/60000]
loss: 1.6

## 4.3 模型保存

In [48]:
# torch.save does not create missing parent folders, so make sure the
# target directory exists before writing the checkpoint.
model_path = "./model/model.pth"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)
print("Saved PyTorch Model State to model.pth.")

Saved PyTorch Model State to model.pth.


# 5.模型重载、预测

## 5.1 模型重载

In [49]:
# Rebuild the architecture, then restore the saved weights.
# map_location="cpu" lets a checkpoint written from a CUDA model load on a
# CPU-only machine; the freshly built model lives on the CPU anyway.
model = NeuralNetwork()
model.load_state_dict(torch.load("./model/model.pth", map_location="cpu"))

<All keys matched successfully>

## 5.2 模型预测

In [59]:
# Human-readable label for each class index.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]
model.eval()
X, y = test_data[0]
with torch.no_grad():
    # Add an explicit batch dimension instead of relying on the single channel
    # axis being treated as the batch, and put the input on the model's device.
    model_device = next(model.parameters()).device
    pred = model(X.unsqueeze(0).to(model_device))
    print(pred)
    print(pred[0].argmax(0))
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f"Predicted: '{predicted}', Actual: '{actual}'")

tensor([[-2.0782, -2.5635, -1.0011, -1.9565, -1.0447,  2.4286, -1.1340,  2.7147,
          1.7693,  2.9568]])
tensor(9)
Predicted: 'Ankle boot', Actual: 'Ankle boot'


In [72]:
# Column vector (10 x 1): show the tensor, its size, and argmax along dim 0 and dim 1.
b = torch.randn(10, 1)
for shown in (b, b.size(), b.argmax(0), b.argmax(1)):
    print(shown)

tensor([[ 0.9922],
        [-0.4266],
        [-1.1973],
        [ 0.1428],
        [-0.9913],
        [ 1.0354],
        [ 0.3422],
        [ 1.0472],
        [ 1.7108],
        [ 0.7771]])
torch.Size([10, 1])
tensor([8])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [73]:
# Row vector (1 x 10): show the tensor, its size, and argmax along dim 0 and dim 1.
b = torch.randn(1, 10)
for shown in (b, b.size(), b.argmax(0), b.argmax(1)):
    print(shown)

tensor([[ 0.5218, -0.4220,  0.0992, -1.2034,  0.7426, -0.8680, -1.5766,  0.3354,
         -1.5546, -0.0171]])
torch.Size([1, 10])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([4])
