## First Neural Network: Image Classification 

Objectives:
- Train a minimal image classifier on [MNIST](https://paperswithcode.com/dataset/mnist) using PyTorch
- Use PyTorch and torchvision

In [1]:
# The usual imports

import torch #导包
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
# load the data

class ReshapeTransform:
    """Callable transform that reshapes a tensor to a fixed target shape.

    Instances can be placed in a ``transforms.Compose`` pipeline like any
    other transform.
    """

    def __init__(self, new_size):
        """Remember the target shape.

        Args:
            new_size: Shape handed to ``torch.reshape`` on every call.
        """
        self.new_size = new_size

    def __call__(self, img):
        """Return ``img`` reshaped to ``self.new_size``."""
        target_shape = self.new_size
        return torch.reshape(input=img, shape=target_shape)

# Preprocessing applied to every image, in this order.
preprocessing_steps = [
    transforms.ToTensor(),                        # image -> tensor
    transforms.ConvertImageDtype(torch.float32),  # cast to float32
    ReshapeTransform((-1,)),                      # reshape to a 1-D vector
]
transformations = transforms.Compose(preprocessing_steps)

# The two splits are built identically except for the `train` flag.
mnist_options = dict(root='./data', download=True, transform=transformations)

trainset = torchvision.datasets.MNIST(train=True, **mnist_options)   # training split
testset = torchvision.datasets.MNIST(train=False, **mnist_options)   # test split

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 5677393.46it/s] 


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 2930796.81it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1343061.05it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2274418.43it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [3]:
# Display the shapes of the raw image tensors of both splits.

(trainset.data.shape, testset.data.shape)

(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))

In [4]:
# Mini-batch loaders for both splits.

BATCH_SIZE = 128

# Training loader: shuffling enabled.
train_dataloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0
)

# Test loader: shuffling disabled.
test_dataloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0
)

In [5]:
# model

# Seed torch's global RNG so the randomly initialised weights are identical
# on every "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Fully connected classifier: 784 inputs -> 512 hidden units (ReLU) -> 10 outputs.
# nn.Sequential is an ordered container: layers run in the order listed.
model = nn.Sequential(
    nn.Linear(784, 512),
    nn.ReLU(),
    nn.Linear(512, 10),
)

In [6]:
# training preparation

# Objective: cross-entropy loss.
loss = nn.CrossEntropyLoss()

# Optimizer: RMSprop over all model parameters, with its default settings.
trainer = torch.optim.RMSprop(params=model.parameters())

In [7]:
def get_accuracy(output, target, batch_size=None):
    """Return the classification accuracy of one batch, in percent.

    Args:
        output: Model scores; the predicted class of each row is the index of
            its largest value along dimension 1.
        target: Ground-truth class indices, compared element-wise with the
            predictions.
        batch_size: Denominator of the accuracy. Defaults to the number of
            elements in ``target``, which stays correct when the final batch
            of an epoch is smaller than the nominal batch size.

    Returns:
        Accuracy as a Python float, ``100 * correct / batch_size``.
    """
    if batch_size is None:
        batch_size = target.numel()

    # Index of the highest score per row, shaped like the targets.
    predictions = torch.argmax(output, dim=1).view(target.size())
    corrects = (predictions == target).sum()  # number of correct predictions
    accuracy = 100.0 * corrects / batch_size
    return accuracy.item()

In [9]:
# train

NUM_EPOCHS = 5  # number of full passes over the training set

for epoch in range(NUM_EPOCHS):
    # Per-sample running sums, so a smaller final batch is weighted correctly
    # instead of being treated as a full BATCH_SIZE batch.
    accuracy_sum = 0.0   # sum of (batch accuracy in % * batch size)
    loss_sum = 0.0       # sum of (mean batch loss * batch size)
    samples_seen = 0

    model.train()  # switch the model to training mode
    for X, y in train_dataloader:
        n_batch = y.size(0)  # actual size of this batch

        output = model(X)
        batch_loss = loss(output, y)  # mean loss over the batch

        # Clear old gradients *before* backward so an interrupted and re-run
        # cell can never fold stale gradients into this update.
        trainer.zero_grad()
        batch_loss.backward()  # back-propagate to compute gradients
        trainer.step()         # update all parameters

        # gather metrics
        accuracy_sum += get_accuracy(output, y, n_batch) * n_batch
        loss_sum += batch_loss.item() * n_batch
        samples_seen += n_batch

    denominator = max(samples_seen, 1)  # avoid dividing by zero on an empty loader
    print('Epoch: %d | Train loss: %.4f | Train Accuracy: %.4f' \
          %(epoch+1, loss_sum / denominator, accuracy_sum / denominator))

tensor([[ 42.7547, -38.6758,  20.5659,  ...,   8.1838,  -4.1467,  -6.8722],
        [-22.1040, -22.0396,  -9.0274,  ...,   3.5251,   9.0528,   8.0229],
        [ -7.1586,  -8.8147,  24.1262,  ..., -12.2955,   7.9594, -15.1574],
        ...,
        [ -9.9221,  -7.4083,  -5.6056,  ...,   4.5368,   2.1152,  15.6056],
        [-35.3542,  21.2294,  -6.7811,  ...,   3.7357,   1.5764,  -1.8169],
        [ -4.0417,  -2.3790,   1.1561,  ...,  -0.1152,  15.7861,   1.4021]],
       grad_fn=<AddmmBackward0>)
tensor(0.0450, grad_fn=<NllLossBackward0>)
output tensor([[ 42.7547, -38.6758,  20.5659,  ...,   8.1838,  -4.1467,  -6.8722],
        [-22.1040, -22.0396,  -9.0274,  ...,   3.5251,   9.0528,   8.0229],
        [ -7.1586,  -8.8147,  24.1262,  ..., -12.2955,   7.9594, -15.1574],
        ...,
        [ -9.9221,  -7.4083,  -5.6056,  ...,   4.5368,   2.1152,  15.6056],
        [-35.3542,  21.2294,  -6.7811,  ...,   3.7357,   1.5764,  -1.8169],
        [ -4.0417,  -2.3790,   1.1561,  ...,  -0.1152,

### Other things to try

- Evaluate on test set
- Plot loss curve
- Add more layers to the model