<a href="https://colab.research.google.com/github/vvvu/potential-chainsaw/blob/main/pytorch-tutorial/%5BIntermediate%5D_Convolutional_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [None]:
# Device configuration
# Run on the first CUDA GPU when one is visible to PyTorch; otherwise use the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

In [None]:
# Hyper Parameters
batch_size = 100      # samples per mini-batch drawn by the data loaders
learning_rate = 0.01  # step size handed to the optimizer
num_classes = 10      # number of output scores produced by the network
num_epochs = 5        # full passes over the training loader

In [None]:
# MNIST dataset
# Both splits live under the same root directory and are converted to tensors
# on access. Only the training split requests a download.
data_root = './data'

train_dataset = torchvision.datasets.MNIST(
    root = data_root,
    train = True,
    transform = transforms.ToTensor(),
    download = True,
)
test_dataset = torchvision.datasets.MNIST(
    root = data_root,
    train = False,
    transform = transforms.ToTensor(),
)

# Data Loader
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle = True,
)
test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset,
    batch_size = batch_size,
    shuffle = False,
)

In [None]:
# Convolutional Neural Network (2 convolutional layers)
class ConvNet(nn.Module):
  """Two-block convolutional classifier for single-channel 28 x 28 images.

  Each block is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d; a single fully
  connected layer then maps the flattened features to one score per class.

  Layer notes:
    nn.Conv2d
      in_channels: number of input channels (3 for an RGB image; 1 here
        because the input is grayscale).
      out_channels: number of output channels, chosen per model.
      kernel_size: kernel size; an int, or a tuple for per-dimension sizes.
      stride: step size, default 1; an int, or a tuple for per-dimension
        steps.
      padding: amount of zero padding.
    nn.BatchNorm2d
      Batch normalization rescales activations to a common range, which
      makes the network easier to optimize. num_features must equal the
      out_channels of the preceding convolution (16, then 32).
    nn.MaxPool2d
      Keeps the largest activation in each window, discarding less
      important information and reducing the computation downstream.

  Args:
    num_classes: number of output scores produced by the final linear layer.
  """

  def __init__(self, num_classes = 10):
    super().__init__()
    # nn.Sequential applies its modules in the order they are passed in.
    # kernel_size = 5 with padding = 2 and stride = 1 keeps width and height
    # unchanged; the 2 x 2 max-pool with stride 2 then halves both.
    self.layer1 = nn.Sequential(
        nn.Conv2d(in_channels = 1, out_channels = 16, kernel_size = 5,
                  stride = 1, padding = 2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5,
                  stride = 1, padding = 2),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 2, stride = 2)
    )
    # Fully connected layer. For a 28 x 28 input the two pooling steps give
    # 28 -> 14 -> 7, and layer2 emits 32 channels, hence 7 * 7 * 32 features.
    self.fc = nn.Linear(in_features = 7 * 7 * 32, out_features = num_classes)

  def forward(self, x):
    """Return the class scores for a batch of images.

    Args:
      x: input batch; the linear layer's size requires (batch, 1, 28, 28).

    Returns:
      Tensor of shape (batch, num_classes) holding unnormalized scores.
    """
    out = self.layer1(x)
    out = self.layer2(out)
    # Keep the batch dimension and flatten everything else so the feature
    # maps can be fed to the linear layer.
    out = out.reshape(out.size(0), -1)
    return self.fc(out)

# Build the network first, then move it to the selected device.
model = ConvNet(num_classes = num_classes)
model = model.to(device)

1. 卷积层`nn.Conv2d()`

   - 作用：提取一个局部区域的特征，不同的**卷积核**相当于不同的**特征提取器**

   - 参数`out_channels`：代表输出通道的数量。输出通道的数量与卷积核的数量一一对应：有多少个卷积核，就从输入中提取多少种特征，也就得到多少个输出通道。**值得注意的是，卷积核的权重无需手动定义：PyTorch会按其默认的初始化规则自动生成初始值，并在训练过程中通过反向传播不断更新；如有需要，我们也可以自行指定卷积核的初始权重。**

2. 汇聚层`nn.MaxPool2d()`
   - 作用：又名子采样层`Subsampling Layer`，其作用是进行特征选择，降低特征数量，从而减少参数数量
   - 这里我们可以发现每一个卷积层之后都接有一个`nn.MaxPool2d()`层，这里采用的是**Maximum Pooling**的方式，即对于一个区域，选择这个区域内所有神经元的最大活性值作为这个区域的表示。当我们选择参数`kernel_size = 2, stride = 2`时，相当于在每个互不重叠的`2 x 2`区域内取最大值，特征图的宽和高各缩小一半（卷积层使用`kernel_size = 5, padding = 2, stride = 1`，本身不改变宽和高）。原始输入数据为`28 x 28 x 1`的图片，在经过`self.layer1`后，通道增加到`16`，尺寸经过`nn.MaxPool2d()`降低为`14 x 14`，得到的中间结果为`14 x 14 x 16`。再经过`self.layer2`后，通道增加到`32`，尺寸经过同样的`nn.MaxPool2d()`后变为`7 x 7`。因此最后展平后送入`self.fc`层的输入为`7 x 7 x 32 = 1568`维

In [None]:
# Loss and optimizer
# Cross-entropy loss is applied to the scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter registered on the model.
trainable_params = model.parameters()
optimizer = torch.optim.Adam(params = trainable_params, lr = learning_rate)

In [None]:
# Train the model
total_step = len(train_loader)

# Put the model in training mode explicitly. A fresh module already is, but
# if this cell is re-run after the evaluation cell called model.eval(), the
# BatchNorm layers would otherwise keep using their running statistics
# instead of per-batch statistics while training.
model.train()

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Move the batch to the same device as the model.
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize: clear gradients left over from the previous
    # step, backpropagate this batch's loss, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss of the current batch every 100 steps.
    if (i + 1) % 100 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
            .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.2597
Epoch [1/5], Step [200/600], Loss: 0.1021
Epoch [1/5], Step [300/600], Loss: 0.0352
Epoch [1/5], Step [400/600], Loss: 0.0787
Epoch [1/5], Step [500/600], Loss: 0.0169
Epoch [1/5], Step [600/600], Loss: 0.0419
Epoch [2/5], Step [100/600], Loss: 0.1031
Epoch [2/5], Step [200/600], Loss: 0.0295
Epoch [2/5], Step [300/600], Loss: 0.0538
Epoch [2/5], Step [400/600], Loss: 0.1662
Epoch [2/5], Step [500/600], Loss: 0.0266
Epoch [2/5], Step [600/600], Loss: 0.1016
Epoch [3/5], Step [100/600], Loss: 0.0187
Epoch [3/5], Step [200/600], Loss: 0.0428
Epoch [3/5], Step [300/600], Loss: 0.0092
Epoch [3/5], Step [400/600], Loss: 0.0724
Epoch [3/5], Step [500/600], Loss: 0.0098
Epoch [3/5], Step [600/600], Loss: 0.0029
Epoch [4/5], Step [100/600], Loss: 0.0336
Epoch [4/5], Step [200/600], Loss: 0.0160
Epoch [4/5], Step [300/600], Loss: 0.0178
Epoch [4/5], Step [400/600], Loss: 0.0178
Epoch [4/5], Step [500/600], Loss: 0.0306
Epoch [4/5], Step [600/600], Loss:

In [None]:
# Test the model
#
# model.eval() puts every layer in evaluation mode. It only affects certain
# modules: normalization layers use their running mean/variance instead of
# mini-batch statistics, and Dropout layers are deactivated.
#
# model.eval() versus torch.no_grad():
#   - model.eval() tells the layers they are in evaluation mode, so batch norm
#     and dropout behave accordingly.
#   - torch.no_grad() turns off the autograd engine. This reduces memory usage
#     and speeds up computation, but backpropagation is not possible inside it
#     (which an evaluation script does not need anyway).
model.eval()

correct = 0
total = 0
with torch.no_grad():
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    # Index of the highest score along the class dimension is the predicted
    # label. Under no_grad() the output is already detached from the graph,
    # so the legacy `.data` accessor is not needed.
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {}%'.format(100 * correct / total))

# Save the model checkpoint
# Only the state_dict (parameters and buffers) is written, not the module itself.
torch.save(model.state_dict(), 'model.ckpt')

Test Accuracy of the model on the 10000 test images: 99.11%
