In [2]:
# 安装 numpy
!pip install numpy
# 安装 matplotlib
!pip install matplotlib
# 安装 PyTorch 和 torchvision
!pip install torch torchvision

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.3.0-cp311-cp311-win_amd64.whl.metadata (26 kB)
Collecting torchvision
  Downloading torchvision-0.18.0-cp311-cp311-win_amd64.whl.metadata (6.6 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Using cached mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached tbb-2021.12.0-py3-none-win_amd64.whl.metadata (1.1 kB)
Downloading torch-2.3.0-cp311-cp311-win_amd64.whl (159.8 MB)
   ---------------------------------------- 0.0/159.8 MB ? eta -:--:--
   --------------------------------------



In [4]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

ModuleNotFoundError: No module named 'torch'

In [34]:
# datasets: train and validation
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out 5,000 samples of the training set for validation. The training size
# is derived from the dataset length so the two parts always add up, and the
# seeded generator makes the split identical on every run of the notebook.
n_val = 5000
train, val = random_split(
  train_data,
  [len(train_data) - n_val, n_val],
  generator=torch.Generator().manual_seed(0),
)

# Reshuffle the training samples at every epoch so SGD does not see the same
# batch sequence each time; the validation order has no effect on the metrics.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)

In [35]:
# model
# Baseline fully connected classifier on flattened 28x28 inputs:
# 784 -> 64 -> 64 -> 10, with a small dropout in front of the output layer.
# NOTE: `model` is rebound to a ResNet instance in a later cell, so this
# baseline is only used if that cell is skipped.
model = nn.Sequential(*[
  nn.Linear(784, 64),  # 784 = 28 * 28 flattened pixels
  nn.ReLU(),
  nn.Linear(64, 64),
  nn.ReLU(),
  nn.Dropout(0.1),     # mild regularisation against overfitting
  nn.Linear(64, 10),   # one output per class
])

In [36]:
# more flexible model with residual
class ResNet(nn.Module):
  """Small fully connected classifier with one residual (skip) connection.

  The output of the first hidden layer is added to the output of the second
  hidden layer before dropout and the final linear layer.

  Args:
    in_features: size of each flattened input sample (default 28*28).
    hidden_features: width of both hidden layers; they share one width so
      that their activations can be summed (default 64).
    num_classes: number of output logits (default 10).
    dropout: drop probability applied to the summed hidden activations
      (default 0.1).
  """

  def __init__(self, in_features=28*28, hidden_features=64, num_classes=10, dropout=0.1):
    super().__init__()
    self.l1 = nn.Linear(in_features, hidden_features)
    self.l2 = nn.Linear(hidden_features, hidden_features)
    self.l3 = nn.Linear(hidden_features, num_classes)
    self.do = nn.Dropout(dropout)

  def forward(self, x):
    """Return the unnormalised class scores for `x`.

    Args:
      x: tensor whose last dimension equals `in_features`.

    Returns:
      Tensor of logits whose last dimension equals `num_classes`.
    """
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    # Residual connection: sum both hidden activations, then apply dropout.
    summed = self.do(h2 + h1)
    return self.l3(summed)

# Place the model on the GPU when CUDA is available; otherwise keep it on the
# CPU instead of raising on `.cuda()`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet().to(device)

In [37]:
# optimizer
# model.parameters() returns a one-shot iterator that the optimizer constructor
# consumes; materialising it as a list keeps `params` usable afterwards.
params = list(model.parameters())
# Plain stochastic gradient descent with a fixed learning rate of 0.01.
optimizer = optim.SGD(params, lr=1e-2)

In [38]:
# loss
# Cross-entropy criterion; the training and validation loops call it as
# loss_func(pred, y) on the model outputs and the batch labels.
loss_func = nn.CrossEntropyLoss()  

In [39]:
# training and validation loops
def run_epoch(model, loader, loss_func, optimizer=None):
  """Run one full pass over `loader` and return `(loss, accuracy)`.

  Args:
    model: network to train or evaluate. Each batch is moved to the device of
      the model's first parameter (CPU if the model has no parameters).
    loader: iterable yielding `(x, y)` batches; `x` is flattened to
      `(batch, -1)` before the forward pass.
    loss_func: criterion called as `loss_func(pred, y)`. It is assumed to
      return the mean loss over the batch (the default for
      `nn.CrossEntropyLoss`), which is re-weighted by the batch size here.
    optimizer: when given, the model is put in training mode and updated after
      every batch. When `None`, the model is put in eval mode and no gradients
      are tracked.

  Returns:
    Tuple `(loss, accuracy)` of Python floats averaged over all samples seen,
    so a smaller final batch is not over-weighted. `(nan, nan)` if the loader
    yields no samples.
  """
  training = optimizer is not None
  model.train(training)  # train(False) is equivalent to eval()

  # Follow the model's device instead of hard-coding .cuda().
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  loss_sum, n_correct, n_seen = 0.0, 0, 0
  with torch.set_grad_enabled(training):
    for x, y in loader:  # image batch and its labels
      # reshaping x: b x 1 x 28 x 28 -> b x 784
      b = x.size(0)
      x = x.view(b, -1).to(device)
      y = y.to(device)

      # forward
      pred = model(x)

      # compute the objective function
      loss = loss_func(pred, y)

      if training:
        # clear stale gradients, accumulate the partial derivatives of the
        # loss, then step in the opposite direction of the gradient
        model.zero_grad()
        loss.backward()
        optimizer.step()

      loss_sum += loss.item() * b
      n_correct += (pred.argmax(dim=1) == y).sum().item()
      n_seen += b

  if n_seen == 0:
    return float('nan'), float('nan')
  return loss_sum / n_seen, n_correct / n_seen


n_epochs = 5
for epoch in range(n_epochs):
  train_loss, train_acc = run_epoch(model, train_loader, loss_func, optimizer)
  print(f'Epoch {epoch +1}', end= ',')
  print(f'training loss: {train_loss:.2f}', end=',')
  print(f'training accuracy: {train_acc:.2f}')

  val_loss, val_acc = run_epoch(model, val_loader, loss_func)
  print(f'Epoch {epoch +1}', end= ',')
  print(f'validation loss: {val_loss:.2f}', end=',')
  print(f'validation accuracy: {val_acc:.2f}')

Epoch 1,training loss: 0.84,training accuracy: 0.78
Epoch 1,validation loss: 0.38,validation accuracy: 0.90
Epoch 2,training loss: 0.38,training accuracy: 0.89
Epoch 2,validation loss: 0.30,validation accuracy: 0.91
Epoch 3,training loss: 0.31,training accuracy: 0.91
Epoch 3,validation loss: 0.26,validation accuracy: 0.92
Epoch 4,training loss: 0.27,training accuracy: 0.92
Epoch 4,validation loss: 0.23,validation accuracy: 0.93
Epoch 5,training loss: 0.24,training accuracy: 0.93
Epoch 5,validation loss: 0.21,validation accuracy: 0.94
