<a href="https://colab.research.google.com/github/vvvu/potential-chainsaw/blob/main/pytorch-tutorial/%5BBasics%5D_Feedforward_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [13]:
# Reproducibility: seed PyTorch's global RNG before any model or DataLoader is
# created, so weight initialisation and shuffle order repeat on a fresh
# "Restart & Run All". (Some GPU kernels may still be non-deterministic.)
seed = 42
torch.manual_seed(seed)

# Device configuration
# A torch.device describes where a torch.Tensor is allocated: a device type
# ('cpu' or 'cuda') plus an optional device ordinal. Use CUDA when available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper Parameters
input_size = 28 * 28    # length of one image after it is flattened to a vector
hidden_size = 500       # width of the single hidden layer
num_classes = 10        # number of output units
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST dataset
# ToTensor() converts each image to a float tensor so it can be batched.
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# No download flag here: this presumably relies on the training dataset above
# having already fetched the files into the same root -- verify if the two
# datasets are ever constructed independently.
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data Loader
# Training batches are reshuffled every epoch; the test set keeps a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [23]:
# Fully connected neural network with [one hidden layer]
class NeuralNet(nn.Module):
  """Feed-forward classifier with a single hidden layer.

  The data path is Linear(input_size -> hidden_size), ReLU, then
  Linear(hidden_size -> num_classes). No activation is applied to the
  final layer's output.

  Args:
    input_size: number of features in each input sample.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output units.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    # Keep the attribute names and creation order stable: they determine the
    # state_dict keys and the order in which parameters are initialised.
    self.fc1 = nn.Linear(input_size, hidden_size)    # input  -> hidden
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)   # hidden -> output

  def forward(self, x):
    """Map a batch of inputs to per-class scores: fc2(relu(fc1(x)))."""
    hidden = self.relu(self.fc1(x))
    return self.fc2(hidden)

In [24]:
# Build the network and move all of its parameters to the configured device.
# Module.to(device) relocates a whole model; Tensor.to(device) does the same
# for an individual tensor (used for each batch below).
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
# The model returns raw scores from its last linear layer, which is the input
# nn.CrossEntropyLoss expects (it applies log-softmax internally).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
log_interval = 100              # report the loss every `log_interval` batches
total_step = len(train_loader)  # number of mini-batches per epoch
model.train()                   # make training mode explicit
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Flatten every image to a vector of length `input_size` (the size the
    # first layer was built with) and move the batch to the configured device.
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize: clear stale gradients, back-propagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % log_interval == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.2655
Epoch [1/5], Step [200/600], Loss: 0.4096
Epoch [1/5], Step [300/600], Loss: 0.2071
Epoch [1/5], Step [400/600], Loss: 0.1275
Epoch [1/5], Step [500/600], Loss: 0.0881
Epoch [1/5], Step [600/600], Loss: 0.1573
Epoch [2/5], Step [100/600], Loss: 0.0623
Epoch [2/5], Step [200/600], Loss: 0.1275
Epoch [2/5], Step [300/600], Loss: 0.1059
Epoch [2/5], Step [400/600], Loss: 0.2059
Epoch [2/5], Step [500/600], Loss: 0.1824
Epoch [2/5], Step [600/600], Loss: 0.0598
Epoch [3/5], Step [100/600], Loss: 0.0684
Epoch [3/5], Step [200/600], Loss: 0.0501
Epoch [3/5], Step [300/600], Loss: 0.0612
Epoch [3/5], Step [400/600], Loss: 0.0552
Epoch [3/5], Step [500/600], Loss: 0.1007
Epoch [3/5], Step [600/600], Loss: 0.1817
Epoch [4/5], Step [100/600], Loss: 0.0795
Epoch [4/5], Step [200/600], Loss: 0.0616
Epoch [4/5], Step [300/600], Loss: 0.0750
Epoch [4/5], Step [400/600], Loss: 0.0241
Epoch [4/5], Step [500/600], Loss: 0.0105
Epoch [4/5], Step [600/600], Loss:

In [26]:
# Test the model
# Put the model in evaluation mode so that any layer whose behaviour differs
# between training and inference uses its inference behaviour. Note that the
# model stays in eval mode after this cell.
model.eval()

correct = 0
total = 0
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
  for images, labels in test_loader:
    # Flatten each image to a vector of length `input_size`, as in training.
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)
    outputs = model(images)
    # Predicted class = index of the largest score in each row. Autograd is
    # already disabled here, so the legacy `.data` accessor is not needed.
    predicted = outputs.argmax(dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a fixed count.
accuracy = 100 * correct / total
print("Accuracy of the network on the {} test images: {} %".format(total, accuracy))

# Save the model checkpoint (parameters only, via state_dict)
torch.save(model.state_dict(), 'model.ckpt')

Accuracy of the network on the 10000 test images: 97.74 %
