In [30]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms

In [31]:
# Config
batch_size = 64
epochs = 10
learning_rate = 1e-3

In [32]:
# Datasets
train_dataset = datasets.FashionMNIST(
    root="data",
    train=True,
    transform=transforms.ToTensor(),
    download=True
)

test_dataset = datasets.FashionMNIST(
    root="data",
    train=False,
    transform=transforms.ToTensor(),
    download=True
)

In [33]:
# DataLoaders
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False
)

In [34]:
for x, y in test_dataloader:
    print(x.shape)
    print(y.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [35]:
# ConvNet
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=2,
                stride=2
            ),
            nn.Conv2d(
                in_channels=64,
                out_channels=128,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=2,
                stride=2
            ),
            nn.Conv2d(
                in_channels=128,
                out_channels=256,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=7,
                stride=1
            )
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=256,
                out_features=128
            ),
            nn.ReLU(),
            nn.Linear(
                in_features=128,
                out_features=10
            )
        )

    def forward(self, x):
        z = self.feature_extractor(x)
        return self.classifier(z)

In [36]:
convnet = ConvNet()
print(convnet)

ConvNet(
  (feature_extractor): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=7, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=10, bias=True)
  )
)


In [37]:
# MLP
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=784,
                out_features=512
            ),
            nn.ReLU(),
            nn.Linear(
                in_features=512,
                out_features=256
            ),
            nn.ReLU(),
            nn.Linear(
                in_features=256,
                out_features=10
            )
        )

    def forward(self, x):
        return self.mlp(x)

In [38]:
mlp = MLP()
print(mlp)

MLP(
  (mlp): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=64, bias=True)
    (4): ReLU()
    (5): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [39]:
criterion = nn.CrossEntropyLoss()

convnet_optimizer = torch.optim.SGD(convnet.parameters(), lr=learning_rate)
mlp_optimizer = torch.optim.SGD(mlp.parameters(), lr=learning_rate)

In [40]:
device = "cuda" if torch.cuda.is_available() else "cpu"

mlp = mlp.to(device)
convnet = convnet.to(device)

In [41]:
# train
def train(dataloader, model, criterion, optimizer):
    size = len(dataloader.dataset) # 데이터셋 내 샘플 수

    model.train() # "학습 모드"로 설정

    for batch, (inputs, labels) in enumerate(dataloader):
        inputs, labels = inputs.to(device), labels.to(device) # 데이터를 device로 보낸다.
    
        outputs = model(inputs) # forward 연산

        loss = criterion(outputs, labels) # 손실 계산

        optimizer.zero_grad() # 기울기 초기화
        loss.backward() # 기울기 계산
        optimizer.step() # 모델 파라미터 업데이트

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(inputs)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [42]:
def eval(dataloader, model, criterion):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval() # "검증 모드"로 설정
    eval_loss, correct = 0, 0
    with torch.no_grad(): # 기울기를 계산하지 않도록 설정
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            eval_loss += criterion(outputs, labels).item()
            correct += (outputs.argmax(1) == labels).type(torch.float).sum().item()
    eval_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg Loss: {eval_loss:>8f} \n")

In [43]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n--------------------")

    print(f"Model : MLP ==============================")
    train(train_dataloader, mlp, criterion, mlp_optimizer)
    eval(test_dataloader, mlp, criterion)

    print(f"Model : CNN ==============================")
    train(train_dataloader, convnet, criterion, convnet_optimizer)
    eval(test_dataloader, convnet, criterion)

print("Done!")

Epoch 1
--------------------
loss: 2.290663 [    0/60000]
loss: 2.268184 [ 6400/60000]
loss: 2.275130 [12800/60000]
loss: 2.277776 [19200/60000]
loss: 2.267177 [25600/60000]
loss: 2.233670 [32000/60000]
loss: 2.245631 [38400/60000]
loss: 2.227592 [44800/60000]
loss: 2.208667 [51200/60000]
loss: 2.196825 [57600/60000]
Test Error: 
 Accuracy: 27.6%, Avg Loss: 2.195574 

loss: 2.293963 [    0/60000]
loss: 2.331232 [ 6400/60000]
loss: 2.295572 [12800/60000]
loss: 2.313127 [19200/60000]
loss: 2.295026 [25600/60000]
loss: 2.298992 [32000/60000]
loss: 2.313068 [38400/60000]
loss: 2.298003 [44800/60000]
loss: 2.304269 [51200/60000]
loss: 2.298318 [57600/60000]
Test Error: 
 Accuracy: 10.2%, Avg Loss: 2.300087 

Epoch 2
--------------------
loss: 2.175826 [    0/60000]
loss: 2.161732 [ 6400/60000]
loss: 2.174685 [12800/60000]
loss: 2.139398 [19200/60000]
loss: 2.153334 [25600/60000]
loss: 2.100088 [32000/60000]
loss: 2.080714 [38400/60000]
loss: 2.052056 [44800/60000]
loss: 2.041138 [51200/6000