In [None]:
import argparse
from time import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils import data as D
import torch.nn.functional as F


# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)


class Model(nn.Module):
    """Small CNN classifier: one conv stage followed by three linear layers.

    Args:
        filters: Channel counts for the conv stages. Only ``filters[0]`` is
            used at the moment because the second conv stage is disabled.
        bm: When true, a ``BatchNorm2d`` follows the convolution; otherwise an
            ``Identity`` placeholder keeps the ``Sequential`` layout the same.
    """

    def __init__(self, filters=(6, 16), bm=False):
        super().__init__()
        first_width = filters[0]

        conv = nn.Conv2d(3, first_width, 5)
        if bm:
            norm = nn.BatchNorm2d(first_width)
        else:
            norm = nn.Identity()
        self.conv1 = nn.Sequential(conv, norm)
        self.pool = nn.MaxPool2d(2, 2)
        # A second conv stage (filters[0] -> filters[1], kernel 5, optional
        # batch norm) is currently disabled; fc1 below is sized for the
        # single-stage layout.

        # 14 x 14 is the feature-map size after a 5x5 unpadded conv and a 2x2
        # pool on 32 x 32 inputs.
        self.fc1 = nn.Linear(first_width * 14 * 14, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y=None):
        """Classify a batch and, when labels are given, score it.

        Args:
            x: Image batch with 3 channels.
            y: Optional class-index labels for the batch.

        Returns:
            The predicted class indices when ``y`` is ``None``; otherwise the
            tuple ``(loss, acc)`` holding the cross-entropy loss and the
            fraction of correct predictions in this mini-batch.
        """
        features = self.pool(F.relu(self.conv1(x)))
        # (the disabled second conv stage would be applied here)
        hidden = features.flatten(1)  # keep the batch dimension, flatten the rest
        hidden = F.relu(self.fc1(hidden))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)

        pred = logits.argmax(dim=1)  # most likely class per sample
        if y is None:
            return pred

        loss = self.loss(logits, y)
        hits = pred.int() == y.int()
        acc = hits.float().mean()  # accuracy within this mini-batch
        return loss, acc



def load_data(batch_size: int):
    """Create the CIFAR-10 train, validation and test data loaders.

    The official training set is split 40000 / 10000 into train and validation
    subsets with a fixed generator seed (42). Only the training loader
    shuffles. Data is read from (and downloaded to) ``./data``.

    Args:
        batch_size: Mini-batch size used by all three loaders.

    Returns:
        The tuple ``(trainloader, valloader, testloader)``.
    """
    # Convert to tensors, then normalize each of the three channels with
    # mean 0.5 and std 0.5.
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    def make_loader(dataset, shuffle):
        # All loaders share the batch size and worker count.
        return D.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=2
        )

    full_train = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )
    split_rng = torch.Generator().manual_seed(42)
    train_part, val_part = D.random_split(
        full_train, [40000, 10000], generator=split_rng
    )
    trainloader = make_loader(train_part, True)
    valloader = make_loader(val_part, False)

    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform
    )
    testloader = make_loader(testset, False)

    return trainloader, valloader, testloader


def train_epoch(
    model: Model, loader: D.DataLoader, optimizer: optim.Optimizer
):  # Training Process
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    Each mini-batch is moved to the module-level ``device``, the model is
    called with inputs and labels to obtain ``(loss, acc)``, and one optimizer
    step is taken.

    The per-batch loss and accuracy are means over the batch, so they are
    weighted by the batch size before averaging. This keeps a smaller final
    batch from being over-weighted in the epoch averages.

    Args:
        model: Network to train; calling it with ``(inputs, labels)`` must
            return ``(loss, acc)``.
        loader: Yields ``(inputs, labels)`` mini-batches.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        ``(acc, loss)`` as Python floats, in that order (accuracy first),
        averaged over all samples seen in the epoch.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.train()
    loss_sum, acc_sum, n_seen = 0.0, 0.0, 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        batch_loss, batch_acc = model(inputs, labels)
        batch_loss.backward()
        optimizer.step()

        # .item() reads the scalar without going through the legacy .data path.
        n_batch = labels.size(0)
        loss_sum += batch_loss.item() * n_batch
        acc_sum += batch_acc.item() * n_batch
        n_seen += n_batch

    return acc_sum / n_seen, loss_sum / n_seen


def valid_epoch(model: Model, loader: D.DataLoader):  # Valid Process
    """Evaluate ``model`` on every batch of ``loader`` without updating it.

    The model is put in eval mode and the loop runs under ``torch.no_grad()``,
    so no autograd graph is built during evaluation. Each mini-batch is moved
    to the module-level ``device``.

    The per-batch loss and accuracy are means over the batch, so they are
    weighted by the batch size before averaging. This keeps a smaller final
    batch from being over-weighted in the reported metrics.

    Args:
        model: Network to evaluate; calling it with ``(inputs, labels)`` must
            return ``(loss, acc)``.
        loader: Yields ``(inputs, labels)`` mini-batches.

    Returns:
        ``(acc, loss)`` as Python floats, in that order (accuracy first),
        averaged over all samples in the loader.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.eval()
    loss_sum, acc_sum, n_seen = 0.0, 0.0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_loss, batch_acc = model(inputs, labels)

            n_batch = labels.size(0)
            loss_sum += batch_loss.item() * n_batch
            acc_sum += batch_acc.item() * n_batch
            n_seen += n_batch

    return acc_sum / n_seen, loss_sum / n_seen

cuda:0


In [None]:
# --- Experiment configuration ---
batch_size = 128
num_epochs = 100
# Patience: consecutive epochs without a validation-accuracy improvement
# that are tolerated before training stops.
early_stoping = 10

lr = 1e-2
filters = (64, 128)

# Seed torch's global RNG so weight initialisation (and anything else drawing
# from it) is repeatable after Restart & Run All.
torch.manual_seed(42)

# Pass the configured filter widths through; calling Model() with no arguments
# silently ignored `filters` and built the default 6-filter network.
model = Model(filters=filters)
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# Alternative optimizers tried:
# optimizer = optim.Adam(model.parameters(), lr=lr)
# optimizer = optim.RMSprop(model.parameters(), lr=lr)
# optimizer = optim.SGD(model.parameters(), lr=lr)

In [None]:
trainloader, valloader, testloader = load_data(batch_size)

# Bookkeeping for model selection on validation accuracy. The test metrics are
# only refreshed on epochs where validation accuracy matches or beats the best
# value so far, so the reported test numbers belong to the best epoch.
best_val_acc = 0.0
best_epoch = test_loss = test_acc = -1
result = None
elapsed = 0
stoping = 0  # consecutive epochs without a validation improvement
print(model)
# Run exactly `num_epochs` epochs (0 .. num_epochs - 1) so the count matches
# the "of {num_epochs}" shown in the progress line.
for epoch in range(num_epochs):
    start_time = time()
    train_acc, train_loss = train_epoch(model, trainloader, optimizer)
    val_acc, val_loss = valid_epoch(model, valloader)

    stop_now = False
    if val_acc >= best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch
        test_acc, test_loss = valid_epoch(model, testloader)
        stoping = 0
    else:
        stoping += 1
        stop_now = stoping >= early_stoping

    elapsed = time() - start_time
    print(f"Epoch {epoch} of {num_epochs} in {elapsed:.2f} s")
    print("  training loss:                 " + str(train_loss))
    print("  training accuracy:             " + str(train_acc))
    print("  validation loss:               " + str(val_loss))
    print("  validation accuracy:           " + str(val_acc))
    print("  best epoch:                    " + str(best_epoch))
    print("  best validation accuracy:      " + str(best_val_acc))
    print("  test loss:                     " + str(test_loss))
    print("  test accuracy:                 " + str(test_acc))

    # Stop only after reporting, so the final epoch's metrics are not lost.
    if stop_now:
        break


Files already downloaded and verified
Files already downloaded and verified
Model(
  (conv1): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Identity()
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1176, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (loss): CrossEntropyLoss()
)
Epoch 0 of 100 in 12.36 s
  training loss:                 1.9541550021582899
  training accuracy:             0.2877146565495208
  validation loss:               1.6499069868763792
  validation accuracy:           0.40031645569620256
  best epoch:                    0
  best validation accuracy:      0.40031645569620256
  test loss:                     1.6308818768851365
  test accuracy:                 0.4040743670886076
Epoch 1 of 100 in 12.07 s
  training loss:                 1.536738625349709
  trainin