## 1. Import libraries

In [1]:
import numpy as np
import pandas as pd
import random
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.models import resnet34
from torchvision.transforms import v2

## 2. Set hyperparameters

In [2]:
batch_size = 1024
learning_rate = 1e-3
num_epochs = 1

In [3]:
# Set seed
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


set_seed(42)

## 3. Data preprocessing

In [4]:
# Read csv
train_data = pd.read_csv('../data/train.csv')
test_data = pd.read_csv('../data//test.csv')
submit = pd.read_csv('../data//sample_submission.csv')

In [5]:
# Split train and validation data
train = np.array(train_data)
train_size = int(len(train) * 0.9)
valid_size = len(train) - train_size

train_dataset, valid_dataset = random_split(train, (train_size, valid_size))

In [6]:
# Split data and label
train_dataset = np.array(train_dataset)
valid_dataset = np.array(valid_dataset)
x_train = train_dataset[:, 1:]
x_valid = valid_dataset[:, 1:]
y_train = train_dataset[:, 0]
y_valid = valid_dataset[:, 0]
x_test = np.array(test_data)

# Convert dtype to tensor and reshape data
x_train = torch.tensor(x_train, dtype=torch.float)
x_train = x_train.view(-1, 1, 28, 28)
x_valid = torch.tensor(x_valid, dtype=torch.float)
x_valid = x_valid.view(-1, 1, 28, 28)
x_test = torch.tensor(x_test, dtype=torch.float)
x_test = x_test.view(-1, 1, 28, 28)

y_train = torch.tensor(y_train)
y_valid = torch.tensor(y_valid)
y_test = torch.randn(len(x_test), 10)

# Transforms
train_transforms = v2.Compose(
    [
        v2.Resize(32, antialias=True),
        v2.RandomRotation(15),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.1307], std=[0.3081]),
    ]
)
test_transforms = v2.Compose(
    [
        v2.Resize(32, antialias=True),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.1307], std=[0.3081]),
    ]
)

In [7]:
# Define dataset
class MNIST(Dataset):
    def __init__(self, x_data, y_data, transforms):
        self.x_data = x_data
        self.y_data = y_data
        self.transforms = transforms

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        img = self.x_data[idx]
        transformed_img = self.transforms(img)
        label = self.y_data[idx]
        return transformed_img, label

In [8]:
# Load data
train_dataset = MNIST(x_train, y_train, train_transforms)
valid_dataset = MNIST(x_valid, y_valid, test_transforms)
test_dataset = MNIST(x_test, y_test, test_transforms)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## 4. Define the model

In [9]:
# Define CNN
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(16, 30, kernel_size=3, stride=1, padding=1)
        self.pool = nn.AvgPool2d(2, 2)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(480, 84)
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [10]:
# Can use cuda if you have gpu
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load model, optimizer, loss function, learning rate scheduler
model = CNN(num_classes=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss().to(device)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

## 5. Train and validation

In [11]:
# Train
best_epoch = 1
best_acc = 0.0
for epoch in range(num_epochs):
    print("" * 40)
    print(f"Epoch : {epoch+1}/{num_epochs}")
    epoch_loss = 0.0
    epoch_corrects = 0
    model.train()
    for batch_in, batch_out in tqdm(train_loader):
        batch_in = batch_in.to(device)
        batch_out = batch_out.to(device)

        y_pred = model(batch_in)
        _, preds = torch.max(y_pred, 1)

        loss = criterion(y_pred, batch_out)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * batch_in.size(0)
        epoch_corrects += torch.sum(preds == batch_out.data)

    epoch_loss = epoch_loss / len(train_loader.dataset)
    epoch_acc = epoch_corrects.double() / len(train_loader.dataset)

    print(f"Train loss : {epoch_loss:.4f}")
    print(f"Train acc : {epoch_acc:.4f}")

# Validation
    epoch_loss = 0.0
    epoch_corrects = 0
    model.eval()
    for batch_in, batch_out in tqdm(valid_loader):
        batch_in = batch_in.to(device)
        batch_out = batch_out.to(device)

        with torch.no_grad():
            y_pred = model(batch_in)
            _, preds = torch.max(y_pred, 1)

            loss = criterion(y_pred, batch_out)

            epoch_loss += loss.item() * batch_in.size(0)
            epoch_corrects += torch.sum(preds == batch_out.data)

    epoch_loss = epoch_loss / len(valid_loader.dataset)
    epoch_acc = epoch_corrects.double() / len(valid_loader.dataset)

    scheduler.step()

    if epoch_acc >= best_acc:
        best_acc = epoch_acc
        best_epoch = epoch + 1
        torch.save(model, "../checkpoints/model.pt")

    print(f"Valid loss : {epoch_loss:.4f}")
    print(f"Valid acc : {epoch_acc:.4f}")
    print(f"Best epoch : {best_epoch}")
    print(f"Best acc : {best_acc:.4f}")
    print("" * 40)

print("Finished!")


Epoch : 1/1


100%|██████████| 37/37 [00:12<00:00,  3.04it/s]


Train loss : 1.0770
Train acc : 0.6809


100%|██████████| 5/5 [00:00<00:00, 15.18it/s]

Valid loss : 0.2911
Valid acc : 0.9105
Best epoch : 1
Best acc : 0.9105

Finished!





## 6. Result

In [12]:
# Get a result
model = torch.load("../checkpoints/model.pt")
model.to(device)

preds = []
with torch.no_grad():
    model.eval()
    for batch_in, batch_out in tqdm(test_loader):
        batch_in = batch_in.to(device)
        y_pred = model(batch_in)
        y_pred = torch.argmax(y_pred, 1)
        preds.extend(y_pred.cpu().numpy())
        
submit["Label"] = preds
submit.to_csv("../predicts/predict.csv", index=False)

100%|██████████| 28/28 [00:02<00:00, 12.52it/s]
