In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from tqdm import trange, tqdm

In [None]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Define the FCN model
class FCN(nn.Module):
    """Fully connected classifier operating on flattened inputs.

    Layout: ``Linear(input_dim, 512)``, then ``num_block`` repetitions of
    ``Linear(512, 512) -> BatchNorm1d(512) -> ReLU``, then ``Linear(512, 10)``
    producing the logits.

    Args:
        num_block: Number of hidden Linear/BatchNorm/ReLU groups.
        input_dim: Length of one flattened sample (``3*32*32`` by default).
    """

    def __init__(self, num_block=1, input_dim = 3*32*32):
        super().__init__()
        width = 512

        # Input projection. There is no activation between this layer and the
        # first Linear of the hidden stack.
        self.feature = nn.Linear(input_dim, width)

        # Hidden stack; layers are created in forward order so that seeded
        # parameter initialization is reproducible.
        hidden_layers = []
        for _ in range(num_block):
            hidden_layers.append(nn.Linear(width, width))
            hidden_layers.append(nn.BatchNorm1d(width))
            hidden_layers.append(nn.ReLU(True))
        self.block = nn.Sequential(*hidden_layers)

        self.classifier = nn.Linear(width, 10)

    def forward(self, x):
        """Flatten each sample to a vector and return logits of shape (batch, 10)."""
        flat = x.reshape(x.shape[0], -1)
        hidden = self.block(self.feature(flat))
        return self.classifier(hidden)

In [None]:
# Define the CNN model
class CNN(nn.Module):
    """Small convolutional classifier sized for 3x32x32 inputs.

    ``num_block`` stages of ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2)`` are stacked, with stage output channels 32, 64 and 128.
    Each stage halves the spatial size, so the flattened feature length fed to
    the classifier is ``channels * (32 // 2**num_block) ** 2``. The classifier
    is ``Linear(features, 128) -> ReLU -> Linear(128, 10)``.

    Args:
        num_block: Number of convolutional stages, from 0 to 3 inclusive.

    Raises:
        ValueError: If ``num_block`` is outside the supported range.
    """

    def __init__(self, num_block=1):
        super(CNN, self).__init__()
        cfg = [3, 32, 64, 128]  # input channels followed by each stage's output channels
        input_size = 32         # spatial side length the classifier is sized for

        # Reject unsupported depths up front: a negative count would index
        # `cfg` from the end and build a classifier that does not match the
        # feature map, and a count past the table has no channel width defined.
        max_block = len(cfg) - 1
        if not 0 <= num_block <= max_block:
            raise ValueError(
                f"num_block must be between 0 and {max_block}, got {num_block}"
            )

        stages = []
        for i in range(num_block):
            stages += [
                nn.Conv2d(cfg[i], cfg[i + 1], 3, padding=1),
                nn.BatchNorm2d(cfg[i + 1]),
                nn.ReLU(True),
                nn.MaxPool2d(2),
            ]
        self.block = nn.Sequential(*stages)

        # Every MaxPool2d(2) halves height and width; integer arithmetic keeps
        # the feature count exact.
        side = input_size // (2 ** num_block)
        self.classifier = nn.Sequential(
            nn.Linear(cfg[num_block] * side * side, 128),
            nn.ReLU(True),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Apply the convolutional stages, flatten, and return (batch, 10) logits."""
        x = self.block(x)

        # Flatten everything except the batch dimension for the linear head.
        batch_size = x.shape[0]
        x = x.reshape(batch_size, -1)
        outputs = self.classifier(x)

        return outputs

In [None]:
# Hyperparameter grid -- every list in this first group may be edited.
model = [FCN, CNN]
num_block = [1, 2, 3]
opts = [optim.SGD, optim.Adam]
learning_rate = [0.01, 0.001]
batch_size = [128, 512]

# Held constant across all runs.
num_epochs = 100
weight_decay = 0

# Full Cartesian product of the grid as a List[dict]. The nesting order (model
# outermost, batch size innermost) fixes the order in which runs are listed,
# and the key order of each dict is the one `logging` asserts on.
configs = [
    {'model': m, 'num_block': n, 'optim': opt, 'lr': lr, 'batch': batch}
    for m in model
    for n in num_block
    for opt in opts
    for lr in learning_rate
    for batch in batch_size
]
print(len(configs))

def logging(config, epoch, message):
    """Build, print and return one colon-separated log record.

    The record has the form ``model:num_block:optim:lr:batch:epoch:message``.
    The model and optimizer fields are the ``__name__`` of the configured
    classes (e.g. ``FCN``, ``CNN``, ``SGD``, ``Adam``), so any class added to
    the grid is labeled with its own name.

    Note: this function's name shadows the standard-library ``logging`` module
    within this notebook; the name is kept so existing callers keep working.

    Args:
        config: Dict with exactly the keys 'model', 'num_block', 'optim',
            'lr' and 'batch', in that order.
        epoch: Epoch number to place in the record.
        message: Payload appended as the last field.

    Returns:
        The formatted record string (the same text that is printed).

    Raises:
        AssertionError: If the keys of ``config`` differ from the expected
            list (including their order).
    """
    assert list(config.keys()) == ['model', 'num_block', 'optim', 'lr', 'batch']

    def _label(obj):
        # Classes expose __name__; fall back to str() for anything that does not.
        return getattr(obj, '__name__', str(obj))

    model_name = _label(config['model'])
    optim_name = _label(config['optim'])

    m = (
        f"{model_name}:{config['num_block']}:{optim_name}:"
        f"{config['lr']}:{config['batch']}:{epoch}:{message}"
    )

    print(m)
    return m

In [None]:
# CIFAR preprocessing
# Three-value mean/std constants handed to Normalize; shared by both pipelines
# so training and test inputs are scaled identically.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)
data_root = './data'

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, followed by tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: tensor conversion and normalization only (no augmentation).
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Load the CIFAR-10 splits, downloading them into `data_root` when requested by download=True.
train_dataset = datasets.CIFAR10(root=data_root, train=True, transform=transform_train, download=True)
test_dataset = datasets.CIFAR10(root=data_root, train=False, transform=transform_test, download=True)


In [None]:
import time


def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over `loader` and return the mean per-batch loss."""
    # Evaluation leaves the model in eval mode, so switch back explicitly;
    # BatchNorm must use batch statistics while training.
    model.train()
    epoch_loss = 0.0
    for images, labels in loader:
        # Move the images and labels to the device
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    return epoch_loss / len(loader)


def _evaluate(model, loader, device):
    """Return the top-1 accuracy of `model` over `loader` as a float percentage."""
    # eval() makes BatchNorm use (and not update) its running statistics;
    # no_grad() avoids building autograd graphs during inference.
    model.eval()
    num_correct = 0
    num_total = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            predictions = torch.argmax(model(images), dim=-1)
            # .item() keeps the counter a plain Python int instead of a device tensor.
            num_correct += (predictions == labels).sum().item()
            num_total += len(labels)
    return 100 * num_correct / num_total


eval_every = 10  # evaluate on the test set once every this many epochs

# The context manager guarantees the log file is flushed and closed even if a
# run raises part-way through the sweep.
with open(f'./log.txt-{int(time.time())}', 'wt') as fout:
    for config in configs:

        # Initialize the model and optimizer.
        # For multi-GPU training, build the model without .to(device) and wrap
        # it instead: model = nn.DataParallel(model).to(device)
        model = config['model'](config['num_block']).to(device)

        print(model)

        # The optimizer class comes from the config; weight decay is shared by all runs.
        optimizer = config['optim'](model.parameters(), lr=config['lr'], weight_decay=weight_decay)
        num_params = sum(p.numel() for p in model.parameters())
        fout.write(logging(config, 0, f'parameter:{num_params}') + "\n")

        # Define the loss function
        criterion = nn.CrossEntropyLoss()

        # Create the data loaders (batch size is part of the swept config)
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=config['batch'], shuffle=True)
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=config['batch'], shuffle=False)

        train_losses = []
        test_accs = []
        for epoch in trange(num_epochs):
            # Training
            avg_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)

            # Evaluation
            if (epoch + 1) % eval_every == 0:
                test_acc = _evaluate(model, test_loader, device)
                test_accs.append(test_acc)
                fout.write(logging(config, epoch+1, f'test-acc:{test_acc:.2f}') + "\n")

            # Print statistics
            train_losses.append(avg_loss)
            fout.write(logging(config, epoch+1, f'train-loss:{avg_loss:.4f}') + "\n")

            # Flush once per epoch so the log can be followed during long runs.
            fout.flush()

        print('Finished Training')