In [5]:
# ========= hyperparameters =========
num_epochs = 150
batch_size = 64
num_workers = 2

# ---- choose path, dataset, model ----
run_on_local = 0  # 1 for True (local paths), 0 for False (Kaggle paths)
dataset1 = 'CIFAR10'  # choose between 'MNIST' or 'CIFAR10'

# Names looked up on `mymodels.models`; the trailing comment is each entry's index.
modelnames = [
    'C3L2_MNIST',         # 0
    'C3L2_cifar10',       # 1
    'C5L3_MNIST',         # 2
    'C5L3_cifar10',       # 3
    'ResNet18_MNIST',     # 4
    'ResNet18_cifar10',   # 5
    'ResNet20_MNIST',     # 6
    'ResNet20_cifar10',   # 7
    'ResNet50_MNIST',     # 8
    'ResNet50_cifar10',   # 9
    'VGG16_MNIST',        # 10
    'VGG16_cifar10',      # 11
    'ResNet20_omni',      # 12
    'C5L3_base_cifar10',  # 13
]

# 0/2/4/6/8/10 for MNIST, 1/3/5/7/9/11 for CIFAR10.
# Negative ids count from the end of `modelnames`, so -1 selects the last entry.
# 2025-3-8 running: ResNet18 and VGG16 on CIFAR10 v/v4
model_id = -1

optimizer_id = 0  # 0 for Adam, 1 for SGD
data_aug = False

In [8]:
def main(num_epochs = num_epochs, batch_size = batch_size, num_workers = num_workers, run_on_local = run_on_local, dataset1 = dataset1, model_id = model_id, optimizer_id = optimizer_id, data_aug = data_aug, modelnames = modelnames):
    """Train one network from ``mymodels.models`` on MNIST or CIFAR10 and save the results.

    After every epoch the network is evaluated on the test split. When training
    finishes, the weights (``weights_*.pth``), the per-epoch statistics
    (``stats_*.npy``) and two curves (``fig_*.png``) are written to disk.

    Parameters
    ----------
    num_epochs : int
        Number of passes over the training split.
    batch_size : int
        Mini-batch size used by both data loaders.
    num_workers : int
        Number of worker processes used by both data loaders.
    run_on_local : int or bool
        Truthy: read data from ``../data``, append ``./code`` to ``sys.path`` and
        write the weights to the current directory. Falsy: use the Kaggle input
        directories and write the weights to ``/kaggle/working``.
    dataset1 : str
        ``'MNIST'`` or ``'CIFAR10'``; also the name of the torchvision dataset class.
    model_id : int
        Index into ``modelnames``.
    optimizer_id : int
        0 for Adam, 1 for SGD.
    data_aug : bool
        Enables the extra training-time augmentation (see the notes in the body).
    modelnames : list of str
        Attribute names looked up on ``mymodels.models``.

    Raises
    ------
    ValueError
        If ``dataset1`` is neither ``'MNIST'`` nor ``'CIFAR10'``.
    """
    import os
    import sys

    import matplotlib.pyplot as plt
    import numpy as np
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torchvision.datasets as tv_datasets
    import torchvision.transforms as tv_transforms

    # Fail early with a clear message: only these two datasets have a transform
    # pipeline defined below.
    if dataset1 not in ('MNIST', 'CIFAR10'):
        raise ValueError(f"unsupported dataset {dataset1!r}: choose 'MNIST' or 'CIFAR10'")

    # some experimental setup
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    kaggle = "/kaggle/input/cifar-10-dlhw1-2/data"
    local = "../data"

    if run_on_local:
        data_path = local
        code_dir = './code'
        # '/kaggle/working' does not exist on a local machine, so keep the weights
        # next to the statistics and figures (current working directory).
        weights_dir = '.'
    else:
        data_path = kaggle
        code_dir = '/kaggle/input'
        weights_dir = '/kaggle/working'

    # Make `mymodels` importable; avoid piling up duplicates when main() is re-run.
    if code_dir not in sys.path:
        sys.path.append(code_dir)

    from mymodels import models
    print('successfully load all pac')

    optim_name = ["Adam", "SGD"]
    optim_kwargs = {"Adam": dict(lr=3e-4, weight_decay=1e-6,),
        "SGD": dict(lr = 1e-3, momentum = 0.9)}

    # ============ input pipelines ============
    # `train_aug` holds the random transforms applied to training images only;
    # both splits then go through ToTensor + Normalize.
    if dataset1 == 'MNIST':
        norm_mean, norm_std = (0.1307,), (0.3081,)
        if data_aug:
            train_aug = [
                tv_transforms.RandomRotation(15),
                tv_transforms.RandomAffine(0, translate=(0.1, 0.1)),
            ]
        else:
            train_aug = []
    else:  # 'CIFAR10'
        # One set of statistics for both settings (the augmented branch used to
        # carry a truncated copy of the same std values).
        norm_mean, norm_std = (0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)
        # NOTE(review): the geometric augmentation is applied even when
        # data_aug is False, exactly as before; data_aug only adds ColorJitter.
        # Confirm that this is the intended "no augmentation" baseline.
        train_aug = [
            tv_transforms.RandomRotation(degrees=15),
            tv_transforms.RandomHorizontalFlip(),
            # The keyword is `degrees`; the former `degree=0` raised a TypeError.
            tv_transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        ]
        if data_aug:
            train_aug.append(
                tv_transforms.ColorJitter(
                    brightness=0.2,
                    contrast=0.2,
                    saturation=0.2,
                    hue=0.1,
                )
            )

    def _to_normalized_tensor():
        """Return the transforms shared by the train and the test pipeline."""
        return [
            tv_transforms.ToTensor(),
            tv_transforms.Normalize(norm_mean, norm_std),
        ]

    transformation = {
        "train": tv_transforms.Compose(train_aug + _to_normalized_tensor()),
        "test": tv_transforms.Compose(_to_normalized_tensor()),
    }

    # ============ datasets and loaders ============
    dataset_cls = getattr(tv_datasets, dataset1)
    dataset, loader = {}, {}
    for data_type in ("train", "test"):
        is_train = data_type == "train"
        # download=True fetches the data into `data_path` when it is not there yet
        dataset[data_type] = dataset_cls(
            root=data_path, train=is_train, download=True, transform=transformation[data_type],
        )
        loader[data_type] = torch.utils.data.DataLoader(
            dataset[data_type], batch_size=batch_size, shuffle=is_train, num_workers=num_workers
        )

    # ============ model ============
    # NOTE(review): `models.<name>` is used as a ready-made module instance; if it
    # is a shared object, calling main() twice continues from the trained weights.
    net = getattr(models, modelnames[model_id])

    # Use a dummy input to initialize lazy layers (e.g. nn.LazyConv2d). Run it in
    # eval mode without gradients so a single sample neither trips batch-norm's
    # batch-size check nor leaks into its running statistics.
    dummy_input = dataset['train'][0][0].unsqueeze(0)
    net.eval()
    with torch.no_grad():
        net(dummy_input)

    # move to device
    net.to(device)

    # print the info of hyper/parameters
    # do not check parameter info before passing data into the model if you use LazyConv2d
    print(f'hyperparams: num_epochs: {num_epochs}, batch_size: {batch_size}, num_workers: {num_workers}, run_on_local: {run_on_local}, dataset: {dataset1}, model_id: {model_id}-{modelnames[model_id]}, optimizer_id: {optimizer_id}-{optim_name[optimizer_id]}, data_aug: {int(data_aug)}')
    print(f"number of parameters: {sum(p.numel() for p in net.parameters() if p.requires_grad) / 1_000_000:.2f}M")

    # the network optimizer
    optimizer = getattr(optim, optim_name[optimizer_id])(net.parameters(), **optim_kwargs[optim_name[optimizer_id]])

    # loss function (default reduction: mean over the batch)
    criterion = nn.CrossEntropyLoss()

    # statistics, one entry per epoch
    loss_train = []
    accuracy_test = []

    # ============ training loop ============
    for epoch in range(num_epochs):
        # The evaluation below switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        net.train()
        loss_sum, samples_seen = 0.0, 0
        for img, target in loader["train"]:
            img, target = img.to(device), target.to(device)

            pred = net(img)
            loss = criterion(pred, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # `loss` is already the batch mean: weight it by the batch size so the
            # epoch value is the true per-sample average, independent of
            # batch_size and of a smaller last batch.
            n_samples = target.size(0)
            loss_sum += loss.item() * n_samples
            samples_seen += n_samples

        # Evaluate on the test split after each epoch. eval() makes dropout and
        # batch-norm use their inference behaviour and keeps test batches from
        # updating the running statistics.
        net.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for img, target in loader["test"]:
                img, target = img.to(device), target.to(device)
                pred = net(img)
                total += len(target)
                correct += (torch.argmax(pred, dim=1) == target).sum().item()

        # max(..., 1) only guards against an empty split
        loss_train.append(loss_sum / max(samples_seen, 1))
        accuracy_test.append(correct / max(total, 1))
        print(f"Epchs: {epoch+1}, train loss: {loss_train[-1]:.2f}, test Accuracy: {100 * accuracy_test[-1]:.2f}%")

    print("Finished Training")

    # ===== save trained weights =====
    save_name = f'{modelnames[model_id]}_{optim_name[optimizer_id]}_{int(data_aug)}aug'
    weights_path = os.path.join(weights_dir, 'weights_' + save_name + '.pth')
    torch.save(net.state_dict(), weights_path)

    # save accuracy/loss statistics (reload with np.load(path).tolist())
    np.save('stats_' + save_name+'_accuracy_test.npy', np.array(accuracy_test))
    np.save('stats_' + save_name+'_loss_train.npy', np.array(loss_train))

    def _plot_curve(values, save_path, label, ylabel, title):
        """Plot one per-epoch curve, save it to `save_path` and display it."""
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(values, marker='o', linestyle='-', color='b', label=label)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)
        fig.savefig(save_path)
        plt.show()
        plt.close(fig)  # release the figure so repeated runs do not accumulate them

    # The loss curve now carries its own labels instead of the accuracy ones.
    _plot_curve(loss_train, 'fig_' + save_name + '_lost_train.png',
                label='Loss', ylabel='Loss of Training Set', title='Training Loss Sequence')
    _plot_curve(accuracy_test, 'fig_' + save_name + '_accuracy_test.png',
                label='Accuracy', ylabel='Accuracy of Test Set', title='Accuracy Test Sequence')


In [None]:
# ======== run main function ===========
# Forward every setting from the configuration cell explicitly. `modelnames` is
# included because main()'s defaults were bound when the function was defined:
# without it, an edited model list would be ignored until main is re-defined.
main(
    num_epochs=num_epochs,
    batch_size=batch_size,
    num_workers=num_workers,
    run_on_local=run_on_local,
    dataset1=dataset1,
    model_id=model_id,
    optimizer_id=optimizer_id,
    data_aug=data_aug,
    modelnames=modelnames,
)

In [3]:
# trash code:
# link all dataset in kaggle to os.path
# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))


# optim_kwargs = dict(
#     lr=3e-4,
#     weight_decay=1e-6,
# )

# ============================= old transformation =============
# # preprocessing pipeline for input images
# transformation = dict()
# for data_type in ("train", "test"):
#     is_train = data_type=="train"
#     transformation[data_type] = tv_transforms.Compose(([
#         tv_transforms.RandomRotation(degrees=15),
#         tv_transforms.RandomHorizontalFlip(),
#         tv_transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
#     ] if is_train else []) + 
#     [
#         tv_transforms.ToTensor(),
#         tv_transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
#     ])



0
1
2
3
4
5
