In [12]:
import torch
print(f'PyTorch version: {torch.__version__}')
print('*'*10)
print(f'_CUDA version: ')
!nvcc --version
print('*'*10)
print(f'CUDNN version: {torch.backends.cudnn.version()}')
print(f'Available GPU devices: {torch.cuda.device_count()}')
print(f'Device Name: {torch.cuda.get_device_name()}')

PyTorch version: 2.2.2
**********
_CUDA version: 
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Tue_Feb_27_16:28:36_Pacific_Standard_Time_2024
Cuda compilation tools, release 12.4, V12.4.99
Build cuda_12.4.r12.4/compiler.33961263_0
**********
CUDNN version: 8801
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 3080


In [13]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

import numpy as np

import matplotlib.pyplot as plt

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

#### Load the complete CIFAR-10 dataset from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz

In [14]:
# Per-channel (R, G, B) mean / std used to normalise every image fed to the network.
_norm_mean = (0.4914, 0.4822, 0.4465)
_norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop from a 4-pixel padded image plus a random
# horizontal flip (augmentation), then tensor conversion and normalisation.
transform_traindata = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalisation.
transform_testdata = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

In [15]:
# Loading the Complete CIFAR-10 dataset
# Both splits are stored under ./data (downloaded on first use). The training split
# uses the augmenting transform and is reshuffled every epoch; the test split is
# read in a fixed order with the plain transform.

train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_traindata,
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_testdata,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


#### Define the ResNet model and its BasicBlock residual building block

In [16]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions with batch normalisation.

    The block computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``.
    ``shortcut`` is an empty ``nn.Sequential`` (identity) unless the block
    changes the spatial resolution or the channel count, in which case it is a
    strided 1x1 convolution followed by batch normalisation.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first convolution (and of the projection shortcut).
    """

    # Output channels are ``planes * expansion``; ResNet reads this attribute
    # when wiring consecutive blocks and the final linear layer.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main branch. Only the first convolution may down-sample.
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip branch: project only when the shapes of x and the main branch differ.
        shape_changes = (stride != 1) or (in_planes != out_planes)
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        branch = self.conv1(x)
        branch = self.bn1(branch)
        branch = F.relu(branch)
        branch = self.conv2(branch)
        branch = self.bn2(branch)
        branch = branch + self.shortcut(x)
        return F.relu(branch)

class ResNet(nn.Module):
    """ResNet-style classifier with a 32-channel stem and four residual stages.

    Layout: 3x3 stem convolution (3 -> 32 channels) with batch norm and ReLU,
    then four stages of ``block`` modules with 32, 64, 128 and 256 planes
    (strides 1, 2, 2, 2), global average pooling and a linear classifier.

    Args:
        block: residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` class attribute.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output logit vector.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; _make_layer reads and updates it stage by stage.
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.linear = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1.

        Updates ``self.in_planes`` so the next block (and the next stage) is
        created with the right number of input channels.
        """
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # Distinct loop name so the ``stride`` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the final feature map is 4x4, so
        # this matches the previous fixed avg_pool2d(out, 4); unlike the fixed
        # window it also works for other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        out = self.linear(out)
        return out

def ResNet18():
    """Build the notebook's ResNet with default class count.

    Note that, despite the name, each of the four stages is given three
    ``BasicBlock`` modules (the canonical ResNet-18 layout uses two per stage).
    """
    blocks_per_stage = [3, 3, 3, 3]
    return ResNet(BasicBlock, blocks_per_stage)

# Check number of parameters
def count_parameters(model):
    """Return the total number of scalar parameters in ``model`` that require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Create the model with freshly initialised weights.
# No device is chosen here: train_model / evaluate_model move the model to the
# GPU (when available) themselves.
model = ResNet18()

In [17]:
# Loss and optimiser used by the training cell below: cross-entropy on the logits,
# optimised with SGD (learning rate 0.01, momentum 0.9, no LR schedule).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)
# Alternative configurations that are currently disabled:
#optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
#scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

# Total Parameters of the Basic Resnet Model
# num_params counts every parameter; the printed figure counts only those with
# requires_grad=True (via count_parameters).
num_params = sum(param.numel() for param in model.parameters())
print(f"Number of trainable parameters: {count_parameters(model)}")

Number of trainable parameters: 4366250


In [18]:
from torchsummary import summary
# Print a per-layer parameter table for a single 3x32x32 input.
# The trailing semicolon stops the notebook from also rendering summary()'s return
# value, which otherwise repeats the same table a second time in the cell output.
summary(model, input_size=(3, 32, 32));

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            864
├─BatchNorm2d: 1-2                       64
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  9,216
|    |    └─BatchNorm2d: 3-2             64
|    |    └─Conv2d: 3-3                  9,216
|    |    └─BatchNorm2d: 3-4             64
|    |    └─Sequential: 3-5              --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  9,216
|    |    └─BatchNorm2d: 3-7             64
|    |    └─Conv2d: 3-8                  9,216
|    |    └─BatchNorm2d: 3-9             64
|    |    └─Sequential: 3-10             --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 9,216
|    |    └─BatchNorm2d: 3-12            64
|    |    └─Conv2d: 3-13                 9,216
|    |    └─BatchNorm2d: 3-14            64
|    |    └─Sequential: 3-15             --
├─Sequen

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            864
├─BatchNorm2d: 1-2                       64
├─Sequential: 1-3                        --
|    └─BasicBlock: 2-1                   --
|    |    └─Conv2d: 3-1                  9,216
|    |    └─BatchNorm2d: 3-2             64
|    |    └─Conv2d: 3-3                  9,216
|    |    └─BatchNorm2d: 3-4             64
|    |    └─Sequential: 3-5              --
|    └─BasicBlock: 2-2                   --
|    |    └─Conv2d: 3-6                  9,216
|    |    └─BatchNorm2d: 3-7             64
|    |    └─Conv2d: 3-8                  9,216
|    |    └─BatchNorm2d: 3-9             64
|    |    └─Sequential: 3-10             --
|    └─BasicBlock: 2-3                   --
|    |    └─Conv2d: 3-11                 9,216
|    |    └─BatchNorm2d: 3-12            64
|    |    └─Conv2d: 3-13                 9,216
|    |    └─BatchNorm2d: 3-14            64
|    |    └─Sequential: 3-15             --
├─Sequen

#### Now, create a modified ResNet model by changing the parameter values

#### Train the Model

In [19]:
def train_model(model, train_dataloader, criterion, optimizer, epochs=10):
    """Train ``model`` in place for ``epochs`` passes over ``train_dataloader``.

    The model is moved to the first CUDA device when one is available
    (otherwise the CPU) and put in training mode. Each batch is indexed as
    ``batch[0]`` (inputs) and ``batch[1]`` (labels).

    The mean loss is printed after every 100 mini-batches of an epoch; the
    accumulator is reset at the start of each epoch. Nothing is returned.

    Args:
        model: network to optimise.
        train_dataloader: iterable of batches.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimiser already bound to ``model``'s parameters.
        epochs: number of passes over the data.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    model.to(device)
    model.train()

    log_every = 100  # mini-batches per progress line
    for epoch_idx in range(epochs):
        window_loss = 0.0
        for step, batch in enumerate(train_dataloader, start=1):
            inputs = batch[0].to(device)
            labels = batch[1].to(device)

            # Clear stale gradients, then forward / backward / parameter update.
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            # Accumulate and periodically report the windowed mean loss.
            window_loss += loss.item()
            if step % log_every == 0:
                print('[%d, %5d] loss: %.3f' %
                      (epoch_idx + 1, step, window_loss / log_every))
                window_loss = 0.0

    print('Finished Training')

In [34]:
# Train for 50 epochs (overrides train_model's default of 10).
# NOTE(review): this cell's execution count is far ahead of the train_model definition,
# and the logged loss is already ~0.03 in epoch 1, which suggests it was re-run on an
# already-trained model. If so, the saved weights reflect more than 50 epochs and a
# fresh Restart & Run All may not reproduce the reported accuracy -- confirm.
train_model(model, train_dataloader, criterion, optimizer, epochs=50)

[1,   100] loss: 0.034
[1,   200] loss: 0.035
[1,   300] loss: 0.038
[2,   100] loss: 0.036
[2,   200] loss: 0.038
[2,   300] loss: 0.032
[3,   100] loss: 0.030
[3,   200] loss: 0.029
[3,   300] loss: 0.032
[4,   100] loss: 0.027
[4,   200] loss: 0.026
[4,   300] loss: 0.031
[5,   100] loss: 0.030
[5,   200] loss: 0.024
[5,   300] loss: 0.029
[6,   100] loss: 0.030
[6,   200] loss: 0.027
[6,   300] loss: 0.031
[7,   100] loss: 0.031
[7,   200] loss: 0.034
[7,   300] loss: 0.034
[8,   100] loss: 0.026
[8,   200] loss: 0.025
[8,   300] loss: 0.025
[9,   100] loss: 0.028
[9,   200] loss: 0.025
[9,   300] loss: 0.029
[10,   100] loss: 0.027
[10,   200] loss: 0.026
[10,   300] loss: 0.024
[11,   100] loss: 0.023
[11,   200] loss: 0.022
[11,   300] loss: 0.026
[12,   100] loss: 0.026
[12,   200] loss: 0.028
[12,   300] loss: 0.029
[13,   100] loss: 0.023
[13,   200] loss: 0.032
[13,   300] loss: 0.032
[14,   100] loss: 0.024
[14,   200] loss: 0.024
[14,   300] loss: 0.025
[15,   100] loss: 0

#### Evaluate the Model

In [31]:
def evaluate_model(model, test_dataloader):
    """Measure top-1 accuracy of ``model`` on ``test_dataloader``.

    The model is moved to the first CUDA device when one is available
    (otherwise the CPU) and left in evaluation mode. Each batch is indexed as
    ``data[0]`` (images) and ``data[1]`` (labels); gradients are disabled.

    Args:
        model: network producing one logit per class for every image.
        test_dataloader: iterable of batches.

    Returns:
        Accuracy in percent as a float, or ``nan`` when the loader yields no
        samples. The accuracy is also printed, truncated to a whole percent,
        together with the number of images that were actually evaluated.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_dataloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            # Index of the largest logit along the class dimension is the prediction.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Avoid a ZeroDivisionError on an empty loader.
        print('Accuracy of the network: no test images were provided')
        return float('nan')

    accuracy = 100 * correct / total
    # Report the real sample count instead of assuming the 10000-image CIFAR-10 test split.
    print('Accuracy of the network on the %d test images: %d %%' % (total, accuracy))
    return accuracy

In [35]:
# Score the trained model on the held-out CIFAR-10 test split (un-augmented transform).
evaluate_model(model, test_dataloader)

Accuracy of the network on the 10000 test images: 91 %


In [36]:
# Persist only the weights/buffers (state_dict), not the model class itself; to reuse
# them, rebuild the architecture first and then call load_state_dict on it.
torch.save(model.state_dict(), 'model5M_91.pt')

#### Kaggle submission

In [37]:
# Pre-processing for the unlabeled competition images: tensor conversion followed by
# per-channel normalisation, with no augmentation.
transform_nolabel_data = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        (0.4914, 0.4822, 0.4465),
        (0.2023, 0.1994, 0.2010),
    ),
])


In [38]:
import csv

def generate_submission_csv(model, cifar_nolabels, batch_size=256, out_path='predictions.csv'):
    """Predict a class for every unlabeled image and write an ``ID,Labels`` CSV.

    Args:
        model: trained classifier; it is moved to the first CUDA device when
            one is available (otherwise the CPU) and put in evaluation mode.
        cifar_nolabels: mapping with a ``b'data'`` entry (image array) and a
            ``b'ids'`` entry (one identifier per image).
        batch_size: number of images per forward pass. Bounds peak memory
            instead of pushing the whole set through the model at once.
        out_path: destination CSV file.

    Raises:
        ValueError: if ``batch_size`` is not positive, or if the number of ids
            differs from the number of predictions.

    Relies on the notebook-level ``transform_nolabel_data`` pipeline for
    per-image pre-processing.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    images = np.asarray(cifar_nolabels[b'data'])
    if images.ndim == 4 and images.shape[-1] == 3:
        # Already channel-last (N, H, W, 3): reinterpreting this memory as
        # (N, 3, 32, 32) would scramble the pixels, so use it as is.
        pass
    else:
        # Flat or channel-first CIFAR layout -> (N, 3, 32, 32) -> (N, 32, 32, 3),
        # the layout the per-image transform is given.
        images = images.reshape((-1, 3, 32, 32))
        images = np.transpose(images, (0, 2, 3, 1))

    batch_predictions = []
    with torch.no_grad():
        for start in range(0, len(images), batch_size):
            chunk = images[start:start + batch_size]
            img_tensor = torch.stack(
                [transform_nolabel_data(img) for img in chunk]
            ).float().to(device)
            outputs = model(img_tensor)
            _, predicted = torch.max(outputs, 1)
            batch_predictions.append(predicted.cpu())

    if batch_predictions:
        predictions = torch.cat(batch_predictions).numpy()
    else:
        predictions = np.empty(0, dtype=np.int64)

    ids = cifar_nolabels[b'ids']
    if len(ids) != len(predictions):
        # zip() would silently drop rows and produce an incomplete submission.
        raise ValueError(
            'number of ids (%d) does not match number of predictions (%d)'
            % (len(ids), len(predictions))
        )
    predTuple = list(zip(ids, predictions))

    with open(out_path, 'w', newline='') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['ID', 'Labels'])
        csv_out.writerows(predTuple)
        

In [39]:
import pickle
def load_cifar_batch(file):
    """Unpickle the file at path ``file`` and return the stored object.

    The file is decoded with ``encoding='bytes'``, so string keys written by
    Python 2 come back as ``bytes`` (e.g. ``b'data'``).
    """
    # NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Load the unlabeled competition images and write the model's predictions to a CSV.
# Uses whatever weights `model` currently holds in this kernel session.
cifar_nolabels = load_cifar_batch('./data/cifar_test_nolabels.pkl')
generate_submission_csv(model, cifar_nolabels)

#### Plot the Accuracy of this Model