In [1]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, FloatSlider
from tqdm import tqdm


In [2]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load CIFAR-10 dataset
# CIFAR-10 images are RGB, so Normalize gets one mean/std per channel explicitly
# instead of relying on a single value being broadcast across all three channels.
# With mean = std = 0.5, the [0, 1] range produced by ToTensor is mapped to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Downloads the training split into ./data on first use; later runs reuse the files.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# Mini-batches of 32 images, reshuffled every epoch.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

Files already downloaded and verified


In [None]:

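# ResNet Architecture Description (canonical ImageNet-style layout):
# Note: the ResNet class defined later in this notebook omits steps 2 and 3 (the 7x7
# initial convolution and the max pooling layer) and feeds the image directly into
# the first group of residual blocks.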

# 1. Input Layer:
#    - Receives the input image.

# 2. Initial Convolutional Layer:
#    - A standard convolutional layer with a kernel size of 7x7 and stride of 2.
#    - Followed by batch normalization and a ReLU activation function.
#    - Reduces the spatial dimensions of the feature maps.

# 3. Max Pooling Layer:
#    - Reduces the spatial dimensions further.
#    - Typically uses a kernel size of 3x3 and a stride of 2.

# 4. Residual Blocks (Multiple):
#    - The core building blocks of ResNet.
#    - Each residual block contains two or more convolutional layers.
#    - A skip connection adds the input of the block to its output.
#    - This skip connection allows for the efficient flow of gradients during training.
#    - Different ResNet versions have varying numbers of residual blocks.
#    - Within a residual block:
#      - Convolutional layers with smaller kernel sizes (e.g., 3x3).
#      - Batch normalization after each convolutional layer; ReLU after the first one and
#        again after the skip connection has been added to the block output.

# 5. Average Pooling Layer (Global Average Pooling):
#    - Reduces the spatial dimensions of the feature maps to a single value per channel.
#    - The pooled vector is then passed to a fully connected layer that produces the class scores.

# 6. Fully Connected Layer:
#    - Maps the output of the average pooling layer to the number of classes.
#    - Produces the final class logits (unnormalized scores); a softmax turns them into probabilities.


# The skip connections in the residual blocks are crucial for addressing the vanishing gradient problem
# during training of very deep networks. They enable the network to learn identity mappings, which helps
# in training deeper architectures effectively and avoids performance degradation.



In [3]:
# Basic residual block: two 3x3 conv + batch-norm stages wrapped by a skip connection
class ResidualBlock(nn.Module):
    """Two-convolution residual block.

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. Its result is added
    to the shortcut (the input itself, or ``downsample(input)`` when a
    ``downsample`` module is supplied) and passed through a final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input so that it can be
            added to the main-path output; ``None`` means the input is used
            unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Settings shared by both 3x3 convolutions; the bias is disabled because
        # each convolution is immediately followed by batch normalization.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # One in-place ReLU module serves both activation sites.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Residual addition happens before the final activation.
        features += shortcut
        return self.relu(features)

# Define ResNet Model
class ResNet(nn.Module):
    """Residual network made of four stages of two ResidualBlocks each.

    There is no initial stem convolution or max pooling: the input image goes
    straight into the first residual stage. Stage widths are 64, 128, 256 and
    512 channels; stages 2-4 use stride 2 in their first block. The final
    feature map is average-pooled to 1x1 and mapped to ``num_classes`` logits
    by a linear layer (no softmax is applied here).

    Args:
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input images (3 for RGB).
    """

    def __init__(self, num_classes=10, in_channels=3):
        super(ResNet, self).__init__()
        self.layer1 = self._make_layer(in_channels, 64, 2)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)
        # Adaptive pooling makes the classifier independent of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block changes the channel count and/or applies
        ``stride``; the remaining blocks keep ``out_channels`` at stride 1.
        """
        downsample = None
        if stride != 1 or in_channels != out_channels:
            # The shortcut must match the main path's output shape, so project
            # the input with a strided 1x1 convolution followed by batch norm.
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers = [ResidualBlock(in_channels, out_channels, stride, downsample)]
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Collapse the pooled (batch, 512, 1, 1) map to (batch, 512) for the linear layer.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x



In [4]:
# Build the network, the classification loss, and a default Adam optimizer.
# Note: train_resnet creates its own optimizer for the learning rate it is given,
# so the module-level `optimizer` below is not the one used during training there.
model = ResNet(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Interactive function for adjusting learning rate
def train_resnet(lr=0.001, epochs=5):
    """Train the notebook-level ``model`` on ``trainloader`` and print the mean loss per epoch.

    A fresh Adam optimizer is created on every call so that the requested
    learning rate takes effect. The model weights are NOT reset, so repeated
    calls continue training the same network.

    Args:
        lr: Learning rate for the Adam optimizer.
        epochs: Number of full passes over ``trainloader``.
    """
    # Local optimizer; it intentionally shadows the module-level `optimizer`.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Batches have to live on the same device as the weights. Reading the device
    # from the model keeps this function correct wherever the model was placed.
    model_device = next(model.parameters()).device

    # Another cell may have switched the model to eval mode (e.g. for
    # visualisation); BatchNorm must use batch statistics while training.
    model.train()

    print(f"Training ResNet with learning rate: {lr}")
    for epoch in tqdm(range(epochs)):
        running_loss = 0.0
        for images, labels in trainloader:
            images = images.to(model_device)
            labels = labels.to(model_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Mean of the per-batch losses over the epoch.
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(trainloader):.4f}")
    print("Training Complete!")

In [None]:
# Create interactive controls for the learning rate and the number of epochs.
# Manual mode adds a "Train" button: training starts only when it is clicked, not
# when the cell is displayed or on every slider movement, because each run is a
# full multi-epoch training pass that keeps updating the same model.
# readout_format='.4f' makes values such as 0.0010 visible (the default '.2f'
# would display them as 0.00).
interact.options(manual=True, manual_name="Train")(
    train_resnet,
    lr=FloatSlider(min=0.0001, max=0.01, step=0.0001, value=0.001, readout_format='.4f'),
    epochs=(1, 10),
);

In [None]:
# Visualization function for feature maps
def visualize_feature_maps(num_maps=5):
    """Plot the first ``num_maps`` channels produced by ``model.layer1`` for one training image.

    The model is put in eval mode only for the forward pass and is returned to
    whatever mode it was in before, so calling this between training runs does
    not leave BatchNorm in inference mode.

    Args:
        num_maps: Number of feature-map channels to show (capped at the number
            of channels that ``model.layer1`` outputs).

    Raises:
        ValueError: If ``num_maps`` is smaller than 1.
    """
    if num_maps < 1:
        raise ValueError("num_maps must be at least 1")

    images, _ = next(iter(trainloader))
    # Keep a single image and place it on the same device as the model weights.
    model_device = next(model.parameters()).device
    sample = images[:1].to(model_device)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            activations = model.layer1(sample)  # Output of the first residual stage
    finally:
        # Restore the previous train/eval mode even if the forward pass fails.
        model.train(was_training)

    num_maps = min(num_maps, activations.shape[1])
    # squeeze=False keeps `axes` two-dimensional even when only one map is shown.
    fig, axes = plt.subplots(1, num_maps, figsize=(3 * num_maps, 5), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.imshow(activations[0, i].cpu().numpy(), cmap='viridis')
        ax.set_title(f"layer1 channel {i}")
        ax.axis('off')
    plt.show()

visualize_feature_maps()