# Simple CNN for MNIST

In [1]:
# Import necessary libraries
import torch
import torch.nn as nn  # Neural network modules (e.g., Conv2d, Linear, etc.)
import torch.optim as optim  # Optimization algorithms (e.g., Adam)
import torch.nn.functional as F  # Functional operations (e.g., ReLU)
from torch.utils.data import DataLoader  # Data loading utilities
from torchvision import datasets, transforms  # Datasets and transformations for images
from tqdm import tqdm  # Progress bar for loops

# Define the CNN model
class CNNV0(nn.Module):
    """Two-stage convolutional classifier for 28x28 images (e.g. MNIST).

    Pipeline: conv(3x3) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU ->
    max-pool(2) -> flatten -> linear.
    """

    def __init__(self, in_channels, out_channels, hidden_units):
        """
        Initialize the CNN model.

        Args:
            in_channels (int): Number of input channels (e.g., 1 for grayscale images).
            out_channels (int): Number of output classes (e.g., 10 for MNIST digits).
                Also used as the channel count of the second convolution.
            hidden_units (int): Number of filters in the first convolutional layer.
        """
        super().__init__()

        # First convolutional layer.
        # Conv output size = (input - kernel + 2 * padding) / stride + 1,
        # so with kernel 3, stride 1, padding 1 the spatial size is preserved:
        # (batch, in_channels, 28, 28) -> (batch, hidden_units, 28, 28)
        self.cl1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=hidden_units,
            kernel_size=3,
            stride=1,
            padding=1,
        )

        # Second convolutional layer (applied after the first pooling step):
        # (batch, hidden_units, 14, 14) -> (batch, out_channels, 14, 14)
        self.cl2 = nn.Conv2d(
            in_channels=hidden_units,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )

        # Max pooling layers; each halves the spatial size.
        # Pool output size = (input - kernel) / stride + 1 = (28 - 2) / 2 + 1 = 14
        self.max1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 28x28 -> 14x14
        self.max2 = nn.MaxPool2d(2)  # stride defaults to kernel_size: 14x14 -> 7x7

        # Flatten (batch, out_channels, 7, 7) -> (batch, out_channels * 7 * 7)
        self.flat = nn.Flatten()

        # Fully connected layer for classification. The input width is derived
        # from out_channels (not hard-coded) so the model also works when the
        # number of classes is not 10. The 7 * 7 factor assumes 28x28 inputs.
        self.classifier = nn.Linear(
            in_features=out_channels * 7 * 7,
            out_features=out_channels,
        )

    def forward(self, x):
        """
        Forward pass of the CNN.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, in_channels, 28, 28).

        Returns:
            torch.Tensor: Unnormalized class scores of shape (batch_size, out_channels).
        """
        x = F.relu(self.cl1(x))  # (batch, hidden_units, 28, 28)
        x = self.max1(x)  # (batch, hidden_units, 14, 14)
        x = F.relu(self.cl2(x))  # (batch, out_channels, 14, 14)
        x = self.max2(x)  # (batch, out_channels, 7, 7)
        x = self.flat(x)  # (batch, out_channels * 7 * 7)
        return self.classifier(x)  # (batch, out_channels)


# Load Data
batch_size = 64  # Number of samples per batch

# Download and load the MNIST training dataset
train_dataset = datasets.MNIST(
    root="dataset/",  # Directory to store the dataset
    train=True,  # Load the training split
    transform=transforms.ToTensor(),  # Convert images to PyTorch tensors
    download=True,  # Download if not already present
)

# Download and load the MNIST test dataset
test_dataset = datasets.MNIST(
    root="dataset/",  # Directory to store the dataset
    train=False,  # Load the test split
    transform=transforms.ToTensor(),  # Convert images to PyTorch tensors
    download=True,  # Download if not already present
)

# Create DataLoaders. Training batches are reshuffled every epoch; the test
# loader keeps a fixed order because accuracy over the whole split does not
# depend on order and a fixed order keeps evaluation reproducible.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define device (GPU if available, else CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss function, and optimizer
cnn0 = CNNV0(in_channels=1, out_channels=10, hidden_units=10).to(device)  # Move model to device
loss_fn = nn.CrossEntropyLoss()  # Expects raw (unnormalized) class scores
optimizer = optim.Adam(cnn0.parameters(), lr=0.01)  # Adam optimizer with learning rate 0.01

# Training loop: each epoch trains on every training batch, then reports
# accuracy on the full test set.
num_epochs = 3  # Number of epochs to train the model
for epoch in range(num_epochs):
    cnn0.train()  # Set the model to training mode
    for data, targets in tqdm(train_loader):
        # Move data and targets to the appropriate device (GPU/CPU)
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward pass: compute predictions and the loss against the labels
        scores = cnn0(data)
        loss = loss_fn(scores, targets)

        # Backward pass: compute gradients and update model parameters
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        loss.backward()  # Compute gradients
        optimizer.step()  # Update model parameters

    # Evaluation on the test set
    cnn0.eval()  # Set the model to evaluation mode
    correct = 0  # Count of correct predictions
    total = 0  # Total number of samples
    with torch.no_grad():  # Disable gradient tracking for evaluation
        for data, targets in test_loader:
            data = data.to(device=device)
            targets = targets.to(device=device)

            # The predicted class is the index of the highest score per sample
            scores = cnn0(data)
            predicted = scores.argmax(dim=1)

            # Update counts
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Calculate and print accuracy
    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {accuracy:.2f}%')

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 938/938 [00:15<00:00, 60.55it/s]


Epoch [1/3], Test Accuracy: 98.10%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 938/938 [00:13<00:00, 68.39it/s]


Epoch [2/3], Test Accuracy: 98.30%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 938/938 [00:13<00:00, 68.64it/s]


Epoch [3/3], Test Accuracy: 98.14%
