In [10]:
import torch
import torch.nn as nn


In [11]:
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier.

    Forward order:
        conv1 (3x3, padding 1) -> ReLU -> 2x2 max pool
        conv2 (3x3, padding 1) -> ReLU -> 2x2 max pool
        adaptive average pool to 4x4 -> flatten -> linear

    The adaptive pooling stage always hands 32 * 4 * 4 features to the
    linear layer, so the spatial size of the input does not have to be
    fixed (both 28x28 and 32x32 inputs work).

    Args:
        num_classes: Number of output scores produced per sample.
        in_channels: Number of channels in the input images
            (1 for grayscale, 3 for RGB). Defaults to 1.
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 1):
        super().__init__()
        # First convolutional layer:
        # - Input channels: `in_channels` (1 for grayscale, 3 for RGB),
        # - Output channels: 16 feature maps,
        # - Kernel size: 3x3,
        # - Padding: 1 (to preserve spatial dimensions)
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, padding=1)

        # Second convolutional layer:
        # - Input channels: 16 (from previous layer),
        # - Output channels: 32,
        # - Kernel size: 3x3,
        # - Padding: 1
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)

        # Max pooling layer with a 2x2 window (reduces feature map size by half);
        # it has no parameters, so the same module is reused after both convolutions
        self.max_pool = nn.MaxPool2d(2)

        # Non-linear activation function, likewise shared by both stages
        self.relu = nn.ReLU()

        # Adaptive average pooling to output 4x4 feature maps (regardless of original input size)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        # Fully connected layer for classification:
        # - Input size: 32 feature maps * 4 * 4 (from adaptive pooling)
        # - Output size: number of classes
        self.fc1 = nn.Linear(32 * 4 * 4, num_classes)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Compute class scores for a batch of images.

        Args:
            X: Batch of shape (batch_size, in_channels, height, width).

        Returns:
            Tensor of shape (batch_size, num_classes) holding the raw
            outputs of the final linear layer (no softmax is applied).
        """
        # First stage: convolution -> ReLU -> max pooling
        X = self.conv1(X)
        X = self.relu(X)
        X = self.max_pool(X)

        # Second stage: convolution -> ReLU -> max pooling
        X = self.conv2(X)
        X = self.relu(X)
        X = self.max_pool(X)

        # Apply adaptive average pooling to standardize feature map size
        X = self.adaptive_pool(X)

        # Flatten everything except the batch dimension. torch.flatten is used
        # instead of .view so non-contiguous activations (e.g. channels_last)
        # are handled rather than raising.
        X = torch.flatten(X, start_dim=1)

        # Pass through the fully connected layer to get final class scores
        X = self.fc1(X)
        return X


# Instantiate the network with 10 output classes; the cells below reuse this `model`.
model = SimpleCNN(num_classes=10)

In [12]:
from torchinfo import summary

# Shape of the dummy batch handed to torchinfo's summary
input_size = (100, 1, 32, 32)  # (batch_size, channels, height, width)
# Left as the last expression of the cell so the per-layer report is displayed
summary(model, input_size=input_size)

Layer (type:depth-idx)                   Output Shape              Param #
SimpleCNN                                [100, 10]                 --
├─Conv2d: 1-1                            [100, 16, 32, 32]         160
├─ReLU: 1-2                              [100, 16, 32, 32]         --
├─MaxPool2d: 1-3                         [100, 16, 16, 16]         --
├─Conv2d: 1-4                            [100, 32, 16, 16]         4,640
├─ReLU: 1-5                              [100, 32, 16, 16]         --
├─MaxPool2d: 1-6                         [100, 32, 8, 8]           --
├─AdaptiveAvgPool2d: 1-7                 [100, 32, 4, 4]           --
├─Linear: 1-8                            [100, 10]                 5,130
Total params: 9,930
Trainable params: 9,930
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 135.68
Input size (MB): 0.41
Forward/backward pass size (MB): 19.67
Params size (MB): 0.04
Estimated Total Size (MB): 20.12

In [13]:
# Smoke test: push a random batch of 28x28 single-channel images through the
# model and check that one score per class comes back for every sample.
batch_size = 100
X = torch.randn(batch_size, 1, 28, 28)
Y = model(X)
print(Y.shape)

# Fail loudly instead of relying on someone eyeballing the printed shape.
assert Y.shape == (batch_size, model.fc1.out_features), (
    f"unexpected output shape {tuple(Y.shape)}"
)


torch.Size([100, 10])
