Import Necessary Libraries


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix

Load and Transform the MNIST Dataset


In [None]:
# Preprocessing applied to every image: convert to a float tensor, then
# standardise with mean 0.1307 and std 0.3081 (the values commonly quoted
# for MNIST).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Both splits live under ./data and are downloaded on first use.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Training batches are small and reshuffled each epoch; evaluation batches
# are large and keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)


Define the LeNet-Like Architecture


In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN for single-channel 28x28 images.

    Layer stack::

        conv(1 -> 20, 5x5) -> ReLU -> 2x2 max-pool
        conv(20 -> 50, 5x5) -> ReLU -> 2x2 max-pool
        fc(4*4*50 -> 500) -> ReLU -> fc(500 -> 10) -> log-softmax

    The forward pass returns log-probabilities, so the matching loss is
    ``F.nll_loss``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)  # Convolutional layer with 20 output channels
        self.conv2 = nn.Conv2d(20, 50, 5, 1)  # Convolutional layer with 50 output channels
        # A 28x28 input shrinks 28 -> 24 -> 12 -> 8 -> 4 through the two
        # conv/pool stages, leaving 50 feature maps of 4x4.
        self.fc1 = nn.Linear(4*4*50, 500)  # Fully connected layer
        self.fc2 = nn.Linear(500, 10)  # Output layer

    def forward(self, x):
        """Map a ``(batch, 1, 28, 28)`` tensor to ``(batch, 10)`` log-probabilities.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce
                4*4*50 features per sample (raised by ``fc1``).
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 4*4*50) would silently spill extra features into the batch
        # dimension for wrongly sized inputs instead of failing.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


Initialize the Network and Optimizer


In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fresh network placed on the chosen device, optimised by Adam (lr = 1e-3).
model = LeNet().to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


Train the Model


In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network whose forward pass returns log-probabilities
            (the loss used here is ``F.nll_loss``).
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` for the progress line.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress line.

    A progress line is printed for every 100th batch, starting with batch 0.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue

        # Samples seen is estimated as batches-so-far times the current batch size.
        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print(f'Train Epoch: {epoch} [{samples_seen}/{len(train_loader.dataset)} ({percent_done:.0f}%)]\tLoss: {batch_loss.item():.6f}')

### Step 6: Evaluate the Model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

### Step 7: Run the Training and Testing
# Ten epochs (numbered 1..10), each one a training pass followed by a
# test-set evaluation.
for epoch in range(1, 10 + 1):
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)


Visualizing the performance of the model

In [None]:
# Function to evaluate the model and generate confusion matrix
def evaluate_confusion_matrix(model, device, test_loader):
    """Run ``model`` over ``test_loader`` and build a confusion matrix.

    Args:
        model: network producing one score per class along dim 1.
        device: device each batch is moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches.

    Returns:
        Whatever ``confusion_matrix(y_true, y_pred)`` returns for the
        collected true and predicted labels.
    """
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Predicted class = position of the highest score per sample.
            predicted = model(inputs).argmax(dim=1, keepdim=True)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.reshape(-1).cpu().numpy())

    return confusion_matrix(y_true, y_pred)

# Build the confusion matrix for the test set and print it under a heading.
conf_matrix = evaluate_confusion_matrix(model=model, device=device, test_loader=test_loader)
print("Confusion Matrix:", conf_matrix, sep="\n")

In [None]:
# Accumulators for the ground-truth and predicted class of every test sample.
true_labels, predicted_labels = [], []

# One pass over the test set in evaluation mode, without gradient tracking.
model.eval()
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # highest-scoring class per sample
        true_labels.extend(target.cpu().numpy())
        predicted_labels.extend(pred.cpu().numpy().flatten())

# Confusion matrix from the collected labels.
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Draw the matrix as a blue heat map with a colour bar.
fig, ax = plt.subplots(figsize=(10, 8))
heatmap = ax.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title('Confusion Matrix')
fig.colorbar(heatmap, ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

A complete implementation for training and evaluating the LeNet-5 network on the MNIST dataset

In [None]:
def plot_model_comparison():
    """Show a table comparing layer sizes of two LeNet variants.

    The table has one row per layer and three columns: the layer name, the
    "Original LeNet" size and the "Modified LeNet" size. Takes no arguments
    and returns nothing; the figure is displayed with ``plt.show()``.
    """
    # NOTE(review): these sizes are hard-coded, not read from a model object.
    # The LeNet class defined in this notebook uses 20 and 50 conv channels
    # with a 500-unit hidden layer -- confirm which column it corresponds to.
    layer_names = ["Conv1", "Max Pooling", "Conv2", "Max Pooling", "Fully Connected"]
    original_sizes = [20, "-", 50, "-", 120]
    modified_sizes = [30, "-", 60, "-", 500]

    column_headers = ["Layer", "Original LeNet", "Modified LeNet"]
    # Table cells must be text, so every size is stringified.
    body_rows = [
        [name, str(original), str(modified)]
        for name, original, modified in zip(layer_names, original_sizes, modified_sizes)
    ]

    fig, ax = plt.subplots()
    ax.axis('off')  # only the table should be visible, not the axes frame
    ax.table(cellText=body_rows, colLabels=column_headers, loc='center')

    plt.show()

# Render the layer-size comparison table. The function takes no arguments,
# returns nothing, and displays the figure itself via plt.show().
plot_model_comparison()

Visualizing the training progress of the LeNet-5 network

In [None]:
def plot_training_progress(train_losses, train_accuracies):
    """Plot per-epoch training loss and accuracy side by side.

    Args:
        train_losses: sequence of loss values, one per epoch.
        train_accuracies: sequence of accuracy percentages, one per epoch.

    Draws a 10x5 figure with the loss curve in the left panel and the
    accuracy curve in the right panel, then shows it.
    """
    # (subplot position, series, legend label, y-axis label, panel title)
    panels = (
        (1, train_losses, 'Training Loss', 'Loss', 'Training Loss Progress'),
        (2, train_accuracies, 'Training Accuracy', 'Accuracy (%)', 'Training Accuracy Progress'),
    )

    plt.figure(figsize=(10, 5))
    for position, series, legend_label, y_label, panel_title in panels:
        plt.subplot(1, 2, position)
        plt.plot(series, label=legend_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(panel_title)
        plt.legend()

    plt.tight_layout()
    plt.show()

def train(model, device, train_loader, optimizer, epoch, train_losses=None, train_accuracies=None):
    """Run one training epoch and optionally record its loss and accuracy.

    Args:
        model: network whose forward pass returns log-probabilities
            (the loss used here is ``F.nll_loss``).
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` for the progress line.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress line.
        train_losses: optional list; the epoch's average per-sample loss is
            appended to it.
        train_accuracies: optional list; the epoch's accuracy (percent) is
            appended to it.

    Both history lists default to ``None`` so that calls using the
    five-argument form ``train(model, device, train_loader, optimizer,
    epoch)`` keep working after this definition replaces the earlier one.

    The accuracy is a running figure: each batch is scored with the forward
    pass made before that batch's optimizer step.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no samples.
    """
    model.train()
    correct = 0
    total_loss = 0.0
    total_samples = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        batch_size = len(data)
        # F.nll_loss returns the batch mean. Scale it back to a batch sum so
        # a short final batch is not over-weighted in the epoch average.
        total_loss += loss.item() * batch_size
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        total_samples += batch_size

        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

    epoch_loss = total_loss / total_samples
    epoch_accuracy = 100. * correct / total_samples
    if train_losses is not None:
        train_losses.append(epoch_loss)
    if train_accuracies is not None:
        train_accuracies.append(epoch_accuracy)

# Per-epoch history; train() appends one entry to each list per call.
train_losses, train_accuracies = [], []

# NOTE(review): `model` and `optimizer` are reused as they are here. If the
# earlier 10-epoch train/test loop has already run, these are ten additional
# epochs rather than training from scratch -- confirm that is intended.
for epoch in range(1, 10 + 1):
    train(
        model, device, train_loader, optimizer, epoch,
        train_losses=train_losses,
        train_accuracies=train_accuracies,
    )

# Loss and accuracy curves for the epochs just run.
plot_training_progress(train_losses=train_losses, train_accuracies=train_accuracies)

Comparison of Model Accuracies

In [None]:
# Shared x-axis: epochs 1 through 10.
epochs = range(1, 11)

# NOTE(review): the accuracy values below are literal numbers; nothing in this
# cell computes them from a model -- confirm where they were measured.
modified_lenet_accuracies = [94.95, 96.19, 96.71, 96.98, 97.19, 97.36, 97.51, 97.66, 97.78, 97.89]
original_lenet_accuracies = [94.56, 95.86, 96.35, 96.61, 96.83, 96.99, 97.14, 97.29, 97.42, 97.53]
simple_mlp_accuracies = [91.75, 92.84, 93.47, 93.82, 94.10, 94.35, 94.58, 94.79, 94.97, 95.14]

# One line per model on a single set of axes.
plt.figure(figsize=(10, 6))
for accuracy_curve, curve_label in (
    (modified_lenet_accuracies, "Modified LeNet"),
    (original_lenet_accuracies, "Original LeNet"),
    (simple_mlp_accuracies, "Simple MLP"),
):
    plt.plot(epochs, accuracy_curve, label=curve_label)

plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.title("Comparison of Model Accuracies")
plt.legend()
plt.show()
