In [37]:
import ssl

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [38]:
# Seed the random number generators before any weights are initialised or any
# data is shuffled, so that a fresh "Restart & Run All" starts from the same
# RNG state every time.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [39]:
#############################################
# 1. Data Preparation: STL-10 and MNIST Setup
#############################################

# ----- STL-10 (Source Dataset) -----
# Define transformations: resize to 96x96, random horizontal flip for augmentation,
# conversion to tensor and normalization.

In [40]:
def _stl10_pipeline(augment):
    """Assemble the STL-10 preprocessing pipeline.

    Every image is resized to 96x96, converted to a tensor and normalised with
    mean 0.5 / std 0.5 per channel. When `augment` is true a random horizontal
    flip is inserted right after the resize.
    """
    steps = [transforms.Resize((96, 96))]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
    steps.extend([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    return transforms.Compose(steps)


# Training images are augmented; test images only get the deterministic steps.
stl10_transform_train = _stl10_pipeline(augment=True)
stl10_transform_test = _stl10_pipeline(augment=False)

In [41]:
# Open both STL-10 splits (download=True), each paired with its own transform.
stl10_train, stl10_test = (
    datasets.STL10(root='./data', split=split_name, download=True, transform=split_transform)
    for split_name, split_transform in (
        ('train', stl10_transform_train),
        ('test', stl10_transform_test),
    )
)

# Batch iterators over the two splits; only the training split is shuffled.
stl10_loader_train, stl10_loader_test = (
    DataLoader(split_dataset, batch_size=64, shuffle=reshuffle, num_workers=2)
    for split_dataset, reshuffle in ((stl10_train, True), (stl10_test, False))
)



In [42]:

# ----- MNIST (Target Dataset) -----
# MNIST images are 28x28 and grayscale.
# We resize them to 96x96 and convert grayscale images to 3 channels so that they
# have the input format the CNN below expects (3 channels, 96x96).
mnist_transform_train = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomRotation(10),  # augmentation: small rotations
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

mnist_transform_test = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

# Download and load MNIST dataset.
#
# SECURITY NOTE(review): HTTPS certificate verification is switched off while the
# dataset is fetched. That makes the download vulnerable to tampering; prefer
# fixing the local certificate store and deleting this override. The override is
# limited to the download itself and the default context factory is restored
# afterwards, so nothing else in the session runs with verification disabled.
_unverified_https_context = getattr(ssl, "_create_unverified_context", None)
if _unverified_https_context is not None:
    _saved_https_context = ssl._create_default_https_context
    ssl._create_default_https_context = _unverified_https_context
try:
    mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=mnist_transform_train)
    mnist_test = datasets.MNIST(root='./data', train=False, download=True, transform=mnist_transform_test)
finally:
    if _unverified_https_context is not None:
        ssl._create_default_https_context = _saved_https_context

# Data loaders for MNIST
mnist_loader_train = DataLoader(mnist_train, batch_size=64, shuffle=True, num_workers=2)
mnist_loader_test = DataLoader(mnist_test, batch_size=64, shuffle=False, num_workers=2)



In [43]:
#############################################
# 2. Define the CNN Architecture
#############################################
# We build a simple CNN with a feature extractor (conv layers) and a classifier (fully connected layers).


In [44]:

class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages plus a fully connected head.

    The first linear layer takes 256 * 12 * 12 inputs, i.e. it is sized for
    3-channel 96x96 images after the three 2x poolings.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor: every stage is conv(3x3, padding 1) -> ReLU -> 2x max-pool,
        # so the spatial size goes 96 -> 48 -> 24 -> 12 while the channels grow.
        stages = []
        for in_channels, out_channels in ((3, 64), (64, 128), (128, 256)):
            stages += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*stages)

        # Classifier head working on the flattened 256 x 12 x 12 feature maps.
        flattened_size = 256 * 12 * 12
        self.classifier = nn.Sequential(
            nn.Linear(flattened_size, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension before the linear layers.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)

In [45]:
#############################################
# 3. Pre-train the CNN on STL-10
#############################################
# Initialize the model, loss function, and optimizer.

In [46]:
# Network, loss and optimiser used for the STL-10 pre-training run.
model = SimpleCNN(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10  # Set the number of epochs as desired

print("Starting pre-training on STL-10...")
for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: sample-weighted loss, correct predictions, samples seen.
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in stl10_loader_train:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step on the current mini-batch.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss.item() is a batch mean, so weight it by the batch size.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.max(1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.4f}")

# Persist the learned parameters so the transfer-learning cells can reload them.
torch.save(model.state_dict(), "stl10_pretrained.pth")
print("Pre-training completed and weights saved.")

Starting pre-training on STL-10...
Epoch 1/10 - Loss: 1.8905 | Accuracy: 0.2932
Epoch 2/10 - Loss: 1.5018 | Accuracy: 0.4280
Epoch 3/10 - Loss: 1.3344 | Accuracy: 0.5114
Epoch 4/10 - Loss: 1.1607 | Accuracy: 0.5754
Epoch 5/10 - Loss: 1.0649 | Accuracy: 0.6248
Epoch 6/10 - Loss: 0.9593 | Accuracy: 0.6492
Epoch 7/10 - Loss: 0.8619 | Accuracy: 0.6848
Epoch 8/10 - Loss: 0.7683 | Accuracy: 0.7278
Epoch 9/10 - Loss: 0.6366 | Accuracy: 0.7642
Epoch 10/10 - Loss: 0.5962 | Accuracy: 0.7936
Pre-training completed and weights saved.


In [47]:
#############################################
# 4. Transfer Learning on MNIST: Adaptation Strategies
#############################################

In [48]:

# Utility function for training a model on MNIST
def train_model(model, optimizer, dataloader, num_epochs=5, criterion=None, device=None):
    """Train `model` on `dataloader` and print loss/accuracy after every epoch.

    Args:
        model: Network to train; it is put into training mode.
        optimizer: Optimizer that updates the parameters to be trained.
        dataloader: Iterable yielding `(images, labels)` batches.
        num_epochs: Number of passes over `dataloader`.
        criterion: Loss callable taking `(outputs, labels)`. Defaults to a fresh
            `nn.CrossEntropyLoss()`, so the function does not rely on a
            `criterion` variable created in another notebook cell.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Raises:
        ValueError: If `dataloader` yields no samples during an epoch, since the
            epoch metrics would otherwise require a division by zero.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The loss is a batch mean; weight it by the batch size so the epoch
            # figure is a per-sample average even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        if total == 0:
            raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/total:.4f} | Accuracy: {correct/total:.4f}")


In [49]:
# Evaluation function: computes overall accuracy, precision, recall, F1 score, and confusion matrix.
def evaluate_model(model, dataloader):
    """Print a classification report and a confusion matrix for `model`.

    The model is switched to evaluation mode and run, without gradient tracking,
    over every `(images, labels)` batch of `dataloader`. The predicted class of a
    sample is the index of its largest output along dimension 1.
    """
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            batch_predictions = model(batch_images).max(1)[1]
            # Move results back to host memory before collecting them.
            predictions.extend(batch_predictions.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    print("Classification Report:")
    print(classification_report(y_true, y_pred, digits=4))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

In [50]:
# -------- Strategy 1: Training Linear Layers Only --------
print("\nStrategy 1: Training Linear layers only")
# Initialize a new model and load pre-trained weights.
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU machine can still be loaded when only a CPU is available.
model_linear = SimpleCNN(num_classes=10).to(device)
model_linear.load_state_dict(torch.load("stl10_pretrained.pth", map_location=device))
# Freeze the entire feature extractor.
for param in model_linear.features.parameters():
    param.requires_grad = False

# Define optimizer that only updates classifier parameters.
optimizer_linear = optim.Adam(model_linear.classifier.parameters(), lr=0.001)
# Train on MNIST
train_model(model_linear, optimizer_linear, mnist_loader_train, num_epochs=5)
# Evaluate on MNIST test set.
print("\nEvaluation for Strategy 1 (Linear layers only):")
evaluate_model(model_linear, mnist_loader_test)


Strategy 1: Training Linear layers only
Epoch 1/5 - Loss: 0.3151 | Accuracy: 0.9063
Epoch 2/5 - Loss: 0.1294 | Accuracy: 0.9600
Epoch 3/5 - Loss: 0.1031 | Accuracy: 0.9691
Epoch 4/5 - Loss: 0.0927 | Accuracy: 0.9709
Epoch 5/5 - Loss: 0.0878 | Accuracy: 0.9728

Evaluation for Strategy 1 (Linear layers only):
Classification Report:
              precision    recall  f1-score   support

           0     0.9849    0.9959    0.9904       980
           1     0.9938    0.9930    0.9934      1135
           2     0.9902    0.9758    0.9829      1032
           3     0.9891    0.9851    0.9871      1010
           4     0.9938    0.9766    0.9851       982
           5     0.9779    0.9910    0.9844       892
           6     0.9885    0.9875    0.9880       958
           7     0.9797    0.9864    0.9830      1028
           8     0.9688    0.9867    0.9776       974
           9     0.9830    0.9732    0.9781      1009

    accuracy                         0.9851     10000
   macro avg     

In [52]:
# -------- Strategy 2: Freezing Initial Few Layers --------
print("\nStrategy 2: Freezing the initial few layers of the feature extractor")
# Initialize a new model and load pre-trained weights.
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU machine can still be loaded when only a CPU is available.
model_partial = SimpleCNN(num_classes=10).to(device)
model_partial.load_state_dict(torch.load("stl10_pretrained.pth", map_location=device))
# Freeze the first convolutional block (the first conv+ReLU+pool), i.e. the first
# three modules of `features`. Slicing the Sequential selects the layers by
# position; testing for the characters "0"/"1"/"2" anywhere in the parameter name
# would also match unrelated layers such as "10.weight" in a deeper network.
for param in model_partial.features[:3].parameters():
    param.requires_grad = False

# Create optimizer for parameters that are not frozen.
optimizer_partial = optim.Adam(filter(lambda p: p.requires_grad, model_partial.parameters()), lr=0.001)
# Train on MNIST
train_model(model_partial, optimizer_partial, mnist_loader_train, num_epochs=5)
# Evaluate on MNIST test set.
print("\nEvaluation for Strategy 2 (Partial freezing):")
evaluate_model(model_partial, mnist_loader_test)


Strategy 2: Freezing the initial few layers of the feature extractor
Epoch 1/5 - Loss: 0.2398 | Accuracy: 0.9263
Epoch 2/5 - Loss: 0.0725 | Accuracy: 0.9783
Epoch 3/5 - Loss: 0.0535 | Accuracy: 0.9842
Epoch 4/5 - Loss: 0.0438 | Accuracy: 0.9866
Epoch 5/5 - Loss: 0.0386 | Accuracy: 0.9888

Evaluation for Strategy 2 (Partial freezing):
Classification Report:
              precision    recall  f1-score   support

           0     0.9899    0.9969    0.9934       980
           1     0.9930    0.9974    0.9952      1135
           2     0.9884    0.9922    0.9903      1032
           3     0.9921    0.9921    0.9921      1010
           4     0.9939    0.9949    0.9944       982
           5     0.9899    0.9910    0.9905       892
           6     0.9989    0.9885    0.9937       958
           7     0.9826    0.9903    0.9864      1028
           8     0.9959    0.9928    0.9943       974
           9     0.9950    0.9822    0.9885      1009

    accuracy                         0.9919 

In [53]:
# -------- Strategy 3: Fine-Tuning the Entire Network --------
print("\nStrategy 3: Fine-tuning the entire network")
# Initialize a new model and load pre-trained weights.
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU machine can still be loaded when only a CPU is available.
model_finetune = SimpleCNN(num_classes=10).to(device)
model_finetune.load_state_dict(torch.load("stl10_pretrained.pth", map_location=device))
# Ensure all parameters are trainable.
for param in model_finetune.parameters():
    param.requires_grad = True

optimizer_finetune = optim.Adam(model_finetune.parameters(), lr=0.001)
# Train on MNIST
train_model(model_finetune, optimizer_finetune, mnist_loader_train, num_epochs=5)
# Evaluate on MNIST test set.
print("\nEvaluation for Strategy 3 (Fine-tuning entire network):")
evaluate_model(model_finetune, mnist_loader_test)


Strategy 3: Fine-tuning the entire network
Epoch 1/5 - Loss: 0.2654 | Accuracy: 0.9180
Epoch 2/5 - Loss: 0.0867 | Accuracy: 0.9738
Epoch 3/5 - Loss: 0.0644 | Accuracy: 0.9809
Epoch 4/5 - Loss: 0.0526 | Accuracy: 0.9844
Epoch 5/5 - Loss: 0.0482 | Accuracy: 0.9857

Evaluation for Strategy 3 (Fine-tuning entire network):
Classification Report:
              precision    recall  f1-score   support

           0     0.9919    0.9980    0.9949       980
           1     0.9947    0.9974    0.9960      1135
           2     0.9942    0.9903    0.9922      1032
           3     0.9960    0.9881    0.9920      1010
           4     0.9830    1.0000    0.9914       982
           5     0.9899    0.9922    0.9910       892
           6     0.9968    0.9854    0.9911       958
           7     0.9836    0.9932    0.9884      1028
           8     0.9888    0.9938    0.9913       974
           9     0.9980    0.9782    0.9880      1009

    accuracy                         0.9917     10000
   mac