<a href="https://colab.research.google.com/github/zachary013/lab2-deep-learning/blob/main/lab2_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
from sklearn.metrics import accuracy_score, f1_score

# Seed PyTorch's global RNG before any model or DataLoader is created so that
# weight initialisation, dropout masks and shuffling order are repeatable on a
# fresh "Restart & Run All" (GPU kernels may still add small run-to-run noise).
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one; otherwise run everything on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
# Preprocessing shared by the MNIST train and test splits: convert each image
# to a float tensor, then standardise the single channel with the mean / std
# below (presumably the usual MNIST statistics).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Both splits are stored under ./data and downloaded on first use.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training split is shuffled; the test split keeps its stored order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 16.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 478kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.84MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.13MB/s]


In [3]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier that emits 10 raw scores.

    Each stage is a 3x3 convolution with padding 1, a ReLU, and a 2x2 max-pool
    with stride 2. The pooled maps are flattened to ``64 * 7 * 7`` features,
    passed through dropout (p=0.5) and a 128-unit ReLU layer, and finally
    projected to 10 outputs. The flatten size matches 28x28 single-channel
    inputs (two poolings: 28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 feature maps.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 feature maps.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head; dropout acts on the flattened features before fc1.
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return un-normalised class scores (no softmax is applied here)."""
        stage1 = self.pool1(F.relu(self.conv1(x)))
        stage2 = self.pool2(F.relu(self.conv2(stage1)))
        # Collapse (channels, height, width) into one feature axis per sample.
        flat = stage2.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(self.dropout(flat)))
        return self.fc2(hidden)

# Build the baseline CNN, then move its parameters onto the selected device.
model_cnn = CNN()
model_cnn = model_cnn.to(device)

In [4]:
def train_model(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader``.

    Puts the model in training mode and, for every batch, moves the tensors to
    ``device``, clears the gradients, runs forward and backward passes and
    applies one optimizer step.

    Returns:
        (epoch_loss, epoch_acc): the mean of the per-batch loss values (summed
        losses divided by the number of batches) and the percentage of samples
        whose highest-scoring class equals the label.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # Predicted class = index of the largest score along the class axis;
        # detach() keeps this bookkeeping out of the autograd graph.
        top_class = torch.max(logits.detach(), dim=1).indices
        n_seen += batch_labels.size(0)
        n_correct += top_class.eq(batch_labels).sum().item()

    return loss_sum / len(train_loader), 100 * n_correct / n_seen

def evaluate_model(model, test_loader, criterion, device):
    """Score ``model`` on every batch of ``test_loader`` without gradients.

    Puts the model in evaluation mode, collects the highest-scoring class for
    each sample together with its label, and accumulates the per-batch loss.

    Returns:
        (test_loss, test_acc, test_f1): the mean of the per-batch loss values,
        the accuracy as a percentage, and the macro-averaged F1 score as a
        percentage.
    """
    model.eval()
    loss_sum = 0.0
    predictions = []
    targets = []

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            # Index of the largest score along the class axis.
            top_class = torch.max(logits, dim=1).indices
            predictions.extend(top_class.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    mean_loss = loss_sum / len(test_loader)
    accuracy_pct = 100 * accuracy_score(targets, predictions)
    macro_f1_pct = 100 * f1_score(targets, predictions, average='macro')
    return mean_loss, accuracy_pct, macro_f1_pct

In [5]:
# Loss, optimizer and epoch budget for the baseline CNN. `criterion` and
# `num_epochs` are module-level names that the later training cells reuse.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_cnn.parameters(), lr=1e-3)
num_epochs = 10

# Wall-clock time covers the training epochs only, not the final evaluation.
start_time = time.time()
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(
        model=model_cnn,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
cnn_training_time = time.time() - start_time

test_loss_cnn, test_acc_cnn, test_f1_cnn = evaluate_model(
    model=model_cnn,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)
print(f"CNN - Test Loss: {test_loss_cnn:.4f}, Test Acc: {test_acc_cnn:.2f}%, Test F1: {test_f1_cnn:.2f}%, Training Time: {cnn_training_time:.2f}s")

Epoch 1/10, Train Loss: 0.1622, Train Acc: 95.06%
Epoch 2/10, Train Loss: 0.0641, Train Acc: 98.00%
Epoch 3/10, Train Loss: 0.0482, Train Acc: 98.50%
Epoch 4/10, Train Loss: 0.0394, Train Acc: 98.78%
Epoch 5/10, Train Loss: 0.0350, Train Acc: 98.88%
Epoch 6/10, Train Loss: 0.0312, Train Acc: 98.97%
Epoch 7/10, Train Loss: 0.0256, Train Acc: 99.21%
Epoch 8/10, Train Loss: 0.0241, Train Acc: 99.21%
Epoch 9/10, Train Loss: 0.0218, Train Acc: 99.31%
Epoch 10/10, Train Loss: 0.0195, Train Acc: 99.34%
CNN - Test Loss: 0.0231, Test Acc: 99.25%, Test F1: 99.25%, Training Time: 155.27s


In [6]:
from torchvision.models import resnet50

# Load ImageNet-pretrained ResNet-50. `pretrained=True` is deprecated in
# torchvision; the `weights` argument is its replacement, and
# "IMAGENET1K_V1" selects the checkpoint that `pretrained=True` loaded.
model_resnet = resnet50(weights="IMAGENET1K_V1")
# Read the head's input width from the existing layer instead of hard-coding it.
model_resnet.fc = nn.Linear(model_resnet.fc.in_features, 10)  # Replace fc layer for 10 classes
model_resnet = model_resnet.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 193MB/s]


In [7]:
# Preprocessing for the pretrained backbones trained below: resize to 224x224,
# convert to a tensor, tile the single channel three times, then normalise
# each channel with the mean / std below (presumably the ImageNet statistics).
transform_resnet = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.repeat(3, 1, 1)),  # Repeat grayscale to 3 channels
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Same MNIST files as before (root ./data), now seen through the 3-channel transform.
train_dataset_resnet = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform_resnet,
)
test_dataset_resnet = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform_resnet,
)

# Only the training split is shuffled.
train_loader_resnet = DataLoader(dataset=train_dataset_resnet, batch_size=64, shuffle=True)
test_loader_resnet = DataLoader(dataset=test_dataset_resnet, batch_size=64, shuffle=False)

In [None]:
# Every ResNet-50 parameter is handed to Adam, so the whole network is updated.
optimizer_resnet = optim.Adam(params=model_resnet.parameters(), lr=1e-3)

# Wall-clock time covers the training epochs only, not the final evaluation.
start_time = time.time()
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(
        model=model_resnet,
        train_loader=train_loader_resnet,
        criterion=criterion,
        optimizer=optimizer_resnet,
        device=device,
    )
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
resnet_training_time = time.time() - start_time

test_loss_resnet, test_acc_resnet, test_f1_resnet = evaluate_model(
    model=model_resnet,
    test_loader=test_loader_resnet,
    criterion=criterion,
    device=device,
)
print(f"ResNet50 - Test Loss: {test_loss_resnet:.4f}, Test Acc: {test_acc_resnet:.2f}%, Test F1: {test_f1_resnet:.2f}%, Training Time: {resnet_training_time:.2f}s")

In [None]:
# Side-by-side summary of the two models evaluated so far, one line per model.
print(
    "Model Comparison:",
    f"CNN: Loss={test_loss_cnn:.4f}, Acc={test_acc_cnn:.2f}%, F1={test_f1_cnn:.2f}%, Time={cnn_training_time:.2f}s",
    f"ResNet50: Loss={test_loss_resnet:.4f}, Acc={test_acc_resnet:.2f}%, F1={test_f1_resnet:.2f}%, Time={resnet_training_time:.2f}s",
    sep="\n",
)

In [None]:
from torchvision.models import vgg16

# Load ImageNet-pretrained VGG-16. `pretrained=True` is deprecated in
# torchvision; the `weights` argument is its replacement, and
# "IMAGENET1K_V1" selects the checkpoint that `pretrained=True` loaded.
model_vgg = vgg16(weights="IMAGENET1K_V1")
# Swap classifier[6] for a 10-way layer, taking the input width from the
# layer being replaced rather than hard-coding it.
model_vgg.classifier[6] = nn.Linear(model_vgg.classifier[6].in_features, 10)
model_vgg = model_vgg.to(device)

optimizer_vgg = optim.Adam(model_vgg.parameters(), lr=0.001)

# VGG-16 reuses the 224x224 three-channel loaders built for ResNet-50.
# Wall-clock time covers the training epochs only, not the final evaluation.
start_time = time.time()
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(model_vgg, train_loader_resnet, criterion, optimizer_vgg, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
vgg_training_time = time.time() - start_time

test_loss_vgg, test_acc_vgg, test_f1_vgg = evaluate_model(model_vgg, test_loader_resnet, criterion, device)
print(f"VGG16 - Test Loss: {test_loss_vgg:.4f}, Test Acc: {test_acc_vgg:.2f}%, Test F1: {test_f1_vgg:.2f}%, Training Time: {vgg_training_time:.2f}s")

In [None]:
from torchvision.models import alexnet

# Load ImageNet-pretrained AlexNet. `pretrained=True` is deprecated in
# torchvision; the `weights` argument is its replacement, and
# "IMAGENET1K_V1" selects the checkpoint that `pretrained=True` loaded.
model_alex = alexnet(weights="IMAGENET1K_V1")
# Swap classifier[6] for a 10-way layer, taking the input width from the
# layer being replaced rather than hard-coding it.
model_alex.classifier[6] = nn.Linear(model_alex.classifier[6].in_features, 10)
model_alex = model_alex.to(device)

optimizer_alex = optim.Adam(model_alex.parameters(), lr=0.001)

# AlexNet reuses the 224x224 three-channel loaders built for ResNet-50.
# Wall-clock time covers the training epochs only, not the final evaluation.
start_time = time.time()
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(model_alex, train_loader_resnet, criterion, optimizer_alex, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
alex_training_time = time.time() - start_time

test_loss_alex, test_acc_alex, test_f1_alex = evaluate_model(model_alex, test_loader_resnet, criterion, device)
print(f"AlexNet - Test Loss: {test_loss_alex:.4f}, Test Acc: {test_acc_alex:.2f}%, Test F1: {test_f1_alex:.2f}%, Training Time: {alex_training_time:.2f}s")

In [None]:
# NOTE: this cell used to be a verbatim copy of the AlexNet cell directly
# above. On "Run All" it reloaded the pretrained AlexNet and trained it for
# `num_epochs` a second time, overwriting model_alex, alex_training_time and
# the test_*_alex metrics with the second run's values. The repeated training
# is dropped; the results computed by the cell above are reported unchanged.
print(f"AlexNet - Test Loss: {test_loss_alex:.4f}, Test Acc: {test_acc_alex:.2f}%, Test F1: {test_f1_alex:.2f}%, Training Time: {alex_training_time:.2f}s")

In [None]:
# Summary of the four convolutional models, one line per model.
print(
    "Final Comparison:",
    f"CNN: Loss={test_loss_cnn:.4f}, Acc={test_acc_cnn:.2f}%, F1={test_f1_cnn:.2f}%, Time={cnn_training_time:.2f}s",
    f"ResNet50: Loss={test_loss_resnet:.4f}, Acc={test_acc_resnet:.2f}%, F1={test_f1_resnet:.2f}%, Time={resnet_training_time:.2f}s",
    f"VGG16: Loss={test_loss_vgg:.4f}, Acc={test_acc_vgg:.2f}%, F1={test_f1_vgg:.2f}%, Time={vgg_training_time:.2f}s",
    f"AlexNet: Loss={test_loss_alex:.4f}, Acc={test_acc_alex:.2f}%, F1={test_f1_alex:.2f}%, Time={alex_training_time:.2f}s",
    sep="\n",
)

In [None]:
class ViT(nn.Module):
    """Minimal Vision Transformer classifier.

    The image is cut into non-overlapping ``patch_size`` x ``patch_size``
    patches. Each patch is flattened and linearly embedded to ``dim``
    features, a learnable class token is prepended, learnable positional
    embeddings are added, and the sequence is passed through a Transformer
    encoder. The class-token output is normalised and projected to
    ``num_classes`` scores.

    Args:
        image_size: Side length of the (square) input images; fixes the number
            of positional embeddings.
        patch_size: Side length of each square patch.
        num_classes: Number of output scores.
        dim: Token embedding width (``d_model`` of the encoder).
        depth: Number of encoder layers.
        heads: Number of attention heads per layer.
        mlp_dim: Hidden width of each layer's feed-forward block.
        channels: Number of input image channels (default 1, the value that
            was previously hard-coded).
    """

    def __init__(self, image_size=28, patch_size=7, num_classes=10, dim=64, depth=6, heads=8, mlp_dim=128, channels=1):
        super(ViT, self).__init__()
        # Kept on the instance so forward() splits patches with the same size
        # the embedding layers were built for.
        self.patch_size = patch_size
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * patch_size * patch_size

        self.patch_embedding = nn.Linear(patch_dim, dim)
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        # batch_first=True: forward() feeds (batch, tokens, dim). With the
        # default (sequence-first) layout the encoder would treat the batch
        # axis as the sequence and attend across different images.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim, nhead=heads, dim_feedforward=mlp_dim, batch_first=True
            ),
            num_layers=depth,
        )
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, x):
        """Map images of shape (batch, channels, H, W) to (batch, num_classes) scores."""
        b, c, h, w = x.shape
        p = self.patch_size
        grid_h, grid_w = h // p, w // p
        # (b, c, H, W) -> (b, grid_h, grid_w, c, p, p) -> (b, n_patches, c*p*p)
        x = (
            x.reshape(b, c, grid_h, p, grid_w, p)
            .permute(0, 2, 4, 1, 3, 5)
            .reshape(b, grid_h * grid_w, -1)
        )
        x = self.patch_embedding(x)
        cls_tokens = self.cls_token.expand(b, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embedding
        x = self.transformer(x)
        # Only the class-token position (index 0) feeds the classifier head.
        x = self.mlp_head(x[:, 0])
        return x

# Build the ViT with its default hyper-parameters, then move it to the device.
model_vit = ViT()
model_vit = model_vit.to(device)

In [None]:
# The ViT trains on the original single-channel MNIST loaders, not the
# 224x224 three-channel ones.
optimizer_vit = optim.Adam(params=model_vit.parameters(), lr=1e-3)

# Wall-clock time covers the training epochs only, not the final evaluation.
start_time = time.time()
for epoch in range(num_epochs):
    train_loss, train_acc = train_model(
        model=model_vit,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer_vit,
        device=device,
    )
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
vit_training_time = time.time() - start_time

test_loss_vit, test_acc_vit, test_f1_vit = evaluate_model(
    model=model_vit,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)
print(f"ViT - Test Loss: {test_loss_vit:.4f}, Test Acc: {test_acc_vit:.2f}%, Test F1: {test_f1_vit:.2f}%, Training Time: {vit_training_time:.2f}s")

In [None]:
# Overall summary: the four convolutional models followed by the ViT.
print(
    "Part 1 vs Part 2 Comparison:",
    f"CNN: Loss={test_loss_cnn:.4f}, Acc={test_acc_cnn:.2f}%, F1={test_f1_cnn:.2f}%, Time={cnn_training_time:.2f}s",
    f"ResNet50: Loss={test_loss_resnet:.4f}, Acc={test_acc_resnet:.2f}%, F1={test_f1_resnet:.2f}%, Time={resnet_training_time:.2f}s",
    f"VGG16: Loss={test_loss_vgg:.4f}, Acc={test_acc_vgg:.2f}%, F1={test_f1_vgg:.2f}%, Time={vgg_training_time:.2f}s",
    f"AlexNet: Loss={test_loss_alex:.4f}, Acc={test_acc_alex:.2f}%, F1={test_f1_alex:.2f}%, Time={alex_training_time:.2f}s",
    f"ViT: Loss={test_loss_vit:.4f}, Acc={test_acc_vit:.2f}%, F1={test_f1_vit:.2f}%, Time={vit_training_time:.2f}s",
    sep="\n",
)