<a href="https://colab.research.google.com/github/zachary013/lab2-deep-learning/blob/main/lab2-deep-learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
import time

# Seed torch's global RNG so weight initialisation and DataLoader shuffling repeat
# on a fresh Restart & Run All. (CUDA kernels may still add small run-to-run noise.)
SEED = 42
torch.manual_seed(SEED)

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# PIL image -> float tensor, then standardise with mean 0.1307 / std 0.3081
# (a single value each because the images have one channel).
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# MNIST train / test splits, downloaded into ./data on first use.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for 1x28x28 images that outputs 10 logits.

    Layout: two ``conv(3x3, padding=1) -> ReLU -> 2x2 max-pool`` stages
    (1 -> 16 -> 32 channels, 28 -> 14 -> 7 pixels per side) followed by two
    fully connected layers (32*7*7 -> 128 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 10)``.

        Args:
            x: tensor of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: if the spatial size does not reduce to 7x7, because
                the flattened features then no longer match ``fc1``.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 32*7*7) this can
        # never fold extra spatial positions into the batch dimension, so a wrongly
        # sized input fails loudly at fc1 instead of returning extra rows of logits.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# --- Baseline CNN: build, train for 3 epochs, evaluate on the test split ---

# Model, loss and optimiser. `criterion` and both loaders are reused by later cells.
model_cnn = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_cnn.parameters(), lr=0.001)

# 28x28 loaders: shuffled for training, fixed order for evaluation.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Training: only the optimisation loop is timed.
t0 = time.time()
for _epoch in range(3):
    model_cnn.train()
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model_cnn(batch_x), batch_y)
        batch_loss.backward()
        optimizer.step()
cnn_time = time.time() - t0

# Evaluation: collect predictions/labels and accumulate the per-batch mean loss.
model_cnn.eval()
cnn_preds = []
cnn_true = []
cnn_loss = 0
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        logits = model_cnn(batch_x)
        cnn_loss += criterion(logits, batch_y).item()
        top_class = torch.max(logits, 1).indices
        cnn_preds.extend(top_class.cpu().numpy())
        cnn_true.extend(batch_y.cpu().numpy())

# Reported loss is the mean of the per-batch means.
cnn_loss = cnn_loss / len(test_loader)
cnn_acc = accuracy_score(cnn_true, cnn_preds)
cnn_f1 = f1_score(cnn_true, cnn_preds, average='macro')

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# --- Faster R-CNN used as a digit classifier (one full-frame box per image) ---

# torchvision detection models reserve class index 0 for "background". The ten
# digits are therefore mapped to detector classes 1..10 and the head gets
# NUM_DIGITS + 1 outputs; otherwise digit 0 would be trained as background and
# could never be predicted.
NUM_DIGITS = 10
LABEL_OFFSET = 1
# Placeholder prediction for images on which the detector returns no box at all.
NO_DETECTION = -1


def _to_detection_batch(images, labels, device):
    """Convert a collated classification batch into Faster R-CNN inputs.

    Args:
        images: tensor of shape (batch, 1, 28, 28) as produced by the DataLoader.
        labels: tensor of shape (batch,) holding digit labels 0..9.
        device: device the returned tensors are placed on.

    Returns:
        (image_list, targets): a list of 3-channel image tensors and a list of
        target dicts, each with one box covering the whole 28x28 frame and the
        class index ``digit + LABEL_OFFSET``.
    """
    # The detector normalises with 3-channel statistics; expanding the grey channel
    # makes explicit what broadcasting would otherwise do implicitly.
    image_list = [img.to(device).expand(3, -1, -1) for img in images]
    targets = [{'boxes': torch.tensor([[0, 0, 28, 28]], dtype=torch.float32, device=device),
                'labels': (label.reshape(1).to(torch.int64) + LABEL_OFFSET).to(device)}
               for label in labels]
    return image_list, targets


train_loader_frcnn = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader_frcnn = DataLoader(test_dataset, batch_size=2, shuffle=False)

# COCO-pretrained detector with the box-classification head swapped for our classes.
model_frcnn = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model_frcnn.roi_heads.box_predictor.cls_score.in_features
model_frcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_DIGITS + 1)
model_frcnn = model_frcnn.to(device)
optimizer = optim.Adam(model_frcnn.parameters(), lr=0.001)

# Training: in train mode the model returns a dict of losses, which are summed.
start = time.time()
for epoch in range(2):
    model_frcnn.train()
    for images, labels in train_loader_frcnn:
        images, targets = _to_detection_batch(images, labels, device)
        optimizer.zero_grad()
        loss_dict = model_frcnn(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
frcnn_time = time.time() - start

# Evaluation.
model_frcnn.eval()
frcnn_preds, frcnn_true, frcnn_loss = [], [], 0
with torch.no_grad():
    for images, labels in test_loader_frcnn:
        images, targets = _to_detection_batch(images, labels, device)
        # Losses are only returned in train mode, so switch briefly (still under
        # no_grad) to measure the test loss, then switch back for predictions.
        model_frcnn.train()
        loss_dict = model_frcnn(images, targets)
        frcnn_loss += sum(loss for loss in loss_dict.values()).item()
        model_frcnn.eval()
        outputs = model_frcnn(images)
        for out, label in zip(outputs, labels):
            if out['labels'].numel() > 0:
                # Take the first detection returned (torchvision lists detections
                # by decreasing score) and map the class index back to a digit.
                pred = out['labels'][0].item() - LABEL_OFFSET
            else:
                # No box survived score filtering: count it as a miss instead of
                # crashing on an empty tensor.
                pred = NO_DETECTION
            frcnn_preds.append(pred)
            frcnn_true.append(label.item())
frcnn_loss /= len(test_loader_frcnn)
frcnn_acc = accuracy_score(frcnn_true, frcnn_preds)
# Restrict the macro average to the ten digits so NO_DETECTION is not scored as a class.
frcnn_f1 = f1_score(frcnn_true, frcnn_preds, average='macro', labels=list(range(NUM_DIGITS)))

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:03<00:00, 53.3MB/s]


In [None]:
# One line per model: accuracy, macro-F1, mean test loss and training wall time.
print(
    f"CNN: Acc={cnn_acc:.4f}, F1={cnn_f1:.4f}, Loss={cnn_loss:.4f}, Time={cnn_time:.2f}s",
    f"Faster R-CNN: Acc={frcnn_acc:.4f}, F1={frcnn_f1:.4f}, Loss={frcnn_loss:.4f}, Time={frcnn_time:.2f}s",
    sep="\n",
)

In [None]:
from torchvision.models import vgg16, alexnet

def _finetune_and_score(model, optimizer, train_loader, test_loader, loss_fn, epochs, device):
    """Train ``model`` for ``epochs`` passes, then evaluate it on ``test_loader``.

    Args:
        model: classifier already placed on ``device``; must return class logits.
        optimizer: optimiser bound to ``model``'s parameters.
        train_loader: iterable of ``(images, labels)`` training batches.
        test_loader: iterable of ``(images, labels)`` evaluation batches.
        loss_fn: callable ``loss_fn(logits, labels)`` returning a scalar tensor.
        epochs: number of passes over ``train_loader``.
        device: device each batch is moved to.

    Returns:
        Tuple ``(accuracy, macro_f1, mean_batch_loss, train_seconds, preds, truth)``.
        ``train_seconds`` covers the training loop only; ``mean_batch_loss`` is the
        mean of the per-batch mean losses on the test set.
    """
    started = time.time()
    for _ in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = loss_fn(model(images), labels)
            loss.backward()
            optimizer.step()
    train_seconds = time.time() - started

    model.eval()
    preds, truth, total_loss = [], [], 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_loss += loss_fn(outputs, labels).item()
            _, batch_preds = torch.max(outputs, 1)
            preds.extend(batch_preds.cpu().numpy())
            truth.extend(labels.cpu().numpy())
    mean_batch_loss = total_loss / len(test_loader)
    accuracy = accuracy_score(truth, preds)
    macro_f1 = f1_score(truth, preds, average='macro')
    return accuracy, macro_f1, mean_batch_loss, train_seconds, preds, truth


# MNIST upscaled to 224x224 for the pretrained architectures below.
transform_resized = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset_resized = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_resized)
test_dataset_resized = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_resized)
train_loader_resized = DataLoader(train_dataset_resized, batch_size=16, shuffle=True)
test_loader_resized = DataLoader(test_dataset_resized, batch_size=16, shuffle=False)

# VGG16: swap the first conv for a 1-channel one and the last layer for 10 classes.
# The replacement layers are freshly initialised; all other layers keep pretrained weights.
model_vgg = vgg16(weights="DEFAULT")
model_vgg.features[0] = nn.Conv2d(1, 64, 3, padding=1)
model_vgg.classifier[6] = nn.Linear(4096, 10)
model_vgg = model_vgg.to(device)
optimizer_vgg = optim.Adam(model_vgg.parameters(), lr=0.001)
# `criterion` is the CrossEntropyLoss created in the CNN cell.
vgg_acc, vgg_f1, vgg_loss, vgg_time, vgg_preds, vgg_true = _finetune_and_score(
    model_vgg, optimizer_vgg, train_loader_resized, test_loader_resized, criterion, 2, device)

# AlexNet: same two substitutions (first conv -> 1 input channel, last layer -> 10 classes).
model_alex = alexnet(weights="DEFAULT")
model_alex.features[0] = nn.Conv2d(1, 64, 11, stride=4, padding=2)
model_alex.classifier[6] = nn.Linear(4096, 10)
model_alex = model_alex.to(device)
optimizer_alex = optim.Adam(model_alex.parameters(), lr=0.001)
alex_acc, alex_f1, alex_loss, alex_time, alex_preds, alex_true = _finetune_and_score(
    model_alex, optimizer_alex, train_loader_resized, test_loader_resized, criterion, 2, device)

print(f"VGG16: Acc={vgg_acc:.4f}, F1={vgg_f1:.4f}, Loss={vgg_loss:.4f}, Time={vgg_time:.2f}s")
print(f"AlexNet: Acc={alex_acc:.4f}, F1={alex_f1:.4f}, Loss={alex_loss:.4f}, Time={alex_time:.2f}s")
# NOTE(review): the conclusion below is a fixed string, not derived from the metrics
# printed above -- confirm it still matches the numbers after each run.
print("Conclusion: VGG16 and AlexNet beat CNN with pretrained weights; Faster R-CNN is slower, less suited for MNIST.")

In [None]:
class ViT(nn.Module):
    """Minimal Vision Transformer for single-channel square images.

    The image is cut into non-overlapping ``patch_size`` x ``patch_size`` tiles,
    each tile is flattened and linearly embedded, a learnable class token is
    prepended, learnable position embeddings are added, and the class-token
    output of the transformer encoder is mapped to ``num_classes`` logits.

    Args:
        img_size: side length of the (square) input image.
        patch_size: side length of each patch; must divide ``img_size``.
        num_classes: number of output logits.
        dim: embedding width used throughout the encoder.
        depth: number of encoder layers.
        heads: attention heads per layer.
        mlp_dim: hidden width of each layer's feed-forward block.

    Raises:
        ValueError: if ``img_size`` is not a multiple of ``patch_size``.
    """

    def __init__(self, img_size=28, patch_size=7, num_classes=10, dim=64, depth=6, heads=4, mlp_dim=128):
        super().__init__()
        if img_size % patch_size != 0:
            raise ValueError(f"img_size ({img_size}) must be divisible by patch_size ({patch_size})")
        # Kept so forward() can tile the image without hard-coded sizes.
        self.patch_size = patch_size
        num_patches = (img_size // patch_size) ** 2
        patch_dim = 1 * patch_size * patch_size
        self.patch_embed = nn.Linear(patch_dim, dim)
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        # batch_first=True: inputs are (batch, tokens, dim). With the default layout
        # the encoder would treat the batch axis as the sequence and attend across
        # different images instead of across the patches of one image.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim, nhead=heads, dim_feedforward=mlp_dim, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        self.mlp_head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, num_classes))

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for ``x`` of shape ``(batch, 1, H, W)``."""
        b, c, h, w = x.shape
        p = self.patch_size
        # (b, c, h, w) -> (b, c, h/p, p, w/p, p) -> (b, h/p, w/p, c, p, p)
        # -> (b, num_patches, c*p*p): each row is one flattened p x p tile.
        x = x.reshape(b, c, h // p, p, w // p, p).permute(0, 2, 4, 1, 3, 5).reshape(b, -1, c * p * p)
        x = self.patch_embed(x)
        cls_tokens = self.cls_token.expand(b, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.transformer(x)
        # The class token (position 0) summarises the image.
        x = x[:, 0]
        return self.mlp_head(x)

# --- Vision Transformer: train for 3 epochs, evaluate on the test split ---
# Reuses `train_loader`, `test_loader` and `criterion` from the CNN cell.

model_vit = ViT().to(device)
optimizer_vit = optim.Adam(model_vit.parameters(), lr=0.001)

# Training: only the optimisation loop is timed.
t0 = time.time()
for _epoch in range(3):
    model_vit.train()
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer_vit.zero_grad()
        batch_loss = criterion(model_vit(batch_x), batch_y)
        batch_loss.backward()
        optimizer_vit.step()
vit_time = time.time() - t0

# Evaluation: collect predictions/labels and accumulate the per-batch mean loss.
model_vit.eval()
vit_preds = []
vit_true = []
vit_loss = 0
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        logits = model_vit(batch_x)
        vit_loss += criterion(logits, batch_y).item()
        top_class = torch.max(logits, 1).indices
        vit_preds.extend(top_class.cpu().numpy())
        vit_true.extend(batch_y.cpu().numpy())

# Reported loss is the mean of the per-batch means.
vit_loss = vit_loss / len(test_loader)
vit_acc = accuracy_score(vit_true, vit_preds)
vit_f1 = f1_score(vit_true, vit_preds, average='macro')

In [None]:
# ViT metrics followed by the notebook's closing remark.
# NOTE(review): the remark is a fixed string, not computed from the metrics -- confirm
# it still agrees with the printed numbers.
print(
    f"ViT: Acc={vit_acc:.4f}, F1={vit_f1:.4f}, Loss={vit_loss:.4f}, Time={vit_time:.2f}s",
    "ViT is slower than CNN but close in accuracy; VGG16/AlexNet outperform due to transfer learning.",
    sep="\n",
)