# 1a

In [None]:
import argparse
import os
import time

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader, random_split, ConcatDataset
from torchvision.models import resnet18, resnet50

# Command-line interface: every hyperparameter of a run is passed explicitly so
# that a sweep can be driven from a shell loop, one process per configuration.
parser = argparse.ArgumentParser(
    description="Train a ResNet classifier on MNIST or FashionMNIST and log the results."
)
parser.add_argument('--dataset', type=str, required=True, choices=['MNIST', 'FashionMNIST'],
                    help="dataset to train and evaluate on")
parser.add_argument('--model', type=str, required=True, choices=['resnet18', 'resnet50'],
                    help="network architecture")
parser.add_argument('--batch_size', type=int, required=True,
                    help="mini-batch size used by all three DataLoaders")
parser.add_argument('--optimizer', type=str, required=True, choices=['SGD', 'Adam'],
                    help="optimizer used for training")
parser.add_argument('--lr', type=float, required=True,
                    help="learning rate")
parser.add_argument('--epochs', type=int, default=4,
                    help="number of training epochs (default: 4)")
parser.add_argument('--pin_memory', type=str, default='True', choices=['True', 'False'],
                    help="whether the DataLoaders use pinned host memory (default: True)")
args = parser.parse_args()

# --pin_memory arrives as the string 'True' or 'False'; the comparison itself
# already yields the boolean, no conditional expression needed.
use_pin_mem = args.pin_memory == 'True'
# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing applied to every image: replicate the single grey channel to
# three channels, resize to 64x64, convert to a tensor and normalise with
# mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

data_path = './data'

# Both datasets share the same constructor signature, so pick the class once
# instead of duplicating the two constructor calls per branch.
dataset_cls = torchvision.datasets.MNIST if args.dataset == 'MNIST' else torchvision.datasets.FashionMNIST
d1 = dataset_cls(root=data_path, train=True, download=True, transform=transform)
d2 = dataset_cls(root=data_path, train=False, download=True, transform=transform)

# Merge the official train and test parts and re-split them 70% / 10% / 20%.
full_dataset = ConcatDataset([d1, d2])
total_size = len(full_dataset)
train_size = int(0.7 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size  # remainder, so the three sizes always sum to total_size

# A fixed seed makes the split identical in every run. Without it each
# hyperparameter configuration would be scored on a different random test set,
# and the cross-run "best accuracy" comparison would not be like-for-like.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(
    full_dataset, [train_size, val_size, test_size], generator=split_generator
)

# Options shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=args.batch_size, pin_memory=use_pin_mem, num_workers=2)
train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_set, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)

# Build the requested architecture with a 10-class head, trained from scratch.
# No `pretrained=` flag is passed: random initialisation is already the default
# and the flag itself is deprecated in recent torchvision releases.
if args.model == 'resnet18':
    model = resnet18(num_classes=10)
else:
    model = resnet50(num_classes=10)

model = model.to(device)
criterion = nn.CrossEntropyLoss()

if args.optimizer == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

# Gradient scaling is only meaningful for CUDA mixed precision. Disabling it
# explicitly on CPU turns the scaler into a pass-through instead of relying on
# it to warn and disable itself.
scaler = torch.cuda.amp.GradScaler(enabled=device.type == 'cuda')
print(f"Running: {args.dataset} {args.model} BS={args.batch_size} Opt={args.optimizer} LR={args.lr} Ep={args.epochs}")

start_time = time.time()
output_dir = "results_final"
# exist_ok avoids the check-then-create race when several runs start in parallel.
os.makedirs(output_dir, exist_ok=True)

# One shared report accumulates a row per run; write the header only once.
summary_file = os.path.join(output_dir, "q1_final_report.csv")
if not os.path.isfile(summary_file):
    with open(summary_file, "w") as f:
        f.write("Dataset,Model,BatchSize,Optimizer,LR,Epochs,PinMem,TestAccuracy,TotalTime\n")

# Per-run learning curve, truncated at the start of every run.
# NOTE(review): the file name does not include batch size or epoch count, so
# runs that differ only in those settings overwrite each other's curve.
curve_file = os.path.join(output_dir, f"curve_{args.dataset}_{args.model}_{args.optimizer}_{args.lr}.csv")
with open(curve_file, "w") as f:
    f.write("Dataset,Model,BatchSize,Optimizer,LR,Epochs,Epoch,TrainLoss,ValAcc\n")

# Mixed precision only applies on CUDA; requesting CUDA autocast on a CPU-only
# machine just produces a warning, so enable it conditionally.
use_amp = device.type == 'cuda'
# Bound before the loop so the summary row written after training is still
# valid when the script is started with --epochs 0.
total_time = 0.0

for epoch in range(args.epochs):
    ep_start = time.time()

    # ---- training pass over the training split ----
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Asynchronous host-to-device copies are what pinned memory enables;
        # with unpinned batches the flag is False and the copy is a normal one.
        inputs = inputs.to(device, non_blocking=use_pin_mem)
        labels = labels.to(device, non_blocking=use_pin_mem)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        # The scaler scales the loss before backward and unscales before the
        # optimizer step; when disabled it simply forwards the calls.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()

    # Mean of the per-batch mean losses.
    avg_loss = running_loss / len(train_loader)

    # ---- validation pass (full precision, no gradients) ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device, non_blocking=use_pin_mem)
            labels = labels.to(device, non_blocking=use_pin_mem)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    epoch_time = time.time() - ep_start
    # Elapsed time since start_time, up to and including this epoch's validation.
    total_time = time.time() - start_time

    # Append this epoch to the per-run learning curve.
    with open(curve_file, "a") as f:
        f.write(f"{args.dataset},{args.model},{args.batch_size},{args.optimizer},{args.lr},{args.epochs},{epoch+1},{avg_loss:.4f},{val_acc:.2f}\n")

    print(f"Ep {epoch+1}/{args.epochs} | Loss: {avg_loss:.4f} | ValAcc: {val_acc:.2f}% | Time: {epoch_time:.2f}s")

# Final evaluation on the held-out test split: count top-1 hits without
# tracking gradients.
model.eval()
correct_test, total_test = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # torch.max over dim 1 returns (values, indices); the indices are the
        # predicted class ids.
        predicted = torch.max(outputs.data, 1)[1]
        total_test += labels.size(0)
        correct_test += (predicted == labels).sum().item()

test_acc = 100 * correct_test / total_test
print(f"TEST ACCURACY: {test_acc:.2f}%")

# One row per run in the shared report, in the same column order as its header.
summary_fields = [
    str(args.dataset),
    str(args.model),
    str(args.batch_size),
    str(args.optimizer),
    str(args.lr),
    str(args.epochs),
    str(args.pin_memory),
    f"{test_acc:.2f}",
    f"{total_time:.2f}",
]
with open(summary_file, "a") as f:
    f.write(",".join(summary_fields) + "\n")

# Per-dataset tracker file holding the best test accuracy seen across runs.
tracker_path = os.path.join(output_dir, f"best_acc_tracker_{args.dataset}.txt")
current_best = 0.0

if os.path.exists(tracker_path):
    with open(tracker_path, "r") as f:
        try:
            current_best = float(f.read().strip())
        except ValueError:
            # Empty or corrupted tracker: keep going with "no previous best",
            # but say so instead of swallowing every possible exception.
            print(f"Warning: could not parse {tracker_path}; treating previous best as 0.0")

if test_acc > current_best:
    # Save the checkpoint first and update the tracker second, so a failed
    # save never leaves the tracker pointing at a model that was not written.
    save_path = os.path.join(output_dir, f"best_model_{args.dataset}.pth")
    torch.save(model.state_dict(), save_path)
    with open(tracker_path, "w") as f:
        f.write(str(test_acc))
    print(f"New best model saved for {args.dataset}: {test_acc:.2f}%")

    # Re-run the test split to collect per-sample predictions for the
    # confusion matrix of the new best model.
    all_preds = []
    all_labels = []

    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().numpy())
            # Labels come straight from the loader and were never moved to the device.
            all_labels.extend(labels.numpy())

    # `confusion_matrix` (sklearn.metrics) and `pd` (pandas) come from the
    # import block at the top of this script.
    cm = confusion_matrix(all_labels, all_preds)

    cm_file = os.path.join(output_dir, f"confusion_matrix_{args.dataset}_best.csv")

    # Human-readable class names for the row and column headers.
    if args.dataset == 'MNIST':
        classes = [str(i) for i in range(10)]
    else:
        classes = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
                   'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    df_cm.to_csv(cm_file)
    print(f"Confusion Matrix saved to: {cm_file}")


### Q1(a) Execution Logs

```text
Running: MNIST resnet18 BS=16 Opt=SGD LR=0.001 Ep=4
Ep 1/4 | Loss: 0.1480 | ValAcc: 98.69% | Time: 49.80s
Ep 2/4 | Loss: 0.0396 | ValAcc: 98.81% | Time: 41.58s
Ep 3/4 | Loss: 0.0225 | ValAcc: 98.96% | Time: 41.66s
Ep 4/4 | Loss: 0.0139 | ValAcc: 99.09% | Time: 41.50s
TEST ACCURACY: 99.24%
New best model saved for MNIST: 99.24%

Running: MNIST resnet18 BS=16 Opt=SGD LR=0.0001 Ep=4
Ep 1/4 | Loss: 0.4686 | ValAcc: 96.14% | Time: 43.26s
Ep 2/4 | Loss: 0.1198 | ValAcc: 97.79% | Time: 32.34s
Ep 3/4 | Loss: 0.0780 | ValAcc: 98.26% | Time: 44.06s
Ep 4/4 | Loss: 0.0573 | ValAcc: 98.44% | Time: 38.00s
TEST ACCURACY: 98.44%

Running: FashionMNIST resnet50 BS=32 Opt=Adam LR=0.001 Ep=10
Ep 1/10 | Loss: 0.5572 | ValAcc: 86.70% | Time: 79.35s
Ep 2/10 | Loss: 0.3904 | ValAcc: 88.96% | Time: 77.39s
Ep 3/10 | Loss: 0.2942 | ValAcc: 88.61% | Time: 78.63s
Ep 4/10 | Loss: 0.2818 | ValAcc: 90.13% | Time: 78.68s
Ep 5/10 | Loss: 0.2529 | ValAcc: 90.11% | Time: 78.65s
Ep 6/10 | Loss: 0.4414 | ValAcc: 84.89% | Time: 78.00s
Ep 7/10 | Loss: 0.4037 | ValAcc: 85.56% | Time: 72.00s
Ep 8/10 | Loss: 0.3766 | ValAcc: 86.37% | Time: 71.44s
Ep 9/10 | Loss: 0.3540 | ValAcc: 85.80% | Time: 70.90s
Ep 10/10 | Loss: 0.3320 | ValAcc: 87.16% | Time: 70.88s
TEST ACCURACY: 87.34%
```

# 1b

In [None]:
import time
import argparse
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
import torchvision
import torchvision.transforms as transforms
import joblib
import os

# Two required, choice-restricted string flags select the experiment.
parser = argparse.ArgumentParser()
for flag, allowed in (('--dataset', ['MNIST', 'FashionMNIST']), ('--kernel', ['poly', 'rbf'])):
    parser.add_argument(flag, type=str, required=True, choices=allowed)
args = parser.parse_args()

print(f"Processing {args.dataset} with {args.kernel} kernel...")

transform = transforms.Compose([transforms.ToTensor()])
data_path = './data'

# The two accepted --dataset values are exactly the torchvision class names.
dataset_cls = getattr(torchvision.datasets, args.dataset)
train_set = dataset_cls(root=data_path, train=True, download=True, transform=transform)
test_set = dataset_cls(root=data_path, train=False, download=True, transform=transform)


def _flat_pixels(dataset):
    """Return the raw images as rows of 784 pixel values divided by 255."""
    return dataset.data.numpy().reshape(-1, 28*28) / 255.0


# Features are read from the raw `.data` tensors, labels from `.targets`.
X_train, y_train = _flat_pixels(train_set), train_set.targets.numpy()
X_test, y_test = _flat_pixels(test_set), test_set.targets.numpy()

# Support-vector classifier with the requested kernel and otherwise default settings.
clf = svm.SVC(kernel=args.kernel)

# Time only the fit. perf_counter() is monotonic, so the measured duration
# cannot be distorted by wall-clock adjustments during a multi-minute fit.
t0 = time.perf_counter()
clf.fit(X_train, y_train)
train_time = (time.perf_counter() - t0) * 1000  # seconds -> milliseconds

y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred) * 100  # fraction -> percent

print(f"Done. Acc: {acc:.2f}%, Time: {train_time:.2f}ms")

# exist_ok avoids the check-then-create race when several runs start at once.
os.makedirs("results_revised", exist_ok=True)

# Shared results table: write the header once, then append one row per run.
csv_path = "results_revised/svm_results.csv"
if not os.path.isfile(csv_path):
    with open(csv_path, "w") as f:
        f.write("Dataset,Kernel,Accuracy,Time_ms\n")

with open(csv_path, "a") as f:
    f.write(f"{args.dataset},{args.kernel},{acc:.2f},{train_time:.2f}\n")

# Per-dataset tracker holding the best accuracy seen across runs.
best_tracker = f"results_revised/best_svm_{args.dataset}.txt"
current_best = 0.0
if os.path.exists(best_tracker):
    with open(best_tracker, "r") as f:
        try:
            current_best = float(f.read().strip())
        except ValueError:
            # Empty or corrupted tracker: fall back to "no previous best",
            # but report it rather than swallowing every exception.
            print(f"Warning: could not parse {best_tracker}; treating previous best as 0.0")

if acc > current_best:
    # Persist the model before updating the tracker so a failed dump never
    # leaves the tracker claiming a model that was not written.
    joblib.dump(clf, f"results_revised/best_svm_{args.dataset}.pkl")
    with open(best_tracker, "w") as f:
        f.write(str(acc))
    print(f"New Best SVM: {acc:.2f}%")


### Q1(b) Execution Logs

```text
Processing MNIST with poly kernel...
Done. Acc: 97.71%, Time: 169357.63ms
New Best SVM: 97.71%

Processing MNIST with rbf kernel...
Done. Acc: 97.92%, Time: 162137.42ms
New Best SVM: 97.92%

Processing FashionMNIST with poly kernel...
Done. Acc: 86.30%, Time: 279310.38ms

Processing FashionMNIST with rbf kernel...
Done. Acc: 88.28%, Time: 222573.50ms
```

# 2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18, resnet50
from torch.utils.data import DataLoader
import time
import os
import sys

def count_flops(model, input_size=(1, 3, 64, 64), device='cpu'):
    """Estimate the per-sample forward cost of ``model`` in GFLOPs.

    Forward hooks are attached to every ``nn.Conv2d`` and ``nn.Linear`` module,
    one random input of shape ``input_size`` is pushed through the model, and
    the operations of the layers that actually ran are summed. A
    multiply-accumulate counts as two operations and a bias add as one. Other
    layer types are not counted.

    Args:
        model: the network to measure; it is moved to ``device``.
        input_size: shape of the dummy input, batch dimension first. The result
            is per sample and does not depend on the batch size.
        device: device on which the dummy forward pass runs.

    Returns:
        The estimated cost as a float, in units of 1e9 operations.
    """
    flops = 0

    def hook(mod, inp, out):
        nonlocal flops
        if isinstance(mod, nn.Conv2d):
            k_h, k_w = mod.kernel_size
            _, out_c, h, w = out.shape
            # Each output element sees in_channels / groups input channels.
            # The formula uses only the per-sample output size, so it must not
            # be divided by the batch size again.
            ops = 2 * (mod.in_channels // mod.groups) * k_h * k_w * out_c * h * w
            if mod.bias is not None:
                ops += out_c * h * w
            flops += ops
        elif isinstance(mod, nn.Linear):
            ops = 2 * mod.in_features * mod.out_features
            if mod.bias is not None:
                ops += mod.out_features
            flops += ops

    hooks = [
        m.register_forward_hook(hook)
        for m in model.modules()
        if isinstance(m, (nn.Conv2d, nn.Linear))
    ]

    was_training = model.training
    try:
        dummy = torch.randn(input_size).to(device)
        model.to(device)
        model.eval()
        with torch.no_grad():
            model(dummy)
    finally:
        # Always detach the hooks and restore the caller's train/eval mode,
        # even if the dummy forward pass raised.
        for h in hooks:
            h.remove()
        model.train(was_training)
    return flops / 1e9

def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of ``model`` over ``loader``, in percent."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


def train_model(device_str, model_name, optimizer_name, epochs=10):
    """Train one ResNet configuration on FashionMNIST and log the results.

    Writes into ``results_q2/``: a per-configuration learning curve CSV, one
    row in ``q2_final_report.csv``, and, when the final test accuracy beats the
    value stored in the per-device tracker file, the model weights.

    Args:
        device_str: device to train on, e.g. ``'cpu'`` or ``'cuda'``.
        model_name: ``'resnet18'`` or ``'resnet50'``.
        optimizer_name: ``'SGD'`` (lr 0.01, momentum 0.9) or ``'Adam'`` (lr 0.001).
        epochs: number of training epochs.

    Raises:
        ValueError: if ``model_name`` or ``optimizer_name`` is not supported.
            An unknown name used to fall through to resnet50 / Adam while the
            CSV rows were still labelled with the requested name.
    """
    model_builders = {'resnet18': resnet18, 'resnet50': resnet50}
    if model_name not in model_builders:
        raise ValueError(f"Unsupported model_name: {model_name!r}")
    if optimizer_name not in ('SGD', 'Adam'):
        raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

    print(f"Running: {device_str} | {model_name} | {optimizer_name}")

    device = torch.device(device_str)

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    data_path = './data'
    train_set = torchvision.datasets.FashionMNIST(root=data_path, train=True, download=True, transform=transform)
    test_set = torchvision.datasets.FashionMNIST(root=data_path, train=False, download=True, transform=transform)

    # Pinned host memory only helps transfers to a CUDA device; on CPU runs it
    # is pure overhead and would skew the CPU side of the benchmark.
    pin = device.type == 'cuda'
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True, pin_memory=pin, num_workers=2)
    test_loader = DataLoader(test_set, batch_size=64, shuffle=False, pin_memory=pin, num_workers=2)

    # Randomly initialised network with a 10-class head. The deprecated
    # `pretrained=False` flag is dropped because that is already the default.
    model = model_builders[model_name](num_classes=10)

    model = model.to(device)
    gflops = count_flops(model, device=device)

    criterion = nn.CrossEntropyLoss()
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    start_train = time.time()

    output_dir = "results_q2"
    os.makedirs(output_dir, exist_ok=True)
    curve_file = os.path.join(output_dir, f"curve_q2_{device_str}_{model_name}_{optimizer_name}.csv")
    with open(curve_file, "w") as f:
        f.write("Device,Model,Optimizer,Epoch,TrainLoss,TestAcc\n")

    # Bound up front so the summary row below is still valid when epochs == 0.
    test_acc = 0.0

    for epoch in range(epochs):
        ep_start = time.time()
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Mean of the per-batch mean losses.
        avg_loss = running_loss / len(train_loader)

        test_acc = _evaluate_accuracy(model, test_loader, device)
        ep_time = time.time() - ep_start

        print(f"  Ep {epoch+1}/{epochs} | Time: {ep_time:.2f}s | Loss: {avg_loss:.4f} | Acc: {test_acc:.2f}%")
        with open(curve_file, "a") as f:
            f.write(f"{device_str},{model_name},{optimizer_name},{epoch+1},{avg_loss:.4f},{test_acc:.2f}\n")

    # Includes the per-epoch evaluation passes as well as the training passes.
    total_train_time = time.time() - start_train

    summary_file = os.path.join(output_dir, "q2_final_report.csv")
    if not os.path.isfile(summary_file):
        with open(summary_file, "w") as f:
            f.write("Device,Model,Optimizer,TotalTimeSec,FinalAcc,GFLOPs\n")

    with open(summary_file, "a") as f:
        f.write(f"{device_str},{model_name},{optimizer_name},{total_train_time:.2f},{test_acc:.2f},{gflops:.4f}\n")

    # Per-device tracker holding the best final accuracy across configurations.
    tracker_file = os.path.join(output_dir, f"best_acc_q2_{device_str}.txt")
    current_best = 0.0
    if os.path.exists(tracker_file):
        try:
            with open(tracker_file, "r") as f:
                current_best = float(f.read().strip())
        except (OSError, ValueError):
            # Unreadable or corrupted tracker: continue with "no previous
            # best", but report it instead of hiding every exception.
            print(f"  Warning: could not read {tracker_file}; treating previous best as 0.0")

    if test_acc > current_best:
        # Save the weights first and update the tracker second, so the tracker
        # never refers to a checkpoint that failed to save.
        save_name = os.path.join(output_dir, f"best_model_q2_{device_str}.pth")
        torch.save(model.state_dict(), save_name)
        with open(tracker_file, "w") as f:
            f.write(str(test_acc))
        print(f"  New Best Model Saved for {device_str}!")

if __name__ == "__main__":
    # Full grid of device x architecture x optimizer; all CPU runs come first.
    configs = [
        (device_name, arch, opt_name)
        for device_name in ('cpu', 'cuda')
        for arch in ('resnet18', 'resnet50')
        for opt_name in ('SGD', 'Adam')
    ]

    print("Starting Q2 Benchmark...")
    for dev, mod, opt in configs:
        # GPU configurations are skipped one by one when no CUDA device is visible.
        gpu_missing = dev == 'cuda' and not torch.cuda.is_available()
        if gpu_missing:
            print("Skipping CUDA (not available)")
        else:
            train_model(dev, mod, opt, epochs=10)


### Q2 Execution Logs

```text
Starting Q2 Benchmark...
Running: CPU | resnet18 | SGD
  Ep 1/10 | Time: 274.14s | Loss: 0.4254 | Acc: 87.85%
  Ep 2/10 | Time: 272.25s | Loss: 0.2694 | Acc: 90.27%
  Ep 3/10 | Time: 272.06s | Loss: 0.2166 | Acc: 90.99%
  ...
  Ep 10/10 | Time: 272.42s | Loss: 0.0685 | Acc: 91.61%

Running: CUDA | resnet18 | Adam
  Ep 1/10 | Time: 78.05s | Loss: 0.4012 | Acc: 89.12%
  ...
  Ep 10/10 | Time: 77.69s | Loss: 0.0685 | Acc: 92.83%
  New Best Model Saved for cuda!

Running: CUDA | resnet50 | SGD
  Ep 1/10 | Time: 135.04s | Loss: 0.6541 | Acc: 86.15%
  ...
  Ep 10/10 | Time: 126.51s | Loss: 0.1296 | Acc: 91.17%
```

# 2_resnet34

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet34
from torch.utils.data import DataLoader
import time
import os
import sys

def count_flops(model, input_size=(1, 3, 64, 64), device='cpu'):
    """Estimate the per-sample forward cost of ``model`` in GFLOPs.

    Forward hooks are attached to every ``nn.Conv2d`` and ``nn.Linear`` module,
    one random input of shape ``input_size`` is pushed through the model, and
    the operations of the layers that actually ran are summed. A
    multiply-accumulate counts as two operations and a bias add as one. Other
    layer types are not counted.

    Args:
        model: the network to measure; it is moved to ``device``.
        input_size: shape of the dummy input, batch dimension first. The result
            is per sample and does not depend on the batch size.
        device: device on which the dummy forward pass runs.

    Returns:
        The estimated cost as a float, in units of 1e9 operations.
    """
    flops = 0

    def hook(mod, inp, out):
        nonlocal flops
        if isinstance(mod, nn.Conv2d):
            k_h, k_w = mod.kernel_size
            _, out_c, h, w = out.shape
            # Each output element sees in_channels / groups input channels.
            # The formula uses only the per-sample output size, so it must not
            # be divided by the batch size again.
            ops = 2 * (mod.in_channels // mod.groups) * k_h * k_w * out_c * h * w
            if mod.bias is not None:
                ops += out_c * h * w
            flops += ops
        elif isinstance(mod, nn.Linear):
            ops = 2 * mod.in_features * mod.out_features
            if mod.bias is not None:
                ops += mod.out_features
            flops += ops

    hooks = [
        m.register_forward_hook(hook)
        for m in model.modules()
        if isinstance(m, (nn.Conv2d, nn.Linear))
    ]

    was_training = model.training
    try:
        dummy = torch.randn(input_size).to(device)
        model.to(device)
        model.eval()
        with torch.no_grad():
            model(dummy)
    finally:
        # Always detach the hooks and restore the caller's train/eval mode,
        # even if the dummy forward pass raised.
        for h in hooks:
            h.remove()
        model.train(was_training)
    return flops / 1e9

def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of ``model`` over ``loader``, in percent."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


def train_model(device_str, model_name, optimizer_name, epochs=10):
    """Train a ResNet-34 configuration on FashionMNIST and log the results.

    Writes into ``results_q2/``: a per-configuration learning curve CSV, one
    row in ``q2_final_report.csv``, and, when the final test accuracy beats the
    value stored in the per-device ``_res34`` tracker file, the model weights.

    Args:
        device_str: device to train on, e.g. ``'cpu'`` or ``'cuda'``.
        model_name: must be ``'resnet34'``, the only architecture this script builds.
        optimizer_name: ``'SGD'`` (lr 0.01, momentum 0.9) or ``'Adam'`` (lr 0.001).
        epochs: number of training epochs.

    Raises:
        ValueError: if ``model_name`` or ``optimizer_name`` is not supported.
            Any other model name used to leave ``model`` unbound and crash
            with an UnboundLocalError.
    """
    model_builders = {'resnet34': resnet34}
    if model_name not in model_builders:
        raise ValueError(f"Unsupported model_name: {model_name!r}")
    if optimizer_name not in ('SGD', 'Adam'):
        raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

    print(f"Running: {device_str} | {model_name} | {optimizer_name}")

    device = torch.device(device_str)

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    data_path = './data'
    train_set = torchvision.datasets.FashionMNIST(root=data_path, train=True, download=True, transform=transform)
    test_set = torchvision.datasets.FashionMNIST(root=data_path, train=False, download=True, transform=transform)

    # Pinned host memory only helps transfers to a CUDA device; on CPU runs it
    # is pure overhead and would skew the CPU side of the benchmark.
    pin = device.type == 'cuda'
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True, pin_memory=pin, num_workers=2)
    test_loader = DataLoader(test_set, batch_size=64, shuffle=False, pin_memory=pin, num_workers=2)

    # Randomly initialised network with a 10-class head. The deprecated
    # `pretrained=False` flag is dropped because that is already the default.
    model = model_builders[model_name](num_classes=10)

    model = model.to(device)
    gflops = count_flops(model, device=device)

    criterion = nn.CrossEntropyLoss()
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    start_train = time.time()

    output_dir = "results_q2"
    os.makedirs(output_dir, exist_ok=True)
    curve_file = os.path.join(output_dir, f"curve_q2_{device_str}_{model_name}_{optimizer_name}.csv")
    with open(curve_file, "w") as f:
        f.write("Device,Model,Optimizer,Epoch,TrainLoss,TestAcc\n")

    # Bound up front so the summary row below is still valid when epochs == 0.
    test_acc = 0.0

    for epoch in range(epochs):
        ep_start = time.time()
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Mean of the per-batch mean losses.
        avg_loss = running_loss / len(train_loader)

        test_acc = _evaluate_accuracy(model, test_loader, device)
        ep_time = time.time() - ep_start

        print(f"  Ep {epoch+1}/{epochs} | Time: {ep_time:.2f}s | Loss: {avg_loss:.4f} | Acc: {test_acc:.2f}%")
        with open(curve_file, "a") as f:
            f.write(f"{device_str},{model_name},{optimizer_name},{epoch+1},{avg_loss:.4f},{test_acc:.2f}\n")

    # Includes the per-epoch evaluation passes as well as the training passes.
    total_train_time = time.time() - start_train

    summary_file = os.path.join(output_dir, "q2_final_report.csv")
    if not os.path.isfile(summary_file):
        with open(summary_file, "w") as f:
            f.write("Device,Model,Optimizer,TotalTimeSec,FinalAcc,GFLOPs\n")

    with open(summary_file, "a") as f:
        f.write(f"{device_str},{model_name},{optimizer_name},{total_train_time:.2f},{test_acc:.2f},{gflops:.4f}\n")

    # Per-device tracker holding the best final ResNet-34 accuracy.
    tracker_file = os.path.join(output_dir, f"best_acc_q2_{device_str}_res34.txt")
    current_best = 0.0
    if os.path.exists(tracker_file):
        try:
            with open(tracker_file, "r") as f:
                current_best = float(f.read().strip())
        except (OSError, ValueError):
            # Unreadable or corrupted tracker: continue with "no previous
            # best", but report it instead of hiding every exception.
            print(f"  Warning: could not read {tracker_file}; treating previous best as 0.0")

    if test_acc > current_best:
        # Save the weights first and update the tracker second, so the tracker
        # never refers to a checkpoint that failed to save.
        save_name = os.path.join(output_dir, f"best_model_q2_{device_str}_res34.pth")
        torch.save(model.state_dict(), save_name)
        with open(tracker_file, "w") as f:
            f.write(str(test_acc))
        print(f"  New Best Model Saved for {device_str}!")

if __name__ == "__main__":
    # ResNet-34 on both devices with both optimizers; CPU runs come first.
    configs = [
        (device_name, 'resnet34', opt_name)
        for device_name in ('cpu', 'cuda')
        for opt_name in ('SGD', 'Adam')
    ]

    print("Starting Q2 Benchmark (ResNet34 Only)...")
    for dev, mod, opt in configs:
        # GPU configurations are skipped one by one when no CUDA device is visible.
        gpu_missing = dev == 'cuda' and not torch.cuda.is_available()
        if gpu_missing:
            print("Skipping CUDA (not available)")
        else:
            train_model(dev, mod, opt, epochs=10)


### Q2 (ResNet-34) Execution Logs

```text
Starting Q2 Benchmark (ResNet34 Only)...
Running: CPU | resnet34 | SGD
  Ep 1/10 | Time: 469.34s | Loss: 0.4620 | Acc: 88.63%
  Ep 2/10 | Time: 467.68s | Loss: 0.2841 | Acc: 90.20%
  ...
  Ep 10/10 | Time: 557.22s | Loss: 0.0786 | Acc: 92.51%
  New Best Model Saved for cpu!

Running: CUDA | resnet34 | SGD
  Ep 1/10 | Time: 111.50s | Loss: 0.4796 | Acc: 88.23%
  Ep 2/10 | Time: 109.30s | Loss: 0.2904 | Acc: 89.56%
  ...
  Ep 5/10 | Time: 110.30s | Loss: 0.1698 | Acc: 90.7%
```