In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50
import time
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2, 0.2, 0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
    transforms.RandomErasing(p=0.5)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=256,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    persistent_workers=True
)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    persistent_workers=True
)

100%|██████████| 170M/170M [00:03<00:00, 42.7MB/s]


In [3]:
model = resnet50(num_classes=10).to(device)
lr = 0.1
epochs = 300

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
start_time = time.time()

best_acc = 0
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    acc = 100. * correct / total
    print(f'Epoch {epoch+1} | Current lr: {current_lr:.4f} | Loss: {running_loss/len(trainloader):.4f} | Acc: {acc:.2f}%')

    if acc > best_acc:
        torch.save(model.state_dict(), 'saved_models/teacher_resnet50.pt')
        torch.save(model.state_dict(), "/content/drive/MyDrive/resnet50_cifar10.pt")
        best_acc = acc

total_time = time.time() - start_time
print(f"\nTotal training time: {total_time:.2f} seconds")

Epoch 1 | Current lr: 0.1000 | Loss: 5.8009 | Acc: 9.55%
Epoch 2 | Current lr: 0.1000 | Loss: 2.5221 | Acc: 16.23%
Epoch 3 | Current lr: 0.1000 | Loss: 2.1594 | Acc: 27.92%
Epoch 4 | Current lr: 0.1000 | Loss: 1.9951 | Acc: 28.43%
Epoch 5 | Current lr: 0.0999 | Loss: 1.8886 | Acc: 35.98%
Epoch 6 | Current lr: 0.0999 | Loss: 1.8062 | Acc: 38.75%
Epoch 7 | Current lr: 0.0999 | Loss: 1.7439 | Acc: 41.38%
Epoch 8 | Current lr: 0.0998 | Loss: 1.6798 | Acc: 42.69%
Epoch 9 | Current lr: 0.0998 | Loss: 1.6346 | Acc: 45.06%
Epoch 10 | Current lr: 0.0997 | Loss: 1.5792 | Acc: 46.46%
Epoch 11 | Current lr: 0.0997 | Loss: 1.5290 | Acc: 49.60%
Epoch 12 | Current lr: 0.0996 | Loss: 1.4830 | Acc: 52.59%
Epoch 13 | Current lr: 0.0995 | Loss: 1.4374 | Acc: 54.96%
Epoch 14 | Current lr: 0.0995 | Loss: 1.4045 | Acc: 55.51%
Epoch 15 | Current lr: 0.0994 | Loss: 1.3641 | Acc: 58.66%
Epoch 16 | Current lr: 0.0993 | Loss: 1.3324 | Acc: 58.77%
Epoch 17 | Current lr: 0.0992 | Loss: 1.2961 | Acc: 60.28%
Epoch 1