In [1]:
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
from models import vgg

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import random
import numpy as np
def manual_seed(seed):
    np.random.seed(seed) #1
    random.seed(seed) #2
    torch.manual_seed(seed) #3
    torch.cuda.manual_seed(seed) #4.1
    torch.cuda.manual_seed_all(seed) #4.2
    torch.backends.cudnn.benchmark = False #5 
    torch.backends.cudnn.deterministic = True #6

manual_seed(42)

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = torchvision.models.vgg.vgg16(pretrained=True)
model.features[0] = nn.Conv2d(4,64,3,1,1)
model.classifier[6] = nn.Linear(in_features=4096, out_features=100, bias=True)
# freeze convolution weights
# for i,param in enumerate(model.features.parameters()):
#     if i !=0:
#         param.requires_grad = False
      
for name, param  in model.classifier[6].named_parameters():
    print(name, param.shape)
print(model)



cuda:0
weight torch.Size([100, 4096])
bias torch.Size([100])
VGG(
  (features): Sequential(
    (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (

In [4]:
EPOCHS = 50
BATCH = 32
VAL_BATCH=256
LR = 1e-5
MOMENTUM = 0.9
DECAY=5e-4

In [5]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    # transforms.Normalize(mean=(0.5071, 0.4867, 0.4408), std= (0.2675, 0.2565, 0.2761)),
])

test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=(0.5071, 0.4867, 0.4408), std= (0.2675, 0.2565, 0.2761)),
])

train_data = torchvision.datasets.CIFAR100(root="./dataset", train=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH,
                                          shuffle=True,pin_memory=True,num_workers=4)
val_data = torchvision.datasets.CIFAR100(root="./dataset", train=False, transform=test_transform)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=VAL_BATCH,
                                          shuffle=False,pin_memory=True,num_workers=4)

print(next(iter(val_loader))[0].shape)

torch.Size([256, 3, 224, 224])


In [6]:
# optimizerdmf aksemfusmsep
optimizer = optim.Adam(model.classifier.parameters(), lr=LR)
# optimizer = optim.SGD(model.classifier.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=DECAY)
# loss function
criterion = nn.CrossEntropyLoss()

In [7]:
from torch import autocast
from torch.cuda.amp import GradScaler
model.to(device)
zero_tensor = torch.zeros((BATCH,1,224,224))
def validate(model, test_loader):
    model.eval()
    val_loss = 0.0
    val_acc = 0
    with torch.no_grad():
        for data in tqdm(test_loader,leave=True):
            imgs = torch.cat([data[0], torch.zeros((data[0].size(0),1,224,224))], 1)
            imgs, target = imgs.to(device), data[1].to(device)
            output = model(imgs)
            loss = criterion(output, target)

            val_loss += loss.item()
            _, preds = torch.max(output.data, 1)
            val_acc += (preds==target).sum().item()
    
    val_loss = val_loss/len(test_loader)
    val_acc = 100. * val_acc/len(test_loader.dataset)

    return val_loss, val_acc


start_loss, start_acc = validate(model, val_loader)
print(f"START LOSS {start_loss:.4f}, ACC {start_acc:.2f}")
best = 0
scaler = GradScaler()
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    train_acc = 0
    for data in tqdm(train_loader,leave=True):
        imgs = torch.cat([data[0], torch.zeros((data[0].size(0),1,224,224))], 1)
        imgs, target = imgs.to(device), data[1].to(device)
        optimizer.zero_grad()
        # output = model(imgs)
        # loss = criterion(output, target)
        # loss.backward()
        # optimizer.step()
        with autocast(device_type='cuda', dtype=torch.float16):
            output = model(imgs)
            loss = criterion(output, target)

        # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
        # Backward passes under autocast are not recommended.
        # Backward ops run in the same dtype autocast chose for corresponding forward ops.
        scaler.scale(loss).backward()

        # scaler.step() first unscales the gradients of the optimizer's assigned params.
        # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
        # otherwise, optimizer.step() is skipped.
        scaler.step(optimizer)

        # Updates the scale for next iteration.
        scaler.update()

        _, preds = torch.max(output.data, 1)

        train_loss += loss.item()
        train_acc += (preds ==target).sum().item()

    train_loss = train_loss/len(train_loader)
    train_acc = 100. * train_acc/len(train_loader.dataset)
    print(f"{epoch} TRAIN_LOSS : {train_loss:.4f}, TRAIN_ACC : {train_acc:.2f}")

    val_loss, val_acc = validate(model, val_loader)
    print(f" VAL_LOSS : {val_loss:.4f}, VAL_ACC : {val_acc:.2f}")
    if best < val_acc:
        best = val_acc
        checkpoint = {
            'model' : model,
            'model_state_dict' : model.state_dict(),
        }
        torch.save(checkpoint, './checkpoint/vgg16_cifar.pth')
        print(f"save best acc {best:.2f}")


100%|██████████| 40/40 [00:19<00:00,  2.10it/s]


START LOSS 4.6295, ACC 0.92


100%|██████████| 1563/1563 [02:48<00:00,  9.26it/s]


0 TRAIN_LOSS : 4.4023, TRAIN_ACC : 3.66


100%|██████████| 40/40 [00:17<00:00,  2.26it/s]


 VAL_LOSS : 4.1001, VAL_ACC : 8.62
save best acc 8.62


100%|██████████| 1563/1563 [02:47<00:00,  9.31it/s]


1 TRAIN_LOSS : 4.0078, TRAIN_ACC : 8.40


100%|██████████| 40/40 [00:17<00:00,  2.27it/s]


 VAL_LOSS : 3.8338, VAL_ACC : 13.04
save best acc 13.04


100%|██████████| 1563/1563 [02:48<00:00,  9.30it/s]


2 TRAIN_LOSS : 3.8175, TRAIN_ACC : 11.46


100%|██████████| 40/40 [00:17<00:00,  2.27it/s]


 VAL_LOSS : 3.6823, VAL_ACC : 14.89
save best acc 14.89


100%|██████████| 1563/1563 [02:48<00:00,  9.26it/s]


3 TRAIN_LOSS : 3.6819, TRAIN_ACC : 13.82


 55%|█████▌    | 22/40 [00:10<00:08,  2.08it/s]


KeyboardInterrupt: 

In [None]:
filepath = './checkpoint/vgg16_cifar.pth'
checkpoint = torch.load(filepath)
model = checkpoint['model']
print(model)
model.load_state_dict(checkpoint['model_state_dict'])

VGG(
  (features): Sequential(
    (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

<All keys matched successfully>