## Dataloaders for the CUB Birds, Stanford Dogs and Foodx datasets

In [108]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision as tv
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


## 1 - Dataloader for CUB Dataset

In [47]:
class CUBDataset(datasets.ImageFolder):
    """
    ImageFolder restricted to one split of the CUB dataset.

    The dataset directory is expected to contain ``images.txt`` (lines of
    ``<image_id> <relative_path>``), ``train_test_split.txt`` (lines of
    ``<image_id> <flag>``) and an ``images/`` folder with one sub-folder per
    class. A flag of "1" selects the training split; "0" selects the test split.
    """

    def __init__(self, image_root_path, caption_root_path=None, split="train", *args, **kwargs):
        """
        Args:
            image_root_path:      path to dir containing images and lists folders
            caption_root_path:    path to dir containing captions (stored only;
                                  not read anywhere in this class)
            split:          "train" selects the training split; any other value
                            selects the test split
            *args:          forwarded to ImageFolder after ``root``
            **kwargs:       forwarded to ImageFolder (e.g. ``transform``)
        """
        image_info = self.get_file_content(os.path.join(image_root_path, "images.txt"))
        self.image_id_to_name = {y[0]: y[1] for y in [x.strip().split(" ") for x in image_info]}
        split_info = self.get_file_content(os.path.join(image_root_path, "train_test_split.txt"))
        # Map "<class_dir>/<file_name>" -> split flag ("1" = train, "0" = test).
        self.split_info = {self.image_id_to_name[y[0]]: y[1] for y in [x.strip().split(" ") for x in split_info]}
        self.split = "1" if split == "train" else "0"
        self.caption_root_path = caption_root_path

        # `root` is passed positionally so extra positional args cannot collide with it.
        super(CUBDataset, self).__init__(os.path.join(image_root_path, "images"), *args,
                                         is_valid_file=self.is_valid_file, **kwargs)

    def is_valid_file(self, x):
        """Return True when file path ``x`` belongs to the selected split.

        Files that are not listed in ``images.txt`` (stray or hidden files) are
        rejected instead of raising ``KeyError``.
        """
        # The split table is keyed with "/" separators, so normalise the OS separator.
        relative_name = os.path.relpath(x, self.root).replace(os.sep, "/")
        return self.split_info.get(relative_name) == self.split

    @staticmethod
    def get_file_content(file_path):
        """Return all lines of the text file at ``file_path`` as a list."""
        with open(file_path, encoding="utf-8") as fo:
            content = fo.readlines()
        return content

In [48]:
# Root directory of the CUB dataset.
data_root = "/datasets/CUB/CUB_200_2011/"

# Per-channel normalisation statistics (the values commonly used with
# ImageNet-pretrained models).
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Preprocessing applied to every image: fixed-size resize, tensor conversion,
# then channel-wise normalisation. Adjust here as required.
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

train_dataset_cub = CUBDataset(
    image_root_path=data_root,
    split="train",
    transform=data_transform,
)
test_dataset_cub = CUBDataset(
    image_root_path=data_root,
    split="test",
    transform=data_transform,
)

# Wrap the datasets in DataLoaders for batching; only the training loader
# shuffles and drops the final incomplete batch.
train_loader_cub = torch.utils.data.DataLoader(
    train_dataset_cub,
    batch_size=16,
    shuffle=True,
    drop_last=True,
)
test_loader_cub = torch.utils.data.DataLoader(
    test_dataset_cub,
    batch_size=16,
    shuffle=False,
    drop_last=False,
)

### Test the dataloader

In [49]:
# Number of samples in the train and test datasets.
tuple(map(len, (train_dataset_cub, test_dataset_cub)))

(5994, 5794)

In [50]:
# Number of batches produced by the train and test loaders.
tuple(map(len, (train_loader_cub, test_loader_cub)))

(374, 363)

In [51]:
# Fetch a single training batch and report its tensor shape and labels.
for i, batch in enumerate(train_loader_cub):
    inputs, labels = batch
    print(inputs.shape)
    print("labels", labels)
    print("label() len", len(labels))
    print("=" * 50)
    break  # one batch is enough for a sanity check

torch.Size([16, 3, 224, 224])
labels tensor([ 42,  85,  69,  43,  44,  66,  94, 109,  30,  85,  64,  82,   8,  58,
        187,  46])
label() len 16


In [52]:
# Draw one more batch from the training loader and print the same summary.
separator = "=" * 50
for i, batch in enumerate(train_loader_cub):
    inputs, labels = batch[0], batch[1]
    print(inputs.shape)
    print("labels", labels)
    print("label() len", len(labels))
    print(separator)
    break  # stop after the first batch

torch.Size([16, 3, 224, 224])
labels tensor([ 73, 157, 186, 111,  14, 109, 172,  85, 199, 149, 146, 120,   2,  32,
        160,  23])
label() len 16


In [142]:
# Training hyperparameters and loss
num_epochs = 20
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
# NOTE: no optimizer is built in this cell. The previous line here used `vggm`,
# a name that is never defined in this notebook, so the cell raised NameError.
# The optimizer is created further down, once the VGG16 model exists.

# For updating learning rate
def update_lr(optimizer, lr):
    """Set the learning rate of every parameter group in ``optimizer`` to ``lr``."""
    for group in optimizer.param_groups:
        group.update(lr=lr)

In [151]:
import torchvision.models as models
vgg16 = models.vgg16(pretrained=True)
# Change the number of output classes to 200. Assigning to `out_features`
# only edits an attribute and leaves the weight matrix at its old size, so
# the final classifier layer is replaced with a freshly initialised Linear.
num_classes = 200
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)
# freeze convolution weights
for param in vgg16.features.parameters():
    param.requires_grad = False
# Move to the device only after the head swap so the new layer is moved too.
vgg16 = vgg16.to(device)

In [152]:
# Loss function used by the training and validation routines.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over the classifier parameters only.
optimizer = optim.SGD(
    params=vgg16.classifier.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [153]:
# training function
def fit(model, train_dataloader):
    """Train ``model`` for one pass over ``train_dataloader``.

    Relies on the module-level ``device``, ``optimizer`` and ``criterion``.

    Args:
        model: network to train; it is switched to training mode.
        train_dataloader: iterable of batches where ``batch[0]`` holds the
            inputs and ``batch[1]`` the targets.

    Returns:
        (train_loss, train_accuracy): mean per-sample loss and accuracy in
        percent, both computed over the samples actually processed.
    """
    model.train()
    running_loss = 0.0
    running_correct = 0
    samples_seen = 0
    for batch in train_dataloader:
        inputs, targets = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # The criterion is assumed to return the batch mean (the default
        # reduction), so it is weighted by the batch size before accumulating.
        running_loss += loss.item() * batch_size
        running_correct += (outputs.argmax(dim=1) == targets).sum().item()
        samples_seen += batch_size

    # Normalise by the samples processed, not the dataset size: a loader with
    # drop_last=True skips the trailing partial batch.
    denominator = max(samples_seen, 1)
    train_loss = running_loss / denominator
    train_accuracy = 100. * running_correct / denominator
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')

    return train_loss, train_accuracy

In [154]:
# validation function
def validate(model, test_dataloader):
    """Evaluate ``model`` on ``test_dataloader`` without updating weights.

    Relies on the module-level ``device`` and ``criterion``.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        test_dataloader: iterable of batches where ``batch[0]`` holds the
            inputs and ``batch[1]`` the targets.

    Returns:
        (val_loss, val_accuracy): mean per-sample loss and accuracy in
        percent over the samples processed.
    """
    model.eval()
    running_loss = 0.0
    running_correct = 0
    samples_seen = 0
    # Gradients are not needed for evaluation; disabling them avoids building
    # the autograd graph for every batch.
    with torch.no_grad():
        for batch in test_dataloader:
            inputs, targets = batch[0].to(device), batch[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            batch_size = targets.size(0)
            # The criterion is assumed to return the batch mean (the default
            # reduction), so it is weighted by the batch size.
            running_loss += loss.item() * batch_size
            running_correct += (outputs.argmax(dim=1) == targets).sum().item()
            samples_seen += batch_size

    denominator = max(samples_seen, 1)
    val_loss = running_loss / denominator
    val_accuracy = 100. * running_correct / denominator

    return val_loss, val_accuracy

In [None]:
# Per-epoch metric histories.
train_loss, train_accuracy, val_loss, val_accuracy = [], [], [], []

start = time.time()
for epoch in range(10):
    # One training pass followed by one evaluation pass.
    train_epoch_loss, train_epoch_accuracy = fit(vgg16, train_loader_cub)
    val_epoch_loss, val_epoch_accuracy = validate(vgg16, test_loader_cub)

    histories = (train_loss, train_accuracy, val_loss, val_accuracy)
    epoch_metrics = (train_epoch_loss, train_epoch_accuracy, val_epoch_loss, val_epoch_accuracy)
    for history, metric in zip(histories, epoch_metrics):
        history.append(metric)
end = time.time()

# Report total wall-clock time in minutes.
elapsed_minutes = (end - start) / 60
print(elapsed_minutes, 'minutes')
