In [1]:
""" In this notebook, we will look into only one specialist model. """

# Load libraries 
import torch 
import torchvision 
import torchvision.transforms as transforms 
import torch.optim as optim 
import torch.nn as nn 
import torch.nn.functional as F 
import random 
import numpy as np 

In [2]:
def create_specialist_dataset(dataset, subset_class, transformation, batch_size, train, dataloader, shuffle = True): 
    """ Returns dataset enriched with examples that the specialist model specializes in. """
    
    # Gather all images that belong to specialist's sub-class 
    num_examples = len(dataset.targets)
    
    subset_indices = [] 
    for label in subset_class: 
        indices = [i for i in range(num_examples) if dataset.targets[i] == label]
        subset_indices.append(indices)
        
    # Flatten the list of lists into one list 
    subset_indices = [item for sublist in subset_indices for item in sublist]
    
    # Get training data from dustbin class
    if train: 
        num_dustbin_examples = 500
    else: 
        num_dustbin_examples = 100 
        
    random_indices = np.random.randint(0, num_examples - 1, num_dustbin_examples * 5)
    dustbin_indices = [] 
    for index in random_indices: 
        if index in subset_indices: 
            continue
        else: 
            dustbin_indices.append(index)          
    random.shuffle(dustbin_indices)
    dustbin_indices = dustbin_indices[:num_dustbin_examples]
    
    # Combine examples from specialised subset and dustbin class 
    indices = subset_indices + dustbin_indices 
    
    # Create dataset 
    specialist_dataset, specialist_dataset_targets = [dataset.data[i] for i in indices], [dataset.targets[i] for i in indices]
    torch_specialist_dataset = specialistDataset(specialist_dataset, specialist_dataset_targets, transform = transformation)
    if not dataloader: 
        return torch_specialist_dataset
    
    specialist_dataloader = torch.utils.data.DataLoader(torch_specialist_dataset, batch_size = batch_size, shuffle = shuffle)
    return specialist_dataloader


class specialistDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels, transform=None):
        super(specialistDataset, self).__init__()
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.images[idx]
        if self.transform: 
            sample = self.transform(sample)
        label = self.labels[idx]
        return sample, label 

    
def train_and_evaluate_specialist(trainloader, testloader, specialist_model, optimizer, scheduler, criterion, num_epochs, domain, model_path): 
    
    lowest_test_loss = 10.0
    dustbin_class = len(domain)
    specialist_model.train()
    
    for epoch in range(num_epochs): 
        running_loss, corrects = 0.0, 0 
        train_total = 0 
        
        for i, data in enumerate(trainloader, 0): 
            inputs, labels = data 
            
            # Correct the labels
            for index in range(labels.shape[0]): 
                label = labels[index]
                if label in domain: 
                    labels[index] = domain.index(label)
                else: 
                    labels[index] = dustbin_class
                
            if torch.cuda.is_available(): 
                inputs, labels = inputs.cuda(), labels.cuda()
                
            # Zero the parameter gradients 
            optimizer.zero_grad()
            
            # Forward + Backward + Optimise
            outputs = specialist_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            # Calculate statistics
            running_loss += loss.item()
            predicted_class = outputs.data.max(1, keepdim = True)[1]
            corrects += predicted_class.eq(labels.data.view_as(predicted_class)).cpu().sum()
            train_total += labels.size(0)
            
        # Evaluation 
        test_correct, test_total = 0, 0 
        test_running_loss = 0.0
        with torch.no_grad(): 
            for data in testloader: 
                images, labels = data 
                
                for j in range(labels.shape[0]): 
                    label = labels[j]
                    if label in domain: 
                        labels[j] = domain.index(label)
                    else: 
                        labels[j] = dustbin_class              
                
                if torch.cuda.is_available(): 
                    images, labels = images.cuda(), labels.cuda()

                # Forward + Backward + Optimise
                outputs = specialist_model(images)
                test_loss = criterion(outputs, labels)
            
                # Calculate testing statistics
                test_running_loss += test_loss.item()
                _, predicted = torch.max(outputs.data, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).cpu().sum().item()
        
        scheduler.step()
        
        if test_running_loss/test_total < lowest_test_loss: 
            torch.save(specialist_model.state_dict(), model_path)
            lowest_test_loss = test_running_loss/test_total
    
        print(f'[{epoch + 1}] train_loss: {running_loss/train_total}, test_loss: {test_running_loss/test_total}, train_acc: {corrects/train_total}, test_acc: {test_correct/test_total}')

In [3]:
def evaluate_specialist(model, testloader, domain):
    test_correct, test_total = 0, 0 
    dustbin_class = len(domain)
    with torch.no_grad(): 
        for data in testloader: 
            images, labels = data 
            
            # Correct the labels 
            for j in range(labels.shape[0]): 
                label = labels[j]
                if label in domain: 
                    labels[j] = domain.index(label)
                else: 
                    labels[j] = dustbin_class              
                
            if torch.cuda.is_available(): 
                images, labels = images.cuda(), labels.cuda()

            # Calculate statistics 
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).cpu().sum().item()
            
    return test_correct*100/test_total


def evaluate(model, testloader): 
    test_correct, test_total = 0, 0 
    with torch.no_grad(): 
        for data in testloader: 
            images, labels = data 
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda() 
      
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).cpu().sum().item() 
      
    return test_correct*100/test_total 

In [4]:
# Load and normalise data 
transformation = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])

train_batch_size, test_batch_size = 128, 1024
trainset = torchvision.datasets.CIFAR100(root = './data', train = True, download = True, transform = transformation)
testset = torchvision.datasets.CIFAR100(root = './data', train = False, download = True, transform = transformation)

num_classes = 100

Files already downloaded and verified
Files already downloaded and verified


In [5]:
sub_class = [12, 17, 23, 33, 37, 47, 49, 52, 56, 59, 60, 68, 69, 71, 76, 81, 85, 90, 96]

specialist_model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_resnet20", pretrained = False)
generalist_state_dict = torch.load("teacher_model_cifar100.pth")
specialist_model.load_state_dict(generalist_state_dict)
specialist_model.fc = nn.Linear(specialist_model.fc.in_features, len(sub_class) + 1)   # +1 for dustbin class
if torch.cuda.is_available(): 
    specialist_model = specialist_model.cuda() 
    
criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(specialist_model.parameters(), lr = 0.001, nesterov = True, momentum = 0.9, weight_decay = 5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 100, gamma = 0.1)
num_epochs = 20 
model_path = 'specialist_0_debug.pth'

specialist_train_batch_size, specialist_test_batch_size = 128, 128

# Returns dataloader
specialist_traindataloader = create_specialist_dataset(trainset, sub_class, transformation, specialist_train_batch_size, train = True, dataloader = True, shuffle = True)

# Returns dataset
specialist_testset = create_specialist_dataset(testset, sub_class, transformation, specialist_test_batch_size, train = False, dataloader = False)                               
specialist_testdataloader = torch.utils.data.DataLoader(specialist_testset, batch_size = specialist_test_batch_size, shuffle = True)

train_and_evaluate_specialist(specialist_traindataloader, specialist_testdataloader, specialist_model, optimizer, scheduler, criterion, num_epochs, sub_class, model_path)

Using cache found in C:\Users\yeewenli/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


[1] train_loss: 0.01913579797744751, test_loss: 0.013277672350406647, train_acc: 0.3292999863624573, test_acc: 0.5875
[2] train_loss: 0.009995295268297195, test_loss: 0.0092293221950531, train_acc: 0.7045999765396118, test_acc: 0.6815
[3] train_loss: 0.007291124790906906, test_loss: 0.007724655032157898, train_acc: 0.7764000296592712, test_acc: 0.7255
[4] train_loss: 0.006032058668136596, test_loss: 0.006898874849081039, train_acc: 0.8123999834060669, test_acc: 0.7395
[5] train_loss: 0.0051697238743305205, test_loss: 0.0064776366353034975, train_acc: 0.8343999981880188, test_acc: 0.757
[6] train_loss: 0.004619477725028992, test_loss: 0.006115018844604492, train_acc: 0.8529999852180481, test_acc: 0.764
[7] train_loss: 0.004149292653799057, test_loss: 0.005906160563230515, train_acc: 0.8657000064849854, test_acc: 0.762
[8] train_loss: 0.003807839462161064, test_loss: 0.005649289056658745, train_acc: 0.8759999871253967, test_acc: 0.774
[9] train_loss: 0.0034924988836050035, test_loss: 0.0

In [7]:
batch_sizes = [16, 32, 64, 128]
shuffle_boolean = [True, False]
for i in range(len(shuffle_boolean)): 
    for size in batch_sizes: 
        specialist_testdataloader = torch.utils.data.DataLoader(specialist_testset, batch_size = size, shuffle = shuffle_boolean[i])
        print("shuffle: {}, batch_size: {}, testing accuracy of specialist model: {} %".format(shuffle_boolean[i], size, evaluate_specialist(specialist_model, specialist_testdataloader, sub_class)))

shuffle: True, batch_size: 16, testing accuracy of specialist model: 72.8 %
shuffle: True, batch_size: 32, testing accuracy of specialist model: 75.5 %
shuffle: True, batch_size: 64, testing accuracy of specialist model: 78.2 %
shuffle: True, batch_size: 128, testing accuracy of specialist model: 78.85 %
shuffle: False, batch_size: 16, testing accuracy of specialist model: 9.3 %
shuffle: False, batch_size: 32, testing accuracy of specialist model: 10.5 %
shuffle: False, batch_size: 64, testing accuracy of specialist model: 15.75 %
shuffle: False, batch_size: 128, testing accuracy of specialist model: 22.0 %
