In [1]:
""" In this notebook, we will look into only one specialist model. """

# Load libraries 
import torch 
import torchvision 
import torchvision.transforms as transforms 
import torch.optim as optim 
import torch.nn as nn 
import torch.nn.functional as F 
import random 
import numpy as np 

In [2]:
def create_specialist_dataset(dataset, subset_class, batch_size, train, dataloader, shuffle = True): 
    """ Returns dataset enriched with examples that the specialist model specializes in. """
    
    # Gather all images that belong to specialist's sub-class 
    num_examples = len(dataset.targets)
    subset_indices = [] 
    for label in subset_class: 
        indices = [i for i in range(num_examples) if dataset.targets[i] == label]
        subset_indices.append(indices)
        
    # Flatten the list of lists into one list 
    subset_indices = [item for sublist in subset_indices for item in sublist]
    
    # Get training data from dustbin class
    if train: 
        num_dustbin_examples = 500
    else: 
        num_dustbin_examples = 100 
        
    random_indices = np.random.randint(0, num_examples - 1, num_dustbin_examples * 5)
    dustbin_indices = [] 
    for index in random_indices: 
        if index in subset_indices: 
            continue
        else: 
            dustbin_indices.append(index)          
    random.shuffle(dustbin_indices)
    dustbin_indices = dustbin_indices[:num_dustbin_examples]
    
    # Combine examples from specialised subset and dustbin class 
    indices = subset_indices + dustbin_indices 
    
    # Create dataset 
    specialist_dataset, specialist_dataset_targets = [dataset.data[i] for i in indices], [dataset.targets[i] for i in indices] 
    transformation = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
    torch_specialist_dataset = specialistDataset(specialist_dataset, specialist_dataset_targets, subset_class, transform = transformation)
    if not dataloader: 
        return torch_specialist_dataset
    specialist_dataloader = torch.utils.data.DataLoader(torch_specialist_dataset, batch_size = batch_size, shuffle = shuffle)
    return specialist_dataloader


class specialistDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels, sub_class, transform=None):
        super(specialistDataset, self).__init__()
        self.images = images
        self.labels = labels
        self.transform = transform
        self.sub_class = sub_class
        self.dustbin_class = len(self.sub_class)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.images[idx]
        if self.transform: 
            sample = self.transform(sample)
        label = self.labels[idx]
        if label in self.sub_class: 
            label = self.sub_class.index(label)
        else: 
            label = self.dustbin_class
        return sample, label 
    
def train_and_evaluate_scratch(trainloader, testloader, model, optimizer, scheduler, criterion, num_epochs, model_path, device): 
    lowest_test_loss = 1000.0 
    for epoch in range(num_epochs): 
        running_loss, train_corrects, train_total = 0.0, 0, 0
        model.train() 
        for inputs, labels in trainloader: 
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad() 
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward() 
            optimizer.step()

            running_loss += loss.item()
            predicted_class = outputs.data.max(1, keepdim = True)[1]
            train_corrects += predicted_class.eq(labels.data.view_as(predicted_class)).cpu().sum()
            train_total += labels.size(0)
    
        # Evaluation 
        test_corrects, test_total, test_running_loss = evaluate(model, testloader, device)

        scheduler.step()
        if test_running_loss/test_total < lowest_test_loss: 
            torch.save(model.state_dict(), model_path)
            lowest_test_loss = test_running_loss/test_total
 
        print(f'[{epoch + 1}], train_loss: {running_loss/train_total:.4f}, test_loss: {test_running_loss/test_total:.4f}, train_accuracy: {train_corrects*100/train_total:.2f} %, test_accuracy: {test_corrects*100/test_total:.2f} %')

def evaluate(model, testloader, device):   
    criterion = nn.CrossEntropyLoss() 
    test_running_loss, test_corrects, test_total = 0.0, 0, 0 
    model.eval() 
    with torch.no_grad(): 
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device) 

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_running_loss += loss.item() 
            predicted_class = outputs.data.max(1, keepdim = True)[1]
            test_corrects += predicted_class.eq(labels.data.view_as(predicted_class)).cpu().sum()
            test_total += labels.size(0)

    return test_corrects, test_total, test_running_loss

In [3]:
# Load and normalise data 
transformation = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])

train_batch_size, test_batch_size = 128, 1024
trainset = torchvision.datasets.CIFAR100(root = './data', train = True, download = True, transform = transformation)
testset = torchvision.datasets.CIFAR100(root = './data', train = False, download = True, transform = transformation)

num_classes = 100

Files already downloaded and verified
Files already downloaded and verified


In [4]:
sub_class = [12, 17, 23, 33, 37, 47, 49, 52, 56, 59, 60, 68, 69, 71, 76, 81, 85, 90, 96]
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

specialist_model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar100_resnet20", pretrained = False)
generalist_state_dict = torch.load("teacher_model_cifar100.pth")
specialist_model.load_state_dict(generalist_state_dict)
specialist_model.fc = nn.Linear(specialist_model.fc.in_features, len(sub_class) + 1)   # +1 for dustbin class
specialist_model = specialist_model.to(device)
  
criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(specialist_model.parameters(), lr = 0.001, nesterov = True, momentum = 0.9, weight_decay = 5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 100, gamma = 0.1)
num_epochs = 20 
model_path = 'specialist_0_debug.pth'

specialist_train_batch_size, specialist_test_batch_size = 32, 128

# Returns dataloader
specialist_traindataloader = create_specialist_dataset(trainset, sub_class, specialist_train_batch_size, train = True, dataloader = True, shuffle = True)
specialist_testset = create_specialist_dataset(testset, sub_class, specialist_test_batch_size, train = False, dataloader = False, shuffle = False)
specialist_testdataloader = torch.utils.data.DataLoader(specialist_testset, batch_size = specialist_test_batch_size, shuffle = False)
train_and_evaluate_scratch(specialist_traindataloader, specialist_testdataloader, specialist_model, optimizer, scheduler, criterion, num_epochs, model_path, device)

Using cache found in C:\Users\yeewenli/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


[1], train_loss: 0.0449, test_loss: 0.0067, train_accuracy: 63.83 %, test_accuracy: 76.10 %
[2], train_loss: 0.0207, test_loss: 0.0056, train_accuracy: 82.40 %, test_accuracy: 78.90 %
[3], train_loss: 0.0157, test_loss: 0.0051, train_accuracy: 85.99 %, test_accuracy: 80.85 %
[4], train_loss: 0.0128, test_loss: 0.0051, train_accuracy: 88.63 %, test_accuracy: 80.25 %
[5], train_loss: 0.0111, test_loss: 0.0050, train_accuracy: 90.14 %, test_accuracy: 80.40 %
[6], train_loss: 0.0093, test_loss: 0.0051, train_accuracy: 91.94 %, test_accuracy: 80.30 %
[7], train_loss: 0.0082, test_loss: 0.0050, train_accuracy: 92.93 %, test_accuracy: 80.80 %
[8], train_loss: 0.0070, test_loss: 0.0051, train_accuracy: 94.25 %, test_accuracy: 80.90 %
[9], train_loss: 0.0060, test_loss: 0.0054, train_accuracy: 95.38 %, test_accuracy: 80.70 %
[10], train_loss: 0.0052, test_loss: 0.0054, train_accuracy: 96.02 %, test_accuracy: 80.80 %
[11], train_loss: 0.0046, test_loss: 0.0054, train_accuracy: 96.67 %, test_accu

In [8]:
batch_sizes = [1, 16, 32, 64, 128]
shuffle_boolean = [True, False]
for i in range(len(shuffle_boolean)): 
    for size in batch_sizes: 
        specialist_testdataloader = torch.utils.data.DataLoader(specialist_testset, batch_size = size, shuffle = shuffle_boolean[i])
        test_corrects, test_total, _ = evaluate(specialist_model, specialist_testdataloader, device)
        print(f'shuffle: {shuffle_boolean[i]}, batch_size: {size}, testing accuracy of specialist model {i+1} is {test_corrects*100/test_total:.2f}%')

shuffle: True, batch_size: 1, testing accuracy of specialist model 1 is 80.25%
shuffle: True, batch_size: 16, testing accuracy of specialist model 1 is 80.25%
shuffle: True, batch_size: 32, testing accuracy of specialist model 1 is 80.25%
shuffle: True, batch_size: 64, testing accuracy of specialist model 1 is 80.25%
shuffle: True, batch_size: 128, testing accuracy of specialist model 1 is 80.25%
shuffle: False, batch_size: 1, testing accuracy of specialist model 2 is 80.25%
shuffle: False, batch_size: 16, testing accuracy of specialist model 2 is 80.25%
shuffle: False, batch_size: 32, testing accuracy of specialist model 2 is 80.25%
shuffle: False, batch_size: 64, testing accuracy of specialist model 2 is 80.25%
shuffle: False, batch_size: 128, testing accuracy of specialist model 2 is 80.25%
