In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project's .py files are picked up
%load_ext autoreload
%autoreload 2

In [None]:
# Colab only: mount Google Drive under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Extract bbr.zip from Drive into the current working directory
# NOTE(review): presumably this archive holds the project code (predictors/, datasets) - confirm
!unzip /content/drive/My\ Drive/bbr.zip

In [None]:
# Download the TinyImageNet archive, extract it into the working directory,
# then delete the zip file
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip tiny-imagenet-200.zip
!rm tiny-imagenet-200.zip

In [2]:
import os
import numpy as np
import torch
import torchvision.transforms as T
from shutil import rmtree
from sklearn.model_selection import train_test_split
from tqdm.autonotebook import tqdm, trange
from torch.nn.functional import softmax
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.datasets import ImageFolder

from predictors.alexnet import Alexnet
from predictors.half_alexnet_distil import HalfAlexnetDistil

from datasets import CIFAR10, ProxyDataset

%matplotlib inline

In [3]:
# Hyper-parameters
LR = 0.001            # learning rate passed to the Adam optimizer
PRETRAIN_EPOCHS = 10  # number of passes over the TinyImagenet training set
BATCH_SIZE = 32       # batch size of the train and validation dataloaders
CONFIDENCE_TH = 0.8   # NOTE(review): not referenced in the visible cells - confirm it is still needed

# Set random seed for replicating testing results.
# Every RNG is seeded from RANDOM_SEED so that changing the constant actually
# changes the seeds (they were previously hard-coded to 0).
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [None]:
# utils
def create_val_img_folder(dataset_dir='tiny-imagenet-200'):
    '''
    Sort the validation images into one sub folder per class.

    Reads ``<dataset_dir>/val/val_annotations.txt`` (tab-separated: the first
    field is the image file name, the second the class folder name) and moves
    every listed image that is still directly inside ``<dataset_dir>/val/images``
    into ``<dataset_dir>/val/images/<class folder>``.

    Images that are not found at the top level of ``images`` (for example
    because they were already moved by a previous call) are skipped, so the
    function can safely be called more than once.

    Args:
        dataset_dir: root folder of the extracted TinyImagenet archive.
    '''
    val_dir = os.path.join(dataset_dir, 'val')
    img_dir = os.path.join(val_dir, 'images')

    # Map image file name -> class folder name
    val_img_dict = {}
    with open(os.path.join(val_dir, 'val_annotations.txt'), 'r') as fp:
        for line in fp:
            # Strip the newline so a two-column line does not yield 'class\n'
            words = line.rstrip('\n').split('\t')
            if len(words) < 2:
                # Blank or malformed line: nothing to map
                continue
            val_img_dict[words[0]] = words[1]

    # Create folder if not present and move images into proper folders
    for img, folder in val_img_dict.items():
        newpath = os.path.join(img_dir, folder)
        os.makedirs(newpath, exist_ok=True)
        src = os.path.join(img_dir, img)
        if os.path.exists(src):
            os.rename(src, os.path.join(newpath, img))

def get_class_name(dataset_dir='tiny-imagenet-200'):
    '''
    Build a mapping from class id to a short human readable class name.

    Reads ``<dataset_dir>/words.txt`` (tab-separated: class id, then a
    comma-separated list of names) and keeps only the first name of each list.
    Lines without a tab-separated second field are ignored.

    Args:
        dataset_dir: root folder of the extracted TinyImagenet archive.

    Returns:
        dict mapping the class id string to the first listed name.
    '''
    class_to_name = dict()
    with open(os.path.join(dataset_dir, 'words.txt'), 'r') as fp:
        for line in fp:
            words = line.strip('\n').split('\t')
            if len(words) < 2:
                # Blank or malformed line: no name to record
                continue
            class_to_name[words[0]] = words[1].split(',')[0]
    return class_to_name

# Move the validation images into per-class sub folders
create_val_img_folder()
# Mapping from class id to its first listed name, read from words.txt
class_to_name = get_class_name()

In [None]:
# TinyImagenet input pipelines.
# Both pipelines convert the image to a tensor, resize it to 32x32 and
# normalize it; the training pipeline additionally applies a padded random
# crop and a random horizontal flip.
resize_to = (32,32)
norm_mean, norm_std = (0.5,), (0.5,)

train_transforms = T.Compose([
    T.ToTensor(),
    T.Resize(resize_to),
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(p=0.5),
    T.Normalize(norm_mean, norm_std),
])
valid_transforms = T.Compose([
    T.ToTensor(),
    T.Resize(resize_to),
    T.Normalize(norm_mean, norm_std),
])

# Training split: shuffled batches
tinyimagenet_train_dataset = ImageFolder(
    root='tiny-imagenet-200/train/',
    transform=train_transforms,
)
tinyimagenet_train_dataloader = DataLoader(
    tinyimagenet_train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Validation split: read from the per-class folders, in dataset order
tinyimagenet_valid_dataset = ImageFolder(
    root='tiny-imagenet-200/val/images/',
    transform=valid_transforms,
)
tinyimagenet_valid_dataloader = DataLoader(
    tinyimagenet_valid_dataset,
    batch_size=BATCH_SIZE,
)

In [4]:
# Student network with 200 outputs, placed on the selected device
# (Module.to returns the module itself, so construction and placement can be chained)
student_model = HalfAlexnetDistil(name=None, n_outputs=200).to(device)

# Adam over all of the student's parameters
optimizer = torch.optim.Adam(params=student_model.parameters(), lr=LR)

# Cross-entropy averaged over the samples of a batch
criterion = torch.nn.CrossEntropyLoss(reduction='mean')

In [7]:
# test_sample = torch.rand(size=(4,3,32,32))
# a,b = student_model(test_sample)
# print(a.shape)
# print(b.shape)

torch.Size([4, 3136])
torch.Size([4, 1024])
torch.Size([4, 1024])
torch.Size([4, 200])
torch.Size([4, 200])


In [None]:
# Pre-training the student.
# The model returns two sets of logits (called "bot" and "top" here); both are
# trained with cross-entropy against the hard labels and evaluated separately.
for epoch in range(PRETRAIN_EPOCHS):
    # Define progress bar (the description is constant within an epoch, so set it once)
    loop = tqdm(tinyimagenet_train_dataloader, desc=f'Epoch {epoch+1}/{PRETRAIN_EPOCHS}')

    # Training loop
    student_model.train()
    for x, y in loop:
        optimizer.zero_grad()

        x = x.to(device=device)
        y = y.to(device=device)

        # Forward pass
        logits_bot, logits_top = student_model(x)
        # Backward pass on the sum of the two head losses
        loss = criterion(input=logits_bot, target=y) + criterion(input=logits_top, target=y)
        # TODO: the criterion should probably be replaced so that soft labels are added
        loss.backward()

        # Optimize
        optimizer.step()

        # Update progress bar
        loop.set_postfix(training_loss=loss.item())

    # Validation loop on proxy validation dataset
    student_model.eval()
    val_loss_bot = 0.0
    val_loss_top = 0.0
    acc_bot = 0
    acc_top = 0
    n_val = 0
    with torch.no_grad():
        for x, y in tinyimagenet_valid_dataloader:
            x = x.to(device=device)
            y = y.to(device=device)
            n_batch = y.size(0)

            logits_bot, logits_top = student_model(x)

            # criterion returns the batch mean; weight it by the batch size so a
            # smaller final batch does not count as much as a full one
            val_loss_bot += criterion(input=logits_bot, target=y).item() * n_batch
            val_loss_top += criterion(input=logits_top, target=y).item() * n_batch

            # softmax is monotonic, so the predicted class is the argmax of the logits
            acc_bot += torch.sum(logits_bot.argmax(dim=1) == y).item()
            acc_top += torch.sum(logits_top.argmax(dim=1) == y).item()
            n_val += n_batch

    loop.write(f'validation_loss_bot on TinyImagenet = {val_loss_bot/n_val:.4f}')
    loop.write(f'validation_loss_top on TinyImagenet = {val_loss_top/n_val:.4f}')
    loop.write(f'validation_accuracy_bot on TinyImagenet = {100*acc_bot/n_val:.2f}%')
    loop.write(f'validation_accuracy_top on TinyImagenet = {100*acc_top/n_val:.2f}%')

In [None]:
# Re-define the student model by changing top linear layer
# (new 10-output head in place of the one used during pre-training)
top_last_linear_layer = torch.nn.Linear(128, 10)
# Small-Gaussian weights and zero bias. The original code drew the Gaussian on
# the bias and then immediately zeroed it, so the draw had no effect; the
# Gaussian init is applied to the weights instead.
torch.nn.init.normal_(top_last_linear_layer.weight, mean=0.0, std=0.01)
torch.nn.init.zeros_(top_last_linear_layer.bias)
student_model.fc3_top = top_last_linear_layer

In [None]:
# Re-define the student model by changing last linear layer
# (new 10-output head in place of the one used during pre-training)
last_linear_layer = torch.nn.Linear(128, 10)
# Small-Gaussian weights and zero bias. The original code drew the Gaussian on
# the bias and then immediately zeroed it, so the draw had no effect; the
# Gaussian init is applied to the weights instead.
torch.nn.init.normal_(last_linear_layer.weight, mean=0.0, std=0.01)
torch.nn.init.zeros_(last_linear_layer.bias)
student_model.fc3 = last_linear_layer

# Newly created layers live on the CPU; move the whole model back to the device
student_model.to(device)

In [None]:
# Save model
path_to_save = 'drive/MyDrive/PhD/framework_copiere/pretrained_student_weights/pretrained_student_distil.pt'

# torch.save does not create missing parent folders; create them first so the
# pre-trained weights are not lost to a FileNotFoundError after training.
# NOTE(review): if Drive is not mounted this creates a local folder instead - confirm the mount first
os.makedirs(os.path.dirname(path_to_save), exist_ok=True)
torch.save(student_model.state_dict(), path_to_save)

In [None]:
# Test loading the model: rebuild the student with 10 outputs and restore the saved weights
student_model = HalfAlexnetDistil(name=None, n_outputs=10)

# map_location=device remaps the stored tensors onto the active device, so the
# same call works whether or not the checkpoint was written from a GPU
student_model.load_state_dict(torch.load(path_to_save, map_location=device))
student_model.to(device)