In [6]:
import os
import gc
import csv
import glob
import torch
import multiprocessing

import numpy as np
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt

import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

In [7]:
# @title Set random seed

# @markdown Executing `set_seed(seed=seed)` you are setting the seed

# for DL its critical to set the random seed so that students can have a
# baseline to compare their results to expected results.
# Read more here: https://pytorch.org/docs/stable/notes/randomness.html

# Call `set_seed` function in the exercises to ensure reproducibility.
import random
import torch

def set_seed(seed=None, seed_torch=True):
  if seed is None:
    seed = np.random.choice(2 ** 32)
  random.seed(seed)
  np.random.seed(seed)
  if seed_torch:
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

  print(f'Random seed {seed} has been set.')

# In case that `DataLoader` is used
def seed_worker(worker_id):
  worker_seed = torch.initial_seed() % 2**32
  np.random.seed(worker_seed)
  random.seed(worker_seed)

In [8]:
# @title Set device (GPU or CPU)

# inform the user if the notebook uses GPU or CPU.

def set_device():
  device = "cuda" if torch.cuda.is_available() else "cpu"
  if device != "cuda":
    print("WARNING: For this notebook to perform best, "
        "if possible, in the menu under `Runtime` -> "
        "`Change runtime type.`  select `GPU` ")
  else:
    print("GPU is enabled in this notebook.")

  return device

In [9]:
set_seed(seed=2021)
device = set_device()

Random seed 2021 has been set.
GPU is enabled in this notebook.


In [10]:

use_cuda = torch.cuda.is_available()
start_epoch = 0
best_acc = 0  # best test accuracy
batch_size = 128
max_epochs_target = 40
base_learning_rate = 0.1

In [11]:
# @markdown Download and prepare Source Data
##print('==> Preparing data..')
def percentageSplit(full_dataset, percent = 0.0):
  set1_size = int(percent * len(full_dataset))
  set2_size = len(full_dataset) - set1_size
  final_dataset, _ = torch.utils.data.random_split(full_dataset, [set1_size, set2_size])
  return final_dataset

# ResNet

In [13]:
# @title ResNet model in PyTorch

class BasicBlock(nn.Module):
  """ResNet in PyTorch.
      Reference:
      [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
        Deep Residual Learning for Image Recognition. arXiv:1512.03385
  """

  expansion = 1

  def __init__(self, in_planes, planes, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion*planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion*planes)
        )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out


class Bottleneck(nn.Module):
  expansion = 4

  def __init__(self, in_planes, planes, stride=1):
    super(Bottleneck, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(self.expansion*planes)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion*planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion*planes)
        )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = F.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    out += self.shortcut(x)
    out = F.relu(out)
    return out


class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=100):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = nn.Linear(512*block.expansion, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_planes, planes, stride))
      self.in_planes = planes * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out


def ResNet18():
  return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
  return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50(num_classes=100):
  return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def checkpoint(model, acc, epoch, outModelName):
  # Save checkpoint.
  print('Saving..')
  state = {
      'state_dict': model.state_dict(),
      'acc': acc,
      'epoch': epoch,
      'rng_state': torch.get_rng_state()
  }
  if not os.path.isdir('checkpoint'):
      os.mkdir('checkpoint')
  torch.save(state, f'./checkpoint/{outModelName}.t7')

# Train and test functions

In [18]:
def train(model, loader, epoch, optimizer, criterion, use_cuda=True):
  print('\nEpoch: %d' % epoch)
  print(f"LR head / body : {[g['lr'] for g in optimizer.param_groups]} ===")
  print(f'Using {len(loader.dataset)} data during training')
  model.train()
  train_loss = 0
  correct = 0
  total = 0
  for batch_idx, (inputs, targets) in enumerate(loader):
    if use_cuda:
      inputs, targets = inputs.cuda(), targets.cuda()

    optimizer.zero_grad()
    inputs, targets = Variable(inputs), Variable(targets)
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    train_loss += loss.item()
    _, predicted = torch.max(outputs.data, 1)
    total += targets.size(0)
    correct += predicted.eq(targets.data).cpu().sum()

    if batch_idx % 500 == 0:
      print(batch_idx, len(loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
          % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
  return (train_loss/batch_idx, 100.*correct/total)


def test(model, loader, epoch, outModelName, criterion, use_cuda=True):
  global best_acc
  model.eval()
  test_loss, correct, total = 0, 0, 0
  with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(loader):
      if use_cuda:
        inputs, targets = inputs.cuda(), targets.cuda()

      outputs = model(inputs)
      loss = criterion(outputs, targets)

      test_loss += loss.item()
      _, predicted = torch.max(outputs.data, 1)
      total += targets.size(0)
      correct += predicted.eq(targets.data).cpu().sum()

      if batch_idx % 200 == 0:
        print(batch_idx, len(loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

  # Save checkpoint.
  acc = 100.*correct/total
  if acc > best_acc:
    best_acc = acc
    checkpoint(model, acc, epoch, outModelName)
  return (test_loss/batch_idx, 100.*correct/total)

In [21]:
# Check number of nodes in last layer
# This will keep updating until it finds the last Linear layer in the hierarchy

def check_num_features(model):

  # Iterating through named modules to find the last Linear layer
  output_nodes = None
  for name, module in model.named_modules():
      if isinstance(module, nn.Linear):
          output_nodes = module.out_features

  if output_nodes is not None:
      print(f"Number of nodes in the output layer (found by iteration): {output_nodes}")


In [23]:
'''Freezing / unfreezing functions '''
def freeze_all(model):
    print('freezing all')
    for p in model.parameters():
        p.requires_grad = False

def unfreeze_head(model):
    for p in model.linear.parameters():
        p.requires_grad = True

def set_bn_eval(model):
    """Set BatchNorm to eval to prevent stats updates when backbone frozen."""
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()

def unfreeze_all(model):
    for p in model.parameters():
        p.requires_grad = True

def unfreeze_layer4(model):
    for p in model.layer4.parameters():
        p.requires_grad = True

In [24]:
def set_optimizer(model, head_lr, body_lr, wd=5e-4):
    head, body = [], []
    for n, p in model.named_parameters():
        if not p.requires_grad:
            continue
        if n.startswith("linear") or "linear" in n:  # suffit pour ta classe
            head.append(p)
        else:
            body.append(p)
    param_groups = []
    if head:
        param_groups.append({"params": head, "lr": head_lr})
    if body_lr and body:
        param_groups.append({"params": body, "lr": body_lr})
    return optim.SGD(param_groups, momentum=0.9, weight_decay=wd, nesterov=True)


In [25]:
#Three-phase finetuning of model on target data 
def setup_phase_head(model, lr=1e-2):
    """Phase 1 : train only head(linear)."""
    freeze_all(model)
    unfreeze_head(model)
    set_bn_eval(model)
    return set_optimizer(model, head_lr=lr, body_lr=0)


def setup_phase_layer4(model):
    """Phase 2 : train head + layer4 (before last layer)."""
    freeze_all(model)
    unfreeze_head(model)
    unfreeze_layer4(model)
    set_bn_eval(model)
    return set_optimizer(model, head_lr=5e-3, body_lr=5e-4)


def setup_phase_full(model, update_bn=True):
    """Phase 3 : full fine-tuning .
        """
    unfreeze_all(model)
    if not update_bn:
        set_bn_eval(model)   # keep stats BN from pretrain
    return set_optimizer(model, head_lr=1e-4, body_lr=1e-4)

In [26]:
PHASES = [
    {"name": "head",   "epochs": 10,  "setup_fn": setup_phase_head},
    {"name": "layer4", "epochs": 10, "setup_fn": setup_phase_layer4},
    {"name": "full",   "epochs": 20, "setup_fn": lambda m: setup_phase_full(m, update_bn=True)},
]


In [27]:
#refresh last layer of network for new task
def replace_classification_layer(model,num_classes):
  #Replace classifier head for new task
  num_ftrs = model.linear.in_features
  model.linear = nn.Linear(num_ftrs, num_classes)

In [28]:
#get balanced samples when taking a percentage of training set
from collections import defaultdict,Counter

def get_stratified_loader(dataset, fraction, batch_size=128):
    num_samples = int(len(dataset) * fraction)
    targets = [dataset[i][1] for i in range(len(dataset))]

    # Group indices by class
    class_to_indices = defaultdict(list)
    for idx, label in enumerate(targets):
        class_to_indices[label].append(idx)

    selected_indices = []

    for cls, idxs in class_to_indices.items():
        n = num_samples // num_classes  # or n = num_samples // num_classes
        selected = np.random.choice(idxs, n, replace=False)
        selected_indices.extend(selected)

    np.random.shuffle(selected_indices)
    subset = torch.utils.data.Subset(dataset, selected_indices)
    return torch.utils.data.DataLoader(subset, batch_size=batch_size, shuffle=True)


In [29]:
# Target domain Data
print('==> Preparing target domain data..')

# CIFAR10 normalizing
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)
num_classes = 10
lr = 0.0001

# torchvision transforms
transform_train = transforms.Compose([])
if torchvision_transforms:
    transform_train.transforms.append(transforms.RandomCrop(32, padding=4))
    transform_train.transforms.append(transforms.RandomHorizontalFlip())

transform_train.transforms.append(transforms.ToTensor())
transform_train.transforms.append(transforms.Normalize(mean, std))

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

trainset = torchvision.datasets.CIFAR10(
    root='./CIFAR10', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR10(
    root='./CIFAR10', train=False, download=True, transform=transform_test)

==> Preparing target domain data..


In [30]:
#train with x% of training data

def frac_data_train(name, pretrained_path, fraction, trainloader, testloader):
    ''' name: file name to save output
        pretrained_path: path of pretrained model
        fraction: fraction of data to train with
        trainloader: dataloader for train data
        testloader: dataloader for test data
    '''
    result_folder = './results/'
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)

    criterion = nn.CrossEntropyLoss()
    model = ResNet50()
    # load pretrained model
    if os.path.isfile(pretrained_path):
        state_dict = torch.load(pretrained_path)
        best_acc = state_dict['acc']
        print('Best Accuracy:', best_acc)
        if "state_dict" in state_dict:
            state_dict = state_dict["state_dict"]
        # remove prefix "module."
        state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
        for k, v in model.state_dict().items():
            if k not in list(state_dict):
                print('key "{}" could not be found in provided state dict'.format(k))
            elif state_dict[k].shape != v.shape:
                print('key "{}" is of different shape in model and provided state dict'.format(k))
                state_dict[k] = v
        msg = model.load_state_dict(state_dict, strict=False)
        print("Load pretrained model with msg: {}".format(msg))
    else:
        raise Exception('No pretrained weights found')

    # outModelName = 'finetuned_'+str(fraction)
    outModelName = name
    logname = result_folder + model.__class__.__name__ + f'_{outModelName}.csv'
    replace_classification_layer(model, 10)

    model.to(device)

    if not os.path.exists(logname):
        with open(logname, 'w') as logfile:
            logwriter = csv.writer(logfile, delimiter=',')
            logwriter.writerow(['epoch', 'train loss', 'train acc', 'val loss', 'val acc'])

    print(len(trainloader.dataset))

    epoch_counter = 0

    for phase_id, phase in enumerate(PHASES, 1):
        optimizer = phase["setup_fn"](model)
        print(f"\n=== Phase {phase_id} | LR head / body : {[g['lr'] for g in optimizer.param_groups]} ===")
        for epoch in range(phase["epochs"]):
            train_loss, train_acc = train(model, trainloader, epoch_counter, optimizer, criterion, use_cuda=use_cuda)
            # creates a checkpoint
            val_loss, val_acc = test(model, testloader, epoch_counter, outModelName, criterion, use_cuda=use_cuda)
            with open(logname, 'a') as logfile:
                logwriter = csv.writer(logfile, delimiter=',')
                logwriter.writerow([epoch_counter, train_loss, train_acc.item(), val_loss, val_acc.item()])
            print(f'Epoch: {epoch_counter} | train acc: {train_acc} | val acc: {val_acc}')

            epoch_counter += 1
    del model, optimizer, trainloader, testloader
    gc.collect()

In [31]:
batch_size = 128
fractions = [0.01,0.2,0.4,0.6,1.0]

testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)
for fraction in fractions:
    trainloader = get_stratified_loader(trainset, fraction)
    print(len(trainloader.dataset))
    all_labels = []
    for _, labels in trainloader:
        all_labels.extend(labels.tolist())
    
    class_counts = Counter(all_labels)
    total = sum(class_counts.values())
    
    for cls, count in sorted(class_counts.items()):
        print(f"Class {cls}: {count} samples ({100 * count / total:.2f}%)")
    frac_data_train('finetuned_'+str(fraction),'/kaggle/input/pretrained-model/pretrain.t7',fraction, trainloader,testloader)


50000 500
500
Class 0: 50 samples (10.00%)
Class 1: 50 samples (10.00%)
Class 2: 50 samples (10.00%)
Class 3: 50 samples (10.00%)
Class 4: 50 samples (10.00%)
Class 5: 50 samples (10.00%)
Class 6: 50 samples (10.00%)
Class 7: 50 samples (10.00%)
Class 8: 50 samples (10.00%)
Class 9: 50 samples (10.00%)
Best Accuracy: tensor(74.4000)
Load pretrained model with msg: <All keys matched successfully>
500
freezing all

=== Phase 1 | LR head / body : [0.01] ===

Epoch: 0
LR head / body : [0.01] ===
Using 500 data during training
0 4 Loss: 2.353 | Acc: 14.062% (18/128)
0 79 Loss: 1.827 | Acc: 27.344% (35/128)
Saving..
Epoch: 0 | train acc: 16.0 | val acc: 29.059999465942383

Epoch: 1
LR head / body : [0.01] ===
Using 500 data during training
0 4 Loss: 1.806 | Acc: 33.594% (43/128)
0 79 Loss: 1.376 | Acc: 59.375% (76/128)
Saving..
Epoch: 1 | train acc: 49.0 | val acc: 55.939998626708984

Epoch: 2
LR head / body : [0.01] ===
Using 500 data during training
0 4 Loss: 1.210 | Acc: 67.969% (87/128)


In [None]:
# #Delete all files in results folder
#import shutil, os
#shutil.rmtree('/kaggle/working/results', ignore_errors=True)