In [1]:
!rm -rf milli_imagenet.zip
!rm -rf milli_imagenet


In [2]:
!gdown --id 1x3BSoqoe76FcWdbagLj6Mx6_2EAJ982g

Downloading...
From: https://drive.google.com/uc?id=1x3BSoqoe76FcWdbagLj6Mx6_2EAJ982g
To: /content/milli_imagenet.zip
92.9MB [00:00, 224MB/s]


In [3]:
!unzip milli_imagenet.zip

Archive:  milli_imagenet.zip
   creating: milli_imagenet/
   creating: milli_imagenet/test/
  inflating: milli_imagenet/test/car_2715810931_648aa512c2.jpg  
  inflating: milli_imagenet/test/airplane_148113522_3b0c62dcf1.jpg  
  inflating: milli_imagenet/test/cat_1972050799_f63e09067f.jpg  
  inflating: milli_imagenet/test/dog_411390999_7097d3634b.jpg  
  inflating: milli_imagenet/test/dog_640212967_0d8b02dcdd.jpg  
  inflating: milli_imagenet/test/airplane_2043159074_1b5dabcb37.jpg  
  inflating: milli_imagenet/test/elephant_325999779_18c27f01ba.jpg  
  inflating: milli_imagenet/test/car_1260288332_8f3ec15630.jpg  
  inflating: milli_imagenet/test/dog_492288911_a3c455f54c.jpg  
  inflating: milli_imagenet/test/dog_470186220_7595143e12.jpg  
  inflating: milli_imagenet/test/car_2491565093_3315d56e69.jpg  
  inflating: milli_imagenet/test/dog_65983741_7258b41fd9.jpg  
  inflating: milli_imagenet/test/dog_180300673_180c64d718.jpg  
  inflating: milli_imagenet/test/elephant_87654345673872.

# Utils

In [4]:
# model.py

import torch.nn as nn
from collections import OrderedDict
from torchvision.models import resnet18

def get_model(args):
    # defining our deep learning architecture
    resnet = resnet18(pretrained=False)

    head = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(resnet.fc.in_features, 100)),
        ('added_relu1', nn.ReLU(inplace=True)),
        ('fc2', nn.Linear(100, 100)),
        ('added_relu2', nn.ReLU(inplace=True)),
        ('fc3', nn.Linear(100, 100))
    ]))

    resnet.fc = head

    if args.multiple_gpus:
        resnet = nn.DataParallel(resnet)

    resnet.to(args.device)

    return resnet

In [5]:
# ntxent.py

import torch

tau = 0.05

def loss_function(a, b):
    a_norm = torch.norm(a, dim=1).reshape(-1, 1)
    a_cap = torch.div(a, a_norm)
    b_norm = torch.norm(b, dim=1).reshape(-1, 1)
    b_cap = torch.div(b, b_norm)
    a_cap_b_cap = torch.cat([a_cap, b_cap], dim=0)
    a_cap_b_cap_transpose = torch.t(a_cap_b_cap)
    b_cap_a_cap = torch.cat([b_cap, a_cap], dim=0)
    sim = torch.mm(a_cap_b_cap, a_cap_b_cap_transpose)
    sim_by_tau = torch.div(sim, tau)
    exp_sim_by_tau = torch.exp(sim_by_tau)
    sum_of_rows = torch.sum(exp_sim_by_tau, dim=1)
    exp_sim_by_tau_diag = torch.diag(exp_sim_by_tau)
    numerators = torch.exp(torch.div(torch.nn.CosineSimilarity()(a_cap_b_cap, b_cap_a_cap), tau))
    denominators = sum_of_rows - exp_sim_by_tau_diag
    num_by_den = torch.div(numerators, denominators)
    neglog_num_by_den = -torch.log(num_by_den)
    return torch.mean(neglog_num_by_den)

In [6]:
# plotfuncs.py

import matplotlib.pyplot as plt
import seaborn as sns
import os

def plot_losses(arr, legend_name, fname):
    plt.figure(figsize=(10, 10))
    sns.set_style('darkgrid')
    plt.plot(arr)
    plt.legend(legend_name)
    plt.savefig(fname)
    plt.close()

In [7]:
# transforms.py

import torchvision

def get_color_distortion(s=1.0):
    color_jitter = torchvision.transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
    rnd_color_jitter =  torchvision.transforms.RandomApply([color_jitter], p=0.8)
    rnd_gray =  torchvision.transforms.RandomGrayscale(p=0.2)
    color_distort =  torchvision.transforms.Compose([rnd_color_jitter, rnd_gray])
    return color_distort

def deprocess_and_show(img_tensor):
    return  torchvision.transforms.Compose([
             torchvision.transforms.Normalize((0, 0, 0), (2, 2, 2)),
             torchvision.transforms.Normalize((-0.5, -0.5, -0.5), (1, 1, 1)),
             torchvision.transforms.ToPILImage()
          ])(img_tensor)

# Simclr 

In [8]:
# simclr.py

import torch
import os
import numpy as np
from pathlib import Path

class SimCLR:
    def __init__(self, model, optimizer, dataloaders, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.dataloaders = dataloaders
        self.loss_fn = loss_fn
        
    def load_model(self, args):
        self.model.load_state_dict(torch.load(args.model_path), strict=False)

        if 'remove_top_layers' in vars(args):
            if args.remove_top_layers > 0:
                if args.multiple_gpus:
                    temp = list(self.model.module.fc.children())
                    if args.remove_top_layers <= len(temp):
                        self.model.module.fc = torch.nn.Sequential(*temp[:-args.remove_top_layers])
                else:
                    temp = list(self.model.fc.children())
                    if args.remove_top_layers <= len(temp):
                        self.model.fc = torch.nn.Sequential(*temp[:-args.remove_top_layers])

    def get_representations(self, args, mode):

        self.model.eval()

        res = {
        'X':torch.FloatTensor(),
        'Y':torch.LongTensor()
        }

        with torch.no_grad():
            for batch in self.dataloaders[mode]:
                x = batch['image'].to(args.device)
                label = batch['label']

                # get their outputs
                pred = self.model(x)

                res['X'] = torch.cat((res['X'], pred.cpu()))
                res['Y'] = torch.cat((res['Y'], label.cpu()))


        res['X'] = np.array(res['X'])
        res['Y'] = np.array(res['Y'])

        return res

    def train(self, args, num_epochs, log_interval):
        '''
        trains self.model on the train dataset for num_epochs
        and saves model and loss graph after log_interval
        number of epochs
        '''
        
        batch_losses = []

        def logging():
            # Plot the training losses Graph and save it
            
            Path(os.path.join(args.respath,"plots")).mkdir(parents=True, exist_ok=True)
            
            plot_losses(batch_losses, 'Training Losses', os.path.join(args.respath,'plots/training_losses.png'))
            
            Path(os.path.join(args.respath,"model")).mkdir(parents=True, exist_ok=True)
            
            # Store model and optimizer files
            torch.save(self.model.state_dict(), os.path.join(args.respath,"model/model.pth"))
            torch.save(self.optimizer.state_dict(), os.path.join(args.respath,"model/optimizer.pth"))
            np.savez(os.path.join(args.respath,"model/lossesfile"), np.array(batch_losses))
        
        self.model.train()
    
        # run a for loop for num_epochs
        for epoch in range(num_epochs):

            # run a for loop for each batch
            for batch in self.dataloaders['train']:

                # zero out grads
                self.optimizer.zero_grad()

                x1 = batch['image1'].to(args.device)
                x2 = batch['image2'].to(args.device)

                # get their outputs
                y1 = self.model(x1)
                y2 = self.model(x2)

                # get loss value
                loss = self.loss_fn(y1, y2)
                
                batch_losses.append(loss.cpu().data.item())

                # perform backprop on loss value to get gradient values
                loss.backward()

                # run the optimizer
                self.optimizer.step()
                
            if epoch%log_interval == log_interval-1:
                logging()
            
        logging()



In [9]:
# main.py

import argparse 
import torch
torch.cuda.empty_cache()
import torchvision
import gc

gc.collect()
from PIL import Image
import os
# '../milli_imagenet' 'results' -bs 250 -c --multiple_gpus
# making a command line interface
parser = argparse.ArgumentParser(description="This is the command line interface for the SimCLR framework for self-supervised learning. Below are the arguments which are required to run this program.")

parser.add_argument('datapath', type=str ,help="Path to the data root folder which contains train and test folders")

parser.add_argument('respath', type=str, help="Path to the results directory where the saved model and evaluation graphs would be stored. ")

parser.add_argument('-bs','--batch_size',default=250, type=int, help="The batch size for self-supervised training")

parser.add_argument('-nw','--num_workers',default=2,type=int,help="The number of workers for loading data")

parser.add_argument('-c','--cuda',action='store_true')

parser.add_argument('--multiple_gpus', action='store_true')

class TrainDataset(torch.utils.data.Dataset):

    def __init__(self, args):
        self.args = args
        
        with open(os.path.join(args.datapath, "train","names.txt")) as f:
            self.filenames = f.read().split('\n')
 
    def __len__(self):
        return len(self.filenames)

    def tensorify(self, img):
        return torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(
            torchvision.transforms.ToTensor()(img)
            )

    def augmented_image(self, img):
        return get_color_distortion(1)(
            torchvision.transforms.RandomResizedCrop(224)(img)
            )    

    def __getitem__(self, idx):
        img = torchvision.transforms.Resize((224, 224))(
                                Image.open(os.path.join(args.datapath, 'train', self.filenames[idx])).convert('RGB')
                            )
        return {
        'image1':self.tensorify(
            self.augmented_image(img)
            ), 
        'image2': self.tensorify(
            self.augmented_image(img)
            )
        }

if __name__ == '__main__':
    argsv = ['./milli_imagenet', 'results', '-bs', '250', '-c', '--multiple_gpus']
    args = parser.parse_args(argsv)
    args.device = torch.device('cuda' if args.cuda else 'cpu')
    model = get_model(args)
    
    optimizer = torch.optim.Adam(
        model.parameters(), 
        lr=0.001, 
        weight_decay=1e-4
        )
    
    dataloaders = {}
    
    dataloaders['train'] = torch.utils.data.DataLoader(
        TrainDataset(args), 
        batch_size=args.batch_size, 
        shuffle=True, 
        num_workers=args.num_workers
        )
    
    loss_fn = loss_function
    simclrobj = SimCLR(model, optimizer, dataloaders, loss_fn)
    simclrobj.train(args, 200, 10)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [14]:
# linear_evaluation.py

import argparse
import torch
import os
import torchvision
from PIL import Image
import json
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# making a command line interface
parser = argparse.ArgumentParser(description="This is the command line interface for the linear evaluation model")

parser.add_argument('datapath', type=str ,help="Path to the data root folder which contains train and test folders")

parser.add_argument('model_path', type=str, help="Path to the trained self-supervised model")

parser.add_argument('respath', type=str, help="Path to the results where the evaluation metrics would be stored. ")

parser.add_argument('-bs','--batch_size',default=250, type=int, help="The batch size for evaluation")

parser.add_argument('-nw','--num_workers',default=2,type=int,help="The number of workers for loading data")

parser.add_argument('-c','--cuda',action='store_true')

parser.add_argument('--multiple_gpus', action='store_true')

parser.add_argument('--remove_top_layers', default=1, type=int)


class TrainDataset(torch.utils.data.Dataset):

    def __init__(self, args):
        self.args = args
        
        with open(os.path.join(args.datapath,'train','train.json')) as f:
            self.filedict = json.load(f)

        with open(os.path.join(args.datapath,'mapper.json')) as f:
            self.mapper = json.load(f)

        self.filenames = list(self.filedict)
    
    def __len__(self):
        return len(self.filenames)

    def tensorify(self, img):
        return torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(
            torchvision.transforms.ToTensor()(img)
            )

    def __getitem__(self, idx):
        return {
        'image':self.tensorify(
                    torchvision.transforms.Resize((224, 224))(
                                Image.open(os.path.join(args.datapath, 'train', self.filenames[idx])).convert('RGB')
                            )
                    ), 
        'label':self.mapper[self.filedict[self.filenames[idx]]]
        }


class TestDataset(torch.utils.data.Dataset):

    def __init__(self, args):
        self.args = args
        
        with open(os.path.join(args.datapath,'test','test.json')) as f:
            self.filedict = json.load(f)

        with open(os.path.join(args.datapath,'mapper.json')) as f:
            self.mapper = json.load(f)

        self.filenames = list(self.filedict)
    
    def __len__(self):
        return len(self.filenames)

    def tensorify(self, img):
        return torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(
            torchvision.transforms.ToTensor()(img)
            )

    def __getitem__(self, idx):
        return {
        'image':self.tensorify(
                    torchvision.transforms.Resize((224, 224))(
                                Image.open(os.path.join(args.datapath, 'test', self.filenames[idx])).convert('RGB')
                            )
                    ), 
        'label':self.mapper[self.filedict[self.filenames[idx]]]
        }


if __name__ == '__main__':
    # '../milli_imagenet/' 'results/model/model.pth' 'results' -c --multiple_gpus -bs 125 
    argsv = ['./milli_imagenet/', 'results/model/model.pth', 'results', '-c' , '--multiple_gpus', '-bs', '125']
    args = parser.parse_args(argsv)
    args.device = torch.device('cuda' if args.cuda else 'cpu')
    model = get_model(args)

    dataloaders = {}
    
    dataloaders['train'] = torch.utils.data.DataLoader(
        TrainDataset(args), 
        batch_size=args.batch_size, 
        shuffle=False, 
        num_workers=args.num_workers
        )
    
    dataloaders['test'] = torch.utils.data.DataLoader(
        TestDataset(args), 
        batch_size=args.batch_size, 
        shuffle=False, 
        num_workers=args.num_workers
        )

    simclrobj = SimCLR(
        model, 
        None, 
        dataloaders, 
        None
        )
    simclrobj.load_model(args)


    reprs = {}

    for mode in ['train', 'test']:
        reprs[mode] = simclrobj.get_representations(args, mode=mode)    

    scaler = StandardScaler().fit(reprs['train']['X'])

    Xtrain = scaler.transform(reprs['train']['X'])
    Xtest = scaler.transform(reprs['test']['X'])

    clf = LogisticRegression(
        multi_class='multinomial', 
        max_iter=1000, 
        n_jobs=16,
        ).fit(
        Xtrain, reprs['train']['Y']
        )
    
    ypred = clf.predict(Xtest)
    print(
        classification_report(
        reprs['test']['Y'], 
        ypred, 
        digits=4, 
        target_names=['car', 'airplane', 'elephant', 'dog', 'cat'])
    )

              precision    recall  f1-score   support

         car     0.8148    0.8800    0.8462        50
    airplane     0.8222    0.7400    0.7789        50
    elephant     0.6071    0.6800    0.6415        50
         dog     0.4821    0.5400    0.5094        50
         cat     0.6154    0.4800    0.5393        50

    accuracy                         0.6640       250
   macro avg     0.6683    0.6640    0.6631       250
weighted avg     0.6683    0.6640    0.6631       250

