In [1]:
from pathlib import Path
import argparse
import json
import math
import os
import random
import signal
import subprocess
import sys
import time
from torch import nn, optim
import torch
import torchvision
import torchvision.transforms as transforms
# from utils import BenchmarkModule
import lightly
import lightly.models as models
import lightly.loss as loss
# import lightly.data as data
from lightly.models.barlowtwins import BarlowTwins
from lightly.models.simclr import SimCLR
from pytorch_lightning.callbacks import ModelCheckpoint

from simclr.modules.transformations import TransformsSimCLR
from PIL import Image, ImageOps, ImageFilter
from byol_pytorch import BYOL
import lightly
from lightly.models._momentum import _MomentumEncoderMixin
from lightly.models.batchnorm import get_norm_layer
import torch.nn.functional as F



import pytorch_lightning as pl
from pytorch_lightning import Trainer, LightningModule

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root, split, transform, limit=0):
        r"""
        Args:
            root: Location of the dataset folder, usually it is /dataset
            split: The split you want to used, it should be one of train, val or unlabeled.
            transform: the transform you want to applied to the images.
        """

        self.split = split
        self.transform = transform

        self.image_dir = os.path.join(root, split)
        label_path = os.path.join(root, f"{split}_label_tensor.pt")

        if limit == 0:
            self.num_images = len(os.listdir(self.image_dir))
        else:
            self.num_images = limit

        if os.path.exists(label_path):
            self.labels = torch.load(label_path)
        else:
            self.labels = -1 * torch.ones(self.num_images, dtype=torch.long)

    def __len__(self):
        return self.num_images

    def __getitem__(self, idx):
        with open(os.path.join(self.image_dir, f"{idx}.png"), 'rb') as f:
            img = Image.open(f).convert('RGB')
            
        if self.transform == None:
            return img, self.labels[idx]            

        return self.transform(img), self.labels[idx], 2
    
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(p=0.5),transforms.RandomVerticalFlip(p=0.5)])   
# the collate function applies random transforms to the input images
collate_fn = lightly.data.SimCLRCollateFunction(
    input_size=32,
    gaussian_blur=0.,
)

# create a dataset from your image folder
dataset = CustomDataset(root='/dataset', split='unlabeled', transform=train_transform)

# build a PyTorch dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=512, shuffle=True, collate_fn=collate_fn, pin_memory=True, num_workers=4)



In [2]:
def knn_predict(feature, feature_bank, feature_labels, classes: int, knn_k: int, knn_t: float):
    """Helper method to run kNN predictions on features based on a feature bank
    Args:
        feature: Tensor of shape [N, D] consisting of N D-dimensional features
        feature_bank: Tensor of a database of features used for kNN
        feature_labels: Labels for the features in our feature_bank
        classes: Number of classes (e.g. 10 for CIFAR-10)
        knn_k: Number of k neighbors used for kNN
        knn_t: 
    """
    # compute cos similarity between each feature vector and feature bank ---> [B, N]
    sim_matrix = torch.mm(feature, feature_bank)
    # [B, K]
    sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1)
    # [B, K]
    sim_labels = torch.gather(feature_labels.expand(feature.size(0), -1), dim=-1, index=sim_indices)
    # we do a reweighting of the similarities 
    sim_weight = (sim_weight / knn_t).exp()
    # counts for each class
    one_hot_label = torch.zeros(feature.size(0) * knn_k, classes, device=sim_labels.device)
    # [B*K, C]
    one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
    # weighted score ---> [B, C]
    pred_scores = torch.sum(one_hot_label.view(feature.size(0), -1, classes) * sim_weight.unsqueeze(dim=-1), dim=1)
    pred_labels = pred_scores.argsort(dim=-1, descending=True)
    return pred_labels


class BenchmarkModule(pl.LightningModule):
    """A PyTorch Lightning Module for automated kNN callback
    
    At the end of every training epoch we create a feature bank by inferencing
    the backbone on the dataloader passed to the module. 
    At every validation step we predict features on the validation data.
    After all predictions on validation data (validation_epoch_end) we evaluate
    the predictions on a kNN classifier on the validation data using the 
    feature_bank features from the train data.
    We can access the highest accuracy during a kNN prediction using the 
    max_accuracy attribute.
    """
    def __init__(self, dataloader_kNN, gpus, classes, knn_k, knn_t):
        super().__init__()
        self.backbone = nn.Module()
        self.max_accuracy = 0.0
        self.dataloader_kNN = dataloader_kNN
        self.gpus = gpus
        self.classes = classes
        self.knn_k = knn_k
        self.knn_t = knn_t

    def training_epoch_end(self, outputs):
        # update feature bank at the end of each training epoch
        self.backbone.eval()
        self.feature_bank = []
        self.targets_bank = []
        with torch.no_grad():
            for data in self.dataloader_kNN:
                img, target, _ = data
                if self.gpus > 0:
                    img = img[0].cuda()
                    target = target[0].cuda()
                feature = self.backbone(img).squeeze()
                feature = F.normalize(feature, dim=1)
                self.feature_bank.append(feature)
                self.targets_bank.append(target)
#         print("targets_bank:",self.targets_bank)
#         self.feature_bank = torch.cat(self.feature_bank).t().contiguous()
#         self.targets_bank = torch.cat(self.targets_bank).t().contiguous()
        self.backbone.train()

    def validation_step(self, batch, batch_idx):
        # we can only do kNN predictions once we have a feature bank
        if hasattr(self, 'feature_bank') and hasattr(self, 'targets_bank'):
            images, targets, _ = batch
            feature = self.backbone(images).squeeze()
            feature = F.normalize(feature, dim=1)
            pred_labels = knn_predict(feature, self.feature_bank, self.targets_bank, self.classes, self.knn_k, self.knn_t)
            num = images.size(0)
            top1 = (pred_labels[:, 0] == targets).float().sum().item()
            return (num, top1)
    
    def validation_epoch_end(self, outputs):
        if outputs:
            total_num = 0
            total_top1 = 0.
            for (num, top1) in outputs:
                total_num += num
                total_top1 += top1
            acc = float(total_top1 / total_num)
            if acc > self.max_accuracy:
                self.max_accuracy = acc
            self.log('kNN_accuracy', acc * 100.0, prog_bar=True)

In [3]:
def _get_byol_mlp(num_ftrs: int, hidden_dim: int, out_dim: int):
    """Returns a 2-layer MLP with batch norm on the hidden layer.
    Reference (12.03.2021)
    https://arxiv.org/abs/2006.07733
    """
    modules = [
        nn.Linear(num_ftrs, hidden_dim),
        nn.BatchNorm1d(hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, out_dim)
    ]
    return nn.Sequential(*modules)


class BYOL(nn.Module, _MomentumEncoderMixin):
    """Implementation of the BYOL architecture.
    Attributes:
        backbone:
            Backbone model to extract features from images.
        num_ftrs:
            Dimension of the embedding (before the projection mlp).
        hidden_dim:
            Dimension of the hidden layer in the projection and prediction mlp.
        out_dim:
            Dimension of the output (after the projection/prediction mlp).
        m:
            Momentum for the momentum update of encoder.
    """

    def __init__(self,
                 # TODO adapt parameters according to paper
                 backbone: nn.Module,
                 num_ftrs: int = 512,
                 hidden_dim: int = 4096,
                 out_dim: int = 256,
                 m: float = 0.999):

        super(BYOL, self).__init__()

        self.backbone = backbone
        self.projection_head = _get_byol_mlp(num_ftrs, hidden_dim, out_dim)
        self.prediction_head = _get_byol_mlp(out_dim, hidden_dim, out_dim)
        self.momentum_backbone = None
        self.momentum_projection_head = None

        self._init_momentum_encoder()
        self.m = m

    def _forward(self,
                x0: torch.Tensor,
                x1: torch.Tensor = None,
                return_features: bool = False):
        """Forward pass through the encoder and the momentum encoder.
        Performs the momentum update, extracts features with the backbone and
        applies the projection (and prediciton) head to the output space. If
        x1 is None, only x0 will be processed otherwise, x0 is processed with
        the encoder and x1 with the momentum encoder.
        Args:
            x0:
                Tensor of shape bsz x channels x W x H.
            x1:
                Tensor of shape bsz x channels x W x H.
            return_features:
                Whether or not to return the intermediate features backbone(x).
        Returns:
            The output proejction of x0 and (if x1 is not None) the output 
            projection of x1. If return_features is True, the output for each x 
            is a tuple (out, f) where f are the features before the projection
            head.
        
        Examples:
            >>> # single input, single output
            >>> out = model._forward(x)
            >>>
            >>> # single input with return_features=True
            >>> out, f = model._forward(x, return_features=True)
            >>>
            >>> # two inputs, two outputs
            >>> out0, out1 = model._forward(x0, x1)
            >>>
            >>> # two inputs two outputs with return_features=True
            >>> (out0, f0), (out1, f1) = model._forward(x0, x1, return_features=True)
        """

        self._momentum_update(self.m)

        # forward pass of first input x0
        f0 = self.backbone(x0).squeeze()
        z0 = self.projection_head(f0)
        out0 = self.prediction_head(z0)

        # append features if requested
        if return_features:
            out0 = (out0, f0)

        if x1 is None:
            return out0

        # forward pass of second input x1
        with torch.no_grad():

            f1 = self.momentum_backbone(x1).squeeze()
            out1 = self.momentum_projection_head(f1)
        
            if return_features:
                out1 = (out1, f1)
        
        return out0, out1

    def forward(self,
                x0: torch.Tensor,
                x1: torch.Tensor = None,
                return_features: bool = False
                ):
        """Symmetrizes the forward pass (see _forward).
        Performs two forward passes, once where x0 is passed through the encoder
        and x1 through the momentum encoder and once the other way around.
        Args:
            x0:
                Tensor of shape bsz x channels x W x H.
            x1:
                Tensor of shape bsz x channels x W x H.
        Returns: TODO
        """
        p0, z1 = self._forward(x0, x1, return_features=return_features)
        p1, z0 = self._forward(x1, x0, return_features=return_features)

        return (z0, p0), (z1, p1)

In [4]:
class BYOLModule(BenchmarkModule):
    def __init__(self, dataloader, gpus, classes, knn_k, knn_t):
        super().__init__(dataloader, gpus, classes, knn_k, knn_t)
        # create a ResNet backbone and remove the classification head
        resnet = lightly.models.ResNetGenerator('resnet-34')
        self.backbone = nn.Sequential(
            *list(resnet.children())[:-1],
            nn.AdaptiveAvgPool2d(1),
        )
        # create a simsiam model based on ResNet
        # note that bartontwins has the same architecture
        self.resnet_byol = BYOL(self.backbone, num_ftrs=512, hidden_dim=1024, out_dim=1024,m=0.996)
        self.criterion = lightly.loss.SymNegCosineSimilarityLoss()
            
    def forward(self, x):
        self.resnet_simsiam(x)

    def training_step(self, batch, batch_idx):
        (x0, x1), _, _ = batch
        y0, y1 = self.resnet_byol(x0, x1)
        # symmetrize the outputs of byol and calculate the loss
        loss = self.criterion(y0, y1)
        self.log('train_loss_ssl', loss)
        return loss

    # learning rate warm-up
    def optimizer_steps(self,
                        epoch=None,
                        batch_idx=None,
                        optimizer=None,
                        optimizer_idx=None,
                        optimizer_closure=None,
                        on_tpu=None,
                        using_native_amp=None,
                        using_lbfgs=None):        

        # learning rate warmup
        if self.trainer.global_step < 1000:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 1000.)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * 2e-2 * 512 / 256

        # update params
        optimizer.step()
        optimizer.zero_grad()

    def configure_optimizers(self):
        lr = 2e-2 * 512 / 256 # linear scaling of lr
        optim = torch.optim.SGD(self.resnet_byol.parameters(), lr=lr,
                                momentum=0.9, weight_decay=5e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, 800)
        return [optim], [scheduler]

In [5]:
EPOCHS = 5


# model = BYOLModule(dataloader, 1, 800, 200, 0.1)
# checkpoint_callback = ModelCheckpoint(monitor='train_loss_ssl', save_last=True)

In [6]:
# trainer = pl.Trainer(gpus=1, deterministic=True, max_epochs=EPOCHS, default_root_dir='/scratch/nr2229/BYOL/byol-resent34', profiler="simple",
#                      precision=16, benchmark=True, callbacks=[checkpoint_callback], fast_dev_run=False)
# trainer.fit(model, train_dataloader=dataloader) 



# trainer.fit(
#     model,
#     train_dataloader=dataloader,
# )

In [7]:
byol = BYOLModule.load_from_checkpoint("/scratch/nr2229/BYOL/byol-resent34-300Epochs/lightning_logs/version_6110740/checkpoints/last.ckpt",dataloader=dataloader,gpus=1, classes=800, knn_k=200, knn_t=0.1)
# byol = BYOLModule(dataloader=dataloader,gpus=1, classes=800, knn_k=200, knn_t=0.1)
def expand_greyscale(t):
    return t.expand(3, -1, -1)



In [8]:
checkpoint_callback = ModelCheckpoint(monitor='train_loss_ssl', save_last=True)

trainer = pl.Trainer(gpus=1, deterministic=True, max_epochs=EPOCHS, default_root_dir='/scratch/nr2229/BYOL/byol-resent34-testing', profiler="simple",
                     precision=16, benchmark=True, callbacks=[checkpoint_callback], fast_dev_run=False)
trainer.fit(byol, train_dataloader=dataloader) 

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name        | Type                       | Params
-----------------------------------------------------------
0 | backbone    | Sequential                 | 21.3 M
1 | resnet_byol | BYOL                       | 47.8 M
2 | criterion   | SymNegCosineSimilarityLoss | 0     
-----------------------------------------------------------
25.0 M    Trainable params
22.9 M    Non-trainable params
47.8 M    Total params


Epoch 0:   0%|          | 0/1000 [00:02<?, ?it/s] 



Profiler Report

Action              	|  Mean duration (s)	|Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------
Total               	|  -              	|_              	|  4.8598         	|  100 %          	|
---------------------------------------------------------------------------------------------------------------------
run_training_epoch  	|  2.0659         	|1              	|  2.0659         	|  42.511         	|
on_train_end        	|  0.00097845     	|1              	|  0.00097845     	|  0.020134       	|
on_epoch_start      	|  0.00057626     	|1              	|  0.00057626     	|  0.011858       	|
on_train_start      	|  0.0003744      	|1              	|  0.0003744      	|  0.0077039      	|
cache_result        	|  6.3693e-06     	|4              	|  2.5477e-05     	|  0.00052425     	|
on_fit_start        	|  2.1257e-05     	|1              	|  2.12




1

In [9]:
class NYUImageNetDataModule(pl.LightningDataModule):
  
    def train_dataloader(self):
        train_transform = transforms.Compose([
            transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Lambda(expand_greyscale),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        trainset = CustomDataset(root='/dataset', split="train", transform=train_transform)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
        return train_loader
    
    def val_dataloader(self):
        eval_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        evalset = CustomDataset(root='/dataset', split="val", transform=eval_transform)
        eval_loader = torch.utils.data.DataLoader(evalset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)
        return eval_loader
    
    def ssl_train_dataloader(self, batch_size):
        ssl_train_transform = transforms.Compose([
#             transforms.Resize((96,96)),
            transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Lambda(expand_greyscale),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        unlabeled_dataset = CustomDataset(root='/dataset', split='unlabeled', transform=ssl_train_transform)
        unlabeled_dataloader = torch.utils.data.DataLoader(unlabeled_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
        return unlabeled_dataloader
        
    def ssl_val_dataloader(self, batch_size):
        ssl_eval_transform = transforms.Compose([
#             transforms.Resize((96,96)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        val_dataset = CustomDataset(root='/dataset', split='val', transform=ssl_eval_transform)
        val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
        return val_dataloader

In [10]:
data = NYUImageNetDataModule()

In [11]:
from simclr.modules.identity import Identity
import torch.nn.functional as F
from torchmetrics.functional import accuracy

class ResNetClassifier(LightningModule):
    def __init__(self):
        super().__init__()  
        resnet = lightly.models.ResNetGenerator('resnet-34')
#         resnet = lightly.models.ResNetGenerator('resnet-34')
        self.encoder = nn.Sequential(
            *list(resnet.children())[:-1],
            nn.AdaptiveAvgPool2d(1),
        )
#         self.encoder.fc = nn.Identity()
        states = byol.backbone.state_dict()
        self.encoder.load_state_dict(states)
        self.lastLayer = torch.nn.Sequential(
            torch.nn.Linear(512, 1024),
            torch.nn.ReLU(),
            nn.Dropout(p=0.1),
            torch.nn.Linear(1024, 800),
        )
#         self.lastLayer = torch.nn.Linear(512, 800)
        for layer in self.lastLayer.modules():
           if isinstance(layer, nn.Linear):
                layer.weight.data.normal_(mean=0.0, std=0.01)
                layer.bias.data.zero_()
        
        self.param_groups = [dict(params=self.lastLayer.parameters(), lr=0.01)]
        self.param_groups.append(dict(params=byol.parameters(), lr=0.0005))
        
        self.criterion=torch.nn.CrossEntropyLoss()
        
#         resnet = lightly.models.ResNetGenerator('resnet-34')
#         self.encoder = nn.Sequential(
#             *list(resnet.children())[:-1],
#             nn.AdaptiveAvgPool2d(1),
#         )
#         states = byol.backbone.state_dict()
#         self.encoder.load_state_dict(states)
# #         self.encoder.load_state_dict(torch.load(os.path.join('/scratch/nr2229/barlow-34/','version_5956041_backbone.pth')))
#         self.lastLayer = torch.nn.Linear(512, 800)
#         self.criterion=torch.nn.CrossEntropyLoss()

    def forward(self, x):
        x = torch.flatten(self.encoder(x), 1)
#         print(x.shape)
#         x = self.encoder(x)
#         print(x.shape)
#         x = self.relu(self.projector(x))
        x = self.lastLayer(x)
        return x
        
    def training_step(self, batch, batch_idx):
#         data, label = batch
        data = batch[0]
        label = batch[1]
        classProbs = self.forward(data)
        loss = self.criterion(classProbs, label)
        self.log('train_loss', loss)
        return loss
    
    def _evaluate(self, batch, batch_idx, stage=None):
#         x, y = batch
        x = batch[0]
        y = batch[1]
        out = self.forward(x)
        logits = F.log_softmax(out, dim=-1)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=-1)
        acc = accuracy(preds, y)

        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

        return loss, acc
    
    def validation_step(self,batch,batch_idx):
        self._evaluate(batch, batch_idx, 'val')[0]
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.param_groups, weight_decay = 1e-5)
#         optimizer = optim.SGD(self.param_groups, 0, momentum=0.9, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, EPOCHS, verbose=True)
        return {'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor': 'val_loss'}
#     def forward(self, x):
#         x = torch.flatten(self.encoder(x), 1)
# #         print("shape1",x.shape)
#         x = self.lastLayer(x)
# #         print("shape2",x.shape)
#         return x
    
#     def training_step(self, batch, batch_idx):
# #         data, label = batch
#         data = batch[0]
#         label = batch[1]
#         classProbs = self.forward(data)
#         loss = self.criterion(classProbs, label)
#         self.log('train_loss', loss)
#         return loss
    
#     def validation_step(self,batch,batch_idx):
# #         print("batch shape:", batch)
#         data = batch[0]
#         label = batch[1]
#         classProbs = self.forward(data)
#         loss = self.criterion(classProbs, label)
#         self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
#         return { 'val_loss' : loss, 'prediction' : classProbs, 'target' : label }
    
#     def configure_optimizers(self):
#         optimizer = torch.optim.Adam(self.parameters())
#         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
#         return ({'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor': 'val_loss'})

In [12]:
EPOCHS = 200
classifier = ResNetClassifier()

In [13]:
from pytorch_lightning.callbacks import ModelCheckpoint
checkpoint_callback = ModelCheckpoint(monitor='val_loss', save_last=True)
classifier_trainer = Trainer(gpus=1,deterministic=True, max_epochs=EPOCHS, default_root_dir='/scratch/nr2229/classifier-' + 'byol-resnet34-lightly', profiler="simple",
                     limit_val_batches= 0.5, benchmark=True, callbacks=[checkpoint_callback], fast_dev_run=False)


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [14]:
classifier_trainer.fit(classifier, train_dataloader=data.train_dataloader(), val_dataloaders=data.val_dataloader())


  | Name      | Type             | Params
-----------------------------------------------
0 | encoder   | Sequential       | 21.3 M
1 | lastLayer | Sequential       | 1.3 M 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
22.6 M    Trainable params
0         Non-trainable params
22.6 M    Total params


Adjusting learning rate of group 0 to 1.0000e-02.
Adjusting learning rate of group 1 to 5.0000e-04.
Epoch 0:  67%|██████▋   | 400/600 [01:36<00:48,  4.13it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/200 [00:00<?, ?it/s][A
Epoch 0:  67%|██████▋   | 402/600 [01:37<00:47,  4.14it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  67%|██████▋   | 404/600 [01:37<00:47,  4.15it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  68%|██████▊   | 406/600 [01:37<00:46,  4.17it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  68%|██████▊   | 408/600 [01:37<00:45,  4.18it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  68%|██████▊   | 410/600 [01:37<00:45,  4.20it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  69%|██████▊   | 412/600 [01:37<00:44,  4.21it/s, loss=5.32, v_num=18, val_loss=6.68, val_acc=0.0625]
Epoch 0:  69%|██████▉   | 414/600 

Saving latest checkpoint...


Epoch 61:  12%|█▏        | 74/600 [00:18<02:12,  3.98it/s, loss=3.64, v_num=18, val_loss=3.79, val_acc=0.209]



Profiler Report

Action                      	|  Mean duration (s)	|Num calls      	|  Total time (s) 	|  Percentage %   	|
-----------------------------------------------------------------------------------------------------------------------------
Total                       	|  -              	|_              	|  6766.6         	|  100 %          	|
-----------------------------------------------------------------------------------------------------------------------------
run_training_epoch          	|  108.65         	|62             	|  6736.2         	|  99.551         	|
run_training_batch          	|  0.23794        	|24475          	|  5823.5         	|  86.062         	|
evaluation_step_and_end     	|  0.064434       	|12202          	|  786.23         	|  11.619         	|
optimizer_step_and_closure_0	|  0.012384       	|24475          	|  303.09         	|  4.4792         	|
training_step_and_backward  	|  0.011432       	|24475          	|  279.79         	|  4.1348    




1

In [17]:
# classifier = ResNetClassifier()

In [14]:
# from pytorch_lightning.callbacks import ModelCheckpoint
# checkpoint_callback = ModelCheckpoint(monitor='val_loss', save_last=True)

# EPOCHS = 60
# trainer = Trainer(gpus=1,deterministic=True, max_epochs=EPOCHS, default_root_dir='/scratch/nr2229/BYOL/FineTuned/classifier_byol_resnet34_lightly_new', profiler="simple",
#                      limit_val_batches= 0.75, benchmark=True, callbacks=[checkpoint_callback], fast_dev_run=False)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [124]:
# data = NYUImageNetDataModule()
# trainer.fit(classifier, train_dataloader=data.train_dataloader(), val_dataloaders=data.val_dataloader())

In [164]:
checkpoint_dir = "/scratch/nr2229/BYOL/FineTuned"

torch.save(classifier.state_dict(), os.path.join(checkpoint_dir, 'lightly_resnet34v2_classifier.pth'))

In [165]:
net = ResNetClassifier()
net.load_state_dict(torch.load(os.path.join(checkpoint_dir, 'lightly_resnet34v2_classifier.pth')))

<All keys matched successfully>

In [15]:
# net = net.cuda()
net = classifier.cuda()

net.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in data.val_dataloader():
#         print(batch)
#         break
#         images, labels = batch
        images = batch[0]
        labels = batch[1]
        images = images.cuda()
        labels = labels.cuda()

        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()


print(f"Accuracy: {(100 * correct / total):.2f}%")

Accuracy: 21.00%
