In [1]:
import os
import glob
import torch
import torch.nn as nn
import numpy as np
# import argparse
# from pathlib import Path
# import torch.backends.cudnn
from torch.optim import Adam
import random
from torch.optim import lr_scheduler 
from torch.autograd import Variable
# import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset
# from torchvision import transforms, datasets, models
import torch.utils.model_zoo as model_zoo
# from torch.utils.data.sampler import SubsetRandomSampler
# import torchvision
import pandas as pd
from timm import models
from tqdm.notebook import tqdm as tqdm_notebook
from transformsdata import (DualCompose,
                        ImageOnly,
                        Normalize,
                        HorizontalFlip,
                        Rotate,
                        CenterCrop,
                        VerticalFlip)

In [2]:
# Set before wandb is imported/initialised in the next cell.
# NOTE(review): presumably W&B reads this variable to label the run's source
# notebook - confirm against the wandb docs. The "efficentnet" spelling is a
# runtime value and matches the run name used in wandb.init below.
os.environ['WANDB_NOTEBOOK_NAME'] = "efficentnet-b3-765"

In [3]:
# NOTE(review): this import sits outside the notebook's main import cell;
# consider moving it there so all dependencies are visible in one place.
import wandb
# Start the W&B run that the final wandb.log call of this notebook reports to.
wandb.init(project='dr-challenge', name='efficentnet-b3-765')

W&B Run: https://app.wandb.ai/ubamba98/dr-challenge/runs/8o7xqu3j

## SEED

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only interpreters started after this point pick the variable up; the
    # hash seed of the already-running kernel is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one. The call is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The autotuner may pick different (non-deterministic) kernels from run
    # to run, so it has to be off for `deterministic` to be meaningful.
    torch.backends.cudnn.benchmark = False
seed_everything(43)

## HELPER FUNCTIONS

In [5]:
def load_pretrained(model, num_classes=1000, in_chans=3, filter_fn=None, strict=True):
    """Download the checkpoint named in ``model.default_cfg`` and load it into ``model``.

    Args:
        model: Module exposing a ``default_cfg`` dict with the keys ``url``,
            ``first_conv``, ``classifier`` and ``num_classes``.
        num_classes: Number of outputs of the classifier of ``model``.
        in_chans: Number of input channels of ``model``.
        filter_fn: Optional callable applied to the downloaded state dict; its
            return value is what gets loaded.
        strict: Forwarded to ``load_state_dict``. It is forced to ``False``
            whenever this function drops entries from the checkpoint, because
            the model then necessarily has keys the checkpoint lacks.
    """
    cfg = model.default_cfg

    state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')

    if in_chans != 3:
        # The checkpoint's first convolution does not fit a different channel
        # count: drop it and leave that layer at its fresh initialisation.
        del state_dict[cfg['first_conv'] + '.weight']
        # Without this, strict loading fails on the key removed just above.
        strict = False

    classifier_name = cfg['classifier']
    weight_key = classifier_name + '.weight'
    bias_key = classifier_name + '.bias'
    if num_classes == 1000 and cfg['num_classes'] == 1001:
        # special case for imagenet trained models with extra background class in pretrained weights
        state_dict[weight_key] = state_dict[weight_key][1:]
        state_dict[bias_key] = state_dict[bias_key][1:]
    elif num_classes != cfg['num_classes']:
        # completely discard fully connected for all other differences between pretrained and created model
        del state_dict[weight_key]
        del state_dict[bias_key]
        strict = False

    if filter_fn is not None:
        state_dict = filter_fn(state_dict)

    model.load_state_dict(state_dict, strict=strict)

In [6]:
def run_epoch(model, loss_fn, loader, optimizer, dtype, exp_lr_scheduler = None):
    """
    Train the model for one epoch.

    Args:
        model: Network to train; it is moved to the training device.
        loss_fn: Callable ``loss_fn(scores, labels)`` returning a scalar loss.
        loader: Iterable with a length that yields ``(inputs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        dtype: Tensor type passed to ``Tensor.type`` for inputs and labels
            (labels are converted to ``long`` afterwards).
        exp_lr_scheduler: Optional learning-rate scheduler, stepped once after
            the last batch of the epoch.

    Returns:
        Mean loss over the batches of this epoch (``0.0`` for an empty loader).
    """
    # Use the first GPU when there is one, otherwise fall back to the CPU
    # instead of crashing on a machine without CUDA.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Set the model to training mode
    model.train()

    running_loss = 0.0
    num_batches = 0
    progress = tqdm_notebook(loader, total=len(loader))
    for num_batches, (x_var, y_var) in enumerate(progress, start=1):
        x_var = x_var.type(dtype).to(device)
        y_var = y_var.type(dtype).long().to(device)

        # Run the model forward to compute scores and loss.
        scores = model(x_var)
        loss = loss_fn(scores, y_var)
        running_loss += loss.item()
        progress.set_postfix(loss=(running_loss / num_batches))

        # Run the model backward and take a step using the optimizer.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)  # gradient clipping
        optimizer.step()

    if exp_lr_scheduler is not None:
        exp_lr_scheduler.step()

    return running_loss / num_batches if num_batches else 0.0

In [7]:
def check_accuracy(model, loader, dtype):
    """
    Check the accuracy of the model.

    Args:
        model: Network to evaluate; it is switched to eval mode and is NOT
            moved to another device.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        dtype: Tensor type passed to ``Tensor.type`` for the inputs.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, as a
        float; ``0.0`` when the loader yields no samples.
    """
    # Set the model to eval mode
    model.eval()

    # Evaluate on whatever device the model already lives on. A model without
    # parameters can run anywhere, so pick the GPU only if one exists.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    num_correct, num_samples = 0, 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x_var, y_var in loader:
            x_var = x_var.type(dtype).to(device)
            y_var = y_var.to(device)
            # Run the model forward, and compare the argmax score with the
            # ground-truth category (kept on `device`, no CPU round trip).
            preds = model(x_var).argmax(dim=1)
            num_correct += (preds == y_var).sum().item()
            num_samples += x_var.shape[0]

    if num_samples == 0:
        return 0.0
    # Return the fraction of datapoints that were correctly classified.
    return num_correct / num_samples

In [8]:
# Use the first GPU when there is one, otherwise fall back to the CPU so the
# notebook can still be executed (slowly) on a machine without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Unweighted cross-entropy. Per-class loss weights (1, 4, 4, 6) were tried here
# previously; the training DataLoader instead repeats the files of each class
# with those same factors.
loss_fn = nn.CrossEntropyLoss()
# Tensor type the batches are cast to inside run_epoch / check_accuracy.
dtype = torch.FloatTensor

## MODEL

In [9]:
# Build the network with a 4-class head and without timm's own weight loading
# (pretrained=False); the checkpoint is loaded by the local helper instead,
# which discards the checkpoint's classifier whenever its class count differs
# from num_classes.
model = models.tf_efficientnet_b3(pretrained=False, num_classes=4, in_chans=3)
load_pretrained(model, num_classes=4, in_chans=3)
# As the last expression of the cell this also echoes the full module repr.
model.to(device)

GenEfficientNet(
  (conv_stem): Conv2dSame(3, 40, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(40, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
        (bn1): BatchNorm2d(40, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
          (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
        )
        (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): DepthwiseSeparableConv(
        (conv_dw): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False)
        (bn1): 

In [10]:
# Register the model with the active W&B run; the call's return value is what
# gets echoed as this cell's output.
# NOTE(review): presumably this hooks gradient/parameter logging - confirm in
# the wandb documentation.
wandb.watch(model)

[<wandb.wandb_torch.TorchGraph at 0x7f45b3143908>]

## TRANSFORMS

In [11]:
# Training pipeline: crop, flips and rotation, then normalisation wrapped in
# ImageOnly.
# NOTE(review): the helpers come from the project-local `transformsdata`
# module; presumably the flips/rotation are random augmentations and
# CenterCrop(64) yields 64x64 patches - confirm there.
# in_ch=[6, 5, 4] matches the channel indices ImageDataset.get_img selects.
train_transform = DualCompose([
            CenterCrop(64),
            HorizontalFlip(),
            VerticalFlip(),
            Rotate(),
            ImageOnly(Normalize(in_ch=[6, 5, 4]))
        ])
    
# Validation pipeline: same crop and normalisation, without flips or rotation.
val_transform = DualCompose([
        CenterCrop(64),
        ImageOnly(Normalize(in_ch=[6, 5, 4]))
    ])

## DATA LOADER

In [12]:
# NOTE(review): moves the kernel's working directory one level up so that the
# relative paths used below ("train", "val", "logs", "models") resolve there.
# This is not idempotent: running the cell twice moves up two levels.
cd ..

/home/ubamba98


In [13]:
class ImageDataset(Dataset):
    """Dataset of ``.npy`` images stored in one sub-folder per class.

    Files are discovered with ``<split_dir>/*<class index>/*`` for the training
    split and ``<val_dir>/*/*`` for the validation split. The label of a file
    is the integer after the last ``_`` in the name of its parent folder.

    Args:
        transform: Optional callable applied to the channel-selected array;
            ``None`` leaves the array unchanged.
        limit: Stored on the instance but not used by any method of this class.
        mode: ``"train"`` or ``"val"``; selects which file list is served.
        train_dir: Root folder of the training split.
        val_dir: Root folder of the validation split.
        weights: One integer per class (class index = position); the file list
            of that class is repeated this many times in the training split.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"val"``.
    """

    # Indices taken from the last axis of every loaded array.
    CHANNELS = [6, 5, 4]

    def __init__(self, transform=None, limit=None, mode = "train", train_dir="train", val_dir="val", weights = (1, 1, 1, 1)):
        if mode not in ("train", "val"):
            raise ValueError("mode must be 'train' or 'val', got %r" % (mode,))

        self.transform = transform
        self.limit = limit
        self.mode = mode
        self.train_dir = train_dir
        self.val_dir = val_dir

        # glob returns files in arbitrary order; sorting keeps the index ->
        # file mapping stable between runs.
        self.train_files = []
        for class_idx, repeat in enumerate(weights):
            class_files = sorted(glob.glob(self.train_dir + "/*" + str(class_idx) + "/*"))
            self.train_files += class_files * repeat
        self.val_files = sorted(glob.glob(self.val_dir + "/*/*"))

        self.train_sample_count = len(self.train_files)
        self.val_sample_count = len(self.val_files)

    @staticmethod
    def _label_from_path(path):
        """Return the integer after the last underscore of the parent folder name."""
        class_dir = os.path.basename(os.path.dirname(path))
        return int(class_dir.split("_")[-1])

    def _active_files(self):
        """Return the file list of the current mode (anything but "train" means validation)."""
        return self.train_files if self.mode == "train" else self.val_files

    def __len__(self):
        return len(self._active_files())

    def __getitem__(self, idx):
        path = self._active_files()[idx]
        return (self.get_img(path), self._label_from_path(path))

    def get_img(self, img_dir):
        """Load one ``.npy`` file and return it as a channels-first float tensor."""
        img = np.load(img_dir)
        img = img[:, :, self.CHANNELS]  # select the channels to run on
        if self.transform is not None:
            img = self.transform(img)
        # Move the channel axis from last to first position.
        img = torch.from_numpy(np.moveaxis(img, -1, 0)).float()
        return img

In [14]:
# Training batches: the file lists of classes 0..3 are repeated 1/4/4/6 times,
# and the order is reshuffled every epoch.
train_loader = DataLoader(
    ImageDataset(transform=train_transform, mode="train", weights=[1, 4, 4, 6]),
    batch_size=256,
    shuffle=True,
    num_workers=4,
)
# Validation batches: fixed order, validation transform.
valid_loader = DataLoader(
    ImageDataset(transform=val_transform, mode="val"),
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

dataloaders = {'train': train_loader, 'val': valid_loader}

# Number of batches (not samples) per split.
dataloaders_sizes = {split: len(batches) for split, batches in dataloaders.items()}
print(dataloaders_sizes)

{'train': 799, 'val': 331}


## TRAIN

In [15]:
# Per-epoch accuracy history, appended to by all three training stages below
# and rewritten to logs/b3-765.csv after every epoch.
# NOTE(review): re-running a training cell without re-running this one keeps
# appending to the existing history.
train_acc_list=[]
val_acc_list=[]

In [16]:
# Number of epochs for stage 1 (only the classifier head is trainable).
num_epochs1 = 5

In [17]:
# Stage 1: freeze the whole network, then unfreeze only the classifier head.
# NOTE(review): run_epoch() still calls model.train(), so the BatchNorm layers
# of the frozen backbone keep updating their running statistics.
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True

# Create the output folders up front so a missing directory cannot abort the
# run only after the first (long) epoch has finished.
os.makedirs('logs', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Give the optimizer only the parameters that are actually trainable.
optimizer1 = torch.optim.Adam(
    [param for param in model.parameters() if param.requires_grad], lr=1e-5)

# Update only the last layer for a few epochs.
for epoch in range(num_epochs1):
    # Run an epoch over the training data.
    print('Starting epoch %d / %d' % (epoch + 1, num_epochs1))
    run_epoch(model, loss_fn, train_loader, optimizer1, dtype)

    print('Calculating accuracies')
    # Check accuracy on the train and val sets. The train figure is measured
    # through train_loader, i.e. with the training transform and the repeated
    # file lists.
    train_acc = check_accuracy(model, train_loader, dtype)
    val_acc = check_accuracy(model, valid_loader, dtype)
    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)
    print('Train accuracy: ', train_acc, ';    Val accuracy: ', val_acc, "\n")

    # Per-epoch W&B logging is disabled; only the best validation accuracy is
    # logged at the end of the notebook. The whole history is rewritten to CSV
    # after every epoch. NOTE(review): the 'test_accuracy' column holds the
    # validation accuracy.
    df_data=np.array([train_acc_list, val_acc_list]).T
    df = pd.DataFrame(df_data,columns = ['train_accuracy','test_accuracy'])
    df.to_csv('logs/b3-765.csv')

# Save the head-only result once the stage is finished. num_epochs1 equals
# epoch + 1 after a complete loop and is also defined when the loop is empty.
model_wgt = "models/b3-765_1_e" + str(num_epochs1) + ".pth"
torch.save(model.state_dict(), model_wgt)

Starting epoch 1 / 5


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.25812953441384673 ;    Val accuracy:  0.23687943262411348 

Starting epoch 2 / 5


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.258383926343752 ;    Val accuracy:  0.25011820330969264 

Starting epoch 3 / 5


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.2603407873430231 ;    Val accuracy:  0.24964539007092199 

Starting epoch 4 / 5


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies

Calculating accuracies
Train accuracy:  0.2653650279586515 ;    Val accuracy:  0.262789598108747 



In [18]:
# Number of epochs for stage 2 (whole network trainable). Stage 3 also uses it
# as the starting epoch index of its loop.
num_epochs2 = 10

In [19]:
# Stage 2: fine-tune the entire model for a few epochs. To do this we need
# gradients with respect to all model parameters, so every parameter is
# flagged as requiring gradients.
for param in model.parameters():
    param.requires_grad = True

# Best validation accuracy seen so far; stage 3 continues from this value.
best_acc=0

# Create the output folders up front so a missing directory cannot abort the
# run only after the first (long) epoch has finished.
os.makedirs('logs', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Construct a new Optimizer that will update all model parameters.
# NOTE(review): lr=3e-3 is 300x the stage-1 rate and 10x the stage-3 rate, so
# this is not a "small" learning rate.
optimizer2 = torch.optim.Adam(model.parameters(), lr= 3e-3)

# Train the entire model, checking accuracy on the train and validation sets
# after each epoch.
for epoch in range(num_epochs2):
    print('Starting epoch %d / %d' % (epoch + 1, num_epochs2))
    run_epoch(model, loss_fn, train_loader, optimizer2, dtype)

    print('Calculating accuracies')
    train_acc = check_accuracy(model, train_loader, dtype)
    val_acc = check_accuracy(model, valid_loader, dtype)

    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)

    print('Train accuracy: ', train_acc, ';    Val accuracy: ', val_acc)

    # Per-epoch W&B logging is disabled; only the best validation accuracy is
    # logged at the end of the notebook. The whole history is rewritten to CSV
    # after every epoch. NOTE(review): the 'test_accuracy' column holds the
    # validation accuracy.
    df_data=np.array([train_acc_list, val_acc_list]).T
    df = pd.DataFrame(df_data,columns = ['train_accuracy','test_accuracy'])
    df.to_csv('logs/b3-765.csv')

    # Keep a checkpoint for every epoch that improves the validation accuracy.
    if best_acc<val_acc:
        best_acc=val_acc
        print("*** New optimal found, saving state ***")
        model_wgt = "models/b3-765_e" + str(epoch+1) + ".pth"
        torch.save(model.state_dict(), model_wgt)

Starting epoch 1 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.5801701490638866 ;    Val accuracy:  0.6363120567375886
*** New optimal found, saving state ***
Starting epoch 2 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.6266651664065672 ;    Val accuracy:  0.6672340425531915
*** New optimal found, saving state ***
Starting epoch 3 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.6697112162380325 ;    Val accuracy:  0.6526713947990543
Starting epoch 4 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.672612262669452 ;    Val accuracy:  0.6879432624113475
*** New optimal found, saving state ***
Starting epoch 5 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.6646625148599132 ;    Val accuracy:  0.6322458628841607
Starting epoch 6 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.7050668023423626 ;    Val accuracy:  0.6571158392434988
Starting epoch 7 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.7046852144475048 ;    Val accuracy:  0.7300236406619386
*** New optimal found, saving state ***
Starting epoch 8 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.7135351183167081 ;    Val accuracy:  0.6553191489361702
Starting epoch 9 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.7369049308005029 ;    Val accuracy:  0.6692198581560284
Starting epoch 10 / 10


HBox(children=(FloatProgress(value=0.0, max=799.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.740158212211791 ;    Val accuracy:  0.6615602836879433


In [20]:
# Rebuild the loaders for the last stage: the training dataset now uses its
# default weights, so no class is repeated any more. This rebinds the
# train_loader / valid_loader names used by the earlier stages.
train_loader = DataLoader(
    ImageDataset(transform=train_transform, mode="train"),
    batch_size=256,
    shuffle=True,
    num_workers=4,
)
valid_loader = DataLoader(
    ImageDataset(transform=val_transform, mode="val"),
    batch_size=32,
    shuffle=False,
    num_workers=4,
)

dataloaders = {'train': train_loader, 'val': valid_loader}

# Number of batches (not samples) per split.
dataloaders_sizes = {split: len(batches) for split, batches in dataloaders.items()}
print(dataloaders_sizes)

{'train': 335, 'val': 331}


In [21]:
# Final epoch number of the whole run: stage 3 loops over
# range(num_epochs2, num_epochs3), so it is a cumulative count, not a
# per-stage one.
num_epochs3=22

In [22]:
# Stage 3: keep fine-tuning the entire model on the un-repeated training set
# with a 10x lower learning rate than stage 2. All parameters stay trainable.
# NOTE(review): this cell relies on `best_acc` (and the accuracy lists) left
# behind by the stage-2 cell, so it cannot be run on its own.
for param in model.parameters():
    param.requires_grad = True

# Create the output folders up front so a missing directory cannot abort the
# run only after the first (long) epoch has finished.
os.makedirs('logs', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Fresh optimizer (and therefore fresh Adam state); it rebinds the
# `optimizer2` name used by stage 2.
optimizer2 = torch.optim.Adam(model.parameters(), lr= 3e-4)

# Epoch numbering continues where stage 2 stopped, checking accuracy on the
# train and validation sets after each epoch.
for epoch in range(num_epochs2, num_epochs3):
    print('Starting epoch %d / %d' % (epoch + 1, num_epochs3))
    run_epoch(model, loss_fn, train_loader, optimizer2, dtype)

    print('Calculating accuracies')
    train_acc = check_accuracy(model, train_loader, dtype)
    val_acc = check_accuracy(model, valid_loader, dtype)

    train_acc_list.append(train_acc)
    val_acc_list.append(val_acc)

    print('Train accuracy: ', train_acc, ';    Val accuracy: ', val_acc)

    # The whole history is rewritten to CSV after every epoch.
    # NOTE(review): the 'test_accuracy' column holds the validation accuracy.
    df_data=np.array([train_acc_list, val_acc_list]).T
    df = pd.DataFrame(df_data,columns = ['train_accuracy','test_accuracy'])
    df.to_csv('logs/b3-765.csv')

    # Keep a checkpoint for every epoch that improves the validation accuracy.
    if best_acc<val_acc:
        best_acc=val_acc
        print("*** New optimal found, saving state ***")
        model_wgt = "models/b3-765_e" + str(epoch+1) + ".pth"
        torch.save(model.state_dict(), model_wgt)

Starting epoch 11 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8186220738908209 ;    Val accuracy:  0.7677541371158393
*** New optimal found, saving state ***
Starting epoch 12 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8241650523957337 ;    Val accuracy:  0.7709692671394799
*** New optimal found, saving state ***
Starting epoch 13 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8255303755222069 ;    Val accuracy:  0.7713475177304965
*** New optimal found, saving state ***
Starting epoch 14 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8277358974957406 ;    Val accuracy:  0.7716312056737589
*** New optimal found, saving state ***
Starting epoch 15 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8315634700212383 ;    Val accuracy:  0.7734278959810875
*** New optimal found, saving state ***
Starting epoch 16 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8326837351506523 ;    Val accuracy:  0.7704018912529551
Starting epoch 17 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8339090251359489 ;    Val accuracy:  0.7711583924349882
Starting epoch 18 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8328471071486918 ;    Val accuracy:  0.7719148936170213
Starting epoch 19 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8373164982379163 ;    Val accuracy:  0.7720094562647755
Starting epoch 20 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8368847293859547 ;    Val accuracy:  0.7750354609929078
*** New optimal found, saving state ***
Starting epoch 21 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8385651270800756 ;    Val accuracy:  0.774468085106383
Starting epoch 22 / 22


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Calculating accuracies
Train accuracy:  0.8401871776320395 ;    Val accuracy:  0.7746572104018913


In [23]:
# Log a single value to W&B: the best validation accuracy seen during stages
# 2 and 3 (best_acc is reset to 0 at the start of stage 2).
wandb.log({"val_acc": best_acc})

In [None]:
# WARNING: powers off the host machine. With "Restart & Run All" this cell
# runs automatically after training and takes the kernel (and anything else
# running on the machine) down with it.
!sudo shutdown -h now