# Downloading Essentials 

In [1]:
!pip install albumentations==0.4.6   # Used For Augmentations 
!pip install efficientnet_pytorch    # Library for Model Creation
!pip install tqdm                    # Interactive Loop

Collecting albumentations==0.4.6
  Downloading albumentations-0.4.6.tar.gz (117 kB)
[K     |████████████████████████████████| 117 kB 5.1 MB/s 
Collecting imgaug>=0.4.0
  Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
[K     |████████████████████████████████| 948 kB 44.5 MB/s 
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... [?25l[?25hdone
  Created wheel for albumentations: filename=albumentations-0.4.6-py3-none-any.whl size=65174 sha256=848c7996f3a4a18e415eb4b21f41220fe5f18ba91f67d011d5ac3d031f68f366
  Stored in directory: /root/.cache/pip/wheels/cf/34/0f/cb2a5f93561a181a4bcc84847ad6aaceea8b5a3127469616cc
Successfully built albumentations
Installing collected packages: imgaug, albumentations
  Attempting uninstall: imgaug
    Found existing installation: imgaug 0.2.9
    Uninstalling imgaug-0.2.9:
      Successfully uninstalled imgaug-0.2.9
  Attempting uninstall: albumentations
    Found existing installation: album

# Mounting Drive 

In [2]:
from google.colab import drive

# The dataset is kept on Google Drive because of its size, so Drive is mounted
# into the Colab filesystem before anything else reads from it.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Mounted at /content/drive


# Extracting Zip File

In [4]:
import zipfile

# Source archive on the mounted Drive and the local folder it is unpacked into.
ARCHIVE_PATH = "/content/drive/MyDrive/DBTRB/aptos2019-blindness-detection.zip"
EXTRACT_DIR = "APTOS"

# Unpack every member of the archive into EXTRACT_DIR (relative to the working directory).
with zipfile.ZipFile(ARCHIVE_PATH, mode='r') as archive:
    archive.extractall(path=EXTRACT_DIR)

# Importing Essentials

In [6]:
import os
import torch 
import pandas as pd 
import numpy as np
from PIL import Image
from tqdm import tqdm
import albumentations as A
from torch import nn , optim 

from efficientnet_pytorch import EfficientNet

from albumentations.pytorch import ToTensorV2

from sklearn.metrics import cohen_kappa_score

from torch.utils.data import Dataset , DataLoader

# Configurations

In [28]:
# Hyperparameters

# `is_available` must be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a CPU-only runtime.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 16                      # samples per mini-batch for every DataLoader
LEARNING_RATE = 3e-4                 # Adam step size
WEIGHT_DECAY = 5e-4                  # weight decay passed to Adam
NUM_EPOCHS = 10
NUM_WORKERS = 4                      # DataLoader worker processes
CHECKPOINT = 'checkpoint.pth.tar'
SAVE_MODEL = True
PIN_MEMORY = True                    # forwarded to the DataLoaders
LOAD_MODEL = False


# Training pipeline: resize to 300x300, take a random 256x256 crop, then apply
# random geometric / photometric augmentations before handing the image to ToTensorV2.
_train_steps = [
    A.Resize(height=300, width=300),
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Blur(p=0.3),
    A.CLAHE(p=0.3),
    A.CoarseDropout(p=0.3, max_holes=12, max_height=20, max_width=20),
    A.IAAAffine(shear=30, rotate=0, p=0.2, mode="constant"),
    ToTensorV2(),
]
TRAIN_Transforms = A.Compose(_train_steps)


# Validation pipeline: deterministic resize straight to 256x256, no augmentation.
_val_steps = [
    A.Resize(height=256, width=256),
    ToTensorV2(),
]
VAL_Transforms = A.Compose(_val_steps)

# Utils

In [9]:
# Getting Accuracy for the Data

def get_accuracy(model , loader ):
  """Run `model` over `loader`, print the plain accuracy and return predictions.

  Args:
    model: network to evaluate. It is called as `model(x)` and the arg-max over
      dimension 1 of its output is taken as the predicted class.
    loader: iterable yielding `(images, labels, filenames)` batches.

  Returns:
    `(preds, labels)` as two 1-D int64 NumPy arrays, in loader order. Both are
    empty when the loader yields no batches.

  Side effects:
    Prints the accuracy line. The model is put in eval mode for the duration of
    the call and then returned to whatever mode it was in beforehand (even if
    evaluation raises), so a model that was training keeps training afterwards.
  """
  was_training = model.training
  model.eval()

  all_labels = []
  all_preds = []
  num_correct = 0
  num_samples = 0

  try:
    # No gradients are needed anywhere in evaluation.
    with torch.no_grad():
      for x , y , _ in tqdm(loader):

        # Move the batch to the configured device; images are cast to float for the model.
        x = x.to(DEVICE).float()
        y = y.to(DEVICE)

        scores = model(x)
        _ , preds = scores.max(1)   # class with the highest score per sample

        # `.item()` keeps the running count a plain Python int.
        num_correct += (preds == y).sum().item()
        num_samples += preds.shape[0]

        all_preds.append(preds.cpu().numpy())
        all_labels.append(y.cpu().numpy())
  finally:
    model.train(was_training)

  # Guard against an empty loader instead of dividing by zero.
  accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
  print(f"Got an accuracy on {num_correct}/{num_samples} which is {accuracy:.2f}")

  if not all_preds:
    return np.empty(0, dtype=np.int64) , np.empty(0, dtype=np.int64)

  return np.concatenate(all_preds , axis = 0).astype(np.int64) , np.concatenate(all_labels , axis = 0).astype(np.int64)

# Saving Checkpoint 
def save_checkpoint(state , filename = "checkpoint_0.pth.tar"):
  """Write `state` to `filename` using `torch.save`.

  Args:
    state: object to serialize (the notebook passes a dict of state dicts).
    filename: destination path of the checkpoint file.
  """
  torch.save(state, filename)

# Loading Checkpoint
def checkpoint_load(model ,checkpoint , optimizer = None , lr = None):
  """Restore model (and optionally optimizer) state from a checkpoint dict.

  Args:
    model: module whose weights are loaded from `checkpoint["state_dict"]`.
    checkpoint: dict with a "state_dict" entry and, optionally, an "optimizer" entry.
    optimizer: optimizer to restore; skipped when None.
    lr: learning rate to apply to every parameter group after restoring the
      optimizer (the restored state carries the learning rate it was saved
      with); left untouched when None.
  """
  model.load_state_dict(checkpoint["state_dict"])

  if optimizer is None:
    return

  # Model-only checkpoints are still accepted.
  if "optimizer" in checkpoint:
    optimizer.load_state_dict(checkpoint["optimizer"])

  # Applied after load_state_dict, which would otherwise overwrite it.
  if lr is not None:
    for param_group in optimizer.param_groups:
      param_group["lr"] = lr

# Custom Dataset Class

In [10]:
class MAKEDATASET(Dataset):
  """Image dataset backed by a CSV index and a folder of `.png` files.

  The first CSV column holds the image id (file name without extension). The
  second column, when present, holds the label; CSVs without a label column
  are supported and yield `-1` as a placeholder label.
  """

  def __init__(self , path_image_folder , path_to_csv , train = False , transforms = None):
    """
    Args:
      path_image_folder: directory containing `<id>.png` images.
      path_to_csv: CSV file listing image ids (and optionally labels).
      train: stored on the instance; not consulted by this class itself.
      transforms: optional callable invoked as `transforms(image=array)` and
        expected to return a mapping with an "image" entry.
    """
    super().__init__()
    self.path_image_folder = path_image_folder
    self.path_to_csv = path_to_csv
    self.data = pd.read_csv(path_to_csv)
    self.train = train
    self.transforms = transforms

  def __len__(self):
    """Number of rows in the CSV index."""
    return len(self.data)

  def __getitem__(self,indx):
    """Return `(image, label, image_id)` for the row at position `indx`."""
    # Positional access keeps indexing consistent with __len__.
    row = self.data.iloc[indx]
    img_file = row.iloc[0]
    # Unlabeled CSVs carry only the id column.
    label = row.iloc[1] if len(row) > 1 else -1

    image_path = os.path.join(self.path_image_folder, img_file + ".png")
    # Close the file handle promptly and always hand out a 3-channel array.
    with Image.open(image_path) as pil_image:
      image = np.array(pil_image.convert("RGB"))

    if self.transforms:
      image = self.transforms(image=image)["image"]

    return image , label , img_file

In [11]:
# Full labelled training table; its `diagnosis` column drives the class weights
# and the stratified split in the cells below.
Data  = pd.read_csv("/content/APTOS/train.csv")

# Computing Class Weights

In [12]:
from sklearn.utils import class_weight

# Per-class weights for the five diagnosis grades, computed with scikit-learn's
# 'balanced' heuristic from the label column of the full training table.
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2, 3, 4]),
    y=Data['diagnosis'].values,
)

# Float tensor on the training device so it can be passed to the loss as `weight`.
class_weights = torch.tensor(balanced_weights, dtype=torch.float).to(DEVICE)

print(class_weights)


tensor([0.4058, 1.9795, 0.7331, 3.7948, 2.4827], device='cuda:0')


# Splitting Data into Train and Validation Sets

In [13]:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the diagnosis grade so both parts keep the class mix.
# A fixed random_state makes the split (and everything trained on it)
# reproducible across kernel restarts.
train, test = train_test_split(Data,
                              stratify=Data[['diagnosis']],
                              test_size=0.2,
                              random_state=42)

# Persist both parts; the dataset objects read their index from these CSV files.
train.to_csv("Train_Data.csv",index=False)
test.to_csv("Val_Data.csv",index=False)

# Preparing Data

In [29]:
# Making Dataset Object

# NOTE(review): this dataset is later fed to Apply_Thresholding, so confidences
# there are computed on randomly augmented images — confirm that is intended.
CompleteTrainData = MAKEDATASET(
    "/content/APTOS/train_images",
    "/content/APTOS/train.csv",
    train=True,
    transforms=TRAIN_Transforms)

# The split CSVs are written relative to the working directory, so they are read
# back the same way instead of assuming the working directory is /content.
Train_DS = MAKEDATASET(
    "/content/APTOS/train_images",
    "Train_Data.csv",
    train=True,
    transforms=TRAIN_Transforms)

# Validation data: deterministic transforms, not flagged as training data.
Val_DS = MAKEDATASET(
    "/content/APTOS/train_images",
    "Val_Data.csv",
    train=False,
    transforms=VAL_Transforms)

# Making Data Loader

CompleteTrainData_Loader = DataLoader(
    CompleteTrainData,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True
)

Train_loader = DataLoader(
    Train_DS,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True
)

# Validation order is kept fixed (no shuffling).
Val_loader = DataLoader(
    Val_DS,
    batch_size = BATCH_SIZE,
    num_workers=2,
    pin_memory=PIN_MEMORY,
    shuffle=False,
)


  cpuset_checked))


# Training

In [15]:
def Train_One_Epoch(loader , model , optimizer , loss_fn , scaler , device):
  """Train `model` for one pass over `loader` with mixed-precision autocast.

  Args:
    loader: iterable yielding `(images, targets, filenames)` batches.
    model: network being trained; called as `model(images)`.
    optimizer: optimizer stepping the model parameters.
    loss_fn: callable `loss_fn(scores, targets)` returning a scalar loss tensor.
    scaler: gradient scaler (`torch.cuda.amp.GradScaler`) used for the
      backward pass and optimizer step.
    device: device the batches are moved to before the forward pass.

  Side effects:
    Updates the model/optimizer/scaler in place and prints the mean batch loss
    for the epoch (or a notice when the loader produced no batches).
  """
  Losses  =  []
  loop = tqdm(loader)

  for data , target , _ in loop:

    # Honour the `device` argument rather than a module-level global.
    data = data.to(device).float()
    target = target.to(device)

    # Forward pass and loss under autocast.
    with torch.cuda.amp.autocast():
      scores = model(data)
      loss   = loss_fn(scores , target)

    batch_loss = loss.item()
    Losses.append(batch_loss)

    # Scaled backward pass, optimizer step, then scale-factor update.
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    loop.set_postfix(loss=batch_loss)

  # An empty loader would otherwise divide by zero.
  if not Losses:
    print("Losses average over epoch : n/a (loader produced no batches)")
    return

  print(f"Losses average over epoch : {sum(Losses) / len(Losses)}")

In [31]:
import gc
# Collect unreachable Python objects first, then ask PyTorch to release the
# cached GPU memory it is no longer using, before building the model.
gc.collect()
torch.cuda.empty_cache()

# Efficientnet Model

In [34]:

# Pretrained EfficientNet-B3 whose final fully-connected layer is swapped for a
# fresh 5-output linear layer (one output per diagnosis grade).
model = EfficientNet.from_pretrained("efficientnet-b3")
model._fc = nn.Linear(in_features=1536, out_features=5)
model = model.to(DEVICE)

# Cross entropy weighted by the class weights computed earlier.
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# Adam with the learning rate and weight decay from the configuration cell.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# Gradient scaler that accompanies the autocast forward pass in Train_One_Epoch.
scaler = torch.cuda.amp.GradScaler()

for epoch in range(6):
  Train_One_Epoch(Train_loader , model , optimizer , loss_fn , scaler , DEVICE)

  # Validate after every epoch and report the quadratic weighted kappa.
  preds , labels = get_accuracy(model, Val_loader)
  kappa = cohen_kappa_score(preds , labels , weights = 'quadratic')
  print(f"Quadratic Weighted Kappa Score : ( Validation ): {kappa}")


Loaded pretrained weights for efficientnet-b3


  cpuset_checked))
100%|██████████| 184/184 [05:15<00:00,  1.71s/it, loss=1.11]


Losses average over epoch : 1.1894989311695099


100%|██████████| 46/46 [01:14<00:00,  1.63s/it]


Got an accuracy on 501/733 which is 68.35
Quadratic Weighted Kappa Score : ( Validation ): 0.8102498032859995


  cpuset_checked))
100%|██████████| 184/184 [05:12<00:00,  1.70s/it, loss=2.16]


Losses average over epoch : 0.9550899847046189


100%|██████████| 46/46 [01:16<00:00,  1.65s/it]


Got an accuracy on 569/733 which is 77.63
Quadratic Weighted Kappa Score : ( Validation ): 0.885753875285516


  cpuset_checked))
100%|██████████| 184/184 [05:14<00:00,  1.71s/it, loss=1.49]


Losses average over epoch : 0.8634628282616967


100%|██████████| 46/46 [01:15<00:00,  1.64s/it]


Got an accuracy on 574/733 which is 78.31
Quadratic Weighted Kappa Score : ( Validation ): 0.889426324982302


  cpuset_checked))
100%|██████████| 184/184 [05:18<00:00,  1.73s/it, loss=2.4]


Losses average over epoch : 0.8567651085879492


100%|██████████| 46/46 [01:16<00:00,  1.66s/it]


Got an accuracy on 580/733 which is 79.13
Quadratic Weighted Kappa Score : ( Validation ): 0.8938789090307139


  cpuset_checked))
100%|██████████| 184/184 [05:20<00:00,  1.74s/it, loss=1.45]


Losses average over epoch : 0.807983191764873


100%|██████████| 46/46 [01:15<00:00,  1.65s/it]


Got an accuracy on 589/733 which is 80.35
Quadratic Weighted Kappa Score : ( Validation ): 0.8934677236346577


  cpuset_checked))
100%|██████████| 184/184 [05:21<00:00,  1.75s/it, loss=0.742]


Losses average over epoch : 0.7453606329534365


100%|██████████| 46/46 [01:17<00:00,  1.67s/it]

Got an accuracy on 565/733 which is 77.08
Quadratic Weighted Kappa Score : ( Validation ): 0.8583519322994444





In [35]:
def Apply_Thresholding(MyData , Loader,threshold = 0.80):
  """Drop, in place, the rows of `MyData` that the global `model` is unsure about.

  Every batch from `Loader` is scored with the module-level `model`; a sample
  whose highest softmax probability is below `threshold` is removed from
  `MyData` by matching its file id against the `id_code` column.

  Args:
    MyData: DataFrame with an `id_code` column; mutated in place.
    Loader: iterable yielding `(images, labels, filenames)` batches.
    threshold: minimum top-class probability required to keep a sample.

  Side effects:
    Prints the number of low-confidence samples found. The model is switched
    to eval mode during scoring and returned to its previous mode afterwards.
  """
  was_training = model.training
  model.eval()

  low_conf_ids = []
  try:
    with torch.no_grad():
      for x , _ , f in Loader:
        x = x.to(DEVICE).float()
        scores = model(x)

        # Explicit class dimension: the implicit-dim form of Softmax is deprecated.
        probabilities = torch.softmax(scores, dim=1)
        top_p = probabilities.max(dim=1).values

        # One device-to-host transfer per batch instead of one per sample.
        below = (top_p < threshold).cpu().tolist()
        low_conf_ids.extend(name for name, flagged in zip(f, below) if flagged)
  finally:
    model.train(was_training)

  exps = len(low_conf_ids)

  # Single in-place drop rather than one DataFrame drop per flagged sample.
  MyData.drop(MyData.index[MyData['id_code'].isin(low_conf_ids)], inplace=True)
  print(f"The Number of Data Points remove : {exps}")

In [36]:
# Apply_Thresholding prunes `Data` in place, so the row count is captured before
# the call and read again afterwards.
rows_before = len(Data)
print(f"Data Points before Thresholding {rows_before}")

Apply_Thresholding(Data, CompleteTrainData_Loader, threshold=0.80)

rows_after = len(Data)
print(f"Data Points After Thresholding {rows_after}")

Data Points before Thresholding 3662


  cpuset_checked))
  # This is added back by InteractiveShellApp.init_path()


The Number of Data Points remove : 1543
Data Points After Thresholding 2119


In [37]:
# Persist the pruned table (Data was filtered in place by Apply_Thresholding)
# so the following cells can reload it from disk.
Data.to_csv("DataAfterThresholding.csv",index=False)

In [39]:
# Reload the thresholded table from disk (the "_AT" suffix presumably stands for
# "after thresholding", matching the file name).
Data_AT = pd.read_csv("DataAfterThresholding.csv")

In [40]:
from sklearn.utils import class_weight

# Recompute the 'balanced' per-class weights, this time from the labels that
# survived thresholding.
balanced_weights_at = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2, 3, 4]),
    y=Data_AT['diagnosis'].values,
)

# Float tensor on the training device, replacing the earlier `class_weights`.
class_weights = torch.tensor(balanced_weights_at, dtype=torch.float).to(DEVICE)

print(class_weights)


tensor([0.2532, 2.7167, 3.6852, 9.8558, 3.2351], device='cuda:0')


In [43]:
# The thresholded table was written relative to the working directory, so it is
# read back the same way instead of assuming the working directory is /content.
Data_AT = pd.read_csv("DataAfterThresholding.csv")


# 80/20 stratified split of the thresholded data. A fixed random_state keeps the
# split reproducible across kernel restarts.
train, test = train_test_split(Data_AT,
                              stratify=Data_AT[['diagnosis']],
                              test_size=0.2,
                              random_state=42)

train.to_csv("Train_Data_AT.csv",index=False)
test.to_csv("Val_Data_AT.csv",index=False)

In [44]:
# The thresholded split CSVs are written relative to the working directory, so
# they are read back the same way.
Train_DS = MAKEDATASET(
    "/content/APTOS/train_images",
    "Train_Data_AT.csv",
    train=True,
    transforms=TRAIN_Transforms)

Val_DS = MAKEDATASET(
    "/content/APTOS/train_images",
    "Val_Data_AT.csv",
    train=False,
    transforms=VAL_Transforms)

# The transforms must be passed by keyword: as the third positional argument they
# were bound to `train`, leaving `transforms=None` (images never resized or
# converted to tensors).
Test_DS = MAKEDATASET(
    "/content/APTOS/test_images",
    "/content/APTOS/test.csv",
    train=False,
    transforms=VAL_Transforms)

Test_loader  = DataLoader(Test_DS,batch_size=BATCH_SIZE,num_workers=NUM_WORKERS,shuffle=False)

CompleteTrainData_Loader = DataLoader(
    CompleteTrainData,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True
)

Train_loader = DataLoader(
    Train_DS,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=True
)

# Validation order is kept fixed (no shuffling).
Val_loader = DataLoader(
    Val_DS,
    batch_size = BATCH_SIZE,
    num_workers=2,
    pin_memory=PIN_MEMORY,
    shuffle=False,
)

  cpuset_checked))


In [45]:
# Fresh EfficientNet-B3 with a new 5-output head, trained on the thresholded split.
model = EfficientNet.from_pretrained("efficientnet-b3")
model._fc = nn.Linear(in_features=1536, out_features=5)
model = model.to(DEVICE)

# NOTE(review): the loss here is unweighted, so the `class_weights` recomputed
# after thresholding are not used — confirm this is intentional.
loss_fn = nn.CrossEntropyLoss()

optimizer = optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)
scaler = torch.cuda.amp.GradScaler()

for epoch in range(3):
  Train_One_Epoch(Train_loader , model , optimizer , loss_fn , scaler , DEVICE)

  # Validate after every epoch and report the quadratic weighted kappa.
  preds , labels = get_accuracy(model, Val_loader)
  kappa = cohen_kappa_score(preds , labels , weights = 'quadratic')
  print(f"Quadratic Weighted Kappa Score : ( Validation ): {kappa}")


Loaded pretrained weights for efficientnet-b3


  cpuset_checked))
100%|██████████| 106/106 [02:25<00:00,  1.37s/it, loss=0.27]


Losses average over epoch : 0.4855064534634914


100%|██████████| 27/27 [00:31<00:00,  1.16s/it]


Got an accuracy on 364/424 which is 85.85
Quadratic Weighted Kappa Score : ( Validation ): 0.7774203636688558


  cpuset_checked))
100%|██████████| 106/106 [02:19<00:00,  1.31s/it, loss=0.195]


Losses average over epoch : 0.2839576209069423


100%|██████████| 27/27 [00:30<00:00,  1.12s/it]


Got an accuracy on 398/424 which is 93.87
Quadratic Weighted Kappa Score : ( Validation ): 0.9164301482182277


  cpuset_checked))
100%|██████████| 106/106 [02:16<00:00,  1.29s/it, loss=0.162]


Losses average over epoch : 0.25132192425289246


100%|██████████| 27/27 [00:29<00:00,  1.09s/it]

Got an accuracy on 391/424 which is 92.22
Quadratic Weighted Kappa Score : ( Validation ): 0.9330971984098574





# Saving Model

In [46]:
# Bundle the model weights and the optimizer state, then write them to disk
# through save_checkpoint.
checkpoint = dict(
    state_dict=model.state_dict(),
    optimizer=optimizer.state_dict(),
)
save_checkpoint(checkpoint, filename="chk.pth.tar")