In [1]:
import os
import cv2
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.utils import make_grid
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
dir_train = "data/train"
dir_test = "data/test1"

In [3]:
imgs = os.listdir(dir_train)
test_imgs = os.listdir(dir_test)
print(imgs[:10])
print(len(imgs))
print(test_imgs[:10])
print(len(test_imgs))

['cat.0.jpg', 'cat.1.jpg', 'cat.10.jpg', 'cat.100.jpg', 'cat.1000.jpg', 'cat.10000.jpg', 'cat.10001.jpg', 'cat.10002.jpg', 'cat.10003.jpg', 'cat.10004.jpg']
25000
['1.jpg', '10.jpg', '100.jpg', '1000.jpg', '10000.jpg', '10001.jpg', '10002.jpg', '10003.jpg', '10004.jpg', '10005.jpg']
12500


In [4]:
class_to_int = {"dog" : 0, "cat" : 1}
int_to_class = {0: "dog", 1 : "cat"}

In [5]:
#Augmentations
def get_train_transform():
    return T.Compose([
        T.RandomHorizontalFlip(p=0.5),
        T.RandomRotation(15),
        T.RandomCrop(204),
        T.ToTensor(),
        T.Normalize((0,0,0),(1,1,1)) #-> Standardization as mean = [0,0,0] and std = [1,1,1] for each channel R,G,B
    ])

def get_val_transform():
    return T.Compose([
        T.ToTensor(),
        T.Normalize((0,0,0),(1,1,1))
    ])
#

In [6]:
class DogCatDataset(Dataset):
    def __init__(self, imgs, class_to_int, mode="train", transforms= None):
        super().__init__()
        self.imgs = imgs
        self.class_to_int = class_to_int
        self.mode = mode
        self.transforms = transforms
    
    def __getitem__(self,idx): #For this example: a[idx]
        image_name = self.imgs[idx]
        
        ### Reading, converting and normalizing image for OpenCV
        #img = cv2.imread(DIR_TRAIN + image_name, cv2.IMREAD_COLOR)
        #img = cv2.resize(img, (224,224))
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        #img /= 255.

        img = Image.open(dir_train + "/" + image_name)
        img = img.resize((224,224))
        if self.mode == "train" or self.mode == "val":
            label = self.class_to_int[image_name.split(".")[0]]
            print(label)
            print(type(label))
            label = torch.tensor(label, dtype= torch.float32)
            img = self.transforms(img)
            return img,label
        else:
            img = self.transforms(img)
            return img

    def __len__(self):
        return len(self.imgs)

In [7]:
train_imgs, val_imgs = train_test_split(imgs, test_size = 0.25)

In [8]:
train_dataset = DogCatDataset(train_imgs, class_to_int, mode="train", transforms=get_train_transform())
val_dataset = DogCatDataset(val_imgs, class_to_int, mode="val", transforms=get_train_transform())
test_dataset = DogCatDataset(test_imgs, class_to_int , mode="test", transforms=get_train_transform())

train_data_loader = DataLoader(
    dataset = train_dataset,
    num_workers = 4,
    batch_size = 16,
    shuffle = True
)

val_data_loader = DataLoader (
    dataset= val_dataset,
    num_workers = 4,
    batch_size = 16,
    shuffle = True
)

test_dataloader = DataLoader (
    dataset = test_dataset,
    num_workers = 4,
    batch_size = 16,
    shuffle = True
)

In [9]:
train_dataset[0]

0
<class 'int'>


(tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.5098, 0.5686, 0.4941],
          [0.0000, 0.0000, 0.4392,  ..., 0.3882, 0.3451, 0.3608],
          [0.0000, 0.0000, 0.4706,  ..., 0.3020, 0.3373, 0.4392],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.4745, 0.5490, 0.4902],
          [0.0000, 0.0000, 0.4000,  ..., 0.3647, 0.3294, 0.3529],
          [0.0000, 0.0000, 0.4353,  ..., 0.2706, 0.3098, 0.4235],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.3647, 0.4039, 0.3294],
          [0.0000, 0.0000, 0.2941,  ..., 0.2314, 0.1569, 0.1686],
          [0.0000, 0.0000, 0.3098,  ...,

In [10]:
print(train_data_loader)

<torch.utils.data.dataloader.DataLoader object at 0x000001E13E0C8EE0>


In [11]:
# for images, labels in train_data_loader:
#     fig, ax = plt.subplots(figsize = (10, 10))
#     ax.set_xticks([])
#     ax.set_yticks([])
#     ax.imshow(make_grid(images, 4).permute(1,2,0))
#     break

In [12]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
device_name = torch.cuda.get_device_name(0)
print(device_name)

cuda
NVIDIA GeForce GTX 1060


In [13]:
def accuracy(preds, trues):
    
    ### Converting preds to 0 or 1
    preds = [1 if preds[i] >= 0.5 else 0 for i in range(len(preds))]
    
    ### Calculating accuracy by comparing predictions with true labels
    acc = [1 if preds[i] == trues[i] else 0 for i in range(len(preds))]
    
    ### Summing over all correct predictions
    acc = np.sum(acc) / len(preds)
    
    return (acc * 100)

In [None]:
def train_one_epoch(train_data_loader):
    
    ### Local Parameters
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()
    
    ###Iterating over data loader
    for images, labels in train_data_loader:
        
        #Loading images and labels to device
        images = images.to(device)
        labels = labels.to(device)
        print(labels)
        labels = labels.reshape((labels.shape[0], 1)) # [N, 1] - to match with preds shape
        
        #Reseting Gradients
        optimizer.zero_grad()
        
        #Forward
        preds = model(images)
        
        #Calculating Loss
        _loss = criterion(preds, labels)
        loss = _loss.item()
        epoch_loss.append(loss)
        
        #Calculating Accuracy
        acc = accuracy(preds, labels)
        epoch_acc.append(acc)
        
        #Backward
        _loss.backward()
        optimizer.step()
    
    ###Overall Epoch Results
    end_time = time.time()
    total_time = end_time - start_time
    
    ###Acc and Loss
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)
    
    ###Storing results to logs
    train_logs["loss"].append(epoch_loss)
    train_logs["accuracy"].append(epoch_acc)
    train_logs["time"].append(total_time)
        
    return epoch_loss, epoch_acc, total_time

In [15]:
def val_one_epoch(val_data_loader, best_val_acc):
    
    ### Local Parameters
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()
    
    ###Iterating over data loader
    for images, labels in val_data_loader:
        
        #Loading images and labels to device
        images = images.to(device)
        labels = labels.to(device)
        labels = labels.reshape((labels.shape[0], 1)) # [N, 1] - to match with preds shape
        
        #Forward
        preds = model(images)
        
        #Calculating Loss
        _loss = criterion(preds, labels)
        loss = _loss.item()
        epoch_loss.append(loss)
        
        #Calculating Accuracy
        acc = accuracy(preds, labels)
        epoch_acc.append(acc)
    
    ###Overall Epoch Results
    end_time = time.time()
    total_time = end_time - start_time
    
    ###Acc and Loss
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)
    
    ###Storing results to logs
    val_logs["loss"].append(epoch_loss)
    val_logs["accuracy"].append(epoch_acc)
    val_logs["time"].append(total_time)
    
    ###Saving best model
    if epoch_acc > best_val_acc:
        best_val_acc = epoch_acc
        torch.save(model.state_dict(),"resnet50_best.pth")
        
    return epoch_loss, epoch_acc, total_time, best_val_acc

In [16]:
model = resnet50(pretrained = True)

# Modifying Head - classifier

model.fc = nn.Sequential(
    nn.Linear(2048, 1, bias = True),
    nn.Sigmoid()
)



In [18]:
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

# Learning Rate Scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)

#Loss Function
criterion = nn.BCELoss()

# Logs - Helpful for plotting after training finishes
train_logs = {"loss" : [], "accuracy" : [], "time" : []}
val_logs = {"loss" : [], "accuracy" : [], "time" : []}

# Loading model to device
model.to(device)
print(model)

# No of epochs 
epochs = 1

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
best_val_acc = 0
for epoch in range(epochs):
    
    ###Training
    loss, acc, _time = train_one_epoch(train_data_loader)
    
    #Print Epoch Details
    print("\nTraining")
    print("Epoch {}".format(epoch+1))
    print("Loss : {}".format(round(loss, 4)))
    print("Acc : {}".format(round(acc, 4)))
    print("Time : {}".format(round(_time, 4)))
    
    ###Validation
    loss, acc, _time, best_val_acc = val_one_epoch(val_data_loader, best_val_acc)
    
    #Print Epoch Details
    print("\nValidating")
    print("Epoch {}".format(epoch+1))
    print("Loss : {}".format(round(loss, 4)))
    print("Acc : {}".format(round(acc, 4)))
    print("Time : {}".format(round(_time, 4)))