# Import packages

In [1]:
import numpy as np
import pandas as pd
import os

In [27]:
import random
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load Train boxes

In [3]:
train = pd.read_excel("F:\\AI ML DL Projects\\Supports\\torchvision\\data\\output.xlsx")

In [4]:
train.head()

Unnamed: 0,image_id,label,x1,y1,x2,y2
0,DJI_20221113111452_0243,coconut,168,258,197,290
1,DJI_20221113111452_0243,coconut,231,239,256,265
2,DJI_20221113111452_0243,coconut,196,233,231,265
3,DJI_20221113111452_0243,coconut,146,280,173,306
4,DJI_20221113111452_0243,coconut,118,280,145,308


In [5]:
unique_imgs = train.image_id.unique()

In [6]:
unique_imgs

array(['DJI_20221113111452_0243', 'DJI_20221113111839_0349',
       'DJI_20221113112130_0431', 'DJI_20221113112138_0435',
       'DJI_20221113112424_0511', 'DJI_20221113112430_0514',
       'DJI_20221113112438_0518', 'DJI_20221113112511_0535',
       'DJI_20221113112525_0542', 'DJI_20221113112533_0546',
       'DJI_20221113112539_0549', 'DJI_20221113112549_0554',
       'DJI_20221113113530_0011', 'DJI_20221113113545_0019',
       'DJI_20221113113723_0064', 'DJI_20221113113725_0065',
       'DJI_20221113113729_0067', 'DJI_20221113113731_0068',
       'DJI_20221113113736_0071', 'DJI_20221113113746_0076',
       'DJI_20221113114523_0291', 'DJI_20221113114545_0302',
       'DJI_20221113114621_0316', 'DJI_20221113114635_0323',
       'DJI_20221113114724_0348', 'DJI_20221113114744_0358',
       'DJI_20221113114910_0397', 'DJI_20221113114938_0411',
       'DJI_20221113115249_0499', 'DJI_20221113115324_0517',
       'DJI_20221113115511_0567', 'DJI_20221113120541_0001',
       'DJI_202211131206

In [7]:
class CustData(torch.utils.data.Dataset):
    def __init__(self, df, unique_imgs, indices):
        self.df = df
        self.unique_imgs = unique_imgs
        self.indices = indices
    
    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, idx):
        image_name = self.unique_imgs[self.indices[idx]]
        boxes = self.df[self.df.image_id == image_name].values[:,1:]
        img = Image.open("F:\\AI ML DL Projects\\Supports\\torchvision\\data\\images\\" + image_name+'.jpg').convert('RGB')
        # print(boxes)
        mapping = {'coconut': 1, 'palm': 0}
        labels = [mapping[row[0]] for row in boxes]
        # print(labels)
        
        box = boxes[:,1:].astype("float")
        target = {}
        target['boxes'] = torch.tensor(box)
        target['label'] = torch.tensor(labels)
        
        return T.ToTensor()(img), target
        

In [8]:
train_inds, val_inds = train_test_split(range(unique_imgs.shape[0]), test_size=0.1)

In [9]:
def custom_collate(data):
    return data

# Split dataset

In [10]:
train_d1 = torch.utils.data.DataLoader(CustData(train, unique_imgs, train_inds), 
                                  batch_size = 16,
                                  shuffle = True,
                                  collate_fn = custom_collate,
                                  pin_memory = True if torch.cuda.is_available else False)

val_d1 = torch.utils.data.DataLoader(CustData(train, unique_imgs, val_inds), 
                                  batch_size = 8,
                                  shuffle = True,
                                  collate_fn = custom_collate,
                                  pin_memory = True if torch.cuda.is_available else False)

In [11]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
num_classes = 3
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)



In [12]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [13]:
device

device(type='cpu')

In [14]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

In [15]:
model.to(device)

for epochs in range(num_epochs):
    epoch_loss = 0
    for data in train_d1:
        imgs = []
        targets = []
        for d in data:
            imgs.append(d[0].to(device))
            targ = {}
            targ['boxes'] = d[1]['boxes'].to(device)
            targ['labels'] = d[1]['label'].to(device)
            targets.append(targ)
            
        lost_dict = model(imgs, targets)
        loss = sum(v for v in lost_dict.values())
        epoch_loss += loss.cpu().detach().numpy()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch Loss = {}", epoch_loss)

Epoch Loss = {} 19.318553403193015
Epoch Loss = {} 10.105393064630565
Epoch Loss = {} 8.853731591516825
Epoch Loss = {} 7.927123300343808
Epoch Loss = {} 6.856249235397409


In [16]:
model.eval()
data  = iter(val_d1).__next__()

In [18]:
img = data[0][0]
boxes = data[0][1]['boxes']
boxes = data[0][1]['label']


In [19]:
output = model([img.to(device)])

In [20]:
output

[{'boxes': tensor([[5.5845e+02, 2.0720e+02, 5.9303e+02, 2.4410e+02],
          [3.4224e+02, 1.8168e+02, 3.7340e+02, 2.1320e+02],
          [1.1869e+02, 3.1181e+02, 1.5071e+02, 3.4650e+02],
          [5.3498e+02, 4.2177e+02, 5.6596e+02, 4.5824e+02],
          [2.6772e+02, 4.5223e+02, 2.9837e+02, 4.8359e+02],
          [1.4297e+02, 4.8944e+02, 1.7675e+02, 5.2777e+02],
          [3.8106e+02, 4.6673e+02, 4.1183e+02, 4.9588e+02],
          [2.1630e+02, 1.8652e+02, 2.4579e+02, 2.1936e+02],
          [2.4938e+02, 1.6394e+02, 2.7949e+02, 1.9431e+02],
          [5.5540e+02, 2.4447e+02, 5.8587e+02, 2.7728e+02],
          [6.8482e+02, 3.5045e+01, 7.2540e+02, 7.9228e+01],
          [5.1487e+01, 2.5552e+02, 8.4661e+01, 2.8824e+02],
          [4.5623e+02, 2.0040e+02, 4.8690e+02, 2.3456e+02],
          [5.9028e+02, 2.4654e+02, 6.2418e+02, 2.7959e+02],
          [2.1787e+02, 2.6163e+02, 2.4605e+02, 2.9230e+02],
          [3.6101e+02, 3.9685e+02, 3.9282e+02, 4.2985e+02],
          [3.9570e+02, 1.3234e+

In [21]:
out_box = output[0]['boxes']
out_scores = output[0]['scores']


In [31]:
keep = torchvision.ops.nms(out_box, out_scores, 0.45)

In [32]:
out_box.shape, keep.shape

(torch.Size([100, 4]), torch.Size([90]))

In [34]:
im = (img.permute(1,2,0).cpu().detach().numpy() * 255).astype('uint8')

In [35]:
im

array([[[156, 140, 114],
        [150, 134, 108],
        [164, 148, 122],
        ...,
        [ 40,  47,  40],
        [ 38,  45,  38],
        [ 42,  49,  42]],

       [[157, 141, 115],
        [151, 135, 109],
        [157, 141, 115],
        ...,
        [ 58,  66,  55],
        [ 49,  57,  46],
        [ 46,  54,  43]],

       [[143, 127, 101],
        [143, 127, 101],
        [149, 133, 107],
        ...,
        [ 64,  71,  55],
        [ 58,  65,  49],
        [ 59,  66,  50]],

       ...,

       [[ 73,  77,  54],
        [ 77,  82,  59],
        [ 68,  73,  50],
        ...,
        [171, 145, 118],
        [184, 158, 131],
        [154, 127, 100]],

       [[ 76,  78,  57],
        [ 77,  80,  59],
        [ 70,  75,  52],
        ...,
        [167, 141, 116],
        [160, 136, 110],
        [152, 131, 104]],

       [[ 76,  78,  57],
        [ 76,  79,  58],
        [ 70,  75,  52],
        ...,
        [155, 129, 104],
        [153, 129, 103],
        [152, 131, 104]]

In [36]:
vsample = Image.fromarray(im)

In [48]:
draw = ImageDraw.Draw(vsample)
for box in boxes:
    draw.rectangle(list(box), fill=None, outline='red')

TypeError: iteration over a 0-d tensor