In [None]:
import pandas as pd
import numpy as np
import torch
import os
import gc


In [None]:
import pickle

In [None]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [None]:
import cv2

In [None]:

import torchvision.models.detection as tmd
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor as FRP
import inspect
import torchvision
#converting the customised rcnn model
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


In [None]:
# Dataset location. The pieces end with a backslash so they can be joined by plain
# string concatenation (train_dir / val_dir are appended to root_dir further down).
# NOTE(review): absolute, Windows-only local path — adjust before running elsewhere.
root_dir = 'D:\\Drone-Object-Detection\\VisDroneDataset\\'
# Per-split sub-directories of root_dir.
test_dir = 'test\\'
train_dir = 'train\\'
val_dir = 'validation\\'

In [None]:
def split_image_with_asp_ratio(image):
    """Print the ``shape`` attribute of ``image``.

    Despite its name the function does not split anything yet; it only
    reports the shape and returns ``None``.
    """
    shape = image.shape
    print(shape)

In [None]:
class VisDroneDataset(Dataset):
    """Detection dataset reading images and comma-separated box annotations from disk.

    ``path`` must contain an ``images`` and an ``annotations`` sub-directory.
    Sample ``i`` pairs the i-th image with the i-th annotation file *after both
    directory listings have been sorted*, so the two directories are expected
    to hold files with matching base names.
    """

    def __init__(self, path, img_shape):
        """
        Args:
            path: directory of one dataset split (a trailing separator is accepted).
            img_shape: ``(height, width)`` every image is resized to.
        """
        self.root = path
        self.width = img_shape[1]
        self.height = img_shape[0]
        # os.listdir returns entries in arbitrary order; sorting both listings
        # guarantees that index i refers to the same image/annotation pair.
        self.images = sorted(os.listdir(os.path.join(path, 'images')))
        self.annotations = sorted(os.listdir(os.path.join(path, 'annotations')))
        self.length = len(self.images)

    def __len__(self):
        """Number of images found in the ``images`` directory."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        Returns:
            image: numpy array of shape ``(3, height, width)``, RGB, scaled to [0, 1].
            target: dict with
                ``boxes``    float32 tensor ``(N, 4)`` as ``x1, y1, x2, y2`` in
                             resized-image pixels (``(0, 4)`` when there are no boxes),
                ``labels``   int64 tensor ``(N,)``,
                ``image_id`` scalar tensor holding ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read by OpenCV.
        """
        image_path = os.path.join(self.root, 'images', self.images[idx])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        img_height, img_width = image.shape[:2]  # OpenCV arrays are (rows, cols, channels)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.width, self.height), interpolation=cv2.INTER_AREA)
        image = image.transpose(2, 0, 1)  # HWC -> CHW
        image = image / 255.0

        boxes = []
        labels = []
        annotation_path = os.path.join(self.root, 'annotations', self.annotations[idx])
        with open(annotation_path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # tolerate blank or trailing empty lines
                fields = line.split(',')
                # The fifth field is read but not used here
                # (presumably the VisDrone score flag — TODO confirm).
                box_left, box_top, box_width, box_height, _unused, category = map(float, fields[:6])

                # Degenerate boxes are widened to one pixel so x2 > x1 and y2 > y1.
                box_width = max(box_width, 1)
                box_height = max(box_height, 1)

                box_right = box_left + box_width
                box_bottom = box_top + box_height
                # Rescale from original-image pixels to resized-image pixels.
                box_left = (box_left / img_width) * self.width
                box_right = (box_right / img_width) * self.width
                box_top = (box_top / img_height) * self.height
                box_bottom = (box_bottom / img_height) * self.height
                boxes.append((box_left, box_top, box_right, box_bottom))
                labels.append(int(category))

        target = {
            # reshape keeps the (N, 4) layout even when the file has no boxes.
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor(idx),
        }

        return image, target

In [None]:
# Build the train / validation datasets. VisDroneDataset reads img_shape as
# (height, width), so every image is resized to height 724 and width 1224.
train_dataset = VisDroneDataset(root_dir+train_dir,(724,1224))
val_dataset = VisDroneDataset(root_dir+val_dir,(724,1224))

In [None]:
def collate_fn(batch):
    """Split a batch of ``(image, target)`` pairs into two parallel lists.

    Targets hold a different number of boxes per image, so they are kept as a
    plain list instead of being stacked.

    Returns:
        ``(images, targets)`` — two lists in the same order as ``batch``.
    """
    pairs = list(batch)
    images = [img for img, _ in pairs]
    targets = [target for _, target in pairs]
    return (images, targets)


In [None]:
# One sample per batch; collate_fn keeps images and targets as Python lists,
# so each batch is ([image], [target]).
# NOTE(review): the validation loader shuffles as well, so the samples that are
# evaluated/plotted later differ from run to run — confirm this is intended.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True,collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True,collate_fn=collate_fn)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Directory torch.hub uses to store downloaded model weights.
# NOTE(review): absolute local path — adjust before running elsewhere.
torch.hub.set_dir('D:\\torch_models')

In [None]:
from torchvision.models.detection import ssdlite320_mobilenet_v3_large

In [None]:
def create_model_ssd(num_classes=12):
    """Build an SSDlite320 / MobileNetV3-Large detector, print its layout and return it.

    Args:
        num_classes: number of output classes. Defaults to 12, the value that
            used to be hard-coded here (``labels_map`` in this notebook has 12 entries).

    Returns:
        The freshly constructed model (not moved to any device).
    """
    model = ssdlite320_mobilenet_v3_large(num_classes=num_classes)
    print(model)
    return model

In [None]:

def create_model_faster_rcnn(num_classes):
    """Return a pretrained Faster R-CNN (ResNet-50 FPN) with a new box-predictor head.

    All pretrained parameters are frozen; the replacement ``FastRCNNPredictor``
    is created after the freeze loop, so the loop does not touch it.

    Args:
        num_classes: number of classes the new head predicts.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # Freeze every weight that came with the pretrained checkpoint.
    for param in detector.parameters():
        param.requires_grad = False

    # Swap the classification/regression head for one sized to num_classes.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector



# Instantiate the detector with 12 output classes and move it to the selected device.
model = create_model_faster_rcnn(12).to(device)

# Switch to training mode before the training loop below.
model.train()

import time
from tqdm import tqdm
def train_one_epoch(model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: detection model that returns a dict of losses when called as
            ``model(images, targets)``.
        train_loader: iterable yielding ``(images, targets)`` where ``images``
            is a list of equally-sized arrays and ``targets`` a list of dicts
            of tensors.
        optimizer: optimizer updating the trainable parameters of ``model``.

    Returns:
        The mean summed loss per batch, or ``0.0`` when the loader is empty.
    """
    # torchvision detection models only return the loss dict in training mode.
    model.train()

    total_loss = 0.0
    n = 0
    for data, target in tqdm(train_loader):
        optimizer.zero_grad()
        # np.array stacks the list of images into one batch array, which
        # requires all images in the batch to share a shape.
        data = torch.as_tensor(np.array(data), dtype=torch.float32).to(device)
        target = [{k: v.to(device) for k, v in t.items()} for t in target]
        loss_dict = model(data, target)

        losses = sum(loss_dict.values())

        total_loss += losses.item()
        losses.backward()
        optimizer.step()
        if n % 20 == 0:
            # Running mean over the batches seen so far.
            print('loss', total_loss / (n + 1))
        n += 1

        # Release per-batch memory eagerly. This is slow; presumably it is
        # needed to fit on a small GPU — TODO confirm before removing.
        del data, target
        gc.collect()
        torch.cuda.empty_cache()

    # Guard against an empty loader instead of dividing by zero.
    mean_loss = total_loss / n if n else 0.0
    print(mean_loss, 'a')
    return mean_loss

# Number of passes over the training set.
num_epoch = 5
# Only parameters that still require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 every 3 scheduler steps (one step per epoch below).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
model = model.to(device)


# Sanity check: number of trainable parameter tensors vs. all parameter tensors.
print(len(params))
print(len(list(model.parameters())))


# NOTE(review): the trained model is not saved anywhere in the visible cells.
for epoch in range(num_epoch):
    train_one_epoch(model,train_loader,optimizer)
    lr_scheduler.step()

In [None]:
# Load a pickled model from the working directory; this rebinds `model`,
# replacing whatever the training cell above produced.
# NOTE(review): no visible cell writes 'model.pkl', so a fresh Restart & Run All
# depends on a file created elsewhere.
# SECURITY: pickle.load can execute arbitrary code — only load files you created yourself.
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)

In [None]:
# Switch the loaded model to evaluation (inference) mode.
model.eval()

In [None]:

# Drop unreachable Python objects first ...
gc.collect()

# ... then empty PyTorch's CUDA caching allocator.
torch.cuda.empty_cache()

from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    """Print GPU utilisation, empty PyTorch's CUDA cache, then print utilisation again."""

    def _report(header):
        # One labelled utilisation snapshot.
        print(header)
        gpu_usage()

    _report("Initial GPU Usage")
    torch.cuda.empty_cache()
    _report("GPU Usage after emptying the cache")


free_gpu_cache()

In [None]:
def box_iou_batch(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray:
    """Pairwise intersection-over-union between two sets of boxes.

    Args:
        boxes_a: array of shape ``(N, 4)`` with rows ``x1, y1, x2, y2``.
        boxes_b: array of shape ``(M, 4)`` in the same layout.

    Returns:
        Array of shape ``(N, M)`` where entry ``[i, j]`` is the IoU of
        ``boxes_a[i]`` and ``boxes_b[j]``. Pairs whose union has no area
        (two zero-area boxes) get an IoU of 0 instead of NaN.
    """

    def box_area(box):
        # `box` is the transposed (4, K) array, so box[k] is one coordinate column.
        return (box[2] - box[0]) * (box[3] - box[1])

    area_a = box_area(boxes_a.T)
    area_b = box_area(boxes_b.T)

    # Broadcasting (N, 1, 2) against (M, 2) yields every pair's overlap corners.
    top_left = np.maximum(boxes_a[:, None, :2], boxes_b[:, :2])
    bottom_right = np.minimum(boxes_a[:, None, 2:], boxes_b[:, 2:])

    # Negative extents mean the boxes do not overlap; clip them to zero.
    area_inter = np.prod(
        np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)

    union = area_a[:, None] + area_b - area_inter
    # 0/0 would warn and produce NaN for degenerate pairs; silence the warning
    # and overwrite those entries explicitly.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = area_inter / union
    iou[union <= 0] = 0
    return iou

In [None]:
def non_max_suppression(predictions, iou_threshold: float = 0.9) -> np.ndarray:
    """Class-aware non-maximum suppression on one image's predictions.

    A box is dropped when a higher-scoring box of the same label overlaps it
    with IoU above ``iou_threshold``.

    Args:
        predictions: dict with tensors ``'boxes'`` (N, 4), ``'scores'`` (N,)
            and ``'labels'`` (N,).
        iou_threshold: overlap above which the lower-scoring box is removed.

    Returns:
        Array of shape ``(K, 6)`` with rows ``x1, y1, x2, y2, score, label``
        for the surviving boxes, ordered by descending score.
    """
    stacked = torch.cat(
        (predictions['boxes'],
         predictions['scores'].unsqueeze(dim=1),
         predictions['labels'].unsqueeze(dim=1)),
        dim=1,
    ).detach().cpu().numpy()

    # Process boxes from the highest score to the lowest.
    order = np.flip(stacked[:, 4].argsort())
    stacked = stacked[order]
    rows = stacked.shape[0]

    boxes = stacked[:, :4]
    categories = stacked[:, 5]
    ious = box_iou_batch(boxes, boxes)
    # Remove the self-overlap on the diagonal so a box never suppresses itself.
    ious = ious - np.eye(rows)

    keep = np.ones(rows, dtype=bool)

    for index, (iou, category) in enumerate(zip(ious, categories)):
        if not keep[index]:
            continue  # already suppressed by a better box

        condition = (iou > iou_threshold) & (categories == category)
        keep = keep & ~condition

    # `keep` is aligned with the score-sorted rows, so it must index them
    # directly; mapping it back to the input order first would select the
    # wrong rows whenever the input was not already sorted.
    return stacked[keep]

In [None]:
# Make sure the model is in evaluation mode before running inference below.
model.eval()


In [None]:

import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
# Class names indexed by the integer label the model predicts.
labels_map = ['Background', 'Pedestrian', 'People', 'Bicycle', 'Car', 'Van', 'Truck',
              'Tricycle', 'Awning-tricycle', 'Bus', 'Motor', 'Others']


def draw_bounding_boxes(img, boxes, labels, scores, ax, threshold=0.2):   # image - tensor, others - numpy
    """Show ``img`` on ``ax`` and overlay every box whose score reaches ``threshold``.

    Args:
        img: image tensor in channel-first layout (it is transposed to HWC for display).
        boxes: array with one ``x1, y1, x2, y2`` row per box.
        labels: numpy array of class indices into ``labels_map``.
        scores: confidence per box.
        ax: matplotlib axes to draw on.
        threshold: boxes with a score below this value are skipped
            (defaults to 0.2, the value that used to be hard-coded).

    Returns:
        ``ax``, to allow chaining.
    """
    ax.imshow(img.detach().cpu().numpy().transpose(1, 2, 0))
    ax.set_title('title')
    # labels may arrive as floats (they are stacked with boxes/scores), so cast
    # them to integers before indexing labels_map.
    for box, label, score in zip(boxes, labels.astype(np.uint8), scores):
        if score < threshold:
            continue
        x1, y1, x2, y2 = box
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                 linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        ax.text(
            x1,
            y1,
            f"{labels_map[label]}: {(np.round(score*100,1))}%",
            verticalalignment='top',
            color='white',
            fontsize=8,
            bbox={'facecolor': 'red', 'alpha': 0.7, 'pad': 1},
        )
    return ax

In [None]:
from torchvision.ops import nms

def non_max_suppression_git(preds, iou_threshold=0.5, score_threshold=0.2):
    """Score-filter one image's predictions and run torchvision NMS on the rest.

    Labels are not passed to ``nms``, so suppression is class-agnostic here.

    Args:
        preds: dict with tensors ``'boxes'``, ``'scores'`` and ``'labels'``.
        iou_threshold: IoU threshold forwarded to ``nms``.
        score_threshold: predictions with a score not above this are dropped first.

    Returns:
        Dict with the same three keys holding only the surviving predictions.
    """
    boxes, scores, labels = preds['boxes'], preds['scores'], preds['labels']

    # Step 1: discard low-confidence predictions.
    confident = scores > score_threshold
    boxes, scores, labels = boxes[confident], scores[confident], labels[confident]

    # Step 2: NMS returns the indices of the boxes to keep.
    survivors = nms(boxes, scores, iou_threshold)

    return {
        'boxes': boxes[survivors],
        'scores': scores[survivors],
        'labels': labels[survivors],
    }


# Load one test image and turn it into a 1 x 3 x 480 x 480 float tensor on `device`.
# NOTE(review): this absolute path does not live under root_dir defined earlier.
image = cv2.imread('D:\\VisDroneDataset\\test\\images\\0000009_01339_d_0000005.jpg')
# OpenCV loads BGR; convert to RGB.
image  =cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
image = cv2.resize(image,(480,480),interpolation = cv2.INTER_AREA)
# HWC -> CHW
image = image.transpose(2,0,1)

# NOTE(review): unlike VisDroneDataset.__getitem__, pixel values are not divided by 255 here.
image = torch.as_tensor(image, dtype=torch.float32).unsqueeze(dim=0).to(device)

In [None]:
# Sets KMP_DUPLICATE_LIB_OK for this process — presumably to work around the
# "duplicate OpenMP runtime" error some setups hit when plotting; TODO confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [None]:
def draw_predictions(imgs, preds, mode, i):#preds - list of dictionaries of tensors
    """Plot each image with its boxes side by side and optionally save the figure.

    Args:
        imgs: list of image tensors.
        preds: list of dicts of tensors, one per image, with ``'boxes'`` and
            ``'labels'`` (and ``'scores'`` when ``mode == 'pred'``).
        mode: ``'pred'`` to use the predicted scores; any other value treats
            the dicts as ground truth and gives every box a score of 1.
        i: index used in the output file name; ``-1`` disables saving.
    """
    fig, ax = plt.subplots(1, len(imgs), figsize=(40, 40))
    for idx, (img, ann) in enumerate(zip(imgs, preds)):
        boxes = ann['boxes']
        if mode == 'pred':
            scores = ann['scores'].unsqueeze(dim=1)
        else:
            # Ground truth has no scores: use 1.0, created on the boxes' own
            # device so the concatenation also works for CUDA targets.
            scores = torch.ones(len(boxes), 1, device=boxes.device)
        # Columns: x1, y1, x2, y2, score, label.
        rows = torch.cat((boxes, scores, ann['labels'].unsqueeze(dim=1)), dim=1).detach().cpu().numpy()
        # plt.subplots returns a bare Axes (not an array) when there is one column.
        draw_bounding_boxes(img, rows[:, 0:4], rows[:, 5], rows[:, 4], ax[idx] if len(imgs) != 1 else ax)

    if i != -1:
        fig.savefig(f'D:\\Drone-Object-Detection\\Results\\Images\\img{i}.png')

    plt.show()
    # Release the figure; this function is called repeatedly in a loop.
    plt.close(fig)

In [None]:
from torchmetrics.detection import MeanAveragePrecision

In [None]:
def replace_cuda_with_cpu(data):
    """Recursively move every tensor inside nested dicts/lists to the CPU.

    Tensors are returned via ``.cpu()``; dicts and lists are rebuilt with their
    contents converted; any other object (including tuples) is returned as is.
    """
    if isinstance(data, torch.Tensor):
        return data.cpu()
    if isinstance(data, dict):
        return {name: replace_cuda_with_cpu(item) for name, item in data.items()}
    if isinstance(data, list):
        return list(map(replace_cuda_with_cpu, data))
    return data

In [None]:
def dict_to_cpu(preds):
    """Return a new list with ``replace_cuda_with_cpu`` applied to each element of ``preds``."""
    return [replace_cuda_with_cpu(dic) for dic in preds]

In [None]:
def from_dict_to_arr(dict):
    """Flatten a list of annotation dicts into one list of per-box rows.

    Args:
        dict: iterable of dicts with tensors ``'boxes'`` (N, 4) and ``'labels'`` (N,).
            (The parameter name shadows the builtin; it is kept so existing
            keyword callers keep working.)

    Returns:
        A flat list of 1-D numpy arrays ``x1, y1, x2, y2, 1.0, label`` — one
        per box, across all input dicts.
    """
    annotations_new = []
    for ann in dict:
        boxes = ann['boxes']
        # Placeholder score of 1 per box, created on the boxes' own device so
        # the concatenation works for CPU and CUDA inputs alike.
        ones = torch.ones(len(boxes), 1, device=boxes.device)
        rows = torch.cat((boxes, ones, ann['labels'].unsqueeze(dim=1)), dim=1).detach().cpu().numpy()
        # extend (not append): the result stays a flat list of rows.
        annotations_new.extend(rows)

    return annotations_new

In [None]:
# Opening in 'w' mode and closing immediately creates the results file or empties an existing one.
# NOTE(review): the relative 'Results\\mAPs' directory must already exist.
map_file = open('Results\\mAPs\\file.txt','w')
map_file.close()



In [None]:
def calc_mAP(pred,target):
    """Compute bounding-box mean average precision for one batch and log it.

    The result is appended to the module-level ``map_file`` (which must be
    open for writing) and printed.

    Args:
        pred: list of prediction dicts (``boxes``, ``scores``, ``labels``).
        target: list of ground-truth dicts (``boxes``, ``labels``).

    Returns:
        The dict produced by ``MeanAveragePrecision.compute()``.
    """
    metric = MeanAveragePrecision(iou_type="bbox")
    # Use the arguments, not the notebook globals, so the caller controls
    # which predictions (e.g. the NMS-filtered ones) are scored.
    metric.update(pred, target)
    result = metric.compute()  # compute once and reuse for both outputs
    map_file.write(str(result)+'\n')
    print(result)
    return result
    


In [None]:
# Re-open the results file in append mode; calc_mAP writes one line per call to this handle.
# NOTE(review): the handle stays open until the final cell closes it — if the
# evaluation loop raises, it is left open.
map_file = open('Results\\mAPs\\file.txt','a')
#path to img - 'Results\\imgs\\img(i).jpg

In [None]:
# Evaluate and visualise the first three validation batches.
maps = ""  # currently unused
for i, (batch_imgs, annotations) in enumerate(val_loader):
    # The dataset yields numpy arrays; the model expects a list of float tensors.
    batch_imgs = [torch.from_numpy(img).float().to(device) for img in batch_imgs]

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        preds = model(batch_imgs)
    preds = replace_cuda_with_cpu(preds)

    # Score-filter and NMS each image's predictions.
    nms_preds = [non_max_suppression_git(pred) for pred in preds]

    calc_mAP(nms_preds, annotations)

    # Predictions are saved as img{i}.png; ground truth is only displayed (-1 disables saving).
    draw_predictions(batch_imgs, nms_preds, 'pred', i)
    draw_predictions(batch_imgs, annotations, 'tar', -1)

    if i >= 2:
        break

In [None]:
# Flush and close the mAP results file opened in append mode above.
map_file.close()