# Input setup

In [13]:
import torch
import torchvision
import torchvision.transforms as T
import cv2
import numpy as np
import pandas as pd

# Path of the clip to analyse, relative to the notebook's working directory.
VIDEO_PATH = './testVideos/yolovid.mp4'

cap = cv2.VideoCapture(VIDEO_PATH)
# VideoCapture does not raise when the file is missing or cannot be decoded; it only
# reports "not opened". Fail loudly here instead of silently reading zero-sized properties.
if not cap.isOpened():
    raise IOError(f"Could not open video: {VIDEO_PATH}")

# Basic stream properties, cast from the floats that cap.get() returns.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))  # int() truncates fractional frame rates


# FasterRCNN

In [11]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Only convert the image to a float tensor here. The detector's own
# GeneralizedRCNNTransform already applies Normalize(mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]) (see the model repr printed by this cell), so adding
# T.Normalize to this pipeline would normalize every frame twice.
transform = T.Compose([T.ToTensor()])

# weights (COCOv1 dataset trained)
weights = torchvision.models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
# model -- pass `weights` by keyword; the positional form is deprecated in torchvision
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=weights)
model = model.to(device)
# Switch to inference mode; as the last expression this also displays the model repr.
model.eval()



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

# DeepSort

In [8]:
from deep_sort_realtime.deepsort_tracker import DeepSort
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection
from deep_sort.deep_sort.tracker import Tracker
from deep_sort.tools import generate_detections
from deep_sort.application_util import preprocessing

# --- DeepSORT configuration -------------------------------------------------
# File handed to create_box_encoder below.
model_filename = './deep_sort/model_data/mars-small128.pb'
# Threshold passed to the "cosine" nearest-neighbour metric below.
max_cosine_distance = 0.1
# Budget passed to the metric; None here.
nn_budget = None
# Not consumed in this cell -- presumably intended for a non-max-suppression step.
nms_max_overlap = 1.0

# --- DeepSORT objects --------------------------------------------------------
# Box encoder built from the model file, configured with a batch size of 1.
encoder = generate_detections.create_box_encoder(model_filename, batch_size=1)

# Cosine nearest-neighbour metric that the tracker is constructed with.
metric = nn_matching.NearestNeighborDistanceMetric(
    metric="cosine",
    matching_threshold=max_cosine_distance,
    budget=nn_budget,
)
tracker = Tracker(metric)

Instructions for updating:
non-resource variables are not supported in the long term
Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2023-03-22 15:05:49.516348: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-22 15:05:49.516789: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Detection and Display

In [22]:
# Detector label ids that are counted as vehicles in this notebook.
VEHICLE_LABELS = (3, 8)  # COCO category ids: 3 = car, 8 = truck

ids = []          # track id of every confirmed box drawn, one entry per box per frame
seen_ids = set()  # distinct track ids, so the running total is cheap to display

try:
    while cap.isOpened():
        # Capture frame-by-frame; stop when the stream is exhausted or unreadable.
        ret, frame = cap.read()
        if not ret:
            break

        # Detect objects with Faster R-CNN. OpenCV decodes frames as BGR while the
        # torchvision detector expects RGB, so convert before building the tensor.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = transform(rgb_frame).to(device)
        with torch.no_grad():
            result = model([img])[0]

        # Move each output tensor to the CPU once per frame (not once per detection).
        labels = result['labels'].cpu().numpy()
        det_scores = result['scores'].cpu().numpy()
        det_boxes = result['boxes'].cpu().numpy()

        # Keep only vehicle detections and convert the boxes from
        # (x1, y1, x2, y2) to the (x, y, w, h) layout handed to Detection.
        keep = np.isin(labels, VEHICLE_LABELS)
        scores = det_scores[keep]
        boxes = det_boxes[keep].copy()
        boxes[:, 2:] -= boxes[:, :2]

        # The encoder receives the original (BGR) frame, exactly as before.
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, score, feature)
            for bbox, score, feature in zip(boxes, scores, features)
        ]

        # Update the tracker with the current detections (no NMS step is applied).
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # Skip tentative tracks and tracks not matched in the latest update.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            x1, y1, x2, y2 = (int(v) for v in track.to_tlbr())
            ids.append(track.track_id)
            seen_ids.add(track.track_id)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255,0,0), 2)
            cv2.putText(frame, str(track.track_id), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

        cv2.putText(frame, "Total Cars: "+str(len(seen_ids)), (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)
        cv2.imshow('Frame', frame)

        # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close the preview window, including when
    # the loop is interrupted or a frame raises part-way through.
    cap.release()
    cv2.destroyAllWindows()