##0. Introduction

In this notebook the car counting using Yolow8 is performed. The objective is the accurate counting in city streets in order to  introduce improvements in traffic management. 

#1. GPU Environment

Access to GPU

In [None]:
!nvidia-smi

Wed Mar 15 22:57:34 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   74C    P0    32W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import os
HOME = os.getcwd()
print(HOME)

/content


#2. Load video

In [None]:
#%cd {HOME}
#!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1pz68D1Gsx80MoPg-_q-IbEdESEmyVLm-' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1pz68D1Gsx80MoPg-_q-IbEdESEmyVLm-" -O vehicle-counting.mp4 && rm -rf /tmp/cookies.txt
SOURCE_VIDEO_PATH = f"{HOME}/car_counting.mp4"
#SOURCE_VIDEO_PATH = f"{HOME}/vehicle-counting.mp4"

#3. Install YOLOv8

In [None]:
!pip install ultralytics

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.53 🚀 Python-3.9.16 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 25.5/78.2 GB disk)


#4. Install ByteTrack

In [None]:
%cd {HOME}
!git clone https://github.com/ifzhang/ByteTrack.git
!cd ByteTrack && pip3 install -q -r requirements.txt
!cd ByteTrack && python3 setup.py -q develop
!pip install -q cython_bbox
!pip install -q onemetric

from IPython import display
display.clear_output()

import sys
sys.path.append(f"{HOME}/ByteTrack")

!pip install loguru
import yolox
print("yolox.__version__:", yolox.__version__)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting loguru
  Using cached loguru-0.6.0-py3-none-any.whl (58 kB)
Installing collected packages: loguru
Successfully installed loguru-0.6.0
yolox.__version__: 0.1.0


In [None]:
!pip install lap
from yolox.tracker.byte_tracker import BYTETracker, STrack
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass


@dataclass(frozen=True)
class BYTETrackerArgs:
    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lap
  Using cached lap-0.4.0-cp39-cp39-linux_x86_64.whl
Installing collected packages: lap
Successfully installed lap-0.4.0


#4. Install Supervision

In [None]:
!pip install supervision==0.1.0

from IPython import display
display.clear_output()

import supervision
print("supervision.__version__:", supervision.__version__)

supervision.__version__: 0.1.0


In [None]:
from supervision.draw.color import ColorPalette
from supervision.geometry.dataclasses import Point
from supervision.video.dataclasses import VideoInfo
from supervision.video.source import get_video_frames_generator
from supervision.video.sink import VideoSink
from supervision.notebook.utils import show_frame_in_notebook
from supervision.tools.detections import Detections, BoxAnnotator
from supervision.tools.line_counter import LineCounter, LineCounterAnnotator

#5. Install tracking utils

In [None]:
from typing import List
import numpy as np

# converts detections into format that can be consumed by match_detections_with_tracks function
def detections2boxes(detections: Detections) -> np.ndarray:
    return np.hstack((
        detections.xyxy,
        detections.confidence[:, np.newaxis]
    ))


# converts List[STrack] into format that can be consumed by match_detections_with_tracks function
def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    return np.array([
        track.tlbr
        for track
        in tracks
    ], dtype=float)


# matches our bounding boxes with predictions
def match_detections_with_tracks(
    detections: Detections, 
    tracks: List[STrack]
) -> Detections:
    if not np.any(detections.xyxy) or len(tracks) == 0:
        return np.empty((0,))

    tracks_boxes = tracks2boxes(tracks=tracks)
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    track2detection = np.argmax(iou, axis=1)
    
    tracker_ids = [None] * len(detections)
    
    for tracker_index, detection_index in enumerate(track2detection):
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id

    return tracker_ids

#6. Load pre-trained YOLOv8 model

In [None]:
MODEL = "yolov8x.pt"
from ultralytics import YOLO

model = YOLO(MODEL)
model.fuse()

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt to yolov8x.pt...


  0%|          | 0.00/131M [00:00<?, ?B/s]

YOLOv8x summary (fused): 268 layers, 68200608 parameters, 0 gradients, 257.8 GFLOPs


#7. Prediction categories (single frame)

In [None]:
# dict maping class_id to class_name
CLASS_NAMES_DICT = model.model.names
# class_ids of interest - car, motorcycle, bus and truck
CLASS_ID = [2, 3, 5, 7]

In [None]:
# create frame generator
generator = get_video_frames_generator(SOURCE_VIDEO_PATH)

# create instance of BoxAnnotator
box_annotator = BoxAnnotator(color=ColorPalette(), thickness=4, text_thickness=4, text_scale=2)

# acquire first video frame
iterator = iter(generator)
frame = next(iterator)

# model prediction on single frame and conversion to supervision Detections
results = model(frame)
detections = Detections(
    xyxy=results[0].boxes.xyxy.cpu().numpy(),
    confidence=results[0].boxes.conf.cpu().numpy(),
    class_id=results[0].boxes.cls.cpu().numpy().astype(int)
)

# format custom labels
labels = [
    f"{CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
    for _, confidence, class_id, tracker_id
    in detections
]

# annotate and display frame
frame = box_annotator.annotate(frame=frame, detections=detections, labels=labels)
show_frame_in_notebook(frame, (16, 16))


0: 384x640 2 persons, 8 cars, 1 bus, 61.7ms
Speed: 0.5ms preprocess, 61.7ms inference, 22.9ms postprocess per image at shape (1, 3, 640, 640)


#8. Predict the whole video

In [43]:
# settings
LINE_START = Point(820, 610)
LINE_END = Point(600, 400)

TARGET_VIDEO_PATH = f"{HOME}/vehicle-counting-result.mp4"

In [37]:
VideoInfo.from_video_path(SOURCE_VIDEO_PATH)

VideoInfo(width=1092, height=614, fps=30, total_frames=946)

In [46]:
from tqdm.notebook import tqdm

# create BYTETracker instance
byte_tracker = BYTETracker(BYTETrackerArgs())

# create VideoInfo instance
video_info = VideoInfo.from_video_path(SOURCE_VIDEO_PATH)

# create frame generator
generator = get_video_frames_generator(SOURCE_VIDEO_PATH)

# create LineCounter instance
line_counter = LineCounter(start=LINE_START, end=LINE_END)

# create instance of BoxAnnotator and LineCounterAnnotator
box_annotator = BoxAnnotator(color=ColorPalette(), thickness=4, text_thickness=4, text_scale=2)
line_annotator = LineCounterAnnotator(thickness=4, text_thickness=4, text_scale=2)

# open target video file
with VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
    # loop over video frames
    for frame in tqdm(generator, total=video_info.total_frames):
        # model prediction on single frame and conversion to supervision Detections
        results = model(frame)
        detections = Detections(
            xyxy=results[0].boxes.xyxy.cpu().numpy(),
            confidence=results[0].boxes.conf.cpu().numpy(),
            class_id=results[0].boxes.cls.cpu().numpy().astype(int)
        )
        # filtering out detections with unwanted classes
        mask = np.array([class_id in CLASS_ID for class_id in detections.class_id], dtype=bool)
        detections.filter(mask=mask, inplace=True)
        # tracking detections
        tracks = byte_tracker.update(
            output_results=detections2boxes(detections=detections),
            img_info=frame.shape,
            img_size=frame.shape
        )
        tracker_id = match_detections_with_tracks(detections=detections, tracks=tracks)
        detections.tracker_id = np.array(tracker_id)
        # filtering out detections without trackers
        mask = np.array([tracker_id is not None for tracker_id in detections.tracker_id], dtype=bool)
        detections.filter(mask=mask, inplace=True)
        # format custom labels
        labels = [
           # f"#{CLASS_NAMES_DICT[class_id]}"
            #for _, confidence, class_id, tracker_id
            #in detections
        ]
        # updating line counter
        line_counter.update(detections=detections)
        # annotate and display frame
        #frame = box_annotator.annotate(frame=frame, detections=detections, labels=labels)
        line_annotator.annotate(frame=frame, line_counter=line_counter)
        sink.write_frame(frame)

  0%|          | 0/946 [00:00<?, ?it/s]


0: 384x640 2 persons, 8 cars, 1 bus, 64.3ms
Speed: 0.6ms preprocess, 64.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 8 cars, 1 bus, 64.5ms
Speed: 0.9ms preprocess, 64.5ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 8 cars, 1 bus, 45.5ms
Speed: 0.5ms preprocess, 45.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 9 cars, 1 bus, 43.1ms
Speed: 0.4ms preprocess, 43.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 9 cars, 1 bus, 43.2ms
Speed: 0.7ms preprocess, 43.2ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 9 cars, 1 bus, 31.9ms
Speed: 0.5ms preprocess, 31.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 persons, 8 cars, 35.0ms
Speed: 0.4ms preprocess, 35.0ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2