<a href="https://colab.research.google.com/github/yunmengmengyun/byte/blob/main/ByteTrack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Pin supervision/ultralytics to the versions this notebook was last run with
# (see the install log below) so a fresh runtime reproduces the same behavior.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install supervision==0.23.0 ultralytics==8.3.3 tqdm numpy

Collecting supervision
  Downloading supervision-0.23.0-py3-none-any.whl.metadata (14 kB)
Collecting ultralytics
  Downloading ultralytics-8.3.3-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.8-py3-none-any.whl.metadata (9.3 kB)
Downloading supervision-0.23.0-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m151.5/151.5 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics-8.3.3-py3-none-any.whl (881 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m881.4/881.4 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.8-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, supervision, ultralytics
Successfully installed supervision-0.23.0 ultralytics-8.3.3 ultralytics-thop-2.0.8


In [None]:
# Show the working directory; input/output video paths below are relative to it.
# Explicit magic: a bare `pwd` only works while automagic is on and no variable shadows it.
%pwd

'/content'

In [None]:
# Run the tracking script on 33.mp4 with the yolov8m.pt weights and write the
# result to test_pred.mp4, using a detector confidence threshold of 0.1.
# NOTE(review): assumes sv_bytetracker_yolo.py in the working directory matches
# the script cell shown in this notebook — confirm.
!python sv_bytetracker_yolo.py --source_weights_path yolov8m.pt --source_video_path 33.mp4 --target_video_path test_pred.mp4 --confidence_threshold 0.1

100% 313/313 [00:08<00:00, 38.16it/s]


In [None]:
# Same invocation as for 33.mp4, applied to PW.mp4; the result is written to
# PW-pre.mp4 with the same weights (yolov8m.pt) and confidence threshold (0.1).
!python sv_bytetracker_yolo.py --source_weights_path yolov8m.pt --source_video_path PW.mp4 --target_video_path PW-pre.mp4 --confidence_threshold 0.1

100% 108/108 [00:04<00:00, 23.41it/s]


In [None]:
import supervision as sv
from ultralytics import YOLO
from tqdm import tqdm
import argparse
import numpy as np

# NOTE(review): this module-level tracker is not used by process_video, which
# creates its own local sv.ByteTrack() instance that shadows this name.
# It looks redundant — confirm nothing else relies on it before removing.
tracker = sv.ByteTrack()
def process_video(
        source_weights_path: str,
        source_video_path: str,
        target_video_path: str,
        confidence_threshold: float = 0.3,
        iou_threshold: float = 0.7,
        class_ids: tuple = (2, 7)
) -> None:
    """Detect, track and count objects in a video and write an annotated copy.

    Each frame of the source video is passed through the YOLO model; the
    detections are restricted to ``class_ids``, handed to a ByteTrack tracker,
    checked against a horizontal counting line, and then drawn (boxes, labels
    and the line with its counters) onto a copy of the frame that is written
    to the target video.

    Args:
        source_weights_path: Path to the YOLO weights file to load.
        source_video_path: Path of the video to read frames from.
        target_video_path: Path of the annotated video to write.
        confidence_threshold: Passed to the model as ``conf``.
        iou_threshold: Passed to the model as ``iou``.
        class_ids: Class ids to keep; every other detection is dropped before
            tracking. The default ``(2, 7)`` keeps the car and truck classes.

    Returns:
        None. The only result is the video written to ``target_video_path``.
    """
    model = YOLO(source_weights_path)
    # Class names as a list, indexed below by each detection's class id.
    class_names = list(model.names.values())

    # Counting line for in/out crossings.
    # NOTE(review): the line is fixed at y=500 from x=0 to x=1280 regardless of
    # the resolution of the input video — confirm this suits the footage used.
    line_start = sv.Point(0, 500)
    line_end = sv.Point(1280, 500)
    line_counter = sv.LineZone(start=line_start, end=line_end)
    line_annotator = sv.LineZoneAnnotator(thickness=2, text_thickness=1, text_scale=0.2)

    tracker = sv.ByteTrack()
    box_annotator = sv.BoundingBoxAnnotator(thickness=1)
    label_annotator = sv.LabelAnnotator(text_scale=0.2, text_thickness=1, text_padding=3)

    video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)
    frame_generator = sv.get_video_frames_generator(source_path=source_video_path)

    # Materialize once so any iterable of ids (tuple, list, set, ...) is accepted.
    wanted_class_ids = list(class_ids)

    with sv.VideoSink(target_path=target_video_path, video_info=video_info) as sink:
        for frame in tqdm(frame_generator, total=video_info.total_frames):
            results = model(
                frame,
                verbose=False,
                conf=confidence_threshold,
                iou=iou_threshold,
            )[0]
            detections = sv.Detections.from_ultralytics(results)

            # Keep only the requested classes. A boolean mask is used instead
            # of the index tuple returned by np.where.
            detections = detections[np.isin(detections.class_id, wanted_class_ids)]
            detections = tracker.update_with_detections(detections)

            # One label per tracked detection: "#<tracker id> <class name> <confidence>".
            labels = [
                f"#{tracker_id} {class_names[class_id]} {round(confidence, 2)}"
                for tracker_id, class_id, confidence in zip(
                    detections.tracker_id, detections.class_id, detections.confidence
                )
            ]

            # Update the in/out counters before the line annotator draws them.
            line_counter.trigger(detections=detections)

            # Draw on a copy so the frame from the generator is left untouched.
            annotated = box_annotator.annotate(scene=frame.copy(), detections=detections)
            annotated = label_annotator.annotate(
                scene=annotated, detections=detections, labels=labels
            )
            annotated = line_annotator.annotate(frame=annotated, line_counter=line_counter)
            sink.write_frame(frame=annotated)

def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the video processing script.

    Returns:
        A parser with three required path options (``--source_weights_path``,
        ``--source_video_path``, ``--target_video_path``) and two optional
        float thresholds (``--confidence_threshold`` defaulting to 0.3 and
        ``--iou_threshold`` defaulting to 0.7).
    """
    # The text is passed as ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, so passing it positionally would replace the
    # program name in the usage line with this sentence.
    parser = argparse.ArgumentParser(
        description="video processing with YOLO and ByteTrack"
    )
    parser.add_argument(
        "--source_weights_path",
        required=True,
        help="Path to the source weights file",
        type=str
    )
    parser.add_argument(
        "--source_video_path",
        required=True,
        help="Path to the source video file",
        type=str
    )
    parser.add_argument(
        "--target_video_path",
        required=True,
        help="Path to the target video file",
        type=str
    )
    parser.add_argument(
        "--confidence_threshold",
        default=0.3,
        help="Confidence threshold for the model",
        type=float
    )
    parser.add_argument(
        "--iou_threshold",
        default=0.7,
        help="Iou threshold for the model",
        type=float
    )
    return parser


if __name__ == "__main__":
    # Script entry point: parse the command line and run the pipeline once.
    args = build_arg_parser().parse_args()
    process_video(
        source_weights_path=args.source_weights_path,
        source_video_path=args.source_video_path,
        target_video_path=args.target_video_path,
        confidence_threshold=args.confidence_threshold,
        iou_threshold=args.iou_threshold
    )

[0m[01;34msample_data[0m/  sv_bytetracker_yolo.py
