# Track football players with YOLOv5 + ByteTrack 

ByteTrack is a multi-object tracker that identifies objects and follows their trajectories across frames with high accuracy.

In this notebook we will track football players on the field from YOLOv5 predictions. The videos for this project can be found in the Kaggle [DFL - Bundesliga Data Shootout](https://www.kaggle.com/competitions/dfl-bundesliga-data-shootout/data) competition.

YOLOv5 was trained on the [football-players-detection](https://universe.roboflow.com/roboflow-jvuqo/football-players-detection-3zvbc) dataset from Roboflow. The weights can be found [here](https://drive.google.com/drive/folders/1-1r2psRgW7JRSEykRmvUYEY31ufuxiDb?usp=share_link).


Setup

In [None]:
#!git clone https://github.com/ifzhang/ByteTrack.git
!cd ByteTrack #&& pip3 install -r requirements.txt
#!cd ByteTrack #&& python3 setup.py develop
#!pip install cython_bbox

Cloning into 'ByteTrack'...
remote: Enumerating objects: 2007, done.[K
remote: Total 2007 (delta 0), reused 0 (delta 0), pack-reused 2007 (from 1)[K
Receiving objects: 100% (2007/2007), 79.60 MiB | 11.72 MiB/s, done.
Resolving deltas: 100% (1141/1141), done.
running develop
!!

        ********************************************************************************
        Please avoid running ``setup.py`` and ``easy_install``.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://github.com/pypa/setuptools/issues/917 for details.
        ********************************************************************************

!!
  easy_install.initialize_options(self)
!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/art

In [None]:
# !pip install onemetric --quiet 

In [1]:
import sys
sys.path.append("ByteTrack")

In [2]:
from yolox.tracker.byte_tracker import BYTETracker, STrack
import cv2
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from pathlib import Path
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass
from typing import List, Optional

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# !pip install yolox

Defaulting to user installation because normal site-packages is not writeable
Collecting yolox
  Using cached yolox-0.3.0.tar.gz (79 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting onnx==1.8.1 (from yolox)
  Using cached onnx-1.8.1.tar.gz (5.2 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[20 lines of output][0m
  [31m   [0m fatal: not a git repository (or any of the parent directories): .git
  [31m   [0m Traceback (most recent call last):
  [31m   [0m   File "/home/urvashi2022/.local/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 353, in <module>
  [31m   [0m     main()
  [31m   [0m   File "/home/urvashi2022/.local/lib/python3.10/site-packages/pip/_vendor/

In [None]:
# !pip install thop

Defaulting to user installation because normal site-packages is not writeable


In [None]:
# !pip install loguru

Defaulting to user installation because normal site-packages is not writeable


Load the video

In [6]:
def get_video_frames(video_path):
    """Decode every frame of a video file into an in-memory list.

    Args:
        video_path: Location of the video. It is converted with ``str()``
            before being handed to ``cv2.VideoCapture``.

    Returns:
        list: The frames in reading order, exactly as returned by
        ``VideoCapture.read()``. The list is empty when the capture is not
        opened or the first read fails.
    """
    video = cv2.VideoCapture(str(video_path))

    frames = []
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                # No further frame could be read: stop collecting.
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if reading is interrupted.
        video.release()

    return frames

In [7]:
# Input video to analyse. NOTE(review): machine-specific absolute path; adjust before running elsewhere.
video_path = "/home/urvashi2022/Desktop/UI_DEVELOPMENT/inputv.mp4"

In [8]:
# Read the whole video up front; `frames` keeps every decoded frame in memory.
frames = get_video_frames(video_path)

In [9]:
# plt.figure(figsize=(20, 20))
# plt.imshow(cv2.cvtColor(frames[0], cv2.COLOR_BGR2RGB))
# plt.show()

# # Save the image

# cv2.imwrite("frame.jpg", frames[100])

Load YOLOv5

In [10]:
# Custom detector weights. NOTE(review): machine-specific absolute path; adjust before running elsewhere.
path_weights = "/home/urvashi2022/Desktop/UI_DEVELOPMENT/tracking/best300.pt"
# Load the custom YOLOv5 model through torch.hub and run it on the CPU.
# force_reload=True makes torch.hub fetch the ultralytics/yolov5 repository again
# instead of reusing the cached copy, so this cell needs network access on every run.
yolo_model = torch.hub.load('ultralytics/yolov5', 'custom', path_weights, device="cpu", force_reload=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /home/urvashi2022/.cache/torch/hub/master.zip
[31m[1mrequirements:[0m Ultralytics requirement ['ultralytics>=8.2.34'] not found, attempting AutoUpdate...
[31m[1mrequirements:[0m ❌ AutoUpdate skipped (offline)
YOLOv5 🚀 2024-11-14 Python-3.10.12 torch-1.13.1+cu117 CPU

Fusing layers... 
Model summary: 212 layers, 20865057 parameters, 0 gradients, 47.9 GFLOPs
Adding AutoShape... 


In [11]:
# Class index produced by the detector -> human-readable class name.
ind_to_cls = dict(enumerate(("ball", "goalkeeper", "player", "referee")))

# Drawing colour per class name. The tuples are in OpenCV's (B, G, R) channel order.
colors = dict(
    ball=(0, 200, 200),        # yellow
    player=(255, 0, 0),        # blue
    goalkeeper=(255, 0, 255),  # magenta
    referee=(0, 0, 255),       # red
)

# Colour per team label, same (B, G, R) order: black and white.
colors1 = dict(
    Team1=(0, 0, 0),
    Team2=(255, 255, 255),
)

@dataclass
class Detection:
    """One detected object, optionally tagged with the id of a tracker."""

    xywh: List[float]        # [x_min, y_min, width, height]
    xyxy: List[float]        # [x_min, y_min, x_max, y_max]
    class_id: int            # numeric class index
    class_name: str          # name looked up in ``ind_to_cls``
    confidence: float        # detection score
    tracker_id: Optional[int] = None  # stays None until a tracker id is assigned

    @classmethod
    def from_results(cls, pred):
        """Build one ``Detection`` per row of ``pred``.

        Every row is unpacked as
        ``(x_min, y_min, x_max, y_max, confidence, class_id)``; the class id is
        cast to ``int`` and translated to a name through ``ind_to_cls``.

        Returns:
            list: The detections, in the same order as the rows of ``pred``.
        """
        collected = []
        for row in pred:
            left, top, right, bottom, score, raw_label = row
            label = int(raw_label)
            size_box = [float(left), float(top), float(right - left), float(bottom - top)]
            corner_box = [float(left), float(top), float(right), float(bottom)]
            collected.append(
                Detection(
                    xywh=size_box,
                    xyxy=corner_box,
                    class_id=label,
                    class_name=ind_to_cls[label],
                    confidence=float(score),
                )
            )
        return collected
    
def draw_detections(image, detections, draw_tacker_id: bool = False):
    """Return an annotated copy of ``image``; the input itself is left untouched.

    For every detection an elliptical arc (0 to 180 degrees, 8 px vertical
    radius, half the box width as horizontal radius) is drawn at the bottom
    centre of its box, and a text label is written 10 px above the box's
    top-left corner. Both use the colour registered for the class in ``colors``.

    Args:
        image: Frame to annotate; must provide ``copy()``.
        detections: Iterable of objects exposing ``xyxy``, ``class_name`` and
            ``tracker_id``.
        draw_tacker_id: When True, every class except "ball" is labelled with
            ``str(tracker_id)``; otherwise the class name is used as the label.

    Returns:
        The annotated copy.
    """
    annotated = image.copy()
    for det in detections:
        box = det.xyxy
        label = det.class_name

        # Anchor of the marker: horizontal middle of the box, on its bottom edge.
        anchor = (int((box[0] + box[2]) / 2), int(box[3]))
        half_width = int(box[2] - box[0]) // 2
        cv2.ellipse(annotated, anchor, (half_width, 8), 0, 0, 180, color=colors[label], thickness=3)

        # The ball never shows a tracker id, only its class name.
        show_id = draw_tacker_id and label != "ball"
        text = str(det.tracker_id) if show_id else label
        cv2.putText(annotated, text, (int(box[0]), int(box[1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[label], 3)

    return annotated

# Run the detector on the first frame and take the first entry of `.pred` as a NumPy array.
prediction = yolo_model(frames[0]).pred[0].cpu().numpy()

# Annotate a copy of that frame; draw_tacker_id is left at False, so labels are class names.
image = draw_detections(frames[0], Detection.from_results(prediction))

# plt.figure(figsize=(20, 20))
# plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# plt.grid(False)
# plt.show()

# # Save the image

# cv2.imwrite("detections.jpg", image)

In [12]:
# from sklearn.cluster import KMeans
# import numpy as np

# ind_to_cls = {
#     0: "ball",
#     1 : "goalkeeper",
#     2 : "player",
#     3 : "referee"
# }

# colors = {
#     "ball": (0,200,200), # yellow
#     "player": (255,0,0), # blue
#     "goalkeeper":(255,0,255), # magenta
#     "referee": (0,0,255) # red
# }

# colors1 = {
#   "Team1" : (0, 0, 225),
#   "Team2" : (255, 0, 0)
# }

# # def identify_team_color(image, bbox):
# #     """
# #     Identify the team color of the player based on the bounding box region.
# #     """
# #     # Extract the region within the bounding box
# #     x_min, y_min, x_max, y_max = map(int, bbox)
# #     player_roi = image[y_min:y_max, x_min:x_max]

# #     # Reshape the ROI to a list of pixels
# #     pixels = player_roi.reshape(-1, 3)

# #     # Apply K-means clustering to find the dominant color (assuming 2 clusters)
# #     kmeans = KMeans(n_clusters=2, random_state=0).fit(pixels)
# #     dominant_color = kmeans.cluster_centers_[kmeans.labels_[0]].astype(int)

# #     # Define color thresholds for Team1 and Team2 (black and white in this example)
# #     team1_color = np.array([0, 0, 0])       # Color for Team1 (black)
# #     team2_color = np.array([255, 255, 255]) # Color for Team2 (white)

# #     # Calculate distance to each team color
# #     dist_to_team1 = np.linalg.norm(dominant_color - team1_color)
# #     dist_to_team2 = np.linalg.norm(dominant_color - team2_color)

# #     # Assign team based on the closest color match
# #     return "Team1" if dist_to_team1 < dist_to_team2 else "Team2"

# @dataclass
# class Detection:
#     xywh: List[float]
#     xyxy: List[float]
#     class_id: int
#     class_name: str
#     confidence: float
#     tracker_id: Optional[int] = None

#     @classmethod
#     def from_results(cls, pred, image):
#         result = []
#         for x_min, y_min, x_max, y_max, confidence, class_id in pred:
#             class_id = int(class_id)
#             detection = Detection(
#                 xywh=[float(x_min), float(y_min), float(x_max - x_min), float(y_max - y_min)],
#                 xyxy=[float(x_min), float(y_min), float(x_max), float(y_max)],
#                 class_id=class_id,
#                 class_name=ind_to_cls[class_id],
#                 confidence=float(confidence)
#             )
#             result.append(detection)
#         return result

# def draw_detections(image, detections, draw_tracker_id: bool = False):
    
#     image = image.copy()
#     for pred in detections:
#         bbox = pred.xyxy
#         cls = pred.class_name
#         x_min, y_min, x_max, y_max = map(int, bbox)
#         player_roi = image[y_min:y_max, x_min:x_max]

#         # Reshape the ROI to a list of pixels
#         pixels = player_roi.reshape(-1, 3)

#         # Apply K-means clustering to find the dominant color (assuming 2 clusters)
#         kmeans = KMeans(n_clusters=2, random_state=0).fit(pixels)
#         dominant_color = kmeans.cluster_centers_[kmeans.labels_[0]].astype(int)

#         # Define color thresholds for Team1 and Team2 (black and white in this example)
#         team1_color = np.array([0, 0, 0])       # Color for Team1 (black)
#         team2_color = np.array([255, 255, 255]) # Color for Team2 (white)

#         # Calculate distance to each team color
#         dist_to_team1 = np.linalg.norm(dominant_color - team1_color)
#         dist_to_team2 = np.linalg.norm(dominant_color - team2_color)

#         team = "Team1" if dist_to_team1 < dist_to_team2 else "Team2"
    
#         # Draw the bounding ellipse or box
#         center_bottom = (int((bbox[0] + bbox[2]) / 2), int(bbox[3]))
#         bbox_width = int(bbox[2] - bbox[0])
       
#         cv2.ellipse(image, center_bottom, (bbox_width // 2, 8), 0, 0, 180, color=colors[cls], thickness=1)
        
#         if draw_tracker_id and cls != "ball":
#             cv2.putText(image, f"{pred.tracker_id}", (int(bbox[0]), int(bbox[1]) - 10),
#                         cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors1[team], 3)
#         else:
#             cv2.putText(image, cls, (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors1[team], 3)

#     return image

# # Usage example
# prediction = yolo_model(frames[0]).pred[0].cpu().numpy()
# image = draw_detections(frames[0], Detection.from_results(prediction, frames[0]))


# plt.figure(figsize=(20, 20))
# plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# plt.grid(False)
# plt.show()

# # Save the image

# cv2.imwrite("detections1.jpg", image)


## Create ByteTracker

ByteTrack maintains a set of tracks, each with its own ID, and tries to follow each one's trajectory across the frames.  
It expects detections in this format: (x1, y1, x2, y2, conf).

So we need to convert the YOLO detections to the ByteTrack format, and then work out which ByteTrack track each detection corresponds to.

In [13]:
from dataclasses import dataclass

@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable bundle of settings handed to ``BYTETracker``.

    NOTE(review): how each value is interpreted is defined by the ByteTrack
    implementation, not by this notebook; check it before tuning.
    """

    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False

In [14]:
# Instantiate the tracker. Pass an *instance* of the settings dataclass rather than
# the class object itself, so the tracker receives a regular (frozen) args object.
byte_tracker = BYTETracker(BYTETrackerArgs())

In [15]:
def format_predictions(predictions, with_conf: bool = True):
    """Format YOLO detections as the ByteTrack input array: (x1, y1, x2, y2, conf).

    Args:
        predictions: Iterable of objects exposing ``xyxy`` (four box
            coordinates) and ``confidence``.
        with_conf: When True each row is ``(x1, y1, x2, y2, conf)``; when
            False the confidence column is left out.

    Returns:
        np.ndarray: Float array with one row per prediction. An empty input
        yields a 2-D array of shape ``(0, 5)`` (or ``(0, 4)`` without the
        confidence) instead of a 1-D ``(0,)`` array, so callers can always
        index ``shape[1]``.
    """
    frame_detections = []
    for pred in predictions:
        # list() copies the box, so the prediction is never mutated and a
        # non-list sequence is concatenated instead of added element-wise.
        detection = list(pred.xyxy)
        if with_conf:
            detection.append(pred.confidence)
        frame_detections.append(detection)

    if not frame_detections:
        # Keep the column count even when nothing was detected.
        return np.empty((0, 5 if with_conf else 4), dtype=float)
    return np.array(frame_detections, dtype=float)


In [16]:
def match_detections_with_tracks(detections, tracks):
    """Copy tracker ids onto the YOLO detections they overlap with.

    The IoU between every track box (``track.tlbr``) and every detection box
    is computed. Each track is paired with the detection it overlaps most and,
    when that overlap is non-zero, the detection's ``tracker_id`` is set to the
    track's ``track_id``. If several tracks pick the same detection, the last
    one in ``tracks`` wins.

    Args:
        detections: List of detections exposing ``xyxy`` and ``tracker_id``.
        tracks: Sequence of tracks exposing ``tlbr`` and ``track_id``.

    Returns:
        The same ``detections`` list, updated in place.
    """
    # With no tracks or no detections there is nothing to pair, and the
    # IoU/argmax computation below cannot run on empty inputs.
    if len(detections) == 0 or len(tracks) == 0:
        return detections

    detections_bboxes = format_predictions(detections, with_conf=False)
    tracks_bboxes = np.array([track.tlbr for track in tracks], dtype=float)
    # Rows index tracks, columns index detections.
    iou = box_iou_batch(tracks_bboxes, detections_bboxes)
    best_detection_per_track = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(best_detection_per_track):
        # A zero IoU means the track overlaps no detection at all.
        if iou[tracker_index, detection_index] != 0:
            detections[detection_index].tracker_id = tracks[tracker_index].track_id
    return detections

In [17]:
def get_video_writer(output_video_path, fps, width, height):
    """Create a video writer to save new frames after annotation.

    The destination directory is created first, including any missing
    intermediate directories.

    Args:
        output_video_path: Target file, given as a ``Path`` or a plain string.
        fps: Frame rate passed to the writer.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A ``cv2.VideoWriter`` using the "mp4v" fourcc and colour frames.
    """
    destination = Path(output_video_path)
    # parents=True so that a nested, not-yet-existing output folder also works.
    destination.parent.mkdir(parents=True, exist_ok=True)
    return cv2.VideoWriter(
        str(destination),
        fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
        fps=fps,
        frameSize=(width, height),
        isColor=True
    )

Track players and save to video

In [18]:
# Destination of the annotated video. NOTE(review): machine-specific absolute path; adjust before running elsewhere.
output_video_path = Path("/home/urvashi2022/Desktop/UI_DEVELOPMENT/tracking/output.mp4")

# Writer arguments are (path, fps, width, height): shape[1] of the first frame is passed
# as the width and shape[0] as the height.
# NOTE(review): the frame rate is hard-coded to 30; confirm it matches the source video.
video_writer = get_video_writer(
    output_video_path,
    30,
    frames[0].shape[1],
    frames[0].shape[0]
)

In [19]:
# Upper bound on the number of frames that are detected, tracked and written.
MAX_FRAMES = 400

try:
    for frame in tqdm(frames[:MAX_FRAMES]):

        # detect players with yolo
        detections = yolo_model(frame).pred[0].cpu().numpy()

        detections = Detection.from_results(detections)

        # Give every detection an empty tracker id, so detections that end up
        # without a matching track are drawn with a blank label.
        for detection in detections:
            detection.tracker_id = ""
        detections_with_tracker = list(detections)

        # get trackers with ByteTrack
        tracks = byte_tracker.update(
            output_results=format_predictions(detections_with_tracker, with_conf=True),
            img_info=frame.shape,
            img_size=frame.shape
        )

        # set tracker_id in yolo detections
        detections_with_tracker = match_detections_with_tracks(detections_with_tracker, tracks)

        # annotate the frame
        image = draw_detections(frame, detections_with_tracker, True)

        # save the frame to video writer
        video_writer.write(image)
finally:
    # Release the writer even when a frame fails, so the writer's resources
    # are not leaked.
    video_writer.release()

100%|██████████| 400/400 [00:30<00:00, 13.22it/s]
