In [14]:
!pip install ultralytics
!pip install huggingface_hub
!pip install supervision
!pip install numpy



In [18]:
path = '/content/08fd33_4.mp4'

In [15]:
from ultralytics import YOLO
from huggingface_hub import hf_hub_download

# 1. Download the specific weights file from the nested folder
# The author uploaded the full training directory, so the path is deep.
model_path = hf_hub_download(
    repo_id="Adit-jain/soccana",
    filename="Model/weights/best.pt"
)

print(f"Weights loaded from: {model_path}")

# 2. Load into Ultralytics
# This automatically picks up the 3 custom classes (Player, Ball, Referee)
model = YOLO(model_path)

# 3. Run inference
# Note: The model was trained on 1280x1280 images. For best results,
# use 'imgsz=1280' during inference, otherwise small objects (the ball) may be missed.
results = model(path, imgsz=1280)


Weights loaded from: /root/.cache/huggingface/hub/models--Adit-jain--soccana/snapshots/305936007fe7d19ea528d73d08ccd7e70d088adf/Model/weights/best.pt

inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 112.6ms
video 1/1 (frame 2/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 180.3ms
video 1/1 (frame 3/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 180.0ms
video 1/1 (frame 4/750) /conte

In [16]:
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'Player', 1: 'Ball', 2: 'Referee'}
 obb: None
 orig_img: array([[[100, 146, 105],
         [ 92, 138,  97],
         [ 97, 150, 101],
         ...,
         [100,  92,  82],
         [103,  95,  85],
         [105,  97,  87]],
 
        [[ 99, 145, 104],
         [ 99, 145, 104],
         [109, 162, 113],
         ...,
         [105,  97,  87],
         [107,  99,  89],
         [108, 100,  90]],
 
        [[ 96, 149, 100],
         [105, 158, 109],
         [112, 170, 113],
         ...,
         [106,  98,  88],
         [108, 100,  90],
         [110, 102,  92]],
 
        ...,
 
        [[ 74, 103,  78],
         [ 74, 103,  78],
         [ 74, 103,  78],
         ...,
         [ 30,  47,  43],
         [ 31,  48,  44],
         [ 31,  48,  44]],
 
        [[ 74, 103,  78],
         [ 74, 103,  78],
         [ 74, 103,  78],
       

In [23]:
import supervision as sv
import numpy as np

class SoccerDetector:
    def __init__(self, model_path):
        # Load the Soccana model (YOLOv11)
        self.model = YOLO(model_path)
        # Tracker (ByteTrack) is now handled here to keep "State" together
        self.tracker = sv.ByteTrack()

    def process_video(self, video_path, callback=None):
        """
        Generates a stream of (frame_id, tracks) for downstream processing.
        This is a Generator, so it's memory efficient for long videos.
        """
        # Use the model's built-in generator for efficiency
        results_generator = self.model.track(
            source=video_path,
            stream=True,
            persist=True,
            tracker="bytetrack.yaml", # Ultralytics built-in tracker
            conf=0.25
        )

        for frame_idx, result in enumerate(results_generator):
            # Extract boxes & IDs
            # Result.boxes.xyxy -> Coordinates
            # Result.boxes.id -> Track IDs (if available)
            # Result.boxes.cls -> Class IDs (0=Player, 1=Ball, 2=Ref)

            detections = sv.Detections.from_ultralytics(result)

            # Separate Ball (No ID) vs Players (Tracked ID)
            # We return raw data; calibration/smoothing happens later
            yield {
                "frame": frame_idx,
                "detections": detections,
                "orig_img": result.orig_img  # Needed for Homography
            }


In [24]:
detector = SoccerDetector("best.pt")

with open('raw_tracks.csv', 'w') as file:
  file.write('frame,track_id,class_id,x1,y1,x2,y2,conf\n')


  for data in detector.process_video(path):
    frame_num = data['frame']
    detections = data['detections']

    for i in range(len(detections)):
      track_id = detections.tracker_id[i] if detections.tracker_id is not None else -1
      box = detections.xyxy[i]
      cls = detections.class_id[i]
      conf = detections.confidence[i]

      file.write(f"{frame_num},{track_id},{cls},{box[0]},{box[1]},{box[2]},{box[3]},{conf}\n")



video 1/1 (frame 1/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 167.9ms
video 1/1 (frame 2/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 116.1ms
video 1/1 (frame 3/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 164.8ms
video 1/1 (frame 4/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 4 Referees, 111.9ms
video 1/1 (frame 5/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 3 Referees, 164.2ms
video 1/1 (frame 6/750) /content/08fd33_4.mp4: 736x1280 20 Players, 1 Ball, 3 Referees, 112.4ms
video 1/1 (frame 7/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 3 Referees, 167.4ms
video 1/1 (frame 8/750) /content/08fd33_4.mp4: 736x1280 21 Players, 1 Ball, 3 Referees, 113.3ms
video 1/1 (frame 9/750) /content/08fd33_4.mp4: 736x1280 20 Players, 1 Ball, 3 Referees, 170.8ms
video 1/1 (frame 10/750) /content/08fd33_4.mp4: 736x1280 20 Players, 1 Ball, 3 Referees, 115.4ms
video 1/1 (frame 11/750) /content/08fd