In [3]:
from ultralytics import YOLO
import cv2
import numpy as np

In [4]:
model=YOLO("model.pt")

In [5]:
results= model.predict("15sec_input_720p.mp4", save=True)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/375) /Users/umangaggarwal/Desktop/Soccer Player Identification Assignment/15sec_input_720p.mp4: 384x640 1 ball, 16 players, 2 referees, 394.6ms
video 1/1 (frame 2/375) /Users/umangaggarwal/Desktop/Soccer Player Identification Assignment/15sec_input_720p.mp4: 384x640 18 players, 2 referees, 384.0ms
video 1/1 (frame 3/375) /Users/umangaggarwal/Desktop/Soccer Player Identification Assignment/15sec_input_720p.mp4: 384x640 1 ball, 16 players, 2 refer

In [6]:
print(results[0])

ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
obb: None
orig_img: array([[[ 94,  79, 104],
        [ 94,  79, 104],
        [ 93,  77, 103],
        ...,
        [141, 161, 185],
        [150, 169, 193],
        [153, 173, 196]],

       [[136, 120, 146],
        [136, 120, 146],
        [136, 120, 146],
        ...,
        [179, 198, 222],
        [187, 207, 230],
        [190, 210, 233]],

       [[200, 185, 210],
        [200, 185, 210],
        [201, 186, 211],
        ...,
        [181, 201, 224],
        [189, 209, 232],
        [193, 212, 236]],

       ...,

       [[125, 134, 146],
        [125, 134, 146],
        [125, 134, 146],
        ...,
        [ 67, 102,  94],
        [ 67, 102,  94],
        [ 67, 102,  94]],

       [[123, 132, 144],
        [123, 132, 144],
        [123, 132, 144],
        ...,
        [ 75, 110, 10

In [None]:
names= {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
for box in results[0].boxes:
    cls_id = int(box.cls.item()) if hasattr(box.cls, 'item') else int(box.cls)
    label = names.get(cls_id, str(cls_id))
    xyxy = box.xyxy.cpu().numpy().tolist() if hasattr(box.xyxy, 'cpu') else box.xyxy
    conf = float(box.conf.item()) if hasattr(box.conf, 'item') else float(box.conf)
    xyxy_rounded = [[round(coord, 2) for coord in box_coords] for box_coords in xyxy] if isinstance(xyxy, list) and isinstance(xyxy[0], list) else [round(coord, 2) for coord in xyxy]
    print(f"Label: {label}, Box: {xyxy_rounded}, Confidence: {int(round(conf, 2)*100)}%")

Label: player, Box: [[794.9, 470.35, 863.85, 566.96]], Confidence: 92%
Label: player, Box: [[431.64, 160.54, 467.52, 227.96]], Confidence: 92%
Label: player, Box: [[839.88, 187.53, 873.08, 247.58]], Confidence: 92%
Label: player, Box: [[392.77, 422.96, 457.38, 500.67]], Confidence: 92%
Label: player, Box: [[1086.25, 345.55, 1137.63, 443.3]], Confidence: 91%
Label: player, Box: [[140.87, 266.73, 190.38, 347.99]], Confidence: 91%
Label: player, Box: [[887.35, 124.0, 913.54, 177.0]], Confidence: 91%
Label: player, Box: [[704.73, 82.89, 733.91, 128.93]], Confidence: 91%
Label: player, Box: [[370.64, 154.96, 405.41, 216.96]], Confidence: 91%
Label: player, Box: [[721.68, 195.06, 757.12, 263.09]], Confidence: 90%
Label: player, Box: [[1134.97, 253.48, 1182.1, 338.39]], Confidence: 89%
Label: player, Box: [[1239.53, 146.31, 1269.17, 210.76]], Confidence: 89%
Label: player, Box: [[532.63, 428.71, 581.43, 520.72]], Confidence: 87%
Label: referee, Box: [[568.47, 51.48, 597.23, 90.86]], Confidenc

In [None]:
from utils.video_utils import read_video,save_video
from trackers import Tracker

video_frames=read_video('15sec_input_720p.mp4')

tracker = Tracker('model.pt')
tracks=tracker.get_object_tracks(video_frames,
                                    read_from_stub=True,
                                    stub_path='stubs/track_stubs.pkl')



In [None]:
def convert_numpy(obj):
    if isinstance(obj, dict):
        return {int(k) if isinstance(k, np.integer) else k: convert_numpy(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy(i) for i in obj]
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    else:
        return obj

In [19]:
import sys
import json

# print(sys.getsizeof(tracks["ball"]))
ball_tracks=convert_numpy(tracks["ball"])
json_string=json.dumps(ball_tracks,indent=2)
print(json_string)

[
  {
    "bbox": [
      490.7367858886719,
      508.21868896484375,
      506.6071472167969,
      521.4323120117188
    ]
  },
  {
    "bbox": [
      488.10638427734375,
      503.43994140625,
      503.129638671875,
      519.946044921875
    ]
  },
  {
    "bbox": [
      485.37286376953125,
      499.80950927734375,
      499.82843017578125,
      517.8919067382812
    ]
  },
  {
    "bbox": [
      482.4744873046875,
      498.9549560546875,
      497.3839111328125,
      516.0289306640625
    ]
  },
  {
    "bbox": [
      480.27752685546875,
      497.37713623046875,
      494.78887939453125,
      514.9081420898438
    ]
  },
  {
    "bbox": [
      479.37213134765625,
      502.997314453125,
      492.16241455078125,
      513.553466796875
    ]
  },
  {
    "bbox": [
      477.7081298828125,
      497.6954345703125,
      491.7657470703125,
      513.1766357421875
    ]
  },
  {
    "bbox": [
      478.4713134765625,
      493.867919921875,
      492.5404052734375,
      