In [1]:
from ultralytics import YOLO
import cv2
import supervision as sv

In [2]:
# Load the detector weights from disk.
# NOTE(review): absolute, machine-specific Windows path — the notebook will not
# run on another machine until this is pointed at the local weights file.
model = YOLO('D:/projects/football_analysis/models/best_11l.pt')

In [3]:
# Decode the whole input clip into an in-memory list of frames.
# NOTE(review): absolute, machine-specific path; every frame is kept in RAM.
cap = cv2.VideoCapture('D:/projects/football_analysis/input_videos/08fd33_4.mp4')
frames = []
try:
    # Fail fast: without this check a bad path silently produces an empty
    # `frames` list and the notebook only breaks several cells later.
    if not cap.isOpened():
        raise OSError("Could not open the input video for reading")

    while True:
        ret, frame = cap.read()
        # `ret` is falsy once no further frame could be read.
        if not ret:
            break
        frames.append(frame)
finally:
    # Always give the capture handle back, even if decoding is interrupted.
    cap.release()

In [4]:
# Number of frames that were read from the clip.
len(frames)

750

In [5]:
# Shape of the first decoded frame.
frames[0].shape

(1080, 1920, 3)

In [6]:
# Spot-check that a second frame has the same shape as the first.
frames[1].shape

(1080, 1920, 3)

In [7]:
# Run the detector over the clip in fixed-size chunks of frames and collect
# the per-frame results in order. `conf=0.1` is forwarded to `model.predict`.
batch_size = 20
detections = []
for start in range(0, len(frames), batch_size):
    chunk = frames[start:start + batch_size]
    detections.extend(model.predict(chunk, conf=0.1))


0: 384x640 1 ball, 1 goalkeeper, 21 players, 3 referees, 466.5ms
1: 384x640 1 ball, 1 goalkeeper, 21 players, 3 referees, 466.5ms
2: 384x640 1 goalkeeper, 19 players, 3 referees, 466.5ms
3: 384x640 1 ball, 1 goalkeeper, 20 players, 3 referees, 466.5ms
4: 384x640 1 goalkeeper, 19 players, 3 referees, 466.5ms
5: 384x640 1 goalkeeper, 20 players, 3 referees, 466.5ms
6: 384x640 1 goalkeeper, 21 players, 3 referees, 466.5ms
7: 384x640 1 goalkeeper, 20 players, 3 referees, 466.5ms
8: 384x640 1 ball, 1 goalkeeper, 20 players, 3 referees, 466.5ms
9: 384x640 1 ball, 1 goalkeeper, 19 players, 3 referees, 466.5ms
10: 384x640 1 goalkeeper, 22 players, 3 referees, 466.5ms
11: 384x640 21 players, 3 referees, 466.5ms
12: 384x640 20 players, 3 referees, 466.5ms
13: 384x640 21 players, 3 referees, 466.5ms
14: 384x640 1 ball, 22 players, 3 referees, 466.5ms
15: 384x640 1 ball, 20 players, 3 referees, 466.5ms
16: 384x640 19 players, 3 referees, 466.5ms
17: 384x640 1 ball, 19 players, 3 referees, 466.5ms

In [8]:
# Number of collected detection results (compare with len(frames)).
len(detections)

750

In [9]:
# Inspect the type of a single element of `detections`.
type(detections[0])

ultralytics.engine.results.Results

In [10]:
# Per-category containers for tracking output; each starts as its own empty
# list and later receives one entry per frame.
tracks = {category: [] for category in ("players", "referees", "ball")}

In [11]:
# Create the ByteTrack tracker from `supervision`; this one instance is fed
# every frame's detections in the tracking loop.
tracker = sv.ByteTrack()

In [12]:
# Build per-frame track dictionaries from the raw detections.
#
# Resulting layout (one list entry per frame, indexed by frame_num):
#   tracks["players"][frame_num][track_id]  -> {"bbox": [...]}
#   tracks["referees"][frame_num][track_id] -> {"bbox": [...]}
#   tracks["ball"][frame_num][1]            -> {"bbox": [...]}
#
# `tracks` and `tracker` are (re)created here so that re-running this cell
# starts from a clean state. Previously a second run appended another full set
# of per-frame dicts to `tracks` and kept feeding an already-used tracker.
tracks = {"players": [], "referees": [], "ball": []}
tracker = sv.ByteTrack()

for frame_num, detection in enumerate(detections):
    cls_names = detection.names
    cls_names_inv = {v: k for k, v in cls_names.items()}
    player_cls = cls_names_inv["player"]
    referee_cls = cls_names_inv["referee"]
    ball_cls = cls_names_inv["ball"]

    detection_supervision = sv.Detections.from_ultralytics(detection)

    # Relabel goalkeepers as players so both end up in tracks["players"].
    for obj_idx, class_id in enumerate(detection_supervision.class_id):
        if cls_names[class_id] == "goalkeeper":
            detection_supervision.class_id[obj_idx] = player_cls

    detection_with_tracks = tracker.update_with_detections(detection_supervision)

    players_in_frame = {}
    referees_in_frame = {}
    ball_in_frame = {}
    tracks["players"].append(players_in_frame)
    tracks["referees"].append(referees_in_frame)
    tracks["ball"].append(ball_in_frame)

    # Each iterated item is indexed positionally:
    # [0] box, [2] confidence, [3] class id, [4] tracker id.
    for frame_detection in detection_with_tracks:
        bbox = frame_detection[0].tolist()
        cls_id = frame_detection[3]
        track_id = frame_detection[4]

        # A detection has exactly one class, so the branches are exclusive.
        if cls_id == player_cls:
            players_in_frame[track_id] = {"bbox": bbox}
        elif cls_id == referee_cls:
            referees_in_frame[track_id] = {"bbox": bbox}

    # The ball is taken from the untracked detections and stored under the
    # fixed key 1. When several ball candidates appear in one frame, keep the
    # most confident one instead of whichever happens to be iterated last.
    best_ball_conf = None
    for frame_detection in detection_supervision:
        if frame_detection[3] != ball_cls:
            continue
        ball_conf = frame_detection[2]
        # If no confidence is available, fall back to last-one-wins.
        if best_ball_conf is None or (ball_conf is not None and ball_conf > best_ball_conf):
            best_ball_conf = ball_conf
            ball_in_frame[1] = {"bbox": frame_detection[0].tolist()}

In [13]:
def get_center_of_bbox(bbox):
    """Return the integer centre ``(cx, cy)`` of a 4-element ``(x1, y1, x2, y2)`` box.

    Each coordinate is the midpoint of the two edges, converted with ``int()``
    (so fractional midpoints are truncated toward zero).
    """
    left, top, right, bottom = bbox
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    return center_x, center_y

def get_bbox_width(bbox):
    """Return the horizontal extent of ``bbox``: element 2 minus element 0."""
    left_edge, right_edge = bbox[0], bbox[2]
    return right_edge - left_edge

In [14]:
def draw_ellipse(frame, bbox, color, track_id):
    """Draw an elliptical arc centred on the bottom-middle of ``bbox``.

    Args:
        frame: Image passed to ``cv2.ellipse``.
        bbox: Box as ``(x1, y1, x2, y2)``; ``bbox[3]`` is used as the arc's
            vertical centre and the box width sets the arc's size.
        color: Colour passed straight through to ``cv2.ellipse``.
        track_id: Accepted for interface compatibility; not used here.

    Returns:
        The same ``frame`` object that was passed in.
    """
    bottom_y = int(bbox[3])
    center_x = get_center_of_bbox(bbox)[0]
    box_width = get_bbox_width(bbox)

    # The second axis is 35% of the first.
    # NOTE(review): OpenCV documents `axes` as half-lengths, so the arc spans
    # about twice the box width — confirm that is the intended look.
    ellipse_axes = (int(box_width), int(0.35 * box_width))

    cv2.ellipse(
        frame,
        center=(center_x, bottom_y),
        axes=ellipse_axes,
        angle=0.0,
        startAngle=45,   # arc begins at 45 degrees ...
        endAngle=235,    # ... and stops at 235 degrees
        color=color,
        thickness=2,
        lineType=cv2.LINE_4,
    )
    return frame

In [15]:
# Produce an annotated copy of every frame: one ellipse per tracked player.
# The originals in `frames` are left untouched because drawing happens on a copy.
output_video_frames = []
for frame_num, source_frame in enumerate(frames):
    annotated = source_frame.copy()
    players_in_frame = tracks['players'][frame_num]
    for track_id in players_in_frame:
        player_bbox = players_in_frame[track_id]["bbox"]
        # (0, 0, 255) — presumably red, given OpenCV's usual BGR ordering.
        annotated = draw_ellipse(annotated, player_bbox, (0, 0, 255), track_id)
    output_video_frames.append(annotated)

In [21]:
# Encode the annotated frames into an XVID-compressed 'output_video.avi'.
if not output_video_frames:
    # Previously this surfaced as a bare IndexError on output_video_frames[0].
    raise ValueError("output_video_frames is empty; there is nothing to write")

# NOTE(review): frame rate is hard-coded — confirm it matches the source clip.
OUTPUT_FPS = 24

# Frame arrays are indexed (rows, cols, ...); the writer wants (width, height).
frame_height, frame_width = output_video_frames[0].shape[:2]

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output_video.avi', fourcc, OUTPUT_FPS, (frame_width, frame_height))
try:
    # Without this check a writer that failed to open drops every frame silently.
    if not out.isOpened():
        raise RuntimeError("Could not open 'output_video.avi' for writing (is the XVID codec available?)")
    for frame in output_video_frames:
        out.write(frame)
finally:
    # Finalise and close the file even if writing fails part-way through.
    out.release()