# Deep Learning for Business Applications course

## TOPIC 5: Object detection problem. Play football with YOLO

### 1. Libraries

In [None]:
!pip3 install opencv-python
!pip install ultralytics

In [None]:
import os
import cv2
import numpy as np
from tqdm.auto import tqdm
from PIL import Image
from ultralytics import YOLO
import matplotlib.pyplot as plt

### 2. Video to process

In [None]:
# list the contents of the course data directory (the video used below is read from here)
!ls -la /home/jovyan/__DATA/DLBA_F24/topic_02/

In [None]:
# path to the input video that the following cells open with OpenCV
vid_path = '/home/jovyan/__DATA/DLBA_F24/topic_02/videoplayback.mp4'

In [None]:
# Open the video file and report its basic properties.
# `cap` is left open on purpose: the tracking cells further down read from it.
cap = cv2.VideoCapture(vid_path)

# query the frame rate and the total number of frames from the capture
fps = cap.get(cv2.CAP_PROP_FPS)
frames_cnt = cap.get(cv2.CAP_PROP_FRAME_COUNT)

summary = 'video has {} frames and rate {} fps (frames-per-second)'
print(summary.format(frames_cnt, fps))

In [None]:
def get_frames(vid_path, start_time, num_frames, save_dir):
    """
    Save a run of consecutive video frames to disk as PNG files.

    Each frame is written as ``<save_dir>/frame_<frame index>.png``.
    If the requested range runs past the end of the video, nothing is
    saved and a message is printed instead.

    :vid_path: path to video file
    :start_time: where to start capturing frames, in seconds
        (converted to a frame index using the video's fps)
    :num_frames: how many frames to save
    :save_dir: directory to save to (this function does not create it)

    """
    cap = cv2.VideoCapture(vid_path)
    try:
        frames_cnt = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        start_pos = int(start_time * fps)
        end_pos = int(start_pos + num_frames)
        if end_pos > frames_cnt:
            print('out of video lenght')
            return
        # seek once: consecutive read() calls then return consecutive frames,
        # which avoids a costly random seek before every single frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_pos)
        for frame_num in range(start_pos, end_pos):
            res, frame = cap.read()
            if not res:
                # stop on a failed read so later files are never mislabelled
                break
            file_name = '{}/frame_{}.png'.format(save_dir, frame_num)
            cv2.imwrite(file_name, frame)
    finally:
        # always free the underlying video handle, even on errors
        cap.release()

In [None]:
# working directory: extracted frames are saved here and read back later
WORK_DIR = 'yolofootball'

In [None]:
# create the working directory (and any missing parents);
# nothing happens if it already exists
os.makedirs(WORK_DIR, exist_ok=True)

In [None]:
# Extract a handful of sample frames into WORK_DIR.
# NOTE: get_frames() multiplies its `start_time` argument by the fps, so the
# value of START_FRAME is interpreted as seconds here, not as a frame index.
START_FRAME = 10
FRAMES_TO_PROC = 5
imgs_dir = 'football'  # not used by the call below
get_frames(
    vid_path=vid_path,
    start_time=START_FRAME,
    num_frames=FRAMES_TO_PROC,
    save_dir=WORK_DIR,
)

In [None]:
# list the contents of the working directory after frame extraction
!ls -la $WORK_DIR

In [None]:
# the same listing from Python: names of the entries in WORK_DIR
os.listdir(WORK_DIR)

### 3. YOLO one image test

In [None]:
# load an official model by checkpoint name
# (presumably the weights are fetched on first use if the file is not
# present locally — confirm for offline environments)
model = YOLO('yolov8n.pt')

In [None]:
# Pick the test image deterministically: os.listdir() returns entries in
# arbitrary order and may include non-image files, so filter and sort first.
frame_files = sorted(
    name for name in os.listdir(WORK_DIR) if name.endswith('.png')
)
if not frame_files:
    # fail with a clear message instead of a bare IndexError
    raise FileNotFoundError(f'no .png frames found in {WORK_DIR!r}')
test_img_path = f'{WORK_DIR}/{frame_files[0]}'
print('test image:', test_img_path)

In [None]:
# Run inference on the single test image (only one path is passed here)
results = model(test_img_path)  # returns a list of Results objects
print('total results:', len(results))

In [None]:
# inspect the first result (presumably the only one for a single input image)
results[0]

In [None]:
# Draw the detections for every result and display the annotated image.
for result in results:
    boxes = result.boxes  # bounding box outputs of this result
    im_bgr = result.plot()  # annotated image as a BGR-order numpy array
    # reverse the channel axis (BGR -> RGB) before handing the array to PIL
    im_rgb = Image.fromarray(im_bgr[..., ::-1])
    fig, ax = plt.subplots(figsize=(16, 16))
    ax.imshow(im_rgb)
    plt.show()

Next steps could be:
1. Find unique features to identify every detected player
2. Build an algorithm for tracking players across a series of frames
3. Process frame by frame

### 4. YOLO tracking

YOLO offers a ready-made solution for tracking objects:

In [None]:
START_FRAME = 250  # index of the frame where tracking starts
FRAMES = 100  # maximum number of frames to run tracking on

In [None]:
# Run YOLO tracking over a fixed-length segment of the video and
# collect the annotated frames (stored in RGB channel order).
tracked_frames = []
counter = 0

# jump to the first frame of the segment
cap.set(cv2.CAP_PROP_POS_FRAMES, START_FRAME)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        # no frame could be read (e.g. end of the video): stop
        break

    # persist=True keeps the tracks alive between consecutive calls
    results = model.track(frame, persist=True)
    # draw the tracks, then flip the channel axis to get RGB
    annotated_frame = results[0].plot()[..., ::-1]
    tracked_frames.append(annotated_frame)

    counter += 1
    if counter >= FRAMES:
        break

In [None]:
# Combine the frames with tracked objects into a video and write it to disk.
out = cv2.VideoWriter(
    'video_with_tracking.avi',
    cv2.VideoWriter_fourcc(*'DIVX'),
    cap.get(cv2.CAP_PROP_FPS),  # keep the frame rate of the source video
    (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    ),  # (width, height) of the source video
)
try:
    for frame in tqdm(tracked_frames):
        # tracked_frames holds RGB images, but VideoWriter expects BGR:
        # flip the channel axis back and hand over a contiguous array,
        # otherwise red and blue are swapped in the saved video
        out.write(np.ascontiguousarray(frame[..., ::-1]))
finally:
    # release both handles even if writing fails part-way
    out.release()
    cap.release()

What problems still need to be solved?