In [7]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from ultralytics import YOLO
from collections import defaultdict
from deep_sort_realtime.deepsort_tracker import DeepSort
from utils.dataset_utils import get_swarm_data, get_quality_score, get_full_tracks, get_tensor, group_and_save_tensors

In [8]:
# References: 
# https://learnopencv.com/real-time-deep-sort-with-torchvision-detectors/#Real-Time-Deep-SORT-Setup
# https://pypi.org/project/deep-sort-realtime/

In [9]:
# Root directory that holds the data/ and models/ folders used below.
# Set the SWARM_PROJECT_ROOT environment variable to run the notebook on a
# different machine; the fallback is the original local checkout location.
PROJECT_ROOT = os.environ.get(
    "SWARM_PROJECT_ROOT",
    r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code',
)

# Folder that contains the raw input videos (read as "<video>.mp4").
raw_video_folder = os.path.join(PROJECT_ROOT, "data", "raw", "videos")
# Model file loaded with YOLO(yolo_path); "costumized" is the on-disk spelling.
yolo_path = os.path.join(
    PROJECT_ROOT, "models", "costumized_yolo", "costumized_yolo", "costumized_yolo.pt"
)
# Destination folder handed to group_and_save_tensors.
output_folder = os.path.join(PROJECT_ROOT, "data", "processed")

In [None]:
# --- Run configuration ---
# Number of consecutive frames that are grouped into one clip.
clip_size = 10
# Base name of the video to process (the ".mp4" extension is appended later).
# Original author note: "60 min" (presumably refers to a longer recording; TODO confirm).
video = "video_8min"

In [None]:
# Open the selected video and report its basic metadata.
# os.path.join is used instead of a hard-coded "\\" separator so the path is
# also valid on non-Windows systems.
video_path = os.path.join(raw_video_folder, video + ".mp4")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail here with a clear message; otherwise the metadata queries below
    # return 0 and the duration computation raises a ZeroDivisionError.
    raise FileNotFoundError(f"Could not open video: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Guard against containers that report no frame rate.
duration = total_frames / fps if fps > 0 else float("nan")
# Number of complete clips of `clip_size` frames. Floor division is used
# because round() would also count a trailing partial clip whenever it is at
# least half full.
total_traj = total_frames // clip_size

print(f"FPS: {fps}")
print(f"Total frames: {total_frames}")
print(f"Duration (s): {duration:.2f}")
print(f"Total trajectories: {total_traj}")
# NOTE(review): the two counts below are hard-coded, not derived from the video.
print("Prey Count: 32")
print("Predator Count: 1")

FPS: 30.0
Total frames: 14471
Duration (s): 482.37
Prey Count: 32
Predator Count: 1


In [12]:
# Run detection + tracking over the whole video in clips of `clip_size` frames
# and collect one tensor per clip that contains full tracks.

# NOTE(review): `device` is computed but not passed to the model in this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YOLO(yolo_path)
tracker = DeepSort(max_age=5)

video_idx = 0   # number of clips processed so far
frame_idx = 0   # running frame index passed to get_swarm_data
tensor_data = []

# Start from a fresh capture so that re-running this cell processes the video
# from the first frame again instead of reading from an exhausted or released
# capture (which would silently save an empty result).
cap.release()
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_path}")

try:
    while True:
        # Read up to `clip_size` frames; the last clip may be shorter.
        frames = []
        for _ in range(clip_size):
            success, frame = cap.read()
            if not success:
                break
            frames.append(frame)

        # No frame could be read: end of video.
        if not frames:
            break

        # Per-frame results for this clip, one DataFrame per frame.
        results = []
        for frame in frames:
            df = get_swarm_data(frame, model, tracker, frame_idx)
            results.append(df)
            frame_idx += 1

        combined_df = pd.concat(results, ignore_index=True)
        # NOTE(review): the quality metrics are computed but not used afterwards
        # in this cell; kept in case get_quality_score is needed downstream.
        mean_track_visibility, num_full_tracks, mean_confidence = get_quality_score(combined_df, n=clip_size)
        video_idx += 1

        full_tracks_df = get_full_tracks(combined_df, n=clip_size)

        # Only clips that yield a non-empty full-track frame contribute a tensor.
        if not full_tracks_df.empty:
            tensor_data.append(get_tensor(full_tracks_df))
finally:
    # Release the video handle even if detection or tracking raises.
    cap.release()

group_and_save_tensors(video, tensor_data, output_folder)


0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 23.7ms
Speed: 7.3ms preprocess, 23.7ms inference, 1.7ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 12.3ms
Speed: 4.9ms preprocess, 12.3ms inference, 1.7ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 31 Preys, 16.4ms
Speed: 5.1ms preprocess, 16.4ms inference, 2.3ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 31 Preys, 14.8ms
Speed: 6.4ms preprocess, 14.8ms inference, 1.6ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 12.1ms
Speed: 4.7ms preprocess, 12.1ms inference, 1.6ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 13.6ms
Speed: 4.6ms preprocess, 13.6ms inference, 2.6ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 32 Preys, 11.8ms
Speed: 4.9ms