In [1]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from ultralytics import YOLO
from collections import defaultdict
from deep_sort_realtime.deepsort_tracker import DeepSort
from utils.dataset_utils import get_swarm_data, get_quality_score, get_full_tracks, get_tensor, group_and_save_tensors

In [2]:
# References: 
# https://learnopencv.com/real-time-deep-sort-with-torchvision-detectors/#Real-Time-Deep-SORT-Setup
# https://pypi.org/project/deep-sort-realtime/

In [3]:
# Machine-specific project locations. All three paths live under the same
# project root, so the shared prefix is spelled out once and the individual
# locations are derived from it.
project_root = r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code'

# Folder holding the raw input videos (<name>.mp4).
raw_video_folder = project_root + r'\data\raw\videos'
# Weights file handed to YOLO(...).
yolo_path = project_root + r'\models\costumized_yolo\costumized_yolo\costumized_yolo.pt'
# Destination passed to group_and_save_tensors(...).
output_folder = project_root + r'\data\processed\sliding_window'

In [4]:
# Inputs
# Number of consecutive frames that make up one sliding-window clip.
clip_size = 10

# File stem of the video to process; "<video>.mp4" is looked up in
# raw_video_folder.
video = "video_8min"
# Alternative input (4.45 min):
# video = "video_5s"

In [5]:
# Open the selected video and derive the quantities the sliding-window
# extraction depends on (frame rate, frame count, number of windows).
video_path = os.path.join(raw_video_folder, video + ".mp4")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail here with the offending path; otherwise fps is reported as 0 and the
    # duration computation below dies with an unrelated ZeroDivisionError.
    raise OSError(f"Could not open video: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Some backends report no frame rate even for a readable file; keep the cell
# running and show "nan" instead of dividing by zero.
duration = total_frames / fps if fps > 0 else float("nan")
# Number of distinct start positions for a window of clip_size frames;
# clamped so a video shorter than one clip yields 0 instead of a negative count.
total_traj = max(0, total_frames - clip_size + 1)

print(f"FPS: {fps}")
print(f"Total frames: {total_frames}")
print(f"Duration (s): {duration:.2f}")
print(f"Total trajectories: {total_traj}")
# The two counts below are fixed literals, not values derived from the video.
print("Prey Count: 32")
print("Predator Count: 1")

FPS: 30.0
Total frames: 14471
Duration (s): 482.37
Total trajectories: 14462
Prey Count: 32
Predator Count: 1


In [None]:
# Sliding-window extraction: for every start frame, run get_swarm_data on
# clip_size consecutive frames, merge the per-frame results, and collect one
# tensor per window for which get_full_tracks returns a non-empty frame.
#
# NOTE(review): consecutive windows overlap by clip_size - 1 frames, so each
# frame is decoded and passed to get_swarm_data up to clip_size times, always
# through the same `tracker` instance. Confirm this is intended before
# restructuring; it dominates the runtime of this cell.

# NOTE(review): `device` is computed but not handed to the model or tracker in
# this cell - presumably the libraries pick their own device; confirm.
device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model   = YOLO(yolo_path)
tracker = DeepSort(max_age=5)

tensor_data = []
video_idx   = 0

# `cap` is created in the metadata cell and released at the end of this one.
# Re-open it if a previous run of this cell already released it; otherwise the
# first read fails and an empty result is saved without any error.
if not cap.isOpened() and not cap.open(video_path):
    raise OSError(f"Could not open video: {video_path}")

try:
    for start_frame in range(total_traj):
        end_frame = start_frame + clip_size - 1
        print(f"[Clip {video_idx}] Startframe: {start_frame}, Endframe: {end_frame}")

        # Jump to the first frame of this window and decode clip_size frames.
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        frames = []
        for _ in range(clip_size):
            success, frame = cap.read()
            if not success:
                break
            frames.append(frame)
        if len(frames) < clip_size:
            # The reported frame count can exceed what is actually decodable;
            # stop at the first incomplete window and say so.
            print(f"[Clip {video_idx}] Only {len(frames)} of {clip_size} frames could be read - stopping early.")
            break

        # One DataFrame per frame, tagged with its absolute frame index.
        dfs = [
            get_swarm_data(frm, model, tracker, start_frame + offset)
            for offset, frm in enumerate(frames)
        ]
        combined_df = pd.concat(dfs, ignore_index=True)

        # The quality metrics are computed per window but not used further in
        # this cell.
        mean_track_visibility, num_full_tracks, mean_confidence = get_quality_score(combined_df, n=clip_size)
        video_idx += 1

        full_tracks_df = get_full_tracks(combined_df, n=clip_size)
        if not full_tracks_df.empty:
            tensor_data.append(get_tensor(full_tracks_df))
finally:
    # Release the capture even when detection/tracking raises or the run is
    # interrupted, so the video file handle is not leaked.
    cap.release()

group_and_save_tensors(video, tensor_data, output_folder)