In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torchvision
import time
from ultralytics import YOLO
from torchvision.transforms import ToTensor
from deep_sort_realtime.deepsort_tracker import DeepSort

In [2]:
# References: 
# https://learnopencv.com/real-time-deep-sort-with-torchvision-detectors/#Real-Time-Deep-SORT-Setup
# https://pypi.org/project/deep-sort-realtime/

In [3]:
raw_video_folder = r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code\data\raw\videos'
yolo_path      = r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code\models\costumized_yolo\costumized_yolo\costumized_yolo.pt'
output_folder    = r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code\data\processed'

In [4]:
video = "video_8min" #58 min
video_path = raw_video_folder + "\\" + video + ".mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
duration = total_frames / fps

print(f"FPS: {fps}")
print(f"Total frames: {total_frames}")
print(f"Duration (s): {duration:.2f}")
print("Prey Count: 32")
print("Predator Count: 1")

FPS: 30.0
Total frames: 14471
Duration (s): 482.37
Prey Count: 32
Predator Count: 1


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = YOLO(yolo_path)

max_age = 10

tracker = DeepSort(max_age=max_age)

records = []
frame_idx = 0

while True:
    success, frame = cap.read()
    if not success:
        break
    frame_idx += 1

    # YOLO inference
    results = model(frame)[0]
    bboxes, confidences, class_ids = [], [], []
    for box, score, cls in zip(results.boxes.xyxy.cpu().numpy(), results.boxes.conf.cpu().numpy(), results.boxes.cls.cpu().numpy()):
        label = model.names[int(cls)]
        if label not in ("Prey", "Predator Head"):
            continue
        x1, y1, x2, y2 = box
        w, h = x2 - x1, y2 - y1
        bboxes.append([x1, y1, w, h])
        confidences.append(float(score))
        class_ids.append(label)

    # DeepSORT update
    detections = list(zip(bboxes, confidences, class_ids))
    tracks = tracker.update_tracks(detections, frame=frame)

    # Collect results
    for t in tracks:
        if not t.is_confirmed():
            continue
        
        tid = t.track_id
        cx, cy = t.mean[0], t.mean[1]
        vx, vy = float(t.mean[4]), float(t.mean[5])

        speed = np.hypot(vx, vy) # pixel per frame

        angle = np.degrees(np.arctan2(vy, vx))

        label = t.det_class

        records.append({
            "frame":    int(frame_idx),
            "track_id": int(tid),
            "label":    str(label),
            "x":        float(cx),
            "y":        float(cy),
            "vx":       float(vx),
            "vy":       float(vy),
            "speed":    float(speed),
            "angle":    float(angle),
            })

movement_data = pd.DataFrame(records)

filename = f"{video}_tracking_{max_age}.csv"
filepath = os.path.join(output_folder, filename)
movement_data.to_csv(filepath, index=False)
print(f"Saved tracking data to {filepath}")


0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 938.5ms
Speed: 23.6ms preprocess, 938.5ms inference, 16.9ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 1038.3ms
Speed: 32.5ms preprocess, 1038.3ms inference, 41.7ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 31 Preys, 1228.1ms
Speed: 15.3ms preprocess, 1228.1ms inference, 2.9ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 31 Preys, 1707.3ms
Speed: 35.9ms preprocess, 1707.3ms inference, 24.1ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 840.7ms
Speed: 18.3ms preprocess, 840.7ms inference, 2.3ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 759.2ms
Speed: 16.0ms preprocess, 759.2ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 32

KeyboardInterrupt: 

In [None]:
movement_data

Unnamed: 0,frame,track_id,x,y,vx,vy,speed,angle
0,3,1,1211.756873,1034.552879,0.112005,0.195892,0.225652,60.240442
1,3,2,1254.001421,1126.116845,0.186116,0.205937,0.277578,47.894165
2,3,3,1322.846989,1125.858166,0.049475,0.628333,0.630278,85.497827
3,3,4,734.832494,1350.035868,-0.677866,-0.089678,0.683772,-172.463833
4,3,5,1380.766020,1107.180935,0.047250,0.180188,0.186280,75.306378
...,...,...,...,...,...,...,...,...
505292,14471,10098,324.275383,946.687185,-0.746780,0.158753,0.763468,167.998539
505293,14471,10100,287.243132,805.594929,0.034968,-0.121887,0.126804,-73.992353
505294,14471,10105,707.634762,772.247278,0.410215,-0.571810,0.703735,-54.344476
505295,14471,10106,380.055686,1235.406958,-0.645748,0.995330,1.186453,122.974608
