In [76]:
import cv2
import pandas as pd
import os
from ultralytics import YOLO

In [77]:
# Root of the thesis code on the author's machine. Set the THESIS_CODE_DIR
# environment variable to run the notebook from another checkout without
# editing this cell.
project_root = os.environ.get(
    "THESIS_CODE_DIR",
    r'C:\Users\janni\OneDrive\Dokumente\Privat\Bildung\M. Sc. Social and Economic Data Science\4. Semester\Master Thesis\Code',
)

# Every location is derived from the single root so the three paths cannot
# drift apart. With the default root on Windows the resulting strings are
# identical to the previously hard-coded ones.
raw_video_folder = os.path.join(project_root, 'data', 'raw', 'videos')
# NOTE(review): "costumized" is kept as-is; presumably it matches the folder and
# file names on disk — confirm before correcting the spelling.
yolo_path = os.path.join(project_root, 'models', 'costumized_yolo', 'costumized_yolo', 'costumized_yolo.pt')
output_folder = os.path.join(project_root, 'data', 'processed')

In [97]:
# Load the YOLO weights once; the later cells reuse the module-level `model`.
model = YOLO(yolo_path)
video = "video_5s"
# os.path.join instead of a manual "\\" concatenation: same result on Windows,
# and consistent with how the processing functions build their paths.
video_path = os.path.join(raw_video_folder, video + ".mp4")
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a missing or unreadable file, so fail loudly
# here instead of printing meaningless metadata.
if not cap.isOpened():
    raise FileNotFoundError(f"Cannot open video: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Guard against a reported FPS of 0, which would raise ZeroDivisionError.
duration = total_frames / fps if fps > 0 else float("nan")

print(f"FPS: {fps}")
print(f"Total frames: {total_frames}")
print(f"Duration (s): {duration:.2f}")
# The two counts below are fixed strings, not values derived from the video.
print("Prey Count: 32")
print("Predator Count: 1")

FPS: 30.0
Total frames: 153
Duration (s): 5.10
Prey Count: 32
Predator Count: 1


In [None]:
# VISUAL MODEL REPRESENTATION
# Plays the video with the model's detections drawn on every frame.
# Press "q" in the preview window to stop early.

# Box colour per class label; OpenCV interprets these tuples as (B, G, R).
label_colors = {
    "Predator Head": (0, 0, 255),
    "Predator":      (0, 102, 255),
    "Prey Head":     (255, 0, 0),
    "Prey":          (255, 153, 102),
}

frame_number = 0

# The capture is released at the end of this cell, so re-open it when the cell
# is run again without re-running the setup cell; otherwise the loop would exit
# immediately without showing anything.
if not cap.isOpened():
    cap = cv2.VideoCapture(video_path)

try:
    while True:
        success, frame = cap.read()
        if not success:
            break

        # Inference
        results = model(frame, conf=0.2)
        boxes = results[0].boxes

        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cx, cy       = (x1 + x2)//2, (y1 + y2)//2
            cls_id       = int(box.cls[0])
            label        = model.names[cls_id]
            # Unknown labels fall back to white.
            color        = label_colors.get(label, (255, 255, 255))

            # Draw the bounding box and its centroid
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.circle(frame, (cx, cy), 3, (0, 0, 255), -1)

        frame_number += 1

        # Resize for display and show the annotated frame
        annotated_frame = cv2.resize(frame, (720, 720))
        cv2.imshow("YOLO Detection", annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Clean up even when inference fails or the kernel is interrupted;
    # otherwise the preview window stays open and the capture is never released.
    cap.release()
    cv2.destroyAllWindows()


0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 705.5ms
Speed: 19.2ms preprocess, 705.5ms inference, 2.3ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 781.0ms
Speed: 28.9ms preprocess, 781.0ms inference, 1.1ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 651.4ms
Speed: 16.1ms preprocess, 651.4ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)


In [None]:
#Prey: 32
#Predator: 1

def process_video(video):
    """Run the YOLO model on every frame of a raw video and export box centroids.

    Parameters
    ----------
    video : str
        File name of the video without the ``.mp4`` extension. It is looked up
        in the module-level ``raw_video_folder``.

    Returns
    -------
    pandas.DataFrame
        One row per detection with the columns ``frame``, ``label``,
        ``confidence``, ``x`` and ``y`` (box centroid in integer pixels).
        The same table is written to ``movement_data_<video>.csv`` inside the
        module-level ``output_folder``.

    Raises
    ------
    FileNotFoundError
        If the video file cannot be opened.
    """
    video_path = os.path.join(raw_video_folder, f"{video}.mp4")
    cap = cv2.VideoCapture(video_path)
    # VideoCapture fails silently; without this check a wrong path would
    # export an empty CSV instead of reporting the problem.
    if not cap.isOpened():
        cap.release()
        raise FileNotFoundError(f"Cannot open video: {video_path}")

    columns = ["frame", "label", "confidence", "x", "y"]
    centroids_data = []
    frame_number = 0

    try:
        while True:
            success, frame = cap.read()
            if not success:
                break

            # Inference
            results = model(frame, conf=0.3) # based on observation

            for box in results[0].boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Store one record per detection
                centroids_data.append({
                    "frame": frame_number,
                    "label": model.names[int(box.cls[0])],
                    "confidence": float(box.conf[0]),
                    "x": (x1 + x2) // 2,
                    "y": (y1 + y2) // 2
                })

            frame_number += 1
    finally:
        # Release the capture even if inference raises part-way through.
        cap.release()

    # Build the DataFrame and export it. Explicit columns keep the CSV header
    # intact when no detection was made at all.
    movement_data = pd.DataFrame(centroids_data, columns=columns)
    output_path = os.path.join(output_folder, f"movement_data_{video}.csv")
    movement_data.to_csv(output_path, index=False)

    return movement_data

In [81]:
# Run per-frame detection on the video selected in the setup cell; the call also
# writes movement_data_<video>.csv into output_folder.
movement_data = process_video(video)
# Last expression of the cell, so the notebook renders the table.
movement_data


0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 932.1ms
Speed: 28.6ms preprocess, 932.1ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 779.7ms
Speed: 19.7ms preprocess, 779.7ms inference, 2.6ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 866.5ms
Speed: 31.0ms preprocess, 866.5ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 771.5ms
Speed: 22.9ms preprocess, 771.5ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 741.6ms
Speed: 31.1ms preprocess, 741.6ms inference, 1.8ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 715.8ms
Speed: 15.6ms preprocess, 715.8ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 7

Unnamed: 0,frame,label,confidence,x,y
0,0,Prey,0.848912,993,852
1,0,Prey,0.838825,1270,972
2,0,Prey,0.828723,1153,896
3,0,Prey,0.824260,1748,748
4,0,Prey,0.817484,1234,1033
...,...,...,...,...,...
4966,152,Prey,0.490877,594,818
4967,152,Prey,0.487457,642,783
4968,152,Prey,0.451108,1003,985
4969,152,Prey,0.401255,981,767


In [None]:
# DISABLED: everything below is wrapped in a string literal, so none of it runs.
# It sketches a variant of process_video that calls model.track(...) with the
# "bytetrack.yaml" tracker and stores a track_id per detection.
# NOTE(review): before re-enabling, check whether `box.id` can be None for boxes
# without an assigned track (confirm against the ultralytics docs); `int(box.id[0])`
# would fail in that case. The `centroids_data` list is never used.
'''def track_video(video):
    video_path = os.path.join(raw_video_folder, f"{video}.mp4")
    cap = cv2.VideoCapture(video_path)
    centroids_data = []
    frame_num = 0
    data=[]

    while True:
        success, frame = cap.read()
        if not success:
            break

        # statt model(frame) jetzt model.track(...)
        results = model.track(
            frame,
            conf=0.2,
            tracker="bytetrack.yaml",
            persist=True
        )

        # Ergebnisse durchgehen und Track-ID mit abspeichern
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2

            cls_id    = int(box.cls[0])
            label     = model.names[cls_id]
            conf      = float(box.conf[0])
            track_id  = int(box.id[0])  # <-- hier kommt die Track-ID

            data.append({
                "frame":      frame_num,
                "track_id":   track_id,
                "label":      label,
                "confidence": conf,
                "x":          cx,
                "y":          cy
            })

        frame_num += 1

    cap.release()

    # In DataFrame umwandeln und speichern
    tracking_data = pd.DataFrame(data)
    out_path = os.path.join(output_folder, f"tracked_movement_{video}.csv")
    tracking_data.to_csv(out_path, index=False)
    
    return tracking_data


tracking_data = track_video(video)
tracking_data'''

In [84]:
# DISABLED: one-off export of the 'Predator Head' and 'Prey' rows of
# movement_data to separate CSV files. Kept as real comments rather than a
# triple-quoted string, because a bare string that ends a cell is echoed
# verbatim as the cell's output.
#
# # {0: 'Predator', 1: 'Predator Head', 2: 'Prey', 3: 'Prey Head'}
# predator = movement_data[movement_data['label'] == 'Predator Head']
#
# if not os.path.exists("data/pred_movement.csv"):
#     predator.to_csv("data/pred_movement.csv", index=False)
#
# prey = movement_data[movement_data['label'] == 'Prey']
#
# if not os.path.exists("data/prey_movement.csv"):
#     prey.to_csv("data/prey_movement.csv", index=False)

'# {0: \'Predator\', 1: \'Predator Head\', 2: \'Prey\', 3: \'Prey Head\'}\npredator = movement_data[movement_data[\'label\'] == \'Predator Head\']\n\nif not os.path.exists("data/pred_movement.csv"):\n    predator.to_csv("data/pred_movement.csv", index=False)\n\nprey = movement_data[movement_data[\'label\'] == \'Prey\']\n\nif not os.path.exists("data/prey_movement.csv"):\n    prey.to_csv("data/prey_movement.csv", index=False)'

In [100]:
import os
import cv2
import numpy as np
import pandas as pd
from scipy.optimize import linear_sum_assignment

# Kalman Filter helper
def create_kalman_filter(dt=1.0, process_noise=1e-2, measurement_noise=1e-1):
    """Build a constant-velocity Kalman filter for tracking a 2-D point.

    The filter has four state components and two measured components; the
    measurement matrix selects the first two state components.

    Parameters
    ----------
    dt : float
        Time step that couples the two position components to the two
        velocity components in the transition matrix.
    process_noise : float
        Scalar placed on the diagonal of the process-noise covariance.
    measurement_noise : float
        Scalar placed on the diagonal of the measurement-noise covariance.

    Returns
    -------
    cv2.KalmanFilter
        The configured filter; its state is not initialised here.
    """
    # Identity plus the two "position += velocity * dt" couplings.
    transition = np.eye(4, dtype=np.float32)
    transition[0, 2] = dt
    transition[1, 3] = dt

    # 2x4 matrix with ones on the diagonal: observe state components 0 and 1.
    observation = np.eye(2, 4, dtype=np.float32)

    kalman = cv2.KalmanFilter(4, 2)
    kalman.transitionMatrix = transition
    kalman.measurementMatrix = observation
    kalman.processNoiseCov = np.eye(4, dtype=np.float32) * process_noise
    kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * measurement_noise
    return kalman


def process_video_with_kalman(video, raw_video_folder, output_folder,
                              num_fish=33, num_prey=32, num_pred=1, max_distance=50.0,
                              num_frames=100):
    """Track a fixed set of prey and predator bodies through a video.

    Tracks are created from the most confident 'Prey' and 'Predator' detections
    of the first frame. In every following frame each track is advanced by one
    Kalman prediction step and matched to the new detections with the Hungarian
    algorithm on Euclidean centroid distance. A track without a match closer
    than ``max_distance`` is continued with its predicted position and flagged
    as imputed.

    Parameters
    ----------
    video : str
        File name of the video without the ``.mp4`` extension.
    raw_video_folder : str
        Folder that contains the video.
    output_folder : str
        Folder that receives ``movement_data_<video>.csv``.
    num_fish : int
        Not used. Kept so existing calls keep working; the number of tracks is
        ``num_prey + num_pred``.
    num_prey : int
        Number of prey tracks to initialise.
    num_pred : int
        Number of predator tracks to initialise.
    max_distance : float
        Largest prediction-to-detection distance (pixels) accepted as a match.
    num_frames : int
        Upper bound on the number of frames processed, first frame included.

    Returns
    -------
    pandas.DataFrame
        One row per track and processed frame with the columns ``track_id``,
        ``frame``, ``x``, ``y`` and ``imputed``. Prey tracks get the ids
        ``0 .. num_prey - 1``, predator tracks the ids after that.

    Raises
    ------
    RuntimeError
        If the first frame cannot be read.
    ValueError
        If the first frame has fewer prey or predator detections than requested.

    Notes
    -----
    Uses the module-level ``model``. The CSV name is the same one
    ``process_video`` writes, so with the same ``output_folder`` the two
    functions overwrite each other's export.
    """
    # Only body boxes are tracked. 'Predator Head' / 'Prey Head' boxes describe
    # the same animals and must neither be counted as prey nor compete with the
    # body boxes during association.
    tracked_labels = ('prey', 'predator')

    def detect_bodies(frame):
        """Return (cx, cy, conf, label) for every body detection in `frame`."""
        found = []
        for box in model(frame, conf=0.3)[0].boxes:
            label = model.names[int(box.cls[0])].lower()
            if label not in tracked_labels:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            found.append(((x1 + x2) // 2, (y1 + y2) // 2, float(box.conf[0]), label))
        return found

    video_path = os.path.join(raw_video_folder, f"{video}.mp4")
    cap = cv2.VideoCapture(video_path)
    centroids_data = []

    try:
        # --- First frame: select exactly num_prey + num_pred detections ---
        success, first_frame = cap.read()
        if not success:
            raise RuntimeError(f"Cannot read first frame of {video}")

        first_dets = detect_bodies(first_frame)
        prey_dets = [d for d in first_dets if d[3] == 'prey']
        pred_dets = [d for d in first_dets if d[3] == 'predator']
        if len(pred_dets) < num_pred or len(prey_dets) < num_prey:
            raise ValueError(f"Not enough detections: got {len(prey_dets)} prey, {len(pred_dets)} predator")
        # Keep the most confident detections of each class.
        pred_dets = sorted(pred_dets, key=lambda d: d[2], reverse=True)[:num_pred]
        prey_dets = sorted(prey_dets, key=lambda d: d[2], reverse=True)[:num_prey]

        # Initialise one filter per selected detection, starting at rest.
        trackers = []  # [{'kf': kf, 'id': idx}]
        for idx, (cx, cy, _, _) in enumerate(prey_dets + pred_dets):
            kf = create_kalman_filter()
            init_state = np.array([[cx], [cy], [0], [0]], dtype=np.float32)
            kf.statePre = init_state.copy()
            kf.statePost = init_state.copy()
            kf.correct(np.array([[np.float32(cx)], [np.float32(cy)]]))
            trackers.append({'kf': kf, 'id': idx})
            centroids_data.append({
                'track_id': idx, 'frame': 0, 'x': cx, 'y': cy, 'imputed': False
            })

        # --- Subsequent frames ---
        frame_number = 1
        while frame_number < num_frames:
            success, frame = cap.read()
            if not success:
                break

            # Predict exactly once per tracker: predict() advances the filter
            # state, so a second call would step the track two frames ahead.
            preds = []
            for tracker in trackers:
                state = tracker['kf'].predict()
                preds.append((float(state[0, 0]), float(state[1, 0])))

            # Detect
            dets = [(cx, cy) for cx, cy, _, _ in detect_bodies(frame)]

            # Associate predictions with detections
            matched, unmatched = [], []
            if dets:
                pred_xy = np.asarray(preds, dtype=np.float32).reshape(-1, 2)
                det_xy = np.asarray(dets, dtype=np.float32).reshape(-1, 2)
                # Pairwise Euclidean distances, shape (tracks, detections).
                cost = np.hypot(pred_xy[:, None, 0] - det_xy[None, :, 0],
                                pred_xy[:, None, 1] - det_xy[None, :, 1])
                rows, cols = linear_sum_assignment(cost)
                # With fewer detections than tracks some tracks get no column.
                assigned = set(rows.tolist())
                unmatched = [i for i in range(len(trackers)) if i not in assigned]
                for ri, ci in zip(rows, cols):
                    if cost[ri, ci] < max_distance:
                        matched.append((ri, ci))
                    else:
                        # Assigned, but too far away to be the same animal.
                        unmatched.append(ri)
            else:
                unmatched = list(range(len(trackers)))

            # Update matched tracks with the measurement
            for ti, di in matched:
                cx, cy = dets[di]
                trackers[ti]['kf'].correct(np.array([[np.float32(cx)], [np.float32(cy)]]))
                centroids_data.append({'track_id': trackers[ti]['id'], 'frame': frame_number, 'x': cx, 'y': cy, 'imputed': False})
            # Impute unmatched tracks with their predicted position
            for ti in unmatched:
                px, py = preds[ti]
                centroids_data.append({'track_id': trackers[ti]['id'], 'frame': frame_number, 'x': int(px), 'y': int(py), 'imputed': True})
            frame_number += 1
    finally:
        # Release the capture on every exit path, including the raises above.
        cap.release()

    # Export
    movement_data = pd.DataFrame(centroids_data)
    movement_data.to_csv(os.path.join(output_folder, f"movement_data_{video}.csv"), index=False)
    return movement_data


In [102]:
# Track the video selected in the setup cell; num_prey, num_pred and num_frames
# keep their defaults (32, 1 and 100).
# NOTE(review): num_fish is accepted by process_video_with_kalman but never read
# in its body, so passing 33 here has no effect.
data1 = process_video_with_kalman(video, raw_video_folder, output_folder,
                              num_fish=33, max_distance=50.0)


0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 762.1ms
Speed: 16.1ms preprocess, 762.1ms inference, 6.8ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 655.7ms
Speed: 17.4ms preprocess, 655.7ms inference, 1.2ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 644.4ms
Speed: 22.2ms preprocess, 644.4ms inference, 3.8ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 664.0ms
Speed: 15.8ms preprocess, 664.0ms inference, 1.9ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 35 Preys, 914.4ms
Speed: 18.0ms preprocess, 914.4ms inference, 2.1ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 33 Preys, 668.8ms
Speed: 16.9ms preprocess, 668.8ms inference, 4.4ms postprocess per image at shape (1, 3, 736, 736)

0: 736x736 1 Predator, 1 Predator Head, 34 Preys, 6

In [106]:
# Keep only the rows that belong to the track with id 1.
test = data1.loc[data1["track_id"].eq(1)]

In [107]:
# Show the first 50 rows of the selected track.
test.head(50)

Unnamed: 0,track_id,frame,x,y,imputed
1,1,0,1270,972,False
34,1,1,1270,971,False
67,1,2,1268,970,False
100,1,3,1266,970,False
133,1,4,1266,970,False
166,1,5,1265,969,False
199,1,6,1264,969,False
232,1,7,1262,966,False
265,1,8,1259,964,False
298,1,9,1258,963,False
