# Cell 1: Setup Dependencies

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
from torchreid.utils import FeatureExtractor
import time
import sqlite3
import logging
from scipy.spatial.distance import cosine

# Configure logging for the notebook: records at DEBUG level and above are
# written to 'debug.log' (a relative path, so it lands in the working directory).
logging.basicConfig(filename='debug.log', level=logging.DEBUG)

# Cell 2: Load Models

In [2]:
# Load the YOLO detector. Note that the checkpoint file name below is
# "yolov9s.pt" (a YOLOv9 checkpoint name), not a YOLOv8 one.
model = YOLO("yolov9s.pt")

# Initialize the DeepSORT tracker. This is a single module-level instance, so
# every call that uses `tracker` shares the same tracking state.
# max_age and nn_budget are passed straight through to DeepSort.
tracker = DeepSort(max_age=30, nn_budget=100)


# Cell 3: Feature Extractor

In [3]:
# Load the ReID model (OSNet x1.0) used to compute appearance features.
# NOTE(review): the recorded output of this cell only reports ImageNet-pretrained
# weights being loaded from the torch cache and does not mention 'osnet_x1_0.pth'.
# Confirm that file exists; otherwise the extractor is presumably running on
# ImageNet weights only.
extractor = FeatureExtractor(
    model_name='osnet_x1_0',
    model_path='osnet_x1_0.pth',  # Relative path to the ReID weights; make sure the file is present
    device='cuda' if torch.cuda.is_available() else 'cpu'  # Prefer the GPU when one is available
)



Successfully loaded imagenet pretrained weights from "C:\Users\Amrit Baskota/.cache\torch\checkpoints\osnet_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Model: osnet_x1_0
- params: 2,193,616
- flops: 978,878,352


# Cell 3: Define Video Sources

In [4]:
# Define video sources. The position of a source in this list is used as its
# camera ID by the run cell.
video_sources = ["video1.mp4", "video2.mp4"]  # Replace with actual camera feeds
# Open one capture per source up front; the run cell iterates over this list
# and passes each capture, with its index, to process_video.
caps = [cv2.VideoCapture(src) for src in video_sources]


# Cell 4: Global Tracking Variables

In [5]:
# Global tracking dictionary, shared by all cameras:
# {global_id: {"features": ReID feature vector, "last_seen": timestamp, "camera": cam_id}}
global_tracks = {}

# Last drawn box per global ID, used for click detection and to remember the
# chosen box color: {global_id: (x1, y1, x2, y2, color)}
rectangles = {}


# Cell 5: Mouse Click Callback

In [6]:
# Mouse callback: toggle the color of every stored rectangle under a left click
def click_callback(event, x, y, flags, param):
    """Flip the box color of each tracked rectangle that contains the click.

    Only left-button-down events are handled. A rectangle whose stored color
    is (0, 0, 255) becomes (0, 255, 0); any other color becomes (0, 0, 255).
    Rectangle edges count as inside. `flags` and `param` are unused and exist
    only to satisfy the OpenCV mouse-callback signature.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    color_a = (0, 0, 255)
    color_b = (0, 255, 0)

    # Only values of existing keys are reassigned, so iterating the live dict is safe.
    for global_id, (left, top, right, bottom, current) in rectangles.items():
        inside = left <= x <= right and top <= y <= bottom
        if not inside:
            continue
        toggled = color_b if current == color_a else color_a
        rectangles[global_id] = (left, top, right, bottom, toggled)
        logging.debug(f"Clicked ID {global_id}, Color changed.")

# Cell 6: Database Functions

In [7]:
# Connect to SQLite database
def connect_db(db_path='person_tracking.db'):
    """Open and return a SQLite connection.

    Args:
        db_path: Database file to open. Defaults to 'person_tracking.db', the
            path this function has always used; pass ':memory:' for a
            throwaway in-memory database.

    Returns:
        An open sqlite3.Connection. The caller is responsible for closing it.
    """
    return sqlite3.connect(db_path)

# Store person information in the database
def store_person_info(global_id, features, body_size, last_seen, location, db_path='person_data.db'):
    """Insert or replace one person's row in the `persons` table.

    The table is created on first use. A row with the same `global_id` is
    overwritten (INSERT OR REPLACE).

    Args:
        global_id: Integer primary key of the person.
        features: Value stored in the `color_hist` BLOB column. Objects that
            expose `tobytes()` (e.g. NumPy arrays) are serialized with it so
            that non-contiguous arrays can be stored too.
        body_size: Value for the `body_size` REAL column.
        last_seen: Value for the `last_seen` REAL column.
        location: Iterable of four numbers stored as x1, y1, x2, y2.
        db_path: Database file to write to. Defaults to 'person_data.db', the
            path this function has always used.
    """
    if hasattr(features, "tobytes"):
        features = features.tobytes()

    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back if a statement raises;
        # the surrounding try/finally guarantees the connection is closed either way.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS persons (
                                global_id INTEGER PRIMARY KEY,
                                color_hist BLOB,
                                body_size REAL,
                                last_seen REAL,
                                x1 REAL, y1 REAL, x2 REAL, y2 REAL)''')

            conn.execute('''INSERT OR REPLACE INTO persons
                            (global_id, color_hist, body_size, last_seen, x1, y1, x2, y2)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                         (global_id, features, body_size, last_seen, *location))
    finally:
        conn.close()


# Cell 7: Feature Extraction

In [8]:
# Extract deep features for ReID
def extract_deep_features(image, bbox):
    """Compute a ReID feature vector for one person crop.

    Args:
        image: Frame as an H x W x 3 array in OpenCV's BGR channel order.
        bbox: (x1, y1, x2, y2) corner coordinates; values are truncated to int
            and clamped to the image bounds.

    Returns:
        1-D NumPy array holding the extractor's feature vector for the crop.

    Raises:
        ValueError: If the clamped box has no area inside the image.
    """
    img_h, img_w = image.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp to the frame: a negative start index would silently wrap around in
    # NumPy slicing and an out-of-range box would yield an empty crop.
    x1, x2 = max(0, x1), min(img_w, x2)
    y1, y2 = max(0, y1), min(img_h, y2)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"Bounding box ({x1}, {y1}, {x2}, {y2}) has no area inside the image")

    # The ReID model expects RGB input, while OpenCV frames are BGR.
    person_crop = cv2.cvtColor(image[y1:y2, x1:x2], cv2.COLOR_BGR2RGB)

    # Hand the extractor a NumPy image rather than a pre-built tensor: torchreid's
    # FeatureExtractor runs its own resize and mean/std normalization on NumPy
    # inputs, whereas tensors are forwarded to the network as-is.
    features = extractor(person_crop)
    return features[0].cpu().detach().numpy()


# Cell 8: Person Matching

In [9]:
def find_matching_person(new_feature, cam_id, threshold=0.4):
    """Look up the closest known person seen on a different camera.

    Every entry of `global_tracks` whose "camera" differs from `cam_id` is
    compared with `new_feature` using cosine distance. Entries last seen on
    `cam_id` itself are never considered.

    Args:
        new_feature: Feature vector of the new observation.
        cam_id: Camera the observation came from.
        threshold: Distances at or above this value are rejected.

    Returns:
        The global ID with the smallest distance strictly below `threshold`
        (the earliest entry wins ties), or None when nothing qualifies.
    """
    eligible = []
    for track_id, entry in global_tracks.items():
        if entry["camera"] != cam_id:
            distance = cosine(new_feature, entry["features"])
            if distance < threshold:
                eligible.append((distance, track_id))

    if not eligible:
        return None

    # min() keeps the first of several equal minima, matching a strict "<" scan.
    _, winner = min(eligible, key=lambda pair: pair[0])
    return winner

# Cell 9: Video Processing Function

In [10]:
# Process each video stream
def _detect_people(frame, min_conf=0.6):
    """Run the detector on one frame and return person detections for the tracker.

    Each item is ([left, top, width, height], confidence, "person"). Only
    boxes of class 0 with confidence strictly above `min_conf` are kept.
    """
    detections = []
    for result in model(frame):
        for box in result.boxes:
            if int(box.cls[0]) != 0:
                continue
            conf = float(box.conf[0])
            if conf <= min_conf:
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            detections.append(([x1, y1, x2 - x1, y2 - y1], conf, "person"))
    return detections


def _resolve_global_id(frame, box, cam_id):
    """Pick the global ID for a newly tracked person, registering a new one if needed.

    Returns None when the box has no area inside the frame, so the caller can
    retry on a later frame.
    """
    frame_h, frame_w = frame.shape[:2]
    x1, y1 = max(0, int(box[0])), max(0, int(box[1]))
    x2, y2 = min(frame_w, int(box[2])), min(frame_h, int(box[3]))
    if x2 <= x1 or y2 <= y1:
        return None

    features = extract_deep_features(frame, (x1, y1, x2, y2))
    matched_id = find_matching_person(features, cam_id)
    if matched_id is None:
        global_id = len(global_tracks) + 1
        global_tracks[global_id] = {"features": features, "last_seen": time.time(), "camera": cam_id}
        return global_id

    global_tracks[matched_id]["last_seen"] = time.time()
    global_tracks[matched_id]["camera"] = cam_id
    return matched_id


def process_video(src, cam_id):
    """Detect, track and re-identify people in one camera stream.

    Annotated frames are shown in a window named "Camera <cam_id>" and written
    to "output_<cam_id>.mp4". Processing stops at end of stream or when 'q' is
    pressed. The capture, the writer and the window are released even if an
    error occurs.

    Args:
        src: An open cv2.VideoCapture to read from. Anything else (or a closed
            capture) makes the function open video_sources[cam_id] itself.
        cam_id: Index of the camera; used for the source lookup, the window
            title, the output file name and cross-camera matching.
    """
    if isinstance(src, cv2.VideoCapture) and src.isOpened():
        cap = src
    else:
        cap = cv2.VideoCapture(video_sources[cam_id])
    if not cap.isOpened():
        print(f"Error: Couldn't open video {video_sources[cam_id]}")
        return

    # The writer must be created with the real frame size; frames of any other
    # size are not encoded. Fall back to the previous fixed values only when
    # the capture cannot report its properties.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # also catches NaN
        fps = 20.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can use other codecs like 'MP4V' or 'MJPG'
    out = cv2.VideoWriter(f"output_{cam_id}.mp4", fourcc, fps, (width, height))

    window_name = f"Camera {cam_id}"

    # Tracker track ID -> global ID, for this stream only. ReID runs once per
    # new track instead of once per detection per frame, and the ID shown on
    # screen is the global one.
    local_to_global = {}

    try:
        cv2.namedWindow(window_name)
        cv2.setMouseCallback(window_name, click_callback)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                detections = _detect_people(frame)
                tracks = tracker.update_tracks(detections, frame=frame)

                for track in tracks:
                    if not track.is_confirmed():
                        continue

                    bbox = track.to_tlbr()
                    local_id = track.track_id
                    seen_now = track.time_since_update == 0
                    global_id = local_to_global.get(local_id)

                    if global_id is None:
                        # Only give an identity to tracks backed by a detection in
                        # this frame; this also ignores tracks left over in the
                        # shared tracker from a previously processed stream.
                        if not seen_now:
                            continue
                        global_id = _resolve_global_id(frame, bbox, cam_id)
                        if global_id is None:
                            continue
                        local_to_global[local_id] = global_id
                    elif seen_now:
                        global_tracks[global_id]["last_seen"] = time.time()
                        global_tracks[global_id]["camera"] = cam_id

                    left, top, right, bottom = (int(v) for v in bbox[:4])
                    # Reuse the color stored for this ID (it may have been toggled by a click).
                    color = rectangles.get(global_id, (0, 0, 0, 0, (0, 255, 0)))[4]
                    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                    cv2.putText(frame, f"ID {global_id} (Cam {cam_id})", (left, top - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    rectangles[global_id] = (left, top, right, bottom, color)

                # Write the processed frame to the video file
                if (frame.shape[1], frame.shape[0]) != (width, height):
                    out.write(cv2.resize(frame, (width, height)))
                else:
                    out.write(frame)

                # Display the frame in a window
                cv2.imshow(window_name, frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cv2.destroyWindow(window_name)
    finally:
        cap.release()
        out.release()  # Release the VideoWriter

# Cell 10: Run Video Processing

In [None]:
# Run the video processing for each camera sequentially
try:
    for i, cap in enumerate(caps):
        process_video(cap, i)
finally:
    # Always release the captures opened when `caps` was built and close any
    # remaining windows, even if processing raised or was interrupted.
    # release() is harmless on a capture that has already been released.
    for cap in caps:
        cap.release()
    cv2.destroyAllWindows()