In [1]:
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from ultralytics import YOLO

# --- Clustering configuration -------------------------------------------
# DBSCAN neighbourhood radius, in pixels, between person centroids.
eps = 50
# Smallest number of persons that DBSCAN may treat as one cluster (a "crowd").
min_samples = 3

# --- Run-time state ------------------------------------------------------
# Number of frames read from the video so far.
frame_count = 0
# Rows of [frame number, cluster size], written to CSV at the end of the run.
crowd_log = []
# Per-cluster persistence counter: DBSCAN cluster label -> number of frames
# in which that label has been observed as a crowd.
crowd_frames = {}

# --- Detector --------------------------------------------------------------
# YOLOv8 "nano" weights; swap in 'yolov8s.pt' for more accuracy if needed.
model = YOLO('yolov8n.pt')

# Function to detect persons and compute centroids
def detect_persons(frame, model, conf_threshold=0.5, person_class=0):
    """
    Detect persons in a given frame and return their bounding-box centroids.

    The model is called as ``model(frame)``; only the first result is used,
    and its ``boxes.xyxy``, ``boxes.cls`` and ``boxes.conf`` sequences are
    read in lockstep.

    Args:
        frame (numpy.ndarray): Input video frame.
        model: Callable detector (e.g. a pre-trained YOLOv8 model).
        conf_threshold (float): A detection is kept only when its confidence
            is strictly greater than this value. Defaults to 0.5.
        person_class (int): Class id treated as 'person'. Defaults to 0.

    Returns:
        List of integer centroids of detected persons [(x, y), ...], in the
        order the detections were reported. Empty when nothing qualifies.
    """
    detections = model(frame)[0].boxes

    persons = []
    for box, cls, conf in zip(detections.xyxy, detections.cls, detections.conf):
        if int(cls) == person_class and conf > conf_threshold:
            # Truncate corner coordinates to ints before averaging so the
            # centroid is an integer pixel position usable by drawing calls.
            x1, y1, x2, y2 = map(int, box)
            persons.append(((x1 + x2) // 2, (y1 + y2) // 2))
    return persons

# Function to visualize detected persons and clusters (optional)
def visualize_crowd(frame, persons, labels):
    """
    Mark each person centroid on the frame and show it in a window.

    A filled dot of radius 5 is drawn in place on ``frame`` for every
    (centroid, label) pair; pairing stops at the shorter of the two inputs.
    The annotated frame is then displayed in the "Crowd Detection" window.

    Args:
        frame (numpy.ndarray): Input video frame (modified in place).
        persons (list): List of person centroids as (x, y) pairs.
        labels (list): Cluster labels from DBSCAN; -1 marks noise.
    """
    clustered_color = (0, 255, 0)  # Green: centroid belongs to a cluster
    noise_color = (0, 0, 255)  # Red: centroid labelled as noise (-1)

    for center, cluster_id in zip(persons, labels):
        px, py = center
        if cluster_id != -1:
            dot_color = clustered_color
        else:
            dot_color = noise_color
        cv2.circle(frame, (px, py), 5, dot_color, -1)

    cv2.imshow("Crowd Detection", frame)

# Process video
# Number of consecutive frames a cluster label must be seen before the
# crowd event is logged (logged once, on the frame the count is reached).
persistence_frames = 10

cap = cv2.VideoCapture("/Users/yuvraj/Downloads/WhatsApp Video 2024-11-16 at 17.21.10.mp4")
if not cap.isOpened():
    # Without this the loop below is skipped silently and an empty CSV is written.
    print("Warning: could not open video source; no frames will be processed.")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        persons = detect_persons(frame, model)  # Detect persons in the frame

        # Start every frame with "everyone is noise". This guarantees that
        # `labels` is always defined and always matches `persons` for this
        # frame, even when there are too few persons to run the clustering.
        labels = np.full(len(persons), -1, dtype=int)
        active_clusters = set()

        if len(persons) >= min_samples:
            # Perform clustering to identify crowds
            labels = DBSCAN(eps=eps, min_samples=min_samples).fit(np.array(persons)).labels_

            for cluster in set(labels):
                if cluster == -1:
                    continue  # Noise (not part of any cluster)

                cluster_size = int(np.count_nonzero(labels == cluster))
                if cluster_size < min_samples:
                    continue

                # Track crowd persistence
                active_clusters.add(cluster)
                crowd_frames[cluster] = crowd_frames.get(cluster, 0) + 1

                # Log the crowd event once, when persistence is first reached
                if crowd_frames[cluster] == persistence_frames:
                    crowd_log.append([frame_count, cluster_size])

        # A label that was not seen as a crowd in this frame loses its streak,
        # so the counter really measures consecutive frames.
        # NOTE(review): DBSCAN labels are assigned independently per frame, so
        # the same label in two frames is not guaranteed to be the same
        # physical group -- confirm whether real tracking is needed.
        for stale in set(crowd_frames) - active_clusters:
            del crowd_frames[stale]

        # Visualize results (optional)
        visualize_crowd(frame, persons, labels)

        # Break loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close windows, even on error.
    cap.release()
    cv2.destroyAllWindows()

# Save results to CSV
df = pd.DataFrame(crowd_log, columns=["Frame_Number", "Person_Count"])
df.to_csv("crowd_events.csv", index=False)




0: 384x640 6 persons, 1 backpack, 44.6ms
Speed: 1.5ms preprocess, 44.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 1 handbag, 1 baseball glove, 42.2ms
Speed: 1.5ms preprocess, 42.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 1 handbag, 48.4ms
Speed: 1.1ms preprocess, 48.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 1 handbag, 48.5ms
Speed: 1.1ms preprocess, 48.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 53.1ms
Speed: 1.3ms preprocess, 53.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 65.0ms
Speed: 1.2ms preprocess, 65.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 47.0ms
Speed: 1.5ms preprocess, 47.0ms inference, 0.8ms postprocess per image at shape (1, 3

2024-11-17 20:35:11.723 Python[30396:2837253] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-17 20:35:11.723 Python[30396:2837253] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 6 persons, 1 backpack, 63.4ms
Speed: 1.1ms preprocess, 63.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backpack, 51.2ms
Speed: 1.4ms preprocess, 51.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 backpack, 36.4ms
Speed: 1.2ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 backpack, 1 handbag, 36.4ms
Speed: 1.1ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 backpack, 1 handbag, 36.2ms
Speed: 1.1ms preprocess, 36.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 34.1ms
Speed: 1.1ms preprocess, 34.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 35.9ms
Speed: 1.1ms preprocess, 35.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 backp