In [None]:
import torch
from ultralytics import YOLO
import cv2

# Real-time violence detection: run a trained YOLOv8 model on every frame of
# a video source, draw each sufficiently confident detection, and show an
# on-screen alert whenever a violence-class detection is present.

# Load the trained YOLOv8 model (replace with your trained model file)
MODEL_PATH = "../model/100epoch/best.pt"  # Ensure model exists
model = YOLO(MODEL_PATH)

# Open webcam (or replace with a video file path)
VIDEO_SOURCE = 0  # Use "video.mp4" if needed

# Minimum confidence a detection needs before it is drawn or counted.
# It never changes between frames, so it is defined once, outside the loop.
CONFIDENCE_THRESHOLD = 0.70

# Class index that the drawing logic treats as "violence"; any other class is
# labelled "Non-Violent".
# NOTE(review): confirm this index against `model.names` for the loaded weights.
VIOLENCE_CLASS_ID = 0

cap = cv2.VideoCapture(VIDEO_SOURCE)

# Check if video source is opened
if not cap.isOpened():
    print("Error: Could not open video source.")
    # SystemExit is a builtin, so this does not depend on the interactive
    # `exit` helper that the site module injects.
    raise SystemExit

try:
    # Loop for real-time detection
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        # Perform YOLOv8 inference
        results = model(frame)

        # Highest confidence among the violence detections that passed the
        # threshold in this frame; None means no violence was detected.
        top_violence_conf = None

        for r in results:
            for box in r.boxes:
                conf = float(box.conf[0])  # Confidence score
                cls = int(box.cls[0])  # Class label

                # Apply threshold: weak detections are neither drawn nor counted
                if conf < CONFIDENCE_THRESHOLD:
                    continue

                is_violence = cls == VIOLENCE_CLASS_ID
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                label = f"Violence: {conf:.2f}" if is_violence else "Non-Violent"
                color = (0, 0, 255) if is_violence else (0, 255, 0)

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                # Remember the strongest violence detection so the alert below
                # reports a violence confidence, not whichever box happened to
                # be iterated last.
                if is_violence and (top_violence_conf is None or conf > top_violence_conf):
                    top_violence_conf = conf

        violence_detected = top_violence_conf is not None

        # Draw a fixed red dot in the bottom right corner if violence is detected
        if violence_detected:
            height, width, _ = frame.shape  # Get frame dimensions
            dot_position = (width - 50, height - 50)  # Bottom-right corner
            cv2.circle(frame, dot_position, 10, (0, 0, 255), -1)  # Red filled circle
            cv2.putText(frame, f"Violence Detected: {top_violence_conf:.2f}", (width - 400, height - 55),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        # Display the result
        cv2.imshow("Violence Detection", frame)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release resources even if inference or drawing raised part-way through,
    # so the capture device and the display window are not left open.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 1 non_violence, 85.9ms
Speed: 1.9ms preprocess, 85.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 150.6ms
Speed: 1.5ms preprocess, 150.6ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 80.4ms
Speed: 1.1ms preprocess, 80.4ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 101.2ms
Speed: 7.6ms preprocess, 101.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 78.9ms
Speed: 1.2ms preprocess, 78.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 83.9ms
Speed: 1.4ms preprocess, 83.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)



2025-03-15 18:05:35.025 python[91952:452225] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-15 18:05:35.025 python[91952:452225] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 1 non_violence, 73.1ms
Speed: 1.3ms preprocess, 73.1ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 78.0ms
Speed: 1.2ms preprocess, 78.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 75.8ms
Speed: 1.1ms preprocess, 75.8ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 81.1ms
Speed: 1.1ms preprocess, 81.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 90.0ms
Speed: 1.9ms preprocess, 90.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 82.7ms
Speed: 1.2ms preprocess, 82.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 79.5ms
Speed: 1.7ms preprocess, 79.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 non_violence, 71.4ms
Speed: 1.1ms preprocess, 71.4ms inference,