In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from ultralytics import YOLO 

def Video_Pro(video_path):
    def object_detection(frame, yolo_model, classes):
        results = yolo_model(frame)
        for result in results:
            bboxes = result.boxes
            b = bboxes.xyxy.cpu().numpy()
            boxes = b.astype(int)
            cls = bboxes.cls
            class_indices = cls.cpu().numpy()
            labels = [result.names[i] for i in class_indices]
        return boxes, labels

    def frames_classifying(frame, model):
        frame = cv2.resize(frame, (224, 224))
        frame = frame / 255.0 
        frame = np.expand_dims(frame, axis=0)
        prediction = model.predict(frame)
        if prediction[0][0] > 0.5:
            return "Violent"
        else:
            return "Non-Violent"
        
    

    yolo_model = YOLO('yolov8n.pt')
    with open("coco.names", "r") as f:
        yolo_classes = [line.strip() for line in f.readlines()]
    # video_capture = cv2.VideoCapture(0)
    cnn_mobilenetv2_model = load_model("/Users/vamshi/Desktop/AI-ML lab/VIOLENCE DETECTION/CNN_mobilenetv2_model.h5")
    
    # Open the webcam
    # cam = Picamera2()
    # cam.preview_configuration.main.size = (640, 360)
    # cam.preview_configuration.main.format = "RGB888"
    # cam.preview_configuration.controls.FrameRate = 30
    # cam.preview_configuration.align()
    # cam.configure("preview")
    # cam.start()
    video_capture = cv2.VideoCapture(video_path)
    
    while True:
        ret, frame = video_capture.read()
        # frame = cam.capture_array()
        if not ret:
            break
        
        detected_boxes, detected_labels = object_detection(frame, yolo_model, yolo_classes)
        
        for bbox, label in zip(detected_boxes, detected_labels):
            x, y, w, h = bbox
            object_frame = frame[y:y+h, x:x+w]
            violence_label = frames_classifying(object_frame, cnn_mobilenetv2_model)
            
            # Draw bounding box and label based on classification
            if violence_label == 'Violent':
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
                cv2.putText(frame, 'Violent', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
                print("Warning: Violent Object Detected!")
            else:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, 'Non-Violent', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        cv2.imshow('Violence Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    # cam.stop
    # # vid.release()
    # cv.destroyAllWindows()
    video_capture.release()
    cv2.destroyAllWindows()

# Video_Pro()
Video_Pro('/Users/vamshi/Desktop/AI-ML lab/VIOLENCE DETECTION/test_data/non_aggressive/0.mp4')





0: 384x640 1 person, 66.7ms
Speed: 3.9ms preprocess, 66.7ms inference, 528.9ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 447ms/step

0: 384x640 1 person, 54.4ms
Speed: 1.2ms preprocess, 54.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

0: 384x640 1 person, 48.3ms
Speed: 1.2ms preprocess, 48.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

0: 384x640 1 person, 54.0ms
Speed: 1.2ms preprocess, 54.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

0: 384x640 1 person, 52.7ms
Speed: 1.4ms preprocess, 52.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

0: 384x640 1 person, 52.6ms
Speed: 1.7ms preprocess, 52.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step

0: 384x640 1 person, 62.0ms
Speed: 1.3ms preprocess, 62.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

0: 384x640 1 person, 53.8ms
Speed: 1.5ms preprocess, 53.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[1m1/1[0

KeyboardInterrupt: 