In [3]:
import cv2
import tensorflow as tf
import numpy as np
from datetime import datetime
import os
from google.colab import drive
from ultralytics import YOLO  # YOLOv8 library

# Make the user's Google Drive visible to the Colab runtime.
drive.mount('/content/drive')

# Locations of the model artefacts and the input clip on the mounted drive.
saved_model_path = '/content/drive/MyDrive/movinet2'
yolov8_weights_path = '/content/drive/MyDrive/yolov8n.pt'
input_video_path = '/content/drive/MyDrive/IMG_1195.MOV'

# Each run writes to its own timestamped file so earlier results are kept.
_run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = f'/content/drive/MyDrive/Fall/Fall_Detection_{_run_stamp}.mp4'

# Fine-tuned MoviNet SavedModel; `infer` is its default serving signature.
model = tf.saved_model.load(saved_model_path)
infer = model.signatures["serving_default"]

# YOLOv8 detector built from the weights file above.
yolo_model = YOLO(yolov8_weights_path)

# Function to preprocess frames for MoviNet
def preprocess_frame(frame):
    """Turn one video frame into a MoviNet input tensor.

    The frame is resized to 224x224, divided by 255.0, given two leading
    singleton axes (batch, then temporal) and returned as float32.

    NOTE(review): the caller passes frames read with OpenCV, which are
    presumably in BGR channel order -- confirm this matches the channel
    order the model was fine-tuned with.
    """
    resized = cv2.resize(frame, (224, 224))
    scaled = resized / 255.0
    # Two new leading axes: equivalent to expanding axis 0 twice.
    batched = scaled[np.newaxis, np.newaxis, ...]
    return batched.astype(np.float32)

# Function to detect persons using YOLOv8
def detect_objects(frame):
    """Run YOLOv8 on a frame and keep only the person detections.

    Returns a list of ``(bbox, confidence)`` tuples, one per detected box
    whose class id is 0 (the class this script treats as "person"). Each
    ``bbox`` is the box's xyxy row as a NumPy array.
    """
    boxes = yolo_model(frame)[0].boxes
    corners = boxes.xyxy.cpu().numpy()
    scores = boxes.conf.cpu().numpy()
    labels = boxes.cls.cpu().numpy().astype(int)

    persons = []
    for corner, score, label in zip(corners, scores, labels):
        if label == 0:
            persons.append((corner, score))
    return persons

# Open the input video
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a notebook kernel exit() does
    # not stop the cell at this line, so the code below would keep running
    # against an unopened capture.
    raise SystemExit("Error: Could not open the input video.")

# Keep the source frame rate as a float: truncating e.g. 29.97 to 29 makes the
# output play at the wrong speed. Fall back to 30 if the container reports
# nothing usable (0, negative or NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The writer cannot create missing folders, so make sure the target exists.
output_dir = os.path.dirname(output_video_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Define codec and create VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out.isOpened():
    # A writer that failed to open drops every frame silently; stop early
    # rather than reporting a video that was never written.
    cap.release()
    out.release()
    raise SystemExit(f"Error: Could not open the output video for writing: {output_video_path}")

# Variables to track fall detection and bounding box
fall_detected = False
tracker = None  # To hold the OpenCV tracker
fall_occurred = False  # Latches to True on the first detected fall

# Process video frame by frame; the finally clause guarantees that the capture
# and the writer are released (and the output file finalized) even on error.
frame_number = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Before any fall, (re)acquire a person with YOLOv8 whenever there is
        # no live tracker.
        if tracker is None and not fall_occurred:
            detections = detect_objects(frame)
            if detections:
                bbox, _ = detections[0]  # Take the first detected person
                x1, y1, x2, y2 = map(int, bbox)
                tracker = cv2.TrackerCSRT_create()  # Robust tracker
                tracker.init(frame, (x1, y1, x2 - x1, y2 - y1))

        # Update tracker; x, y, w, h are only read below while tracker is set.
        if tracker is not None:
            success, tracked_bbox = tracker.update(frame)
            if success:
                x, y, w, h = map(int, tracked_bbox)
            else:
                tracker = None  # Reset tracker if tracking fails

        # Classify the current frame with MoviNet. On failure the previous
        # fall_detected value is kept and processing continues.
        processed_frame = preprocess_frame(frame)
        try:
            predictions = infer(image=processed_frame)
            raw_output = predictions['classifier_head_2'].numpy()
            fall_probability = raw_output[0][0]  # Score for the "fall" class
            nofall_probability = raw_output[0][1]  # Score for the "nofall" class
            fall_detected = fall_probability > nofall_probability
        except Exception as e:
            print(f"Error during MoviNet inference: {e}")

        # Once a fall is detected the state stays "FALL DETECTED" for the
        # rest of the video.
        if fall_detected and not fall_occurred:
            fall_occurred = True

        # Keep an unannotated copy for the post-fall re-detection below, so
        # the detector and tracker never see the overlays drawn on `frame`.
        detection_frame = frame.copy() if fall_occurred else None

        # Draw bounding box and label
        if tracker is not None:
            color = (0, 0, 255) if fall_occurred else (0, 255, 0)  # Red for fall, green otherwise
            thickness = 5  # Thicker bounding box for better visibility
            label = "FALL DETECTED" if fall_occurred else "Person"
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, thickness)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        # Overlay status text: a thick black pass first acts as a shadow.
        status_text = "FALL DETECTED" if fall_occurred else "NO FALL"
        status_color = (0, 0, 255) if fall_occurred else (0, 255, 0)
        cv2.putText(frame, status_text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 5, cv2.LINE_AA)
        cv2.putText(frame, status_text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, status_color, 3, cv2.LINE_AA)

        # After a fall, re-detect the person on every frame and restart the
        # tracker on a slightly enlarged box so it covers the fallen person.
        if fall_occurred:
            detections = detect_objects(detection_frame)
            if detections:
                bbox, _ = detections[0]  # Get updated bounding box
                x1, y1, x2, y2 = map(int, bbox)
                margin = 20  # Padding added on every side, clamped to the frame
                x1, y1 = max(0, x1 - margin), max(0, y1 - margin)
                x2, y2 = min(width, x2 + margin), min(height, y2 + margin)
                x, y, w, h = x1, y1, x2 - x1, y2 - y1
                tracker = cv2.TrackerCSRT_create()
                tracker.init(detection_frame, (x, y, w, h))

        # Write the annotated frame to the output video
        out.write(frame)
        frame_number += 1
finally:
    # Release resources
    cap.release()
    out.release()

# Report results (only reached when processing finished without an error)
print(f"Video saved to: {output_video_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

0: 640x384 1 person, 10.4ms
Speed: 3.8ms preprocess, 10.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 19.2ms
Speed: 3.6ms preprocess, 19.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 8.0ms
Speed: 4.5ms preprocess, 8.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 7.6ms
Speed: 4.9ms preprocess, 7.6ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 persons, 8.8ms
Speed: 3.3ms preprocess, 8.8ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 8.3ms
Speed: 5.6ms preprocess, 8.3ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 8.0ms
Speed: 4.5ms preprocess, 8.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384