In [10]:
!pip install ultralytics
!pip install opencv-python
!pip install deepface

[0m

In [1]:
!pip install gradio

Collecting gradio
  Obtaining dependency information for gradio from https://files.pythonhosted.org/packages/a5/ba/18ad189474e730baa47697c55afd4ab7c0d19d429232f7b8f771f3fd76d5/gradio-5.4.0-py3-none-any.whl.metadata
  Downloading gradio-5.4.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Obtaining dependency information for aiofiles<24.0,>=22.0 from https://files.pythonhosted.org/packages/c5/19/5af6804c4cc0fed83f47bff6e413a98a36618e7d40185cd36e69737f3b0e/aiofiles-23.2.1-py3-none-any.whl.metadata
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Obtaining dependency information for fastapi<1.0,>=0.115.2 from https://files.pythonhosted.org/packages/57/95/4c5b79e7ca1f7b372d16a32cad7c9cc6c3c899200bed8f45739f4415cfae/fastapi-0.115.3-py3-none-any.whl.metadata
  Downloading fastapi-0.115.3-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Obtaining dependency information for ffm

In [None]:
import cv2 as cv
import numpy as np
from ultralytics import YOLO
from deepface import DeepFace
import random
import time

# Load YOLO model for object detection
model = YOLO("yolo11n.pt")  # Adjust path if using another YOLO model

# COCO class names (trimmed for brevity, expand as needed)
COCO_CLASSES = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", 
                "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", 
                "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", 
                "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", 
                "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", 
                "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 
                "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", 
                "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", 
                "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", 
                "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", 
                "toothbrush"]

# Items and emotions contributing to the fall score
FALL_OBJECTS = {"cup": 10, "orange": 10, "cat": 8, "dog": 8, "bottle": 8, "wine glass": 8, "bowl": 6, "teddy bear": 6,
                "vase": 6, "umbrella": 5, "handbag": 4, "backpack": 3, "bench": 2, "chair": 3, "couch": 4,
                "potted plant": 5, "book": 7, "clock": 2, "tv": 2}
FALL_EMOTIONS = {"sad": 10, "neutral": 5}

# Load leaf images and print dimensions for verification
leaf_images = []
for i in range(1, 5):
    leaf = cv.imread(f'leaves/leaf{i}.png', cv.IMREAD_UNCHANGED)
    if leaf is not None:
        leaf_images.append(leaf)
    else:
        print(f"Failed to load leaf{i}.png")

# Replicate leaves
num_leaves = 10
if len(leaf_images) < num_leaves:
    # Repeat the leaf images to reach the desired number of leaves
    leaf_images = leaf_images * (num_leaves // len(leaf_images)) + leaf_images[:num_leaves % len(leaf_images)]

# Now `leaf_images` should have exactly `num_leaves` elements
print(f"Number of leaves in use: {len(leaf_images)}")

# Initialize video capture
# Initialize video capture
cap = cv.VideoCapture(0)
cap.set(cv.CAP_PROP_FRAME_WIDTH, 800)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, 600)

# Initialize leaf positions to be distributed evenly across the frame width
frame_width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
segment_width = frame_width // num_leaves

leaf_positions = [
    {"x": random.randint(i * segment_width, (i + 1) * segment_width - 1), "y": random.randint(0, 20)}
    for i in range(num_leaves)
]

def animate_leaves(frame):
    """ Animate falling leaves on the frame. """
    for i, leaf in enumerate(leaf_images):
        leaf_pos = leaf_positions[i]
        x, y = leaf_pos["x"], leaf_pos["y"]
        h, w, _ = leaf.shape

        # Ensure the leaf stays within the frame width
        if x + w > frame.shape[1]:
            x = frame.shape[1] - w

        # Ensure the leaf stays within the frame height
        if y + h > frame.shape[0]:
            y = frame.shape[0] - h

        # Debugging: Print leaf position and size
        print(f"Overlaying leaf {i}: position ({x}, {y}), size ({h}, {w})")

        # Overlay the leaf image on the frame
        overlay_image_alpha(frame, leaf, x, y, leaf)

        # Update leaf position for the next frame to fall faster
        leaf_pos["y"] += random.randint(4, 10)  # Increased speed to make leaves more noticeable

        # Reset leaf position if it moves off-screen
        if y > frame.shape[0]:
            leaf_pos["y"] = random.randint(-30, -10)  # Reduced reset range for quicker visibility
            leaf_pos["x"] = random.randint(i * segment_width, (i + 1) * segment_width - w)

def get_class_label(class_id):
    if 0 <= class_id < len(COCO_CLASSES):
        return COCO_CLASSES[class_id]
    else:
        return "Unknown"

def analyze_emotion(face_img):
    try:
        result = DeepFace.analyze(face_img, actions=['emotion'], enforce_detection=False)
        if isinstance(result, list):
            result = result[0]
        dominant_emotion = result["dominant_emotion"]
        confidence = result["emotion"][dominant_emotion]
        return dominant_emotion, confidence
    except Exception as e:
        print(f"DeepFace error: {e}")
        return "No emotion detected", 0.0

def calculate_fall_score(detected_objects, dominant_emotion):
    fall_score = 0
    for obj in detected_objects:
        label = obj["label"]
        if label in FALL_OBJECTS:
            fall_score += FALL_OBJECTS[label]
    if dominant_emotion in FALL_EMOTIONS:
        fall_score += FALL_EMOTIONS[dominant_emotion]
    return fall_score

def overlay_image_alpha(background, overlay, x, y, alpha_mask):
    """ Overlay `overlay` onto `background` at position (x, y) with an alpha mask. """
    h, w, _ = overlay.shape
    background_h, background_w, _ = background.shape

    if x < 0 or y < 0:
        return

    if x + w > background_w:
        w = background_w - x
    if y + h > background_h:
        h = background_h - y

    if w <= 0 or h <= 0:
        return

    overlay_image = overlay[:h, :w, :3]
    mask = alpha_mask[:h, :w, 3] / 255.0

    for c in range(3):
        background[y:y+h, x:x+w, c] = (1.0 - mask) * background[y:y+h, x:x+w, c] + mask * overlay_image[:, :, c]

# Function to draw multi-line text on the frame
def draw_multiline_text(frame, text, font, font_scale, color, thickness, line_spacing=1.5):
    """ Draw multi-line text centered on an OpenCV frame. """
    # Split the text into lines
    lines = text.split('\n')

    # Get the frame dimensions
    frame_height, frame_width, _ = frame.shape

    # Calculate the total height of all lines combined
    total_text_height = int(len(lines) * font_scale * 30 * line_spacing)

    # Calculate the y-coordinate to start the text block so that it is centered vertically
    y_start = (frame_height - total_text_height) // 2

    for i, line in enumerate(lines):
        # Calculate the width of each line to center it horizontally
        text_size = cv.getTextSize(line, font, font_scale, thickness)[0]
        x = (frame_width - text_size[0]) // 2

        # Calculate the y-coordinate for each line
        y = y_start + int(i * font_scale * 30 * line_spacing)

        # Draw the line on the frame
        cv.putText(frame, line, (x, y), font, font_scale, color, thickness, cv.LINE_AA)

def process_results(results, frame):
    detected_objects = []
    dominant_emotion = "neutral"
    frame_height, frame_width, _ = frame.shape

    for box in results[0].boxes:
        class_id = int(box.cls)
        label = get_class_label(class_id)
        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])
        confidence = box.conf
        detected_objects.append({
            "label": label,
            "coordinates": (x_min, y_min, x_max, y_max),
            "confidence": confidence
        })
        
        cv.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        cv.putText(frame, label, (x_min, max(y_min - 10, 0)), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        if label == "person":
            face_img = frame[y_min:y_max, x_min:x_max]
            if face_img.size == 0:
                continue
            emotion_label, confidence = analyze_emotion(face_img)
            dominant_emotion = emotion_label
            text_x, text_y = max(x_min, 0), min(y_max + 30, frame_height - 10)
            cv.putText(frame, f"Emotion: {emotion_label} ({confidence:.2f})", 
                       (text_x, text_y), 
                       cv.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
    
    fall_score = calculate_fall_score(detected_objects, dominant_emotion)
    
    if fall_score > 25:
        draw_multiline_text(
            frame, 
            "It seems like the 'Hoa Hoa Hoa Season' has started for you.\nGrab some snacks, a blanket, and enjoy a timeless masterpiece!", 
            font=cv.FONT_HERSHEY_SIMPLEX, 
            font_scale=1.2, 
            color=(0, 0, 255), 
            thickness=3, 
            line_spacing=1.5  # Adjust line spacing as needed
        )
        cv.imshow("Emotion Recognition and Fall Score", frame)
        cv.waitKey(0)
        return detected_objects, fall_score

    cv.putText(frame, f"Fall Score: {fall_score}", 
               (10, 30), 
               cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 165, 255), 2)

    animate_leaves(frame)

    return detected_objects, fall_score

# Start time for the 3-second delay
start_time = time.time()
max_duration = 5

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Measure elapsed time
    elapsed_time = time.time() - start_time
    if elapsed_time < max_duration:
        # Display the live feed for the defined period without calculating the fall score
        cv.imshow("Emotion Recognition and Fall Score", frame)

        # Break if 'q' is pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
        continue
    
    # Once 3 seconds have passed, process the frame
    results = model(frame)
    detected_objects, fall_score = process_results(results, frame)
    
    # Show the final frame with labels, emotions, and fall score
    cv.imshow("Emotion Recognition and Fall Score", frame)

    # Introduce a delay to control the frame rate
    key = cv.waitKey(33)  # Approximately 30 FPS (1000 ms / 30)
    if key == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

Number of leaves in use: 10

0: 512x640 1 person, 93.1ms
Speed: 6.0ms preprocess, 93.1ms inference, 7.0ms postprocess per image at shape (1, 3, 512, 640)
Overlaying leaf 0: position (87, 19), size (100, 100)
Overlaying leaf 1: position (192, 19), size (100, 100)
Overlaying leaf 2: position (382, 19), size (100, 100)
Overlaying leaf 3: position (570, 19), size (100, 100)
Overlaying leaf 4: position (747, 8), size (100, 100)
Overlaying leaf 5: position (1054, 5), size (100, 100)
Overlaying leaf 6: position (1148, 19), size (100, 100)
Overlaying leaf 7: position (1353, 10), size (100, 100)
Overlaying leaf 8: position (1534, 14), size (100, 100)
Overlaying leaf 9: position (1603, 12), size (100, 100)

0: 512x640 1 person, 1 banana, 70.8ms
Speed: 2.6ms preprocess, 70.8ms inference, 0.5ms postprocess per image at shape (1, 3, 512, 640)
Overlaying leaf 0: position (87, 28), size (100, 100)
Overlaying leaf 1: position (192, 23), size (100, 100)
Overlaying leaf 2: position (382, 24), size (100,