# In [5]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import moviepy.editor as mp
import speech_recognition as sr
from transformers import pipeline

# Pre-trained models, loaded once at module level and shared by the functions below.

# Keras model applied to cropped face images.
face_emotion_model = load_model('Facial_emotion_detection.keras')

# Hugging Face text-classification pipeline applied to the transcribed speech.
text_emotion_model = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

# Helper that collapses the face model's predicted class index into a coarse label
def categorize_emotion(emotion_index):
    """Translate a face-model class index into a coarse emotion category.

    Index 0 yields 'Neutral', index 1 yields 'Positive', and any other
    index yields 'Negative'.

    NOTE(review): earlier inline comments on these branches described index 0
    as Positive (Happy), index 1 as Negative and the fallback as Neutral,
    which does not match the labels actually returned. The returned labels
    are left untouched here -- TODO confirm the intended order against the
    classes the face model was trained with.
    """
    if emotion_index == 0:
        return 'Neutral'
    if emotion_index == 1:
        return 'Positive'
    # Every remaining index falls through to the last category.
    return 'Negative'

# Function to extract audio from video
def extract_audio_from_video(video_path, audio_output_path):
    """Write the audio track of a video file to ``audio_output_path``.

    MoviePy relies on an ffmpeg executable; without one it fails with
    "No ffmpeg exe could be found" (install ffmpeg or set the
    ``IMAGEIO_FFMPEG_EXE`` environment variable).

    Args:
        video_path: Path of the video file to read.
        audio_output_path: Path of the audio file to create.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = mp.VideoFileClip(video_path)
    try:
        # A clip without sound exposes ``audio`` as None; fail with a clear
        # message instead of an AttributeError on ``None.write_audiofile``.
        if video_clip.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        video_clip.audio.write_audiofile(audio_output_path)
    finally:
        # Always release the readers held by the clip, even if writing fails.
        video_clip.close()

# Function to convert audio to text
def audio_to_text(audio_path):
    """Transcribe an audio file using ``Recognizer.recognize_google``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The recognized text. If recognition raises ``sr.UnknownValueError``
        or ``sr.RequestError``, a fixed apology sentence is returned instead
        of a transcript.
    """
    speech_recognizer = sr.Recognizer()

    # Read the complete file into memory before sending it for recognition.
    with sr.AudioFile(audio_path) as source:
        recorded = speech_recognizer.record(source)

    try:
        transcript = speech_recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand the audio."
    except sr.RequestError:
        return "Sorry, there was an error with the speech recognition service."
    return transcript

# Helper that turns one cropped face into the input array for the face emotion model
def _prepare_face_input(face):
    """Convert a BGR face crop into a scaled 48x48 grayscale model input."""
    resized = cv2.resize(face, (48, 48))
    gray_face = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # Scale pixel values from [0, 255] down to [0, 1].
    scaled = gray_face.astype('float32') / 255.0
    # expand_dims adds the leading batch axis expected by ``predict``.
    return np.expand_dims(img_to_array(scaled), axis=0)


# Function to get emotion from video using face emotion model
def get_video_emotion(video_path, frame_skip=5):
    """Return the emotion category seen most often on faces in a video.

    Every ``frame_skip``-th frame is scanned with a Haar cascade face
    detector; each detected face is classified by ``face_emotion_model`` and
    mapped to a category with ``categorize_emotion``.

    Args:
        video_path: Path of the video file to analyse.
        frame_skip: Analyse one frame out of every ``frame_skip`` frames.
            Must be at least 1.

    Returns:
        'Positive', 'Negative' or 'Neutral' (ties go to the first of these in
        that order), or ``None`` if the video could not be opened or no face
        was detected in any analysed frame.

    Raises:
        ValueError: If ``frame_skip`` is smaller than 1.
    """
    # Reject invalid values up front; 0 would otherwise fail with a
    # ZeroDivisionError in the modulo below.
    if frame_skip < 1:
        raise ValueError("frame_skip must be a positive integer")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Couldn't open the video.")
            return None

        emotion_counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0}
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            frame_count += 1
            if frame_count % frame_skip != 0:
                continue

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
            )

            # An empty detection result simply skips this loop.
            for (x, y, w, h) in faces:
                face_input = _prepare_face_input(frame[y:y+h, x:x+w])
                # verbose=0 keeps Keras from printing a progress bar per face.
                prediction = face_emotion_model.predict(face_input, verbose=0)
                emotion = categorize_emotion(np.argmax(prediction[0]))
                emotion_counts[emotion] += 1
    finally:
        # Release the capture even if detection or prediction raises.
        cap.release()

    # Without any detected face there is nothing to vote on; max() over
    # all-zero counts would otherwise report 'Positive'.
    if not any(emotion_counts.values()):
        return None
    return max(emotion_counts, key=emotion_counts.get)

# Function to integrate all models and get final emotion

# Sentences that audio_to_text returns in place of a transcript when
# recognition fails; they must not be classified as if they were speech.
_TRANSCRIPTION_FAILURES = (
    "Sorry, I couldn't understand the audio.",
    "Sorry, there was an error with the speech recognition service.",
)

# Maps the text model's labels onto the categories returned by
# get_video_emotion so that both results can be compared.
# NOTE(review): 'surprise' is grouped with the negative emotions to follow the
# grouping stated in the categorize_emotion comments -- confirm this is intended.
_TEXT_LABEL_TO_CATEGORY = {
    'joy': 'Positive',
    'neutral': 'Neutral',
    'anger': 'Negative',
    'disgust': 'Negative',
    'fear': 'Negative',
    'sadness': 'Negative',
    'surprise': 'Negative',
}


# Helper that merges the text-based and face-based categories into one result
def _combine_emotions(text_category, video_emotion):
    """Return the agreed category, the only available one, or 'Mixed emotions'."""
    if text_category is None:
        return video_emotion
    if video_emotion is None:
        return text_category
    if text_category == video_emotion:
        return text_category
    return 'Mixed emotions'  # If there is a mismatch


def get_final_emotion_from_video(video_path):
    """Combine speech-based and face-based emotion estimates for a video.

    The audio track is written to ``extracted_audio.wav``, transcribed, and
    classified by ``text_emotion_model``; the resulting label is mapped to
    'Positive' / 'Negative' / 'Neutral' so it can be compared with the result
    of ``get_video_emotion``.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        The shared category when both sources agree, the category of the
        only available source when the other produced nothing,
        'Mixed emotions' when the two sources disagree, or ``None`` when
        neither source produced a result.
    """
    # Step 1: Extract audio from video
    audio_output_path = "extracted_audio.wav"
    extract_audio_from_video(video_path, audio_output_path)

    # Step 2: Convert audio to text
    audio_text = audio_to_text(audio_output_path)

    # Step 3: Get emotion from text, skipping empty transcripts and the
    # apology sentences that signal a failed transcription
    text_emotion = None
    text_category = None
    if audio_text.strip() and audio_text not in _TRANSCRIPTION_FAILURES:
        text_emotion = text_emotion_model(audio_text)[0]['label']
        # Labels outside the mapping yield None and count as "no text result".
        text_category = _TEXT_LABEL_TO_CATEGORY.get(text_emotion.lower())

    # Step 4: Get emotion from video (face)
    video_emotion = get_video_emotion(video_path)

    # Step 5: Report both raw results, then merge them
    print(f"Emotion from text: {text_emotion}")
    print(f"Emotion from face: {video_emotion}")

    return _combine_emotions(text_category, video_emotion)

# Example usage: run the full pipeline on a sample video and print the result.
# NOTE: these statements sit at module level, so they execute every time this
# file is run.
video_path = 'video_input.mp4'
final_emotion = get_final_emotion_from_video(video_path)
print(f"Final emotion detected: {final_emotion}")


# Output: RuntimeError: No ffmpeg exe could be found. Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.