In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Activation, GlobalAveragePooling2D, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras.applications.mobilenet import preprocess_input as preprocess_mobile
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficient


from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2 as cv
import time


In [2]:
# Class names for the 7-way emotion classifier. The position of each name is
# the class index: predictions are mapped to a name via
# emotion_labels[argmax(prediction)].
emotion_labels = [
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]

In [3]:
# Loading the models
# Mobile Net
#mobile_model_load = load_model('best_model.h5')




In [4]:
# TODO: revisit this cell (original author's note: "Check afterwards").
#
# Rebuild the EfficientNetB0-based classifier layer by layer, then restore the
# weights stored in "effnet_model_saved_weights.h5". The layer stack below must
# stay identical to the one used when those weights were saved.

# Backbone: EfficientNetB0 without its classification top, 224x224x3 input.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = True  # set to False to freeze the backbone instead

# Classification head, applied in order on top of the backbone output.
# (An extra Dense(256, name='dense_1') + Dropout(0.3, name='dropout_2') stage
# that used to sit before 'dense_2' is intentionally not part of the model.)
head_layers = (
    GlobalAveragePooling2D(name='global_avg_pool'),
    Dropout(0.4, name='dropout_x'),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001), name='dense_2'),
    Dropout(0.3, name='dropout_3'),
)

x = base_model.output
for layer in head_layers:
    x = layer(x)

# 7-way softmax output, one unit per emotion class; output dtype forced to float32.
outputs = Dense(7, activation='softmax', name='output', dtype='float32')(x)

base_model_loaded = Model(inputs=base_model.input, outputs=outputs)

# Compile with the same optimizer / loss / metric configuration as before.
base_model_loaded.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Overwrite the freshly initialised weights with the saved ones.
base_model_loaded.load_weights("effnet_model_saved_weights.h5")

In [5]:
#mobile_model_load = load_model(r'D:\AIML\EmotionRecognition\models\best_model.h5')

In [6]:
# Real-time emotion recognition from the default webcam (device 0).
#
# For every processed frame: detect a face with a Haar cascade, classify the
# face crop with `base_model_loaded`, and overlay the current label, the most
# frequent label so far and the FPS on the video. Press 'q' in the video
# window to quit.
#
# Requires `emotion_labels` and `base_model_loaded` from the earlier cells.
# The MobileNet / ensemble path is currently disabled; only the EfficientNet
# prediction is used.

# Haar Cascade for face detection. Loaded (and validated) before the camera is
# opened so a missing cascade file cannot leave the device locked.
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise RuntimeError("Failed to load haarcascade_frontalface_default.xml")

# Start Webcam
cap = cv.VideoCapture(0)

print("Starting Webcam.....press 'q' to quit.")

# Monotonic timestamp of the previous frame, used for the FPS overlay.
pTime = time.perf_counter()
# How many times each emotion has been predicted since the loop started.
overall_emotion = {label: 0 for label in emotion_labels}
frame_count = 0
# Run detection + inference on every N-th frame; other frames reuse the last result.
inference_interval = 1

# To retain previous prediction
last_emotion = "Detecting..."
last_box = None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Frame capture failed")
            break

        if frame_count % inference_interval == 0:
            # Convert to grayscale and equalize histogram
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            gray = cv.equalizeHist(gray)

            # Detect Face
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

            if len(faces) > 0:
                (x, y, w, h) = faces[0]  # take only the first face
                # Crop from the colour frame (not the equalized grayscale one).
                # NOTE(review): OpenCV frames are BGR. If the model was trained
                # on RGB images, convert the crop with
                # cv.cvtColor(face_img, cv.COLOR_BGR2RGB) first — confirm
                # against the training pipeline.
                face_img = frame[y:y+h, x:x+w]
                face_img_resized = cv.resize(face_img, (224, 224)).astype('float32')

                # Add the batch dimension and preprocess
                face_batch_effnet = preprocess_efficient(
                    np.expand_dims(face_img_resized, axis=0)
                )

                # Predict
                preds_effnet = base_model_loaded.predict(face_batch_effnet, verbose=0)
                emotion = emotion_labels[np.argmax(preds_effnet)]

                # Save for next frames
                last_emotion = emotion
                last_box = (x, y, w, h)

                overall_emotion[emotion] += 1

        most_common_emotion = max(overall_emotion, key=overall_emotion.get)

        # Draw previous prediction if available
        if last_box is not None:
            x, y, w, h = last_box
            cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            cv.putText(frame, last_emotion, (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
            cv.putText(frame, f'Overall: {most_common_emotion}', (x+100, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36, 255, 12), 2)

        # Always calculate FPS. perf_counter() is monotonic and high-resolution;
        # the guard still protects against a zero interval between two reads.
        cTime = time.perf_counter()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0.0
        pTime = cTime

        # Always show FPS
        cv.putText(frame, f'FPS: {int(fps)}', (10, 30), cv.FONT_HERSHEY_PLAIN, 1, (36, 255, 12), 2)

        frame_count += 1
        cv.imshow('RealTime Emotion Detection', frame)

        # waitKey also services the GUI event loop; the low byte is the key code.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised
    # (e.g. a prediction error or a kernel interrupt).
    cap.release()
    cv.destroyAllWindows()


Starting Webcam.....press 'q' to quit.
