In [2]:
import tensorflow as tf
print("Tensorflow version: ", tf.__version__)
print("CUDA Built: ", tf.test.is_built_with_cuda())
print("GPU: ", tf.config.list_physical_devices("GPU"))

Tensorflow version:  2.10.1
CUDA Built:  True
GPU:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Setting memory growth
# By Default, Tensorflow may allocate all GPU memory at once, which can cause issue if 
# you're running multiple GPU applications
# set memory growth tells Tensorflow to only allocate memory as needed, dynamically growing the memory footprint as needed
# This helps avoid out-of-memory errors and allows multiple programs to share GPU efficiently/safely

physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
# Setting mixed precision

# Mixed precision uses both float16 and float32 during training
# uses float16 where possible 
# keeps critical variables in float32 for numerical stability

#tf.keras.mixed_precision.set_global_policy("mixed_float16")

In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Activation, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras.applications.mobilenet import preprocess_input as preprocess_mobile
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficient


from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from sklearn.metrics import f1_score, accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2 as cv
import time


### MobileNet V1 Model

In [4]:
# MobileNet Model

mobile = MobileNet(weights='imagenet', input_shape=(224,224,3)) 
# mobile = MobileNet()

In [5]:
# Our Mobil Net Model
mobile_model = Sequential()

In [6]:
c = 0
for layer in mobile.layers[:-5]:
    mobile_model.add(layer)
    c +=1
print(c)

86


In [9]:
trainable_params = np.sum([np.prod(v.get_shape()) for v in mobile_model.trainable_weights])
non_trainable_params = np.sum([np.prod(v.get_shape()) for v in mobile_model.non_trainable_weights])
total_params = trainable_params + non_trainable_params
    
print(trainable_params)
print(non_trainable_params)
print(total_params)

3206976
21888
3228864


In [8]:
mobile_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 112, 112, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)     288       
                                                                 
 conv_dw_1_bn (BatchNormaliz  (None, 112, 112, 32)     128       
 ation)                                                          
                                                                 
 conv_dw_1_relu (ReLU)       (None, 112, 112, 32)      0

In [10]:
trainable_layers = 50 # To be set as a constant

if trainable_layers == 0: 
    mobile_model.trainable = False
elif trainable_layers == 1:
    mobile_model.trainable = True
elif trainable_layers < 0:
    for layer in mobile_model.layers[:trainable_layers]:
        layer.trainable = False
    for layer in mobile_model.layers[trainable_layers:]:
        layer.trainable = True

# for layer in mobile_model.layers[:-50]:
#     layer.trainable=False


In [10]:
mobile_model.add(GlobalAveragePooling2D())
mobile_model.add(Dropout(0.5, name='dropout_x'))
mobile_model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001), name='dense_1'))
mobile_model.add(Dropout(0.3, name='dropout_2'))
# mobile_model.add(Dense(7, activation='softmax', name='output'))
mobile_model.add(Dense(7, activation='softmax', name='output', dtype='float32'))

In [11]:
mobile_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-4), metrics=['accuracy'])

In [12]:
train_path = r'D:/AIML/fer2013/train'

In [13]:
# Apply data augmentation and MobileNet preprocessing to train the data

train_datagen = ImageDataGenerator(
    preprocessing_function = preprocess_mobile,
    rotation_range = 10,
    zoom_range = 0.1,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.1,
    horizontal_flip = True,
    fill_mode = 'nearest',
    validation_split = 0.2
)

val_datagen = ImageDataGenerator(
    preprocessing_function = preprocess_mobile,
    validation_split = 0.2
)

train_generator = train_datagen.flow_from_directory(
    directory = train_path,
    target_size = (224,224),
    batch_size = 32,
    class_mode = 'categorical',
    subset = 'training',
    shuffle = True,
    seed = 42
)

val_generator = val_datagen.flow_from_directory(
    directory = train_path,
    target_size = (224,224),
    batch_size = 32,
    class_mode = 'categorical',
    subset = 'validation',
    shuffle = False,      # Turn shuffle off for validation
    seed = 42
)
    

Found 22968 images belonging to 7 classes.
Found 5741 images belonging to 7 classes.


In [14]:
# Define Class weights

class_weights = class_weight.compute_class_weight(
    class_weight = 'balanced',
    classes = np.unique(train_generator.classes),
    y = train_generator.classes)

In [15]:
class_weights

array([1.02664044, 9.40155546, 1.00095877, 0.56845857, 0.82606819,
       0.84915705, 1.29331607])

In [16]:
class_weights = dict(enumerate(class_weights))

In [17]:
class_weights

{0: 1.0266404434114071,
 1: 9.401555464592715,
 2: 1.0009587727708533,
 3: 0.5684585684585685,
 4: 0.826068191627104,
 5: 0.8491570541259982,
 6: 1.2933160650937552}

In [18]:
# Configuring callbacks

lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)
early_stop = EarlyStopping(monitor = 'val_loss', patience=7, restore_best_weights=True)


In [19]:
# Training 
Epochs = 30
Verbose = 1

mobile_model.fit( x = train_generator,
                 validation_data = val_generator,
                 epochs = Epochs,
                 verbose = Verbose,
                 class_weight = class_weights,
                 callbacks = [lr_schedule, early_stop]
                )

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 22: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 23/30


<keras.callbacks.History at 0x29d6eaba130>

In [20]:
mobile_model.save('mobile_model_saved.keras')  # Keras native format


In [14]:
# Load the model

mobile_model_load = load_model('mobile_model_saved.keras')

In [15]:
test_path = r'D:/AIML/fer2013/test'

In [16]:
test_batches = ImageDataGenerator(
    preprocessing_function = preprocess_mobile).flow_from_directory(
    directory = test_path,
    target_size=(224,224),
    batch_size=10,
    shuffle=False,
)

Found 7178 images belonging to 7 classes.


In [17]:
test_labels = test_batches.classes

In [18]:
predictions = mobile_model_load.predict(x=test_batches, verbose=0)

In [19]:
predicted_labels = np.argmax(predictions, axis=1)

In [20]:
# | Average Type       | Meaning                                                                                                                              | Use When                                                                      |
# | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------- |
# | `'macro'`          | Compute F1 for each class **independently**, then take the **unweighted mean**. Treats all classes equally, regardless of frequency. | Classes are imbalanced, but you want **equal importance** given to each class |
# | `'micro'`          | Aggregate total true positives, false negatives, and false positives before calculating F1.                                          | You care about **overall accuracy**, not per-class performance                |
# | `'weighted'`       | Compute F1 per class, then take the **weighted mean by support (number of true instances)**.                                         | When classes are imbalanced, but you want to **weight by frequency**          |
# | `None` or `'none'` | Returns F1 score **per class** (as an array)                                                                                         | You want class-specific insights                                              |


f1_score_cal = f1_score(test_labels, predicted_labels, average='weighted')

In [21]:
f1_score_cal

0.6626569327935868

### EfficientNet B0 Model

In [4]:
# Image Data Generator
train_path = r'D:/AIML/fer2013/train'
train_datagen_eff = ImageDataGenerator(
    preprocessing_function = preprocess_efficient,
    rotation_range = 15,
    zoom_range = 0.15,
    width_shift_range= 0.1,
    height_shift_range= 0.1,
    shear_range = 0.1,
    horizontal_flip = True,
    fill_mode = 'nearest',
    validation_split = 0.2
)


train_generator_eff = train_datagen_eff.flow_from_directory(
    train_path,
    target_size = (224,224),
    batch_size = 8,
    class_mode = 'categorical',
    subset = 'training',
    shuffle = True,
    seed = 42
)

val_generator_eff = train_datagen_eff.flow_from_directory(
    train_path,
    target_size = (224,224),
    batch_size = 8,
    class_mode = 'categorical',
    subset ='validation',
    shuffle=False,
    seed = 42
)
    
    

Found 22968 images belonging to 7 classes.
Found 5741 images belonging to 7 classes.


In [5]:
# Compute class weight

class_weights_arr = class_weight.compute_class_weight(
    class_weight = 'balanced',
    classes = np.unique(train_generator_eff.classes),
    y = train_generator_eff.classes
)

In [6]:
class_weights_eff = dict(enumerate(class_weights_arr))

In [7]:
# Build the Model
base_model = EfficientNetB0(weights = 'imagenet', include_top = False, input_shape=(224,224,3))
base_model.trainable = True

In [8]:
effnet_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax', dtype='float32')
])
    

In [9]:
effnet_model.compile( loss='categorical_crossentropy', optimizer = Adam(learning_rate=1e-4), metrics=['accuracy'])

In [10]:
# Callbacks
early_stop_eff = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
lr_schedule_eff = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)

In [11]:
effnet_model.fit(
    x = train_generator_eff,
    validation_data = val_generator_eff,
    epochs = 30,
    class_weight = class_weights_eff,
    callbacks = [early_stop_eff, lr_schedule_eff]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 13: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 17/30


<keras.callbacks.History at 0x203f4a02100>

In [24]:
effnet_model.fit(
    x = train_generator_eff,
    validation_data = val_generator_eff,
    epochs = 30,
    class_weight = class_weights_eff,
    callbacks = [early_stop_eff, lr_schedule_eff]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 5: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 8: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.


<keras.callbacks.History at 0x28f27762520>

In [12]:
#effnet_model.save('effnet_model_saved.keras')  # Keras native format
effnet_model.save_weights("effnet_model_saved.h5")

In [5]:
# Check afterwards

# Rebuild same architecture
base_model_loaded = EfficientNetB0(include_top=False, input_shape=(224, 224, 3), weights='imagenet')
base_model_loaded.trainable = True  # Or use .trainable = False if you want to freeze

effnet_model_loaded = Sequential([
    base_model_loaded,
    GlobalAveragePooling2D(),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax', dtype='float32')
])

# Compile the model again
effnet_model_loaded.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Load weights
effnet_model_loaded.load_weights("effnet_model_saved.h5")

In [6]:
#testing 

test_path = r'D:/AIML/fer2013/test'
test_batches_eff = ImageDataGenerator(
    preprocessing_function = preprocess_efficient).flow_from_directory(
    directory = test_path,
    target_size=(224,224),
    batch_size=10,
    shuffle=False,
)

Found 7178 images belonging to 7 classes.


In [9]:
test_labels_eff = test_batches_eff.classes

In [10]:
predictions_eff = effnet_model_loaded.predict(x=test_batches_eff, verbose=0)

In [11]:
predicted_labels_eff = np.argmax(predictions_eff, axis=1)

In [12]:
f1_score_cal_eff = f1_score(test_labels_eff, predicted_labels_eff, average='weighted')

In [13]:
f1_score_cal_eff

0.6537826501281392

### Ensemble Metrics

In [22]:
# Ensure matching Order

assert np.array_equal( test_batches_eff.filenames, test_batches.filenames), "Image Order Mismatch!!"

In [23]:
# Ensemble Predictions

ensemble_preds = (predictions_eff + predictions)/2.0

In [24]:
ensemble_labels = np.argmax(ensemble_preds, axis = 1)

In [25]:
# Accuracy and F1-score
acc_mobile = accuracy_score( test_labels_eff, predicted_labels)
acc_eff = accuracy_score( test_labels_eff, predicted_labels_eff)
acc_ensemble = accuracy_score( test_labels_eff, ensemble_labels)

In [26]:
f1_mobile = f1_score( test_labels_eff, predicted_labels, average='macro')
f1_effnet = f1_score ( test_labels_eff, predicted_labels_eff, average='macro')
f1_ensemble = f1_score( test_labels_eff, ensemble_labels, average='macro')

In [27]:
# Print Results

print('Testing Accuracy...........\n')
print( f"Mobile Net Accuracy: {acc_mobile * 100:.2f}%")
print( f"Efficient Net Accuracy: {acc_eff * 100:.2f}%")
print( f"Ensemble Accuracy: {acc_ensemble * 100:.2f}%")

print('\nMacro F1 Score............\n')
print(f"Mobile Net : {f1_mobile:.4f}")
print(f"Efficient Net: {f1_effnet:.4f}")
print(f"Ensemble: {f1_ensemble:.4f}")

Testing Accuracy...........

Mobile Net Accuracy: 66.72%
Efficient Net Accuracy: 65.71%
Ensemble Accuracy: 68.04%

Macro F1 Score............

Mobile Net : 0.6361
Efficient Net: 0.6339
Ensemble: 0.6552


### Ensemble Live Prediction

In [2]:
# Emotion classes
emotion_labels = ["Angry", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

In [3]:
# Loading the models
# Mobile Net
mobile_model_load = load_model('mobile_model_saved.keras')

# Efficient Net

# Rebuild same architecture
base_model_loaded = EfficientNetB0(include_top=False, input_shape=(224, 224, 3), weights='imagenet')
base_model_loaded.trainable = True  # Or use .trainable = False if you want to freeze

effnet_model_loaded = Sequential([
    base_model_loaded,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax', dtype='float32')
])

# Compile the model again
effnet_model_loaded.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Load weights
effnet_model_loaded.load_weights("effnet_model_saved.h5")



In [8]:


# Start Webcam
cap = cv.VideoCapture(0)

# Haar Cascade for face detection
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

print("Starting Webcam.....press 'q' to quit.")

pTime = 0
overall_emotion = {'Angry':0, 'Disgust':0, 'Fear':0, 'Happy':0, 'Neutral':1, 'Sad':0, 'Surprise':0}

while True:
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed")
        break


    # Convert to grayscale for face detection
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # Detect Face
    faces = face_cascade.detectMultiScale( gray, scaleFactor=1.3, minNeighbors = 5)

    for (x,y,w,h) in faces:
        # Extract face Region-of-Interest(ROI) and convert to float32
        face_img = frame[y:y+h, x:x+w]
        face_img_resized = cv.resize(face_img, (224,224)).astype('float32')

        face_batch_mobile = np.expand_dims(face_img_resized.copy(), axis = 0)
        face_batch_effnet = np.expand_dims(face_img_resized.copy(), axis = 0)

        # apply model specific preprocessing
        face_batch_mobilenet = preprocess_mobile(face_batch_mobile)
        face_batch_effnet = preprocess_efficient(face_batch_effnet)

        #predict using both models
        preds_mobile = mobile_model_load.predict(face_batch_mobilenet, verbose=0)
        preds_effnet = effnet_model_loaded.predict(face_batch_effnet, verbose=0)

        #Ensemble Average
        ensemble_pred = (preds_mobile + preds_effnet)/2.0
        print(np.argmax(ensemble_pred))
        emotion = emotion_labels[np.argmax(ensemble_pred)]

        #emotion = emotion_labels[np.argmax(preds_mobile)]

        overall_emotion[emotion] +=1
        most_common_emotion = max(overall_emotion, key = overall_emotion.get)
       # emotion = emotion_labels[np.argmax(preds_effnet)]

        cTime = time.time()
        fps = 1/(cTime - pTime)
        pTime = cTime

        #Draw Box and display emotion
        cv.rectangle(frame, (x,y), (x+w, y+h), (0,255,0), 2)
        cv.putText(frame, emotion, (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
        cv.putText(frame, f'FPS: {int(fps)}', (x, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36,255,12), 2)
        cv.putText(frame, f'Overall Emotion: {most_common_emotion}', (x+100, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36,255,12), 2)

    cv.imshow('RealTime Emotion Detection', frame)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
cv.destroyAllWindows()



Starting Webcam.....press 'q' to quit.
4
4
4
4
4
5
4
4
4
5
4
4
5
4
4
5
5
4
4
5
4
5
5
4
4
5
3
3
3
3
5
5
3
3
3
4
5
5
3
5
5
5
5
3
3
5
3
3
5
5
5
4
5
5


In [5]:


# Start Webcam
cap = cv.VideoCapture(0)

# Haar Cascade for face detection
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

print("Starting Webcam.....press 'q' to quit.")

pTime = 0
overall_emotion = {'Angry':0, 'Disgust':0, 'Fear':0, 'Happy':0, 'Neutral':1, 'Sad':0, 'Surprise':0}
frame_count = 0
inference_interval = 5


while True:
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed")
        break

    if frame_count % inference_interval == 0:
        
        # Convert to grayscale for face detection
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    
        # Detect Face
        faces = face_cascade.detectMultiScale( gray, scaleFactor=1.3, minNeighbors = 5)
    
        for (x,y,w,h) in faces:
            # Extract face Region-of-Interest(ROI) and convert to float32
            face_img = frame[y:y+h, x:x+w]
            face_img_resized = cv.resize(face_img, (224,224)).astype('float32')
    
            face_batch_mobile = np.expand_dims(face_img_resized.copy(), axis = 0)
            face_batch_effnet = np.expand_dims(face_img_resized.copy(), axis = 0)
    
            # apply model specific preprocessing
            face_batch_mobilenet = preprocess_mobile(face_batch_mobile)
            face_batch_effnet = preprocess_efficient(face_batch_effnet)
    
            #predict using both models
            preds_mobile = mobile_model_load.predict(face_batch_mobilenet, verbose=0)
            preds_effnet = effnet_model_loaded.predict(face_batch_effnet, verbose=0)
    
            #Ensemble Average
            ensemble_pred = (preds_mobile + preds_effnet)/2.0
            emotion = emotion_labels[np.argmax(ensemble_pred)]
    
            #emotion = emotion_labels[np.argmax(preds_mobile)]
    
            overall_emotion[emotion] +=1
            most_common_emotion = max(overall_emotion, key = overall_emotion.get)
           # emotion = emotion_labels[np.argmax(preds_effnet)]
    
    cTime = time.time()
    fps = 1/(cTime - pTime)
    pTime = cTime
    
    #Draw Box and display emotion
    cv.rectangle(frame, (x,y), (x+w, y+h), (0,255,0), 2)
    cv.putText(frame, emotion, (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
    cv.putText(frame, f'FPS: {int(fps)}', (x, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36,255,12), 2)
    cv.putText(frame, f'Overall Emotion: {most_common_emotion}', (x+100, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36,255,12), 2)


    frame_count += 1
    cv.imshow('RealTime Emotion Detection', frame)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
cv.destroyAllWindows()



Starting Webcam.....press 'q' to quit.


### Final One

In [5]:
# Start Webcam
cap = cv.VideoCapture(0)

# Haar Cascade for face detection
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

print("Starting Webcam.....press 'q' to quit.")

pTime = 0
overall_emotion = {label: 0 for label in ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']}
frame_count = 0
inference_interval = 5

# To retain previous prediction
last_emotion = "Detecting..."
last_box = None

while True:
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed")
        break

    # Always calculate FPS
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime

    if frame_count % inference_interval == 0:
        # Convert to grayscale and equalize histogram
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        gray = cv.equalizeHist(gray)

        # Detect Face
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

        if len(faces) > 0:
            (x, y, w, h) = faces[0]  # take only the first face
            face_img = frame[y:y+h, x:x+w]
            face_img_resized = cv.resize(face_img, (224, 224)).astype('float32')

            face_batch_mobile = np.expand_dims(face_img_resized.copy(), axis=0)
            face_batch_effnet = np.expand_dims(face_img_resized.copy(), axis=0)

            # Preprocessing
            face_batch_mobilenet = preprocess_mobile(face_batch_mobile)
            face_batch_effnet = preprocess_efficient(face_batch_effnet)

            # Predict
            preds_mobile = mobile_model_load.predict(face_batch_mobilenet, verbose=0)
            preds_effnet = effnet_model_loaded.predict(face_batch_effnet, verbose=0)

            # Ensemble
            ensemble_pred = (preds_mobile + preds_effnet) / 2.0
            emotion = emotion_labels[np.argmax(ensemble_pred)]

            # Save for next frames
            last_emotion = emotion
            last_box = (x, y, w, h)

            overall_emotion[emotion] += 1

    most_common_emotion = max(overall_emotion, key=overall_emotion.get)

    # Draw previous prediction if available
    if last_box is not None:
        x, y, w, h = last_box
        cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv.putText(frame, last_emotion, (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        cv.putText(frame, f'Overall: {most_common_emotion}', (x+100, y+h+20), cv.FONT_HERSHEY_PLAIN, 0.9, (36, 255, 12), 2)

    # Always show FPS
    cv.putText(frame, f'FPS: {int(fps)}', (10, 30), cv.FONT_HERSHEY_PLAIN, 1, (36, 255, 12), 2)

    frame_count += 1
    cv.imshow('RealTime Emotion Detection', frame)

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


Starting Webcam.....press 'q' to quit.


### Increased FPS

In [6]:
# Initialize video capture
cap = cv.VideoCapture(0)

# Haar cascade for face detection
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

print("Starting Webcam.....press 'q' to quit.")

pTime = 0
frame_count = 0
inference_interval = 5

# Emotion counters
overall_emotion = {'Angry': 0, 'Disgust': 0, 'Fear': 0, 'Happy': 0, 'Neutral': 1, 'Sad': 0, 'Surprise': 0}
most_common_emotion = 'Neutral'

# Track last detections
last_detections = []  # List of (x, y, w, h, emotion)

while True:
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed")
        break

    if frame_count % inference_interval == 0:
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

        if len(faces) > 0:
            last_detections = []

            for (x, y, w, h) in faces:
                face_img = frame[y:y+h, x:x+w]
                face_img_resized = cv.resize(face_img, (224, 224)).astype('float32')

                face_batch_mobile = np.expand_dims(face_img_resized.copy(), axis=0)
                face_batch_effnet = np.expand_dims(face_img_resized.copy(), axis=0)

                # Apply model-specific preprocessing
                face_batch_mobilenet = preprocess_mobile(face_batch_mobile)
                face_batch_effnet = preprocess_efficient(face_batch_effnet)

                # Get predictions from both models
                preds_mobile = mobile_model_load.predict(face_batch_mobilenet, verbose=0)
                preds_effnet = effnet_model_loaded.predict(face_batch_effnet, verbose=0)

                # Ensemble average
                ensemble_pred = (preds_mobile + preds_effnet) / 2.0
                emotion = emotion_labels[np.argmax(ensemble_pred)]

                # Update emotion tracking
                overall_emotion[emotion] += 1
                most_common_emotion = max(overall_emotion, key=overall_emotion.get)

                last_detections.append((x, y, w, h, emotion))

    # Draw last known detections on every frame
    for (x, y, w, h, emotion) in last_detections:
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv.putText(frame, emotion, (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        cv.putText(frame, f'Overall Emotion: {most_common_emotion}', (x + 100, y + h + 20),
                   cv.FONT_HERSHEY_PLAIN, 0.9, (36, 255, 12), 2)

    # FPS calculation
    cTime = time.time()
    fps = 1 / (cTime - pTime) if cTime != pTime else 0
    pTime = cTime
    cv.putText(frame, f'FPS: {int(fps)}', (20, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    # Display the frame
    cv.imshow('RealTime Emotion Detection', frame)
    frame_count += 1

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
cv.destroyAllWindows()


Starting Webcam.....press 'q' to quit.


#### A. Temporal Emotion Smoothing
Keeps a history of the last N=10 predicted emotions.

Displays the most common emotion to reduce jitter.

#### B. Frame Downscaling for Faster Face Detection
Speeds up face detection significantly with no major quality loss.

#### C. Threading for Non-Blocking Emotion Inference
Keeps UI smooth while predictions run in a background thread.

In [8]:
import cv2 as cv
import numpy as np
import time
import threading
from collections import deque, Counter
from queue import Queue

# Your model loading & preprocessing code here
# Example:
# mobile_model_load = ...
# effnet_model_loaded = ...
# preprocess_mobile = ...
# preprocess_efficient = ...
# emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

# Initialize webcam
cap = cv.VideoCapture(0)
face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

print("Starting Webcam.....press 'q' to quit.")

# Emotion history
emotion_history = deque(maxlen=10)
overall_emotion = {label: 0 for label in ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']}
overall_emotion['Neutral'] = 1  # Start with some value
most_common_emotion = 'Neutral'

# Threading result queue
results_queue = Queue()
lock = threading.Lock()

# Store last predictions
last_detections = []

def process_faces(frame, faces):
    global last_detections, overall_emotion, most_common_emotion

    detections = []

    for (x, y, w, h) in faces:
        face_img = frame[y:y + h, x:x + w]
        face_img_resized = cv.resize(face_img, (224, 224)).astype('float32')

        face_batch_mobile = np.expand_dims(face_img_resized.copy(), axis=0)
        face_batch_effnet = np.expand_dims(face_img_resized.copy(), axis=0)

        face_batch_mobilenet = preprocess_mobile(face_batch_mobile)
        face_batch_effnet = preprocess_efficient(face_batch_effnet)

        preds_mobile = mobile_model_load.predict(face_batch_mobilenet, verbose=0)
        preds_effnet = effnet_model_loaded.predict(face_batch_effnet, verbose=0)

        ensemble_pred = (preds_mobile + preds_effnet) / 2.0
        raw_emotion = emotion_labels[np.argmax(ensemble_pred)]

        # Add to history for smoothing
        emotion_history.append(raw_emotion)
        smoothed_emotion = Counter(emotion_history).most_common(1)[0][0]

        with lock:
            overall_emotion[smoothed_emotion] += 1
            most_common_emotion = max(overall_emotion, key=overall_emotion.get)

        detections.append((x, y, w, h, smoothed_emotion))

    results_queue.put(detections)

pTime = 0

while True:
    ret, frame = cap.read()
    if not ret:
        print("Frame capture failed")
        break

    # Resize for faster detection
    scale = 0.5
    small_frame = cv.resize(frame, (0, 0), fx=scale, fy=scale)
    gray_small = cv.cvtColor(small_frame, cv.COLOR_BGR2GRAY)
    faces_small = face_cascade.detectMultiScale(gray_small, scaleFactor=1.3, minNeighbors=5)

    # Scale boxes back to original size
    faces = [(int(x / scale), int(y / scale), int(w / scale), int(h / scale)) for (x, y, w, h) in faces_small]

    if len(faces) > 0:
        thread = threading.Thread(target=process_faces, args=(frame.copy(), faces))
        thread.start()

    # Update detection results if thread has finished
    if not results_queue.empty():
        last_detections = results_queue.get()

    # Draw results from last detection
    for (x, y, w, h, emotion) in last_detections:
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv.putText(frame, emotion, (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        cv.putText(frame, f'Overall: {most_common_emotion}', (x + 100, y + h + 20),
                   cv.FONT_HERSHEY_PLAIN, 0.9, (36, 255, 12), 2)

    # FPS calculation
    cTime = time.time()
    fps = 1 / (cTime - pTime) if cTime != pTime else 0
    pTime = cTime
    cv.putText(frame, f'FPS: {int(fps)}', (20, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    cv.imshow('RealTime Emotion Detection', frame)

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


Starting Webcam.....press 'q' to quit.


In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.metrics import confusion_matrix
import os

def plot_accuracy_loss(history, save_path='accuracy_loss.png'):
    acc = history.history.get('accuracy', [])
    val_acc = history.history.get('val_accuracy', [])
    loss = history.history.get('loss', [])
    val_loss = history.history.get('val_loss', [])
    epochs = range(1, len(acc)+1)

    plt.figure(figsize=(10, 4))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, label='Train Acc')
    plt.plot(epochs, val_acc, label='Val Acc')
    plt.title('Accuracy vs Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, label='Train Loss')
    plt.plot(epochs, val_loss, label='Val Loss')
    plt.title('Loss vs Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def plot_confusion(y_true, y_pred, labels, save_path='confusion_matrix.png'):
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def plot_fps(before_fps, after_fps, save_path='fps_comparison.png'):
    plt.figure(figsize=(5, 4))
    plt.bar(['Before', 'After'], [before_fps, after_fps], color=['red', 'green'])
    plt.ylabel('FPS')
    plt.title('FPS Improvement')
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def plot_emotion_distribution(emotion_counts, save_path='emotion_distribution.png'):
    """
    emotion_counts: dictionary like {'happy': 5000, 'sad': 4000, ...}
    """
    df = pd.DataFrame(list(emotion_counts.items()), columns=['Emotion', 'Count'])
    plt.figure(figsize=(8, 5))
    sns.barplot(data=df, x='Emotion', y='Count', palette='pastel')
    plt.title('Emotion Distribution')
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
