# In [2]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque

# MediaPipe handles: drawing helpers, the face-mesh module, and a tracker
# configured for a video stream containing at most one face.
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(max_num_faces=1, static_image_mode=False)

# Rolling window of the 10 most recent expression labels; the displayed
# prediction is smoothed over this window.
expression_history = deque(maxlen=10)

# Neutral-face baselines. Both stay None until the calibration phase
# assigns the averaged ratios.
neutral_mouth_ratio = neutral_brow_ratio = None

def get_points(landmarks, image_shape):
    """Convert normalized landmarks to integer pixel coordinates.

    Args:
        landmarks: Object whose ``landmark`` attribute is an iterable of
            items exposing ``x`` and ``y`` attributes.
        image_shape: ``(height, width)`` pair used to scale ``y`` and ``x``.

    Returns:
        A list of ``(x_px, y_px)`` integer tuples, one per landmark, in the
        same order as ``landmarks.landmark``.
    """
    height, width = image_shape
    pixels = []
    for mark in landmarks.landmark:
        # int() truncates toward zero, so coordinates are not rounded.
        x_px = int(mark.x * width)
        y_px = int(mark.y * height)
        pixels.append((x_px, y_px))
    return pixels

# for detecting expression 
def Detect_express(landmarks, image_shape, calibrate=False):
    """Classify a facial expression from face-mesh landmarks.

    Mouth opening and brow-to-eye distance are normalized by mouth width and
    compared against the module-level neutral baselines
    (``neutral_mouth_ratio`` / ``neutral_brow_ratio``).

    Args:
        landmarks: Object whose ``landmark`` attribute is an indexable
            sequence of items with normalized ``x`` / ``y`` attributes.
        image_shape: ``(height, width)`` of the frame, used to convert the
            normalized coordinates to pixels.
        calibrate: When true, skip classification and return the raw ratios
            so the caller can average them into the neutral baselines.

    Returns:
        ``(mouth_ratio, brow_ratio)`` when ``calibrate`` is true; otherwise
        one of the labels ``"Yawning "``, ``"Eyebrow Raised "``,
        ``"Surprised"``, ``"Happy"``, ``"Angry"``, ``"Thinking ..."``,
        ``"Sad"`` or ``"Neutral"`` (trailing spaces are part of the labels).

    Raises:
        RuntimeError: If classification is requested before the neutral
            baselines have been set.
    """
    ih, iw = image_shape
    mesh = landmarks.landmark

    def _pixel(index):
        # Convert only the landmarks used below instead of the whole mesh.
        lm = mesh[index]
        return (int(lm.x * iw), int(lm.y * ih))

    # Face-mesh landmark indices used for the expression heuristics
    left_mouth = _pixel(61)
    right_mouth = _pixel(291)
    upper_lip = _pixel(13)
    lower_lip = _pixel(14)
    left_eyebrow = _pixel(105)
    right_eyebrow = _pixel(334)
    left_eye_top = _pixel(159)
    nose_tip = _pixel(1)
    chin_tip = _pixel(152)

    # Pixel distances
    mouth_width = np.linalg.norm(np.array(left_mouth) - np.array(right_mouth))
    mouth_openness = np.linalg.norm(np.array(upper_lip) - np.array(lower_lip))
    brow_eye_dist = abs(left_eyebrow[1] - left_eye_top[1])

    # Coordinates are integer pixels, so a non-zero width is always >= 1.
    # Clamp the degenerate zero case to one pixel: dividing by zero would
    # yield nan/inf, and a single such sample poisons the calibration mean.
    if mouth_width == 0:
        mouth_width = 1.0

    # Normalize by mouth width so the ratios are independent of face size
    ratio = mouth_openness / mouth_width
    brow_ratio = brow_eye_dist / mouth_width

    # Calibration mode: hand the raw ratios back to the caller
    if calibrate:
        return ratio, brow_ratio

    if neutral_mouth_ratio is None or neutral_brow_ratio is None:
        raise RuntimeError(
            "Detect_express called before calibration: "
            "neutral_mouth_ratio / neutral_brow_ratio are not set"
        )

    # Deviation from the calibrated neutral face
    mouth_change = ratio - neutral_mouth_ratio
    brow_change = brow_ratio - neutral_brow_ratio

    # Thinking gesture: one eyebrow raised compared to the other.
    # NOTE: this threshold is in raw pixels, so it depends on resolution.
    brow_diff = abs(left_eyebrow[1] - right_eyebrow[1])
    brow_diff_threshold = 8  # tweak this threshold for sensitivity

    # Yawning: mouth opened very wide (absolute ratio, not baseline-relative)
    yawning_threshold = 0.25
    if ratio > yawning_threshold:
        return "Yawning "

    # Eyebrow raise: brow-to-eye distance noticeably larger than neutral
    eyebrow_raise_threshold = 0.1
    if brow_change > eyebrow_raise_threshold:
        return "Eyebrow Raised "

    # Remaining expressions, checked in priority order
    if mouth_change > 0.12 and brow_change > 0.05:
        return "Surprised"
    if mouth_change > 0.08:
        return "Happy"
    if brow_change < -0.03:
        return "Angry"
    if brow_diff > brow_diff_threshold:
        # A difference above the (positive) threshold already implies one
        # eyebrow is higher than the other, whichever side it is.
        return "Thinking ..."
    # NOTE: the 30 is a raw pixel distance between nose tip and chin.
    if chin_tip[1] - nose_tip[1] > 30 and mouth_change < 0.02:
        return "Sad"
    return "Neutral"

# start capturing
cap = cv2.VideoCapture(0)

# Ask the camera for 1280x720; frames are also resized in the loop in case
# the camera ignores the request.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

calibrated = False
if cap.isOpened():
    print("CALIBRATION: Please keep a neutral face...")
else:
    # cap.read() will fail on the first iteration; say why instead of
    # exiting silently.
    print("ERROR: could not open camera 0.")

# Per-frame ratios collected while the user holds a neutral face
neutral_mouths = []
neutral_brows = []

frame_count = 0
calibration_frames = 60

# try/finally guarantees the camera and windows are released even if
# processing a frame raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Resize frame to 1280x720 if camera does not support native resolution
        frame = cv2.resize(frame, (1280, 720))

        # mirror
        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb)

        if results.multi_face_landmarks:
            face_landmarks = results.multi_face_landmarks[0]

            # Bounding box = extent of all landmark pixels
            points = get_points(face_landmarks, frame.shape[:2])
            xs = [p[0] for p in points]
            ys = [p[1] for p in points]
            x_min, x_max = min(xs), max(xs)
            y_min, y_max = min(ys), max(ys)

            # Draw bounding box around the face
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

            if not calibrated:
                m_ratio, b_ratio = Detect_express(face_landmarks, frame.shape[:2], calibrate=True)

                # Skip degenerate samples: one nan/inf would poison the mean
                # and break every later comparison against the baseline.
                if np.isfinite(m_ratio) and np.isfinite(b_ratio):
                    neutral_mouths.append(m_ratio)
                    neutral_brows.append(b_ratio)
                    frame_count += 1

                cv2.putText(frame, "Calibrating... Keep a neutral face", (30, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

                if frame_count >= calibration_frames:
                    neutral_mouth_ratio = np.mean(neutral_mouths)
                    neutral_brow_ratio = np.mean(neutral_brows)
                    calibrated = True
                    print("Calibration completed.")
            else:
                # detecting expressions of calibrated face
                expression = Detect_express(face_landmarks, frame.shape[:2])
                expression_history.append(expression)

                # Smooth the prediction with a majority vote over the recent
                # history. Iterating the deque (not a set) makes ties resolve
                # deterministically to the label seen earliest in the window.
                most_common = max(expression_history, key=expression_history.count)

                label = f"Expression: {most_common}"

                # Get text size
                (text_width, text_height), baseline = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)

                # Label background sits directly above the bounding box
                # (10 px padding). When the face touches the top of the frame
                # the label is clamped to y=0 and the text moves with it, so
                # it is never drawn off-screen.
                label_height = text_height + baseline + 10
                label_y_min = max(0, y_min - label_height)
                label_y_max = label_y_min + label_height
                label_x_min = x_min
                label_x_max = x_min + text_width + 10  # 10 pixels padding right

                # Draw filled rectangle for label background
                cv2.rectangle(frame, (label_x_min, label_y_min), (label_x_max, label_y_max),
                              (255, 0, 0), cv2.FILLED)

                # Put expression text over the rectangle
                cv2.putText(frame, label, (x_min + 5, label_y_max - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

                # draw face mesh
                mp_drawing.draw_landmarks(
                    frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1)
                )

        # show frame (shared by the calibration and detection paths)
        cv2.imshow("Facial Expression Recognition", frame)

        # press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


# Output:
# CALIBRATION: Please keep a neutral face...
# Calibration completed.
