# 05_Webcam_Demo.ipynb
### Real-time Facial Emotion Recognition Demo (Webcam)
This notebook demonstrates running the webcam demo locally. It uses MTCNN for face detection and a trained Keras model for emotion classification. For best results, run this notebook in a local environment (not a cloud notebook) where a webcam is available.
If running inside Jupyter, the video window will open via OpenCV (`cv2.imshow`). Press **q** in the video window to quit.

In [1]:
# Install dependencies (run once in the environment)
!pip install mtcnn opencv-python tensorflow pretty_midi numpy


Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl.metadata (5.8 kB)
Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl.metadata (19 kB)
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp39-cp39-macosx_12_0_arm64.whl.metadata (4.5 kB)
Collecting pretty_midi
  Downloading pretty_midi-0.2.11.tar.gz (5.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.6/5.6 MB 1.6 MB/s  0:00:03 eta 0:00:01
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting keras>=2.0.0 (from mtcnn)
  Downloading keras-3.10.0-py3-none-any.whl.metadata (6.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Coll

In [2]:
import os
import time
import cv2
import numpy as np
import tensorflow as tf
from mtcnn.mtcnn import MTCNN
from datetime import datetime
# Report the OpenCV build in use so display/camera issues can be matched to a version.
print(f"OpenCV version: {cv2.__version__}")


OpenCV version: 4.12.0


In [3]:
# ---- Configuration: adjust these values for your setup ----

# Trained Keras model file to load.
MODEL_PATH = '../models/mobilenet_emotion.h5'

# Dataset root; the names of the folders under its 'train' sub-directory become the labels.
DATA_DIR = '../data/cropped_faces'

# Side length (pixels) each face crop is resized to before prediction.
INPUT_SIZE = 224

# Index passed to cv2.VideoCapture; change it if you have multiple cameras.
CAMERA_INDEX = 0

# Set to True to save a MIDI file whenever the detected emotion changes.
GENERATE_MIDI = False

# Destination folder for generated MIDI files (created here if it does not exist).
MIDI_OUT_DIR = 'outputs/generated_music'
os.makedirs(MIDI_OUT_DIR, exist_ok=True)


In [4]:
def get_labels_from_train_dir(train_dir):
    """Return the sorted names of the sub-directories directly inside ``train_dir``.

    Args:
        train_dir: Path of the directory whose immediate sub-directories are
            treated as class labels. Regular files in it are ignored.

    Returns:
        A list of sub-directory names in ascending (``list.sort``) order;
        empty if ``train_dir`` contains no sub-directories.

    Raises:
        ValueError: If ``train_dir`` is not an existing directory.
    """
    if not os.path.isdir(train_dir):
        raise ValueError(f"Train dir not found: {train_dir}")

    class_names = []
    for entry in os.listdir(train_dir):
        # Keep directories only; stray files in the folder are not labels.
        if os.path.isdir(os.path.join(train_dir, entry)):
            class_names.append(entry)
    class_names.sort()
    return class_names

# Derive the label list from the folder names of the training split.
# NOTE(review): predictions are mapped to labels by index, so this sorted order
# presumably has to match the class order used at training time — confirm.
train_split_dir = os.path.join(DATA_DIR, 'train')
labels = get_labels_from_train_dir(train_split_dir)
print('Labels:', labels)


ValueError: Train dir not found: ../data/cropped_faces/train

In [None]:
# Load the trained emotion classifier from MODEL_PATH; the result is bound to
# `model`, which the webcam loop uses for prediction.
print("Loading model...")
model = tf.keras.models.load_model(
    MODEL_PATH,
)
print("Model loaded.")


In [None]:
# Real-time loop: read a frame, detect faces with MTCNN, classify each face crop
# with the Keras model, draw the result, and show the annotated frame until the
# user presses q (or interrupts the kernel).
def _clip_box(box, frame_w, frame_h):
    """Clip a detector ``(x, y, w, h)`` box to the frame; return ``(x1, y1, x2, y2)``.

    The right/bottom edges are computed from the unclamped origin, so a box that
    starts off-frame (negative x or y) shrinks to its visible part instead of
    keeping its full width/height and sliding past the real face edge.
    """
    x, y, w, h = box
    x2, y2 = min(frame_w, x + w), min(frame_h, y + h)
    x1, y1 = max(0, x), max(0, y)
    return x1, y1, x2, y2


detector = MTCNN()
cap = cv2.VideoCapture(CAMERA_INDEX)
prev_label = None  # last label for which MIDI generation was attempted
print('Starting webcam. Press q in the video window to quit.')

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Failed to read from camera. Exiting.')
            break
        frame_h, frame_w = frame.shape[:2]
        # The detector is given an RGB copy; OpenCV frames are BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        detections = detector.detect_faces(rgb)
        for det in detections:
            x1, y1, x2, y2 = _clip_box(det['box'], frame_w, frame_h)
            if x2 <= x1 or y2 <= y1:
                # Nothing of the box is inside the frame; an empty crop cannot be resized.
                continue
            # NOTE(review): the crop is taken from the BGR frame. If the model was
            # trained on RGB images, crop from `rgb` instead — confirm against training.
            face = frame[y1:y2, x1:x2]
            try:
                face_resized = cv2.resize(face, (INPUT_SIZE, INPUT_SIZE))
            except cv2.error:
                # Best effort: skip a face OpenCV cannot resize rather than stop the demo.
                continue
            # Scale pixel values to [0, 1] and add a leading batch axis.
            face_arr = face_resized.astype('float32') / 255.0
            face_arr = np.expand_dims(face_arr, axis=0)
            preds = model.predict(face_arr, verbose=0)
            idx = int(np.argmax(preds))
            prob = float(np.max(preds))
            # Fall back to the raw class index when no label name is known for it.
            label = labels[idx] if idx < len(labels) else str(idx)

            # Green box for confident predictions, a dimmer colour otherwise.
            color = (0, 255, 0) if prob > 0.6 else (0, 200, 200)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            # Keep the caption inside the image when the face touches the top edge.
            text_y = max(y1 - 10, 20)
            cv2.putText(frame, f"{label} {prob:.2f}", (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,255,255), 2)

            # Optional: save MIDI when emotion changes (if you enabled generation and have emotion_to_midi)
            if GENERATE_MIDI and label != prev_label and prob > 0.6:
                try:
                    # Imported lazily so the demo runs without the optional MIDI script.
                    from scripts.emotion_to_midi import generate_melody
                    midi_path = os.path.join(MIDI_OUT_DIR, f"{label}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mid")
                    generate_melody(label, length=16, out_path=midi_path)
                    print('Saved MIDI:', midi_path)
                except Exception as e:
                    print('MIDI generation failed:', e)
                # Updated even on failure so a broken generator is not retried every frame.
                prev_label = label

        cv2.imshow('FER Webcam Demo', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except KeyboardInterrupt:
    # Interrupting the kernel is an accepted way to stop the demo.
    pass
finally:
    # Always release the camera and close the window, however the loop ended.
    cap.release()
    cv2.destroyAllWindows()
    print('Webcam demo ended.')


## Troubleshooting
- If the webcam window doesn't open in your environment, run `python scripts/webcam_demo.py` from a terminal instead of the notebook.
- If MTCNN is slow, consider using an OpenCV Haar cascade for face detection as a faster (but less accurate) alternative.
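- If model loading fails, ensure `MODEL_PATH` points to a valid Keras `.h5` or `.keras` file. A TensorFlow SavedModel directory can only be loaded this way with Keras 2; the Keras 3 that current TensorFlow releases install does not accept it in `load_model`.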
