In [1]:
def classify_head_pose(yaw, pitch, roll, YAW_THRESHOLD = 20, PITCH_THRESHOLD = 20, ROLL_THRESHOLD = 20):
    """
    Classifies head pose based on yaw, pitch, and roll angles.

    Parameters:
    - yaw (float): Horizontal head rotation (left/right).
    - pitch (float): Vertical head movement (up/down).
    - roll (float): Head tilt (sideways).

    Returns:
    - String representing the classified head pose.
    """

    # Classify yaw (left/right)
    if yaw < -YAW_THRESHOLD:
        return "Looking Down"
    elif yaw > YAW_THRESHOLD:
        return "Looking Up"

    # Classify pitch (up/down)
    if pitch > PITCH_THRESHOLD:
        return "Looking Left"
    elif pitch < -PITCH_THRESHOLD:
        return "Looking Right"

    # Classify roll (head tilting)
    if roll > ROLL_THRESHOLD:
        return "Tilting Left"
    elif roll < -ROLL_THRESHOLD:
        return "Tilting Right"

    # Default case: looking straight
    return "Straight"


In [15]:
!pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.23.1-cp313-cp313-macosx_13_0_arm64.whl.metadata (5.0 kB)
Collecting coloredlogs (from onnxruntime)
  Using cached coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting flatbuffers (from onnxruntime)
  Using cached flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Using cached humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.23.1-cp313-cp313-macosx_13_0_arm64.whl (17.2 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.2/17.2 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m[36m0:00:01[0m:01[0m
[?25hUsing cached coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
Using cached humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
Using cached flatbuffers-25.9.23-py2.py3-none-any.whl (30 kB)
Installing collected packages: flatbuffers, humanfriendly, coloredlogs, onnxruntime
[2K   

In [2]:
import insightface
import cv2

# Load the face detection model
detector = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
detector.prepare(ctx_id=0)

# Read an image
img = cv2.imread("faceset/image.jpg")
faces = detector.get(img)
# Get the largest detected face (assuming main subject)
face = max(faces, key=lambda f: f.bbox[2] - f.bbox[0])

# Extract pose values (yaw, pitch, roll)
yaw, pitch, roll = face.pose

# Classify head pose
predicted_class = classify_head_pose(yaw, pitch, roll)

print(predicted_class)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3,

In [12]:
!pip install \
  insightface \
  torch \
  torchvision \
  numpy \
  matplotlib \
  tqdm \
  scipy \
#   bcolz \
  easydict \
  opencv-python \
  Pillow \
  scikit-learn \
  tensorboardX \
  mxnet
# Nếu có CUDA 9.0 thì thay dòng mxnet ở trên bằng:
#   mxnet-cu90==1.2.1



Collecting torch
  Using cached torch-2.8.0-cp313-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl.metadata (6.1 kB)
Downloading torch-2.8.0-cp313-none-macosx_11_0_arm64.whl (73.6 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 MB[0m [31m781.2 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:03[0m
[?25hDownloading torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl (1.9 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m802.7 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hInstalling collected packages: torch, torchvision
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [torchvision][0m [32m1/2[0m [torchvision]
[1A[2KSuccessfully installed torch-2.8.0 torchvision-0.23.0


In [None]:
import cv2
import numpy as np
import threading
import queue

def test_with_camera():
    # Initialize webcam with reduced resolution
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)  # Set width to 640
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # Set height to 480
    cap.set(cv2.CAP_PROP_FPS, 30)  # Target 30 FPS (if supported)

    # Frame skipping parameters
    process_every_n_frames = 2  # Process every 2nd frame
    frame_count = 0

    # Queue for thread-safe face detection results
    result_queue = queue.Queue()
    last_face = None

    def face_detection_thread(frame_queue, result_queue):
        while True:
            try:
                rgb_frame = frame_queue.get(timeout=1)
                # Detect faces
                faces = detector.get(rgb_frame)
                result_queue.put(faces if faces else None)
            except queue.Empty:
                continue

    # Start face detection in a separate thread
    frame_queue = queue.Queue(maxsize=1)
    detection_thread = threading.Thread(target=face_detection_thread, args=(frame_queue, result_queue), daemon=True)
    detection_thread.start()

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        # Resize frame for display (optional, if lower resolution is desired for display)
        display_frame = frame  # Can resize further: cv2.resize(frame, (320, 240))

        # Process every nth frame for face detection
        if frame_count % process_every_n_frames == 0:
            # Convert BGR to RGB for face detection
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Optionally downscale for faster detection
            small_rgb_frame = cv2.resize(rgb_frame, (320, 240))
            
            # Put frame in queue for detection
            if frame_queue.empty():
                frame_queue.put(small_rgb_frame)

        # Check for detection results
        try:
            faces = result_queue.get_nowait()
            if faces:
                last_face = max(faces, key=lambda f: f.bbox[2] - f.bbox[0])
        except queue.Empty:
            pass

        # Use the last detected face (if available)
        if last_face:
            # Scale bounding box back to original frame size (if resized)
            scale_x, scale_y = frame.shape[1] / 320, frame.shape[0] / 240
            x1, y1, x2, y2 = map(int, [last_face.bbox[0] * scale_x, last_face.bbox[1] * scale_y,
                                       last_face.bbox[2] * scale_x, last_face.bbox[3] * scale_y])
            cv2.rectangle(display_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Extract and classify pose
            yaw, pitch, roll = last_face.pose
            predicted_class = classify_head_pose(yaw, pitch, roll)

            # Display pose and classification (update less frequently to save time)
            if frame_count % process_every_n_frames == 0:
                cv2.putText(display_frame, f"Yaw: {yaw:.2f}, Pitch: {pitch:.2f}, Roll: {roll:.2f}",
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                cv2.putText(display_frame, f"Pose: {predicted_class}",
                            (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        # Show the frame
        cv2.imshow('Real-time Head Pose Classification', display_frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Cleanup
    cap.release()
    cv2.destroyAllWindows()

# Run the function
if __name__ == "__main__":
    test_with_camera()

: 

In [3]:
# Run webcam test
test_with_camera()

Exception in thread Thread-6 (face_detection_thread):
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.10/3.10.19/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/hoangtrung/Documents/doancheating/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/opt/homebrew/Cellar/python@3.10/3.10.19/Frameworks/Python.framework/Versions/3.10/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/tn/l4vhfvtd4tj07xtfn85qhgm00000gn/T/ipykernel_48753/189718314.py", line 26, in face_detection_thread
NameError: name 'detector' is not defined
