In [None]:
## 🧠 Head Pose Estimation Trong Jupyter Lab

In [8]:
import pickle
import numpy as np
import cv2
import ipywidgets as widgets
from IPython.display import display, Image as IPImage, clear_output
import io
import insightface
from numpy.linalg import norm

# Initialize the InsightFace model once at module level; the upload handler
# defined further down reads it through the global name `model`.
try:
    print("🔧 Initializing InsightFace model (buffalo_l, CPU)...")
    # 'buffalo_l' model pack, restricted to the ONNX Runtime CPU execution provider.
    model = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
    model.prepare(ctx_id=-1)  # CPU
    print("✅ InsightFace model loaded successfully!")
except Exception as e:
    # Print an installation hint, then re-raise so the cell still fails visibly.
    print(f"⚠️ Error loading InsightFace model: {e}")
    print("Please ensure 'insightface' and 'onnxruntime' are installed: `pip install insightface onnxruntime`")
    raise

# Estimate head pose from landmarks and draw the visualization onto the image
def estimate_head_pose(landmarks, img, img_size):
    """
    Estimate head pose (pitch, yaw, roll) from facial landmarks and annotate the image.

    Six landmarks (nose tip, chin, outer eye corners, mouth corners) are matched
    against a fixed generic 3D face model with cv2.solvePnP. The camera is
    approximated from the image size: focal length = image width, principal
    point = image centre, no lens distortion.

    Args:
        landmarks: indexable collection of points in the 68-point layout; only
            indices 30, 8, 36, 45, 48 and 54 are read, and only the first two
            components (image x, y) of each point are used.
        img: image to annotate. It is drawn on IN PLACE (red landmark dots and
            three axis arrows starting at the nose tip) when the pose is solved.
        img_size: (height, width) of `img`.

    Returns:
        (pitch, yaw, roll, img) with the angles in degrees, each in [-180, 180]:
            pitch - rotation about the camera X axis (nodding),
            yaw   - rotation about the camera Y axis (turning left/right),
            roll  - rotation about the camera Z axis (in-plane tilt).
        A face looking straight at the camera is expected to give values near 0.
        (None, None, None, img) if solvePnP reports failure or any exception is
        raised (the exception message is printed, not propagated).
    """
    try:
        # Generic 3D face model. In this model frame +Y points up the face (the
        # chin has negative Y) and +Z points out of the face (every other point
        # lies behind the nose tip, at negative Z).
        model_points = np.array([
            [0.0, 0.0, 0.0],             # Nose tip (30)
            [0.0, -330.0, -65.0],        # Chin (8)
            [-225.0, 170.0, -135.0],     # Left eye left corner (36)
            [225.0, 170.0, -135.0],      # Right eye right corner (45)
            [-150.0, -150.0, -125.0],    # Left mouth corner (48)
            [150.0, -150.0, -125.0]      # Right mouth corner (54)
        ], dtype="double")

        # Matching 2D image points: x, y of the corresponding landmarks
        image_points = np.array([
            landmarks[30][:2],  # Nose tip
            landmarks[8][:2],   # Chin
            landmarks[36][:2],  # Left eye left corner
            landmarks[45][:2],  # Right eye right corner
            landmarks[48][:2],  # Left mouth corner
            landmarks[54][:2]   # Right mouth corner
        ], dtype="double")

        # Simplified camera intrinsics derived from the image size
        focal_length = img_size[1]
        center = (img_size[1]/2, img_size[0]/2)
        camera_matrix = np.array([
            [focal_length, 0, center[0]],
            [0, focal_length, center[1]],
            [0, 0, 1]
        ], dtype="double")

        # Assume no lens distortion
        dist_coeffs = np.zeros((4,1))

        # Solve the Perspective-n-Point problem
        success, rotation_vector, translation_vector = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs
        )
        if not success:
            return None, None, None, img

        # Rotation vector -> rotation matrix (model frame -> camera frame)
        rmat, _ = cv2.Rodrigues(rotation_vector)

        # ZYX Euler decomposition R = Rz(roll) @ Ry(yaw) @ Rx(pitch), expressed
        # in the OpenCV camera frame (X right, Y down, Z forward).
        #
        # The model frame is Y-up / Z-out-of-face, i.e. rotated 180 degrees
        # about X relative to the camera frame. Right-multiplying rmat by
        # diag(1, -1, -1) removes that constant flip; it negates columns 1 and
        # 2, which only affects the two entries used for pitch.
        pitch = np.arctan2(-rmat[2,1], -rmat[2,2]) * 180 / np.pi
        yaw = np.arctan2(-rmat[2,0], np.sqrt(rmat[0,0]**2 + rmat[1,0]**2)) * 180 / np.pi
        roll = np.arctan2(rmat[1,0], rmat[0,0]) * 180 / np.pi
        # arctan2 already returns values in [-pi, pi], so no wrap-around
        # normalisation is needed.

        # Draw the six landmarks used for the fit (red dots)
        for point in image_points:
            cv2.circle(img, (int(point[0]), int(point[1])), 3, (0, 0, 255), -1)

        # Project the tips of the model's X, Y, Z axes (length 100 model units)
        axis_points, _ = cv2.projectPoints(np.array([
            [100.0, 0.0, 0.0],    # model X axis
            [0.0, 100.0, 0.0],    # model Y axis
            [0.0, 0.0, 100.0]     # model Z axis
        ]), rotation_vector, translation_vector, camera_matrix, dist_coeffs)

        # Arrows start at the detected nose tip; BGR colours: X red, Y green, Z blue
        nose_tip = (int(image_points[0][0]), int(image_points[0][1]))
        axis_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
        for tip, color in zip(axis_points.reshape(-1, 2), axis_colors):
            cv2.arrowedLine(img, nose_tip, (int(tip[0]), int(tip[1])), color, 2)

        return pitch, yaw, roll, img
    except Exception as e:
        print(f"⚠️ Error in head pose estimation: {e}")
        return None, None, None, img

# Widgets for head pose estimation: a single-image uploader, a trigger button,
# and an output area that the click handler renders into.
uploader_pose = widgets.FileUpload(accept='image/*', multiple=False, description='Upload Ảnh')
pose_button = widgets.Button(description='Ước Lượng Head Pose', button_style='success')
pose_output = widgets.Output()

def on_pose_button_clicked(b):
    """Button callback: decode the uploaded image, estimate the pose, show the result.

    Everything is rendered inside `pose_output`, which is cleared first. The
    first detected face's `landmark_3d_68` is handed to `estimate_head_pose`;
    the annotated image is displayed as a PNG, followed by the three angles (or
    a warning when no pose was produced). The uploader is emptied only after a
    run that reaches the end; any exception is printed instead of propagated.

    Args:
        b: the clicked button (unused).
    """
    with pose_output:
        clear_output()

        uploads = uploader_pose.value
        if not uploads:
            print("⚠️ Vui lòng upload ảnh!")
            return

        try:
            raw_bytes = uploads[0]['content']
            frame = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), cv2.IMREAD_COLOR)
            if frame is None:
                print("⚠️ Không thể đọc ảnh!")
                return

            detections = model.get(frame)
            if len(detections) == 0:
                print("⚠️ Không phát hiện khuôn mặt trong ảnh.")
                return

            # 68-point 3D landmarks of the first detection; image size is (height, width)
            face_landmarks = detections[0].landmark_3d_68
            pitch, yaw, roll, annotated = estimate_head_pose(
                face_landmarks, frame, frame.shape[:2]
            )

            # Encode the annotated image as PNG bytes and show it
            _, png = cv2.imencode('.png', annotated)
            display(IPImage(data=png.tobytes(), width=300))

            if pitch is None:
                print("⚠️ Không thể ước lượng head pose.")
            else:
                print("🎯 Head Pose:")
                print(f"   Pitch (nghiêng dọc): {pitch:.2f}°")
                print(f"   Yaw (xoay ngang): {yaw:.2f}°")
                print(f"   Roll (nghiêng ngang): {roll:.2f}°")

            # Clear the uploader so the next click needs a fresh upload
            uploader_pose.value = ()
        except Exception as e:
            print(f"⚠️ Error processing image: {e}")

# Run the handler whenever the button is clicked
pose_button.on_click(on_pose_button_clicked)

# Show the widgets stacked vertically: uploader, button, output area
print("\n🧠 Ước lượng Head Pose từ ảnh:")
display(widgets.VBox([uploader_pose, pose_button, pose_output]))

🔧 Initializing InsightFace model (buffalo_l, CPU)...
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/mo

VBox(children=(FileUpload(value=(), accept='image/*', description='Upload Ảnh'), Button(button_style='success'…

In [None]:
import pickle
import numpy as np
import cv2
import ipywidgets as widgets
from IPython.display import display, Image as IPImage, clear_output
import io
import insightface
from numpy.linalg import norm

# Initialize the InsightFace model; the upload handler in this cell reads it
# through the global name `model`.
# NOTE(review): this is the same initialization as in the first code cell, so
# running both cells loads the model twice.
try:
    print("🔧 Initializing InsightFace model (buffalo_l, CPU)...")
    # 'buffalo_l' model pack, restricted to the ONNX Runtime CPU execution provider.
    model = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
    model.prepare(ctx_id=-1)  # CPU
    print("✅ InsightFace model loaded successfully!")
except Exception as e:
    # Print an installation hint, then re-raise so the cell still fails visibly.
    print(f"⚠️ Error loading InsightFace model: {e}")
    print("Please ensure 'insightface' and 'onnxruntime' are installed: `pip install insightface onnxruntime`")
    raise

# PoseEstimator class taken from reference code
class PoseEstimator:
    """Estimate head pose from 2D facial landmarks and a fixed 3D face model.

    The camera is approximated from the image size (focal length = image width,
    principal point = image centre, no lens distortion). `solve` fits the pose
    with cv2.solvePnP; `visualize` and `draw_axes` draw it onto an image.

    NOTE(review): the hard-coded model table currently produces 73 points, not
    68 (see `_get_full_model_points`), so `solve` rejects 68-point landmark
    sets until the table is replaced with the correct data.
    """

    def __init__(self, image_width, image_height):
        """Init a pose estimator.

        Args:
            image_width (int): input image width
            image_height (int): input image height
        """
        self.size = (image_height, image_width)
        self.model_points_68 = self._get_full_model_points()

        # Camera internals
        self.focal_length = self.size[1]
        self.camera_center = (self.size[1] / 2, self.size[0] / 2)
        self.camera_matrix = np.array(
            [[self.focal_length, 0, self.camera_center[0]],
             [0, self.focal_length, self.camera_center[1]],
             [0, 0, 1]], dtype="double")

        # Assuming no lens distortion
        # (attribute name keeps its original spelling; it is part of the interface)
        self.dist_coeefs = np.zeros((4, 1))

        # Hard-coded rotation / translation vectors used by solve() as the
        # initial extrinsic guess.
        self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
        self.t_vec = np.array(
            [[-14.97821226], [-10.62040383], [-2053.03596872]])

    def _get_full_model_points(self):
        """Build the 3D model point array from the hard-coded table.

        The flat list is reshaped with `reshape((3, -1)).T`, i.e. it is read as
        all x values, then all y values, then all z values. The z column is
        then negated.

        NOTE(review): the table holds 219 numbers (73 rows of three), so the
        result has shape (73, 3) rather than (68, 3). It is also written three
        numbers per row, which suggests per-point (x, y, z) triplets rather
        than the planar layout the reshape assumes - TODO confirm against the
        original model file and replace the table.

        Returns:
            np.ndarray: float32 array of shape (N, 3).
        """
        raw_value = [
            -73.393523, -89.31522, 69.48,
            -72.775015, -87.47549, 52.95,
            -78.202742, -75.51543, 43.22,
            -77.849368, -68.25236, 28.9,
            -84.465767, -66.2853, 14.3,
            -85.636795, -62.60433, -4.79,
            -83.646607, -50.824226, -29.43,
            -81.9452, -42.12885, -43.1,
            -77.486112, -29.393612, -52.43,
            -78.638795, -21.982149, -46.22,
            -79.062126, -11.8819, -50.54,
            -78.753606, -3.995317, -46.72,
            -80.9039, 6.0479, -40.3,
            -81.246819, 14.532797, -43.14,
            -80.16907, 25.147775, -35.69,
            -77.058449, 30.77057, -42.19,
            -74.221957, 38.799888, -25.33,
            0.0, 48.22903, -30.0,
            72.775015, 87.47549, 52.95,
            78.202742, 75.51543, 43.22,
            77.849368, 68.25236, 28.9,
            84.465767, 66.2853, 14.3,
            85.636795, 62.60433, -4.79,
            83.646607, 50.824226, -29.43,
            81.9452, 42.12885, -43.1,
            77.486112, 29.393612, -52.43,
            78.638795, 21.982149, -46.22,
            79.062126, 11.8819, -50.54,
            78.753606, 3.995317, -46.72,
            80.9039, -6.0479, -40.3,
            81.246819, -14.532797, -43.14,
            80.16907, -25.147775, -35.69,
            77.058449, -30.77057, -42.19,
            74.221957, -38.799888, -25.33,
            0.0, -48.22903, -30.0,
            36.845203, 65.246994, 47.58,
            24.229742, 55.074575, 56.61,
            11.971302, 50.004105, 61.5,
            0.0, 45.0, 66.0,
            -11.971302, 50.004105, 61.5,
            -24.229742, 55.074575, 56.61,
            -36.845203, 65.246994, 47.58,
            29.680221, 20.650688, 66.49,
            18.775135, 13.113989, 70.63,
            9.226971, 6.391162, 75.14,
            0.0, 0.0, 80.0,
            -9.226971, 6.391162, 75.14,
            -18.775135, 13.113989, 70.63,
            -29.680221, 20.650688, 66.49,
            37.389622, -3.124168, 55.5,
            24.474024, -6.189816, 59.76,
            12.229014, -3.160895, 66.25,
            0.0, 0.0, 70.0,
            -12.229014, -3.160895, 66.25,
            -24.474024, -6.189816, 59.76,
            -37.389622, -3.124168, 55.5,
            22.755451, -18.931895, 45.9,
            14.2635, -23.070902, 55.13,
            0.0, -27.0, 60.0,
            -14.2635, -23.070902, 55.13,
            -22.755451, -18.931895, 45.9,
            12.482902, -39.47496, 37.17,
            3.733898, -41.321726, 46.31,
            0.0, -43.0, 51.0,
            -3.733898, -41.321726, 46.31,
            -12.482902, -39.47496, 37.17,
            0.0, -73.0, 15.0,
            0.0, -85.0, 0.0,
            0.0, -77.0, -37.0,
            0.0, -63.0, -65.0,
            0.0, -48.0, -84.0,
            0.0, -28.0, -96.0,
            0.0, -8.0, -102.0
        ]
        model_points = np.array(raw_value, dtype=np.float32)
        model_points = np.reshape(model_points, (3, -1)).T

        # Transform the model into a front view.
        model_points[:, 2] *= -1

        return model_points

    def solve(self, points):
        """Solve the pose from image points matching the 3D model points.

        Args:
            points (np.ndarray): image points, one (x, y) pair per 3D model
                point, in the same order. Any shape whose last axis has size 2
                is accepted and flattened to (N, 2).

        Returns:
            Tuple: (rotation_vector, translation_vector) as pose.

        Raises:
            ValueError: if `points` is not made of (x, y) pairs or its point
                count differs from the number of 3D model points.
        """
        # Validate up front: a count or shape mismatch would otherwise surface
        # as an opaque assertion failure inside OpenCV.
        points = np.ascontiguousarray(points)
        if points.dtype not in (np.float32, np.float64):
            points = points.astype(np.float64)
        if points.ndim < 2 or points.shape[-1] != 2:
            raise ValueError(
                f"Expected image points as (x, y) pairs, got array of shape {points.shape}")
        points = np.ascontiguousarray(points.reshape(-1, 2))
        expected = len(self.model_points_68)
        if len(points) != expected:
            raise ValueError(
                f"Expected {expected} image points to match the 3D model, got {len(points)}")

        # Only reached if a caller has reset r_vec to None; __init__ always
        # sets an initial guess.
        if self.r_vec is None:
            (_, rotation_vector, translation_vector) = cv2.solvePnP(
                self.model_points_68, points, self.camera_matrix, self.dist_coeefs)
            self.r_vec = rotation_vector
            self.t_vec = translation_vector

        # Refine starting from the stored rotation / translation guess.
        (_, rotation_vector, translation_vector) = cv2.solvePnP(
            self.model_points_68,
            points,
            self.camera_matrix,
            self.dist_coeefs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)

        return (rotation_vector, translation_vector)

    def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
        """Draw a 3D box as annotation of pose (in place on `image`).

        Args:
            image: image to draw on.
            pose: (rotation_vector, translation_vector) as returned by `solve`.
            color: line colour.
            line_width: line thickness in pixels.
        """
        rotation_vector, translation_vector = pose

        def _square(half_size, depth):
            # Closed square outline at the given depth (first corner repeated last).
            return [(-half_size, -half_size, depth),
                    (-half_size, half_size, depth),
                    (half_size, half_size, depth),
                    (half_size, -half_size, depth),
                    (-half_size, -half_size, depth)]

        # Rear square (half-size 75 at depth 0) followed by the front square
        # (half-size 100 at depth 100): indices 0-4 rear, 5-9 front.
        point_3d = np.array(_square(75, 0) + _square(100, 100),
                            dtype=np.float32).reshape(-1, 3)

        # Map to 2d image points
        (point_2d, _) = cv2.projectPoints(point_3d,
                                          rotation_vector,
                                          translation_vector,
                                          self.camera_matrix,
                                          self.dist_coeefs)
        point_2d = np.int32(point_2d.reshape(-1, 2))

        # Draw both squares, then the edges joining matching rear/front corners
        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
        for rear_idx, front_idx in ((1, 6), (2, 7), (3, 8)):
            # Plain Python ints: some OpenCV builds reject NumPy scalars as point coordinates
            start = tuple(int(v) for v in point_2d[rear_idx])
            end = tuple(int(v) for v in point_2d[front_idx])
            cv2.line(image, start, end, color, line_width, cv2.LINE_AA)

    def draw_axes(self, img, pose):
        """Draw the pose's coordinate axes (length 30) on `img` in place.

        Args:
            img: image to draw on.
            pose: (rotation_vector, translation_vector) as returned by `solve`.

        Returns:
            The image returned by cv2.drawFrameAxes.
        """
        R, t = pose
        return cv2.drawFrameAxes(img, self.camera_matrix, self.dist_coeefs, R, t, 30)

# Widgets for head pose estimation: a single-image uploader, a trigger button,
# and an output area that the click handler renders into.
uploader_pose = widgets.FileUpload(accept='image/*', multiple=False, description='Upload Ảnh')
pose_button = widgets.Button(description='Ước Lượng Head Pose', button_style='success')
pose_output = widgets.Output()

def on_pose_button_clicked(b):
    """Button callback: decode the uploaded image, solve the pose, show the result.

    Everything is rendered inside `pose_output`, which is cleared first. The
    x, y columns of the first detected face's `landmark_3d_68` are passed to a
    `PoseEstimator` built for the image size; a copy of the image is annotated
    with the pose box and axes and displayed, followed by the Euler angles in
    degrees. Any exception is printed instead of propagated.

    Angles follow the OpenCV camera frame (X right, Y down, Z forward):
    pitch is the rotation about X, yaw about Y, roll about Z, each in
    [-180, 180]. They describe the rotation from the 3D model's own frame to
    the camera frame, so a model frame that is not aligned with the camera
    shows up as a constant offset - NOTE(review): confirm against the model.

    NOTE(review): PoseEstimator's hard-coded model table yields 73 points while
    68 landmarks are passed here, so the solve step is expected to fail until
    that table is corrected.

    Args:
        b: the clicked button (unused).
    """
    with pose_output:
        clear_output()
        if not uploader_pose.value:
            print("⚠️ Vui lòng upload ảnh!")
            return
        try:
            file_info = uploader_pose.value[0]
            image_bytes = file_info['content']
            img = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
            if img is None:
                print("⚠️ Không thể đọc ảnh!")
                return
            faces = model.get(img)
            if len(faces) == 0:
                print("⚠️ Không phát hiện khuôn mặt trong ảnh.")
                return

            # Keep only the image x, y columns of the 68 landmarks, as a
            # contiguous array for OpenCV.
            landmarks = np.ascontiguousarray(faces[0].landmark_3d_68[:, :2])

            # Build a PoseEstimator for this image size
            height, width = img.shape[:2]
            pose_estimator = PoseEstimator(width, height)

            # Solve the pose
            pose = pose_estimator.solve(landmarks)
            rotation_vector, translation_vector = pose

            # Draw on a copy so the decoded image stays untouched
            visualized_img = img.copy()
            pose_estimator.visualize(visualized_img, (rotation_vector, translation_vector))
            pose_estimator.draw_axes(visualized_img, (rotation_vector, translation_vector))

            # ZYX Euler decomposition R = Rz(roll) @ Ry(yaw) @ Rx(pitch).
            # arctan2 already returns values in [-pi, pi], so the angles need
            # no further wrap-around normalisation.
            rmat, _ = cv2.Rodrigues(rotation_vector)
            pitch = np.arctan2(rmat[2,1], rmat[2,2]) * 180 / np.pi
            yaw = np.arctan2(-rmat[2,0], np.sqrt(rmat[0,0]**2 + rmat[1,0]**2)) * 180 / np.pi
            roll = np.arctan2(rmat[1,0], rmat[0,0]) * 180 / np.pi

            # Encode the annotated image as PNG bytes
            _, buffer = cv2.imencode('.png', visualized_img)
            visualized_bytes = buffer.tobytes()

            # Show the image and the angles
            display(IPImage(data=visualized_bytes, width=300))
            print(f"🎯 Head Pose:")
            print(f"   Pitch (nghiêng dọc): {pitch:.2f}°")
            print(f"   Yaw (xoay ngang): {yaw:.2f}°")
            print(f"   Roll (nghiêng ngang): {roll:.2f}°")

            # Clear the uploader so the next click needs a fresh upload
            uploader_pose.value = ()
        except Exception as e:
            print(f"⚠️ Error processing image: {e}")

# Run the handler whenever the button is clicked
pose_button.on_click(on_pose_button_clicked)

# Show the widgets stacked vertically: uploader, button, output area
print("\n🧠 Ước lượng Head Pose từ ảnh:")
display(widgets.VBox([uploader_pose, pose_button, pose_output]))

🔧 Initializing InsightFace model (buffalo_l, CPU)...
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/hoangtrung/.insightface/mo

VBox(children=(FileUpload(value=(), accept='image/*', description='Upload Ảnh'), Button(button_style='success'…