In [None]:
import cv2
import mediapipe as mp

# Handles to the MediaPipe Hands solution and its drawing helpers.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# Open the default camera.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when the loop is left through an exception or a kernel interrupt.
try:
    with mp_hands.Hands(
        model_complexity=0,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
        max_num_hands=2
    ) as hands:

        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("忽略空摄像头帧")
                continue

            # Convert BGR -> RGB for MediaPipe; the frame is flagged read-only
            # while it is being processed.
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Run hand detection / tracking.
            results = hands.process(image)

            # Back to a writable BGR frame for drawing with OpenCV.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw the landmarks and the connections between them.
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style()
                    )

            # Mirror the frame horizontally for a selfie view.
            image = cv2.flip(image, 1)

            # On-screen hint. OpenCV's Hershey fonts only render ASCII, so the
            # hint must be ASCII; non-ASCII characters are drawn as '?'.
            cv2.putText(image, "Press 'P' to print coords | ESC to quit", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            cv2.imshow('MediaPipe Hands', image)

            key = cv2.waitKey(5) & 0xFF
            if key == 27:  # ESC quits
                break
            elif key in (ord('p'), ord('P')):
                # Print the landmarks of the current frame. Note that they are
                # the coordinates from before the horizontal flip above.
                if results.multi_hand_landmarks:
                    print("\n=== 手部关键点坐标 ===")
                    for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                        print(f"\n手 #{hand_idx + 1}:")
                        for landmark_idx, landmark in enumerate(hand_landmarks.landmark):
                            print(f"点 {landmark_idx}: (X: {landmark.x:.4f}, Y: {landmark.y:.4f}, Z: {landmark.z:.4f})")
                    print("=====================\n")
                else:
                    print("当前帧未检测到手部！")
finally:
    # Release resources.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Initialise the MediaPipe hand model (video mode, at most one hand)
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Assumed camera focal length (must be calibrated for the real camera; this is only an example value)
FOCAL_LENGTH = 1000  # unit: pixels
# NOTE(review): estimate_depth compares this value with the wrist (landmark 0) to
# landmark 9 pixel distance, not with a width measurement — confirm 0.09 m is the
# right reference length for that segment.
REAL_HAND_WIDTH = 0.09  # average adult palm width (unit: metres, about 9 cm)

def estimate_depth(image, landmarks, focal_length=None, real_size=None):
    """Estimate the hand-to-camera distance from the apparent size of the hand.

    The pixel distance between the wrist (landmark 0) and landmark 9 is
    compared with a known real-world length using similar triangles:
    ``depth = real_size * focal_length / pixel_distance``.

    :param image: frame whose ``shape[:2]`` is ``(height, width)``.
    :param landmarks: object exposing ``.landmark``, a sequence of points with
        normalised ``x`` / ``y`` attributes.
    :param focal_length: focal length in pixels; defaults to ``FOCAL_LENGTH``.
    :param real_size: real length of the reference segment in metres; defaults
        to ``REAL_HAND_WIDTH``.
    :return: ``(depth, (x1, y1, x2, y2))`` where ``depth`` is a float and the
        tuple holds the integer pixel end points of the reference segment.
        ``depth`` is ``inf`` when both end points fall on the same pixel.
    """
    # Resolve the defaults lazily so callers can override the calibration.
    if focal_length is None:
        focal_length = FOCAL_LENGTH
    if real_size is None:
        real_size = REAL_HAND_WIDTH

    # Reference segment: wrist (landmark 0) to landmark 9.
    wrist = landmarks.landmark[0]
    mid_finger = landmarks.landmark[9]

    # Normalised coordinates -> integer pixel coordinates.
    image_height, image_width = image.shape[:2]
    x1, y1 = int(wrist.x * image_width), int(wrist.y * image_height)
    x2, y2 = int(mid_finger.x * image_width), int(mid_finger.y * image_height)
    pixel_distance = float(np.hypot(x2 - x1, y2 - y1))

    # A zero-length segment has no defined depth; report infinity explicitly
    # instead of dividing by zero.
    if pixel_distance == 0:
        depth = float("inf")
    else:
        depth = (real_size * focal_length) / pixel_distance
    return depth, (x1, y1, x2, y2)

# Open the default camera.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera, the MediaPipe graph and the window are
# released even if the loop is interrupted or raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Estimate the depth and get the reference segment end points.
                depth, (x1, y1, x2, y2) = estimate_depth(frame, hand_landmarks)

                # Draw the hand skeleton and the reference segment.
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Show the estimated depth.
                cv2.putText(frame, f"Depth: {depth:.2f}m", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow("Hand Tracking with Depth Estimation", frame)

        # ESC quits.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release resources.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

T3. 
 A test that computes the bending angle at each of the 10 finger joints.

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Initialise the MediaPipe hand model (video mode, at most one hand)
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

# All joint angles to annotate (thumb included).
# Each triple is (i, j, k) landmark indices; the angle is measured at landmark j.
JOINT_ANGLES = [
    (1, 2, 3),   # thumb, first joint
    (2, 3, 4),   # thumb, second joint
    (5, 6, 7),   # index finger, first joint
    (6, 7, 8),   # index finger, second joint
    (9, 10, 11), # middle finger, first joint
    (10, 11, 12),# middle finger, second joint
    (13, 14, 15),# ring finger, first joint
    (14, 15, 16),# ring finger, second joint
    (17, 18, 19),# little finger, first joint
    (18, 19, 20) # little finger, second joint
]

# Per-angle (dx, dy) pixel offset for the text label, to keep labels from overlapping.
# Keyed by the same triples as JOINT_ANGLES.
ANGLE_OFFSETS = {
    (1, 2, 3): (-40, 20),   # thumb, first joint
    (2, 3, 4): (-40, -20),  # thumb, second joint
    (5, 6, 7): (0, 30),     # index finger, first joint
    (6, 7, 8): (0, -30),    # index finger, second joint
    (9, 10, 11): (0, 30),   # middle finger, first joint
    (10, 11, 12): (0, -30), # middle finger, second joint
    (13, 14, 15): (0, 30),  # ring finger, first joint
    (14, 15, 16): (0, -30), # ring finger, second joint
    (17, 18, 19): (0, 30),  # little finger, first joint
    (18, 19, 20): (0, -30)  # little finger, second joint
}

def calculate_angle(a, b, c):
    """Return the angle at vertex ``b`` between the rays b->a and b->c, in degrees.

    :param a: first end point (sequence of coordinates).
    :param b: vertex point.
    :param c: second end point.
    :return: angle in the range [0, 180] as a float, or ``nan`` when ``a`` or
        ``c`` coincides with ``b`` (the angle is undefined for a zero-length arm).
    """
    a, b, c = (np.asarray(p, dtype=float) for p in (a, b, c))
    ba, bc = a - b, c - b
    norm_product = np.linalg.norm(ba) * np.linalg.norm(bc)
    # Integer pixel coordinates can make neighbouring landmarks coincide;
    # return NaN explicitly instead of dividing by zero.
    if norm_product == 0:
        return float("nan")
    cosine_angle = np.dot(ba, bc) / norm_product
    # Clip to guard against rounding pushing the cosine slightly outside [-1, 1].
    return float(np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0))))

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera, the MediaPipe graph and the window are
# released even if the loop is interrupted or raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            h, w = frame.shape[:2]
            for hand_landmarks in results.multi_hand_landmarks:
                # Pixel coordinates of every landmark.
                landmarks = [(int(lm.x * w), int(lm.y * h)) for lm in hand_landmarks.landmark]

                for joint in JOINT_ANGLES:
                    i, j, k = joint
                    # All three indices must exist, not just the vertex.
                    if max(joint) >= len(landmarks):
                        continue

                    angle = calculate_angle(landmarks[i], landmarks[j], landmarks[k])
                    # Skip undefined angles (coincident landmarks).
                    if np.isnan(angle):
                        continue

                    # Look the offset up by key rather than relying on the two
                    # containers being declared in the same order.
                    dx, dy = ANGLE_OFFSETS.get(joint, (0, 0))
                    text_pos = (landmarks[j][0] + dx, landmarks[j][1] + dy)

                    # Thumb joints get a different colour from the other fingers.
                    color = (0, 200, 255) if i in (1, 2, 3, 4) else (0, 255, 200)
                    # Hershey fonts only render ASCII, so spell out "deg"
                    # instead of using the degree sign.
                    cv2.putText(frame, f"{angle:.0f} deg", text_pos,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

        cv2.imshow("Hand Joint Angles", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

At this point, I started to think about how to use the joint angles as weights when building samples for gesture recognition. The pain point is recognizing overlapping fingers.

There are multiple ways to think about it. If we use static images of overlapping fingers, we need the model to predict which finger is on top. If we use video frames, we can predict the transition from a standard finger position to the overlapping state. We could train a model on video frames of one finger approaching the other finger, and make the prediction as soon as there is a tendency toward this motion.

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Initialise the MediaPipe hand model
# NOTE(review): static_image_mode=True is set although this cell feeds webcam
# frames; presumably this disables inter-frame tracking — confirm against the
# MediaPipe docs whether False was intended for the video loop.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Landmark index of each fingertip
FINGER_TIPS = {
    "thumb": 4,
    "index": 8,
    "middle": 12,
    "ring": 16,
    "pinky": 20
}

def are_fingers_overlapping(finger1, finger2, landmarks, img_size, xy_threshold=0.05, z_threshold=0.1):
    """
    Decide whether two fingertips overlap in the image and which one is on top.

    :param finger1: name of the first finger (e.g. "index")
    :param finger2: name of the second finger (e.g. "middle")
    :param landmarks: hand landmarks as produced by MediaPipe, indexable by landmark id
    :param img_size: image size as (width, height)
    :param xy_threshold: maximum fingertip separation in the XY plane, expressed
        as a fraction of the longer image side, for the tips to count as overlapping
    :param z_threshold: minimum z difference needed to call one finger "on top"
    :return: (overlapping?, name of the upper finger or None)
    """
    first_tip = landmarks[FINGER_TIPS[finger1]]
    second_tip = landmarks[FINGER_TIPS[finger2]]

    # Fingertip separation in pixels, then normalised by the longer image side.
    width, height = img_size
    dx_px = (first_tip.x - second_tip.x) * width
    dy_px = (first_tip.y - second_tip.y) * height
    separation = np.sqrt(dx_px**2 + dy_px**2) / max(width, height)

    # Too far apart in the image plane: no overlap at all.
    if separation > xy_threshold:
        return False, None

    # The tips overlap; a smaller z is treated as closer to the camera.
    if first_tip.z < second_tip.z - z_threshold:
        upper = finger1
    elif second_tip.z < first_tip.z - z_threshold:
        upper = finger2
    else:
        upper = None  # overlapping, but the depth gap is too small to rank them
    return True, upper

# Example: process frames from the webcam.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera, the MediaPipe graph and the window are
# released even if the loop is interrupted or raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            img_size = (frame.shape[1], frame.shape[0])  # (width, height)

            # Check whether the index and middle fingertips overlap.
            is_overlap, top_finger = are_fingers_overlapping(
                "index", "middle", hand_landmarks.landmark, img_size
            )

            # Annotate the frame only when an overlap was found.
            if is_overlap:
                text = f"Overlap: {top_finger} on top" if top_finger else "Overlap: Uncertain"
                cv2.putText(frame, text, (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("Finger Overlap Detection", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

我能不能结合xy平面，xz平面，和zy平面进行推算？比如yz平面接近、zx平面其次的时候交叠就产生了。

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Initialise MediaPipe (static-image mode, at most one hand)
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Landmark indices of the fingertips and of the joint used as each finger's base
FINGER_TIPS = {
    'thumb': 4, 'index': 8, 'middle': 12, 'ring': 16, 'pinky': 20
}
FINGER_BASES = {
    'thumb': 2, 'index': 5, 'middle': 9, 'ring': 13, 'pinky': 17
}

def get_finger_bbox(finger_name, landmarks, img_size):
    """Return the axis-aligned pixel bounding box of a finger.

    The box spans the finger's tip and base landmarks and is padded on every
    side by 10% of the tip-to-base pixel length, so the padding depends on the
    finger's size rather than on where the finger sits in the image.

    :param finger_name: key into ``FINGER_TIPS`` / ``FINGER_BASES``.
    :param landmarks: indexable collection of landmarks with normalised ``x`` / ``y``.
    :param img_size: image size as (width, height).
    :return: ``(x_min, y_min, x_max, y_max)`` in pixels.
    """
    tip = landmarks[FINGER_TIPS[finger_name]]
    base = landmarks[FINGER_BASES[finger_name]]
    w, h = img_size

    # Normalised coordinates -> pixels.
    tip_x, tip_y = tip.x * w, tip.y * h
    base_x, base_y = base.x * w, base.y * h

    # Scaling the absolute coordinates by 0.9 / 1.1 would pad by an amount
    # proportional to the distance from the image origin; pad relative to the
    # finger length instead. Using the length (not the per-axis extent) also
    # keeps a perfectly vertical or horizontal finger from collapsing to a line.
    margin = 0.1 * np.hypot(tip_x - base_x, tip_y - base_y)

    x_min = min(tip_x, base_x) - margin
    x_max = max(tip_x, base_x) + margin
    y_min = min(tip_y, base_y) - margin
    y_max = max(tip_y, base_y) + margin

    return (x_min, y_min, x_max, y_max)

def is_overlapping(finger1, finger2, landmarks, img_size):
    """Return True when the bounding boxes of the two fingers intersect (AABB test)."""
    left1, top1, right1, bottom1 = get_finger_bbox(finger1, landmarks, img_size)
    left2, top2, right2, bottom2 = get_finger_bbox(finger2, landmarks, img_size)

    # Two boxes are disjoint as soon as they are separated along either axis.
    separated_in_x = right1 < left2 or left1 > right2
    separated_in_y = bottom1 < top2 or top1 > bottom2
    return not (separated_in_x or separated_in_y)

def determine_finger_order(finger1, finger2, landmarks):
    """Pick which of two fingers is on top, using fingertip depth first and a
    vertical-extent tie-break second.

    :return: ``finger1`` or ``finger2`` (never None).
    """
    tip_a = landmarks[FINGER_TIPS[finger1]]
    tip_b = landmarks[FINGER_TIPS[finger2]]

    # Depth decides when the fingertip z values differ by more than 0.05
    # (the smaller z is treated as closer to the camera).
    if tip_a.z < tip_b.z - 0.05:
        return finger1
    if tip_b.z < tip_a.z - 0.05:
        return finger2

    def vertical_extent(name):
        # |tip.y - base.y| of the finger, in normalised coordinates.
        return abs(landmarks[FINGER_TIPS[name]].y - landmarks[FINGER_BASES[name]].y)

    # Depths are close: fall back to the finger with the larger vertical extent.
    # NOTE(review): the original comment stated that the *bent* finger is more
    # likely on top, yet the larger tip-to-base extent wins here — confirm
    # which of the two is intended.
    if vertical_extent(finger1) > vertical_extent(finger2):
        return finger1
    return finger2

# Example: process a single image.
image_path = "your_image.jpg"
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail with a clear message instead of an AttributeError on ``image.shape``.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
h, w = image.shape[:2]

# Hand detection (MediaPipe expects RGB input).
results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

if results.multi_hand_landmarks:
    landmarks = results.multi_hand_landmarks[0].landmark

    # Test every unordered pair of fingers.
    fingers = list(FINGER_TIPS.keys())
    for i in range(len(fingers)):
        for j in range(i+1, len(fingers)):
            finger1, finger2 = fingers[i], fingers[j]

            if is_overlapping(finger1, finger2, landmarks, (w, h)):
                top_finger = determine_finger_order(finger1, finger2, landmarks)

                # Label the result at the upper finger's tip.
                tip_pos = (int(landmarks[FINGER_TIPS[top_finger]].x * w),
                           int(landmarks[FINGER_TIPS[top_finger]].y * h))
                cv2.putText(image, f"{top_finger} on top", tip_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

cv2.imshow("Result", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

The code below can recognize the overlap both when the index finger is on top and when the middle finger is on top.

In [7]:
import cv2
import numpy as np
import mediapipe as mp

# Initialise MediaPipe
# NOTE(review): static_image_mode=True is set although this cell feeds webcam
# frames; presumably this disables inter-frame tracking — confirm against the
# MediaPipe docs whether False was intended for the video loop.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Landmark indices of each fingertip and of the joint next to it
# (the joint is averaged with the tip to make the depth comparison more robust)
FINGER_DATA = {
    "thumb": {"tip": 4, "joint": 3},
    "index": {"tip": 8, "joint": 7},
    "middle": {"tip": 12, "joint": 11},
    "ring": {"tip": 16, "joint": 15},
    "pinky": {"tip": 20, "joint": 19}
}

def check_finger_overlap_3d(finger1, finger2, landmarks, img_size, 
                          xy_thresh=0.03, xz_thresh=0.05, yz_thresh=0.05):
    """
    Multi-plane overlap test on the normalised 3-D fingertip coordinates.

    The fingertips count as overlapping when they are close in the XY plane
    and also close in the XZ or the YZ plane. The upper finger is the one
    with the smaller mean z over its tip and adjacent joint.

    :return: (overlapping?, name of the upper finger or None when not overlapping)
    """
    def xyz(point):
        return (point.x, point.y, point.z)

    # Tip and adjacent-joint coordinates for both fingers (normalised).
    spec1, spec2 = FINGER_DATA[finger1], FINGER_DATA[finger2]
    tip1, joint1 = xyz(landmarks[spec1["tip"]]), xyz(landmarks[spec1["joint"]])
    tip2, joint2 = xyz(landmarks[spec2["tip"]]), xyz(landmarks[spec2["joint"]])

    # img_size is unpacked but not used by the distance computations below.
    _width, _height = img_size

    # Per-axis fingertip differences, then the distance in each coordinate plane.
    dx = tip1[0] - tip2[0]
    dy = tip1[1] - tip2[1]
    dz = tip1[2] - tip2[2]
    near_xy = np.sqrt(dx**2 + dy**2) < xy_thresh
    near_xz = np.sqrt(dx**2 + dz**2) < xz_thresh
    near_yz = np.sqrt(dy**2 + dz**2) < yz_thresh

    # XY proximity is mandatory; one of the depth-bearing planes must agree.
    if not (near_xy and (near_xz or near_yz)):
        return False, None

    # Mean depth of tip + joint for each finger; the smaller value wins.
    depth1 = (tip1[2] + joint1[2]) / 2
    depth2 = (tip2[2] + joint2[2]) / 2
    upper = finger1 if depth1 < depth2 else finger2
    return True, upper

# Visualisation
def draw_overlap_result(frame, finger1, finger2, is_overlap, top_finger):
    """Write a one-line overlap verdict for the finger pair onto ``frame``."""
    if not is_overlap:
        verdict = "No overlap"
    elif top_finger:
        verdict = f"Overlap ({top_finger} on top)"
    else:
        verdict = "Overlap (Uncertain)"

    label = f"{finger1}-{finger2}: " + verdict
    cv2.putText(frame, label, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

# Main loop
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera, the MediaPipe graph and the window are
# released even if the loop is interrupted (e.g. by a kernel interrupt) or raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            img_size = (frame.shape[1], frame.shape[0])  # (width, height)

            # Check the index finger against the middle finger (example pair).
            is_overlap, top_finger = check_finger_overlap_3d(
                "index", "middle", hand_landmarks.landmark, img_size
            )
            draw_overlap_result(frame, "Index", "Middle", is_overlap, top_finger)

        cv2.imshow("3D Finger Overlap Detection", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

In [None]:
import numpy as np
import cv2
import mediapipe as mp
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import os

# Initialise the MediaPipe hand model
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Configuration
DATA_DIR = "finger_data"  # directory where collected data is stored
MODEL_PATH = "finger_overlap_model.pkl"  # where the trained model is saved
FINGER_PAIRS = [("index", "middle"), ("thumb", "index")]  # finger pairs to examine
TEST_SIZE = 0.2  # fraction of the data held out for testing
RANDOM_STATE = 42

# Landmark indices belonging to each finger, ordered so that the last entry is the fingertip
FINGER_CONFIG = {
    "thumb": [1, 2, 3, 4],   # thumb: landmarks 1-4
    "index": [5, 6, 7, 8],    # index finger
    "middle": [9, 10, 11, 12], # middle finger
    "ring": [13, 14, 15, 16],  # ring finger
    "pinky": [17, 18, 19, 20]  # little finger
}

def extract_features(hand_landmarks, finger1, finger2):
    """Build the 8-value feature list describing a pair of fingers.

    Layout: fingertip difference (x, y, z), difference between the two
    fingers' first-to-last-landmark direction vectors (x, y, z), and the
    overlap length of the fingers' bounding boxes along x and along y.
    """
    # All landmarks of each finger; the last one is the fingertip.
    first = [hand_landmarks.landmark[idx] for idx in FINGER_CONFIG[finger1]]
    second = [hand_landmarks.landmark[idx] for idx in FINGER_CONFIG[finger2]]

    def direction(points):
        # Vector from the finger's first landmark to its fingertip.
        root, tip = points[0], points[-1]
        return np.array([tip.x - root.x, tip.y - root.y, tip.z - root.z])

    def extent(points, axis):
        # (min, max) of one coordinate over the finger's landmarks.
        values = [getattr(p, axis) for p in points]
        return min(values), max(values)

    # 1. Fingertip offset along each axis.
    tip_a, tip_b = first[-1], second[-1]
    tip_delta = [tip_a.x - tip_b.x, tip_a.y - tip_b.y, tip_a.z - tip_b.z]

    # 2. Difference between the two finger direction vectors.
    direction_delta = direction(first) - direction(second)

    # 3. Bounding-box overlap length on each image axis (0 when disjoint).
    ax_lo, ax_hi = extent(first, "x")
    ay_lo, ay_hi = extent(first, "y")
    bx_lo, bx_hi = extent(second, "x")
    by_lo, by_hi = extent(second, "y")
    bbox_overlap = [
        max(0, min(ax_hi, bx_hi) - max(ax_lo, bx_lo)),
        max(0, min(ay_hi, by_hi) - max(ay_lo, by_lo)),
    ]

    return [*tip_delta, *direction_delta, *bbox_overlap]

def _prompt_choice(prompt, choices=(0, 1)):
    """Ask on stdin until the user enters one of ``choices``; return it as an int."""
    while True:
        raw = input(prompt).strip()
        try:
            value = int(raw)
        except ValueError:
            value = None
        if value in choices:
            return value
        print(f"Invalid input {raw!r}; expected one of {list(choices)}")


def collect_data(num_samples=1000):
    """Interactively collect labelled finger-overlap samples from the webcam.

    Shows a live preview; pressing 's' extracts features for every pair in
    ``FINGER_PAIRS`` from the current frame and asks for the labels on stdin,
    pressing 'q' stops early. Whatever was collected is written to
    ``<DATA_DIR>/finger_data.npz`` with arrays ``X``, ``y_overlap`` and
    ``y_top`` (``-1`` in ``y_top`` means "no overlap").

    :param num_samples: number of frames to save before stopping.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    # Collected rows: one entry per (saved frame, finger pair).
    X = []
    y_overlap = []
    y_top = []
    sample_count = 0

    print(f"正在收集数据，需要{num_samples}个样本...")
    print("请摆出不同手指重叠/不重叠的手势，按's'保存当前帧，按'q'退出")

    cap = cv2.VideoCapture(0)
    try:
        # Also stop when the camera is not (or no longer) open, instead of
        # spinning forever on failed reads.
        while sample_count < num_samples and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to read a frame from the camera; stopping collection.")
                break

            # Detect on the clean frame, before any overlay is drawn onto it.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)
            hand_landmarks = (results.multi_hand_landmarks[0]
                              if results.multi_hand_landmarks else None)

            # Status overlay.
            cv2.putText(frame, f"Collected: {sample_count}/{num_samples}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(frame, "Press 's' to save, 'q' to quit", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if hand_landmarks is not None:
                mp.solutions.drawing_utils.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Refresh the preview and poll the keyboard on every frame, so the
            # window stays live and 'q' works even when no hand is visible.
            cv2.imshow("Data Collection", frame)
            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):
                break
            if key != ord('s'):
                continue
            if hand_landmarks is None:
                print("No hand detected in this frame; nothing saved.")
                continue

            for f1, f2 in FINGER_PAIRS:
                features = extract_features(hand_landmarks, f1, f2)

                # Manual labelling. The labels are read before anything is
                # appended so the three lists always stay the same length.
                print(f"当前手指对: {f1} vs {f2}")
                overlap = _prompt_choice("是否重叠？(0/1): ")
                if overlap:
                    top = _prompt_choice(f"哪根手指在上？(0={f1}, 1={f2}): ")
                else:
                    top = -1  # no overlap is marked as -1

                X.append(features)
                y_overlap.append(overlap)
                y_top.append(top)

            sample_count += 1
            print(f"已保存样本 {sample_count}/{num_samples}")
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

    # Persist the collected data.
    np.savez(os.path.join(DATA_DIR, "finger_data.npz"),
             X=np.array(X),
             y_overlap=np.array(y_overlap),
             y_top=np.array(y_top))
    print(f"数据已保存到 {DATA_DIR}")

def train_model():
    """Train and save the overlap classifier and the top-finger classifier.

    Loads ``<DATA_DIR>/finger_data.npz``, fits one random forest that predicts
    whether the fingers overlap and a second one (on overlapping samples only)
    that predicts which finger is on top, prints a classification report for
    each, and dumps both models plus the feature names to ``MODEL_PATH``.

    :raises FileNotFoundError: if the data file has not been created yet.
    :raises ValueError: if there are too few samples to make a train/test split.
    """
    data_path = os.path.join(DATA_DIR, "finger_data.npz")
    if not os.path.exists(data_path):
        raise FileNotFoundError(
            f"Training data not found at {data_path!r}; run collect_data() first")

    # NpzFile keeps the archive open until closed; the context manager releases
    # it once the arrays have been read into memory.
    with np.load(data_path) as data:
        X = data["X"]
        y_overlap = data["y_overlap"]
        y_top = data["y_top"]

    if len(X) < 2:
        raise ValueError(
            f"Need at least 2 samples to train, found {len(X)} in {data_path!r}")

    # Train/test split for the overlap classifier.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_overlap, test_size=TEST_SIZE, random_state=RANDOM_STATE)

    # Overlap detection model.
    overlap_model = RandomForestClassifier(
        n_estimators=100,
        max_depth=10,
        class_weight="balanced",
        random_state=RANDOM_STATE
    )
    overlap_model.fit(X_train, y_train)

    # Evaluate overlap detection.
    print("\n重叠检测模型性能:")
    print(classification_report(y_test, overlap_model.predict(X_test)))

    # Top-finger model: trained on the overlapping samples only.
    overlap_mask = y_overlap == 1
    X_top = X[overlap_mask]
    y_top = y_top[overlap_mask]
    if len(X_top) < 2:
        raise ValueError(
            f"Need at least 2 overlapping samples to train the top-finger model, "
            f"found {len(X_top)}")

    X_train_top, X_test_top, y_train_top, y_test_top = train_test_split(
        X_top, y_top, test_size=TEST_SIZE, random_state=RANDOM_STATE)

    top_model = RandomForestClassifier(
        n_estimators=50,
        max_depth=5,
        class_weight="balanced",
        random_state=RANDOM_STATE
    )
    top_model.fit(X_train_top, y_train_top)

    # Evaluate top-finger prediction.
    print("\n上下关系模型性能:")
    print(classification_report(y_test_top, top_model.predict(X_test_top)))

    # Save both models together with the feature names.
    model = {
        "overlap_model": overlap_model,
        "top_model": top_model,
        "feature_names": ["tip_x_diff", "tip_y_diff", "tip_z_diff",
                         "vec_x_diff", "vec_y_diff", "vec_z_diff",
                         "bbox_x_overlap", "bbox_y_overlap"]
    }
    joblib.dump(model, MODEL_PATH)
    print(f"\n模型已保存到 {MODEL_PATH}")

# if __name__ == "__main__":
#     # 第一步：收集数据（如果已有数据可以跳过）
#     # collect_data(num_samples=500)
    
#     # 第二步：训练模型
#     train_model()

FileNotFoundError: [Errno 2] No such file or directory: 'finger_data\\finger_data.npz'