In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Configure necessary libraries

In [2]:
!pip install ultralytics
!pip install mediapipe

Collecting ultralytics
  Downloading ultralytics-8.3.27-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.10-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.27-py3-none-any.whl (878 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m879.0/879.0 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.10-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.27 ultralytics-thop-2.0.10
Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp3

# Setting the Path

In [21]:
"""
# Already completed videos:

Front_T1F01.mp4, Front_T1F02.mp4, Front_T2F01.mp4, Front_T2F02.mp4, Front_T3F01.mp4, Front_T3F02.mp4,
Front_T1N01.mp4, Front_T1N02.mp4, Front_T2N01.mp4, Front_T2N02.mp4, Front_T3N01.mp4, Front_T3N02.mp4,
Front_T2S01.mp4, Front_T2S02.mp4, Front_T3S01.mp4, Frent_T3S02.mp4.

# Processed separately：
(Front_T1S01.mp4, Front_T1S02.mp4),
"""

# 设置视频文件路径
VIDEO_PATH = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Video/Front_T1S01.mp4'  # 视频路径

# 设置保存路径
MARKED_VIDEO_PATH = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Ann_Video/KeyPoint_Ann_video/Front_T1S01.mp4'  # 保存带标记视频的路径
BODY_KEYPOINTS_CSV_PATH = '/content/drive/MyDrive/Random/Hiroshima/ABC2025/New_Data/Body_KeyPoint_CSV/Front_T1S01.csv'  # 保存人体骨架数据的CSV路径

# Main

In [12]:
import cv2
import random
import pandas as pd
from ultralytics import YOLO
import matplotlib.pyplot as plt

In [13]:
# 加载YOLO11模型
# model = YOLO('yolo11n-pose.pt')
model = YOLO('yolo11m-pose.pt')

In [14]:
# 定义骨骼点的名称
keypoint_names = ["nose", "right_eye", "left_eye", "right_ear", "left_ear",
                  "right_shoulder", "left_shoulder", "right_elbow", "left_elbow",
                  "right_wrist", "left_wrist", "right_hip", "left_hip",
                  "right_knee", "left_knee", "right_ankle", "left_ankle"]

In [15]:
# 定义骨架连接顺序
skeleton_map = [
    # 腿部
    {'srt_kpt_id': 15, 'dst_kpt_id': 13, 'color': (255, 182, 193), 'thickness': 2},  # 右脚踝到右膝
    {'srt_kpt_id': 13, 'dst_kpt_id': 11, 'color': (255, 182, 193), 'thickness': 2},  # 右膝到右臀
    {'srt_kpt_id': 16, 'dst_kpt_id': 14, 'color': (255, 182, 193), 'thickness': 2},  # 左脚踝到左膝
    {'srt_kpt_id': 14, 'dst_kpt_id': 12, 'color': (255, 182, 193), 'thickness': 2},  # 左膝到左臀

    # 躯干
    {'srt_kpt_id': 11, 'dst_kpt_id': 12, 'color': (0, 255, 0), 'thickness': 2},       # 右臀到左臀
    {'srt_kpt_id': 5, 'dst_kpt_id': 11, 'color': (0, 255, 0), 'thickness': 2},        # 右肩到右臀
    {'srt_kpt_id': 6, 'dst_kpt_id': 12, 'color': (0, 255, 0), 'thickness': 2},        # 左肩到左臀
    {'srt_kpt_id': 5, 'dst_kpt_id': 6, 'color': (0, 255, 0), 'thickness': 2},         # 右肩到左肩

    # 手臂
    {'srt_kpt_id': 5, 'dst_kpt_id': 7, 'color': (0, 0, 255), 'thickness': 2},         # 右肩到右肘
    {'srt_kpt_id': 6, 'dst_kpt_id': 8, 'color': (0, 0, 255), 'thickness': 2},         # 左肩到左肘
    {'srt_kpt_id': 7, 'dst_kpt_id': 9, 'color': (0, 0, 255), 'thickness': 2},         # 右肘到右腕
    {'srt_kpt_id': 8, 'dst_kpt_id': 10, 'color': (0, 0, 255), 'thickness': 2},        # 左肘到左腕

    # 脸部
    {'srt_kpt_id': 1, 'dst_kpt_id': 2, 'color': (255, 165, 0), 'thickness': 2},       # 右眼到左眼
    {'srt_kpt_id': 0, 'dst_kpt_id': 1, 'color': (255, 165, 0), 'thickness': 2},       # 鼻到右眼
    {'srt_kpt_id': 0, 'dst_kpt_id': 2, 'color': (255, 165, 0), 'thickness': 2},       # 鼻到左眼
    {'srt_kpt_id': 1, 'dst_kpt_id': 3, 'color': (255, 165, 0), 'thickness': 2},       # 右眼到右耳
    {'srt_kpt_id': 2, 'dst_kpt_id': 4, 'color': (255, 165, 0), 'thickness': 2},       # 左眼到左耳
    {'srt_kpt_id': 3, 'dst_kpt_id': 5, 'color': (255, 165, 0), 'thickness': 2},       # 右耳到右肩
    {'srt_kpt_id': 4, 'dst_kpt_id': 6, 'color': (255, 165, 0), 'thickness': 2}        # 左耳到左肩
]

In [16]:
# 定义连接绘制函数
def draw_skeleton(annotated_frame, keypoints, skeleton_map):
    for connection in skeleton_map:
        srt_kpt_id = connection['srt_kpt_id']
        dst_kpt_id = connection['dst_kpt_id']
        color = connection['color']
        thickness = connection['thickness']

        # 获取起始和结束关键点的坐标
        x1, y1 = keypoints.get(f"{keypoint_names[srt_kpt_id]}_x", -1), keypoints.get(f"{keypoint_names[srt_kpt_id]}_y", -1)
        x2, y2 = keypoints.get(f"{keypoint_names[dst_kpt_id]}_x", -1), keypoints.get(f"{keypoint_names[dst_kpt_id]}_y", -1)

        # 检查关键点是否有效 (非 -1 且不在 (0, 0) 位置)
        if (x1 != -1 and y1 != -1 and (x1, y1) != (0, 0)) and (x2 != -1 and y2 != -1 and (x2, y2) != (0, 0)):
            # 画出连接线
            cv2.line(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)

In [17]:
# 可视化和保存函数（适用于没有背景干扰的情况）
def visualize_and_save(video_path, body_keypoints_csv_path, marked_video_path):
    cap = cv2.VideoCapture(video_path)
    frame_index = 0
    body_all_keypoints = []

    # 设置视频输出
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(marked_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        annotated_frame = frame.copy()

        # YOLO 进行骨架点检测
        body_results = model(frame)
        for result in body_results:
            keypoints_with_conf = result.keypoints.data.tolist()  # 包含 x, y, conf
            if len(keypoints_with_conf) == 0:
                continue  # 如果没有检测到关键点，跳过
            body_keypoints_with_frame = {"frame": frame_index}
            for i, (x, y, conf) in enumerate(keypoints_with_conf[0]):
                if i < len(keypoint_names):
                    body_keypoints_with_frame[f"{keypoint_names[i]}_x"] = x
                    body_keypoints_with_frame[f"{keypoint_names[i]}_y"] = y
                    body_keypoints_with_frame[f"{keypoint_names[i]}_conf"] = conf
                    cv2.circle(annotated_frame, (int(x), int(y)), 3, (0, 255, 0), -1)
            body_all_keypoints.append(body_keypoints_with_frame)

            # 绘制骨架连线
            draw_skeleton(annotated_frame, body_keypoints_with_frame, skeleton_map)

        out.write(annotated_frame)
        frame_index += 1

    # 保存人体骨架点数据到 CSV 文件
    body_df = pd.DataFrame(body_all_keypoints)
    body_df.to_csv(body_keypoints_csv_path, index=False)

    cap.release()
    out.release()
    print(f"标记视频已保存到 {marked_video_path}")
    print(f"人体骨架数据已保存到 {body_keypoints_csv_path}")

# 提取人体骨架点数据并保存
visualize_and_save(VIDEO_PATH, BODY_KEYPOINTS_CSV_PATH, MARKED_VIDEO_PATH)

# Dealing with background noise

In [19]:
import cv2
from ultralytics import YOLO
from IPython.display import display, Image
import PIL.Image
import io

In [22]:
"""
Only for videos:

Front_T1S01.mp4, Front_T1S02.mp4.
"""


# 加载YOLO11模型
model = YOLO('yolo11m-pose.pt')

# 设置边界框 (左, 上, 右, 下) - 1920x1080
bbox = (300, 0, 1920, 1080)

def show_bbox(video_path, show_frames=1):
    cap = cv2.VideoCapture(video_path)
    frame_index = 0

    while cap.isOpened() and frame_index < show_frames:
        ret, frame = cap.read()
        if not ret:
            break

        annotated_frame = frame.copy()

        # 显示边界框
        cv2.rectangle(annotated_frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)  # 绿色边界框用于展示

        # 转换为JPG格式
        _, img_encoded = cv2.imencode('.jpg', annotated_frame)
        img_bytes = img_encoded.tobytes()
        img = PIL.Image.open(io.BytesIO(img_bytes))

        # 显示图片
        display(img)

        frame_index += 1

    cap.release()
    print(f"前 {show_frames} 帧已显示完毕")

# 显示前几帧带边界框的视频
show_bbox(VIDEO_PATH)

Output hidden; open in https://colab.research.google.com to view.

In [23]:
# 定义骨骼点的名称
keypoint_names = ["nose", "right_eye", "left_eye", "right_ear", "left_ear",
                  "right_shoulder", "left_shoulder", "right_elbow", "left_elbow",
                  "right_wrist", "left_wrist", "right_hip", "left_hip",
                  "right_knee", "left_knee", "right_ankle", "left_ankle"]

In [24]:
# 定义骨架连接顺序
skeleton_map = [
    # 腿部
    {'srt_kpt_id': 15, 'dst_kpt_id': 13, 'color': (255, 182, 193), 'thickness': 2},  # 右脚踝到右膝
    {'srt_kpt_id': 13, 'dst_kpt_id': 11, 'color': (255, 182, 193), 'thickness': 2},  # 右膝到右臀
    {'srt_kpt_id': 16, 'dst_kpt_id': 14, 'color': (255, 182, 193), 'thickness': 2},  # 左脚踝到左膝
    {'srt_kpt_id': 14, 'dst_kpt_id': 12, 'color': (255, 182, 193), 'thickness': 2},  # 左膝到左臀

    # 躯干
    {'srt_kpt_id': 11, 'dst_kpt_id': 12, 'color': (0, 255, 0), 'thickness': 2},       # 右臀到左臀
    {'srt_kpt_id': 5, 'dst_kpt_id': 11, 'color': (0, 255, 0), 'thickness': 2},        # 右肩到右臀
    {'srt_kpt_id': 6, 'dst_kpt_id': 12, 'color': (0, 255, 0), 'thickness': 2},        # 左肩到左臀
    {'srt_kpt_id': 5, 'dst_kpt_id': 6, 'color': (0, 255, 0), 'thickness': 2},         # 右肩到左肩

    # 手臂
    {'srt_kpt_id': 5, 'dst_kpt_id': 7, 'color': (0, 0, 255), 'thickness': 2},         # 右肩到右肘
    {'srt_kpt_id': 6, 'dst_kpt_id': 8, 'color': (0, 0, 255), 'thickness': 2},         # 左肩到左肘
    {'srt_kpt_id': 7, 'dst_kpt_id': 9, 'color': (0, 0, 255), 'thickness': 2},         # 右肘到右腕
    {'srt_kpt_id': 8, 'dst_kpt_id': 10, 'color': (0, 0, 255), 'thickness': 2},        # 左肘到左腕

    # 脸部
    {'srt_kpt_id': 1, 'dst_kpt_id': 2, 'color': (255, 165, 0), 'thickness': 2},       # 右眼到左眼
    {'srt_kpt_id': 0, 'dst_kpt_id': 1, 'color': (255, 165, 0), 'thickness': 2},       # 鼻到右眼
    {'srt_kpt_id': 0, 'dst_kpt_id': 2, 'color': (255, 165, 0), 'thickness': 2},       # 鼻到左眼
    {'srt_kpt_id': 1, 'dst_kpt_id': 3, 'color': (255, 165, 0), 'thickness': 2},       # 右眼到右耳
    {'srt_kpt_id': 2, 'dst_kpt_id': 4, 'color': (255, 165, 0), 'thickness': 2},       # 左眼到左耳
    {'srt_kpt_id': 3, 'dst_kpt_id': 5, 'color': (255, 165, 0), 'thickness': 2},       # 右耳到右肩
    {'srt_kpt_id': 4, 'dst_kpt_id': 6, 'color': (255, 165, 0), 'thickness': 2}        # 左耳到左肩
]

In [25]:
# 加载YOLO11模型
model = YOLO('yolo11m-pose.pt')

# 设置边界框 (左, 上, 右, 下) - 1920x1080
bbox = (300, 0, 1920, 1080)

# 绘制骨架连接的函数
def draw_skeleton(annotated_frame, keypoints, skeleton_map):
    for connection in skeleton_map:
        srt_kpt_id = connection['srt_kpt_id']
        dst_kpt_id = connection['dst_kpt_id']
        color = connection['color']
        thickness = connection['thickness']

        # 获取起始和结束关键点的坐标
        x1, y1 = keypoints.get(f"{keypoint_names[srt_kpt_id]}_x", -1), keypoints.get(f"{keypoint_names[srt_kpt_id]}_y", -1)
        x2, y2 = keypoints.get(f"{keypoint_names[dst_kpt_id]}_x", -1), keypoints.get(f"{keypoint_names[dst_kpt_id]}_y", -1)

        # 检查关键点是否有效 (非 -1 且不在 (0, 0) 位置)
        if (x1 != -1 and y1 != -1 and (x1, y1) != (0, 0)) and (x2 != -1 and y2 != -1 and (x2, y2) != (0, 0)):
            # 画出连接线
            cv2.line(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)

# 可视化和保存函数（适用于有背景干扰的情况）
def visualize_and_save_with_bbox(video_path, body_keypoints_csv_path, marked_video_path):
    cap = cv2.VideoCapture(video_path)
    frame_index = 0
    body_all_keypoints = []

    # 设置视频输出
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(marked_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # 裁剪出边界框内的区域
        cropped_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        annotated_frame = frame.copy()

        # YOLO 进行骨架点检测
        body_results = model(cropped_frame)
        for result in body_results:
            keypoints_with_conf = result.keypoints.data.tolist()  # 包含 x, y, conf
            if len(keypoints_with_conf) == 0:
                continue  # 如果没有检测到关键点，跳过
            body_keypoints_with_frame = {"Frame": frame_index}
            for i, (x, y, conf) in enumerate(keypoints_with_conf[0]):
                # 将关键点坐标转换回原始帧坐标
                x += bbox[0]
                y += bbox[1]

                if i < len(keypoint_names):
                    body_keypoints_with_frame[f"{keypoint_names[i]}_x"] = x
                    body_keypoints_with_frame[f"{keypoint_names[i]}_y"] = y
                    body_keypoints_with_frame[f"{keypoint_names[i]}_conf"] = conf
                    cv2.circle(annotated_frame, (int(x), int(y)), 3, (0, 255, 0), -1)
            body_all_keypoints.append(body_keypoints_with_frame)

            # 绘制骨架连线
            draw_skeleton(annotated_frame, body_keypoints_with_frame, skeleton_map)

        out.write(annotated_frame)
        frame_index += 1

    # 保存人体骨架点数据到 CSV 文件
    body_df = pd.DataFrame(body_all_keypoints)
    body_df.to_csv(body_keypoints_csv_path, index=False)

    cap.release()
    out.release()
    print(f"标记视频已保存到 {marked_video_path}")
    print(f"人体骨架数据已保存到 {body_keypoints_csv_path}")

# 提取人体骨架点数据并保存
visualize_and_save(VIDEO_PATH, BODY_KEYPOINTS_CSV_PATH, MARKED_VIDEO_PATH)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Speed: 2.3ms preprocess, 14.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.6ms
Speed: 2.2ms preprocess, 14.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.1ms
Speed: 3.0ms preprocess, 14.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.2ms
Speed: 2.9ms preprocess, 14.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.8ms
Speed: 2.5ms preprocess, 14.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.2ms
Speed: 2.3ms preprocess, 14.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 17.5ms
Speed: 2.5ms preprocess, 17.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 14.4ms
Speed: 2.3ms preprocess, 14.4ms inference, 1.6ms postprocess per image at sha

KeyboardInterrupt: 