In [36]:
import cv2 as cv
import mediapipe as mp
import dataclasses
import time
import numpy as np

In [33]:
model_path = './data/pose_landmarker_heavy.task'

In [34]:
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    num_poses = 1,
    running_mode=VisionRunningMode.VIDEO,
    min_pose_detection_confidence = 0.3,
    min_tracking_confidence = 0.3)    

In [35]:
from mediapipe.framework.formats import landmark_pb2

def draw_landmarks_on_image(rgb_image, detection_result):
    """
    Function used to draw landmarks on images
    Code came from Mediapipe's code examples
    [https://github.com/googlesamples/mediapipe/blob/cd8753722b4c1052a12e019ededdbdebbbc1a313/examples/pose_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Pose_Landmarker.ipynb]
    :param rgb_image: Mediapipe image for drawing landmarks
    :param detection_result: PoseLandmarkerResult object
    :return:
    """
    pose_landmarks_list = detection_result.pose_landmarks
    annotated_image = np.copy(rgb_image)

    # Loop through the detected poses
    for idx in range(len(pose_landmarks_list)):
        pose_landmarks = pose_landmarks_list[idx]

        # Draw the pose landmarks.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
        ])
        mp.solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            mp.solutions.pose.POSE_CONNECTIONS,
            mp.solutions.drawing_styles.get_default_pose_landmarks_style()
        )
    return annotated_image

In [39]:
video_path_dir = './data/train/0a0cc140-6a6f-4644-a6d1-f8c45009a5da.mp4'
cap = cv.VideoCapture(video_path_dir)
with PoseLandmarker.create_from_options(options) as landmarker:

    pose_landmarker_results = []
    while True:
        ret, frame = cap.read()

        if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        pose_landmarker_result = landmarker.detect_for_video(mp_image, int(time.time() * 1000))
        pose_landmarker_results.append(dataclasses.asdict(pose_landmarker_result))

        annotated_image = draw_landmarks_on_image(mp_image.numpy_view(), pose_landmarker_result)
        cv.imshow('Pose Landmarks', annotated_image)

        # Break the loop if 'q' is pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break


cap.release()
cv.destroyAllWindows()
cv.waitKey(1)



I0000 00:00:1701463819.068950       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


Can't receive frame (stream end?). Exiting ...


-1

In [23]:
pose_landmarker_results

[{'pose_landmarks': [[{'x': 0.5178722143173218,
     'y': 0.39901241660118103,
     'z': -0.9575273394584656,
     'visibility': 0.998818576335907,
     'presence': 0.9979127049446106},
    {'x': 0.5401189923286438,
     'y': 0.3763403594493866,
     'z': -0.9002618789672852,
     'visibility': 0.9971351623535156,
     'presence': 0.9951993823051453},
    {'x': 0.5589460134506226,
     'y': 0.3766316771507263,
     'z': -0.9012742042541504,
     'visibility': 0.9969038367271423,
     'presence': 0.9958926439285278},
    {'x': 0.5734947919845581,
     'y': 0.37728995084762573,
     'z': -0.9013708233833313,
     'visibility': 0.9973684549331665,
     'presence': 0.9944251775741577},
    {'x': 0.4904577136039734,
     'y': 0.3762320578098297,
     'z': -0.893762469291687,
     'visibility': 0.9978098273277283,
     'presence': 0.9923970103263855},
    {'x': 0.47466033697128296,
     'y': 0.376823753118515,
     'z': -0.8946189880371094,
     'visibility': 0.9981525540351868,
     'presen

In [32]:
# По каким точкам модель определяет позу
from IPython.display import Image
Image(url='https://developers.google.com/static/mediapipe/images/solutions/pose_landmarks_index.png', width=500)