In [1]:
import os
from os.path import exists

import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks.python import BaseOptions
from mediapipe.tasks.python.vision import PoseLandmarker, PoseLandmarkerOptions, RunningMode

In [11]:
class PoseLandmarkerExtractor:

    def __init__(self, dataset_name, model_path, video_path):
        self.dataset_name = dataset_name
        self.model_path = model_path
        self.video_path = video_path
        self.filename = self.video_path.split('/')[-1].replace('.mp4', '')
        self.pose_world_landmarks = []
        self.pose_normalized_landmarks = []
        self.normalized_npy_path = f"output/extraction/{self.dataset_name}/{self.filename}-normalized"
        self.world_npy_path = f"output/extraction/{self.dataset_name}/{self.filename}-world"
        self.options = PoseLandmarkerOptions(
            base_options=BaseOptions(model_asset_path=self.model_path),
            running_mode=RunningMode.VIDEO
        )

    def __extract_landmarks(self):
        current_timestamp = 0
        with PoseLandmarker.create_from_options(self.options) as landmarker:
            # Capturing the video
            video_capture = cv2.VideoCapture(self.video_path)

            while video_capture.isOpened():
                has_frame, frame = video_capture.read()
                if not has_frame:
                    break

                # Waiting 'q' key be pressed to cancel
                # if cv2.waitKey(1) & 0xFF == ord('q'):
                #     break

                cv2.imshow('Depth Frame', frame)

                # Convert the frame to a NumPy array
                numpy_frame = np.array(frame)

                # Transforming into an Image MediaPipe object
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGBA, data=numpy_frame)

                pose_landmarker_result = landmarker.detect_for_video(mp_image, current_timestamp)
                self.pose_world_landmarks += pose_landmarker_result.pose_world_landmarks
                self.pose_normalized_landmarks += pose_landmarker_result.pose_landmarks
                current_timestamp += 1

    def __export_landmarks_arrays(self):

        np.save(file=self.normalized_npy_path, arr=self.pose_world_landmarks)
        np.save(file=self.world_npy_path, arr=self.pose_normalized_landmarks)

    # def __show_normalized_data(self):
    #     print(f"{10 * '+='} NORMALIZED DATA {10 * '+='}")
    #     helpers.show_npy_data(f'{self.normalized_npy_path}.npy')
    # 
    # def __show_world_data(self):
    #     print(f"{10 * '+='} WORLD DATA {10 * '+='}")
    #     helpers.show_npy_data(f'{self.world_npy_path}.npy')

    def extract(self):
        self.__extract_landmarks()
        self.__export_landmarks_arrays()


In [12]:
POSE_LITE_MODEL_PATH = './models/pose_landmarker_lite.task'
POSE_FULL_MODEL_PATH = './models/pose_landmarker_full.task'
POSE_HEAVY_MODEL_PATH = './models/pose_landmarker_heavy.task'
HAND_MODEL_PATH = './models/hand_landmarker.task'
FACE_MODEL_PATH = './models/face_landmarker.task'

autsl_rgb_files = os.listdir('./dataset/autsl_rgb')
autsl_depth_files = os.listdir('./dataset/autsl_depth')

# Pose extraction - RGB data
for file_path in autsl_rgb_files:
    pose_landmarker_extractor = PoseLandmarkerExtractor(
        dataset_name='autsl_rgb',
        model_path=POSE_HEAVY_MODEL_PATH,
        video_path=f'dataset/autsl_depth/{file_path}'
    )
    pose_landmarker_extractor.extract()

In [9]:
def show_npy_data(dataset_path):
    print(np.load(file=dataset_path, allow_pickle=True))

In [10]:
show_npy_data('./output/extraction/autsl_rgb/signer0_sample1_color-normalized.npy')

[]
