In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
import os

In [2]:
# YOLOv8-poseモデルの読み込み
model = YOLO('yolov8n-pose.pt')

In [None]:
def process_video(video_path, max_frames=300): 
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def extract_keypoints(frames):
    all_keypoints = []
    for frame in frames:
        results = model(frame)
        if len(results[0].keypoints) > 0:
            # choose the biggest rectangle, the nearest person
            keypoints = results[0].keypoints[0].xy[0].cpu().numpy()
            all_keypoints.append(keypoints)
    return np.array(all_keypoints)

def normalize_keypoints(keypoints):
    hip_index = 11  #  index of left hip
    
    # 各フレームで腰のポイントを基準点として使用
    normalized_keypoints = []
    for frame_keypoints in keypoints:
        hip_point = frame_keypoints[hip_index]
        
        # 腰のポイントを原点(0,0)とし、他のポイントを相対位置として計算
        relative_points = frame_keypoints - hip_point
        
        # schaling, which make the model robust
        shoulder_index = 5  # index of shoulder
        scale_factor = np.linalg.norm(frame_keypoints[shoulder_index] - frame_keypoints[hip_index])
        if scale_factor != 0:
            relative_points /= scale_factor
        
        normalized_keypoints.append(relative_points)
    return np.array(normalized_keypoints)

def process_folder(folder_path, label):
    video_data = []
    for video_file in os.listdir(folder_path):
        if video_file.endswith(('.mp4', '.avi', '.mov')): 
            video_path = os.path.join(folder_path, video_file)
            frames = process_video(video_path)
            keypoints = extract_keypoints(frames)
            if len(keypoints) > 0:
                normalized_keypoints = normalize_keypoints(keypoints)
                video_data.append((normalized_keypoints, label))
    return video_data

# 全てのショットタイプを処理
#shot_types = ['forehand_stroke','forehand_slice','forehand_volley', 'backhand_stroke', 'backhand_volley', 'backhand_slice']  
shot_types = ['forehand_slice',]  
all_data = []
for label, shot_type in enumerate(shot_types):
    folder_path = f'/Users/yusuke.s/Documents/pickleball_videos/{shot_type}' 
    all_data.extend(process_folder(folder_path, label))


0: 384x640 2 persons, 258.0ms
Speed: 21.2ms preprocess, 258.0ms inference, 4.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 198.9ms
Speed: 0.9ms preprocess, 198.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 184.1ms
Speed: 1.3ms preprocess, 184.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 179.6ms
Speed: 1.4ms preprocess, 179.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 211.1ms
Speed: 1.4ms preprocess, 211.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 192.8ms
Speed: 1.6ms preprocess, 192.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 183.0ms
Speed: 5.0ms preprocess, 183.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 181.4ms
Speed: 1.1ms preprocess, 181.4ms inference, 0.7ms postprocess per

In [None]:
import numpy as np
print(np.asarray(all_data).shape)