In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
import os

KeyboardInterrupt: 

In [None]:
# YOLOv8-poseモデルの読み込み
model = YOLO('yolov8n-pose.pt')

In [None]:
def process_video(video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
        cap.release()
        return frames
    
def preprocess_frame(frame):
        # Resize the frame to a specific size (e.g., 640x640)
        frame = cv2.resize(frame, (640, 640))
        # Convert BGR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Normalize pixel values to [0, 1]
        frame = frame.astype(np.float32) / 255.0
        # Add batch dimension
        frame = np.expand_dims(frame, axis=0)
        # Permute dimensions to match the model's expected input (BCHW)
        frame = np.transpose(frame, (0, 3, 1, 2))
        return frame



In [15]:
import torch

def extract_keypoints(frames):
    all_keypoints = []
    device = torch.device('mps')  # MPSデバイスを指定
    for frame in frames:
        frame = preprocess_frame(frame)
        frame = torch.tensor(frame, dtype=torch.float32).to(device)  # フレームをGPUに転送
        results = model(frame)
        
        if len(results) > 0 and len(results[0].keypoints) > 0:
            keypoints_list = results[0].keypoints
            bboxes = results[0].boxes  # バウンディングボックスのリスト
            
            if bboxes is not None and len(bboxes) > 0:
                # バウンディングボックスの面積を計算
                try:
                    xyxy = bboxes.xyxy.cpu().numpy()  # バウンディングボックスの座標を取得
                    areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in xyxy]
                except IndexError as e:
                    print(f"Error calculating areas: {e}")
                    print(f"Bounding boxes: {bboxes}")
                    continue
                
                # 最も大きいバウンディングボックスのインデックスを取得
                max_area_index = np.argmax(areas)
                
                # 最も大きいバウンディングボックスに対応するキーポイントを取得
                keypoints = keypoints_list[max_area_index].xy[0].cpu().numpy()
                
                all_keypoints.append(keypoints)
            else:
                print(f"Warning: No bounding boxes found in frame.")
        else:
            print(f"Warning: No keypoints found in frame.")
    
    return all_keypoints  # np.arrayの変換を削除


def normalize_keypoints(keypoints):
    hip_index = 11  #  index of left hip
    shoulder_index = 5  # index of shoulder
    
    normalized_keypoints = []
    for frame_keypoints in keypoints:
        if len(frame_keypoints) > max(hip_index, shoulder_index):
            hip_point = frame_keypoints[hip_index]
            shoulder_point = frame_keypoints[shoulder_index]
            
            # 腰のポイントを原点(0,0)とし、他のポイントを相対位置として計算
            relative_points = frame_keypoints - hip_point
            
            # スケーリング、モデルをロバストにするため
            scale_factor = np.linalg.norm(shoulder_point - hip_point)
            if scale_factor != 0:
                relative_points /= scale_factor
            
            normalized_keypoints.append(relative_points)
        else:
            print(f"Warning: Frame with insufficient keypoints detected. Skipping this frame.")
    
    return np.array(normalized_keypoints)

def process_folder(folder_path, label):
    print(f"Processing folder: {folder_path}")
    video_data = []
    for video_file in os.listdir(folder_path):
        if video_file.endswith(('.mp4', '.avi', '.mov')): 
            video_path = os.path.join(folder_path, video_file)
            frames = process_video(video_path)
            keypoints = extract_keypoints(frames)
            if len(keypoints) > 0:
                normalized_keypoints = normalize_keypoints(keypoints)
                if len(normalized_keypoints) > 0:
                    video_data.append((normalized_keypoints, label))
                else:
                    print(f"Warning: No valid keypoints found in video {video_file}")
            else:
                print(f"Warning: No keypoints detected in video {video_file}")
    return video_data

    # デバッグ情報の出力
    print(f"Number of videos processed: {len(video_data)}")

# 全てのショットタイプを処理
shot_types = ['forehand_stroke','forehand_slice','forehand_volley', 'backhand_stroke', 'backhand_volley', 'backhand_slice'] 
all_data = []
for label, shot_type in enumerate(shot_types):
    folder_path = f'/Users/yusuke.s/Documents/pickleball_videos_2/{shot_type}' 
    all_data.extend(process_folder(folder_path, label))




Processing folder: /Users/yusuke.s/Documents/pickleball_videos_2/swing_begin


0: 640x640 1 person, 1265.5ms
Speed: 2.0ms preprocess, 1265.5ms inference, 90.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 413.4ms
Speed: 1.0ms preprocess, 413.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 378.7ms
Speed: 1.9ms preprocess, 378.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 435.1ms
Speed: 0.7ms preprocess, 435.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 358.4ms
Speed: 0.6ms preprocess, 358.4ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 363.0ms
Speed: 1.6ms preprocess, 363.0ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 365.4ms
Speed: 0.6ms preprocess, 365.4ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 363.2ms
Speed: 0.7ms preprocess, 363.2ms inference, 3.6ms postprocess per image 

Processing folder: /Users/yusuke.s/Documents/pickleball_videos_2/swing_middle


0: 640x640 2 persons, 742.9ms
Speed: 0.8ms preprocess, 742.9ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 684.7ms
Speed: 4.7ms preprocess, 684.7ms inference, 6.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 701.7ms
Speed: 1.4ms preprocess, 701.7ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 672.6ms
Speed: 1.8ms preprocess, 672.6ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 720.9ms
Speed: 1.5ms preprocess, 720.9ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 764.7ms
Speed: 1.2ms preprocess, 764.7ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 639.7ms
Speed: 1.0ms preprocess, 639.7ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 688.2ms
Speed: 0.8ms preprocess, 688.2ms inference, 4.3ms postprocess per i

Processing folder: /Users/yusuke.s/Documents/pickleball_videos_2/swing_end


0: 640x640 4 persons, 649.9ms
Speed: 1.2ms preprocess, 649.9ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 413.4ms
Speed: 0.7ms preprocess, 413.4ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 396.1ms
Speed: 0.7ms preprocess, 396.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 399.1ms
Speed: 0.6ms preprocess, 399.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 401.1ms
Speed: 0.7ms preprocess, 401.1ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 402.3ms
Speed: 0.7ms preprocess, 402.3ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 399.6ms
Speed: 0.7ms preprocess, 399.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 4 persons, 399.6ms
Speed: 0.7ms preprocess, 399.6ms inference, 3.0ms postprocess per i

In [16]:
import numpy as np
# Convert to numpy array
all_data_array = np.array(all_data, dtype=object)
# Save to .npy file
np.save('all_data.npy', all_data_array)

In [17]:
all_data_array.shape

(831, 2)

In [3]:
import numpy as np
all_data = np.load('all_data.npy', allow_pickle=True)

In [4]:
def pad_sequences(sequences, max_length):
    # シーケンスの長さを揃える
    return [seq[:max_length] if len(seq) > max_length else np.pad(seq, ((0, max_length - len(seq)), (0, 0), (0, 0)), 'constant') for seq in sequences]

# データの整形
X = [data[0] for data in all_data]
y = [data[1] for data in all_data]

# シーケンスの長さを揃える（例：最大150フレーム）
X_padded = pad_sequences(X, 150)

# numpy配列に変換
X_array = np.array(X_padded)
y_array = np.array(y)

# one-hot エンコーディング
shot_types = ['forehand_stroke','forehand_slice','forehand_volley', 'backhand_stroke', 'backhand_volley', 'backhand_slice']  
y_onehot = np.eye(len(shot_types))[y_array]

In [None]:
#import os
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import Sequence
import tensorflow as tf

In [None]:
class DataGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        # 形状を変更: (batch_size, 150, 17, 2) -> (batch_size, 150, 34)
        batcimport psutil

cpu_percent = psutil.cpu_percent(percpu=True)
mem = psutil.virtual_memory() 

print('cpu: ',cpu_percent)
print('memory: ',mem)import psutil

cpu_percent = psutil.cpu_percent(percpu=True)
mem = psutil.virtual_memory() 

print('cpu: ',cpu_percent)
print('memory: ',mem)h_x_reshaped = batch_x.reshape(batch_x.shape[0], batch_x.shape[1], -1)
        return batch_x_reshaped, batch_y

In [1]:
import psutil

cpu_percent = psutil.cpu_percent(percpu=True)
mem = psutil.virtual_memory() 

print('cpu: ',cpu_percent)
print('memory: ',mem)

cpu:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
memory:  svmem(total=8589934592, available=689143808, percent=92.0, used=1021968384, free=10981376, active=682627072, inactive=645713920, wired=339341312)


In [None]:

def create_model(sequence_length, num_keypoints, num_coords, num_classes):
    model = Sequential([
        TimeDistributed(Flatten(), input_shape=(sequence_length, num_keypoints, num_coords)),
        LSTM(64, return_sequences=True),
        LSTM(32),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# トレーニング、検証、テストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X_array, y_onehot, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# データジェネレータの作成
train_generator = DataGenerator(X_train, y_train, batch_size=16)
val_generator = DataGenerator(X_val, y_val, batch_size=16)
test_generator = DataGenerator(X_test, y_test, batch_size=16)

# モデルの作成
sequence_length = 150
num_keypoints = 17
num_coords = 2
num_classes = 6
model = create_model(sequence_length, num_keypoints, num_coords, num_classes)

# Early Stoppingの設定
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# モデルのトレーニング
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=[early_stopping]
)

# モデルの評価
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test accuracy: {test_accuracy:.4f}")