In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path
# (requires the Colab runtime that provides `google.colab`).
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Location of the training CSV on the mounted drive.
file_path = '/content/drive/Shareddrives/Vision/train_data.csv'  # Change this path to match your own environment.
df = pd.read_csv(file_path, sep=",")

In [None]:
# Columns to remove: subject metadata, body measurements, and camera/image/file
# bookkeeping fields. The pose labels, keypoint coordinates and the
# altitude_angle / heading_angle columns are kept.
columns_to_drop = ['subject_id', 'height', 'weight', 'BMI', 'sex', 'age',
                   'arm_length', 'chest_length', 'waist_length', 'hip',
                   'inseam', 'outseam', 'camera_id', 'image_width',
                   'image_height', 'altitude', 'heading', 'file_name',
                   'is_training']

# Drop the columns into a new frame; `df` itself is left unchanged.
df_cleaned = df.drop(columns=columns_to_drop)
df_cleaned.head()

Unnamed: 0,pose_id,pose_name,Head_x_3d,Head_y_3d,Head_z_3d,Neck_x_3d,Neck_y_3d,Neck_z_3d,Thorax_x_3d,Thorax_y_3d,...,Right_Foot_Index_x_2d,Right_Foot_Index_y_2d,Left_Foot_Index_x_2d,Left_Foot_Index_y_2d,Right_Heel_x_2d,Right_Heel_y_2d,Left_Heel_x_2d,Left_Heel_y_2d,altitude_angle,heading_angle
0,R,머리 뒤 깍지를 낀 자세,2.653502,170.639897,4.631431,2.638867,153.686852,2.128825,1.318643,145.891601,...,126.209368,315.361334,170.10422,314.88489,131.686023,322.283141,164.945129,321.823439,0,180
1,R,머리 뒤 깍지를 낀 자세,2.653502,170.639897,4.631431,2.638867,153.686852,2.128825,1.318643,145.891601,...,158.073254,233.034442,141.127402,261.501437,142.982483,230.016581,130.076802,249.555423,300,60
2,R,머리 뒤 깍지를 낀 자세,2.653502,170.639897,4.631431,2.638867,153.686852,2.128825,1.318643,145.891601,...,132.336624,326.3767,133.480067,309.760961,152.811668,323.643854,152.557601,311.432147,0,270
3,R,머리 뒤 깍지를 낀 자세,2.653502,170.639897,4.631431,2.638867,153.686852,2.128825,1.318643,145.891601,...,165.444543,319.424269,125.054263,328.004993,149.92185,314.38686,121.65252,319.302089,0,30
4,R,머리 뒤 깍지를 낀 자세,2.653502,170.639897,4.631431,2.638867,153.686852,2.128825,1.318643,145.891601,...,153.128212,297.249447,119.653436,282.684171,156.356303,284.217519,133.292159,273.901865,330,330


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def analyze_pose_dataset(df):
    """Print and return summary statistics for a pose dataset.

    The analysis is computed from the ``df`` argument itself, so the function
    works on any frame passed in and does not depend on notebook globals.

    Args:
        df: DataFrame containing a ``pose_name`` column, coordinate columns
            whose names contain ``_3d`` / ``_2d``, and the ``altitude_angle``
            and ``heading_angle`` columns.

    Returns:
        dict with four entries:
            ``pose_distribution`` - ``value_counts()`` of ``pose_name``;
            ``coord_3d_stats``    - min/max/mean/std per ``_3d`` column;
            ``coord_2d_stats``    - min/max/mean/std per ``_2d`` column;
            ``angle_stats``       - ``describe()`` of the two angle columns.

    Raises:
        KeyError: if ``pose_name`` or either angle column is missing.
    """
    # 1. Class distribution
    print("=== 포즈 클래스 분포 ===")
    pose_distribution = df['pose_name'].value_counts()
    print(pose_distribution)
    print("\n총 클래스 수:", len(pose_distribution))

    # 2. Coordinate ranges, one column of statistics per coordinate column
    summary_funcs = ['min', 'max', 'mean', 'std']

    coords_3d = [col for col in df.columns if '_3d' in col]
    coord_3d_stats = df[coords_3d].agg(summary_funcs)

    coords_2d = [col for col in df.columns if '_2d' in col]
    coord_2d_stats = df[coords_2d].agg(summary_funcs)

    # Print the min/max/mean/std tables directly. Calling .describe() on them
    # would summarise the four statistic rows themselves (count == 4), which
    # says nothing about the coordinate ranges.
    print("\n=== 3D 좌표 범위 ===")
    print(coord_3d_stats)

    print("\n=== 2D 좌표 범위 ===")
    print(coord_2d_stats)

    # 3. Camera angle statistics
    print("\n=== 각도 정보 통계 ===")
    angle_stats = df[['altitude_angle', 'heading_angle']].describe()
    print(angle_stats)

    return {
        'pose_distribution': pose_distribution,
        'coord_3d_stats': coord_3d_stats,
        'coord_2d_stats': coord_2d_stats,
        'angle_stats': angle_stats
    }

# Usage example: run the analysis on the cleaned frame.
results = analyze_pose_dataset(df_cleaned)

=== 포즈 클래스 분포 ===
pose_name
머리 뒤 깍지를 낀 자세                  15264
달리기(전력질주)                      15264
통화하는 자세                        15264
뒷짐                             15264
허리 회전을 최대로 한 자세                15264
한 손과 반대편 발을 들며 신난 자세           15264
T포즈                            15264
벽에 기대어 신발 신기                   15264
막대를 양손으로 잡고 골반 뒤쪽으로 쭉 뻗은 자세    15264
조깅                             15264
한다리 올리고 편하게 앉은 자세              15264
계단 오르기                         15264
I포즈                            15264
팔짱                             15264
몸을 앞으로 숙인 자세                   15264
기지개                            15264
A포즈                            15264
발레                             15264
의자에 앉은 자세                      15264
공을 던지려고 힘을 주는 자세               15264
Name: count, dtype: int64

총 클래스 수: 20

=== 3D 좌표 범위 ===
       Head_x_3d   Head_y_3d  Head_z_3d  Neck_x_3d   Neck_y_3d  Neck_z_3d  \
count   4.000000    4.000000   4.000000   4.000000    4.000000   4.000000   
mean    8.530529  121

In [None]:
# Keypoints shared by OpenPose, MediaPipe and this dataset.
# For each joint name the entry stores:
#   "OpenPose"  - the joint's index in the OpenPose output,
#   "MediaPipe" - the joint's index in the MediaPipe output,
#   "Current"   - the [x, y] column names of that joint in this dataset's 2D data.
# Each row below is (joint name, OpenPose index, MediaPipe index); the trailing
# comment gives the OpenPose / MediaPipe landmark names.
COMMON_KEYPOINTS = {
    joint: {
        "OpenPose": openpose_index,
        "MediaPipe": mediapipe_index,
        "Current": [f"{joint}_x_2d", f"{joint}_y_2d"],
    }
    for joint, openpose_index, mediapipe_index in (
        ("Nose", 0, 0),             # Nose / nose
        ("Right_Eye", 15, 5),       # REye / right_eye
        ("Left_Eye", 16, 2),        # LEye / left_eye
        ("Right_Ear", 17, 8),       # REar / right_ear
        ("Left_Ear", 18, 7),        # LEar / left_ear
        ("Right_Shoulder", 2, 12),  # RShoulder / right_shoulder
        ("Left_Shoulder", 5, 11),   # LShoulder / left_shoulder
        ("Right_Elbow", 3, 14),     # RElbow / right_elbow
        ("Left_Elbow", 6, 13),      # LElbow / left_elbow
        ("Right_Wrist", 4, 16),     # RWrist / right_wrist
        ("Left_Wrist", 7, 15),      # LWrist / left_wrist
        ("Right_Hip", 9, 24),       # RHip / right_hip
        ("Left_Hip", 12, 23),       # LHip / left_hip
        ("Right_Knee", 10, 26),     # RKnee / right_knee
        ("Left_Knee", 13, 25),      # LKnee / left_knee
        ("Right_Ankle", 11, 28),    # RAnkle / right_ankle
        ("Left_Ankle", 14, 27),     # LAnkle / left_ankle
    )
}

In [None]:
# Compare the raw and the cleaned frame: column lists, summaries, and the set of
# columns that were removed.
print("=== df 컬럼 ===")
print(df.columns)
print("\n=== df_cleaned 컬럼 ===")
print(df_cleaned.columns)
print("=== df 정보 ===")
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
df.info()
print("\n=== df_cleaned 정보 ===")
df_cleaned.info()
print("=== 제거된 컬럼 ===")
print(set(df.columns) - set(df_cleaned.columns))

=== df 컬럼 ===
Index(['subject_id', 'pose_id', 'pose_name', 'height', 'weight', 'BMI', 'sex',
       'age', 'arm_length', 'chest_length',
       ...
       'Right_Heel_x_2d', 'Right_Heel_y_2d', 'Left_Heel_x_2d',
       'Left_Heel_y_2d', 'image_width', 'image_height', 'altitude_angle',
       'heading_angle', 'file_name', 'is_training'],
      dtype='object', length=183)

=== df_cleaned 컬럼 ===
Index(['pose_id', 'pose_name', 'Head_x_3d', 'Head_y_3d', 'Head_z_3d',
       'Neck_x_3d', 'Neck_y_3d', 'Neck_z_3d', 'Thorax_x_3d', 'Thorax_y_3d',
       ...
       'Right_Foot_Index_x_2d', 'Right_Foot_Index_y_2d',
       'Left_Foot_Index_x_2d', 'Left_Foot_Index_y_2d', 'Right_Heel_x_2d',
       'Right_Heel_y_2d', 'Left_Heel_x_2d', 'Left_Heel_y_2d', 'altitude_angle',
       'heading_angle'],
      dtype='object', length=164)
=== df 정보 ===
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 305280 entries, 0 to 305279
Columns: 183 entries, subject_id to is_training
dtypes: bool(1), float64(169), int64(7

In [None]:
def drop_3d_columns(df_cleaned):
    """Return a frame holding only the label, angle and 2D coordinate columns.

    Args:
        df_cleaned: DataFrame containing ``pose_id``, ``pose_name``,
            ``altitude_angle``, ``heading_angle`` and coordinate columns.

    Returns:
        DataFrame with the four required columns first, followed by every
        column whose name contains ``_2d`` (in their original order).
        The column counts before and after are printed.
    """
    required = ['pose_id', 'pose_name', 'altitude_angle', 'heading_angle']
    planar = list(filter(lambda name: '_2d' in name, df_cleaned.columns))

    df_2d = df_cleaned[required + planar]

    n_before = df_cleaned.shape[1]
    n_after = df_2d.shape[1]
    print(f"Original columns: {n_before}")
    print(f"Columns after dropping 3D: {n_after}")

    return df_2d

# Drop the 3D data, keeping only labels, angles and 2D keypoints.
df_2d = drop_3d_columns(df_cleaned)

Original columns: 164
Columns after dropping 3D: 68


In [None]:
import pandas as pd
import numpy as np

class KeypointStandardizer:
    """Convert between the dataset's 2D keypoint columns and standard names.

    The dataset names a joint's columns ``<Joint>_x_2d`` / ``<Joint>_y_2d``
    (the ``"Current"`` entry of the keypoint mapping); the standard form is
    ``<Joint>_x`` / ``<Joint>_y``.
    """

    def __init__(self, common_keypoints=None):
        """Create a standardizer.

        Args:
            common_keypoints: optional mapping ``joint name -> info`` where
                ``info['Current']`` is the ``[x_column, y_column]`` pair of the
                joint in the dataset. Defaults to the module-level
                ``COMMON_KEYPOINTS``.
        """
        self.common_keypoints = (
            COMMON_KEYPOINTS if common_keypoints is None else common_keypoints
        )

    def standardize_keypoints(self, df_2d):
        """Convert the keypoint columns of a frame to the standard format.

        Implemented as a column selection plus rename, so no Python-level row
        loop is needed.

        Args:
            df_2d: DataFrame with ``pose_id``, ``pose_name`` and the dataset's
                2D keypoint columns.

        Returns:
            New DataFrame with a fresh 0..n-1 index and the columns
            ``pose_id``, ``pose_name``, then ``<Joint>_x`` / ``<Joint>_y`` for
            every joint of the mapping whose two source columns are both
            present. Joints with a missing source column are skipped.

        Raises:
            KeyError: if ``pose_id`` or ``pose_name`` is missing.
        """
        source_cols = ['pose_id', 'pose_name']
        rename_map = {}

        for keypoint_name, info in self.common_keypoints.items():
            current_cols = info['Current']
            if all(col in df_2d.columns for col in current_cols):
                source_cols.extend([current_cols[0], current_cols[1]])
                rename_map[current_cols[0]] = f'{keypoint_name}_x'
                rename_map[current_cols[1]] = f'{keypoint_name}_y'

        # reset_index gives the same 0..n-1 index a list-of-records frame has.
        return (
            df_2d[source_cols]
            .rename(columns=rename_map)
            .reset_index(drop=True)
        )

    def inverse_standardize(self, df_2d):
        """Convert standardized keypoint columns back to the dataset's names.

        Args:
            df_2d: DataFrame in the standard format (``<Joint>_x`` /
                ``<Joint>_y`` columns).

        Returns:
            New DataFrame with one row per input row (input index preserved)
            and the dataset's ``<Joint>_x_2d`` / ``<Joint>_y_2d`` columns for
            every joint of the mapping whose standardized columns are both
            present. Non-keypoint columns are not carried over.
        """
        rename_map = {}

        for keypoint_name, info in self.common_keypoints.items():
            current_cols = info['Current']
            std_x = f'{keypoint_name}_x'
            std_y = f'{keypoint_name}_y'
            if std_x in df_2d.columns and std_y in df_2d.columns:
                rename_map[std_x] = current_cols[0]
                rename_map[std_y] = current_cols[1]

        return df_2d[list(rename_map)].rename(columns=rename_map)

def process_dataset(df_2d):
    """Standardize the keypoints of a dataset, then centre and scale each pose.

    Processing steps:
        1. Rename the keypoint columns to the standard ``<Joint>_x`` /
           ``<Joint>_y`` form via ``KeypointStandardizer``.
        2. Translate every pose so the midpoint of the two hips is the origin.
        3. Divide every coordinate by the distance between the two shoulders.

    Args:
        df_2d: DataFrame with ``pose_id``, ``pose_name`` and 2D keypoint columns.

    Returns:
        The standardized DataFrame with centred, shoulder-width-scaled
        coordinates.
    """
    std_df = KeypointStandardizer().standardize_keypoints(df_2d)

    # Hip midpoint per row, captured before any coordinate column is modified.
    pelvis_x = (std_df['Right_Hip_x'] + std_df['Left_Hip_x']) / 2
    pelvis_y = (std_df['Right_Hip_y'] + std_df['Left_Hip_y']) / 2

    x_columns = [name for name in std_df.columns if name.endswith('_x')]
    y_columns = [name for name in std_df.columns if name.endswith('_y')]

    # Move the hip midpoint to the origin.
    for name in x_columns:
        std_df[name] = std_df[name] - pelvis_x
    for name in y_columns:
        std_df[name] = std_df[name] - pelvis_y

    # Euclidean distance between the shoulders, used as the per-row scale.
    shoulder_dx = std_df['Right_Shoulder_x'] - std_df['Left_Shoulder_x']
    shoulder_dy = std_df['Right_Shoulder_y'] - std_df['Left_Shoulder_y']
    shoulder_span = np.sqrt(shoulder_dx**2 + shoulder_dy**2)

    for name in x_columns + y_columns:
        std_df[name] = std_df[name] / shoulder_span

    return std_df

# Usage example
def prepare_data(df_2d):
    """Build the model inputs from the 2D keypoint frame.

    Args:
        df_2d: DataFrame with ``pose_id``, ``pose_name`` and 2D keypoint columns.

    Returns:
        Tuple ``(X, y, std_df)``: ``X`` holds every column ending in ``_x`` or
        ``_y``, ``y`` is the ``pose_name`` column, and ``std_df`` is the full
        frame returned by ``process_dataset``.
    """
    std_df = process_dataset(df_2d)

    # Keep only the coordinate columns as features.
    feature_cols = [name for name in std_df.columns if name.endswith(('_x', '_y'))]

    return std_df[feature_cols], std_df['pose_name'], std_df

In [None]:
# Prepare the data
X, y, std_df = prepare_data(df_2d)

# Now verify the result
print("=== Shape 확인 ===")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

print("\n=== 표준화된 데이터프레임 컬럼 ===")
print(std_df.columns.tolist())

print("\n=== 정규화 확인 ===")
# Check that the hip centre is at 0
hip_center_x = (std_df['Right_Hip_x'] + std_df['Left_Hip_x']) / 2
hip_center_y = (std_df['Right_Hip_y'] + std_df['Left_Hip_y']) / 2
print(f"Hip center X mean: {hip_center_x.mean()}")
print(f"Hip center Y mean: {hip_center_y.mean()}")

# Check the shoulder-width normalization (the distance should be 1)
shoulder_dist = np.sqrt(
    (std_df['Right_Shoulder_x'] - std_df['Left_Shoulder_x'])**2 +
    (std_df['Right_Shoulder_y'] - std_df['Left_Shoulder_y'])**2
)
print(f"Shoulder distance mean: {shoulder_dist.mean()}")

# Inspect the distribution of the coordinate values
print("\n=== 좌표값 분포 ===")
print(X.describe())

=== Shape 확인 ===
X shape: (305280, 34)
y shape: (305280,)

=== 표준화된 데이터프레임 컬럼 ===
['pose_id', 'pose_name', 'Nose_x', 'Nose_y', 'Right_Eye_x', 'Right_Eye_y', 'Left_Eye_x', 'Left_Eye_y', 'Right_Ear_x', 'Right_Ear_y', 'Left_Ear_x', 'Left_Ear_y', 'Right_Shoulder_x', 'Right_Shoulder_y', 'Left_Shoulder_x', 'Left_Shoulder_y', 'Right_Elbow_x', 'Right_Elbow_y', 'Left_Elbow_x', 'Left_Elbow_y', 'Right_Wrist_x', 'Right_Wrist_y', 'Left_Wrist_x', 'Left_Wrist_y', 'Right_Hip_x', 'Right_Hip_y', 'Left_Hip_x', 'Left_Hip_y', 'Right_Knee_x', 'Right_Knee_y', 'Left_Knee_x', 'Left_Knee_y', 'Right_Ankle_x', 'Right_Ankle_y', 'Left_Ankle_x', 'Left_Ankle_y']

=== 정규화 확인 ===
Hip center X mean: -1.2841594615413426e-18
Hip center Y mean: 2.7377632069979876e-18
Shoulder distance mean: 1.0

=== 좌표값 분포 ===
              Nose_x         Nose_y    Right_Eye_x    Right_Eye_y  \
count  305280.000000  305280.000000  305280.000000  305280.000000   
mean        0.010441      -3.163735       0.009739      -3.337694   
std      

In [None]:
# Check ratios between keypoints
print("=== 키포인트 비율 확인 ===")
# Example: shoulder-to-hip ratio (difference of the column means, in shoulder-width units)
torso_ratio = np.abs(std_df['Right_Hip_y'].mean() - std_df['Right_Shoulder_y'].mean())
print(f"Torso ratio: {torso_ratio}")

# Find samples that contain outliers
print("\n=== 이상치 샘플 확인 ===")
threshold = 100  # arbitrary large value
# A row is an outlier when any x or y coordinate exceeds the threshold in magnitude.
outliers = std_df[
    (std_df.filter(like='_x').abs() > threshold).any(axis=1) |
    (std_df.filter(like='_y').abs() > threshold).any(axis=1)
]
print(f"이상치 샘플 수: {len(outliers)}")
print("\n이상치 샘플의 포즈:")
print(outliers['pose_name'].value_counts())

=== 키포인트 비율 확인 ===
Torso ratio: 2.2998780079801757

=== 이상치 샘플 확인 ===
이상치 샘플 수: 249

이상치 샘플의 포즈:
pose_name
통화하는 자세                        38
몸을 앞으로 숙인 자세                   27
팔짱                             22
기지개                            22
막대를 양손으로 잡고 골반 뒤쪽으로 쭉 뻗은 자세    18
A포즈                            14
조깅                             12
허리 회전을 최대로 한 자세                12
머리 뒤 깍지를 낀 자세                  11
계단 오르기                         11
뒷짐                             10
T포즈                             9
달리기(전력질주)                       9
I포즈                             8
한다리 올리고 편하게 앉은 자세               8
공을 던지려고 힘을 주는 자세                6
한 손과 반대편 발을 들며 신난 자세            4
벽에 기대어 신발 신기                    4
발레                              4
Name: count, dtype: int64


In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from tqdm.auto import tqdm
import os

class EnhancedPoseAugmenterV2:
    """Randomised augmentation of 2D pose keypoints.

    Every pose is handled as an array of shape ``(num_keypoints, 2)`` holding
    ``(x, y)`` per joint. The hard-coded joint indices used by the helpers
    assume this 17-keypoint order (right side first):

        0 nose, 1/2 eyes, 3/4 ears, 5/6 shoulders, 7/8 elbows,
        9/10 wrists, 11/12 hips, 13/14 knees, 15/16 ankles

    All randomness comes from NumPy's global ``np.random`` state.
    """

    def __init__(self, random_state=42):
        """Store the seed and seed NumPy's global random generator with it.

        Args:
            random_state: seed passed to ``np.random.seed``. Note that this
                affects every other user of ``np.random`` in the process.
        """
        self.random_state = random_state
        np.random.seed(random_state)

    def augment(self, X, y, std_df, augmentation_factor=9):
        """Main augmentation routine.

        For every input pose the output contains, in this order: the original
        pose, 3 basic augmentations, 3 advanced augmentations, 2 combined
        augmentations and, when the pose's class has more than one sample,
        1 interpolation with a randomly drawn pose of the same class (the draw
        may pick the pose itself).

        Args:
            X: table of flattened poses, one row per sample, laid out as
                ``x0, y0, x1, y1, ...`` (DataFrame or 2D array).
            y: class label per row of ``X`` (Series or 1D array).
            std_df: accepted for interface compatibility; not used here.
            augmentation_factor: accepted for interface compatibility; the
                number of variants per sample is fixed as described above.

        Returns:
            Tuple ``(data, labels)`` of NumPy arrays: the flattened poses and
            their class labels.
        """
        # Convert once instead of doing a positional pandas lookup per row.
        coords = np.asarray(X)
        labels = np.asarray(y)

        # Row indices per class, used to draw interpolation partners without
        # keeping a second copy of every pose.
        class_members = {}
        for idx, pose_name in enumerate(labels):
            class_members.setdefault(pose_name, []).append(idx)

        augmented_data = []
        augmented_labels = []

        for idx in tqdm(range(len(coords)), desc="Augmenting poses"):
            pose_coords = coords[idx].reshape(-1, 2)
            pose_name = labels[idx]

            # Keep the original sample
            augmented_data.append(pose_coords.flatten())
            augmented_labels.append(pose_name)

            # Basic augmentations (3)
            for _ in range(3):
                aug_pose = self._apply_basic_augmentations(pose_coords.copy())
                augmented_data.append(aug_pose.flatten())
                augmented_labels.append(pose_name)

            # Advanced augmentations (3)
            for _ in range(3):
                aug_pose = self._apply_advanced_augmentations(pose_coords.copy())
                augmented_data.append(aug_pose.flatten())
                augmented_labels.append(pose_name)

            # Combined augmentations (2)
            for _ in range(2):
                aug_pose = self._apply_combined_augmentations(pose_coords.copy())
                augmented_data.append(aug_pose.flatten())
                augmented_labels.append(pose_name)

            # Interpolation with another pose of the same class (1)
            members = class_members[pose_name]
            if len(members) > 1:
                partner_idx = members[np.random.randint(len(members))]
                partner_pose = coords[partner_idx].reshape(-1, 2)
                interpolated_pose = self._interpolate_poses(pose_coords, partner_pose)
                augmented_data.append(interpolated_pose.flatten())
                augmented_labels.append(pose_name)

        return np.array(augmented_data), np.array(augmented_labels)

    def _apply_basic_augmentations(self, pose):
        """Apply the basic augmentations, each gated by its own random draw.

        Rotation (within +/-25 degrees), scaling (0.8-1.2) and Gaussian noise
        are each applied when a uniform draw exceeds 0.3; a horizontal flip is
        applied when a draw exceeds 0.5.
        """
        if np.random.random() > 0.3:
            pose = self._rotate_pose(pose, angle=np.random.uniform(-25, 25))
        if np.random.random() > 0.3:
            pose = self._scale_pose(pose, scale_factor=np.random.uniform(0.8, 1.2))
        if np.random.random() > 0.3:
            pose = self._add_noise(pose, noise_level=0.02)
        if np.random.random() > 0.5:
            pose = self._horizontal_flip(pose)
        return pose

    def _apply_advanced_augmentations(self, pose):
        """Apply the advanced augmentations, each gated by its own random draw.

        Joint jitter, limb stretch (0.85-1.15), keypoint dropout, perspective
        transform and occlusion noise are each applied when a uniform draw
        exceeds 0.3.
        """
        if np.random.random() > 0.3:
            pose = self._joint_jitter(pose, jitter_range=0.08)
        if np.random.random() > 0.3:
            pose = self._limb_stretch(pose, stretch_factor=np.random.uniform(0.85, 1.15))
        if np.random.random() > 0.3:
            pose = self._keypoint_dropout(pose, dropout_prob=0.15)
        if np.random.random() > 0.3:
            pose = self._perspective_transform(pose)
        if np.random.random() > 0.3:
            pose = self._add_occlusion(pose)
        return pose

    def _apply_combined_augmentations(self, pose):
        """Apply one randomly chosen recipe of chained augmentations.

        Each recipe is a pair of step names; the steps present in the chosen
        recipe are applied in the fixed order of the ``if`` blocks below.
        """
        aug_sequences = [
            ['flip_rotate_scale', 'noise'],
            ['rotate_jitter', 'stretch'],
            ['flip_occlude', 'perspective'],
            ['rotate_stretch', 'dropout']
        ]
        # Pick one recipe by index.
        aug_sequence = aug_sequences[np.random.randint(len(aug_sequences))]

        # Membership tests are against the list, so names match exactly
        # ('stretch' does not match 'rotate_stretch').
        if 'flip_rotate_scale' in aug_sequence:
            pose = self._horizontal_flip(pose)
            pose = self._rotate_pose(pose, angle=np.random.uniform(-20, 20))
            pose = self._scale_pose(pose, scale_factor=np.random.uniform(0.9, 1.1))
        if 'rotate_jitter' in aug_sequence:
            pose = self._rotate_pose(pose, angle=np.random.uniform(-15, 15))
            pose = self._joint_jitter(pose, jitter_range=0.05)
        if 'flip_occlude' in aug_sequence:
            pose = self._horizontal_flip(pose)
            pose = self._add_occlusion(pose)
        if 'rotate_stretch' in aug_sequence:
            pose = self._rotate_pose(pose, angle=np.random.uniform(-10, 10))
            pose = self._limb_stretch(pose, stretch_factor=np.random.uniform(0.95, 1.05))
        if 'noise' in aug_sequence:
            pose = self._add_noise(pose, noise_level=0.03)
        if 'stretch' in aug_sequence:
            pose = self._limb_stretch(pose, stretch_factor=np.random.uniform(0.9, 1.1))
        if 'perspective' in aug_sequence:
            pose = self._perspective_transform(pose)
        if 'dropout' in aug_sequence:
            pose = self._keypoint_dropout(pose, dropout_prob=0.1)

        return pose

    def _interpolate_poses(self, pose1, pose2, t=0.5):
        """Linearly interpolate between two poses.

        Returns ``pose1 * (1 - t) + pose2 * t``; ``t=0.5`` is the midpoint.
        """
        return pose1 * (1 - t) + pose2 * t

    def _rotate_pose(self, pose, angle):
        """Rotate the pose by ``angle`` degrees about the mean of its keypoints."""
        angle_rad = np.radians(angle)
        rotation_matrix = np.array([
            [np.cos(angle_rad), -np.sin(angle_rad)],
            [np.sin(angle_rad), np.cos(angle_rad)]
        ])
        center = pose.mean(axis=0)
        centered_pose = pose - center
        rotated_pose = np.dot(centered_pose, rotation_matrix.T)
        return rotated_pose + center

    def _scale_pose(self, pose, scale_factor):
        """Scale the pose by ``scale_factor`` about the mean of its keypoints."""
        center = pose.mean(axis=0)
        centered_pose = pose - center
        scaled_pose = centered_pose * scale_factor
        return scaled_pose + center

    def _add_noise(self, pose, noise_level):
        """Add zero-mean Gaussian noise to every coordinate.

        The noise has standard deviation ``noise_level`` and is multiplied by
        the largest absolute coordinate of the pose, so it is relative to the
        pose's extent.
        """
        noise = np.random.normal(0, noise_level, pose.shape)
        pose_scale = np.abs(pose).max()
        return pose + noise * pose_scale

    def _horizontal_flip(self, pose):
        """Mirror the pose horizontally.

        Negates every x coordinate and swaps each right/left keypoint pair so
        that the rows keep their right/left meaning after mirroring.
        """
        flipped_pose = pose.copy()
        flipped_pose[:, 0] = -flipped_pose[:, 0]
        pairs = [(1,2), (3,4), (5,6), (7,8), (9,10), (11,12), (13,14), (15,16)]
        for i, j in pairs:
            # Copies are required: the rows are views into the same array.
            flipped_pose[i], flipped_pose[j] = flipped_pose[j].copy(), flipped_pose[i].copy()
        return flipped_pose

    def _joint_jitter(self, pose, jitter_range):
        """Add uniform jitter in ``[-jitter_range, jitter_range]`` to every joint.

        The nose (row 0) and the hips (rows 11-12) receive half the jitter.
        """
        jitter = np.random.uniform(-jitter_range, jitter_range, pose.shape)
        jitter[0] *= 0.5  # nose
        jitter[11:13] *= 0.5  # hips
        return pose + jitter

    def _limb_stretch(self, pose, stretch_factor):
        """Scale the length of every arm and leg segment by ``stretch_factor``.

        Each limb is a chain of joints (shoulder-elbow-wrist, hip-knee-ankle).
        Segment vectors are taken from the *input* pose and the chain is
        rebuilt from its root, so each segment keeps its direction and is
        scaled by exactly ``stretch_factor``. (Measuring a segment from an
        already moved parent to a not-yet-moved child would skew the distal
        segment instead of scaling it.)

        Args:
            pose: array of shape ``(num_keypoints, 2)``; not modified.
            stretch_factor: multiplier applied to every limb segment.

        Returns:
            A new array with the stretched limbs.
        """
        stretched_pose = pose.copy()
        limb_chains = [
            [5, 7, 9],     # right arm
            [6, 8, 10],    # left arm
            [11, 13, 15],  # right leg
            [12, 14, 16],  # left leg
        ]

        for chain in limb_chains:
            for parent, child in zip(chain[:-1], chain[1:]):
                segment = pose[child] - pose[parent]
                stretched_pose[child] = stretched_pose[parent] + segment * stretch_factor

        return stretched_pose

    def _perspective_transform(self, pose):
        """Apply a random axis-wise scaling as a simple perspective stand-in.

        x is multiplied by ``scale_x * depth`` and y by ``scale_y * depth``,
        each factor drawn uniformly from 0.9-1.1. The scaling is about the
        coordinate origin.
        """
        depth = np.random.uniform(0.9, 1.1)
        scale_x = np.random.uniform(0.9, 1.1)
        scale_y = np.random.uniform(0.9, 1.1)
        transformed = pose.copy()
        transformed[:, 0] *= scale_x * depth
        transformed[:, 1] *= scale_y * depth
        return transformed

    def _add_occlusion(self, pose):
        """Simulate a partially occluded limb.

        One of the four limbs (right/left arm, right/left leg) is chosen at
        random and Gaussian noise with standard deviation 0.1 is added to its
        three joints.
        """
        occluded = pose.copy()
        # Randomly choose one joint group
        if np.random.random() > 0.5:
            # Arm group
            if np.random.random() > 0.5:
                indices = [5,7,9]  # right arm
            else:
                indices = [6,8,10]  # left arm
        else:
            # Leg group
            if np.random.random() > 0.5:
                indices = [11,13,15]  # right leg
            else:
                indices = [12,14,16]  # left leg

        # Perturb the selected joints
        occluded[indices] += np.random.normal(0, 0.1, (len(indices), 2))
        return occluded

    def _keypoint_dropout(self, pose, dropout_prob):
        """Randomly drop keypoints, replacing them with the mean of the kept ones.

        Each keypoint is dropped with probability ``dropout_prob``, except the
        nose (row 0) and the hips (rows 11-12), which are always kept.
        """
        dropout_mask = np.random.random(len(pose)) > dropout_prob  # True = keep
        dropout_mask[0] = True  # nose
        dropout_mask[11:13] = True  # hips

        dropped_pose = pose.copy()
        mean_pos = pose[dropout_mask].mean(axis=0)
        dropped_pose[~dropout_mask] = mean_pos
        return dropped_pose

def augment_and_save_dataset(X, y, std_df, output_file, augmentation_factor=9):
    """Augment the dataset, write it to a Parquet file and report its size.

    Args:
        X: feature table of flattened poses (one row per sample).
        y: class label per row of ``X``.
        std_df: forwarded to ``EnhancedPoseAugmenterV2.augment``.
        output_file: path of the gzip-compressed Parquet file to write.
        augmentation_factor: forwarded to ``EnhancedPoseAugmenterV2.augment``.

    Returns:
        DataFrame with one ``<Joint>_x`` / ``<Joint>_y`` column pair per
        keypoint plus a ``pose_name`` column, holding the augmented samples.
    """
    augmenter = EnhancedPoseAugmenterV2()

    # Run the augmentation
    print("Augmenting dataset...")
    X_aug, y_aug = augmenter.augment(X, y, std_df, augmentation_factor)

    # Column names: x then y for each keypoint, in keypoint order
    keypoint_names = (
        'Nose', 'Right_Eye', 'Left_Eye', 'Right_Ear', 'Left_Ear',
        'Right_Shoulder', 'Left_Shoulder', 'Right_Elbow', 'Left_Elbow',
        'Right_Wrist', 'Left_Wrist', 'Right_Hip', 'Left_Hip',
        'Right_Knee', 'Left_Knee', 'Right_Ankle', 'Left_Ankle',
    )
    columns = []
    for joint in keypoint_names:
        columns.append(f"{joint}_x")
        columns.append(f"{joint}_y")

    X_aug_df = pd.DataFrame(X_aug, columns=columns)
    X_aug_df['pose_name'] = y_aug

    # Save as Parquet
    print(f"Saving augmented dataset to {output_file}")
    X_aug_df.to_parquet(output_file, compression='gzip')

    # Report sizes in MiB
    bytes_per_mib = 1024**2
    memory_usage = X_aug_df.memory_usage(deep=True).sum() / bytes_per_mib
    file_size = os.path.getsize(output_file) / bytes_per_mib

    print("\nDataset Statistics:")
    print(f"Original samples: {len(X)}")
    print(f"Augmented samples: {len(X_aug_df)}")
    print(f"Memory usage: {memory_usage:.2f} MB")
    print(f"Parquet file size: {file_size:.2f} MB")

    return X_aug_df

# Usage: augment the prepared data and write it next to the notebook.
output_file = 'augmented_pose_dataset_v2.parquet'
augmented_df = augment_and_save_dataset(X, y, std_df, output_file)

Augmenting dataset...


Augmenting poses:   0%|          | 0/305280 [00:00<?, ?it/s]

Saving augmented dataset to augmented_pose_dataset_v2.parquet

Dataset Statistics:
Original samples: 305280
Augmented samples: 3052800
Memory usage: 1153.92 MB
Parquet file size: 775.78 MB
