# 라이브러리 임포트


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# 1. 기본 라이브러리 임포트
import torch  # PyTorch - 딥러닝 프레임워크
import torch.nn as nn  # 신경망 모듈
import torch.nn.functional as F  # 활성화 함수 등 기능성 모듈
from torch.utils.data import Dataset, DataLoader  # 데이터 처리 도구

# 2. 데이터 처리 및 분석 라이브러리
import numpy as np  # 수치 연산
import pandas as pd  # 데이터프레임 처리
from sklearn.model_selection import train_test_split  # 데이터 분할
from sklearn.preprocessing import LabelEncoder  # 레이블 인코딩
from sklearn.metrics import (
    precision_recall_fscore_support,  # 정밀도, 재현율, F1 점수 계산
    confusion_matrix,  # 혼동 행렬
    roc_auc_score,  # ROC AUC 점수
    classification_report  # 분류 보고서
)

# 3. 시각화 라이브러리
import seaborn as sns  # 통계 시각화
import matplotlib.pyplot as plt  # 기본 시각화

# 4. 기타 유틸리티
from tqdm.auto import tqdm  # 진행률 표시
import json  # JSON 파일 처리
import os  # 파일/디렉토리 처리

# 5. 초기 설정
print("=== Initializing ===")

# 기본 경로 설정
BASE_DIR = '/content/drive/Shareddrives/Vision'

# matplotlib 한글 폰트 설정
plt.rcParams['font.family'] = 'NanumGothic'  # 나눔고딕으로 변경
plt.rcParams['axes.unicode_minus'] = False  # 마이너스 기호 깨짐 방지

# seaborn 스타일 설정
sns.set_style('whitegrid')  # seaborn의 whitegrid 스타일 사용

# CUDA 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

=== Initializing ===
Using device: cuda


# 데이터셋 클래스

In [3]:
# 데이터셋 클래스 정의
class PoseDataset(Dataset):
    """
    포즈 데이터를 처리하기 위한 사용자 정의 데이터셋 클래스

    Args:
        X (pandas.DataFrame): 입력 특성 데이터
        y (numpy.array): 레이블 데이터
    """
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X.values)  # 입력 데이터를 PyTorch 텐서로 변환
        self.y = torch.LongTensor(y)  # 레이블을 PyTorch 텐서로 변환

    def __len__(self):
        return len(self.X)  # 데이터셋의 총 샘플 수 반환

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]  # 인덱스에 해당하는 샘플과 레이블 반환

def normalize_pose_data(df):
    """
    포즈 데이터를 정규화하는 함수

    1. 골반 중심을 기준으로 모든 관절 위치를 정규화
    2. 어깨 너비를 기준으로 스케일링

    Args:
        df (pandas.DataFrame): 정규화할 포즈 데이터

    Returns:
        pandas.DataFrame: 정규화된 포즈 데이터
    """
    print("Normalizing data...")
    print("Original data range:", df.min().min(), "to", df.max().max())

    # 1. 골반 중심점 계산
    hip_center_x = (df['Right_Hip_x'] + df['Left_Hip_x']) / 2
    hip_center_y = (df['Right_Hip_y'] + df['Left_Hip_y']) / 2

    # 2. 중심점 기준 정규화
    normalized_df = df.copy()
    for col in df.columns:
        if col.endswith('_x'):
            normalized_df[col] = df[col] - hip_center_x
        elif col.endswith('_y'):
            normalized_df[col] = df[col] - hip_center_y

    # 3. 어깨 너비 계산 및 스케일링
    shoulder_width = np.sqrt(
        (normalized_df['Right_Shoulder_x'] - normalized_df['Left_Shoulder_x'])**2 +
        (normalized_df['Right_Shoulder_y'] - normalized_df['Left_Shoulder_y'])**2
    )

    # 4. 어깨 너비로 스케일링
    for col in normalized_df.columns:
        if col.endswith('_x') or col.endswith('_y'):
            normalized_df[col] = normalized_df[col] / shoulder_width

    print("Normalized data range:", normalized_df.min().min(), "to", normalized_df.max().max())
    return normalized_df

print("=== Dataset and Normalization Functions Defined ===")

=== Dataset and Normalization Functions Defined ===


In [4]:
print("\n=== Loading and Preprocessing Data ===")

# 1. 데이터 로드
print("Loading training data...")
train_data = pd.read_parquet(f'{BASE_DIR}/augmented_pose_dataset_v2.parquet')

# 2. 테스트용 데이터 로드
print("Loading test data...")
test_data = pd.read_csv(f'{BASE_DIR}/val_data.csv')

# 3. 테스트 데이터 전처리
print("\nPreprocessing test data...")
# 불필요한 컬럼 제거
columns_to_drop = ['subject_id', 'height', 'weight', 'BMI', 'sex', 'age',
                  'arm_length', 'chest_length', 'waist_length', 'hip',
                  'inseam', 'outseam', 'camera_id', 'image_width',
                  'image_height', 'altitude', 'heading', 'file_name',
                  'is_training']
test_data = test_data.drop(columns=columns_to_drop)

# 3D 컬럼 제거 및 2D 컬럼 이름 변경
cols_to_use = {
    'Nose_x_2d': 'Nose_x', 'Nose_y_2d': 'Nose_y',
    'Right_Eye_x_2d': 'Right_Eye_x', 'Right_Eye_y_2d': 'Right_Eye_y',
    'Left_Eye_x_2d': 'Left_Eye_x', 'Left_Eye_y_2d': 'Left_Eye_y',
    'Right_Ear_x_2d': 'Right_Ear_x', 'Right_Ear_y_2d': 'Right_Ear_y',
    'Left_Ear_x_2d': 'Left_Ear_x', 'Left_Ear_y_2d': 'Left_Ear_y',
    'Right_Shoulder_x_2d': 'Right_Shoulder_x', 'Right_Shoulder_y_2d': 'Right_Shoulder_y',
    'Left_Shoulder_x_2d': 'Left_Shoulder_x', 'Left_Shoulder_y_2d': 'Left_Shoulder_y',
    'Right_Elbow_x_2d': 'Right_Elbow_x', 'Right_Elbow_y_2d': 'Right_Elbow_y',
    'Left_Elbow_x_2d': 'Left_Elbow_x', 'Left_Elbow_y_2d': 'Left_Elbow_y',
    'Right_Wrist_x_2d': 'Right_Wrist_x', 'Right_Wrist_y_2d': 'Right_Wrist_y',
    'Left_Wrist_x_2d': 'Left_Wrist_x', 'Left_Wrist_y_2d': 'Left_Wrist_y',
    'Right_Hip_x_2d': 'Right_Hip_x', 'Right_Hip_y_2d': 'Right_Hip_y',
    'Left_Hip_x_2d': 'Left_Hip_x', 'Left_Hip_y_2d': 'Left_Hip_y',
    'Right_Knee_x_2d': 'Right_Knee_x', 'Right_Knee_y_2d': 'Right_Knee_y',
    'Left_Knee_x_2d': 'Left_Knee_x', 'Left_Knee_y_2d': 'Left_Knee_y',
    'Right_Ankle_x_2d': 'Right_Ankle_x', 'Right_Ankle_y_2d': 'Right_Ankle_y',
    'Left_Ankle_x_2d': 'Left_Ankle_x', 'Left_Ankle_y_2d': 'Left_Ankle_y'
}

test_data = test_data[list(cols_to_use.keys()) + ['pose_name']].rename(columns=cols_to_use)

# 4. 학습/검증 세트 분리 (8:2 비율)
print("\nSplitting training data into train and validation sets...")
train_df, val_df = train_test_split(
    train_data,
    test_size=0.2,
    random_state=42,
    stratify=train_data['pose_name']
)
print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")

# 5. 레이블 인코딩
print("\nEncoding labels...")
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['pose_name'])
y_val = label_encoder.transform(val_df['pose_name'])
y_test = label_encoder.transform(test_data['pose_name'])

# 6. 특징 데이터 준비 및 정규화
X_train = train_df.drop('pose_name', axis=1)
X_val = val_df.drop('pose_name', axis=1)
X_test = test_data.drop('pose_name', axis=1)

print("\nNormalizing data...")
X_train_normalized = X_train
X_val_normalized = X_val
X_test_normalized = normalize_pose_data(X_test)

print("\nData shapes after preprocessing:")
print(f"X_train: {X_train_normalized.shape}")
print(f"X_val: {X_val_normalized.shape}")
print(f"X_test: {X_test_normalized.shape}")

# 7. 데이터셋 생성
train_dataset = PoseDataset(X_train_normalized, y_train)
val_dataset = PoseDataset(X_val_normalized, y_val)
test_dataset = PoseDataset(X_test_normalized, y_test)

# 8. 데이터 로더 생성
batch_size = 8192
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
    persistent_workers=False
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=0,
    pin_memory=True,
    persistent_workers=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=0,
    pin_memory=True,
    persistent_workers=False
)

# 9. 클래스 매핑 저장
class_mapping = {i: label for i, label in enumerate(label_encoder.classes_)}
with open(f'{BASE_DIR}/clear/class_mapping.json', 'w') as f:
    json.dump(class_mapping, f)

print("\n=== Data Preparation Completed ===")
print(f"Number of classes: {len(class_mapping)}")
print("\nClass distribution:")
print("Training set:")
print(train_df['pose_name'].value_counts().to_string())
print("\nValidation set:")
print(val_df['pose_name'].value_counts().to_string())
print("\nTest set:")
print(test_data['pose_name'].value_counts().to_string())


=== Loading and Preprocessing Data ===
Loading training data...
Loading test data...

Preprocessing test data...

Splitting training data into train and validation sets...
Training set size: 2442240
Validation set size: 610560

Encoding labels...

Normalizing data...
Normalizing data...
Original data range: 20.931238824188423 to 337.4588755517672
Normalized data range: -281.2816911820616 to 302.9522902914642

Data shapes after preprocessing:
X_train: (2442240, 34)
X_val: (610560, 34)
X_test: (38160, 34)

=== Data Preparation Completed ===
Number of classes: 20

Class distribution:
Training set:
pose_name
팔짱                             122112
발레                             122112
기지개                            122112
통화하는 자세                        122112
조깅                             122112
달리기(전력질주)                      122112
A포즈                            122112
공을 던지려고 힘을 주는 자세               122112
I포즈                            122112
계단 오르기                         122112
머리 뒤 깍지

# 모델 구조 클래스

In [5]:
# ===== 모델 구조 정의 =====
print("\n=== Defining Model Architectures ===")

class PoseTransformer(nn.Module):
    def __init__(self, input_dim=34, num_classes=20, num_heads=8, dim_feedforward=512, num_layers=4):
        super().__init__()

        self.input_projection = nn.Linear(2, 128)
        self.positional_embedding = nn.Parameter(torch.randn(17, 128))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=128,
            nhead=num_heads,
            dim_feedforward=dim_feedforward,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.Linear(128 * 17, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = x.view(-1, 17, 2)
        x = self.input_projection(x)
        x = x + self.positional_embedding
        x = self.transformer(x)
        x = x.reshape(x.size(0), -1)
        x = self.classifier(x)
        return x

class PoseMLP(nn.Module):
    def __init__(self, input_dim=34, num_classes=20):
        super().__init__()

        self.block1 = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        self.res_blocks = nn.ModuleList([
            self._make_res_block(512, 512) for _ in range(4)
        ])

        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def _make_res_block(self, in_dim, out_dim):
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.BatchNorm1d(out_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(out_dim, out_dim),
            nn.BatchNorm1d(out_dim)
        )

    def forward(self, x):
        x = self.block1(x)
        for res_block in self.res_blocks:
            identity = x
            x = res_block(x)
            x = F.relu(x + identity)
        x = self.classifier(x)
        return x

class PoseGRU(nn.Module):
    def __init__(self, input_dim=34, num_classes=20, hidden_dim=256):
        super().__init__()

        self.spatial_embedding = nn.Linear(2, hidden_dim)

        self.gru = nn.GRU(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=3,
            batch_first=True,
            dropout=0.2,
            bidirectional=True
        )

        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = x.view(-1, 17, 2)
        x = self.spatial_embedding(x)
        x, _ = self.gru(x)
        x = x[:, -1, :]
        x = self.classifier(x)
        return x

# 1. CNN 기반 모델
class PoseCNN(nn.Module):
    def __init__(self, input_dim=34, num_classes=20):
        super().__init__()

        self.spatial_conv = nn.Sequential(
            nn.Conv1d(2, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU()
        )

        self.classifier = nn.Sequential(
            nn.Linear(256 * 17, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = x.view(-1, 2, 17)  # [batch, channels(x,y), keypoints]
        x = self.spatial_conv(x)
        x = x.reshape(x.size(0), -1)
        x = self.classifier(x)
        return x

# 2. VIT
class EnhancedPoseViT(nn.Module):
    def __init__(self, input_dim=34, num_classes=20):
        super().__init__()

        # 초기 embedding layer
        self.embedding = nn.Linear(2, 128)

        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=128,
            nhead=4,
            dim_feedforward=256,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer,
            num_layers=3
        )

        # Global attention pooling
        self.attention_pooling = nn.Sequential(
            nn.Linear(128, 64),
            nn.Tanh(),
            nn.Linear(64, 1)
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(128, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # Input reshaping [batch, 34] -> [batch, 17, 2]
        x = x.view(-1, 17, 2)

        # Initial embedding
        x = self.embedding(x)  # [batch, 17, 128]

        # Transformer encoding
        x = self.transformer(x)  # [batch, 17, 128]

        # Attention-weighted pooling
        attn_weights = self.attention_pooling(x)  # [batch, 17, 1]
        attn_weights = F.softmax(attn_weights, dim=1)
        x = (x * attn_weights).sum(dim=1)  # [batch, 128]

        # Classification
        x = self.classifier(x)

        return x

# 스태킹 모델
class StackingModel(nn.Module):
    def __init__(self, num_classes=20, num_base_models=5):
        super().__init__()

        # Base models
        self.transformer = PoseTransformer()
        self.mlp = PoseMLP()
        self.gru = PoseGRU()
        self.cnn = PoseCNN()
        self.attention = EnhancedPoseViT()

        # Meta-learner (스태킹 레이어)
        self.meta_features = nn.Sequential(
            nn.Linear(num_classes * 5, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

        # 최종 분류기
        self.final_classifier = nn.Sequential(
            nn.Linear(256, num_classes),
            nn.LogSoftmax(dim=1)
        )

        # 각 모델의 가중치
        self.model_weights = nn.Parameter(torch.ones(5))

    def forward(self, x):
        # 각 모델의 예측
        trans_out = self.transformer(x)
        mlp_out = self.mlp(x)
        gru_out = self.gru(x)
        cnn_out = self.cnn(x)
        attn_out = self.attention(x)

        # 소프트맥스 적용
        trans_prob = F.softmax(trans_out, dim=1)
        mlp_prob = F.softmax(mlp_out, dim=1)
        gru_prob = F.softmax(gru_out, dim=1)
        cnn_prob = F.softmax(cnn_out, dim=1)
        attn_prob = F.softmax(attn_out, dim=1)

        # 가중치 정규화
        weights = F.softmax(self.model_weights, dim=0)

        # 모델 출력 결합
        stacked_features = torch.cat([
            trans_prob * weights[0],
            mlp_prob * weights[1],
            gru_prob * weights[2],
            cnn_prob * weights[3],
            attn_prob * weights[4]
        ], dim=1)

        # 메타 특성 추출
        meta_features = self.meta_features(stacked_features)

        # 최종 예측
        output = self.final_classifier(meta_features)
        return output

print("Model architectures defined successfully!")


=== Defining Model Architectures ===
Model architectures defined successfully!


# 학습 함수2

In [6]:
# ===== 학습 및 평가 함수 정의 =====
print("\n=== Defining Training and Evaluation Functions ===")

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50, patience=5):
    """
    모델 학습을 위한 함수

    Args:
        model: 학습할 모델
        train_loader: 학습 데이터 로더
        val_loader: 검증 데이터 로더
        criterion: 손실 함수
        optimizer: 옵티마이저
        scheduler: 학습률 스케줄러
        num_epochs: 총 에폭 수
        patience: Early stopping 인내심

    Returns:
        dict: 학습 결과 (loss, accuracy 등)
    """
    best_val_loss = float('inf')
    patience_counter = 0
    history = {
        'train_loss': [], 'val_loss': [],
        'train_acc': [], 'val_acc': [],
        'epoch': []
    }

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # 학습 진행률 표시
        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        for inputs, labels in train_pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            train_pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })

        train_loss = train_loss / len(train_loader)
        train_acc = 100. * correct / total

        # Validation phase
        val_metrics = evaluate_model(model, val_loader, criterion)
        val_loss = val_metrics['loss']
        val_acc = val_metrics['accuracy'] * 100

        # Learning rate 조정
        scheduler.step(val_loss)

        # 결과 저장
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)
        history['epoch'].append(epoch + 1)

        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'val_acc': val_acc,
            }, f'{BASE_DIR}/best_{model.__class__.__name__}.pth')
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print('Early stopping!')
                break

    return history

def evaluate_model(model, data_loader, criterion):
    """
    모델 평가를 위한 함수

    Args:
        model: 평가할 모델
        data_loader: 데이터 로더
        criterion: 손실 함수

    Returns:
        dict: 평가 결과
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            probs = F.softmax(outputs, dim=1)
            preds = outputs.argmax(dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    metrics = {
        'loss': total_loss / len(data_loader),
        'accuracy': (np.array(all_preds) == np.array(all_labels)).mean(),
        'predictions': all_preds,
        'true_labels': all_labels,
        'probabilities': all_probs
    }

    return metrics

def plot_training_curves(history, model_name):
    """
    학습 곡선을 그리는 함수

    Args:
        history: 학습 히스토리
        model_name: 모델 이름
    """
    plt.figure(figsize=(12, 5))

    # Loss plot
    plt.subplot(1, 2, 1)
    plt.plot(history['epoch'], history['train_loss'], label='Train')
    plt.plot(history['epoch'], history['val_loss'], label='Validation')
    plt.title(f'{model_name} Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Accuracy plot
    plt.subplot(1, 2, 2)
    plt.plot(history['epoch'], history['train_acc'], label='Train')
    plt.plot(history['epoch'], history['val_acc'], label='Validation')
    plt.title(f'{model_name} Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig(f'{BASE_DIR}/{model_name}_training_curves.png')
    plt.close()

def plot_confusion_matrix(true_labels, predictions, save_path):
    """
    혼동 행렬을 그리는 함수
    """
    plt.figure(figsize=(15, 15))
    cm = confusion_matrix(true_labels, predictions)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_mapping.values(),
                yticklabels=class_mapping.values())
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

print("Training and evaluation functions defined successfully!")


=== Defining Training and Evaluation Functions ===
Training and evaluation functions defined successfully!


# 메인 함수

In [8]:
# ===== 모델 학습 및 평가 실행 =====
print("\n=== Starting Model Training and Evaluation ===")

# 1. 기본 설정
models = {
    'transformer': PoseTransformer(),
    'mlp': PoseMLP(),
    'gru': PoseGRU(),
    'cnn': PoseCNN(),
    'vit': EnhancedPoseViT()
}

results = {}

# 2. 개별 모델 학습 및 평가
for model_name, model in models.items():
    print(f"\n=== Training {model_name} ===")
    model_path = f'{BASE_DIR}/best_{model.__class__.__name__}.pth'

    # 2.1 이미 학습된 모델 체크
    if os.path.exists(model_path):
        print(f"{model_name} already trained. Loading best model...")
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"Best validation accuracy: {checkpoint['val_acc']:.2f}%")
        continue

    # 2.2 모델 학습 준비
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=3, factor=0.5, verbose=True
    )

    # 2.3 모델 학습
    history = train_model(
        model, train_loader, val_loader,
        criterion, optimizer, scheduler
    )

    # 2.4 학습 곡선 그리기
    plot_training_curves(history, model_name)

    # 2.5 모델 평가
    model.eval()
    train_metrics = evaluate_model(model, train_loader, criterion)
    val_metrics = evaluate_model(model, val_loader, criterion)
    test_metrics = evaluate_model(model, test_loader, criterion)

    # 2.6 혼동 행렬 생성
    plot_confusion_matrix(
        test_metrics['true_labels'],
        test_metrics['predictions'],
        f'{BASE_DIR}/{model_name}_confusion_matrix.png'
    )

    # 2.7 결과 저장
    results[model_name] = {
        'train_acc': train_metrics['accuracy'] * 100,
        'val_acc': val_metrics['accuracy'] * 100,
        'test_acc': test_metrics['accuracy'] * 100,
        'train_loss': train_metrics['loss'],
        'val_loss': val_metrics['loss'],
        'test_loss': test_metrics['loss'],
        'history': history
    }

# 3. 스태킹 모델 학습
print("\n=== Training Stacking Model ===")
stacking_model_path = f'{BASE_DIR}/best_StackingModel.pth'

if not os.path.exists(stacking_model_path):
    stacking_model = StackingModel().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(stacking_model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=3, factor=0.5, verbose=True
    )

    history = train_model(
        stacking_model, train_loader, val_loader,
        criterion, optimizer, scheduler
    )

    plot_training_curves(history, 'stacking')

    # 스태킹 모델 평가
    stacking_model.eval()
    train_metrics = evaluate_model(stacking_model, train_loader, criterion)
    val_metrics = evaluate_model(stacking_model, val_loader, criterion)
    test_metrics = evaluate_model(stacking_model, test_loader, criterion)

    plot_confusion_matrix(
        test_metrics['true_labels'],
        test_metrics['predictions'],
        f'{BASE_DIR}/stacking_confusion_matrix.png'
    )

    results['stacking'] = {
        'train_acc': train_metrics['accuracy'] * 100,
        'val_acc': val_metrics['accuracy'] * 100,
        'test_acc': test_metrics['accuracy'] * 100,
        'train_loss': train_metrics['loss'],
        'val_loss': val_metrics['loss'],
        'test_loss': test_metrics['loss'],
        'history': history
    }
else:
    print("Stacking model already trained. Skipping training.")

# 4. 최종 결과 출력
print("\n=== Final Results ===")
for model_name, result in results.items():
    print(f"\n{model_name.upper()} Results:")
    print(f"Training - Loss: {result['train_loss']:.4f}, Accuracy: {result['train_acc']:.2f}%")
    print(f"Validation - Loss: {result['val_loss']:.4f}, Accuracy: {result['val_acc']:.2f}%")
    print(f"Test - Loss: {result['test_loss']:.4f}, Accuracy: {result['test_acc']:.2f}%")

# 5. 결과 저장
def convert_to_serializable(obj):
    """JSON 직렬화를 위해 데이터 타입을 변환하는 함수"""
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    else:
        return obj

# 결과를 JSON 파일로 저장
print("\nSaving results to JSON...")
serializable_results = convert_to_serializable(results)
with open(f'{BASE_DIR}/final_results.json', 'w') as f:
    json.dump(serializable_results, f, indent=4)

print("\n=== Training and Evaluation Completed ===")


=== Starting Model Training and Evaluation ===

=== Training transformer ===
transformer already trained. Loading best model...
Best validation accuracy: 99.71%

=== Training mlp ===
mlp already trained. Loading best model...


  checkpoint = torch.load(model_path)


Best validation accuracy: 99.39%

=== Training gru ===
gru already trained. Loading best model...
Best validation accuracy: 99.52%

=== Training cnn ===
cnn already trained. Loading best model...
Best validation accuracy: 99.52%

=== Training vit ===
vit already trained. Loading best model...
Best validation accuracy: 97.75%

=== Training Stacking Model ===




Epoch 1/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 1/50:
Train Loss: 1.1445, Train Acc: 61.44%
Val Loss: 0.2025, Val Acc: 94.58%


Epoch 2/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 2/50:
Train Loss: 0.1422, Train Acc: 96.23%
Val Loss: 0.0648, Val Acc: 98.37%


Epoch 3/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 3/50:
Train Loss: 0.0725, Train Acc: 98.05%
Val Loss: 0.0629, Val Acc: 98.10%


Epoch 4/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 4/50:
Train Loss: 0.0507, Train Acc: 98.59%
Val Loss: 0.0322, Val Acc: 99.07%


Epoch 5/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 5/50:
Train Loss: 0.0404, Train Acc: 98.84%
Val Loss: 0.0258, Val Acc: 99.27%


Epoch 6/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 6/50:
Train Loss: 0.0354, Train Acc: 98.98%
Val Loss: 0.0256, Val Acc: 99.26%


Epoch 7/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 7/50:
Train Loss: 0.0313, Train Acc: 99.08%
Val Loss: 0.0279, Val Acc: 99.15%


Epoch 8/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 8/50:
Train Loss: 0.0288, Train Acc: 99.15%
Val Loss: 0.0324, Val Acc: 99.00%


Epoch 9/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 9/50:
Train Loss: 0.0271, Train Acc: 99.19%
Val Loss: 0.0210, Val Acc: 99.38%


Epoch 10/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 10/50:
Train Loss: 0.0250, Train Acc: 99.26%
Val Loss: 0.0204, Val Acc: 99.38%


Epoch 11/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 11/50:
Train Loss: 0.0238, Train Acc: 99.28%
Val Loss: 0.0198, Val Acc: 99.42%


Epoch 12/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 12/50:
Train Loss: 0.0223, Train Acc: 99.32%
Val Loss: 0.0235, Val Acc: 99.27%


Epoch 13/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 13/50:
Train Loss: 0.0218, Train Acc: 99.34%
Val Loss: 0.0194, Val Acc: 99.42%


Epoch 14/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 14/50:
Train Loss: 0.0204, Train Acc: 99.38%
Val Loss: 0.0213, Val Acc: 99.35%


Epoch 15/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 15/50:
Train Loss: 0.0196, Train Acc: 99.40%
Val Loss: 0.0261, Val Acc: 99.18%


Epoch 16/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 16/50:
Train Loss: 0.0199, Train Acc: 99.38%
Val Loss: 0.0234, Val Acc: 99.26%


Epoch 17/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 17/50:
Train Loss: 0.0187, Train Acc: 99.42%
Val Loss: 0.0204, Val Acc: 99.36%


Epoch 18/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 18/50:
Train Loss: 0.0123, Train Acc: 99.61%
Val Loss: 0.0137, Val Acc: 99.59%


Epoch 19/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 19/50:
Train Loss: 0.0108, Train Acc: 99.66%
Val Loss: 0.0144, Val Acc: 99.56%


Epoch 20/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 20/50:
Train Loss: 0.0105, Train Acc: 99.66%
Val Loss: 0.0159, Val Acc: 99.51%


Epoch 21/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 21/50:
Train Loss: 0.0102, Train Acc: 99.67%
Val Loss: 0.0234, Val Acc: 99.25%


Epoch 22/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 22/50:
Train Loss: 0.0100, Train Acc: 99.68%
Val Loss: 0.0143, Val Acc: 99.57%


Epoch 23/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 23/50:
Train Loss: 0.0075, Train Acc: 99.75%
Val Loss: 0.0136, Val Acc: 99.60%


Epoch 24/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 24/50:
Train Loss: 0.0066, Train Acc: 99.78%
Val Loss: 0.0134, Val Acc: 99.61%


Epoch 25/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 25/50:
Train Loss: 0.0063, Train Acc: 99.79%
Val Loss: 0.0137, Val Acc: 99.61%


Epoch 26/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 26/50:
Train Loss: 0.0063, Train Acc: 99.79%
Val Loss: 0.0145, Val Acc: 99.59%


Epoch 27/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 27/50:
Train Loss: 0.0063, Train Acc: 99.79%
Val Loss: 0.0168, Val Acc: 99.51%


Epoch 28/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 28/50:
Train Loss: 0.0058, Train Acc: 99.80%
Val Loss: 0.0140, Val Acc: 99.61%


Epoch 29/50 [Train]:   0%|          | 0/299 [00:00<?, ?it/s]


Epoch 29/50:
Train Loss: 0.0048, Train Acc: 99.84%
Val Loss: 0.0153, Val Acc: 99.57%
Early stopping!


  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()



=== Final Results ===

STACKING Results:
Training - Loss: 0.0039, Accuracy: 99.86%
Validation - Loss: 0.0153, Accuracy: 99.57%
Test - Loss: 0.0375, Accuracy: 99.75%

Saving results to JSON...

=== Training and Evaluation Completed ===


# 코드 진행상 문제 없어 다음 셀에서 해결 후 진행

In [None]:
def load_model(model_name):
    model_path = f'{BASE_DIR}/{model_name}_complete.pth'
    checkpoint = torch.load(model_path)

    if checkpoint['model_class'] == 'PoseTransformer':
        model = PoseTransformer(
            input_dim=checkpoint['input_dim'],
            num_classes=checkpoint['num_classes']
        )
    elif checkpoint['model_class'] == 'PoseMLP':
        model = PoseMLP(
            input_dim=checkpoint['input_dim'],
            num_classes=checkpoint['num_classes']
        )
    elif checkpoint['model_class'] == 'PoseGRU':
        model = PoseGRU(
            input_dim=checkpoint['input_dim'],
            num_classes=checkpoint['num_classes']
        )
    elif checkpoint['model_class'] == 'PoseCNN':
        model = PoseCNN(
            input_dim=checkpoint['input_dim'],
            num_classes=checkpoint['num_classes']
        )
    elif checkpoint['model_class'] == 'EnhancedPoseViT':  # 수정된 부분
        model = EnhancedPoseViT(
            input_dim=checkpoint['input_dim'],
            num_classes=checkpoint['num_classes']
        )
    else:  # StackingModel
        model = StackingModel(  # 수정된 부분
            num_classes=checkpoint['num_classes'],
            num_base_models=5
        )

    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    return model, checkpoint['class_mapping']

def save_models():
    print("\n=== Saving Models ===")
    # 기본 모델들 저장
    for model_name, model in models.items():
        save_path = f'{BASE_DIR}/{model_name}_complete.pth'
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_class': model.__class__.__name__,  # EnhancedPoseViT가 올바르게 저장됨
            'input_dim': 34,
            'num_classes': 20,
            'class_mapping': class_mapping
        }, save_path)
        print(f"Saved {model_name} to {save_path}")

    # 스태킹 모델 저장
    save_path = f'{BASE_DIR}/stacking_complete.pth'
    torch.save({
        'model_state_dict': stacking_model.state_dict(),
        'model_class': 'AdvancedStackingModel',  # 수정된 부분
        'input_dim': 34,
        'num_classes': 20,
        'class_mapping': class_mapping
    }, save_path)
    print(f"Saved stacking model to {save_path}")

print(stacking_model.state_dict().keys())  # 저장된 키 출력
# 모델 저장 실행
save_models()

# 모델 로드 테스트
print("\n=== Testing Model Loading ===")
for model_name in list(models.keys()) + ['stacking']:
    try:
        loaded_model, class_map = load_model(model_name)
        print(f"Successfully loaded {model_name}")
    except Exception as e:
        print(f"Error loading {model_name}: {str(e)}")

odict_keys(['model_weights', 'transformer.positional_embedding', 'transformer.input_projection.weight', 'transformer.input_projection.bias', 'transformer.transformer.layers.0.self_attn.in_proj_weight', 'transformer.transformer.layers.0.self_attn.in_proj_bias', 'transformer.transformer.layers.0.self_attn.out_proj.weight', 'transformer.transformer.layers.0.self_attn.out_proj.bias', 'transformer.transformer.layers.0.linear1.weight', 'transformer.transformer.layers.0.linear1.bias', 'transformer.transformer.layers.0.linear2.weight', 'transformer.transformer.layers.0.linear2.bias', 'transformer.transformer.layers.0.norm1.weight', 'transformer.transformer.layers.0.norm1.bias', 'transformer.transformer.layers.0.norm2.weight', 'transformer.transformer.layers.0.norm2.bias', 'transformer.transformer.layers.1.self_attn.in_proj_weight', 'transformer.transformer.layers.1.self_attn.in_proj_bias', 'transformer.transformer.layers.1.self_attn.out_proj.weight', 'transformer.transformer.layers.1.self_attn

  checkpoint = torch.load(model_path)


Successfully loaded stacking


In [9]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1


In [10]:

import torch.nn as nn
import torch.nn.functional as F
import cv2
import mediapipe as mp
import os
import numpy as np
import torch
import pandas as pd
# Explicitly import and initialize torch.device
from torch import device as torch_device
import json

class_mapping = {
    0: "A포즈",
    1: "I포즈",
    2: "T포즈",
    3: "계단 오르기",
    4: "공을 던지려고 힘을 주는 자세",
    5: "기지개",
    6: "달리기(전력질주)",
    7: "뒷짐",
    8: "막대를 양손으로 잡고 골반 뒤쪽으로 쭉 뻗은 자세",
    9: "머리 뒤 깍지를 낀 자세",
    10: "몸을 앞으로 숙인 자세",
    11: "발레",
    12: "벽에 기대어 신발 신기",
    13: "의자에 앉은 자세",
    14: "조깅",
    15: "통화하는 자세",
    16: "팔짱",
    17: "한 손과 반대편 발을 들며 신난 자세",
    18: "한 다리 올리고 편하게 앉은 자세",
    19: "허리 회전을 최대로 한 자세"
}


class PoseTransformer(nn.Module):
    def __init__(self, input_dim=34, num_classes=20, num_heads=8, dim_feedforward=512, num_layers=4):
        super().__init__()
        self.input_projection = nn.Linear(2, 128)
        self.positional_embedding = nn.Parameter(torch.randn(17, 128))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=128,
            nhead=num_heads,
            dim_feedforward=dim_feedforward,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.Linear(128 * 17, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = x.view(-1, 17, 2)
        x = self.input_projection(x)
        x = x + self.positional_embedding
        x = self.transformer(x)
        x = x.reshape(x.size(0), -1)
        x = self.classifier(x)
        return x

class PoseMLP(nn.Module):
    def __init__(self, input_dim=34, num_classes=20):
        super().__init__()
        self.block1 = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        self.res_blocks = nn.ModuleList([
            self._make_res_block(512, 512) for _ in range(4)
        ])

        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def _make_res_block(self, in_dim, out_dim):
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.BatchNorm1d(out_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(out_dim, out_dim),
            nn.BatchNorm1d(out_dim)
        )

    def forward(self, x):
        x = self.block1(x)
        for res_block in self.res_blocks:
            identity = x
            x = res_block(x)
            x = F.relu(x + identity)
        x = self.classifier(x)
        return x

class PoseGRU(nn.Module):
    def __init__(self, input_dim=34, num_classes=20, hidden_dim=256):
        super().__init__()
        self.spatial_embedding = nn.Linear(2, hidden_dim)

        self.gru = nn.GRU(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=3,
            batch_first=True,
            dropout=0.2,
            bidirectional=True
        )

        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = x.view(-1, 17, 2)
        x = self.spatial_embedding(x)
        x, _ = self.gru(x)
        x = x[:, -1, :]
        x = self.classifier(x)
        return x

def load_models(model_paths):
    # Use the initialized torch.device object
    device = torch_device('cuda' if torch.cuda.is_available() else 'cpu')
    models = {}
    for model_name, path in model_paths.items():
        if model_name == 'transformer':
            model = PoseTransformer()
        elif model_name == 'mlp':
            model = PoseMLP()
        else:  # gru
            model = PoseGRU()

        # 체크포인트에서 모델 state_dict 추출
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device) # Now uses the correctly initialized device
        model.eval()
        models[model_name] = model

    return models

class PosePredictor:
    def __init__(self, model_paths, base_dir):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.base_dir = base_dir

        # MediaPipe 초기화
        self.mp_pose = mp.solutions.pose
        self.pose = self.mp_pose.Pose(
            static_image_mode=True,
            model_complexity=2,
            min_detection_confidence=0.5
        )

        # 모델 로드
        self.base_models = self.load_base_models(model_paths)
        self.stacking_model = self.load_stacking_model(model_paths['stacking'])

        # MediaPipe와 우리 모델의 키포인트 매핑
        self.keypoint_names = {
            self.mp_pose.PoseLandmark.NOSE: 'nose',
            self.mp_pose.PoseLandmark.RIGHT_EYE: 'right_eye',
            self.mp_pose.PoseLandmark.LEFT_EYE: 'left_eye',
            self.mp_pose.PoseLandmark.RIGHT_EAR: 'right_ear',
            self.mp_pose.PoseLandmark.LEFT_EAR: 'left_ear',
            self.mp_pose.PoseLandmark.RIGHT_SHOULDER: 'right_shoulder',
            self.mp_pose.PoseLandmark.LEFT_SHOULDER: 'left_shoulder',
            self.mp_pose.PoseLandmark.RIGHT_ELBOW: 'right_elbow',
            self.mp_pose.PoseLandmark.LEFT_ELBOW: 'left_elbow',
            self.mp_pose.PoseLandmark.RIGHT_WRIST: 'right_wrist',
            self.mp_pose.PoseLandmark.LEFT_WRIST: 'left_wrist',
            self.mp_pose.PoseLandmark.RIGHT_HIP: 'right_hip',
            self.mp_pose.PoseLandmark.LEFT_HIP: 'left_hip',
            self.mp_pose.PoseLandmark.RIGHT_KNEE: 'right_knee',
            self.mp_pose.PoseLandmark.LEFT_KNEE: 'left_knee',
            self.mp_pose.PoseLandmark.RIGHT_ANKLE: 'right_ankle',
            self.mp_pose.PoseLandmark.LEFT_ANKLE: 'left_ankle'
        }

        self.column_mapping = {
            'nose': ['Nose_x', 'Nose_y'],
            'right_eye': ['Right_Eye_x', 'Right_Eye_y'],
            'left_eye': ['Left_Eye_x', 'Left_Eye_y'],
            'right_ear': ['Right_Ear_x', 'Right_Ear_y'],
            'left_ear': ['Left_Ear_x', 'Left_Ear_y'],
            'right_shoulder': ['Right_Shoulder_x', 'Right_Shoulder_y'],
            'left_shoulder': ['Left_Shoulder_x', 'Left_Shoulder_y'],
            'right_elbow': ['Right_Elbow_x', 'Right_Elbow_y'],
            'left_elbow': ['Left_Elbow_x', 'Left_Elbow_y'],
            'right_wrist': ['Right_Wrist_x', 'Right_Wrist_y'],
            'left_wrist': ['Left_Wrist_x', 'Left_Wrist_y'],
            'right_hip': ['Right_Hip_x', 'Right_Hip_y'],
            'left_hip': ['Left_Hip_x', 'Left_Hip_y'],
            'right_knee': ['Right_Knee_x', 'Right_Knee_y'],
            'left_knee': ['Left_Knee_x', 'Left_Knee_y'],
            'right_ankle': ['Right_Ankle_x', 'Right_Ankle_y'],
            'left_ankle': ['Left_Ankle_x', 'Left_Ankle_y']
        }

    def load_base_models(self, model_paths):
        models = {}
        for model_name, path in model_paths.items():
            if model_name == 'stacking':
                continue

            if model_name == 'transformer':
                model = PoseTransformer()
            elif model_name == 'mlp':
                model = PoseMLP()
            elif model_name == 'gru':
                model = PoseGRU()
            elif model_name == 'cnn':
                model = PoseCNN()
            else:  # attention
                model = EnhancedPoseViT()

            checkpoint = torch.load(path)
            model.load_state_dict(checkpoint['model_state_dict'])
            model = model.to(self.device)
            model.eval()
            models[model_name] = model
        return models

    def load_stacking_model(self, path):
        model = StackingModel()
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(self.device)
        model.eval()
        return model


    def extract_keypoints(self, image_path):
        # 이미지 로드 및 MediaPipe 처리
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.pose.process(image_rgb)

        if not results.pose_landmarks:
            return None

        # 키포인트 추출
        keypoints = {}
        landmarks = results.pose_landmarks.landmark
        for landmark_enum, name in self.keypoint_names.items():
            landmark = landmarks[landmark_enum.value]
            keypoints[name] = [landmark.x, landmark.y]

        return keypoints

    def normalize_keypoints(self, keypoints):
        # DataFrame 생성
        kp_flat = []
        for name, cols in self.column_mapping.items():
            if name in keypoints:
                kp_flat.extend(keypoints[name])
            else:
                kp_flat.extend([0, 0])

        df = pd.DataFrame([kp_flat], columns=[c for pair in self.column_mapping.values() for c in pair])

        # 정규화
        hip_center_x = (df['Right_Hip_x'] + df['Left_Hip_x']) / 2
        hip_center_y = (df['Right_Hip_y'] + df['Left_Hip_y']) / 2

        for col in df.columns:
            if col.endswith('_x'):
                df[col] = df[col] - hip_center_x.iloc[0]
            elif col.endswith('_y'):
                df[col] = df[col] - hip_center_y.iloc[0]

        shoulder_width = np.sqrt(
            (df['Right_Shoulder_x'] - df['Left_Shoulder_x'])**2 +
            (df['Right_Shoulder_y'] - df['Left_Shoulder_y'])**2
        )

        for col in df.columns:
            if col.endswith('_x') or col.endswith('_y'):
                df[col] = df[col] / shoulder_width.iloc[0]

        return df

    def predict(self, image_path):
        # 키포인트 추출
        keypoints = self.extract_keypoints(image_path)
        if keypoints is None:
            return "No pose detected"

        # 키포인트 정규화
        normalized_kps = self.normalize_keypoints(keypoints)
        inputs = torch.FloatTensor(normalized_kps.values).to(self.device)

        # 각 기본 모델의 예측
        base_predictions = []
        with torch.no_grad():
            for model in self.base_models.values():
                outputs = model(inputs)
                probs = F.softmax(outputs, dim=1)
                base_predictions.append(probs)

            # 스태킹 모델을 통한 최종 예측
            stacked_output = self.stacking_model(inputs)
            final_probs = F.softmax(stacked_output, dim=1)
            predicted_class = final_probs.argmax(dim=1).item()

        return class_mapping[predicted_class]
def main():
    BASE_DIR = r"/content/drive/Shareddrives/Vision/"
    # 모델 경로
    model_paths = {
        'transformer': f'{BASE_DIR}/best_PoseTransformer.pth',
        'mlp': f'{BASE_DIR}/best_PoseMLP.pth',
        'gru': f'{BASE_DIR}/best_PoseGRU.pth',
        'cnn': f'{BASE_DIR}/best_PoseCNN.pth',
        'vit': f'{BASE_DIR}/best_EnhancedPoseViT.pth',
        'stacking': f'{BASE_DIR}/best_StackingModel.pth'
    }

    # 예측기 초기화
    predictor = PosePredictor(model_paths, BASE_DIR)

    # 이미지 디렉토리 경로
    image_dir = r"/content/drive/Shareddrives/Vision/새 폴더"

    # 모든 서브디렉토리 순회
    for root, dirs, files in os.walk(image_dir):
        for filename in files:
            if filename.lower().endswith(('jpg', 'jpeg', 'png')):
                img_path = os.path.join(root, filename)
                prediction = predictor.predict(img_path)

                # 디렉토리 이름을 기준으로 결과 저장
                dir_name = os.path.basename(root)
                print(f"{dir_name}/{filename}: {prediction}")

                # 결과 저장 경로
                result_path = os.path.join(BASE_DIR, f'{dir_name}_predictions.json')
                with open(result_path, 'a') as f:
                    json.dump({filename: prediction}, f, indent=4)
                    f.write('\n')

if __name__ == "__main__":
    main()

Downloading model to /usr/local/lib/python3.10/dist-packages/mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite


  checkpoint = torch.load(path)
  checkpoint = torch.load(path)


b_A포즈/b-1.jpg: A포즈
b_A포즈/b (3).jpg: 발레
b_A포즈/b.jpg: A포즈
b_A포즈/b (2).jpg: A포즈
a_T포즈/a-12.jpg: T포즈
a_T포즈/a.jpg: T포즈
a_T포즈/a-1.jpg: T포즈
a_T포즈/a-2.jpg: T포즈
e_기지개/e-1.jpg: 기지개
e_기지개/e (2).jpg: 기지개
e_기지개/e (3).jpg: 기지개
e_기지개/e.jpg: 기지개
g_뒷짐/g-1.jpg: 뒷짐
g_뒷짐/g (2).jpg: 뒷짐
g_뒷짐/g (3).jpg: 뒷짐
g_뒷짐/g.jpg: 뒷짐
i_달리기(전력질주)/i (2).jpg: 달리기(전력질주)
i_달리기(전력질주)/i (3).jpg: 달리기(전력질주)
i_달리기(전력질주)/i.jpg: 달리기(전력질주)
h_조깅/h-1.jpg: 조깅
h_조깅/h (2).jpg: 달리기(전력질주)
h_조깅/h.jpg: 조깅
h_조깅/h (3).jpg: 조깅
c_I포즈/c-1.jpg: I포즈
c_I포즈/c (3).jpg: I포즈
c_I포즈/c (2).jpg: 계단 오르기
c_I포즈/c.jpg: I포즈
d_팔짱/d-1.jpg: 팔짱
d_팔짱/d (3).jpg: 팔짱
d_팔짱/d (2).jpg: 팔짱
d_팔짱/d.jpg: 팔짱
f_벽에 기대어 신발 신기/f-1.jpg: 벽에 기대어 신발 신기
f_벽에 기대어 신발 신기/f (2).jpg: 벽에 기대어 신발 신기
f_벽에 기대어 신발 신기/f.jpg: 벽에 기대어 신발 신기
f_벽에 기대어 신발 신기/f (3).jpg: 달리기(전력질주)
j_계단 오르기/j (3).jpg: 계단 오르기
j_계단 오르기/j (2).jpg: I포즈
j_