In [None]:
import numpy as np
import os
# success case 

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
from sklearn.decomposition import PCA
import os
import pickle
from tqdm import tqdm
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

@dataclass
class ImageOnlyConfig:
    """Configuration for image-only processing"""
    
    # ===== PATHS =====
    IMAGE_FOLDER: str = "success_traj_img"
    
    OUTPUT_PATH: str = "image_features.npz"
    PCA_MODEL_PATH: str = "image_pca_models.pkl"
    
    # ===== IMAGE PROCESSING =====
    RESNET_FEATURE_DIM: int = 512  # ResNet18 final layer per view
    VIEWS: List[str] = None
    
    # ===== PCA COMPRESSION =====
    COMPRESSED_DIM: int = 64  # Final compressed dimension per view
    TOTAL_COMPRESSED_DIM: int = 192  # 64 * 3 views
    
    # ===== MODEL =====
    DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"
    BATCH_SIZE: int = 32
    
    def __post_init__(self):
        if self.VIEWS is None:
            self.VIEWS = ["front", "top", "wrist"]
        
        print(f"Image-Only Processor Config")
        print(f"Views: {self.VIEWS}")
        print(f"ResNet Features: {self.RESNET_FEATURE_DIM} per view")
        print(f"Compressed Features: {self.COMPRESSED_DIM} per view")
        print(f"Total Compressed: {self.TOTAL_COMPRESSED_DIM}")
        print(f"Device: {self.DEVICE}")

class EpisodeProcessor:
    def __init__(self, config: ImageOnlyConfig):
        self.config = config
        self.device = torch.device(config.DEVICE)

        # ResNet18 feature extractor
        self.model = models.resnet18(pretrained=True)
        self.model = nn.Sequential(*list(self.model.children())[:-1])
        self.model = self.model.to(self.device)
        self.model.eval()

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225])
        ])

        self.pca_models = {}
        print(f"[INFO] ResNet18 feature extractor initialized on {self.device}")

    def extract_features(self, image_path: str) -> np.ndarray:
        try:
            image = Image.open(image_path).convert('RGB')
            image_tensor = self.transform(image).unsqueeze(0).to(self.device)
            with torch.no_grad():
                features = self.model(image_tensor).view(1, -1)
            return features.cpu().numpy().flatten()
        except Exception as e:
            print(f"[WARN] Failed to process {image_path}: {e}")
            return np.zeros(self.config.RESNET_FEATURE_DIM)

    def process_episode(self, episode_dir: str) -> Dict[str, np.ndarray]:
        """Process a single episode directory"""
        print(f"[INFO] Processing episode: {episode_dir}")
        
        # Load robot state
        state_path = os.path.join(episode_dir, "robot_state.npz")
        if not os.path.exists(state_path):
            raise FileNotFoundError(f"No robot_state.npz found in {episode_dir}")
        state_data = np.load(state_path)
        state_key = list(state_data.keys())[0]  
        robot_states = state_data[state_key]
        print(f"[INFO] Robot state shape: {robot_states.shape}")

        # Build timestep list
        front_dir = os.path.join(episode_dir, "front_view")
        timesteps = sorted([
            int(f.split('_')[-1].replace('.png', ''))
            for f in os.listdir(front_dir) if f.endswith('.png')
        ])

        features = []
        for i, ts in enumerate(timesteps):
            view_feats = []
            for view in self.config.VIEWS:
                img_path = os.path.join(episode_dir, f"{view}_view", f"{view}_view_{ts}.png")
                feat = self.extract_features(img_path)
                view_feats.append(feat)

            combined_img_feat = np.concatenate(view_feats)  # [1536]
            features.append(np.concatenate([combined_img_feat, robot_states[i]]))

        return {"observation": np.vstack(features)}

    def fit_pca(self, all_episode_features: List[np.ndarray]):
        """Fit PCA per view across all episodes"""
        print("[INFO] Fitting PCA models...")

        total_img_dim = len(self.config.VIEWS) * self.config.RESNET_FEATURE_DIM
        sample_feat = all_episode_features[0]
        state_dim = sample_feat.shape[1] - total_img_dim
        print(f"[INFO] Detected state_dim = {state_dim}")

        view_features = {view: [] for view in self.config.VIEWS}

        for episode_feat in all_episode_features:
            img_feats = episode_feat[:, :-state_dim]
            for i, view in enumerate(self.config.VIEWS):
                start, end = i * self.config.RESNET_FEATURE_DIM, (i + 1) * self.config.RESNET_FEATURE_DIM
                view_features[view].append(img_feats[:, start:end])

        for view in self.config.VIEWS:
            X = np.vstack(view_features[view])  # (N*T, 512)
            pca = PCA(n_components=self.config.COMPRESSED_DIM)
            pca.fit(X)
            self.pca_models[view] = pca
            print(f"[INFO] {view} view PCA variance explained: {pca.explained_variance_ratio_.sum():.3f}")

    def compress_episode(self, episode_dict: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        """Apply PCA compression to images, concat state as-is"""
        obs = episode_dict["observation"]
        total_img_dim = len(self.config.VIEWS) * self.config.RESNET_FEATURE_DIM
        state_dim = obs.shape[1] - total_img_dim

        img_feats, state_feats = obs[:, :-state_dim], obs[:, -state_dim:]

        compressed_features = []
        for row in img_feats:
            comp_views = []
            for i, view in enumerate(self.config.VIEWS):
                start, end = i*self.config.RESNET_FEATURE_DIM, (i+1)*self.config.RESNET_FEATURE_DIM
                view_feat = row[start:end].reshape(1, -1)
                comp_views.append(self.pca_models[view].transform(view_feat).flatten())
            compressed_features.append(np.concatenate(comp_views))

        compressed_features = np.vstack(compressed_features)
        final_obs = np.hstack([compressed_features, state_feats])  # PCA된 이미지 + 원본 state
        return {"observation": final_obs}

import os
import numpy as np
from tqdm import tqdm

if __name__ == "__main__":
    config = ImageOnlyConfig(
        IMAGE_FOLDER="/AILAB-summer-school-2025/success_data_raw", # image path 
        OUTPUT_PATH="success_data_preprocessing",
        PCA_MODEL_PATH="pca_models.pkl",
        COMPRESSED_DIM=64
    )
    processor = EpisodeProcessor(config)

    # Step 1. 전체 episode 폴더 탐색
    episode_dirs = [
        os.path.join(config.IMAGE_FOLDER, d)
        for d in os.listdir(config.IMAGE_FOLDER)
        if os.path.isdir(os.path.join(config.IMAGE_FOLDER, d))
        and ("success_" in d or "fail_" in d)
    ]
    print(f"[INFO] Found {len(episode_dirs)} episodes.")

    # Step 2. 각 episode feature 추출
    all_episode_features = []
    raw_episode_dicts = {}
    for epi_dir in tqdm(episode_dirs, desc="Processing episodes"):
        try:
            epi_name = os.path.basename(epi_dir)
            episode_dict = processor.process_episode(epi_dir)  # raw features + state
            raw_episode_dicts[epi_name] = episode_dict
            all_episode_features.append(episode_dict["observation"])
        except Exception as e:
            print(f"[WARN] Skipping {epi_dir}: {e}")

    # Step 3. PCA 학습 (view별)
    processor.fit_pca(all_episode_features)

    # Step 4. PCA 압축 적용 및 저장
    output_dir = "model_path"
    os.makedirs(output_dir, exist_ok=True)

    for epi_name, epi_dict in raw_episode_dicts.items():
        compressed_dict = processor.compress_episode(epi_dict)

        save_path = os.path.join(output_dir, f"{epi_name}.npz")
        np.savez_compressed(save_path, **compressed_dict)
        
        print(f"[INFO] Saved compressed episode: {save_path}")



Image-Only Processor Config
Views: ['front', 'top', 'wrist']
ResNet Features: 512 per view
Compressed Features: 64 per view
Total Compressed: 192
Device: cuda
[INFO] ResNet18 feature extractor initialized on cuda
[INFO] Found 506 episodes.


Processing episodes:   0%|          | 0/506 [00:00<?, ?it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode230_steps306
[INFO] Robot state shape: (62, 7)


Processing episodes:   0%|          | 1/506 [00:00<04:22,  1.92it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode365_steps308
[INFO] Robot state shape: (62, 7)


Processing episodes:   0%|          | 2/506 [00:01<05:08,  1.63it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode149_steps307
[INFO] Robot state shape: (62, 7)


Processing episodes:   1%|          | 3/506 [00:01<05:29,  1.53it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode481_steps323
[INFO] Robot state shape: (65, 7)


Processing episodes:   1%|          | 4/506 [00:02<05:45,  1.45it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode395_steps313
[INFO] Robot state shape: (63, 7)


Processing episodes:   1%|          | 5/506 [00:03<05:53,  1.42it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode38_steps359
[INFO] Robot state shape: (72, 7)


Processing episodes:   1%|          | 6/506 [00:04<06:15,  1.33it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode305_steps315
[INFO] Robot state shape: (63, 7)


Processing episodes:   1%|▏         | 7/506 [00:04<06:13,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode4_steps314
[INFO] Robot state shape: (63, 7)


Processing episodes:   2%|▏         | 8/506 [00:05<06:11,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode267_steps347
[INFO] Robot state shape: (70, 7)


Processing episodes:   2%|▏         | 9/506 [00:06<05:43,  1.45it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode118_steps332
[INFO] Robot state shape: (67, 7)


Processing episodes:   2%|▏         | 10/506 [00:06<05:43,  1.44it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode157_steps308
[INFO] Robot state shape: (62, 7)


Processing episodes:   2%|▏         | 11/506 [00:07<05:43,  1.44it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode218_steps330
[INFO] Robot state shape: (66, 7)


Processing episodes:   2%|▏         | 12/506 [00:08<05:55,  1.39it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode119_steps313
[INFO] Robot state shape: (63, 7)


Processing episodes:   3%|▎         | 13/506 [00:09<05:56,  1.38it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode287_steps333
[INFO] Robot state shape: (67, 7)


Processing episodes:   3%|▎         | 14/506 [00:09<06:07,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode113_steps324
[INFO] Robot state shape: (65, 7)


Processing episodes:   3%|▎         | 15/506 [00:10<05:53,  1.39it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode13_steps360
[INFO] Robot state shape: (72, 7)


Processing episodes:   3%|▎         | 16/506 [00:11<06:16,  1.30it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode443_steps309
[INFO] Robot state shape: (62, 7)


Processing episodes:   3%|▎         | 17/506 [00:12<06:11,  1.32it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode141_steps303
[INFO] Robot state shape: (61, 7)


Processing episodes:   4%|▎         | 18/506 [00:12<06:05,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode333_steps305
[INFO] Robot state shape: (61, 7)


Processing episodes:   4%|▍         | 19/506 [00:13<06:02,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode402_steps307
[INFO] Robot state shape: (62, 7)


Processing episodes:   4%|▍         | 20/506 [00:14<06:01,  1.35it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode145_steps325
[INFO] Robot state shape: (65, 7)


Processing episodes:   4%|▍         | 21/506 [00:15<06:06,  1.32it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode156_steps327
[INFO] Robot state shape: (66, 7)


Processing episodes:   4%|▍         | 22/506 [00:15<05:45,  1.40it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode339_steps306
[INFO] Robot state shape: (62, 7)


Processing episodes:   5%|▍         | 23/506 [00:16<05:43,  1.40it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode136_steps336
[INFO] Robot state shape: (68, 7)


Processing episodes:   5%|▍         | 24/506 [00:17<05:31,  1.45it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode447_steps325
[INFO] Robot state shape: (65, 7)


Processing episodes:   5%|▍         | 25/506 [00:17<05:46,  1.39it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode196_steps327
[INFO] Robot state shape: (66, 7)


Processing episodes:   5%|▌         | 26/506 [00:18<05:51,  1.37it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode15_steps313
[INFO] Robot state shape: (63, 7)


Processing episodes:   5%|▌         | 27/506 [00:19<05:51,  1.36it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode273_steps328
[INFO] Robot state shape: (66, 7)


Processing episodes:   6%|▌         | 28/506 [00:20<05:57,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode293_steps321
[INFO] Robot state shape: (65, 7)


Processing episodes:   6%|▌         | 29/506 [00:21<05:57,  1.33it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode207_steps313
[INFO] Robot state shape: (63, 7)


Processing episodes:   6%|▌         | 30/506 [00:21<05:52,  1.35it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode292_steps329
[INFO] Robot state shape: (66, 7)


Processing episodes:   6%|▌         | 31/506 [00:22<05:54,  1.34it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/success_data_raw/success_episode338_steps317
[INFO] Robot state shape: (64, 7)


Processing episodes:   6%|▌         | 31/506 [00:22<05:48,  1.36it/s]


KeyboardInterrupt: 

In [10]:
import numpy as np
import os
# success case 

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
from sklearn.decomposition import PCA
import os
import pickle
from tqdm import tqdm
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

@dataclass
class ImageOnlyConfig:
    """Configuration for image-only processing"""
    
    # ===== PATHS =====
    IMAGE_FOLDER: str = "success_traj_img"
    
    OUTPUT_PATH: str = "image_features.npz"
    PCA_MODEL_PATH: str = "image_pca_models.pkl"
    
    # ===== IMAGE PROCESSING =====
    RESNET_FEATURE_DIM: int = 512  # ResNet18 final layer per view
    VIEWS: List[str] = None
    
    # ===== PCA COMPRESSION =====
    COMPRESSED_DIM: int = 64  # Final compressed dimension per view
    TOTAL_COMPRESSED_DIM: int = 192  # 64 * 3 views
    
    # ===== MODEL =====
    DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"
    BATCH_SIZE: int = 32
    
    def __post_init__(self):
        if self.VIEWS is None:
            self.VIEWS = ["front", "top", "wrist"]
        
        print(f"Image-Only Processor Config")
        print(f"Views: {self.VIEWS}")
        print(f"ResNet Features: {self.RESNET_FEATURE_DIM} per view")
        print(f"Compressed Features: {self.COMPRESSED_DIM} per view")
        print(f"Total Compressed: {self.TOTAL_COMPRESSED_DIM}")
        print(f"Device: {self.DEVICE}")

class EpisodeProcessor:
    def __init__(self, config: ImageOnlyConfig):
        self.config = config
        self.device = torch.device(config.DEVICE)

        # ResNet18 feature extractor
        self.model = models.resnet18(pretrained=True)
        self.model = nn.Sequential(*list(self.model.children())[:-1])
        self.model = self.model.to(self.device)
        self.model.eval()

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225])
        ])

        self.pca_models = {}
        print(f"[INFO] ResNet18 feature extractor initialized on {self.device}")

    def extract_features(self, image_path: str) -> np.ndarray:
        try:
            image = Image.open(image_path).convert('RGB')
            image_tensor = self.transform(image).unsqueeze(0).to(self.device)
            with torch.no_grad():
                features = self.model(image_tensor).view(1, -1)
            return features.cpu().numpy().flatten()
        except Exception as e:
            print(f"[WARN] Failed to process {image_path}: {e}")
            return np.zeros(self.config.RESNET_FEATURE_DIM)

    def process_episode(self, episode_dir: str) -> Dict[str, np.ndarray]:
        """Process a single episode directory"""
        print(f"[INFO] Processing episode: {episode_dir}")
        
        # Load robot state
        state_path = os.path.join(episode_dir, "robot_state.npz")
        if not os.path.exists(state_path):
            raise FileNotFoundError(f"No robot_state.npz found in {episode_dir}")
        state_data = np.load(state_path)
        state_key = list(state_data.keys())[0]  
        robot_states = state_data[state_key]
        print(f"[INFO] Robot state shape: {robot_states.shape}")

        # Build timestep list
        front_dir = os.path.join(episode_dir, "front_view")
        timesteps = sorted([
            int(f.split('_')[-1].replace('.png', ''))
            for f in os.listdir(front_dir) if f.endswith('.png')
        ])

        features = []
        for i, ts in enumerate(timesteps):
            view_feats = []
            for view in self.config.VIEWS:
                img_path = os.path.join(episode_dir, f"{view}_view", f"{view}_view_{ts}.png")
                feat = self.extract_features(img_path)
                view_feats.append(feat)

            combined_img_feat = np.concatenate(view_feats)  # [1536]
            features.append(np.concatenate([combined_img_feat, robot_states[i]]))

        return {"observation": np.vstack(features)}

    def fit_pca(self, all_episode_features: List[np.ndarray]):
        """Fit PCA per view across all episodes"""
        print("[INFO] Fitting PCA models...")

        total_img_dim = len(self.config.VIEWS) * self.config.RESNET_FEATURE_DIM
        sample_feat = all_episode_features[0]
        state_dim = sample_feat.shape[1] - total_img_dim
        print(f"[INFO] Detected state_dim = {state_dim}")

        view_features = {view: [] for view in self.config.VIEWS}

        for episode_feat in all_episode_features:
            img_feats = episode_feat[:, :-state_dim]
            for i, view in enumerate(self.config.VIEWS):
                start, end = i * self.config.RESNET_FEATURE_DIM, (i + 1) * self.config.RESNET_FEATURE_DIM
                view_features[view].append(img_feats[:, start:end])

        for view in self.config.VIEWS:
            X = np.vstack(view_features[view])  # (N*T, 512)
            pca = PCA(n_components=self.config.COMPRESSED_DIM)
            pca.fit(X)
            self.pca_models[view] = pca
            print(f"[INFO] {view} view PCA variance explained: {pca.explained_variance_ratio_.sum():.3f}")

    def compress_episode(self, episode_dict: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        """Apply PCA compression to images, concat state as-is"""
        obs = episode_dict["observation"]
        total_img_dim = len(self.config.VIEWS) * self.config.RESNET_FEATURE_DIM
        state_dim = obs.shape[1] - total_img_dim

        img_feats, state_feats = obs[:, :-state_dim], obs[:, -state_dim:]

        compressed_features = []
        for row in img_feats:
            comp_views = []
            for i, view in enumerate(self.config.VIEWS):
                start, end = i*self.config.RESNET_FEATURE_DIM, (i+1)*self.config.RESNET_FEATURE_DIM
                view_feat = row[start:end].reshape(1, -1)
                comp_views.append(self.pca_models[view].transform(view_feat).flatten())
            compressed_features.append(np.concatenate(comp_views))

        compressed_features = np.vstack(compressed_features)
        final_obs = np.hstack([compressed_features, state_feats])  # PCA된 이미지 + 원본 state
        return {"observation": final_obs}

import os
import numpy as np
from tqdm import tqdm

def format_episode_name(epi_dir: str) -> str:
    parts = epi_dir.split(os.sep)
    base = parts[-1]       
    parent = parts[-2] if len(parts) >= 2 else ""

    if parent.startswith("fail_case"):
        failtype_num = ''.join([c for c in parent if c.isdigit()])
        new_base = base.replace(f"fail{failtype_num}_", f"failtype{failtype_num}_")
        return f"{new_base}"
    else:
        return base  

if __name__ == "__main__":
    config = ImageOnlyConfig(
        IMAGE_FOLDER="/AILAB-summer-school-2025/fail_data_raw", # image path 
        OUTPUT_PATH="final_episode_dicts.npz",
        PCA_MODEL_PATH="/AILAB-summer-school-2025/success_data_preprocessing/pca_weight/image_pca_models.pkl",
        COMPRESSED_DIM=64)

    processor = EpisodeProcessor(config)

    # Step 1. 전체 episode 폴더 탐색 (success/fail 구분)
    episode_dirs = []
    for root in os.listdir(config.IMAGE_FOLDER):
        root_path = os.path.join(config.IMAGE_FOLDER, root)
        if not os.path.isdir(root_path):
            continue

        if root.startswith("success_"):  
            episode_dirs.append(root_path)
        elif root.startswith("fail_case"):
            for sub in os.listdir(root_path):
                sub_path = os.path.join(root_path, sub)
                if os.path.isdir(sub_path) and sub.startswith("fail"):
                    episode_dirs.append(sub_path)

    print(f"[INFO] Found {len(episode_dirs)} episodes.")

    # Step 2. 각 episode feature 추출
    all_episode_features = []
    raw_episode_dicts = {}
    for epi_dir in tqdm(episode_dirs, desc="Processing episodes"):
        try:
            epi_name = format_episode_name(epi_dir)
            episode_dict = processor.process_episode(epi_dir)
            raw_episode_dicts[epi_name] = episode_dict
            all_episode_features.append(episode_dict["observation"])
        except Exception as e:
            print(f"[WARN] Skipping {epi_dir}: {e}")

    # Step 3. PCA 학습 (view별)
    processor.fit_pca(all_episode_features)

    # Step 4. PCA 압축 적용 및 저장
    output_dir = "compressed_episodes"
    os.makedirs(output_dir, exist_ok=True)

    for epi_name, epi_dict in raw_episode_dicts.items():
        compressed_dict = processor.compress_episode(epi_dict)
        epi_name = epi_name.replace("/", "_")
        save_path = os.path.join(output_dir, f"{epi_name}.npz")
        np.savez_compressed(save_path, **compressed_dict)
        print(f"[INFO] Saved compressed episode: {save_path}")



Image-Only Processor Config
Views: ['front', 'top', 'wrist']
ResNet Features: 512 per view
Compressed Features: 64 per view
Total Compressed: 192
Device: cuda
[INFO] ResNet18 feature extractor initialized on cuda
[INFO] Found 22 episodes.


Processing episodes:   0%|          | 0/22 [00:00<?, ?it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case1_missingGrasp/fail1_episode5_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:   5%|▍         | 1/22 [00:00<00:16,  1.25it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case1_missingGrasp/fail1_episode3_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:   9%|▉         | 2/22 [00:01<00:16,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case1_missingGrasp/fail1_episode2_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  14%|█▎        | 3/22 [00:02<00:15,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case1_missingGrasp/fail1_episode4_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  18%|█▊        | 4/22 [00:03<00:14,  1.25it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case1_missingGrasp/fail1_episode1_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  23%|██▎       | 5/22 [00:04<00:13,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case4_release/fail4_episode2_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  27%|██▋       | 6/22 [00:04<00:12,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case4_release/fail4_episode4_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  32%|███▏      | 7/22 [00:05<00:12,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case4_release/fail4_episode1_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  36%|███▋      | 8/22 [00:06<00:11,  1.25it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case4_release/fail4_episode5_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  41%|████      | 9/22 [00:07<00:10,  1.25it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case4_release/fail4_episode3_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  45%|████▌     | 10/22 [00:08<00:09,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case2_outofcontrolPregrasp/fail2_episode4_step349_noise30
[INFO] Robot state shape: (70, 7)


Processing episodes:  50%|█████     | 11/22 [00:08<00:08,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case2_outofcontrolPregrasp/fail2_episode1_step349_noise30
[INFO] Robot state shape: (70, 7)


Processing episodes:  55%|█████▍    | 12/22 [00:09<00:08,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case2_outofcontrolPregrasp/fail2_episode5_step349_noise30
[INFO] Robot state shape: (70, 7)


Processing episodes:  59%|█████▉    | 13/22 [00:10<00:07,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case2_outofcontrolPregrasp/fail2_episode2_step349_noise30
[INFO] Robot state shape: (70, 7)


Processing episodes:  64%|██████▎   | 14/22 [00:11<00:06,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case2_outofcontrolPregrasp/fail2_episode3_step349_noise30
[INFO] Robot state shape: (70, 7)


Processing episodes:  68%|██████▊   | 15/22 [00:12<00:05,  1.24it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case3_outofcontrolMovetobin/fail3_episode5_step349_noise200
[INFO] Robot state shape: (70, 7)


Processing episodes:  73%|███████▎  | 16/22 [00:12<00:04,  1.35it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case3_outofcontrolMovetobin/fail3_episode4_step349_noise200
[INFO] Robot state shape: (70, 7)


Processing episodes:  77%|███████▋  | 17/22 [00:13<00:03,  1.49it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case3_outofcontrolMovetobin/fail3_episode3_step349_noise200
[INFO] Robot state shape: (70, 7)


Processing episodes:  82%|████████▏ | 18/22 [00:13<00:02,  1.58it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case3_outofcontrolMovetobin/fail3_episode1_step349_noise200
[INFO] Robot state shape: (70, 7)


Processing episodes:  86%|████████▋ | 19/22 [00:14<00:02,  1.46it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case3_outofcontrolMovetobin/fail3_episode2_step349_noise200
[INFO] Robot state shape: (70, 7)


Processing episodes:  91%|█████████ | 20/22 [00:15<00:01,  1.39it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case5_light/fail5_episode5_step349
[INFO] Robot state shape: (70, 7)


Processing episodes:  95%|█████████▌| 21/22 [00:16<00:00,  1.35it/s]

[INFO] Processing episode: /AILAB-summer-school-2025/fail_data_raw/fail_case5_light/fail5_episode4_step349
[INFO] Robot state shape: (70, 7)


Processing episodes: 100%|██████████| 22/22 [00:16<00:00,  1.31it/s]


[INFO] Fitting PCA models...
[INFO] Detected state_dim = 7
[INFO] front view PCA variance explained: 0.938
[INFO] top view PCA variance explained: 0.944
[INFO] wrist view PCA variance explained: 0.935
[INFO] Saved compressed episode: compressed_episodes/failtype1_episode5_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype1_episode3_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype1_episode2_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype1_episode4_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype1_episode1_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype4_episode2_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype4_episode4_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype4_episode1_step349.npz
[INFO] Saved compressed episode: compressed_episodes/failtype4_episode5_step349.npz
[INFO] Saved compressed episode: compressed

In [2]:
import os
import pickle
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from typing import Dict
from dataclasses import dataclass

@dataclass
class TestConfig:
    TEST_FOLDER: str = "" #이미지 폴더 경로   put this folder 2 images
    PCA_MODEL_PATH: str = "pca_models.pkl" #load path
    DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"
    VIEWS: list = None
    RESNET_FEATURE_DIM: int = 512
    COMPRESSED_DIM: int = 64

    def __post_init__(self):
        if self.VIEWS is None:
            self.VIEWS = ["front", "top", "wrist"]
        print(f"[INFO] Test Config Loaded. Views = {self.VIEWS}")

class TestProcessor:
    def __init__(self, config: TestConfig):
        self.config = config
        self.device = torch.device(config.DEVICE)

        self.model = models.resnet18(pretrained=True)
        self.model = nn.Sequential(*list(self.model.children())[:-1])
        self.model = self.model.to(self.device)
        self.model.eval()

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225])
        ])

        with open(config.PCA_MODEL_PATH, "rb") as f:
            self.pca_models = pickle.load(f)
        print(f"[INFO] Loaded PCA models from {config.PCA_MODEL_PATH}")

    def extract_feature(self, image_path: str) -> np.ndarray:
        try:
            image = Image.open(image_path).convert('RGB')
            image_tensor = self.transform(image).unsqueeze(0).to(self.device)
            with torch.no_grad():
                feat = self.model(image_tensor).view(1, -1)
            return feat.cpu().numpy().flatten()
        except Exception as e:
            print(f"[WARN] Could not process {image_path}: {e}")
            return np.zeros(self.config.RESNET_FEATURE_DIM)

    def process_test_folder(self) -> Dict[str, np.ndarray]:
        """Process all images in test folder and output compressed features"""
        results = {}
        for fname in os.listdir(self.config.TEST_FOLDER):
            if not fname.endswith(".png"):
                continue

            fpath = os.path.join(self.config.TEST_FOLDER, fname)

            # 파일명에서 view 추출 (front, top, wrist 중 하나)
            view = None
            for v in self.config.VIEWS:
                if v in fname.lower():
                    view = v
                    break

            if view is None:
                print(f"[WARN] No matching view found for {fname}, skipping")
                continue

            feat = self.extract_feature(fpath)
            pca_model = self.pca_models[view]
            comp_feat = pca_model.transform(feat.reshape(1, -1)).flatten()

            results[fname] = comp_feat
            print(f"[INFO] {fname} ({view}) compressed to shape {comp_feat.shape}")

        return results


if __name__ == "__main__":
    config = TestConfig(
        TEST_FOLDER="/AILAB-summer-school-2025/test",              
        PCA_MODEL_PATH="/AILAB-summer-school-2025/success_data_preprocessing/pca_weight/image_pca_models.pkl"  #load path
    )
    tester = TestProcessor(config)

    results = tester.process_test_folder()

    print("\n[INFO] ===== Compressed Features =====")
    for fname, comp_feat in results.items():
        print(f"File: {fname}, Feature shape: {comp_feat}")
        print("-" * 60)

    if len(results) > 1:
        names = list(results.keys())
        feats = np.vstack(list(results.values()))

        # Cosine 유사도
        from sklearn.metrics.pairwise import cosine_similarity
        cos_sim = cosine_similarity(feats)
        # Uclidean Distance
        from sklearn.metrics.pairwise import euclidean_distances
        euclid_dist = euclidean_distances(feats)

        print("\n[INFO] Pairwise Feature Comparison")
        for i in range(len(names)):
            for j in range(i + 1, len(names)):
                print(f"{names[i]} vs {names[j]}:")
                print(f"  Cosine Similarity = {cos_sim[i, j]:.4f}")
                print(f"  Euclidean Distance = {euclid_dist[i, j]:.4f}")
                print("-" * 60)

[INFO] Test Config Loaded. Views = ['front', 'top', 'wrist']
[INFO] Loaded PCA models from /AILAB-summer-school-2025/success_data_preprocessing/pca_weight/image_pca_models.pkl
[INFO] front_view_15.png (front) compressed to shape (64,)
[INFO] front_view_55.png (front) compressed to shape (64,)

[INFO] ===== Compressed Features =====
File: front_view_15.png, Feature shape: [ 4.72703    -1.072504   -0.87296116 -0.43789268  2.0988083  -0.6866239
  0.04479933 -0.02085042  0.09806824 -1.0059185   0.06412601 -0.780879
  0.91437745 -0.40677467 -0.37339795 -0.27510643  0.8253875  -0.9524193
  0.756865   -0.53242755  0.5253252  -0.5092813   0.30531973 -0.21605682
 -0.21085715  0.12881082 -0.15477014  0.66942155 -0.865785    0.1336565
  0.08154511  0.3179567   0.4893713  -0.1618681  -0.6881063   0.11584532
  0.3137312   0.12497234  0.3625562   0.9273153   0.18536142 -0.10525858
 -0.2437501  -0.14380884 -0.15398502  0.51048946 -0.7326085   0.10164288
  0.06678283  0.302735    0.8178067   0.8597662

