In [None]:
"""
apply resent18 (for success)

structure
data = {
    "success_episode{episode_number}_steps{episode_length//5+1}":
        [episode_length//5+1, 512*3] #front, top, wrist
    }
"""
import os
import re
import pickle
from typing import Dict, Tuple, List

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import models, transforms
from torchvision.models import ResNet18_Weights
os.makedirs("dataset", exist_ok=True)

# ====== 설정 ======
ROOT_DIR = "/home/user/Desktop/endToEnd_forSuccess/success_data_raw"   # 입력 루트
OUT_PATH = "dataset/success_data_resnet18_robotX.pkl"                    # 출력 파일
VIEWS = ("front", "top", "wrist")                         # 처리할 뷰

# 이미지 전처리 (요청 그대로)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],
                         std= [0.229,0.224,0.225]),
])

# 에피소드 폴더명: success_episode{num}_steps{episode_length}
EP_DIR_RE = re.compile(r"^success_episode(\d+)_steps(\d+)$")


def parse_episode_dirname(name: str) -> Tuple[int, int]:
    """Split an episode directory name into ``(episode_number, episode_length)``.

    Args:
        name: Directory name, expected to look like
            ``success_episode{num}_steps{episode_length}``.

    Returns:
        The two numeric fields of the name as integers.

    Raises:
        ValueError: If ``name`` does not match ``EP_DIR_RE``.
    """
    matched = EP_DIR_RE.match(name)
    if matched is None:
        raise ValueError(f"Invalid episode dir name: {name}")
    episode_number, episode_length = (int(field) for field in matched.groups())
    return episode_number, episode_length


def list_episode_dirs(root: str) -> List[Tuple[int, int, str, str]]:
    """Collect the episode directories found directly under ``root``.

    Only sub-directories whose name matches ``EP_DIR_RE`` are kept.

    Returns:
        A list of ``(ep_num, ep_len, path, dir_name)`` tuples sorted by
        ascending ``ep_num``, where ``path`` is ``os.path.join(root, dir_name)``.
    """
    found = []
    for entry in os.listdir(root):
        full_path = os.path.join(root, entry)
        # Plain files and directories with unrelated names are ignored.
        if os.path.isdir(full_path) and EP_DIR_RE.match(entry):
            found.append((*parse_episode_dirname(entry), full_path, entry))
    return sorted(found, key=lambda item: item[0])


def expected_steps(ep_len: int) -> List[int]:
    """Return the sampled step indices ``0, 5, 10, ...`` that are below ``ep_len``.

    Example: ``ep_len=320`` gives ``[0, 5, ..., 315]`` (64 entries). When
    ``ep_len`` is not a multiple of 5 the last partial stride still contributes
    one entry, e.g. ``ep_len=11`` gives ``[0, 5, 10]``.
    """
    return [*range(0, ep_len, 5)]


def img_path(ep_path: str, view: str, step: int) -> str:
    """Build the image path for one view at one step.

    The layout is ``{ep_path}/{view}_view/{view}_view_{step}.png``.
    """
    view_dir = f"{view}_view"
    file_name = f"{view_dir}_{step}.png"
    return os.path.join(ep_path, view_dir, file_name)


@torch.inference_mode()
def extract_feature(model: nn.Module, path: str, device: torch.device) -> np.ndarray:
    """Encode a single image file with ``model`` and return the result as float32.

    Args:
        model: Network applied to the preprocessed image (batch of one).
        path: Path of the image file to load.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        The model output with the batch axis squeezed away, as a float32 NumPy
        array. With the ResNet18 whose ``fc`` is replaced by ``nn.Identity``
        (as built in ``main``) this is a ``(512,)`` vector.
    """
    # Manage the file-backed image returned by Image.open: `convert` returns a
    # new image, so wrapping `Image.open(path).convert("RGB")` in `with` (the
    # previous form) only managed that copy, not the opened source image.
    with Image.open(path) as src:
        rgb = src.convert("RGB")
    x = transform(rgb).unsqueeze(0).to(device)
    f = model(x).squeeze(0).detach().cpu().numpy().astype(np.float32)
    return f


def main():
    """Extract ResNet18 features for every episode under ``ROOT_DIR`` and pickle them.

    For each episode directory returned by ``list_episode_dirs``, every step
    from ``expected_steps`` is encoded for the views in ``VIEWS`` and the
    per-view vectors are concatenated. The per-episode arrays are stored in a
    dict keyed by ``success_episode{ep_num}_steps{T}`` and dumped to ``OUT_PATH``.

    Raises:
        RuntimeError: No episode directory was found under ``ROOT_DIR``.
        ValueError: An episode directory declares a length that yields no steps.
        FileNotFoundError: At least one expected image file is missing.
    """
    # Model: ResNet18 with ImageNet weights; the FC head is replaced by an
    # identity so the network outputs the 512-d pooled feature.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
    model.fc = nn.Identity()
    model.eval().to(device)

    # Scan the episode directories.
    episodes = list_episode_dirs(ROOT_DIR)
    if not episodes:
        raise RuntimeError(f"No valid episode directories found under '{ROOT_DIR}'")

    data: Dict[str, np.ndarray] = {}

    for ep_num, ep_len, ep_path, ep_name in episodes:
        # len(steps) == len(range(0, ep_len, 5)) == (ep_len - 1) // 5 + 1 for ep_len >= 1
        steps = expected_steps(ep_len)
        print(f"[Episode {ep_num:>4}] {ep_name} -> steps expected: {len(steps)} (0..{ep_len-1} by 5)")

        if not steps:
            # Without this guard np.stack([]) below fails with an unrelated-looking message.
            raise ValueError(f"[{ep_name}] episode length {ep_len} yields no steps to extract")

        # Verify that every required image exists (all views) before doing any work.
        step_paths = [[img_path(ep_path, v, s) for v in VIEWS] for s in steps]
        missing = [p for paths in step_paths for p in paths if not os.path.isfile(p)]
        if missing:
            # Report the gap immediately and abort rather than build a partial episode.
            preview = "\n".join(missing[:20])
            tail = "" if len(missing) <= 20 else f"\n... (+{len(missing)-20} more)"
            raise FileNotFoundError(
                f"[{ep_name}] Missing required image files:\n{preview}{tail}"
            )

        # Feature extraction: one concatenated vector per step.
        episode_feats = []
        for paths in step_paths:
            feats = [extract_feature(model, p, device) for p in paths]  # 3 x (512,)
            episode_feats.append(np.concatenate(feats, axis=0))  # (1536,)

        arr = np.stack(episode_feats, axis=0)  # (T, 1536), T = len(steps)
        key = f"success_episode{ep_num}_steps{len(steps)}"
        if key in data:
            # Two directories with the same episode number and step count map to
            # the same key; make the overwrite visible instead of silent.
            print(f"[WARN] Duplicate key '{key}' (from {ep_name}); overwriting the earlier episode.")
        data[key] = arr
        print(f"  -> Added episode: key='{key}', array shape={arr.shape}")

    # Save.
    with open(OUT_PATH, "wb") as f:
        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

    print(f"\nSaved: {OUT_PATH}")
    print(f"Total episodes: {len(data)}")


if __name__ == "__main__":
    main()


In [None]:
#!/usr/bin/env python3
import os, re, sys, pickle
from typing import Dict, Tuple
import numpy as np

# ===== Settings =====
ROOT_DIR = "success_data_raw"   # parent directory that contains the episode directories
PKL_PATH = "dataset/success_data_resnet18_robotX.pkl"            # existing dataset pkl (values assumed to have shape (T,1536))
OUT_PATH = "dataset/success_data_resnet18_robotO.pkl"                                           # If None, PKL_PATH is overwritten; otherwise the result is written to this path.

# Directory-name / dict-key patterns
DIR_RE = re.compile(r"^success_episode(\d+)_steps(\d+)$")
KEY_RE = re.compile(r"^success_episode(\d+)_steps(\d+)$")

def err(msg: str):
    """Print ``[ERROR] <msg>`` to stderr and terminate with exit status 1."""
    print("[ERROR]", msg, file=sys.stderr)
    raise SystemExit(1)

def load_pkl(path: str) -> Dict[str, np.ndarray]:
    """Load the dataset pickle at ``path`` and return its top-level dict.

    Exits through ``err`` when the file does not exist or when the unpickled
    object is not a dict.

    NOTE(review): ``pickle.load`` can execute code embedded in the file; only
    point this at pickles you produced yourself.
    """
    if not os.path.isfile(path):
        err(f"PKL not found: {path}")
    with open(path, "rb") as handle:
        loaded = pickle.load(handle)
    if isinstance(loaded, dict):
        return loaded
    err("PKL root must be a dict.")
    return loaded

def parse_key(key: str) -> Tuple[int, int]:
    """Parse a dataset key into ``(episode_num, key_steps)``.

    The key must match ``KEY_RE`` (``success_episode{num}_steps{steps}``);
    otherwise the script exits through ``err``.
    """
    matched = KEY_RE.match(key)
    if matched is None:
        err(f"Unexpected key format: {key}")
    episode_num, key_steps = map(int, matched.groups())
    return episode_num, key_steps

def find_episode_dir(root: str, episode_num: int) -> Tuple[str, int]:
    """Locate the directory under ``root`` that belongs to ``episode_num``.

    Entries of ``root`` are matched against ``DIR_RE``. When several entries
    share the episode number, the one with the most recent modification time
    is chosen. If none matches, the script exits through ``err``.

    Returns:
        ``(path, raw_steps)`` where ``path`` is ``os.path.join(root, name)``
        and ``raw_steps`` is the step count written in the directory name
        (assumed to be the raw frame count).
    """
    hits = []
    for entry in os.listdir(root):
        matched = DIR_RE.match(entry)
        if matched is None or int(matched.group(1)) != episode_num:
            continue
        hits.append((os.path.join(root, entry), int(matched.group(2))))

    if not hits:
        err(f"Episode dir not found under '{root}' for episode{episode_num}")
    if len(hits) == 1:
        return hits[0]
    # Episode-number collision: prefer the most recently modified directory.
    return max(hits, key=lambda hit: os.path.getmtime(hit[0]))

def load_ee_pose(ep_dir: str, expected_T: int) -> np.ndarray:
    """Load ``EE_pose`` from ``robot_state.npz`` in ``ep_dir`` and validate its shape.

    Args:
        ep_dir: Episode directory that should contain ``robot_state.npz``.
        expected_T: Required number of rows (time steps).

    Returns:
        The pose array converted to float32, with shape ``(expected_T, 7)``.

    The script exits through ``err`` when the file or the ``EE_pose`` entry is
    missing, or when the array is not ``(expected_T, 7)``.
    """
    npz_path = os.path.join(ep_dir, "robot_state.npz")
    if not os.path.isfile(npz_path):
        err(f"robot_state.npz not found: {npz_path}")
    # np.load on an .npz returns a lazily-read archive that keeps the file
    # open; the context manager closes it once the array has been copied out.
    # NOTE(review): allow_pickle=True permits pickled object arrays, which can
    # execute code on load; only use this on trusted recordings.
    with np.load(npz_path, allow_pickle=True) as state:
        if "EE_pose" not in state:
            err(f"'EE_pose' key not found in {npz_path}")
        ee = np.asarray(state["EE_pose"], dtype=np.float32)
    if ee.ndim != 2 or ee.shape[1] != 7:
        err(f"EE_pose must have shape (T,7). Got {ee.shape} at {npz_path}")
    if ee.shape[0] != expected_T:
        err(f"EE_pose T mismatch. Expected {expected_T}, got {ee.shape[0]} at {npz_path}")
    return ee

def main():
    """Append the 7-d end-effector pose to every episode array and save the result.

    Loads the ``(T,1536)`` arrays from ``PKL_PATH``, finds each episode's
    directory under ``ROOT_DIR``, checks that the step counts agree, appends
    ``EE_pose`` from ``robot_state.npz`` to get ``(T,1543)`` arrays, and writes
    them to ``OUT_PATH``. When ``OUT_PATH`` is unset the original file is moved
    to ``PKL_PATH + ".bak"`` and then overwritten. Any validation failure
    terminates the script through ``err``.
    """
    data = load_pkl(PKL_PATH)

    # Nothing to do for an empty dataset.
    if not data:
        err("PKL has no keys.")

    updated: Dict[str, np.ndarray] = {}
    for key, val in data.items():
        ep_num, key_T = parse_key(key)

        # Value must be (T,1536).
        arr = np.asarray(val)
        if arr.ndim != 2 or arr.shape[1] != 1536:
            err(f"Value shape must be (T,1536). Key={key}, got {arr.shape}")

        ep_dir, raw_steps = find_episode_dir(ROOT_DIR, ep_num)

        # Frames are sampled every 5 raw steps starting at 0, so the expected
        # count is ((raw_steps - 1) // 5) + 1; it must equal the key's step count.
        derived_T = ((raw_steps - 1) // 5) + 1
        if derived_T != key_T:
            # The message now states the formula that is actually evaluated.
            err(
                f"steps check failed. Dir '{os.path.basename(ep_dir)}' has raw_steps={raw_steps} "
                f"-> ((raw-1)//5)+1={derived_T}, but key steps={key_T}."
            )

        # Load EE_pose and check it is (key_T, 7).
        ee_pose = load_ee_pose(ep_dir, expected_T=key_T)

        # Concatenate -> (T, 1543).
        if arr.shape[0] != key_T:
            err(f"T mismatch between value and key. Key={key}, value.shape[0]={arr.shape[0]}")
        merged = np.concatenate([arr.astype(np.float32, copy=False), ee_pose], axis=1)
        if merged.shape != (key_T, 1543):
            err(f"Merged shape must be (T,1543). Got {merged.shape} for key={key}")

        updated[key] = merged
        print(f"[OK] {key}: (T,1536) + (T,7) -> (T,1543) | dir={os.path.basename(ep_dir)}")

    # Save. Back the original up whenever it is about to be overwritten, i.e.
    # for every falsy OUT_PATH, matching the `OUT_PATH or PKL_PATH` fallback.
    out_path = OUT_PATH or PKL_PATH
    if not OUT_PATH:
        bak = PKL_PATH + ".bak"
        try:
            # os.replace overwrites an existing backup in a single step.
            os.replace(PKL_PATH, bak)
        except OSError as e:
            err(f"Failed to backup original PKL: {e}")
        print(f"[INFO] Backup saved: {bak}")

    with open(out_path, "wb") as f:
        pickle.dump(updated, f, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"[DONE] Saved: {out_path} | keys={len(updated)}")

if __name__ == "__main__":
    main()


In [None]:
import os
import re
import pickle
import numpy as np

# ===== Settings =====
ROOT_DIR = "success_data_raw"   # parent directory that contains the episode directories
PKL_PATH = "dataset/success_data_resnet18_robotO.pkl"    # dataset pkl whose trailing 7 columns are verified

# Regular expressions for dataset keys and episode directory names
KEY_RE = re.compile(r"^success_episode(\d+)_steps(\d+)$")
DIR_RE = re.compile(r"^success_episode(\d+)_steps(\d+)$")

def parse_key(key):
    """Return ``(episode_number, step_count)`` parsed from a dataset key.

    Raises:
        ValueError: If ``key`` does not match ``KEY_RE``.
    """
    matched = KEY_RE.match(key)
    if matched is None:
        raise ValueError(f"Invalid key format: {key}")
    episode_number, step_count = (int(field) for field in matched.groups())
    return episode_number, step_count

def find_dir_for_episode(ep_num):
    """Return the path of the episode directory under ``ROOT_DIR`` for ``ep_num``.

    Entries of ``ROOT_DIR`` are matched against ``DIR_RE``. When several
    entries share the episode number, the most recently modified one is used.
    ``os.listdir`` returns entries in arbitrary order, so "first match" was not
    reproducible; picking by modification time is also the rule the merge
    step's ``find_episode_dir`` applies, so verification reads the same
    directory the dataset was built from.

    Raises:
        FileNotFoundError: If no entry matches ``ep_num``.
    """
    matches = []
    for name in os.listdir(ROOT_DIR):
        m = DIR_RE.match(name)
        if m and int(m.group(1)) == ep_num:
            matches.append(os.path.join(ROOT_DIR, name))
    if not matches:
        raise FileNotFoundError(f"Dir not found for episode {ep_num}")
    if len(matches) == 1:
        return matches[0]
    chosen = max(matches, key=os.path.getmtime)
    print(f"[WARN] Multiple dirs found for ep{ep_num}, using most recently modified: {chosen}")
    return chosen
def main():
    """Verify that the last 7 columns of every array in ``PKL_PATH`` equal ``EE_pose``.

    For each key the matching episode directory is located, ``EE_pose`` is
    read from its ``robot_state.npz``, and the values are compared with the
    trailing 7 columns of the stored array (``atol=1e-6``). Each episode is
    reported as ``[OK]`` or ``[FAIL]`` and a summary line is printed at the end.

    Raises:
        ValueError: A key does not match the expected format (from ``parse_key``).
        FileNotFoundError: No directory exists for an episode
            (from ``find_dir_for_episode``).
    """
    with open(PKL_PATH, "rb") as f:
        data = pickle.load(f)

    all_ok = True
    for key, arr in data.items():
        ep_num, step_count = parse_key(key)

        # Shape checks. Without the 2-D / width guard, `arr[:, -7:]` below
        # would raise, or np.allclose would fail on mismatched shapes, instead
        # of producing a [FAIL] line like the other checks.
        arr = np.asarray(arr)
        if arr.ndim != 2 or arr.shape[1] < 7:
            print(f"[FAIL] {key}: expected a 2-D array with at least 7 columns, got shape {arr.shape}")
            all_ok = False
            continue
        if arr.shape[0] != step_count:
            print(f"[FAIL] {key}: array length mismatch {arr.shape[0]} != {step_count}")
            all_ok = False
            continue

        # Locate the episode directory.
        ep_dir = find_dir_for_episode(ep_num)

        # Load EE_pose.
        npz_path = os.path.join(ep_dir, "robot_state.npz")
        if not os.path.isfile(npz_path):
            print(f"[FAIL] {key}: robot_state.npz not found in {ep_dir}")
            all_ok = False
            continue

        # The archive keeps the file open; close it as soon as the array is copied out.
        # NOTE(review): allow_pickle=True can execute code on load; trusted files only.
        with np.load(npz_path, allow_pickle=True) as state:
            ee_pose = (
                np.asarray(state["EE_pose"], dtype=np.float32)
                if "EE_pose" in state
                else None
            )
        if ee_pose is None:
            print(f"[FAIL] {key}: 'EE_pose' key missing in {npz_path}")
            all_ok = False
            continue

        if ee_pose.shape != (step_count, 7):
            print(f"[FAIL] {key}: EE_pose shape mismatch {ee_pose.shape} != ({step_count}, 7)")
            all_ok = False
            continue

        # Compare the trailing 7 columns with the recorded pose.
        pkl_pose = arr[:, -7:].astype(np.float32)
        if not np.allclose(pkl_pose, ee_pose, atol=1e-6):
            print(f"[FAIL] {key}: EE_pose values differ")
            all_ok = False
        else:
            print(f"[OK]   {key}")

    if all_ok:
        print("\nAll episodes passed verification.")
    else:
        print("\nSome episodes failed verification.")

if __name__ == "__main__":
    main()

In [None]:
#!/usr/bin/env python3
import os, re, pickle, sys
from typing import Dict
import numpy as np

# Input: per-episode feature arrays; output: the same keys with PCA-reduced features.
IN_PATH  = "dataset/success_data_resnet18_robotX.pkl"
OUT_PATH = "dataset/success_data_resnet18_pca_robotX.pkl"
# One PCA model is loaded per view.
VIEWS    = ("front", "top", "wrist")
# Dataset key format: success_episode{num}_steps{steps}
KEY_RE   = re.compile(r"^success_episode(\d+)_steps(\d+)$")

CHUNK = 8192  # Rows per PCA transform call; lower it when memory is tight, or set to None to transform each array in one call.
def parse_key_nums(key: str) -> tuple[int, int]:
    """Return ``(episode_num, steps_num)`` parsed from a dataset key.

    Raises:
        ValueError: If ``key`` does not match ``KEY_RE``.
    """
    matched = KEY_RE.match(key)
    if matched is None:
        raise ValueError(f"Invalid key format: {key}")
    episode_num, steps_num = (int(field) for field in matched.groups())
    return episode_num, steps_num

def load_pca(view: str):
    """Unpickle the PCA model for ``view`` and sanity-check its dimensions.

    The model is read from ``model/model_pca_{view}_view.pkl``. If the object
    exposes ``n_features_in_`` it must be 512, and if it exposes
    ``n_components_`` it must be 64.

    Raises:
        FileNotFoundError: The model file does not exist.
        ValueError: The model's input or output width is not the expected one.
    """
    model_file = f"model/model_pca_{view}_view.pkl"
    if not os.path.isfile(model_file):
        raise FileNotFoundError(f"PCA model not found: {model_file}")
    with open(model_file, "rb") as handle:
        pca_model = pickle.load(handle)

    # Input width: a missing attribute is treated as the expected 512.
    width_in = getattr(pca_model, "n_features_in_", 512)
    if width_in != 512:
        raise ValueError(f"{view} PCA expects 512-d input, got {width_in}")

    # Output width: only checked when the attribute is available.
    width_out = getattr(pca_model, "n_components_", None)
    if width_out not in (None, 64):
        raise ValueError(f"{view} PCA expects 64-d output, got {width_out}")
    return pca_model

def pca_transform_chunked(X: np.ndarray, model, chunk: int | None) -> np.ndarray:
    if chunk is None or X.shape[0] <= (chunk or 0):
        Z = model.transform(X)
    else:
        out = []
        for i in range(0, X.shape[0], chunk):
            out.append(model.transform(X[i:i+chunk]))
        Z = np.concatenate(out, axis=0)
    if Z.shape[1] != 64:
        raise RuntimeError(f"PCA transform produced width {Z.shape[1]}, expected 64")
    return Z.astype(np.float32, copy=False)

def main():
    """Reduce each view's 512-d feature block to 64-d with PCA and save the result.

    Reads the dict of per-episode arrays from ``IN_PATH``, validates each array
    against its key, transforms the front / top / wrist blocks (columns
    0-511, 512-1023, 1024-1535) with the corresponding PCA model, and writes
    the ``(T, 192)`` results to ``OUT_PATH`` under the same keys.

    Raises:
        TypeError: The input pickle is not a dict.
        ValueError: A key is malformed, or an array fails validation
            (dimensionality, length, width, NaN).
        RuntimeError: The concatenated PCA output has an unexpected shape.
    """
    pca = {}
    for view in VIEWS:
        pca[view] = load_pca(view)

    with open(IN_PATH, "rb") as fh:
        data: Dict[str, np.ndarray] = pickle.load(fh)
    if not isinstance(data, dict):
        raise TypeError("Input PKL must be dict {key: ndarray}.")

    out: Dict[str, np.ndarray] = {}
    # Process episodes ordered by (episode number, step count).
    for key in sorted(data, key=parse_key_nums):
        _, steps = parse_key_nums(key)
        arr = np.asarray(data[key])
        if arr.ndim != 2:
            raise ValueError(f"{key}: value must be 2D, got {arr.ndim}D.")
        n_steps, width = arr.shape
        if n_steps != steps:
            raise ValueError(f"{key}: time length mismatch. value T={n_steps} != key steps={steps}")
        if width < 1536:
            raise ValueError(f"{key}: expected >=1536 dims, got {width}")
        if np.isnan(arr[:, :1536]).any():
            raise ValueError(f"{key}: NaN detected in first 1536 dims")

        # Only the first 1536 columns are image features; split them per view.
        feats = arr[:, :1536].astype(np.float32, copy=False)
        blocks = [feats[:, start:start + 512] for start in (0, 512, 1024)]
        if any(block.shape[1] != 512 for block in blocks):
            raise ValueError(f"{key}: 512-split failed. got {[block.shape for block in blocks]}")

        reduced = [
            pca_transform_chunked(block, pca[view], CHUNK)
            for view, block in zip(("front", "top", "wrist"), blocks)
        ]

        z = np.concatenate(reduced, axis=1)
        if z.shape != (n_steps, 192):
            raise RuntimeError(f"{key}: PCA concat shape expected {(n_steps, 192)}, got {z.shape}")

        out[key] = z
        print(f"[OK] {key}: {arr.shape} -> {z.shape}")

    with open(OUT_PATH, "wb") as fh:
        pickle.dump(out, fh, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"\nSaved: {OUT_PATH} | keys={len(out)}")

if __name__ == "__main__":
    np.set_printoptions(suppress=True)
    main()


In [None]:
#!/usr/bin/env python3
import pickle
import numpy as np

# ===== Paths =====
PCA_PATH   = "dataset/success_data_resnet18_pca_robotX.pkl"  # (T,192) PCA-reduced image features
# Must be the pose-augmented dataset (robotO, (T,1536+7)). The robotX file
# holds only the (T,1536) image features, so taking its last 7 columns would
# merge ResNet features in place of the end-effector pose.
ROBOT_PATH = "dataset/success_data_resnet18_robotO.pkl"      # (T,1536+7) == (T,1543)
OUT_PATH   = "dataset/success_data_resnet18_pca_robotO.pkl"  # (T,199) output path

def main():
    """Append the 7-d robot pose to the PCA features of every episode and save.

    For each key in the ``PCA_PATH`` dict, the last 7 columns of the matching
    ``ROBOT_PATH`` array are concatenated to the ``(T,192)`` PCA array, and the
    ``(T,199)`` result is written to ``OUT_PATH``.

    Raises:
        KeyError: A PCA key has no counterpart in the robot dataset.
        ValueError: The robot array is not ``(T,1543)``, or the step counts differ.
        RuntimeError: The merged array does not have shape ``(T,199)``.
    """
    with open(PCA_PATH, "rb") as f:
        pca_data = pickle.load(f)
    with open(ROBOT_PATH, "rb") as f:
        robot_data = pickle.load(f)

    out = {}
    for key, pca_arr in pca_data.items():
        if key not in robot_data:
            raise KeyError(f"{key} not in robot_data PKL")
        pca_arr = np.asarray(pca_arr, dtype=np.float32)
        rob_arr = np.asarray(robot_data[key], dtype=np.float32)

        # Extract the 7-d robot pose from the tail of the robot array.
        if rob_arr.ndim != 2 or rob_arr.shape[1] < 7:
            raise ValueError(f"{key}: robot_data shape {rob_arr.shape} has no 7-dim tail")
        # The pose is only at the tail of a (T,1536+7) array. A feature-only
        # (T,1536) array also has "7 trailing columns", so reject anything that
        # is not the expected width rather than merge image features as the pose.
        if rob_arr.shape[1] != 1536 + 7:
            raise ValueError(
                f"{key}: robot_data shape {rob_arr.shape} is not (T,1543); "
                f"the last 7 columns would not be the robot pose"
            )
        robot7 = rob_arr[:, -7:].astype(np.float32, copy=False)

        if pca_arr.shape[0] != robot7.shape[0]:
            raise ValueError(f"{key}: step mismatch {pca_arr.shape[0]} vs {robot7.shape[0]}")

        merged = np.concatenate([pca_arr, robot7], axis=1)
        if merged.shape != (pca_arr.shape[0], 192 + 7):
            raise RuntimeError(f"{key}: unexpected merged shape {merged.shape}")
        out[key] = merged
        print(f"[OK] {key}: {pca_arr.shape} + (T,7) -> {merged.shape}")

    with open(OUT_PATH, "wb") as f:
        pickle.dump(out, f, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"\nSaved: {OUT_PATH} | keys={len(out)}")

if __name__ == "__main__":
    main()
