In [57]:
# Step 0. Mount Google Drive so the preprocessing folder is reachable under /content/drive
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [58]:
# Step 1. Base path configuration
import os
import pandas as pd

BASE_DIR = "/content/drive/MyDrive/preprocessing"
HAND_IMG_ROOT = os.path.join(BASE_DIR, "Holistic_hands_frames")

# Echo the resolved paths first, then list what the hand-frames folder contains.
for label, path in (
    ("BASE_DIR      :", BASE_DIR),
    ("HAND_IMG_ROOT :", HAND_IMG_ROOT),
):
    print(label, path)
print("손 폴더 목록  :", os.listdir(HAND_IMG_ROOT))

BASE_DIR      : /content/drive/MyDrive/preprocessing
HAND_IMG_ROOT : /content/drive/MyDrive/preprocessing/Holistic_hands_frames
손 폴더 목록  : ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'seq_embeddings_hands', 'gru_attn_best_model.pt', 'seq_embeddings_meta_hands.csv', 'gru_attn_best_model_2.pt', 'gru_attn_v2_best_model.pt', 'gru_fusion_best_model.pt', 'seq_cnn_gru_attn_hands_light_best.pt', 'seq_cnn_mobilenet_gru_attn_hands_best.pt', 'seq_cnn_mobilenet_tcn_attn_hands_best.pt', 'tsn_hands_seq_meta.csv', 'tsn_hands_3fold_splits.json']


In [59]:
# Step 2. Load the per-sequence metadata table (seq_embeddings_meta_hands.csv)
meta_path = os.path.join(HAND_IMG_ROOT, "seq_embeddings_meta_hands.csv")
hands_df = pd.read_csv(meta_path)

# Quick look at what was loaded: row count, first rows, then the column index.
print("rows:", hands_df.shape[0])
for preview in (hands_df.head(), hands_df.columns):
    print(preview)

rows: 1097
                                       seq_id  person_id      word  label  \
0  p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_D          1  WORD0029      0   
1  p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_F          1  WORD0029      0   
2  p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_L          1  WORD0029      0   
3  p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_R          1  WORD0029      0   
4  p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_U          1  WORD0029      0   

  view                                             frames  \
0    D  /content/drive/MyDrive/preprocessing/Holistic_...   
1    F  /content/drive/MyDrive/preprocessing/Holistic_...   
2    L  /content/drive/MyDrive/preprocessing/Holistic_...   
3    R  /content/drive/MyDrive/preprocessing/Holistic_...   
4    U  /content/drive/MyDrive/preprocessing/Holistic_...   

                                          embed_path  
0  /content/drive/MyDrive/preprocessing/Holistic_...  
1  /content/drive/MyDrive/preprocessing/Holistic_

In [60]:
# Step 3. Turn each row's `frames` string into a list of frame paths (expected length: 16)
FRAMES_COL = "frames"

def parse_frames(val):
    """Normalize one ``frames`` cell into a list of frame-path strings.

    Args:
        val: The raw cell value. Either a list (already parsed), a tuple,
            a ``'|'``-separated string of paths, or anything else (e.g. NaN).

    Returns:
        list: The frame paths. A list input is returned as-is (same object),
        a tuple is copied into a list, a string is split on ``'|'`` with
        surrounding whitespace stripped and empty segments dropped, and any
        other type yields an empty list.
    """
    # Already a list (e.g. this cell was re-run): use it unchanged.
    if isinstance(val, list):
        return val
    # is_valid_seq accepts tuples as sequences, so keep their content
    # instead of letting them fall through to the empty-list fallback.
    if isinstance(val, tuple):
        return list(val)
    # Strings are split on '|'. Stripping removes stray spaces/newlines that
    # would otherwise end up inside a path; empty segments are discarded.
    if isinstance(val, str):
        segments = (piece.strip() for piece in val.split("|"))
        return [piece for piece in segments if piece]
    # Any other type (NaN, None, ...) carries no frames.
    return []

# Parse every row's frames cell, then peek at the first sequence:
# its container type, its length, and its first three paths.
hands_df[FRAMES_COL] = hands_df[FRAMES_COL].apply(parse_frames)

first_frames = hands_df.iloc[0][FRAMES_COL]
print(type(first_frames), len(first_frames))
print(first_frames[:3])

<class 'list'> 16
['/content/drive/MyDrive/preprocessing/Holistic_hands_frames/1/WORD0029_검사_NIA_SL_WORD0029_REAL01_D_s00.png', '/content/drive/MyDrive/preprocessing/Holistic_hands_frames/1/WORD0029_검사_NIA_SL_WORD0029_REAL01_D_s01.png', '/content/drive/MyDrive/preprocessing/Holistic_hands_frames/1/WORD0029_검사_NIA_SL_WORD0029_REAL01_D_s02.png']


In [61]:
# Step 4. Sanity check (for verification): every sequence must hold exactly 16 frames.
# Checking that the frame files actually exist on disk is opt-in (check_files=True),
# because it touches the filesystem once per frame.
NUM_FRAMES = 16

def is_valid_seq(frames, num_frames=None, check_files=False):
    """Tell whether ``frames`` is a usable frame sequence.

    Args:
        frames: Candidate sequence; only lists and tuples can be valid.
        num_frames: Required number of frames. ``None`` (default) means the
            module-level ``NUM_FRAMES``.
        check_files: When True, additionally require every entry to be a
            string naming an existing file. Defaults to False, which keeps
            the check length-only.

    Returns:
        bool: True if the sequence passes all requested checks.
    """
    if num_frames is None:
        # Resolved at call time so a later change to NUM_FRAMES is honored.
        num_frames = NUM_FRAMES
    if not isinstance(frames, (list, tuple)):
        return False
    if len(frames) != num_frames:
        return False
    if check_files:
        return all(isinstance(path, str) and os.path.isfile(path) for path in frames)
    return True

# Flag the rows whose frame list passes is_valid_seq and report how many there are.
total_sequences = len(hands_df)
mask_len = hands_df[FRAMES_COL].apply(is_valid_seq)
print("전체 시퀀스:", total_sequences, "/ 길이 16:", mask_len.sum())

# Keep only the flagged rows and renumber the index from 0.
hands_df = hands_df[mask_len].reset_index(drop=True)

전체 시퀀스: 1097 / 길이 16: 1097


In [62]:
# Step 5. Build the TSN metadata DataFrame and save it
# Column renames: word -> word_code, label -> label_idx, frames -> frames_img
column_renames = {
    "word": "word_code",
    "label": "label_idx",
    "frames": "frames_img",
}
tsn_df = hands_df.rename(columns=column_renames)

# Core columns to keep; any that are absent from the frame are skipped silently.
wanted_cols = ("seq_id", "person_id", "word_code", "label_idx", "view", "frames_img")
keep_cols = [col for col in wanted_cols if col in tsn_df.columns]

tsn_df = tsn_df[keep_cols].copy()

# NOTE: frames_img holds Python lists at this point, so to_csv writes each cell as
# the list's string representation (not the '|'-joined form of the input CSV).
# Whoever reads this file back has to parse that representation.
TSN_META_PATH = os.path.join(HAND_IMG_ROOT, "tsn_hands_seq_meta.csv")
tsn_df.to_csv(TSN_META_PATH, index=False)

print("TSN 메타 저장 완료:", TSN_META_PATH)
tsn_df.head()

TSN 메타 저장 완료: /content/drive/MyDrive/preprocessing/Holistic_hands_frames/tsn_hands_seq_meta.csv


Unnamed: 0,seq_id,person_id,word_code,label_idx,view,frames_img
0,p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_D,1,WORD0029,0,D,[/content/drive/MyDrive/preprocessing/Holistic...
1,p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_F,1,WORD0029,0,F,[/content/drive/MyDrive/preprocessing/Holistic...
2,p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_L,1,WORD0029,0,L,[/content/drive/MyDrive/preprocessing/Holistic...
3,p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_R,1,WORD0029,0,R,[/content/drive/MyDrive/preprocessing/Holistic...
4,p1_WORD0029_검사_NIA_SL_WORD0029_REAL01_U,1,WORD0029,0,U,[/content/drive/MyDrive/preprocessing/Holistic...
