## Extract Poses from Amass Dataset

In [30]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook
%matplotlib inline

import sys, os
import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm

from human_body_prior.tools.omni_tools import copy2cpu as c2c

os.environ['PYOPENGL_PLATFORM'] = 'egl'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Please remember to download the following subdataset from AMASS website: https://amass.is.tue.mpg.de/download.php. Note only download the <u>SMPL+H G</u> data.
* ACCD (ACCD)
* HDM05 (MPI_HDM05)
* TCDHands (TCD_handMocap)
* SFU (SFU)
* BMLmovi (BMLmovi)
* CMU (CMU)
* Mosh (MPI_mosh)
* EKUT (EKUT)
* KIT  (KIT)
* Eyes_Janpan_Dataset (Eyes_Janpan_Dataset)
* BMLhandball (BMLhandball)
* Transitions (Transitions_mocap)
* PosePrior (MPI_Limits)
* HumanEva (HumanEva)
* SSM (SSM_synced)
* DFaust (DFaust_67)
* TotalCapture (TotalCapture)
* BMLrub (BioMotionLab_NTroje)

### Unzip all datasets. In the bracket we give the name of the unzipped file folder. Please correct yours to the given names if they are not the same.

### Place all files under the directory **./amass_data/**. The directory structure shoud look like the following:  
./amass_data/  
./amass_data/ACCAD/  
./amass_data/BioMotionLab_NTroje/  
./amass_data/BMLhandball/  
./amass_data/BMLmovi/   
./amass_data/CMU/  
./amass_data/DFaust_67/  
./amass_data/EKUT/  
./amass_data/Eyes_Japan_Dataset/  
./amass_data/HumanEva/  
./amass_data/KIT/  
./amass_data/MPI_HDM05/  
./amass_data/MPI_Limits/  
./amass_data/MPI_mosh/  
./amass_data/SFU/  
./amass_data/SSM_synced/  
./amass_data/TCD_handMocap/  
./amass_data/TotalCapture/  
./amass_data/Transitions_mocap/  

**Please make sure the file path are correct, otherwise it can not succeed.**

In [31]:
# Choose the device to run the body model on.
comp_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
comp_device

device(type='cpu')

In [32]:
from human_body_prior.body_model.body_model import BodyModel

male_bm_path = './body_models/smplh/male/model.npz'
male_dmpl_path = './body_models/dmpls/male/model.npz'

female_bm_path = './body_models/smplh/female/model.npz'
female_dmpl_path = './body_models/dmpls/female/model.npz'

num_betas = 10 # number of body parameters
num_dmpls = 8 # number of DMPL parameters

male_bm = BodyModel(bm_fname=male_bm_path, num_betas=num_betas, num_dmpls=num_dmpls, dmpl_fname=male_dmpl_path).to(comp_device)
faces = c2c(male_bm.f)

female_bm = BodyModel(bm_fname=female_bm_path, num_betas=num_betas, num_dmpls=num_dmpls, dmpl_fname=female_dmpl_path).to(comp_device)

In [33]:
from pathlib import Path

paths = []
folders = []
dataset_names = []

# amass_data 내의 모든 폴더와 파일을 재귀적으로 탐색
for root, dirs, files in os.walk('./amass_data'):
    folders.append(root)
    
    # 각 파일에 대해 처리
    for name in files:
        # npz 파일만 처리 (AMASS 데이터 파일)
        if name.endswith('.npz'):
            full_path = os.path.join(root, name)
            paths.append(full_path)
            
            # 경로에서 데이터셋 이름 추출
            root_path = Path(root)
            # amass_data 이후의 첫 번째 폴더를 데이터셋 이름으로 사용
            if len(root_path.parts) > 1:  # ./amass_data 다음에 폴더가 있는 경우
                for i, part in enumerate(root_path.parts):
                    if part == 'amass_data' and i + 1 < len(root_path.parts):
                        dataset_name = root_path.parts[i + 1]
                        if dataset_name not in dataset_names:
                            dataset_names.append(dataset_name)
                        break

# 현재 상황 확인을 위한 디버깅
print("Found folders:", folders)
print("Found dataset names:", dataset_names)
print("Found files:", len(paths))
print("Sample files:")
for i, path in enumerate(paths[:5]):  # 처음 5개 파일만 출력
    print(f"  {i+1}: {path}")
if len(paths) > 5:
    print(f"  ... and {len(paths)-5} more files")

Found folders: ['./amass_data', './amass_data\\csvtonpz']
Found dataset names: ['csvtonpz']
Found files: 129
Sample files:
  1: ./amass_data\csvtonpz\Dancejj0101.npz
  2: ./amass_data\csvtonpz\Dancejj0102.npz
  3: ./amass_data\csvtonpz\Dancejj0103.npz
  4: ./amass_data\csvtonpz\Dancejj0201.npz
  5: ./amass_data\csvtonpz\Dancejj0202.npz
  ... and 124 more files


In [34]:
save_root = './pose_data'
save_folders = [folder.replace('./amass_data', './pose_data') for folder in folders]
for folder in save_folders:
    os.makedirs(folder, exist_ok=True)

# 각 데이터셋별로 파일들을 그룹화
group_path = []
for dataset_name in dataset_names:
    dataset_files = [path for path in paths if dataset_name in path]
    if dataset_files:  # 파일이 있는 경우만 추가
        group_path.append(dataset_files)

print(f"Created {len(group_path)} groups:")
for i, group in enumerate(group_path):
    dataset_name = dataset_names[i] if i < len(dataset_names) else f"group_{i}"
    print(f"  {dataset_name}: {len(group)} files")

Created 1 groups:
  csvtonpz: 129 files


In [35]:
trans_matrix = np.array([[1.0, 0.0, 0.0],
                        [0.0, 0.0, 1.0],
                        [0.0, 1.0, 0.0]])
ex_fps = 20

def amass_to_pose(src_path, save_path):
    bdata = np.load(src_path, allow_pickle=True)
    fps = 0
    try:
        fps = bdata['mocap_framerate']
        frame_number = bdata['trans'].shape[0]
    except:
        #print(list(bdata.keys()))
        return fps
    
    fId = 0 # frame id of the mocap sequence
    pose_seq = []
    if bdata['gender'] == 'male':
        bm = male_bm
    else:
        bm = female_bm
    down_sample = int(fps / ex_fps)
    print(frame_number)
    print(fps)

    # For Downsampling
    #bdata_poses = bdata['poses'][::down_sample,...]
    #bdata_trans = bdata['trans'][::down_sample,...]
    
    # For Full Sequence
    bdata_poses = bdata['poses']
    bdata_trans = bdata['trans']
    
    body_parms = {
            'root_orient': torch.Tensor(bdata_poses[:, :3]).to(comp_device),
            'pose_body': torch.Tensor(bdata_poses[:, 3:66]).to(comp_device),
            'pose_hand': torch.Tensor(bdata_poses[:, 66:]).to(comp_device),
            'trans': torch.Tensor(bdata_trans).to(comp_device),
            'betas': torch.Tensor(np.repeat(bdata['betas'][:num_betas][np.newaxis], repeats=len(bdata_trans), axis=0)).to(comp_device),
        }
    
    with torch.no_grad():
        body = bm(**body_parms)
    pose_seq_np = body.Jtr.detach().cpu().numpy()
    pose_seq_np_n = np.dot(pose_seq_np, trans_matrix)
    
    
    np.save(save_path, pose_seq_np_n)
    return fps

In [36]:
group_path = group_path
all_count = sum([len(paths) for paths in group_path])
cur_count = 0

This will take a few hours for all datasets, here we take one dataset as an example

To accelerate the process, you could run multiple scripts like this at one time.

In [37]:
import time
for paths in group_path:
    # Windows 호환 경로 분리
    dataset_name = Path(paths[0]).parts[2] if len(Path(paths[0]).parts) > 2 else "unknown"
    pbar = tqdm(paths)
    pbar.set_description('Processing: %s'%dataset_name)
    fps = 0
    for path in pbar:
        save_path = path.replace('./amass_data', './pose_data')
        save_path = save_path[:-3] + 'npy'
        fps = amass_to_pose(path, save_path)
        
    cur_count += len(paths)
    print('Processed / All (fps %d): %d/%d'% (fps, cur_count, all_count) )
    time.sleep(0.5)

Processing: Dancejj0101.npz:   1%|          | 1/129 [00:00<00:14,  8.83it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:   2%|▏         | 3/129 [00:00<00:14,  8.74it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:   4%|▍         | 5/129 [00:00<00:14,  8.62it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:   5%|▌         | 7/129 [00:00<00:14,  8.62it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:   7%|▋         | 9/129 [00:01<00:13,  8.57it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:   9%|▊         | 11/129 [00:01<00:14,  8.42it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  10%|█         | 13/129 [00:01<00:13,  8.39it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  12%|█▏        | 15/129 [00:01<00:13,  8.44it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  13%|█▎        | 17/129 [00:02<00:13,  8.42it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  15%|█▍        | 19/129 [00:02<00:12,  8.49it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  16%|█▋        | 21/129 [00:02<00:13,  8.23it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  18%|█▊        | 23/129 [00:02<00:15,  7.03it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  19%|█▉        | 25/129 [00:03<00:14,  7.33it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  21%|██        | 27/129 [00:03<00:13,  7.83it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  22%|██▏       | 29/129 [00:03<00:12,  8.18it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  24%|██▍       | 31/129 [00:03<00:11,  8.30it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  26%|██▌       | 33/129 [00:04<00:12,  7.54it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  27%|██▋       | 35/129 [00:04<00:12,  7.65it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  29%|██▊       | 37/129 [00:04<00:11,  8.16it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  30%|███       | 39/129 [00:04<00:11,  8.17it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  32%|███▏      | 41/129 [00:05<00:10,  8.37it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  33%|███▎      | 43/129 [00:05<00:11,  7.31it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  35%|███▍      | 45/129 [00:05<00:10,  7.77it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  36%|███▋      | 47/129 [00:05<00:10,  8.09it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  38%|███▊      | 49/129 [00:06<00:09,  8.27it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  40%|███▉      | 51/129 [00:06<00:09,  7.96it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  41%|████      | 53/129 [00:06<00:09,  8.03it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  43%|████▎     | 55/129 [00:06<00:09,  7.51it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  44%|████▍     | 57/129 [00:07<00:09,  7.75it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  46%|████▌     | 59/129 [00:07<00:08,  7.85it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  47%|████▋     | 61/129 [00:07<00:08,  7.57it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  49%|████▉     | 63/129 [00:07<00:09,  7.29it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  50%|█████     | 65/129 [00:08<00:08,  7.79it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  52%|█████▏    | 67/129 [00:08<00:08,  7.22it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  53%|█████▎    | 69/129 [00:08<00:08,  7.42it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  55%|█████▌    | 71/129 [00:08<00:07,  7.66it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  57%|█████▋    | 73/129 [00:09<00:07,  7.53it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  58%|█████▊    | 75/129 [00:09<00:07,  7.70it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  60%|█████▉    | 77/129 [00:09<00:06,  7.65it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  61%|██████    | 79/129 [00:09<00:06,  7.82it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  63%|██████▎   | 81/129 [00:10<00:06,  7.95it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  64%|██████▍   | 83/129 [00:10<00:05,  8.11it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  66%|██████▌   | 85/129 [00:10<00:05,  8.22it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  67%|██████▋   | 87/129 [00:10<00:05,  8.13it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  69%|██████▉   | 89/129 [00:11<00:05,  6.98it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  71%|███████   | 91/129 [00:11<00:06,  6.30it/s]

200
120.0


Processing: Dancejj0101.npz:  71%|███████▏  | 92/129 [00:11<00:05,  6.77it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  73%|███████▎  | 94/129 [00:12<00:04,  7.29it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  74%|███████▍  | 96/129 [00:12<00:04,  7.62it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  76%|███████▌  | 98/129 [00:12<00:04,  7.41it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  78%|███████▊  | 100/129 [00:12<00:03,  7.59it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  79%|███████▉  | 102/129 [00:13<00:04,  6.56it/s]

200
120.0


Processing: Dancejj0101.npz:  80%|███████▉  | 103/129 [00:13<00:03,  6.88it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  81%|████████▏ | 105/129 [00:13<00:03,  7.55it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  83%|████████▎ | 107/129 [00:13<00:02,  7.52it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  84%|████████▍ | 109/129 [00:14<00:02,  7.66it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  86%|████████▌ | 111/129 [00:14<00:02,  7.36it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  88%|████████▊ | 113/129 [00:14<00:02,  7.91it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  89%|████████▉ | 115/129 [00:14<00:01,  8.19it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  91%|█████████ | 117/129 [00:15<00:01,  8.01it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  92%|█████████▏| 119/129 [00:15<00:01,  7.97it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  94%|█████████▍| 121/129 [00:15<00:00,  8.23it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  95%|█████████▌| 123/129 [00:15<00:00,  8.28it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  97%|█████████▋| 125/129 [00:16<00:00,  7.92it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz:  98%|█████████▊| 127/129 [00:16<00:00,  7.89it/s]

200
120.0
200
120.0


Processing: Dancejj0101.npz: 100%|██████████| 129/129 [00:16<00:00,  7.80it/s]


200
120.0
Processed / All (fps 120): 129/129


## Segment, Mirror and Relocate Motions에 사용할 index.csv 생성

In [38]:
# source_path,start_frame,end_frame,new_name

def create_index_csv_from_pose_data():
    """
    ./pose_data 폴더의 모든 .npy 파일들을 스캔하여 index.csv 파일을 생성합니다.
    """
    pose_data_dir = './pose_data'
    output_csv = './index_test.csv'
    
    # 결과를 저장할 리스트
    csv_data = []
    
    # pose_data 폴더를 재귀적으로 탐색
    for root, dirs, files in os.walk(pose_data_dir):
        for file in files:
            if file.endswith('.npy'):
                # 전체 파일 경로
                full_path = os.path.join(root, file)
                
                try:
                    # 파일 로드하여 프레임 수 확인
                    data = np.load(full_path)
                    total_frames = data.shape[0]
                    
                    # 상대 경로로 변환 (./pose_data/...)
                    relative_path = full_path.replace('\\', '/')
                    
                    # new_name은 파일명만 사용
                    new_name = file
                    
                    # CSV 행 추가
                    csv_data.append({
                        'source_path': relative_path,
                        'start_frame': 0,
                        'end_frame': total_frames,
                        'new_name': new_name
                    })
                    
                    print(f"Added: {relative_path} (frames: {total_frames})")
                    
                except Exception as e:
                    print(f"Error processing {full_path}: {e}")
    
    # DataFrame 생성 및 CSV 저장
    df = pd.DataFrame(csv_data)
    df.to_csv(output_csv, index=False)
    
    print(f"\nIndex CSV created: {output_csv}")
    print(f"Total files processed: {len(csv_data)}")
    
    return df

# 실행
df = create_index_csv_from_pose_data()

# 결과 미리보기
print("\nFirst 10 entries:")
print(df.head(10))

Added: ./pose_data/csvtonpz/Dancejj0101.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0102.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0103.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0201.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0202.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0203.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0301.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0302.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0303.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0401.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0402.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0403.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0501.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0502.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0503.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0601.npy (frames: 200)
Added: ./pose_data/csvtonpz/Dancejj0602.npy (frames: 200)
Added: ./pose_

The above code will extract poses from **AMASS** dataset, and put them under directory **"./pose_data"**

The source data from **HumanAct12** is already included in **"./pose_data"** in this repository. You need to **unzip** it right in this folder.

## Segment, Mirror and Relocate Motions

In [39]:
import codecs as cs
import pandas as pd
import numpy as np
from tqdm import tqdm
from os.path import join as pjoin

In [40]:
def swap_left_right(data):
    assert len(data.shape) == 3 and data.shape[-1] == 3
    data = data.copy()
    data[..., 0] *= -1
    right_chain = [2, 5, 8, 11, 14, 17, 19, 21]
    left_chain = [1, 4, 7, 10, 13, 16, 18, 20]
    left_hand_chain = [22, 23, 24, 34, 35, 36, 25, 26, 27, 31, 32, 33, 28, 29, 30]
    right_hand_chain = [43, 44, 45, 46, 47, 48, 40, 41, 42, 37, 38, 39, 49, 50, 51]
    tmp = data[:, right_chain]
    data[:, right_chain] = data[:, left_chain]
    data[:, left_chain] = tmp
    if data.shape[1] > 24:
        tmp = data[:, right_hand_chain]
        data[:, right_hand_chain] = data[:, left_hand_chain]
        data[:, left_hand_chain] = tmp
    return data

In [41]:
index_path = './index_test.csv'
save_dir = './joints'
index_file = pd.read_csv(index_path)
total_amount = index_file.shape[0]
fps = 20

In [42]:
for i in tqdm(range(total_amount)):
    source_path = index_file.loc[i]['source_path']
    new_name = index_file.loc[i]['new_name']
    data = np.load(source_path)
    start_frame = index_file.loc[i]['start_frame']
    end_frame = index_file.loc[i]['end_frame']
    if 'humanact12' not in source_path:
        if 'Eyes_Japan_Dataset' in source_path:
            data = data[3*fps:]
        if 'MPI_HDM05' in source_path:
            data = data[3*fps:]
        if 'TotalCapture' in source_path:
            data = data[1*fps:]
        if 'MPI_Limits' in source_path:
            data = data[1*fps:]
        if 'Transitions_mocap' in source_path:
            data = data[int(0.5*fps):]
        data = data[start_frame:end_frame]
        data[..., 0] *= -1
    
    data_m = swap_left_right(data)
#     save_path = pjoin(save_dir, )
    np.save(pjoin(save_dir, new_name), data)
    np.save(pjoin(save_dir, 'M'+new_name), data_m)

100%|██████████| 129/129 [00:01<00:00, 94.02it/s] 


## Text 파일 생성

In [None]:
import os
import random
from pathlib import Path

# texts_tmp 폴더 생성
texts_tmp_dir = './HumanML3D/texts_tmp'
os.makedirs(texts_tmp_dir, exist_ok=True)

# joints 폴더에 생성된 모든 .npy 파일에 대해 txt 파일 생성
joints_dir = './joints'

print("Creating text files for all motion files...")

# 모든 파일 이름을 저장할 리스트
all_file_names = []
skipped_files = []

# joints 폴더의 모든 .npy 파일 찾기
if os.path.exists(joints_dir):
    for filename in os.listdir(joints_dir):
        if filename.endswith('.npy'):
            # 012314.npy 파일 제외
            if filename == '012314.npy':
                print(f"Skipped: {filename}")
                skipped_files.append(filename)
                continue
                
            # .npy 확장자를 .txt로 변경
            txt_filename = filename.replace('.npy', '.txt')
            txt_filepath = os.path.join(texts_tmp_dir, txt_filename)
            
            # 기본 텍스트 설명 생성 (파일명 기반)
            base_name = filename.replace('.npy', '')
            
            # all.txt에 추가할 파일명 (확장자 제거)
            all_file_names.append(base_name)
            
            # 미러링된 파일인지 확인
            is_mirrored = base_name.startswith('M')
            if is_mirrored:
                original_name = base_name[1:]  # M 제거
                description = f"A mirrored version of motion {original_name}. The movement is horizontally flipped."
            else:
                description = f"A human motion sequence from file {base_name}."
            
            # 데이터셋별 특별한 설명 추가
            if 'humanact12' in base_name.lower():
                description += " This is from the HumanAct12 dataset."
            elif any(dataset in base_name.lower() for dataset in ['cmu', 'kit', 'bml', 'sfu', 'hdm05']):
                for dataset in ['cmu', 'kit', 'bml', 'sfu', 'hdm05']:
                    if dataset in base_name.lower():
                        description += f" This is from the {dataset.upper()} dataset."
                        break
            
            # txt 파일에 설명 저장
            with open(txt_filepath, 'w', encoding='utf-8') as f:
                f.write(description)
            
            print(f"Created: {txt_filepath}")

# MDancejj0201 확인
if 'MDancejj0201' in all_file_names:
    print("✓ MDancejj0201 found in all_file_names")
else:
    print("✗ MDancejj0201 NOT found in all_file_names")

# all.txt 파일 생성 (모든 파일 이름 저장)
all_txt_path = './HumanML3D/all.txt'
os.makedirs(os.path.dirname(all_txt_path), exist_ok=True)

# 파일 이름 정렬 (선택사항)
all_file_names.sort()

with open(all_txt_path, 'w', encoding='utf-8') as f:
    for file_name in all_file_names:
        f.write(file_name + '\n')

# train/test 분할 (10% test, 90% train_val)
random.seed(42)  # 재현 가능한 결과를 위한 시드 설정
shuffled_names = all_file_names.copy()
random.shuffle(shuffled_names)

total_count = len(shuffled_names)
test_count = int(total_count * 0.1)  # 10%를 테스트용으로

test_names = shuffled_names[:test_count]
train_val_names = shuffled_names[test_count:]

# train_val을 다시 train과 val로 분할 (train_val의 10%를 val로)
random.seed(42)  # 같은 시드로 재현 가능성 보장
shuffled_train_val = train_val_names.copy()
random.shuffle(shuffled_train_val)

train_val_count = len(shuffled_train_val)
val_count = int(train_val_count * 0.1)  # train_val의 10%를 val로

val_names = shuffled_train_val[:val_count]
train_names = shuffled_train_val[val_count:]

# test.txt 파일 생성
test_txt_path = './HumanML3D/test.txt'
with open(test_txt_path, 'w', encoding='utf-8') as f:
    for file_name in test_names:
        f.write(file_name + '\n')

# train.txt 파일 생성
train_txt_path = './HumanML3D/train.txt'
with open(train_txt_path, 'w', encoding='utf-8') as f:
    for file_name in train_names:
        f.write(file_name + '\n')

# val.txt 파일 생성
val_txt_path = './HumanML3D/val.txt'
with open(val_txt_path, 'w', encoding='utf-8') as f:
    for file_name in val_names:
        f.write(file_name + '\n')

# 기존 train_val.txt도 유지 (필요한 경우)
train_val_txt_path = './HumanML3D/train_val.txt'
with open(train_val_txt_path, 'w', encoding='utf-8') as f:
    for file_name in train_val_names:
        f.write(file_name + '\n')

print(f"\nText files created in: {texts_tmp_dir}")
print(f"Total text files: {len(os.listdir(texts_tmp_dir)) if os.path.exists(texts_tmp_dir) else 0}")
print(f"All file names saved to: {all_txt_path}")
print(f"Total file names in all.txt: {len(all_file_names)}")

print(f"\nDataset split:")
print(f"Test set ({len(test_names)} files, {len(test_names)/total_count*100:.1f}%): {test_txt_path}")
print(f"Train set ({len(train_names)} files, {len(train_names)/train_val_count*100:.1f}% of train_val): {train_txt_path}")
print(f"Val set ({len(val_names)} files, {len(val_names)/train_val_count*100:.1f}% of train_val): {val_txt_path}")
print(f"Train_val set ({len(train_val_names)} files, {len(train_val_names)/total_count*100:.1f}%): {train_val_txt_path}")

print(f"\nVerification:")
print(f"Total: {total_count}")
print(f"Test: {len(test_names)}")
print(f"Train: {len(train_names)}")
print(f"Val: {len(val_names)}")
print(f"Sum: {len(test_names) + len(train_names) + len(val_names)}")
print(f"Match: {total_count == len(test_names) + len(train_names) + len(val_names)}")

Creating text files for all motion files...
Skipped: 012314.npy
Created: ./HumanML3D/texts_tmp\Dancejj0101.txt
Created: ./HumanML3D/texts_tmp\Dancejj0102.txt
Created: ./HumanML3D/texts_tmp\Dancejj0103.txt
Created: ./HumanML3D/texts_tmp\Dancejj0201.txt
Created: ./HumanML3D/texts_tmp\Dancejj0202.txt
Created: ./HumanML3D/texts_tmp\Dancejj0203.txt
Created: ./HumanML3D/texts_tmp\Dancejj0301.txt
Created: ./HumanML3D/texts_tmp\Dancejj0302.txt
Created: ./HumanML3D/texts_tmp\Dancejj0303.txt
Created: ./HumanML3D/texts_tmp\Dancejj0401.txt
Created: ./HumanML3D/texts_tmp\Dancejj0402.txt
Created: ./HumanML3D/texts_tmp\Dancejj0403.txt
Created: ./HumanML3D/texts_tmp\Dancejj0501.txt
Created: ./HumanML3D/texts_tmp\Dancejj0502.txt
Created: ./HumanML3D/texts_tmp\Dancejj0503.txt
Created: ./HumanML3D/texts_tmp\Dancejj0601.txt
Created: ./HumanML3D/texts_tmp\Dancejj0602.txt
Created: ./HumanML3D/texts_tmp\Dancejj0603.txt
Created: ./HumanML3D/texts_tmp\Dancejj0701.txt
Created: ./HumanML3D/texts_tmp\Dancejj0702.