In [1]:
import os
import json
import pandas as pd
from tqdm import tqdm

def convert_json_to_csv(json_path):
    """Read one JSON file and return its contents as a flattened DataFrame.

    Despite the name, nothing is written to disk here; the caller decides
    when and where to save the result as CSV.

    Args:
        json_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.json_normalize`` for the
        parsed document.
    """
    with open(json_path, mode='r', encoding='utf-8') as handle:
        payload = json.loads(handle.read())

    # json_normalize flattens nested objects into dotted column names.
    return pd.json_normalize(payload)

def collect_json_files(base_dir):
    """Return the paths of all ``.json`` files found beneath ``base_dir``.

    The directory tree is walked recursively with ``os.walk``. The suffix
    check is case-sensitive, so only names ending in lowercase ``.json``
    are picked up.

    Args:
        base_dir: Root directory to search.

    Returns:
        A list of file paths, each built by joining the containing
        directory with the file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_dir)
        for name in names
        if name.endswith('.json')
    ]

# Input locations: root folders of the training and validation splits.
train_dir = r"C:\Users\tjdwn\OneDrive\Desktop\169-1.한국인 신체 3D 스캐닝 데이터\01-1.정식개방데이터\Training"
val_dir = r"C:\Users\tjdwn\OneDrive\Desktop\169-1.한국인 신체 3D 스캐닝 데이터\01-1.정식개방데이터\Validation"


def _load_json_frames(json_paths, desc):
    """Convert each JSON file to a DataFrame, skipping files that fail.

    Args:
        json_paths: Paths of the JSON files to convert.
        desc: Label shown on the tqdm progress bar.

    Returns:
        A list with one DataFrame per successfully converted file.
    """
    frames = []
    failed = 0
    for json_path in tqdm(json_paths, desc=desc):
        try:
            frames.append(convert_json_to_csv(json_path))
        except Exception as e:
            # Best-effort batch job: one unreadable file must not abort a
            # run over hundreds of thousands of files, so report and go on.
            failed += 1
            print(f"Error processing {json_path}: {str(e)}")
    if failed:
        # Summarise after the bar finishes; individual messages are easy
        # to miss in the progress output.
        print(f"Skipped {failed} of {len(json_paths)} files due to errors")
    return frames


def _combine_frames(frames, label):
    """Concatenate per-file DataFrames into one table.

    ``pd.concat`` raises ``ValueError`` when given an empty list, which
    would discard the other split's work; an empty DataFrame is returned
    instead so the script can still finish and save what it has.

    Args:
        frames: DataFrames to stack row-wise.
        label: Human-readable split name used in the warning message.

    Returns:
        The combined DataFrame with a fresh 0..n-1 index, or an empty
        DataFrame when ``frames`` is empty.
    """
    if not frames:
        print(f"Warning: no {label} data was loaded; result will be empty")
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


print("Collecting JSON files...")
# Gather the JSON file paths for both splits.
train_jsons = collect_json_files(train_dir)
val_jsons = collect_json_files(val_dir)

print(f"Found {len(train_jsons)} training JSON files and {len(val_jsons)} validation JSON files")

# Convert the training split.
print("\nProcessing training data...")
train_dfs = _load_json_frames(train_jsons, "Converting training JSONs")

# Convert the validation split.
print("\nProcessing validation data...")
val_dfs = _load_json_frames(val_jsons, "Converting validation JSONs")

# Stack the per-file frames into one table per split.
print("\nCombining DataFrames...")
train_df = _combine_frames(train_dfs, "training")
val_df = _combine_frames(val_dfs, "validation")

# Write both tables to CSV in the current working directory.
print("\nSaving to CSV...")
train_df.to_csv('training_data.csv', index=False)
val_df.to_csv('validation_data.csv', index=False)

print("\nDone!")
print(f"Training data shape: {train_df.shape}")
print(f"Validation data shape: {val_df.shape}")

Collecting JSON files...
Found 313760 training JSON files and 39220 validation JSON files

Processing training data...


Converting training JSONs: 100%|██████████| 313760/313760 [31:11<00:00, 167.66it/s] 



Processing validation data...


Converting validation JSONs: 100%|██████████| 39220/39220 [03:36<00:00, 181.29it/s]



Combining DataFrames...

Saving to CSV...

Done!
Training data shape: (313760, 56)
Validation data shape: (39220, 56)
