# In [1]:
import os
import re
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
import nibabel as nib

# In [3]:
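"""
Fast NIfTI cropping script

Improvements
* Load only the required region via nibabel's lazy loading (mmap)
* Parallelize across processes with ProcessPoolExecutor
* Cache the file list in a dict so the linear scan happens only once
* Preserve the dtype to avoid an unnecessary float64 conversion
"""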
# -----------------------------
# Parsing
# -----------------------------

def parse_cut_file(file_path: str):
    """Parse a ``cut_li*.txt`` file into a list of crop-description dicts.

    Every non-blank line must hold at least 11 comma-separated integers; a
    trailing comma and surrounding whitespace are tolerated, and fields past
    the 11th are ignored. By position the fields fill these keys:

    * 0 -> ``"vertebra_num"``
    * 1 -> ``"slb"``
    * 2 -> ``"slb2"``
    * 3 -> ``"slice_count"``
    * 4 -> ``"base_size"``
    * 5-6 -> ``"x_range"`` (two-element list)
    * 7-8 -> ``"y_range"`` (two-element list)
    * 9-10 -> ``"z_range"`` (two-element list)

    Args:
        file_path: Path of the text file to read.

    Returns:
        One dict per non-blank line, in file order.

    Raises:
        ValueError: If a field is not an integer, or a line has fewer than
            11 fields.
    """
    required_fields = 11
    cut_info = []
    with open(file_path) as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip().rstrip(',')
            if not line:
                continue
            vals = [int(token) for token in line.split(',')]
            # Slicing a short list would silently produce truncated ranges
            # that only blow up later inside a worker process, so reject the
            # line here where the file and line number are still known.
            if len(vals) < required_fields:
                raise ValueError(
                    f"{file_path}:{line_no}: expected at least "
                    f"{required_fields} comma-separated integers, "
                    f"got {len(vals)}"
                )
            cut_info.append({
                "vertebra_num": vals[0],
                "slb": vals[1],
                "slb2": vals[2],
                "slice_count": vals[3],
                "base_size": vals[4],
                "x_range": vals[5:7],
                "y_range": vals[7:9],
                "z_range": vals[9:11],
            })
    return cut_info

# -----------------------------
# NIfTI cropping
# -----------------------------

def _clip(start: int, end: int, maxv: int, margin: int = 1):
    """Widen ``(start, end)`` by ``margin`` on both sides and clamp the result.

    The lower bound never drops below 0 and the upper bound never exceeds
    ``maxv``.

    Returns:
        The ``(lower, upper)`` pair after widening and clamping.
    """
    lower = start - margin
    if lower < 0:
        lower = 0

    upper = end + margin
    if upper > maxv:
        upper = maxv

    return lower, upper

def apply_cut_to_nifti(args):
    """Worker: crop one NIfTI volume to the requested region and save it.

    Args:
        args: A ``(input_path, output_path, cut_info)`` tuple. ``cut_info``
            must provide ``"x_range"``, ``"y_range"`` and ``"z_range"``, each
            a two-element ``(start, end)`` sequence; every range is widened
            and clamped by ``_clip`` against the matching image dimension
            before slicing.

    Returns:
        ``output_path``, once the cropped image has been written.
    """
    input_path, output_path, cut_info = args
    img = nib.load(input_path, mmap=True)  # voxel data is read only when sliced

    xs, xe = _clip(*cut_info["x_range"], img.shape[0])
    ys, ye = _clip(*cut_info["y_range"], img.shape[1])
    zs, ze = _clip(*cut_info["z_range"], img.shape[2])

    # Materialize only the requested sub-volume.
    cut_data = np.asanyarray(img.dataobj[xs:xe, ys:ye, zs:ze])

    # Voxel (0, 0, 0) of the crop is voxel (xs, ys, zs) of the source volume.
    # Reusing the source affine unchanged would place the crop at the source
    # origin in world space, so move the translation column to the world
    # position of the crop's first voxel.
    cut_affine = np.array(img.affine, dtype=float)
    cut_affine[:3, 3] = (img.affine @ np.array([xs, ys, zs, 1.0]))[:3]

    nib.save(nib.Nifti1Image(cut_data, cut_affine, img.header), output_path)
    return output_path

# -----------------------------
# Directory processing
# -----------------------------

def process_directory(input_dir: str, output_base_dir: str, max_workers: int | None = None):
    files = os.listdir(input_dir)
    cut_files = [f for f in files if f.startswith("cut_li") and f.endswith(".txt")]
    nii_files = [f for f in files if f.endswith(".nii") or f.endswith(".nii.gz")]

    # 数字 -> 対応する nii ファイル群
    file_map: dict[str, list[str]] = defaultdict(list)
    for f in nii_files:
        num = re.search(r"\d+", f).group()
        file_map[num].append(f)

    # 並列実行用ジョブ作成
    jobs: list[tuple[str, str, dict]] = []
    for cut_file in cut_files:
        number = re.search(r"\d+", cut_file).group()
        cut_info_list = parse_cut_file(os.path.join(input_dir, cut_file))
        for cut_info in cut_info_list:
            for nii_file in file_map[number]:
                in_path = os.path.join(input_dir, nii_file)
                out_dir = os.path.join(output_base_dir, f"inp{number}", str(cut_info["vertebra_num"]))
                os.makedirs(out_dir, exist_ok=True)
                out_path = os.path.join(out_dir, f"cut_{nii_file}")
                jobs.append((in_path, out_path, cut_info))

    # マルチプロセス実行
    with ProcessPoolExecutor(max_workers=max_workers or os.cpu_count()) as ex:
        futures = [ex.submit(apply_cut_to_nifti, j) for j in jobs]
        for _ in as_completed(futures):
            pass  # 進捗だけ待つ

# -----------------------------
# Entry point
# -----------------------------

if __name__ == "__main__":
    # (input directory, output directory) pairs to process. Only the test
    # split is enabled; the train and validation pairs are kept commented
    # out for reference.
    dataset_dirs = [
        # (
        #     "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae/Sakaguchi_file/S_train",
        #     "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae/Sakaguchi_file/processed_train",
        # ),
        # (
        #     "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae/Sakaguchi_file/S_val",
        #     "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae_saka/Sakaguchi_file/processed_val",
        # ),
        (
            "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae_saka/Sakaguchi_file/S_test",
            "/mnt/nfs1/home/yamamoto-hiroto/research/vertebrae_saka/Sakaguchi_file/processed_test",
        ),
    ]

    for source_dir, target_dir in dataset_dirs:
        process_directory(source_dir, target_dir)

    # Runtime message: "Cut processing has finished."
    print("カット処理が完了しました。")


# Output: カット処理が完了しました。
