In [8]:
from pathlib import Path
from glob import glob
import cv2
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import os

In [5]:
class cfg:
    """Filesystem locations (Kaggle paths) read and written by this notebook."""
    # Root of the training data; every two-level subdirectory under it is a
    # candidate for stacking.
    dir_train = "/kaggle/input/blood-vessel-segmentation/train"
    # Directory of the external dataset's image slices, stacked as one volume.
    dir_external = "/kaggle/input/blood-vessel-segmentation_external/50um_LADAF-2020-31_kidney_pag-0.01_0.02_jp2_"
    # Output directory for the raw stacked .npy volumes.
    dir_raw = "/kaggle/working/dataset/stack_raw"
    # Not referenced by the cells visible here.
    dir_viz = "/kaggle/working/dataset/stack_viz"
    # Output directory for the clipped / converted volumes used for training.
    dir_clipped = "/kaggle/working/dataset/stack_train01"

# Stack images

In [6]:
def stack_tifs(dir_dataset, save_path):
    """Stack every image slice of a directory into one array and save it.

    Files in ``dir_dataset`` whose name ends in a dot plus exactly three
    characters are read in sorted file-name order and stacked along a new
    first axis. The result is written with ``np.save``.

    Args:
        dir_dataset: Directory containing the image slices.
        save_path: Destination ``.npy`` path. Its parent directory is created
            when the path has one.

    Raises:
        ValueError: If no matching file is found in ``dir_dataset``.
        OSError: If a slice cannot be decoded by ``cv2.imread``.
    """
    print(dir_dataset)

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    img_paths = sorted(glob(f"{dir_dataset}/*.???"))
    if not img_paths:
        raise ValueError(f"no image files found in {dir_dataset}")

    slices = []
    for img_path in img_paths:
        # Flag -1 loads the image unchanged (original bit depth and channels).
        img = cv2.imread(img_path, -1)
        if img is None:
            # imread signals failure by returning None instead of raising;
            # stacking that None would silently save a useless object array.
            raise OSError(f"failed to read image: {img_path}")
        slices.append(img)

    np.save(save_path, np.stack(slices))

In [16]:
# Only kidney_1_dense_images, kidney_1_dense_labels, kidney_2_sparse_images and
# kidney_3_sparse_images are stacked here.
# For the kidney_2 and kidney_3 labels, the versions uploaded to Google Drive are used.

for dir_dataset in Path(cfg.dir_train).glob("*/*"):
    # Layout is <dir_train>/<data_name>/<data_type>.
    data_name, data_type = dir_dataset.parent.name, dir_dataset.name
    dir_dataset = dir_dataset.as_posix()

    if "voi" in data_name:
        continue
    data_name = "kidney_2_sparse" if "kidney_2" in data_name else data_name
    if data_type == "labels" and "kidney_1" not in data_name:
        continue

    # "sparse" is renamed to "pseudo" in the output file name.
    save_path = f"{cfg.dir_raw}/{data_name}_{data_type}.npy".replace("sparse", "pseudo")
    print(save_path)
    stack_tifs(dir_dataset, save_path)

/kaggle/working/dataset/stack_raw/kidney_1_dense_images.npy
/kaggle/working/dataset/stack_raw/kidney_1_dense_labels.npy
/kaggle/working/dataset/stack_raw/kidney_2_pseudo_images.npy
/kaggle/working/dataset/stack_raw/kidney_3_pseudo_images.npy


In [17]:
# Stack the external dataset
data_name, data_type = "kidney_9_dense", "pseudo"

save_path = f"{cfg.dir_raw}/{data_name}_{data_type}.npy"
print(save_path)
stack_tifs(dir_dataset=cfg.dir_external, save_path=save_path)

/kaggle/working/dataset/stack_raw/kidney_9_dense_pseudo.npy


# Clip images

In [19]:
def clip_based_on_percentile(npy, percentile=0.05, central_range=(0.3, 0.7)):
    """Clip a stacked volume to a percentile range and rescale it to [0, 1].

    The clipping bounds are the ``percentile``-th and
    ``100 - percentile``-th percentiles of the central part of the stack
    (slices near both ends of the first axis are left out of the percentile
    computation). Both bounds are truncated to integers, as before.

    Args:
        npy: Array whose first axis indexes the slices of the stack.
        percentile: Lower percentile in the 0-100 scale; the upper one is
            ``100 - percentile``.
        central_range: ``(start, stop)`` fractions of the stack length that
            delimit the slices used for the percentile computation.

    Returns:
        A ``float32`` array of the same shape. Values are
        ``(clip(npy, p_low, p_high) - p_low) / (p_high - p_low)``; when both
        bounds coincide the result is all zeros instead of NaN.

    Raises:
        ValueError: If ``npy`` contains no elements.
    """
    if npy.size == 0:
        raise ValueError("cannot clip an empty volume")

    stack_len = npy.shape[0]
    start = round(stack_len * central_range[0])
    stop = round(stack_len * central_range[1])

    central = npy[start:stop]
    if central.size == 0:
        # Very short stacks (e.g. 2 slices) leave nothing in the central
        # window; use the whole volume rather than a percentile of nothing.
        central = npy

    # One call computes both bounds in a single pass over the sub-volume.
    low, high = np.percentile(central, [percentile, 100 - percentile])
    p_low, p_high = int(low), int(high)

    clipped = np.clip(npy, p_low, p_high).astype("float32")
    scale = float(p_high - p_low)
    if scale == 0.0:
        # Every clipped value equals p_low; dividing would give 0/0 = NaN.
        return np.zeros_like(clipped)
    return (clipped - p_low) / scale


# For training, clip values based on percentiles and save them as float32.
def save_clipped_npy(dir_raw, dir_clipped, percentile=0.05):
    """Convert every ``.npy`` stack in ``dir_raw`` and save it to ``dir_clipped``.

    The kind of data is the last ``_``-separated token of the file name
    (text before the first dot):

    * ``labels``: cast to ``bool``.
    * ``images`` / ``pseudo``: passed through ``clip_based_on_percentile``.
    * anything else: skipped with a message, so that an unrelated file is
      never saved with the contents of the previously processed stack.

    Args:
        dir_raw: Directory holding the raw stacked ``.npy`` files.
        dir_clipped: Output directory; created if missing.
        percentile: Forwarded to ``clip_based_on_percentile``.
    """
    os.makedirs(dir_clipped, exist_ok=True)
    # Sorted so the processing order is the same on every run.
    for npy_file in sorted(Path(dir_raw).glob("*.npy")):
        npy_path = npy_file.as_posix()
        data_name = npy_file.name.split(".")[0]
        data_type = data_name.split("_")[-1]
        save_path = f"{dir_clipped}/{data_name}.npy"

        if data_type == "labels":
            # Labels are converted to bool and stored alongside the images.
            npy = np.load(npy_path).astype(bool)
        elif data_type in ("images", "pseudo"):
            npy = clip_based_on_percentile(np.load(npy_path), percentile)
        else:
            print(f"skip {npy_path}: unknown data type '{data_type}'")
            continue

        np.save(save_path, npy)


percentile = 0.05  # clip to the 0.05th-99.95th percentile range
save_clipped_npy(dir_raw=cfg.dir_raw, dir_clipped=cfg.dir_clipped, percentile=percentile)

/kaggle/working/dataset/stack_train01/kidney_1_dense_images.npy
/kaggle/working/dataset/stack_train01/kidney_1_dense_labels.npy
/kaggle/working/dataset/stack_train01/kidney_2_sparse_images.npy
/kaggle/working/dataset/stack_train01/kidney_2_sparse_labels.npy
/kaggle/working/dataset/stack_train01/kidney_3_dense_labels.npy
/kaggle/working/dataset/stack_train01/kidney_3_sparse_images.npy
/kaggle/working/dataset/stack_train01/kidney_3_sparse_labels.npy
/kaggle/working/dataset/stack_train01/kidney_9_pseudo_images.npy
