In [1]:
import os
import sys
import shutil
import math
from glob import glob
from pathlib import Path
from tqdm import tqdm

import tifffile
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast
import segmentation_models_pytorch as smp

# config

In [2]:
class cfg:
    # ============== model cfg =============
    in_chans = 1
    # ============== _ cfg =============
    image_size = 256
    stride = image_size // 2
    drop_egde_pixel = 32

    # ============== fold =============
    valid_batch_size = 128
    model_paths = [
        "/kaggle/working/notebook/train/train02/train02/train02_best_fold0.pth",
        "/kaggle/working/notebook/train/train02/train02/train02_best_fold1.pth",
        # "/kaggle/input/train02/train02_best_fold1.pth",
        # "/kaggle/input/train02/train02_best_fold2.pth",
        # "/kaggle/input/train02/train02_best_fold3.pth",
    ]
    merged_model_path = "/home/merged.pth"
    dir_raw = "/home/dataset_test/stack_raw"
    dir_clipped = "/home/dataset_test/stack_clipped"
    dir_pred = "/home/pred_test"
    test_dataset = "BaseInferenceDataset"

    is_kaggle_notebook = "kaggle_web_client" in sys.modules
    if is_kaggle_notebook:
        dir_test = "/kaggle/input/blood-vessel-segmentation/test"
    else:
        dir_test = "/kaggle/input/blood-vessel-segmentation/train/"
        # dir_test = "/kaggle/input/blood-vessel-segmentation/test"

In [3]:
def stack_tifs(dir_dataset, save_path):
    print(dir_dataset)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    img_paths = glob(f"{dir_dataset}/*.???")

    stack = []
    for img_path in sorted(img_paths):
        img = cv2.imread(img_path, -1)
        stack.append(img)

    stack = np.stack(stack)
    np.save(save_path, stack)


for dir_dataset in Path(f"{cfg.dir_test}").glob("*/images"):
    dir_dataset = dir_dataset.as_posix()

    data_name = dir_dataset.split("/")[-2]
    data_type = dir_dataset.split("/")[-1]
    save_path = f"{cfg.dir_raw}/{data_name}_{data_type}.npy"
    print(dir_dataset, save_path)
    stack_tifs(dir_dataset, save_path)

/kaggle/input/blood-vessel-segmentation/train/kidney_1_dense/images /home/dataset_test/stack_raw/kidney_1_dense_images.npy
/kaggle/input/blood-vessel-segmentation/train/kidney_1_dense/images
/kaggle/input/blood-vessel-segmentation/train/kidney_1_voi/images /home/dataset_test/stack_raw/kidney_1_voi_images.npy
/kaggle/input/blood-vessel-segmentation/train/kidney_1_voi/images


KeyboardInterrupt: 

In [3]:
def clip_based_on_percentile(npy, percentile=0.05):
    stack_len = npy.shape[0]
    upper = round(stack_len * 0.3)
    lower = round(stack_len * 0.7)
    p_low = int(np.percentile(npy[upper:lower], percentile))  # 上下端に近い部分はpercentile計算対象から除外
    p_high = int(np.percentile(npy[upper:lower], 100 - percentile))
    npy = np.clip(npy, p_low, p_high).astype(np.float16)
    scale = p_high - p_low
    npy = npy - p_low
    npy = npy / scale
    return npy


def save_clipped_npy(dir_raw, dir_clipped, percentile=0.05):
    os.makedirs(dir_clipped, exist_ok=True)
    for npy_path in Path(dir_raw).glob("*.npy"):
        npy_path = npy_path.as_posix()

        print(npy_path)
        data_name = npy_path.split("/")[-1].split(".")[0]
        data_type = data_name.split("_")[-1]
        save_path = f"{dir_clipped}/{data_name}.npy"

        if os.path.exists(save_path):
            continue

        if "voi" in npy_path:
            continue

        if "labels" == data_type:
            npy = np.load(npy_path).astype(bool)

        elif data_type in ["images", "pseudo"]:
            npy = np.load(npy_path)
            npy = clip_based_on_percentile(npy, percentile)

        np.save(save_path, npy)


save_clipped_npy(cfg.dir_raw, cfg.dir_clipped)

/home/dataset_test/stack_raw/kidney_1_dense_images.npy
1


In [3]:
class CustomInferenceModel(nn.Module):
    def __init__(self, model_arch, backbone, in_chans, target_size, weight, cfg):
        super().__init__()

        self.model = smp.create_model(
            model_arch,
            encoder_name=backbone,
            encoder_weights=weight,
            in_channels=in_chans,
            classes=target_size,
            activation=None,
        )
        self.batch = cfg.valid_batch_size
        self.in_chans = in_chans
        self.target_chans = math.ceil(cfg.in_chans / 2) - 1

    def forward_(self, image):
        output = self.model(image)
        return output[:, self.target_chans]

    def forward(self, image):
        # image.shape=(batch,c,h,w)
        image = image.to(torch.float32)

        shape = image.shape
        image = [torch.rot90(image, k=i, dims=(-2, -1)) for i in range(4)]
        image = torch.cat(image, dim=0)
        with autocast():
            with torch.no_grad():
                image = [self.forward_(image[i * self.batch : (i + 1) * self.batch]) for i in range(image.shape[0] // self.batch + 1)]
                image = torch.cat(image, dim=0)
        image = image.sigmoid()
        image = image.reshape(4, shape[0], *shape[2:])
        image = [torch.rot90(image[i], k=-i, dims=(-2, -1)) for i in range(4)]
        image = torch.stack(image, dim=0).mean(0)

        return image


def load_inference_model(model_path, cfg):
    pth = torch.load(model_path)

    print("model_name", pth["model_arch"])
    print("backbone", pth["backbone"])
    model = CustomInferenceModel(pth["model_arch"], pth["backbone"], pth["in_chans"], pth["target_size"], None, cfg)
    model.load_state_dict(pth["model"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    return model

In [4]:
class BaseInferenceDataset(Dataset):
    def __init__(self, stack, in_chans):
        self.in_chans = in_chans
        self.target_chans = math.ceil(in_chans / 2)

        pad_top = torch.zeros(self.target_chans - 1, *stack.shape[1:], dtype=stack.dtype)
        pad_bottom = torch.zeros(self.in_chans - self.target_chans, *stack.shape[1:], dtype=stack.dtype)

        self.stack = torch.cat((pad_top, stack, pad_bottom), dim=0)

    def __len__(self):
        return self.stack.shape[0] - self.in_chans

    def __getitem__(self, z_):
        stack = self.stack[z_ : z_ + self.in_chans]
        return stack, z_


class LargeImageInferenceDataset(Dataset):
    def __init__(self, stack, in_chans):
        self.in_chans = in_chans
        self.target_chans = math.ceil(in_chans / 2)

        pad_top = torch.zeros(self.target_chans - 1, *stack.shape[1:], dtype=stack.dtype)
        pad_bottom = torch.zeros(self.in_chans - self.target_chans, *stack.shape[1:], dtype=stack.dtype)

        self.stack = torch.cat((pad_top, stack, pad_bottom), dim=0)

    def __len__(self):
        return self.stack.shape[0] - self.in_chans

    def __getitem__(self, z_):
        stack = self.stack[z_ : z_ + self.in_chans]
        return stack, z_


def get_test_dataset(cfg):
    if cfg.test_dataset == "BaseInferenceDataset":
        dataset = BaseInferenceDataset
    elif cfg.test_dataset == "LargeImageInferenceDataset":
        dataset = LargeImageInferenceDataset
    else:
        raise ValueError(f"Invalid dataset name: {cfg.dataset}")
    return dataset

In [5]:
def add_pad(stack: torch.Tensor, pad: int):
    # stack=(C,H,W)
    # output=(C,H+2*pad,W+2*pad)
    mean_ = int(stack.to(torch.float32).mean())
    stack = torch.cat([stack, torch.ones([stack.shape[0], pad, stack.shape[2]], dtype=stack.dtype, device=stack.device) * mean_], dim=1)
    stack = torch.cat([stack, torch.ones([stack.shape[0], stack.shape[1], pad], dtype=stack.dtype, device=stack.device) * mean_], dim=2)
    stack = torch.cat([torch.ones([stack.shape[0], pad, stack.shape[2]], dtype=stack.dtype, device=stack.device) * mean_, stack], dim=1)
    stack = torch.cat([torch.ones([stack.shape[0], stack.shape[1], pad], dtype=stack.dtype, device=stack.device) * mean_, stack], dim=2)
    return stack


def shift_axis(tensor, axis):
    perm = [axis, (axis + 1) % 3, (axis + 2) % 3]  # 軸の順番をシフト
    tensor = tensor.permute(*perm)
    return tensor


def remove_pad(pred: torch.Tensor, pad: int):
    pred = pred[..., pad:-pad, pad:-pad]
    return pred


def cutout_chip(img, stack_shape, stride, img_size, edge):
    chip = []
    xy_indexs = []

    x1_list = np.arange(0, stack_shape[-2] + 1, stride)
    y1_list = np.arange(0, stack_shape[-1] + 1, stride)

    for y1 in y1_list:
        for x1 in x1_list:
            x2 = x1 + img_size
            y2 = y1 + img_size
            chip.append(img[..., x1:x2, y1:y2])
            xy_indexs.append([x1 + edge, x2 - edge, y1 + edge, y2 - edge])
    return chip, xy_indexs


def infer_each_z(model, img, stack_shape, cfg):
    img = img.to("cuda:0")
    img = add_pad(img[0], cfg.image_size // 2)[None]

    chip, xy_indexs = cutout_chip(img, stack_shape, cfg.stride, cfg.image_size, cfg.drop_egde_pixel)
    chip = torch.cat(chip)

    preds = model.forward(chip).to(device=0)
    preds = preds.unsqueeze(1)
    preds = remove_pad(preds, cfg.drop_egde_pixel)

    pred = torch.zeros_like(img[:, 0], dtype=torch.float16, device=img.device)
    count = torch.zeros_like(img[:, 0], dtype=torch.float16, device=img.device)
    for i, (x1, x2, y1, y2) in enumerate(xy_indexs):
        pred[..., x1:x2, y1:y2] += preds[i]
        count[..., x1:x2, y1:y2] += 1
    pred /= count
    pred = remove_pad(pred, cfg.image_size // 2)

    pred = (pred[0]).to(torch.float16).cpu()
    return pred


def inference_each_axis(model, stack_path, axis, save_path, cfg):
    stack = torch.tensor(np.load(stack_path))
    print(stack.shape)
    stack = shift_axis(stack, axis)

    preds = torch.zeros_like(stack, dtype=torch.float16)

    dataset = get_test_dataset(cfg)(stack, cfg.in_chans)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

    for img, z_ in tqdm(dataloader):  # img=(1,C,H,W)
        pred = infer_each_z(model, img, stack.shape, cfg)
        preds[z_] = pred

    preds = shift_axis(preds, -axis)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    np.save(save_path, preds)


def inference(model, stack_path, save_path, cfg):
    save_dir = os.path.dirname(save_path)
    os.makedirs(save_dir, exist_ok=True)
    for axis in range(3):
        kidney = stack_path.split("/")[-1].split(".")[0]

        save_path_each_axis = (f"{save_dir}/{kidney}_{axis}.npy").replace("images", "preds")

        if os.path.exists(save_path_each_axis):
            continue

        inference_each_axis(model, stack_path, axis, save_path_each_axis, cfg)

    preds = np.mean(np.stack([np.load((f"{save_dir}/{kidney}_{axis}.npy").replace("images", "preds")) for axis in range(3)]), axis=0)
    np.save(save_path, preds)
    for axis in range(3):
        os.remove((f"{save_dir}/{kidney}_{axis}.npy").replace("images", "preds"))

In [6]:
from collections import OrderedDict

pth = OrderedDict()
pth_ = torch.load(cfg.model_paths[0])

for key in torch.load((cfg.model_paths[0]))["model"].keys():
    pth[key] = 0

for model_i, model_path in enumerate(cfg.model_paths):
    pth_ = torch.load(model_path)
    for key in pth_["model"].keys():
        pth[key] += pth_["model"][key]

for key in pth_["model"].keys():
    pth[key] = pth[key] / len(cfg.model_paths)

save_dict = {
    "model": pth,
    "model_arch": pth_["model_arch"],
    "backbone": pth_["backbone"],
    "in_chans": pth_["in_chans"],
    "target_size": pth_["target_size"],
}
torch.save(save_dict, cfg.merged_model_path)
cfg.model_paths = [cfg.merged_model_path]

In [15]:
tmp = np.load("/kaggle/working/preds0.npy")

In [13]:
torch.load(cfg.merged_model_path)

{'model': OrderedDict([('model.encoder.layer0.conv1.weight',
               tensor([[[[ 5.8477e-02, -6.3154e-01, -9.4307e-01,  ...,  1.3521e+00,
                          -1.0718e+00,  1.0006e+00],
                         [ 1.6835e-01, -1.8120e+00, -2.6739e+00,  ...,  3.4248e+00,
                          -1.6701e+00, -1.1369e+00],
                         [ 1.1254e+00, -3.0486e+00, -4.5755e+00,  ...,  6.5488e+00,
                          -1.9247e+00, -2.9820e+00],
                         ...,
                         [ 1.8212e+00, -3.6942e+00, -6.1013e+00,  ...,  8.7004e+00,
                          -1.1639e+00, -4.3285e+00],
                         [ 1.1179e+00, -2.1376e+00, -4.3298e+00,  ...,  6.3496e+00,
                          -7.5982e-01, -2.2293e+00],
                         [ 4.3991e-01, -8.0721e-01, -2.6147e+00,  ...,  3.4319e+00,
                          -4.0873e-01,  2.8376e-02]]],
               
               
                       [[[ 3.7444e-01, -1.5045e-01,  

In [7]:
for model_i, model_path in enumerate(cfg.model_paths):
    model = load_inference_model(model_path, cfg)
    for stack_path in Path(cfg.dir_clipped).glob("*.npy"):
        stack_path = stack_path.as_posix()
        kidney = os.path.basename(stack_path).split(".")[0]
        preds_path = f"{cfg.dir_pred}/{kidney}/preds{model_i}.npy"

        print(stack_path)
        inference(model, stack_path, preds_path, cfg)
        break

model_name Unet
backbone se_resnext50_32x4d
/home/dataset_test/stack_clipped/kidney_1_dense_images.npy
torch.Size([2279, 1303, 912])


100%|██████████| 2278/2278 [09:12<00:00,  4.12it/s]


torch.Size([2279, 1303, 912])


100%|██████████| 1302/1302 [08:14<00:00,  2.64it/s]


torch.Size([2279, 1303, 912])


100%|██████████| 911/911 [07:54<00:00,  1.92it/s]


In [None]:
for pred_kidney in glob(f"{cfg.dir_pred}/*images"):
    pred_paths = glob(f"{pred_kidney}/*.npy")

    pred = np.load(pred_paths[0])
    os.remove(pred_paths[0])

    for pred_path in pred_paths[1:]:
        pred += np.load(pred_path)
        os.remove(pred_path)

    pred /= len(pred_paths)
    pred = (pred > 0.5).astype(np.uint8)
    np.save(f"{pred_kidney}/preds.npy", pred)

In [9]:
# shutil.copy(f"{pred_kidney}/preds.npy", "/kaggle/working/")
# tmp = np.load("/kaggle/working/preds.npy")
# np.save("/kaggle/working/preds.npy", tmp * 255)
shutil.copy(preds_path, "/kaggle/working/")
tmp = np.load("/kaggle/working/preds0.npy")
np.save("/kaggle/working/preds0.npy", tmp * 255)

In [11]:
def rle_encode(mask):
    pixel = mask.flatten()
    pixel = np.concatenate([[0], pixel, [0]])
    run = np.where(pixel[1:] != pixel[:-1])[0] + 1
    run[1::2] -= run[::2]
    rle = " ".join(str(r) for r in run)
    if rle == "":
        rle = "1 0"
    return rle


def get_id(img_path):
    id = img_path.split("/")[-3:]
    id.pop(1)
    id = "_".join(id)
    return id[:-4]


def get_ids(img_paths):
    ids = []
    for img_path in img_paths:
        img_path = img_path.as_posix()
        ids.append(get_id(img_path))
    return ids


img_paths = sorted(Path(cfg.dir_test).glob("*/images/*.tif"))
ids = get_ids(img_paths)
ids

['kidney_1_dense_0000',
 'kidney_1_dense_0001',
 'kidney_1_dense_0002',
 'kidney_1_dense_0003',
 'kidney_1_dense_0004',
 'kidney_1_dense_0005',
 'kidney_1_dense_0006',
 'kidney_1_dense_0007',
 'kidney_1_dense_0008',
 'kidney_1_dense_0009',
 'kidney_1_dense_0010',
 'kidney_1_dense_0011',
 'kidney_1_dense_0012',
 'kidney_1_dense_0013',
 'kidney_1_dense_0014',
 'kidney_1_dense_0015',
 'kidney_1_dense_0016',
 'kidney_1_dense_0017',
 'kidney_1_dense_0018',
 'kidney_1_dense_0019',
 'kidney_1_dense_0020',
 'kidney_1_dense_0021',
 'kidney_1_dense_0022',
 'kidney_1_dense_0023',
 'kidney_1_dense_0024',
 'kidney_1_dense_0025',
 'kidney_1_dense_0026',
 'kidney_1_dense_0027',
 'kidney_1_dense_0028',
 'kidney_1_dense_0029',
 'kidney_1_dense_0030',
 'kidney_1_dense_0031',
 'kidney_1_dense_0032',
 'kidney_1_dense_0033',
 'kidney_1_dense_0034',
 'kidney_1_dense_0035',
 'kidney_1_dense_0036',
 'kidney_1_dense_0037',
 'kidney_1_dense_0038',
 'kidney_1_dense_0039',
 'kidney_1_dense_0040',
 'kidney_1_dense

In [13]:
tmp.shape

(2279, 1303, 912)

In [14]:
ids[2270]

'kidney_1_dense_2270'

In [None]:
submission_df = pd.DataFrame(
    {
        "id": ids,
        "kidney": ["_".join(id.split("_")[:2]) for id in ids],
        "slice": [int(id.split("_")[-1]) for id in ids],
        "rle": None,
    }
)

for pred_kidney in glob(f"{cfg.dir_pred}/*images"):
    print(pred_kidney)

    kidney = os.path.basename(pred_kidney)
    pred_paths = f"{cfg.dir_pred}/{kidney}/preds.npy"
    npy = np.load(pred_paths)
    npy_shape = npy.shape

    for i in range(npy_shape[0]):
        rle = rle_encode(npy[i])
        submission_df.loc[(submission_df["kidney"] == kidney.replace("_images","")) & (submission_df["slice"] == i), "rle"] = rle
submission_df=submission_df.drop(["kidney","slice"],axis=1)


In [None]:
# rmtree
submission_df.to_csv("submission.csv", index=False)