In [2]:
import os

import nibabel as nib
import numpy as np
import scipy.ndimage as ndi

datapath = "/media/lm/Samsung_T5/Uni/Medml/training/train"

files = os.listdir(datapath)
files = filter(lambda x: x.endswith("_orig.nii.gz"), files)
files = list(map(lambda x: x.replace("_orig.nii.gz", ""), files))
sorted(files)

volumes = []
for idx, name in enumerate(files):
    print(f"{idx} / {len(files)}: {name}")

    raw = nib.load(os.path.join(datapath, name + "_orig.nii.gz"))
    label = nib.load(os.path.join(datapath, name + "_masks.nii.gz"))

    raw_np = raw.get_fdata()
    label_np = label.get_fdata()

    raw_np = ndi.zoom(raw_np, (0.5, 0.5, 0.5), order=3)
    label_np = ndi.zoom(label_np, (0.5, 0.5, 0.5), order=0)
    label_np = label_np > 0.5

    perc = np.percentile(raw_np, 99)

    t = (raw_np > perc).astype(int)

    tt, num_labels = ndi.label(t)

    t2, num_labels_mask = ndi.label(label_np)
    for i in range(1, num_labels_mask+1):
        volume = np.sum(t2 == i)
        overlap = np.logical_and(t2 == i, t)
        overlap_size = (np.logical_and(t2 == i, t)).sum()

        x_idx, y_idx, z_idx = np.where(overlap)

        idxx = len(x_idx) // 2
        label = tt[x_idx[idxx], y_idx[idxx], z_idx[idxx]]

        artery_volume = np.sum(tt == label)

        volumes.append((overlap_size, artery_volume, volume))


# t = ndi.binary_dilation(t, iterations=1).astype(int)


0 / 76: A012
1 / 76: A100


KeyboardInterrupt: 

In [2]:
import pandas as pd
df = pd.DataFrame(volumes, columns=["overlap_size", "artery_volume", "volume"])
df.describe()

Unnamed: 0,overlap_size,artery_volume,volume
count,87.0,87.0,87.0
mean,312.126437,8994.942529,352.137931
std,646.625081,2615.956201,713.402952
min,9.0,2253.0,12.0
25%,48.5,7632.0,60.0
50%,105.0,9013.0,122.0
75%,293.0,10453.0,328.5
max,4595.0,16783.0,5006.0


In [41]:
import os
import shutil
### Split train test

data_path = "/media/lm/Samsung_T5/Uni/Medml/training"

train_split = 0.7
# Load files
files = os.listdir(data_path)
files = filter(lambda x: x.endswith("_orig.nii.gz"), files)
files = list(map(lambda x: x.replace("_orig.nii.gz", ""), files))
np.random.shuffle(files)

split = int(len(files) * train_split)
train = files[:split]
val = files[split:]

print(train, val)

def move(files_f, folder):
    folder = os.path.join(data_path, folder)
    if not os.path.exists(folder):
        os.makedirs(folder)

    for i in files_f:
        shutil.move(os.path.join(data_path, i + "_orig.nii.gz"), os.path.join(folder, i + "_orig.nii.gz"))
        shutil.move(os.path.join(data_path, i + "_masks.nii.gz"), os.path.join(folder, i + "_masks.nii.gz"))

move(train, "train")
move(val, "val")



['A012', 'A100', 'A056', 'A123', 'A057', 'A029', 'A041', 'A098', 'A067', 'A076', 'A091_R', 'A038_R', 'A130_L', 'PA5', 'A135', 'A083', 'A059_L', 'A086', 'A082', 'A050', 'A097', 'A071', 'A105_R', 'A014', 'PA6', 'A001', 'A017_L', 'A133', 'A040', 'A003', 'A074', 'A044', 'A084', 'A085', 'A066', 'A126', 'A064', 'A043', 'A079', 'A015', 'A027', 'A028', 'A062_L', 'A081', 'A070', 'A087', 'A103', 'A121', 'A138', 'A008', 'A051_R', 'A096_L', 'A112', 'A095', 'A080', 'A092', 'A130_R', 'A038_M', 'A096_R', 'A010', 'A026', 'A134', 'A078_L', 'A032', 'A038_L', 'A060', 'A119', 'A113', 'A108', 'A024', 'A094_R', 'A105_L', 'A046', 'A093', 'A089_R', 'A077'] ['A045', 'A129', 'A114', 'A118', 'A073', 'A006', 'A136', 'A047', 'A049', 'A025', 'A019', 'A023_R', 'A120', 'A127', 'A042', 'A115', 'A088', 'A137', 'A072', 'A016', 'A009', 'A068', 'A059_R', 'A018', 'A013', 'A031', 'A124', 'A035', 'A078_R', 'A021', 'A005', 'A033', 'A054']


In [10]:
import scipy.ndimage as ndi
import matplotlib.pyplot as plt
case = "A003"
raw = nib.load(f"/media/lm/Samsung_T5/Uni/Medml/training/train/{case}_orig.nii.gz")
label = nib.load(f"/media/lm/Samsung_T5/Uni/Medml/training/train/{case}_masks.nii.gz")

raw_np = raw.get_fdata()
label_np = label.get_fdata()

raw_np = ndi.zoom(raw_np, (0.5, 0.5, 0.5), order=3)
label_np = ndi.zoom(label_np, (0.5, 0.5, 0.5), order=0)
label_np = label_np > 0.5

perc = np.percentile(raw_np, 99)

t = (raw_np > perc).astype(int)

tt, num_labels = ndi.label(t)
unique, counts = np.unique(tt, return_counts=True)

t = ndi.binary_closing(t, iterations=1)

keep_idx = unique[counts > 2000]
keep = None
for i in keep_idx:
    if i == 0:
        continue

    if keep is None:
        keep = tt == i
    else:
        keep = np.logical_or(keep, (tt == i))

remove_idx = np.logical_not(keep)

t[remove_idx] = 0

idx = np.argmax(t.sum(axis=(1, 2)))
print(idx)

#t = ndi.gaussian_filter(t, sigma=0.1, order=0)
# t = t > 0.5

# plt.figure(figsize=(20, 10))
# plt.subplot(1, 2, 1)
# plt.imshow(t[idx])
# plt.subplot(1, 2, 2)
# t = ndi.gaussian_filter(t, sigma=0.15, order=0)
# plt.imshow(t[idx])


nib.save(nib.Nifti1Image(t.astype(int), raw.affine), "./data/test_thres/art.nii.gz")
nib.save(nib.Nifti1Image(label_np.astype(int), raw.affine), "./data/test_thres/mask.nii.gz")

56


In [None]:
import h5py
import os

datapath = "/media/lm/Samsung_T5/Uni/Medml/training/train"

files = os.listdir(datapath)
files = filter(lambda x: x.endswith("_orig.nii.gz"), files)
files = list(map(lambda x: x.replace("_orig.nii.gz", ""), files))
sorted(files)

h5_save_folder = os.path.join(datapath, "h5")

if not os.path.exists(h5_save_folder):
    os.makedirs(h5_save_folder)

# PARAMS
zoom = False
volume_threshold = 30000 #2000
closing_thres = 3

volumes = []
for idx, name in enumerate(files):
    print(f"{idx} / {len(files)}: {name}")

    raw = nib.load(os.path.join(datapath, name + "_orig.nii.gz"))
    label = nib.load(os.path.join(datapath, name + "_masks.nii.gz"))

    raw_np = raw.get_fdata()
    label_np = label.get_fdata()

    if zoom:
        raw_np = ndi.zoom(raw_np, (0.5, 0.5, 0.5), order=3)
        label_np = ndi.zoom(label_np, (0.5, 0.5, 0.5), order=0)
        label_np = label_np > 0.5

    # find artery
    perc = np.percentile(raw_np, 99)

    t = (raw_np > perc).astype(int)

    tt, num_labels = ndi.label(t)
    unique, counts = np.unique(tt, return_counts=true)

    keep_idx = unique[counts > volume_threshold]
    keep = none
    for i in keep_idx:
        if i == 0:
            continue

        if keep is none:
            keep = tt == i
        else:
            keep = np.logical_or(keep, (tt == i))

    remove_idx = np.logical_not(keep)

    t[remove_idx] = 0

    t = ndi.binary_closing(t, iterations=closing_thres)

    overlap_mask = np.logical_and(label_np, t)

    # Normalize
    min_val = raw_np.min()
    max_val = raw_np.max()

    raw_np = (raw_np - min_val) / (max_val - min_val)

    with h5py.File(os.path.join(h5_save_folder, f"{name}.h5"), "w") as f:
        f.create_dataset("raw", data=raw_np)
        f.create_dataset("label", data=label_np)
        f.create_dataset("artery", data=t)
        f.create_dataset("overlap_mask", data=overlap_mask)

In [None]:
import h5py
import nibabel as nib
outpath = "./data/test_thres"

def h5_to_nii(p):
    with h5py.File(p, "r") as f:
        raw = f["raw"][:]
        mask = f["label"][:]
        artery = f["artery"][:]
        # overlap_mask = f["overlap_mask"][:]

        artery = ndi.binary_closing(artery, iterations=3)

        nib.save(nib.Nifti1Image(raw, np.eye(4)), f"{outpath}/raw.nii.gz")
        nib.save(nib.Nifti1Image(mask.astype(int), np.eye(4)), f"{outpath}/mask.nii.gz")
        nib.save(nib.Nifti1Image(artery.astype(float), np.eye(4)), f"{outpath}/artery.nii.gz")
        # nib.save(nib.Nifti1Image(overlap_mask.astype(int), np.eye(4)), f"{outpath}/overlap_mask.nii.gz")

#h5_to_nii("/media/lm/Samsung_T5/Uni/Medml/training/train/h5/A001.h5")

In [None]:
import scipy.ndimage as ndi
import numpy as np
# Calc stats
datapath = "/media/lm/Samsung_T5/Uni/Medml/training/train/h5_size_adjusted"

files = os.listdir(datapath)
sorted(files)

volumes = []
for idx, name in enumerate(files):
    if not name.endswith(".h5"):
        continue

    print(f"{idx} / {len(files)}: {name}")

    with h5py.File(os.path.join(datapath, name), "r") as f:
        raw = f["raw"][:]
        mask = f["label"][:]
        artery = f["artery"][:]

        artery_labels, num_artery_labels = ndi.label(artery)

        t2, num_labels_mask = ndi.label(mask)
        for i in range(1, num_labels_mask+1):
            # Volume and overlap
            cur_mask = t2 == i
            volume = np.sum(cur_mask)
            overlap = np.logical_and(cur_mask, artery)
            overlap_size = overlap.sum()

            x_idx, y_idx, z_idx = np.where(overlap)

            idxx = len(x_idx) // 2
            label = artery_labels[x_idx[idxx], y_idx[idxx], z_idx[idxx]]

            artery_volume = np.sum(artery_labels == label)

            # Aneurysm start and pixel size

            x_s = cur_mask.sum(axis=(1, 2))
            y_s = cur_mask.sum(axis=(0, 2))
            z_s = cur_mask.sum(axis=(0, 1))

            x = np.where(x_s)[0][[0, -1]]
            y = np.where(y_s)[0][[0, -1]]
            z = np.where(z_s)[0][[0, -1]]

            x_start, x_end = x[0], x[1]
            y_start, y_end = y[0], y[1]
            z_start, z_end = z[0], z[1]

            x_size = x_end - x_start
            y_size = y_end - y_start
            z_size = z_end - z_start

            volumes.append((name, overlap_size, artery_volume, volume, x_size, y_size, z_size, x_start, y_start, z_start, x_end, y_end, z_end))


In [None]:
import pandas as pd
df = pd.DataFrame(volumes, columns=["file", "overlap_size", "artery_volume", "volume", "x_size", "y_size", "z_size", "x_start", "y_start", "z_start", "x_end", "y_end", "z_end"])
df["overlap_to_volume_ratio"] = df["overlap_size"] / df["volume"]

In [None]:
df["min_size"] = df[["x_size", "y_size", "z_size"]].min(axis=1)
df["max_size"] = df[["x_size", "y_size", "z_size"]].max(axis=1)

df[["file", "min_size", "max_size"]].describe()


# thres = 16
# out = []
# for index, row in df.iterrows():

#     for fac in [1.5, 1.25, 1, 0.75, 0.5, 0.25]:
#         if row["max_size"] * fac < thres:
#             out.append([row["file"], row["min_size"], row["max_size"], fac, row["min_size"] * fac, row["max_size"] * fac])
#             break

# df2 = pd.DataFrame(out, columns=["file", "min_before", "max_before", "fac", "min_after", "max_after"])
# df2.sort_values(by='min_after', inplace=True)

# df2.to_csv("./aneu_sizes.csv")

In [None]:
t = df2.groupby('file')['fac'].apply(lambda x: list(np.unique(x)))
t2 = df2.groupby('file')['fac'].apply(lambda x: len(list(x)))

t[t2 >= 2]

In [None]:
#Adjusted Sizes Dataset

import scipy.ndimage as ndi
import numpy as np
# Calc stats
datapath = "/media/lm/Samsung_T5/Uni/Medml/training/val/h5"
datapath_out = "/media/lm/Samsung_T5/Uni/Medml/training/val/h5_size_adjusted"
thres = 16

if not os.path.exists(datapath_out):
    os.makedirs(datapath_out)

files = os.listdir(datapath)
sorted(files)

volumes = []
for idx, name in enumerate(files):
    if not name.endswith(".h5"):
        continue


    with h5py.File(os.path.join(datapath, name), "r") as f:
        raw = f["raw"][:]
        mask = f["label"][:]
        artery = f["artery"][:]
        overlap_mask = f["overlap_mask"][:]

        fac_sizes = []
        t2, num_labels_mask = ndi.label(mask)
        for i in range(1, num_labels_mask + 1):
            # Volume and overlap
            cur_mask = t2 == i
            x_s = cur_mask.sum(axis=(1, 2))
            y_s = cur_mask.sum(axis=(0, 2))
            z_s = cur_mask.sum(axis=(0, 1))

            x = np.where(x_s)[0][[0, -1]]
            y = np.where(y_s)[0][[0, -1]]
            z = np.where(z_s)[0][[0, -1]]

            x_start, x_end = x[0], x[1]
            y_start, y_end = y[0], y[1]
            z_start, z_end = z[0], z[1]

            x_size = x_end - x_start
            y_size = y_end - y_start
            z_size = z_end - z_start

            max_size = max(x_size, y_size, z_size)

            for fac in [1.5, 1.25, 1, 0.75, 0.5, 0.3]:
                if max_size * fac < thres:
                    fac_sizes.append(fac)
                    break
        
        fac_size = np.unique(fac_sizes)[0]
        if len(np.unique(fac_sizes)) > 1:
            print(f"Different sizes: {np.unique(fac_sizes)}")
            fac_size = 1
        

        if fac_size != 1:
            raw = ndi.zoom(raw, (fac_size, fac_size, fac_size), order=3)
            mask = ndi.zoom(mask, (fac_size, fac_size, fac_size), order=0)
            mask = mask > 0.5
            artery = ndi.zoom(artery, (fac_size, fac_size, fac_size), order=0)
            artery = artery > 0.5
            overlap_mask = ndi.zoom(overlap_mask, (fac_size, fac_size, fac_size), order=0)
            overlap_mask = overlap_mask > 0.5


        # Normalize
        min_val = raw.min()
        max_val = raw.max()

        raw_np = (raw - min_val) / (max_val - min_val)
        

        with h5py.File(os.path.join(datapath_out, name), "w") as f:
            f.create_dataset("raw", data=raw)
            f.create_dataset("label", data=mask)
            f.create_dataset("artery", data=artery)
            f.create_dataset("overlap_mask", data=overlap_mask)

    print(f"{idx} / {len(files)}: {name} {fac_size}")

In [None]:
test = {}

test[(1,3, 4)] = 5
(1,3, 4) in test

In [3]:
test = {}

test[(1,3, 4)] = 5
(1,3, 4) in test

True

In [None]:
with h5py.File("/media/lm/Samsung_T5/Uni/Medml/training/train/h5/A001.h5", "r") as f:
    fac = 0.25
    raw = f["raw"][:]
    mask = f["label"][:]

    raw = ndi.zoom(raw, (fac, fac, fac), order=3)
    label = ndi.zoom(mask, (fac, fac, fac), order=0)
    label_np = label > 0.5

In [None]:
# fit mask to artery

datapath = "/media/lm/Samsung_T5/Uni/Medml/training/train/h5"

files = os.listdir(datapath)
sorted(files)

volumes = []
for idx, name in enumerate(files):
    if not name.endswith(".h5"):
        continue

    print(f"{idx} / {len(files)}: {name}")

    with h5py.File(os.path.join(datapath, name), "r+") as f:
        raw = f["raw"][:]
        mask = f["label"][:]
        artery = f["artery"][:]

        artery = ndi.binary_closing(artery, iterations=3)

        overlap_mask = np.logical_and(mask, artery)

        f.create_dataset("overlap_mask", data=overlap_mask)





In [None]:
import torch


a = torch.tensor([5, 0.5])

torch.maximum(torch.ones(a.shape), a)


In [None]:
test = np.zeros((20, 20, 20))


test[4:8, 3:5, 2:7] = 1
test[14:20, 8:14, 9:11] = 2

test