In [1]:
import sys


sys.path.append("../src/")

In [2]:
from sennet.core.mmap_arrays import read_mmap_array, create_mmap_array
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from tqdm import tqdm
import cc3d

In [3]:
# Root directory of the prediction chunk being post-processed; the seed, out and
# mean_prob arrays used below are resolved relative to it. The alternative chunk
# directories are kept commented out for quick switching.
path = Path("/home/clay/research/kaggle/sennet/data_dumps/predicted/ensembled/kidney_3_merged/chunk_00")
# path = Path("/home/clay/research/kaggle/sennet/data_dumps/predicted/for_rg/kidney_3_sparse/chunk_00")
# path = Path("/home/clay/research/kaggle/sennet/data_dumps/predicted/for_rg/kidney_2/chunk_00")

In [4]:
# Locations (under `path`) of the boolean seed array and the boolean output array.
seed_path = path / "seed"
out_path = path / "out"

In [5]:
# Open the "mean_prob" array stored under `path`; its values are read through `.data` below.
# presumably the ensembled mean probability volume — TODO confirm
mean_pred = read_mmap_array(path / "mean_prob")


In [None]:
# Create the boolean output array with the same shape as the prediction,
# set every voxel to False and flush the write.
out_mmap = create_mmap_array(out_path, shape=mean_pred.data.shape, dtype=bool)
out_mmap.data[:] = False
out_mmap.data.flush()

# Picking the Largest Connected Component

In [None]:
# Low threshold: binarise the prediction, keeping every voxel whose value exceeds 0.003.
# np.ascontiguousarray materialises the comparison as a contiguous boolean array for cc3d.
threshold = 0.003
thresholded_pred = np.ascontiguousarray(mean_pred.data > threshold)

In [None]:
# Keep only the single largest (k=1) 26-connected component of the thresholded mask.
# With return_N=True the call also returns a count, stored in label_n.
largest_cluster, label_n = cc3d.largest_k(
    thresholded_pred,
    k=1,
    connectivity=26,
    # delta=0,
    return_N=True,
)

In [None]:
# Print the sum over the largest-component array, the number of voxels above the
# threshold, and their ratio.
# assumes the kept component is labelled 1 (k=1), so the sum equals its voxel count — TODO confirm
count = largest_cluster.sum()
label_count = thresholded_pred.sum()
print(f"{count}/{label_count}, {count/label_count}")

In [None]:
# Store the largest component as the boolean seed array (same shape as the
# prediction) and flush the write.
seed_mmap = create_mmap_array(seed_path, shape=mean_pred.data.shape, dtype=bool)
seed_mmap.data[:] = largest_cluster
seed_mmap.data.flush()

In [None]:
# Display the shape of the largest-component array as a quick sanity check.
largest_cluster.shape

# Dusting the Heavily Thresholded Prediction

In [None]:
# High threshold: keep only voxels whose predicted value exceeds 0.5.
# This rebinds `threshold` and `thresholded_pred` from the previous section.
threshold = 0.5
thresholded_pred = np.ascontiguousarray(mean_pred.data > threshold)

In [None]:
# Run cc3d.dust on the mask with threshold=100 and 26-connectivity.
# presumably this drops connected components smaller than 100 voxels — confirm against the cc3d docs
dusted_pred = cc3d.dust(
    thresholded_pred,
    threshold=100,
    connectivity=26,
)

In [None]:
# Store the dusted mask as the boolean seed array and flush the write.
# NOTE(review): this targets the same seed_path as the largest-component section,
# so the two sections are alternative ways of producing the seed.
seed_mmap = create_mmap_array(seed_path, shape=mean_pred.data.shape, dtype=bool)
seed_mmap.data[:] = dusted_pred
seed_mmap.data.flush()

# Raw Post Processing Things

In [6]:
# Back to the low threshold (0.003): rebuild the contiguous boolean mask that the
# component analysis below starts from.
threshold = 0.003
thresholded_pred = np.ascontiguousarray(mean_pred.data > threshold)

In [7]:
# Keep the 100 largest 26-connected components of the thresholded mask.
# largest_k_out is a label array (compared against integer labels below);
# label_n is the count returned because return_N=True.
largest_k_out, label_n = cc3d.largest_k(
    thresholded_pred,
    k=100,
    connectivity=26,
    # delta=0,
    return_N=True,
)

In [23]:
# Per-label statistics of the label array; the next cell reads "voxel_counts",
# "bounding_boxes" and "centroids" from the result.
# With no_slice_conversion=True the boxes print as flat integer arrays in the recorded output further down.
stats = cc3d.statistics(largest_k_out, no_slice_conversion=True)

In [24]:
# these are sorted ascending via voxel count (so the 100th cluster is the largest)
# [1:] drops index 0, so entry j of each array describes label j + 1
# (labels start from 1 in the cells below).

voxel_counts = stats["voxel_counts"][1:]
bboxes = stats["bounding_boxes"][1:]
centroids = stats["centroids"][1:]

In [20]:
# Display the per-component voxel counts.
# NOTE(review): the recorded output under this cell is a tuple of slices, which does not
# look like voxel counts — it is likely stale from an earlier run; re-execute to refresh.
voxel_counts

(slice(0, 1035, None), slice(0, 1706, None), slice(0, 1510, None))

In [19]:
def get_3d_bbox(bool_image):
    """Return the tight bounding box of the nonzero voxels of a 3D array.

    The array axes are read as (z, y, x). The result is a NumPy array laid out as
    ``[x_min, y_min, z_min, x_max, y_max, z_max]``; the maxima are inclusive.
    """
    z_idx, y_idx, x_idx = np.nonzero(bool_image)
    per_axis = (x_idx, y_idx, z_idx)
    lower = [coords.min() for coords in per_axis]
    upper = [coords.max() for coords in per_axis]
    return np.array(lower + upper)


def intersect_3d_bbox(bbox0: list[int, int, int, int, int, int], bbox1: list[int, int, int, int, int, int]) -> None | list[int, int, int, int, int, int]:
    x0_min, y0_min, z0_min, x0_max, y0_max, z0_max = bbox0
    x1_min, y1_min, z1_min, x1_max, y1_max, z1_max = bbox1

    inter_x_min = max(x0_min, x1_min)
    inter_y_min = max(y0_min, y1_min)
    inter_z_min = max(z0_min, z1_min)

    inter_x_max = min(x0_max, x1_max)
    inter_y_max = min(y0_max, y1_max)
    inter_z_max = min(z0_max, z1_max)

    if inter_x_min < inter_x_max and inter_y_min < inter_y_max and inter_z_min < inter_z_max:
        intersection_bbox = [inter_x_min, inter_y_min, inter_z_min, inter_x_max, inter_y_max, inter_z_max]
        return intersection_bbox
    else:
        return None

In [27]:
# Sanity check: print the cc3d bounding box of the first component next to the box
# get_3d_bbox computes for the same label (label i + 1). The recorded output shows
# the same six numbers in a different order.
i = 0
print(bboxes[i])
print(get_3d_bbox(largest_k_out == (i+1)))

[ 499  516  895  909 1076 1085]
[1076  895  499 1085  909  516]


In [30]:
def cc3d_bbox_to_bbox(bbox):
    """Reorder a flat cc3d bounding box into the layout used by ``get_3d_bbox``.

    The input is read as per-axis (min, max) pairs; the output lists the three
    minima first (last axis first) and then the three maxima in the same axis order.
    """
    source_positions = (4, 2, 0, 5, 3, 1)
    return [bbox[pos] for pos in source_positions]


# Convert the cc3d box of component i; the recorded output matches the box that
# get_3d_bbox printed for the same component.
cc3d_bbox_to_bbox(bboxes[i])

[1076, 895, 499, 1085, 909, 516]

In [44]:
# Decide which components to keep: a component survives only if its bounding box
# intersects the bounding box of the largest component. keeps[j] belongs to label j + 1.
# (tqdm is already imported in the notebook's import cell.)

# Locate the largest component from the voxel counts instead of assuming it is the
# last entry. On ties the last index wins, which is exactly the previous `bboxes[-1]`
# choice whenever the components are sorted ascending by size.
largest_idx = len(voxel_counts) - 1 - int(np.argmax(voxel_counts[::-1]))
largest_bbox = cc3d_bbox_to_bbox(bboxes[largest_idx])

keeps = []
for idx, b in enumerate(tqdm(bboxes)):
    if idx == largest_idx:
        # the reference component itself is always kept
        keeps.append(True)
        continue
    bbox = cc3d_bbox_to_bbox(b)
    keeps.append(intersect_3d_bbox(largest_bbox, bbox) is not None)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 11187.22it/s]


In [45]:
# Write every kept component into the boolean output mask.
out_mmap = create_mmap_array(out_path, shape=mean_pred.data.shape, dtype=bool)
# Start from an all-False mask, as the cell that first creates the output array does,
# so that no stale voxels survive.
# assumes create_mmap_array does not guarantee zeroed contents — TODO confirm
out_mmap.data[:] = False

for label, image in tqdm(cc3d.each(largest_k_out, binary=True, in_place=True)):
    # labels start from 1 while `keeps` is 0-indexed
    if keeps[label - 1]:
        out_mmap.data[image] = True

out_mmap.data.flush()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.87it/s]


## Trying out Region Graph

In [46]:
# Query cc3d.contacts on the label array (26-connectivity, surface_area=True,
# anisotropy of 64 per axis); presumably this reports the contact area between
# touching labels — confirm against the cc3d docs.
# In the recorded run the result is an empty dict.
surface_per_contact = cc3d.contacts(
    largest_k_out, 
    connectivity=26,
    surface_area=True, 
    anisotropy=(64, 64, 64)
)

In [47]:
# Display the contacts result (empty in the recorded run).
surface_per_contact

{}

# Post Processing Watershed

In [None]:
# Open an existing array read-write from a hard-coded chunk directory and binarise it
# at 0.01 into a contiguous boolean array.
# NOTE(review): this absolute path is independent of the `path` chosen at the top of
# the notebook; the commented-out line is the alternative "out" array of the same chunk.
# out_mmap = read_mmap_array("/home/clay/research/kaggle/sennet/data_dumps/predicted/for_rg/kidney_3_sparse/chunk_00/out/", mode="readwrite")
out_mmap = read_mmap_array("/home/clay/research/kaggle/sennet/data_dumps/predicted/for_rg/kidney_3_sparse/chunk_00/thresholded_prob/", mode="readwrite")
out = np.ascontiguousarray(out_mmap.data > 0.01)

In [None]:
# Label the 26-connected components of the binarised array; n_out_cc is the count
# returned because return_N=True.
out_cc, n_out_cc = cc3d.connected_components(
    out, 
    return_N=True,
    connectivity=26,
)

In [None]:
# Display the component count returned by cc3d.connected_components.
n_out_cc

In [None]:
# Unique label values in out_cc together with the number of voxels carrying each one.
ret = np.unique(out_cc, return_counts=True)
print(ret)

In [None]:
# Split the np.unique result into the label values and their voxel counts.
vals, sizes = ret

In [None]:
# sizes = np.array([x.sum() for (_, x) in tqdm(cc3d.each(
#     out_cc, 
#     binary=False,
#     in_place=True,
# ))])
# print(sizes)
# print([f"{s*100:.3f}" for s in (sizes / sizes.sum())])

In [None]:
# Sort the per-label voxel counts from largest to smallest here, so this cell does not
# depend on a cell further down having been executed first.
sorted_sizes = sorted(sizes, reverse=True)
# Show the ten largest counts.
# NOTE(review): `sizes` comes from np.unique over out_cc, so it presumably includes the
# background label 0 — confirm whether it should be excluded.
sorted_sizes[:10]

In [None]:
# Plot the voxel counts in descending order, as a line with a marker at every rank.
sorted_sizes = sorted(sizes, reverse=True)
plt.plot(sorted_sizes)
plt.scatter(list(range(len(sorted_sizes))), sorted_sizes)

In [None]:
# Keep only the largest (k=1) 26-connected component of the binarised array;
# the count returned alongside it is discarded.
out_largest_cluster, _ = cc3d.largest_k(
    out,
    k=1,
    connectivity=26,
    return_N=True,
)

In [None]:
# Write the largest-component result into the array that was opened read-write above
# and flush it.
# NOTE(review): out_mmap was opened on the ".../thresholded_prob/" directory, so this
# overwrites that array's contents in place — confirm this is intended.
out_mmap.data[:] = out_largest_cluster
out_mmap.data.flush()