In [1]:
import os

# Enable the autoreload extension; mode 2 re-imports all modules before each
# cell is executed, so edits to imported modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

# Change the working directory two levels up. The relative "data/..." paths
# used further down are resolved against this directory.
# NOTE(review): this is not idempotent - re-running the cell climbs two more
# levels each time, so run it exactly once per kernel session.
%cd ../../

# Print the name of the host the kernel is running on.
!hostname

/p/fastdata/pli/Private/oberstrass1/datasets/vervet1818-3d
jrlogin08.jureca


In [2]:
import sys, os
import re

import h5py as h5
import numpy as np
import pandas as pd

import imageio
from tqdm import tqdm

import pli
import pli.image as im

In [None]:
# Configuration for building the cortex label volume

# Integer labels as they occur in the per-section masks
WM, GM, BG = 1, 2, 3

# Directory that holds one mask file per section
cortex_path = "data/aa/masks/cortex/"

# Name of the dataset inside the output HDF5 file
data_group = 'volume'

# Pyramid level of the section masks that the volume is built from
pyramid = 4

# Value written to the volume for every mask label. Alternative mappings:
#   grey values: {WM: 200, GM: 100, BG: 0}
#   pial:        {WM: True, GM: True, BG: False}
#   wm:          {WM: True, GM: False, BG: False}
class_mapping = {
    WM: 200,
    GM: 100,
    BG: 0,
}

# Element type of the volume (np.uint8 for grey values, a boolean dtype for
# the True/False mappings listed above)
dtype = np.uint8

# Chunk shape of the HDF5 dataset, in (z, y, x) order
chunks = (1, 256, 256)

# Distance between two consecutive sections, in mu (presumably micrometres)
section_thickness = 60

# Compression filter applied to the HDF5 dataset
compression = 'gzip'

# Output file; the pyramid level is part of the file name
out_file = f"data/aa/volume/cortex/cortex_{pyramid}.h5"

###

# Collect the mask files in `cortex_path`. The section number is the
# four-digit group following an "s" and preceding an "_" in the file name.
section_pattern = re.compile('.*s([0-9]{4})_.*')

sections = []
for file_name in os.listdir(cortex_path):
    hit = section_pattern.match(file_name)
    if hit is None:
        # Not a section mask (e.g. a stray hidden file): skip it instead of
        # failing on `None[1]`.
        print(f"Skip '{file_name}': no section number in file name")
        continue
    sections.append({'id': int(hit[1]), 'file': file_name})

if not sections:
    raise FileNotFoundError(f"No section masks found in '{cortex_path}'")

# One row per section, ordered by section number, with a fresh 0..n-1 index
sections_df = pd.DataFrame(sections).sort_values('id').reset_index(drop=True)

# Extent of the volume along z. Plain Python ints keep the derived shape and
# the printed lists free of numpy scalar types.
z_min = int(sections_df.id.min())
z_max = int(sections_df.id.max())
z_stacks = z_max - z_min + 1

# Section numbers inside [z_min, z_max] for which no mask file exists
missing = sorted(set(range(z_min, z_max + 1)) - set(sections_df.id))

print(f"Found {len(sections_df)} sections with ids {z_min} to {z_max}")
print("Missing", missing)

# Build the label volume: one z-slice per section number, stored in the
# `data_group` dataset of `out_file`. Slices of sections that have a mask file
# are written first; slices of missing sections are then inferred from their
# two nearest neighbours.

# Create the output directory up front so that opening the file for writing
# does not fail on a missing folder.
out_dir = os.path.dirname(out_file)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

with h5.File(out_file, 'w') as h5_out:
    print("Create H5 File")
    # The first section serves as reference for spacing and in-plane shape
    ref_section = pli.data.Section(os.path.join(cortex_path, sections_df.file[0]))

    # Determine spacing of the volume (z, y, x): the in-plane spacing of the
    # section is scaled by 2 ** pyramid
    in_plane_scale = 2 ** pyramid
    out_spacing = (
        section_thickness,
        in_plane_scale * ref_section.spacing[0],
        in_plane_scale * ref_section.spacing[1],
    )
    print(f"Volume spacing:\t{out_spacing}")

    # Determine output shape of the volume (z, y, x)
    out_shape = (z_stacks, *ref_section.pyramid[pyramid].shape)
    print(f"Volume shape:\t{out_shape}")

    # 3x4 matrix with the spacing on the diagonal and a zero last column
    affine = np.eye(4)[:3] * np.array(out_spacing)[None].T
    h5_out['affine'] = affine

    # Create empty dataset
    v_ds = h5_out.create_dataset(
        name=data_group,
        shape=out_shape,
        dtype=dtype,
        chunks=chunks,
        compression=compression,
    )
    v_ds.attrs['spacing'] = out_spacing
    v_ds.attrs['BG'] = class_mapping[BG]
    v_ds.attrs['GM'] = class_mapping[GM]
    v_ds.attrs['WM'] = class_mapping[WM]

    # Element-wise label -> value lookup; built once instead of per section
    map_classes = np.vectorize(class_mapping.get, otypes=[dtype])

    print("Write masks to volume...")
    for row in tqdm(sections_df.itertuples(index=False), total=len(sections_df)):
        # Load the mask of *this* row. Carrying a section object over from the
        # previous iteration would write every mask one slice too late.
        section = pli.data.Section(os.path.join(cortex_path, row.file))
        mask = section.pyramid[pyramid][:]

        # Map class indices to their values and write the slice
        v_ds[row.id - z_min] = map_classes(mask)

    print("Infer missing masks...")
    # Section numbers whose slice is already in the volume. Inferred slices
    # are added as they are written, so a run of consecutive missing sections
    # is filled from its already inferred neighbours.
    filled_ids = list(sections_df.id)
    for m in tqdm(missing):
        # Get the two closest sections to infer values from. The stable sort
        # makes the choice between equidistant neighbours reproducible, which
        # matters because the two masks are not treated symmetrically below.
        candidates = np.asarray(filled_ids)
        nearest = np.argsort(np.abs(candidates - m), kind='stable')[:2]
        id_1, id_2 = candidates[nearest]
        ix_1 = id_1 - z_min
        ix_2 = id_2 - z_min
        print(f"Infer mask between {id_1}({ix_1}) and {id_2}({ix_2})")

        # Start from the closest slice (reading from the dataset returns an
        # in-memory array, so the volume itself is not modified here) and
        # mark everything as GM that is GM in the second closest slice.
        mask_inter = v_ds[ix_1]
        mask_2 = v_ds[ix_2]
        mask_inter[mask_2 == class_mapping[GM]] = class_mapping[GM]

        # Write mask to volume
        v_ds[m - z_min] = mask_inter
        filled_ids.append(m)

# Record the inferred sections (without a file) in the section table. This is
# done once with pd.concat because DataFrame.append no longer exists in
# pandas >= 2.0.
if missing:
    sections_df = pd.concat(
        [sections_df, pd.DataFrame({'id': missing, 'file': None})],
        ignore_index=True,
    )