In [None]:
import pathlib
import multiprocessing as mproc
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from mayoclinic.slide import SlideManager

import warnings
from rasterio.errors import NotGeoreferencedWarning
# Suppress every NotGeoreferencedWarning (imported from rasterio.errors) for the
# rest of the session.
# NOTE(review): presumably emitted when the slide TIFFs are opened without
# geospatial metadata — confirm against SlideManager.
warnings.filterwarnings("ignore", category=NotGeoreferencedWarning)

In [None]:
# --- Input locations (relative to this notebook) ---------------------------
DATA_ROOT = pathlib.Path("../../data")
DATASET_DIR = DATA_ROOT / "input/mayo-clinic-strip-ai"
TRAIN_TIFF_DIR = DATASET_DIR / "train"
TEST_TIFF_DIR = DATASET_DIR / "test"

# --- Output locations ------------------------------------------------------
OUTPUT_DIR = DATA_ROOT / "working"
OUTPUT_FG_DIR = OUTPUT_DIR / "fg_ids"
OUTPUT_DOWSCALED_DIR = OUTPUT_DIR / "downscaled"

# --- Compute resources -----------------------------------------------------
# First CUDA device when one is available, otherwise the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Number of CPUs reported by multiprocessing.
N_CPUS = mproc.cpu_count()

In [None]:
# Create both output directories (and any missing parents); existing
# directories are left untouched.
for out_dir in (OUTPUT_FG_DIR, OUTPUT_DOWSCALED_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Additional output sub-directories, named "embs" and "fg_ids" under OUTPUT_DIR.
embeddings_dir = OUTPUT_DIR / "embs"
foreground_ids_dir = OUTPUT_DIR / "fg_ids"

# Metadata tables stored next to the slide folders.
train_df = pd.read_csv(DATASET_DIR / "train.csv")
test_df = pd.read_csv(DATASET_DIR / "test.csv")

# One "<image_id>.tif" path under TRAIN_TIFF_DIR per row of train.csv.
slide_paths = [
    TRAIN_TIFF_DIR / (image_id + ".tif")
    for image_id in train_df["image_id"]
]

In [None]:
# Collect the ids of the slides that are actually present on disk: the file
# name without its final ".tif" suffix (Path.stem keeps any earlier dots).
slide_ids = [tiff_path.stem for tiff_path in TRAIN_TIFF_DIR.glob("*.tif")]

# Restrict the metadata table to rows whose slide file exists, so later cells
# never try to open a missing TIFF.
train_df = train_df[train_df["image_id"].isin(slide_ids)]

In [None]:
# Parameters forwarded to SlideManager in the next cell.
# NOTE(review): the exact meaning of each value is defined by
# mayoclinic.slide.SlideManager — confirm there before tuning.
CONFIG = {
    # Passed as SlideManager(window_yx=...).
    "window_yx": (256, 256),
    # Background-detection settings; the first two are passed as keyword
    # arguments, the rest are packed into `slide_thresh_params`.
    "bg_detection": {
        "tile_fg_criterion": 0.01,
        "tile_bg_brightness": 0.99,
        "block_size_factor": 0.05,
        "offset": 1,
        "erode_n_it": 2,
        # Side length of the square all-ones erosion kernel.
        "erode_kernel_size": 5,
    },
}

In [None]:
# Instantiate the slide manager from CONFIG. The background-detection values
# are read once into a local mapping to avoid repeating the nested lookup.
bg_cfg = CONFIG["bg_detection"]
kernel_side = bg_cfg["erode_kernel_size"]

slide_manager = SlideManager(
    window_yx=CONFIG["window_yx"],
    tile_fg_criterion=bg_cfg["tile_fg_criterion"],
    tile_bg_brightness=bg_cfg["tile_bg_brightness"],
    slide_thresh_params={
        "block_size_factor": bg_cfg["block_size_factor"],
        "offset": bg_cfg["offset"],
        "erode_n_it": bg_cfg["erode_n_it"],
        # Square (kernel_side x kernel_side) array filled with ones.
        "erode_kernel": np.ones((kernel_side, kernel_side)),
    },
)

In [None]:
# Run the slide manager on the selected training slides and cache, per slide,
# its foreground map and its downscaled image as .npy files.
# NOTE(review): only the row at position 2 is selected — this looks like a
# leftover debugging slice; widen it to process more slides.
slides_to_process = train_df[2:3]

# `total` must describe the rows actually iterated; using len(train_df) made
# the progress bar stop far short of completion.
for idx, sample in tqdm(slides_to_process.iterrows(), total=len(slides_to_process)):
    slide_path = TRAIN_TIFF_DIR / (sample.image_id + ".tif")
    slide_manager.new_slide(slide_path, n_cpus=N_CPUS)

    # allow_pickle=False: the arrays are written as plain .npy data, never pickled.
    np.save(
        OUTPUT_FG_DIR / (sample.image_id + ".npy"),
        slide_manager.foreground_map,
        allow_pickle=False,
    )
    np.save(
        OUTPUT_DOWSCALED_DIR / (sample.image_id + ".npy"),
        slide_manager.downscaled,
        allow_pickle=False,
    )

In [None]:
# Visual sanity check: for the first 10 rows of train_df, show the cached
# downscaled slide, its foreground map, and the map overlaid on the slide.
for idx, sample in train_df[0:10].iterrows():
    fg_path = OUTPUT_FG_DIR / (sample.image_id + ".npy")
    downscaled_path = OUTPUT_DOWSCALED_DIR / (sample.image_id + ".npy")

    # The processing cell may have cached only a subset of the slides; skip the
    # ones without cached arrays instead of failing with FileNotFoundError.
    if not (fg_path.exists() and downscaled_path.exists()):
        print(f"Skipping {sample.image_id}: cached arrays not found")
        continue

    foreground_map = np.load(fg_path)
    downscaled = np.load(downscaled_path)

    # Move axis 0 to the end for imshow.
    # NOTE(review): presumably the array is stored channels-first — confirm.
    downscaled_image = np.moveaxis(downscaled, 0, -1)

    # Four-channel overlay: the map scaled by 255 in the first three channels
    # and a constant 100 in the last one.
    # NOTE(review): assumes an integer/bool map so imshow reads this as 0-255
    # RGBA — confirm the dtype of foreground_map.
    overlay = np.stack(
        [foreground_map * 255] * 3 + [np.ones_like(foreground_map) * 100],
        axis=-1,
    )

    fig, axs = plt.subplots(1, 3, figsize=(16, 16))
    axs[0].imshow(downscaled_image)
    axs[0].set_title(f"{sample.image_id} - downscaled")
    axs[1].imshow(foreground_map)
    axs[1].set_title(f"{sample.image_id} - foreground map")
    axs[2].imshow(downscaled_image)
    axs[2].imshow(overlay)
    axs[2].set_title(f"{sample.image_id} - overlay")
    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)