# In [None]:
import os
import openslide
import numpy as np
import cv2
from tqdm import tqdm

# --- Paths -----------------------------------------------------------------
# Folder that holds the .mrxs slides; the extraction loop reads both the
# training and the test slide IDs from this single location.
MRXS_FOLDER_TRAIN = "F:\\Peleg\\2021-06-14 black box 3"
# Destination folders for the extracted PNG tiles.
TRAIN_OUTPUT = "C:\\Users\\shpigel-lab\\Desktop\\tiles\\train"
TEST_OUTPUT = "C:\\Users\\shpigel-lab\\Desktop\\tiles\\test"

# --- Tiling parameters -----------------------------------------------------
PATCH_SIZE = 512  # edge length of each square tile, in pixels
STRIDE = 256      # step between tile origins (half a tile, so tiles overlap)
LEVEL = 0         # slide pyramid level that tiles are read from

# --- Tile rejection --------------------------------------------------------
# A tile is discarded when more than this fraction of its pixels is
# near-black / near-white (see is_bad_patch).
BLACK_THRESHOLD = 0.8
WHITE_THRESHOLD = 0.8

# Make sure both destination folders exist before any tile is written.
for _tile_dir in (TRAIN_OUTPUT, TEST_OUTPUT):
    os.makedirs(_tile_dir, exist_ok=True)

# --- Train / test split ----------------------------------------------------
# Slide IDs are "2M" followed by a zero-padded two-digit number:
# 2M04..2M12 go to training, 2M13..2M19 go to test.
train_ids = [f"2M{n:02d}" for n in range(4, 13)]
test_ids = [f"2M{n:02d}" for n in range(13, 20)]

def is_bad_patch(patch_np):
    """Tell whether a tile is dominated by near-black or near-white pixels.

    The tile is converted from RGB to grayscale. It is rejected when the share
    of pixels with gray value below 20 exceeds ``BLACK_THRESHOLD`` or the share
    of pixels with gray value above 235 exceeds ``WHITE_THRESHOLD``.

    Args:
        patch_np: RGB image array accepted by ``cv2.cvtColor``.

    Returns:
        A truthy value if the tile should be discarded, falsy otherwise.
    """
    luminance = cv2.cvtColor(patch_np, cv2.COLOR_RGB2GRAY)
    pixel_count = luminance.size

    # Fractions of the tile that are (almost) pure black / pure white.
    dark_fraction = np.sum(luminance < 20) / pixel_count
    bright_fraction = np.sum(luminance > 235) / pixel_count

    mostly_dark = dark_fraction > BLACK_THRESHOLD
    mostly_bright = bright_fraction > WHITE_THRESHOLD
    return mostly_dark or mostly_bright

def extract_patches(slide_path, slide_name, output_folder):
    """Tile a whole-slide image into overlapping patches and save the usable ones.

    The slide is scanned on a regular grid at pyramid level ``LEVEL`` using
    ``PATCH_SIZE`` x ``PATCH_SIZE`` windows whose origins are ``STRIDE`` pixels
    apart. Patches rejected by ``is_bad_patch`` are skipped; the others are
    written to ``output_folder`` as ``{slide_name}_{x}_{y}.png``, where ``x``
    and ``y`` are the top-left corner of the patch in ``LEVEL`` pixel
    coordinates.

    Args:
        slide_path: Path to a slide file that OpenSlide can open.
        slide_name: Identifier used for the progress bar label and file names.
        output_folder: Directory that receives the PNG tiles.

    Returns:
        The number of tiles that were successfully written.
    """
    saved = 0
    failed = 0

    # The context manager releases the slide handle even if reading fails.
    with openslide.OpenSlide(slide_path) as slide:
        w, h = slide.level_dimensions[LEVEL]
        # read_region() takes its location in level-0 coordinates, so grid
        # positions computed at LEVEL have to be scaled by the downsample
        # factor (1.0 when LEVEL == 0, leaving the coordinates unchanged).
        downsample = slide.level_downsamples[LEVEL]

        # "+ 1" keeps the last window that still fits entirely in the slide.
        for y in tqdm(range(0, h - PATCH_SIZE + 1, STRIDE), desc=slide_name):
            y_level0 = int(round(y * downsample))
            for x in range(0, w - PATCH_SIZE + 1, STRIDE):
                location = (int(round(x * downsample)), y_level0)
                patch = slide.read_region(
                    location, LEVEL, (PATCH_SIZE, PATCH_SIZE)
                ).convert("RGB")
                patch_np = np.array(patch)

                if is_bad_patch(patch_np):
                    continue

                fname = f"{slide_name}_{x}_{y}.png"
                # OpenCV expects BGR channel order when encoding.
                written = cv2.imwrite(
                    os.path.join(output_folder, fname),
                    cv2.cvtColor(patch_np, cv2.COLOR_RGB2BGR),
                )
                # imwrite() reports failure through its return value instead
                # of raising, so only count tiles that actually reached disk.
                if written:
                    saved += 1
                else:
                    failed += 1

    print(f"{slide_name}: saved {saved} tiles.")
    if failed:
        print(f"{slide_name}: failed to write {failed} tiles to {output_folder}")
    return saved

# Process every slide: training IDs first, then test IDs. Slides whose .mrxs
# file is absent from the source folder are reported and skipped.
for slide_id in [*train_ids, *test_ids]:
    mrxs_path = os.path.join(MRXS_FOLDER_TRAIN, f"{slide_id}.mrxs")

    if os.path.exists(mrxs_path):
        # Anything not listed as a training slide is written to the test folder.
        destination = TEST_OUTPUT if slide_id not in train_ids else TRAIN_OUTPUT
        extract_patches(mrxs_path, slide_id, destination)
    else:
        print(f"⚠️ Missing file: {mrxs_path}")
