In [None]:
from pathlib import Path

import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Class index -> growth-pattern name, listed in index order (0..6).
# NOTE: the name at index 1 ("cribriform") is unclear — green is not in the
# original doc, so treat that entry as unconfirmed.
pattern_map = dict(
    enumerate(
        (
            "background",
            "cribriform",
            "micropapillary",
            "solid",
            "papillary",
            "acinar",
            "lepidic",
        )
    )
)

In [None]:
# Column name -> raw mask value used when computing per-class pixel ratios.
#
# The tentative pattern names (background=0, cribriform=1, micropapillary=2,
# solid=3, papillary=4, acinar=5, lepidic=6) are deliberately NOT used as keys:
# the true class mapping is not confirmed yet, so the columns keep the neutral
# names "label0" .. "label6". The earlier named dict was assigned and then
# immediately overwritten, so it has been dropped as dead code.
pattern_to_class = {f"label{class_id}": class_id for class_id in range(7)}

In [None]:
# Read a single example mask. IMREAD_UNCHANGED asks OpenCV to return the file
# as stored rather than converting it to a 3-channel BGR image.
# NOTE(review): `mask` is rebound inside the ratio loop further down, so this
# looks like a one-off sanity check — confirm it is still needed.
mask = cv2.imread(
    "/home/valentin/workspaces/luadseg/data/processed/training_patches/maskPng/train001_Da382_3.png",
    cv2.IMREAD_UNCHANGED,
)

In [None]:
# Image patches and their PNG masks are stored in two sibling folders.
images_directory = Path("/home/valentin/workspaces/luadseg/data/processed/training_patches/image")
masks_directory = Path("/home/valentin/workspaces/luadseg/data/processed/training_patches/maskPng")

# Resolved (absolute) path of every PNG directly inside the masks folder.
# glob() makes no ordering promise, so the list order is filesystem-dependent.
mask_paths = list(map(Path.resolve, masks_directory.glob("*.png")))

In [None]:
# Empty frame with an id column plus one column per pattern name.
# NOTE(review): the cell that computes the ratios rebinds `ratio_df` to a newly
# built frame, so this placeholder (and its column names) is not what ends up
# being saved — confirm whether it is still wanted.
ratio_df = pd.DataFrame(
    columns=[
        "image_id",
        "background",
        "lepidic",
        "papillary",
        "acinar",
        "cribriform",
        "micropapillary",
        "solid",
    ]
)


In [None]:

# Build one record per readable mask: the image id plus, for every entry of
# `pattern_to_class`, the fraction of mask pixels equal to that class value.
ratios_list = []
for mask_path in tqdm(mask_paths, desc="Processing masks"):
    image_id = mask_path.stem
    mask = cv2.imread(str(mask_path), cv2.IMREAD_UNCHANGED)

    if mask is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; report it and move on to the next mask.
        print(f"Mask not found for {image_id}")
        continue

    # Fraction (not count) of pixels per class: the mean of a boolean array is
    # the share of True entries.
    pattern_dict = {k: np.mean(mask == v) for k, v in pattern_to_class.items()}

    ratios_list.append({"image_id": image_id, **pattern_dict})

# Name the columns explicitly so the frame keeps its schema even when no mask
# could be read. Without this, an empty list yields a column-less frame and a
# later sort on "image_id" fails with KeyError.
ratio_df = pd.DataFrame(ratios_list, columns=["image_id", *pattern_to_class])

In [None]:
# Order the rows by image id, renumber the index from 0, then preview the top rows.
ratio_df = ratio_df.sort_values("image_id", ignore_index=True)
ratio_df.head()

In [None]:
# Write the per-image class ratios to CSV, leaving out the row index.
ratio_df.to_csv(
    path_or_buf="/home/valentin/workspaces/luadseg/data/processed/training_patches/class_ratio.csv",
    index=False,
)

In [None]:
# Mask label value -> RGB colour used when rendering a mask.
# Class names in the comments follow the notebook's tentative naming; the name
# for label 1 in particular is unconfirmed.
label_colors = {
    # background: black
    0: (0, 0, 0),
    # cribriform (tentative): pure green
    1: (0, 255, 0),
    # micropapillary: magenta
    2: (255, 0, 255),
    # solid: dark red
    3: (128, 0, 0),
    # papillary: yellow
    4: (255, 255, 0),
    # acinar: red
    5: (255, 0, 0),
    # lepidic: blue
    6: (0, 0, 255),
}

def visualize_data(image_id, images_directory, masks_directory):
    """Show an image patch side by side with its colour-coded mask.

    Both files are looked up as ``<image_id>.png`` inside the given
    directories. If either file cannot be read, a message is printed and the
    function returns ``None`` without drawing anything.

    Args:
        image_id: File stem shared by the image and its mask.
        images_directory: Directory containing the image PNGs; must support
            the ``/`` path operator.
        masks_directory: Directory containing the mask PNGs; must support
            the ``/`` path operator.
    """
    image_file = images_directory / f"{image_id}.png"
    mask_file = masks_directory / f"{image_id}.png"

    bgr_image = cv2.imread(str(image_file), cv2.IMREAD_COLOR)
    if bgr_image is None:
        print(f"Image not found: {image_file}")
        return
    # OpenCV loads colour images as BGR; matplotlib expects RGB.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    label_mask = cv2.imread(str(mask_file), cv2.IMREAD_UNCHANGED)
    if label_mask is None:
        print(f"Mask not found: {mask_file}")
        return

    # Paint every label listed in `label_colors`; any other value stays black
    # because the canvas starts out zero-filled.
    colored_mask = np.zeros(label_mask.shape + (3,), dtype=np.uint8)
    for label_value, rgb_color in label_colors.items():
        colored_mask[label_mask == label_value] = rgb_color

    # Left panel: the image. Right panel: the colourised mask.
    fig, (image_ax, mask_ax) = plt.subplots(1, 2, figsize=(12, 6))

    image_ax.imshow(rgb_image)
    image_ax.set_title(f"Image: {image_id}")
    image_ax.axis("off")

    mask_ax.imshow(colored_mask)
    mask_ax.set_title("Segmentation Mask")
    mask_ax.axis("off")

    fig.tight_layout()
    plt.show()

In [None]:
# Display one training patch together with its mask.
visualize_data(
    image_id="train021_Da35_7",
    images_directory=images_directory,
    masks_directory=masks_directory,
)