# Notebook Details
This notebook enhances training labels by applying pseudo-labeling to the False Positive (FP) regions identified from the original Mask2Former model, using the Segment Anything Model (SAM) for refinement.

At the end of this notebook, a new folder named pseudo_dataset will be generated, containing four folders: (i) train_image (ii) train_mask (iii) val_image (iv) val_mask

* train_image: Same images as Input_data/train_image
* val_image: Same images as Input_data/val_image
* train_mask: GT + pseudo-labeled masks (FP corrected)
* val_mask: GT + pseudo-labeled masks (FP corrected)




**IMPORTANT: This notebook is solely for generating pseudo and ground truth (GT) masks. To retrain the model using the enhanced masks, please use "Notebook 2: Mask2Former" and update the training input directory from "Input_data" to "pseudo_dataset".**

**Note:**
1. "<<<" in the code indicates values that you can modify.

2. This notebook currently focuses on False Positives only, but can be modified to target False Negatives.


    




# Environment Setup & Imports

In [None]:
# Install dependencies if not yet installed
# Only run this section if needed.

# Set UTF-8 locale to avoid encoding-related errors (e.g., with gsutil)
import os
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"

# Install basic dependencies: OpenCV for image processing, matplotlib for visualization
!pip install opencv-python matplotlib

# Clone the Segment Anything repository from GitHub (Meta AI)
!git clone https://github.com/facebookresearch/segment-anything.git
%cd segment-anything
!pip install -e .

# Download the pre-trained SAM (Segment Anything Model) ViT-B weights
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth -O /content/segment-anything/sam_vit_b_01ec64.pth


In [None]:
#<<< Access your bucket or path


# Utility Functions & Model Setup


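This section installs and imports the required libraries and loads the SAM model. Tunable parameters such as image size, confidence thresholds, and input/output paths are set later, in the "Pseudo-Label Generation Loop" section. It also defines two key functions: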



*   generate_pseudo_negative_mask() for SAM segmentation.
*   generate_final_mask() to merge GT and pseudo labels.





In [None]:
# Install the required libraries; skip or delete this cell if they are already installed
!pip install evaluate

In [None]:
# === [Package Locking: ensure stable versions]
# Pins dill to 0.3.6 and evaluate to 0.4.0. -U lets pip replace whatever versions are
# currently installed; --quiet trims pip's console output.
!pip install -U "dill==0.3.6" "evaluate==0.4.0" --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/110.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:

import os
import cv2
import numpy as np
import torch
from tqdm import tqdm

from segment_anything import SamPredictor, sam_model_registry
from transformers import Mask2FormerForUniversalSegmentation, Mask2FormerImageProcessor
import evaluate
import matplotlib.pyplot as plt

# === Pick the compute device: CUDA when a GPU is visible, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# === Build the 'vit_b' (base) SAM variant from the downloaded checkpoint, then move it to the device
sam = sam_model_registry["vit_b"](checkpoint="/content/segment-anything/sam_vit_b_01ec64.pth")
sam = sam.to(device)
# === Wrap the model in a predictor; later cells use it to run SAM inference
predictor = SamPredictor(sam)

  state_dict = torch.load(f)


In [None]:
def generate_pseudo_negative_mask(image, fp_mask, predictor, score_threshold=0.8, area_threshold=100):
    """
    Generate a pseudo-negative mask: SAM's best mask for the FP region, clipped to the FP area.

    The FP mask is eroded once with a 3x3 kernel, the bounding box of the surviving
    pixels is used as a single box prompt for SAM, and the highest-scoring SAM mask is
    intersected with the original (un-eroded) FP mask. Connected components of the
    result whose area is not above ``area_threshold`` are discarded.

    Args:
        image (np.ndarray): RGB image.
        fp_mask (np.ndarray): Binary mask where FP area is 255 (any non-zero pixel is
            treated as FP, both for the box prompt and for clipping).
        predictor (SamPredictor): SAM predictor instance.
        score_threshold (float): SAM mask confidence score threshold.
        area_threshold (int): A connected component is kept only if its pixel area is
            strictly greater than this value.

    Returns:
        np.ndarray: Pseudo-negative mask (uint8, 0 or 255) with the shape of ``fp_mask``.
        All zeros when the FP mask is empty (before or after erosion) or when the best
        SAM score is below ``score_threshold``.
    """
    empty_mask = np.zeros(fp_mask.shape, dtype=np.uint8)

    # Nothing to refine: return before doing any image processing or SAM work.
    if not np.any(fp_mask):
        return empty_mask

    # Erode FP mask to reduce border noise
    kernel = np.ones((3, 3), np.uint8)
    fp_mask_eroded = cv2.erode(fp_mask, kernel, iterations=1)

    # Use "non-zero" here as well, so the box prompt and the clipping step below agree
    # on what counts as an FP pixel.
    ys, xs = np.nonzero(fp_mask_eroded)
    if xs.size == 0:
        return empty_mask

    # Compute the image embedding only once we know a prompt exists; this is the
    # expensive step and was previously executed even for empty FP masks.
    predictor.set_image(image)

    # Bounding box (x1, y1, x2, y2) around all remaining FP pixels, used as the prompt
    input_box = np.array([xs.min(), ys.min(), xs.max(), ys.max()])

    masks, scores, _ = predictor.predict(
        box=input_box[None, :],
        multimask_output=True
    )

    best_idx = int(np.argmax(scores))
    if scores[best_idx] < score_threshold:
        return empty_mask

    # Only keep confident SAM mask clipped by FP
    pseudo_negative_mask = np.logical_and(masks[best_idx], fp_mask > 0).astype(np.uint8) * 255

    # Remove small noise areas: keep components whose area exceeds the threshold.
    _, labels, stats, _ = cv2.connectedComponentsWithStats(pseudo_negative_mask, connectivity=8)
    keep_label = stats[:, cv2.CC_STAT_AREA] > area_threshold
    keep_label[0] = False  # label 0 is skipped, exactly as the original 1-based loop did
    cleaned_mask = np.where(keep_label[labels], 255, 0).astype(np.uint8)

    return cleaned_mask


In [None]:
def generate_final_mask(gt_mask, pred_mask, pseudo_negative_mask, save_path=None):
    """
    Combine GT and pseudo-negative mask into final mask and optionally save it.

    The result starts as all 255. GT foreground pixels are set to 1, and pixels that
    are both a model false positive (pred == 1, GT == 0) and marked 255 in the
    pseudo-negative mask are set to 0. Every other pixel stays 255.

    Args:
        gt_mask (np.ndarray): Ground truth binary mask (0/1).
        pred_mask (np.ndarray): Predicted mask from model (0/1).
        pseudo_negative_mask (np.ndarray): Pseudo mask from SAM (0/255). Resized with
            nearest-neighbour interpolation if its shape differs from ``gt_mask``.
        save_path (str, optional): Path to save the final mask. If None, mask is not saved.
            The parent directory is created when the path has one.

    Returns:
        np.ndarray: Final mask for training (values: 0, 1, 255).
    """
    # Shape alignment (cv2.resize takes the target size as (width, height))
    if pseudo_negative_mask.shape != gt_mask.shape:
        pseudo_negative_mask = cv2.resize(
            pseudo_negative_mask,
            (gt_mask.shape[1], gt_mask.shape[0]),
            interpolation=cv2.INTER_NEAREST
        )

    final_mask = np.full_like(gt_mask, fill_value=255)

    # Keep true buildings
    final_mask[gt_mask == 1] = 1

    # Replace confident FP regions with background (0)
    fp_area = (pred_mask == 1) & (gt_mask == 0)
    sam_fp_area = (pseudo_negative_mask == 255) & fp_area
    final_mask[sam_fp_area] = 0

    # Optional save
    if save_path is not None:
        out_dir = os.path.dirname(save_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        cv2.imwrite(save_path, final_mask)

    # Previously missing: without this the function returned None despite its docstring.
    return final_mask

# Pseudo-Label Generation Loop

This section performs the core logic:

*   Loads images and GT masks from Input_data
*   Runs Mask2Former to generate predictions
*   Computes mIoU and identifies poor predictions
*   Applies SAM on FP regions when necessary
*   Saves results into pseudo_dataset

Both train and val sets are processed.

In [None]:
# Return to the root content directory (to avoid path issues):
# the setup cell changed into segment-anything, while the paths used below
# ("Input_data", "outputs/model_iou", "pseudo_dataset") are relative.
%cd /content

/content


In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

# === Default parameters
# Tunable settings read by the processing loop below ("<<<" marks values meant to be edited).
miou_threshold = 0.7           #<<< Skip SAM if mIoU is already good
score_threshold = 0.8          #<<< Confidence threshold for SAM masks
area_threshold = 100           #<<< Minimum area (in pixels) for valid SAM regions
image_size = (256, 256)        #<<< Resize target size for all inputs

# Load trained Mask2Former model (modify path if needed).
# The path is relative to the current working directory; the model is moved to
# `device` and switched to eval mode.
inf_model = Mask2FormerForUniversalSegmentation.from_pretrained("outputs/model_iou").to(device).eval()

# Image processor to format input image.
# NOTE(review): constructed with default settings rather than loaded from the model
# directory; confirm these match the preprocessing used when the model was trained.
processor = Mask2FormerImageProcessor()

# Evaluation metric (e.g., mIoU), loaded through the `evaluate` library
infer_metric = evaluate.load("mean_iou")

# === Process both train and val sets
# For every image: predict with Mask2Former, score the prediction against the GT mask,
# and either copy the GT mask unchanged (good prediction) or build a GT + pseudo-label
# mask from the false-positive region (poor prediction).
for split in ["train", "val"]:
    print(f"\nProcessing {split} set...")

    input_img_dir = os.path.join("Input_data", f"{split}_image")
    input_mask_dir = os.path.join("Input_data", f"{split}_mask")

    output_img_dir = os.path.join("pseudo_dataset", f"{split}_image")
    output_mask_dir = os.path.join("pseudo_dataset", f"{split}_mask")

    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    image_files = sorted(os.listdir(input_img_dir))

    for img_file in tqdm(image_files):
        # === 1. Load image and GT mask
        # Mask files share the image file name with "img" replaced by "mask".
        mask_file = img_file.replace("img", "mask")
        img_path = os.path.join(input_img_dir, img_file)
        mask_path = os.path.join(input_mask_dir, mask_file)

        # cv2.imread returns None instead of raising, so fail here with the offending
        # path rather than with an opaque error inside cv2.resize.
        orig_image = cv2.imread(img_path)
        if orig_image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        orig_image = cv2.resize(orig_image, image_size)
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)

        gt_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if gt_mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # Label maps must be resized with nearest-neighbour: the default bilinear
        # interpolation blends neighbouring class ids along object borders.
        gt_mask = cv2.resize(gt_mask, image_size, interpolation=cv2.INTER_NEAREST)

        # === 2. Predict with Mask2Former
        inputs = processor(images=image, return_tensors="pt").to(device)
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            outputs = inf_model(**inputs)
        # target_sizes expects (height, width); image_size is in cv2's (width, height)
        # order, so take the size from the resized array itself.
        labels = processor.post_process_semantic_segmentation(
            outputs, target_sizes=[image.shape[:2]]
        )[0]
        pred_mask = labels.cpu().numpy()

        # === 3. Compute mIoU
        infer_metric.add_batch(references=[gt_mask], predictions=[pred_mask])
        miou = infer_metric.compute(num_labels=2, ignore_index=255, reduce_labels=False)['mean_iou']

        # === 4. Save the (resized) image; it is written in both branches below
        out_img_path = os.path.join(output_img_dir, img_file)
        out_mask_path = os.path.join(output_mask_dir, mask_file)
        cv2.imwrite(out_img_path, orig_image)

        # Save original mask if mIoU >= threshold
        if miou >= miou_threshold:
            cv2.imwrite(out_mask_path, gt_mask)
            continue

        # === 5. Generate pseudo-negative mask using SAM
        # Default: False Positive (predicted as 1, but GT is 0)
        fp_mask = ((pred_mask == 1) & (gt_mask == 0)).astype(np.uint8) * 255
        # Optional: Target False Negatives instead (predicted as 0, but GT is 1)
        # fn_mask = ((pred_mask == 0) & (gt_mask == 1)).astype(np.uint8) * 255

        pseudo_negative_mask = generate_pseudo_negative_mask(
            image=image,
            fp_mask=fp_mask,
            predictor=predictor,
            score_threshold=score_threshold,
            area_threshold=area_threshold
        )

        # === 6. Merge GT and pseudo mask; the helper writes the result to out_mask_path
        generate_final_mask(
            gt_mask=gt_mask,
            pred_mask=pred_mask,
            pseudo_negative_mask=pseudo_negative_mask,
            save_path=out_mask_path
        )


Downloading builder script:   0%|          | 0.00/13.1k [00:00<?, ?B/s]


Processing train set...


  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
100%|██████████| 600/600 [03:35<00:00,  2.78it/s]



Processing val set...


100%|██████████| 200/200 [01:11<00:00,  2.80it/s]
