In [29]:
import cv2
import json
import numpy as np
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any

In [27]:
def convert_segment_masks_to_yolo_seg(masks_dir: str, output_dir: str, bbox_output_dir: str = None, classes: int = 80):
    """
    Convert a directory of binary segmentation masks to YOLO label files.

    Every supported image in ``masks_dir`` is read as grayscale. The external
    contours of the pixels equal to 255 are written as one normalized polygon
    per line (``class x1 y1 x2 y2 ...``) to ``<output_dir>/<stem>.txt``. When
    ``bbox_output_dir`` is given, the bounding rectangle of every contour is
    additionally written as ``class cx cy w h`` (normalized) to
    ``<bbox_output_dir>/<stem>.txt``.

    Args:
        masks_dir (str): Directory containing the mask images
            (png, jpg, jpeg, tif, tiff; the extension check is case-insensitive).
        output_dir (str): Directory that receives the YOLO segmentation label files.
            Created if it does not exist.
        bbox_output_dir (str, optional): Directory that receives the YOLO bounding-box
            label files. Created if it does not exist. When ``None`` (default) no
            bounding-box files are written.
        classes (int): Total classes in the dataset. Currently unused: the
            pixel-to-class mapping below is fixed to binary masks (255 -> class 0).
            Kept so existing callers keep working.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    seg_dir = Path(output_dir)
    seg_dir.mkdir(parents=True, exist_ok=True)

    # The bounding-box output is optional; Path(None) would raise TypeError.
    bbox_dir = Path(bbox_output_dir) if bbox_output_dir is not None else None
    if bbox_dir is not None:
        bbox_dir.mkdir(parents=True, exist_ok=True)

    # Multiclass alternative (pixel value i + 1 -> class i):
    # pixel_to_class_mapping = {i + 1: i for i in range(classes)}

    # For binary masks: pixel 0 = background, pixel 255 = class 0
    pixel_to_class_mapping = {255: 0}

    supported_suffixes = {".png", ".jpg", ".jpeg", ".tif", ".tiff"}

    # sorted() makes the processing (and log) order deterministic.
    for mask_path in sorted(Path(masks_dir).iterdir()):
        if mask_path.suffix.lower() not in supported_suffixes:
            continue

        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)  # Read the mask image in grayscale
        if mask is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f"Warning: Could not read {mask_path}")
            continue

        img_height, img_width = mask.shape
        print(f"Processing {mask_path} imgsz = {img_height} x {img_width}")

        yolo_format_data = []
        bbox_format_data = []

        # Each distinct non-zero pixel value is a candidate class.
        for value in np.unique(mask):
            if value == 0:
                continue  # Skip background
            class_index = pixel_to_class_mapping.get(value, -1)
            if class_index == -1:
                print(f"Unknown class for pixel value {value} in file {mask_path}, skipping.")
                continue

            # Create a binary mask for the current class and find its outer contours
            binary_mask = (mask == value).astype(np.uint8)
            contours, _ = cv2.findContours(
                binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )

            for contour in contours:
                if len(contour) >= 3:  # YOLO requires at least 3 points for a valid segmentation
                    yolo_format = [class_index]
                    # reshape(-1, 2) flattens the (N, 1, 2) contour into N (x, y) rows
                    for point in contour.reshape(-1, 2):
                        # Normalize the coordinates, rounded to 6 decimal places
                        yolo_format.append(round(point[0] / img_width, 6))
                        yolo_format.append(round(point[1] / img_height, 6))
                    yolo_format_data.append(yolo_format)

                # NOTE: a bounding box is recorded for every contour, including
                # degenerate ones (< 3 points) that get no polygon above, so the
                # bbox file can contain more rows than the segmentation file.
                x, y, w, h = cv2.boundingRect(contour)
                bbox_format_data.append([
                    class_index,
                    round((x + w / 2) / img_width, 6),   # normalized center x
                    round((y + h / 2) / img_height, 6),  # normalized center y
                    round(w / img_width, 6),             # normalized width
                    round(h / img_height, 6),            # normalized height
                ])

        # Save YOLO segmentation data to file
        seg_output_path = seg_dir / f"{mask_path.stem}.txt"
        _write_yolo_label_rows(seg_output_path, yolo_format_data)

        bbox_output_path = None
        if bbox_dir is not None:
            # Save YOLO bounding box data to file
            bbox_output_path = bbox_dir / f"{mask_path.stem}.txt"
            _write_yolo_label_rows(bbox_output_path, bbox_format_data)

        print("Processed and stored:")
        print(f"  Segmentation: {seg_output_path}")
        if bbox_output_path is not None:
            print(f"  Bounding boxes: {bbox_output_path}")
        print(f"  Image size: {img_height} x {img_width}")


def _write_yolo_label_rows(label_path, rows):
    """Write each row as one space-separated line to ``label_path`` (UTF-8, overwriting)."""
    with open(label_path, "w", encoding="utf-8") as file:
        file.writelines(" ".join(map(str, row)) + "\n" for row in rows)

In [None]:
def convert_segment_masks_to_coco(
    masks_dir: str,
    output_file: str,
    category_name: str = "object",
    category_id: int = 1,
    dataset_name: str = "Custom Dataset",
):
    """
    Build a single-category COCO annotation file from binary mask images.

    Each ``.png``/``.jpg``/``.jpeg`` file in ``masks_dir`` (visited in sorted
    order) is read as grayscale. Pixels equal to 255 are treated as the object;
    every external contour with at least 3 points and an area of at least 1
    becomes one polygon annotation. The result is written to ``output_file`` as
    JSON and also returned.

    Args:
        masks_dir (str): Directory holding the binary masks (0=background, 255=object).
        output_file (str): Destination path of the COCO JSON file; missing parent
            directories are created.
        category_name (str): Name stored for the single category (default: "object").
        category_id (int): ID stored for the single category (default: 1).
        dataset_name (str): Text stored in ``info.description`` (default: "Custom Dataset").

    Returns:
        dict: The COCO structure that was written to disk.

    Example:
        >>> convert_segment_masks_to_coco(
        ...     masks_dir="masks/",
        ...     output_file="annotations.json",
        ...     category_name="person",
        ...     category_id=1
        ... )
    """
    accepted_suffixes = {".png", ".jpg", ".jpeg"}

    # The two lists are shared with the dict below, so appending to them fills the document.
    image_records = []
    annotation_records = []
    coco_data = {
        "info": {
            "description": dataset_name,
            "url": "",
            "version": "1.0",
            "year": datetime.now().year,
            "contributor": "Auto-generated",
            "date_created": datetime.now().isoformat(),
        },
        "licenses": [{"id": 1, "name": "Unknown", "url": ""}],
        "images": image_records,
        "annotations": annotation_records,
        "categories": [
            {"id": category_id, "name": category_name, "supercategory": "object"}
        ],
    }

    next_annotation_id = 1
    next_image_id = 1

    for entry in sorted(Path(masks_dir).iterdir()):
        if entry.suffix.lower() not in accepted_suffixes:
            continue
        print(f"Processing {entry.name}...")

        gray = cv2.imread(str(entry), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # Unreadable files are reported and do not consume an image id.
            print(f"Warning: Could not read {entry}")
            continue

        rows, cols = gray.shape
        image_records.append({
            "id": next_image_id,
            "width": cols,
            "height": rows,
            "file_name": entry.name,
            "license": 1,
            "flickr_url": "",
            "coco_url": "",
            "date_captured": datetime.now().isoformat(),
        })

        # Object pixels are exactly 255; everything else counts as background.
        foreground = (gray == 255).astype(np.uint8)
        outlines, _ = cv2.findContours(foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for outline in outlines:
            if len(outline) < 3:
                continue  # not a valid polygon
            region_area = cv2.contourArea(outline)
            if region_area < 1:
                continue  # skip very small areas

            left, top, box_w, box_h = cv2.boundingRect(outline)

            # Drop the singleton axis; atleast_2d keeps a lone point as one (x, y) row.
            vertices = np.atleast_2d(outline.squeeze())
            polygon = [
                float(coord)
                for vertex in vertices
                for coord in (vertex[0], vertex[1])
            ]

            annotation_records.append({
                "id": next_annotation_id,
                "image_id": next_image_id,
                "category_id": category_id,
                "segmentation": [polygon],
                "area": float(region_area),
                "bbox": [float(left), float(top), float(box_w), float(box_h)],
                "iscrowd": 0,
            })
            next_annotation_id += 1

        next_image_id += 1

    # Persist the document, creating the target folder when needed.
    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open('w', encoding='utf-8') as handle:
        json.dump(coco_data, handle, indent=2, ensure_ascii=False)

    print(f"\nCOCO annotations saved to: {destination}")
    print(f"Total images: {len(image_records)}")
    print(f"Total annotations: {len(annotation_records)}")

    return coco_data


def convert_segment_masks_to_coco_multi_class(
    masks_dir: str,
    output_file: str,
    class_mapping: Dict[int, str],
    dataset_name: str = "Custom Dataset",
):
    """
    Build a multi-category COCO annotation file from label-valued mask images.

    Each key of ``class_mapping`` is a pixel value; it is used both to select the
    pixels of that class and as the COCO category id. For every
    ``.png``/``.jpg``/``.jpeg`` file in ``masks_dir`` (visited in sorted order) and
    every mapped pixel value, each external contour with at least 3 points and an
    area of at least 1 becomes one polygon annotation. The result is written to
    ``output_file`` as JSON and also returned.

    Args:
        masks_dir (str): Directory holding the mask images.
        output_file (str): Destination path of the COCO JSON file; missing parent
            directories are created.
        class_mapping (Dict[int, str]): Pixel value -> class name,
            e.g. {255: "person", 128: "car", 64: "bicycle"}.
        dataset_name (str): Text stored in ``info.description``.

    Returns:
        dict: The COCO structure that was written to disk.

    Example:
        >>> class_mapping = {255: "person", 128: "car"}
        >>> convert_segment_masks_to_coco_multi_class(
        ...     masks_dir="masks/",
        ...     output_file="annotations.json",
        ...     class_mapping=class_mapping
        ... )
    """
    accepted_suffixes = {".png", ".jpg", ".jpeg"}

    # One category per mapped pixel value; the pixel value doubles as the category id.
    categories = [
        {"id": label_value, "name": label_name, "supercategory": "object"}
        for label_value, label_name in class_mapping.items()
    ]

    # The two lists are shared with the dict below, so appending to them fills the document.
    image_records = []
    annotation_records = []
    coco_data = {
        "info": {
            "description": dataset_name,
            "url": "",
            "version": "1.0",
            "year": datetime.now().year,
            "contributor": "Auto-generated",
            "date_created": datetime.now().isoformat(),
        },
        "licenses": [{"id": 1, "name": "Unknown", "url": ""}],
        "images": image_records,
        "annotations": annotation_records,
        "categories": categories,
    }

    next_annotation_id = 1
    next_image_id = 1

    for entry in sorted(Path(masks_dir).iterdir()):
        if entry.suffix.lower() not in accepted_suffixes:
            continue
        print(f"Processing {entry.name}...")

        gray = cv2.imread(str(entry), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # Unreadable files are reported and do not consume an image id.
            print(f"Warning: Could not read {entry}")
            continue

        rows, cols = gray.shape
        image_records.append({
            "id": next_image_id,
            "width": cols,
            "height": rows,
            "file_name": entry.name,
            "license": 1,
            "flickr_url": "",
            "coco_url": "",
            "date_captured": datetime.now().isoformat(),
        })

        # The class name is not needed here, only the pixel value selecting the class.
        for label_value in class_mapping:
            class_pixels = (gray == label_value).astype(np.uint8)
            outlines, _ = cv2.findContours(class_pixels, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for outline in outlines:
                if len(outline) < 3:
                    continue  # not a valid polygon
                region_area = cv2.contourArea(outline)
                if region_area < 1:
                    continue  # skip very small areas

                left, top, box_w, box_h = cv2.boundingRect(outline)

                # Drop the singleton axis; atleast_2d keeps a lone point as one (x, y) row.
                vertices = np.atleast_2d(outline.squeeze())
                polygon = [
                    float(coord)
                    for vertex in vertices
                    for coord in (vertex[0], vertex[1])
                ]

                annotation_records.append({
                    "id": next_annotation_id,
                    "image_id": next_image_id,
                    "category_id": label_value,
                    "segmentation": [polygon],
                    "area": float(region_area),
                    "bbox": [float(left), float(top), float(box_w), float(box_h)],
                    "iscrowd": 0,
                })
                next_annotation_id += 1

        next_image_id += 1

    # Persist the document, creating the target folder when needed.
    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open('w', encoding='utf-8') as handle:
        json.dump(coco_data, handle, indent=2, ensure_ascii=False)

    print(f"\nCOCO annotations saved to: {destination}")
    print(f"Total images: {len(image_records)}")
    print(f"Total annotations: {len(annotation_records)}")
    print(f"Categories: {[category['name'] for category in categories]}")

    return coco_data


def validate_coco_json(json_file: str) -> bool:
    """
    Check that a COCO JSON file has the top-level sections and print a summary.

    The file must load as JSON and contain the keys ``info``, ``images``,
    ``annotations`` and ``categories``. On success the totals and the number of
    annotations per category are printed. Any exception raised while reading or
    summarising the file is reported and turned into a ``False`` result.

    Args:
        json_file (str): Path to COCO JSON file

    Returns:
        bool: True if valid, False otherwise
    """
    try:
        with open(json_file, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)

        # Report only the first missing section, in this fixed order.
        absent = next(
            (key for key in ('info', 'images', 'annotations', 'categories') if key not in payload),
            None,
        )
        if absent is not None:
            print(f"Error: Missing required field '{absent}'")
            return False

        annotations = payload['annotations']
        categories = payload['categories']

        print("COCO JSON Validation Results:")
        print(f"✓ Total images: {len(payload['images'])}")
        print(f"✓ Total annotations: {len(annotations)}")
        print(f"✓ Total categories: {len(categories)}")

        # Per-category annotation counts
        print("\nCategories:")
        for category in categories:
            hits = sum(1 for ann in annotations if ann['category_id'] == category['id'])
            print(f"  - {category['name']} (ID: {category['id']}): {hits} annotations")

        return True

    except Exception as e:
        print(f"Error validating COCO JSON: {e}")
        return False


# Example usage

# Example 1: Binary masks (0=background, 255=object)
# convert_segment_masks_to_coco(
#     masks_dir="path/to/binary_masks",
#     output_file="annotations_binary.json",
#     category_name="person",
#     category_id=1
# )

# Example 2: Multi-class masks
# class_mapping = {
#     255: "person",
#     128: "car", 
#     64: "bicycle"
# }
# convert_segment_masks_to_coco_multi_class(
#     masks_dir="path/to/multiclass_masks",
#     output_file="annotations_multiclass.json",
#     class_mapping=class_mapping
# )

# Example 3: Validate a generated JSON file (here: the output of Example 1)
# validate_coco_json("annotations_binary.json")

In [28]:
# Export YOLO segmentation and bounding-box labels for the single-class masks.
convert_segment_masks_to_yolo_seg(
    masks_dir="./masks",
    output_dir="./data/seg",
    bbox_output_dir="./data/bbox",
    classes=1,
)

Processing ../data/raw/dataset/masks/dataset4_01006.tiff imgsz = 640 x 640
Processed and stored:
  Segmentation: ../data/raw/dataset/test/dataset4_01006.txt
  Bounding boxes: ../data/raw/dataset/bbox/dataset4_01006.txt
  Image size: 640 x 640
Processing ../data/raw/dataset/masks/dataset4_01725.tiff imgsz = 640 x 640
Processed and stored:
  Segmentation: ../data/raw/dataset/test/dataset4_01725.txt
  Bounding boxes: ../data/raw/dataset/bbox/dataset4_01725.txt
  Image size: 640 x 640
Processing ../data/raw/dataset/masks/dataset4_02101.tiff imgsz = 640 x 640
Processed and stored:
  Segmentation: ../data/raw/dataset/test/dataset4_02101.txt
  Bounding boxes: ../data/raw/dataset/bbox/dataset4_02101.txt
  Image size: 640 x 640
Processing ../data/raw/dataset/masks/dataset4_01007.tiff imgsz = 640 x 640
Processed and stored:
  Segmentation: ../data/raw/dataset/test/dataset4_01007.txt
  Bounding boxes: ../data/raw/dataset/bbox/dataset4_01007.txt
  Image size: 640 x 640
Processing ../data/raw/datas