# Make the polygon valid
Fix self-intersecting and other invalid polygons, as described in the Shapely manual: https://shapely.readthedocs.io/en/stable/manual.html

In [2]:
import os
import json
import glob
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
from shapely.validation import explain_validity, make_valid

def _shift_off_border(value, limit):
    """Move a coordinate that sits on the first or last pixel of an axis one pixel inward."""
    if int(value) == 0:
        return value + 1  # on the low edge of the axis
    if int(value) == limit - 1:
        return value - 1  # on the high edge of the axis
    return value


def _largest_polygon(geometry):
    """Return the largest-area non-empty Polygon inside *geometry*, or None if it has none."""
    if isinstance(geometry, Polygon):
        return None if geometry.is_empty else geometry
    parts = getattr(geometry, 'geoms', None)
    if parts is None:
        return None  # a lone LineString / Point: nothing polygonal to keep
    # recurse so that a MultiPolygon nested inside a GeometryCollection is also considered
    candidates = [found for found in map(_largest_polygon, parts) if found is not None]
    return max(candidates, key=lambda polygon: polygon.area, default=None)


def shapely_valid_transform(input_dir: str = None, if_explain_invalid: bool = False):
    """Rewrite the polygons of every ``*.json`` file in a folder so that Shapely considers them valid.

    For each object listed under ``data['objects']`` the function:

    1. shifts points lying on the image border one pixel inward (the border is
       taken from ``data['info']['width']`` / ``data['info']['height']``);
    2. if the resulting polygon is invalid, repairs it with ``make_valid`` and
       keeps the exterior ring of the largest polygonal part;
    3. stores the final points in ``obj['segmentation']`` and a bbox computed
       from those same points in ``obj['bbox']`` as ``[min_x, min_y, max_x, max_y]``.

    Objects with fewer than three points cannot form a polygon and are left
    untouched. If the repair yields no polygonal part at all, the border-shifted
    points are kept. Every JSON file is overwritten in place.

    Args:
        input_dir: Folder containing the JSON annotation files. Must be provided;
            the ``None`` default makes ``os.path.join`` raise ``TypeError``.
        if_explain_invalid: When True, print Shapely's explanation for each
            invalid polygon that is encountered.

    Returns:
        None. The files are modified on disk.
    """
    json_paths = glob.glob(os.path.join(input_dir, "*.json"))  # get all json file paths
    for json_path in json_paths:
        with open(json_path, 'r', encoding='utf-8') as file:
            data = json.load(file)  # load the json data

        for obj in data['objects']:
            points = obj['segmentation']
            if len(points) < 3:
                # Not a polygon: building one would fail, and an empty point list
                # would otherwise produce an infinite bbox that is not valid JSON.
                continue

            width, height = data['info']['width'], data['info']['height']
            segmentation = [
                [_shift_off_border(x, width), _shift_off_border(y, height)]
                for x, y in points
            ]  # the ISAT segmentation coordinates, moved off the image border

            polygon_shapely = Polygon(segmentation)  # noqa: convert the ISAT segmentation points to shapely format
            if not polygon_shapely.is_valid:
                if if_explain_invalid:
                    invalid_reason = explain_validity(polygon_shapely)  # the reason for invalidity
                    print(f"Polygon from {os.path.basename(json_path)} is invalid: {invalid_reason}")
                repaired = _largest_polygon(make_valid(polygon_shapely))
                if repaired is not None:
                    # drop the closing point, which repeats the first one
                    segmentation = [[pt[0], pt[1]] for pt in repaired.exterior.coords[:-1]]
                # otherwise nothing polygonal survived: keep the border-shifted points

            # The bbox must describe the points that are actually stored, so it is
            # computed after the repair rather than from the pre-repair points.
            xs = [pt[0] for pt in segmentation]
            ys = [pt[1] for pt in segmentation]
            obj['segmentation'] = segmentation  # update segmentation coordinates
            obj['bbox'] = [min(xs), min(ys), max(xs), max(ys)]  # update the bbox

        # Serialise first so a failure cannot leave a truncated annotation file behind.
        payload = json.dumps(data, indent=4)
        with open(json_path, 'w', encoding='utf-8') as file:
            file.write(payload)  # save the updated json data
    return None


# Folder holding the ISAT-style JSON annotation files to repair.
input_dir = 'Datasets/Jayakody2017/Processed/V. vinifera - Copy'
# Repairs every *.json file in input_dir in place (the files are overwritten)
# and prints the reason for each invalid polygon that is found.
shapely_valid_transform(input_dir=input_dir, if_explain_invalid=True)

Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_002.json is invalid: Self-intersection[4293.62 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_003.json is invalid: Self-intersection[3456.53 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_004.json is invalid: Ring Self-intersection[3362.21 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_008.json is invalid: Self-intersection[1911.37 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_010.json is invalid: Self-intersection[4798 2201.12504605937]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_014.json is invalid: Ring Self-intersection[1 293.21]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_017.json is invalid: Self-intersection[2798.04 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_022.json is invalid: Self-intersection[2829.6 3598]
Polygon from V. vinifera Jayakody2017 raw_dataset_1 Image_022.json is invalid: Self-intersection[1