In [1]:
from pathlib import Path

import geopandas as gpd
from shapely.geometry import LineString
import numpy as np
from rasterio import features
import rasterio as rio
from tqdm.auto import tqdm
from shapely.geometry import shape, box
import pandas as pd
from multiprocessing import Pool

In [2]:
# Directory that holds the prediction rasters ("*pred.tif") processed below;
# the output GeoPackage is written into the same directory.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
input_rasters = Path("/media/nick/SNEAKERNET/inference_scenes_5")

In [3]:
# Collect the prediction rasters in a deterministic (sorted) order. glob()
# yields entries in arbitrary, filesystem-dependent order, and rasters[0] is
# later used to name the output file, so an unsorted list makes the output
# file name vary between machines/runs.
rasters = sorted(input_rasters.glob("*pred.tif"))
# Fail here with a clear message instead of an IndexError on rasters[0] later.
assert rasters, f"no '*pred.tif' rasters found in {input_rasters}"
len(rasters)

24

In [4]:
# Output GeoPackage path: placed in the input directory and named after the
# stem of the first raster in `rasters`.
# NOTE(review): the file written at the end of the notebook contains lines
# derived from every raster in `rasters`, not only the first one - consider a
# name that reflects that.
output_vector = input_rasters / f"{rasters[0].stem}.gpkg"
output_vector

PosixPath('/media/nick/SNEAKERNET/inference_scenes_5/55GEQ_2022_2022_pred.gpkg')

In [5]:
def simplify_geometries(gdf: gpd.GeoDataFrame, tolerance: float) -> gpd.GeoDataFrame:
    """Return a copy of ``gdf`` whose ``geometry`` column has been simplified.

    The input frame is left untouched. Simplification is done with
    ``preserve_topology=False``.

    Args:
        gdf: Frame with a ``geometry`` column.
        tolerance: Tolerance handed to ``simplify``.

    Returns:
        A new GeoDataFrame carrying the simplified geometries.
    """
    simplified_geoms = gdf["geometry"].simplify(tolerance, preserve_topology=False)
    result = gdf.copy()
    result["geometry"] = simplified_geoms
    return gpd.GeoDataFrame(result)

In [6]:
def extract_polygons(chunk, px_size):
    """Vectorise the water pixels of one raster into polygons in EPSG:3857.

    Band 1 of ``chunk`` is read and cast to ``uint8``; connected regions
    (4-connectivity) of pixels equal to 1 are polygonised, simplified with
    ``simplify_geometries`` and reprojected to EPSG:3857.

    Args:
        chunk: Raster to open with ``rasterio.open``.
        px_size: Tolerance passed to ``simplify_geometries``. It is applied
            before reprojection, i.e. while the geometries are still in the
            raster's own CRS.

    Returns:
        GeoDataFrame with a ``geometry`` column in EPSG:3857, one row per
        polygonised region.
    """
    with rio.open(chunk) as src:
        # Copy everything needed out of the dataset while it is still open,
        # rather than touching `src` after the `with` block has closed it.
        transform = src.transform
        raster_crs = src.crs
        # to_epsg() returns None when the CRS has no matching EPSG code; fall
        # back to WKT instead of building the invalid string "EPSG:None".
        epsg_code = raster_crs.to_epsg()
        source_crs = (
            f"EPSG:{epsg_code}" if epsg_code is not None else raster_crs.to_wkt()
        )
        water_array = src.read(1).astype("uint8")

    # Only pixels flagged as water (value 1) are polygonised.
    mask = water_array == 1
    geoms = [
        shape(geom_mapping)
        for geom_mapping, _pixel_value in features.shapes(
            water_array, mask=mask, transform=transform, connectivity=4
        )
    ]

    water_gdf = gpd.GeoDataFrame({"geometry": geoms}, crs=source_crs)
    water_gdf = simplify_geometries(water_gdf, px_size)

    # EPSG:3857 is Web Mercator (not WGS84 lon/lat).
    water_gdf_3857 = water_gdf.to_crs(3857)

    # Zero-width buffer: presumably here to repair invalid geometries that the
    # non-topology-preserving simplification can produce.
    water_gdf_3857["geometry"] = water_gdf_3857.buffer(0)

    return water_gdf_3857

In [7]:
# Vectorise every prediction raster; px_size=10 is forwarded to
# extract_polygons, which uses it as the simplification tolerance.
water_polygons = [
    extract_polygons(raster_path, px_size=10) for raster_path in rasters
]

In [8]:
# Join the per-raster GeoDataFrames into one GeoDataFrame, then dissolve all
# rows into a single geometry (dissolve() without `by` merges everything into
# one row) - presumably so polygons that meet across raster edges are merged.
joined_water_gdf = pd.concat(water_polygons, ignore_index=True)
joined_water_gdf_dis = joined_water_gdf.dissolve()
joined_water_gdf_dis

Unnamed: 0,geometry
0,"MULTIPOLYGON (((15963288.922 -5058573.822, 159..."


In [9]:
# Convert the dissolved multipart polygon into single-part polygons, one per row.
# index_parts=False keeps the original index label (so it repeats for every
# part) instead of adding a second index level.
single_part_gdf = joined_water_gdf_dis.explode(index_parts=False)
single_part_gdf

Unnamed: 0,geometry
0,"POLYGON ((15963288.922 -5058573.822, 15963315...."
0,"POLYGON ((16106308.595 -4965016.231, 16106347...."
0,"POLYGON ((16377595.030 -5518427.784, 16370766...."
0,"POLYGON ((16068021.311 -4934795.064, 16068009...."
0,"POLYGON ((16024691.510 -4880928.831, 16024650...."
...,...
0,"POLYGON ((16609398.652 -4862047.823, 16609399...."
0,"POLYGON ((16507120.125 -4860747.133, 16507068...."
0,"POLYGON ((16508257.772 -4860102.518, 16508271...."
0,"POLYGON ((16574715.463 -4851664.603, 16574689...."


In [10]:
# Replace each single-part polygon by its boundary and split multi-ring
# boundaries (polygons with holes) into individual LineStrings.
# The chain starts again from the dissolved polygons instead of overwriting
# `single_part_gdf` from itself, so re-running this cell always yields the same
# result (taking `.boundary` of already-converted lines would not).
single_part_gdf = (
    joined_water_gdf_dis.explode(index_parts=False)
    .pipe(lambda polygons: polygons.set_geometry(polygons.boundary))
    .explode(index_parts=False)
)
single_part_gdf

Unnamed: 0,geometry
0,"LINESTRING (15963288.922 -5058573.822, 1596331..."
0,"LINESTRING (16106308.595 -4965016.231, 1610634..."
0,"LINESTRING (16377595.030 -5518427.784, 1637076..."
0,"LINESTRING (16111838.632 -4963032.377, 1611182..."
0,"LINESTRING (16121069.540 -5085767.741, 1612105..."
...,...
0,"LINESTRING (16609398.652 -4862047.823, 1660939..."
0,"LINESTRING (16507120.125 -4860747.133, 1650706..."
0,"LINESTRING (16508257.772 -4860102.518, 1650827..."
0,"LINESTRING (16574715.463 -4851664.603, 1657468..."


In [11]:
def chaikin_corner_cutting_optimized(
    points: np.ndarray, num_iterations: int = 1
) -> np.ndarray:
    """Smooth a polyline with Chaikin's corner-cutting scheme.

    Every segment ``(a, b)`` is replaced by the two points ``0.75*a + 0.25*b``
    and ``0.25*a + 0.75*b``.

    * Closed input (first vertex equals last vertex) stays closed: every
      corner, including the one at the start vertex, is cut and the closing
      vertex appears exactly once at the end.
    * Open input keeps both of its end points.
    * Input with fewer than three vertices has no corner to cut and is
      returned unchanged.

    Args:
        points: Array of shape ``(n_vertices, n_dims)``.
        num_iterations: Number of corner-cutting passes.

    Returns:
        Float array of smoothed vertices with the same number of columns.
    """
    points = np.asarray(points, dtype=float)

    for _ in range(num_iterations):
        if len(points) < 3:
            # A single segment (or less) has no corner; cutting it would only
            # add collinear points or collapse the line.
            break

        is_closed = np.array_equal(points[0], points[-1])

        # Consecutive vertex pairs. For a closed ring the last pair already is
        # the closing segment, so no wrap-around vertex must be appended.
        seg_start = points[:-1]
        seg_end = points[1:]
        q = seg_start * 0.75 + seg_end * 0.25
        r = seg_start * 0.25 + seg_end * 0.75

        # Interleave as q0, r0, q1, r1, ...
        cut = np.empty((2 * len(seg_start), points.shape[1]))
        cut[0::2] = q
        cut[1::2] = r

        if is_closed:
            # Start the ring at r0 and repeat that vertex once to close it.
            cut = np.roll(cut, -1, axis=0)
            points = np.vstack([cut, cut[:1]])
        else:
            # Keep both end points of an open line.
            points = np.vstack([points[:1], cut, points[-1:]])

    return points


def smooth_geodataframe_optimized(
    gdf: gpd.GeoDataFrame, num_iterations: int = 1
) -> gpd.GeoDataFrame:
    """Return a copy of ``gdf`` with every line smoothed by Chaikin corner cutting.

    The input frame is not modified, so calling this repeatedly on the same
    frame always gives the same result.

    Args:
        gdf: Frame whose ``geometry`` column holds geometries exposing
            ``.coords`` (e.g. LineStrings).
        num_iterations: Number of corner-cutting passes applied to each line.

    Returns:
        A new GeoDataFrame whose ``geometry`` column holds the smoothed
        LineStrings.
    """

    def _smooth_line(line):
        # Smooth the vertex array of one line and rebuild it as a LineString.
        coords = np.array(line.coords)
        return LineString(
            chaikin_corner_cutting_optimized(coords, num_iterations=num_iterations)
        )

    # Work on a copy: assigning into `gdf` itself would silently change the
    # caller's frame.
    smoothed = gdf.copy()
    smoothed["geometry"] = smoothed["geometry"].apply(_smooth_line)

    return smoothed

In [12]:
# Smooth the boundary lines with two passes of Chaikin corner cutting.
lines_gpd = smooth_geodataframe_optimized(single_part_gdf, num_iterations=2)
lines_gpd

Unnamed: 0,geometry
0,"LINESTRING (15963311.483 -5058578.201, 1596331..."
0,"LINESTRING (16106345.157 -4965034.552, 1610634..."
0,"LINESTRING (16369912.587 -5518394.202, 1636820..."
0,"LINESTRING (16111826.931 -4963007.288, 1611182..."
0,"LINESTRING (16121063.596 -5085805.239, 1612107..."
...,...
0,"LINESTRING (16609392.170 -4862082.335, 1660938..."
0,"LINESTRING (16507068.921 -4860755.234, 1650706..."
0,"LINESTRING (16508260.641 -4860131.917, 1650825..."
0,"LINESTRING (16574693.396 -4851660.170, 1657469..."


In [13]:
# Write the smoothed lines to the .gpkg path built earlier in `output_vector`.
lines_gpd.to_file(output_vector)