In [2]:
import glob

import geopandas as gpd
import numpy as np
import pandas as pd

# Filesystem layout. Every location is derived from the single root
# `regions_datadir`, so moving the dataset only requires changing one line.
regions_datadir = "/data/uscuni-ulce/"
data_dir = regions_datadir + "processed_data/"

# Everything matching the raw eubucco glob pattern (empty list if nothing matches).
eubucco_files = glob.glob(regions_datadir + "eubucco_raw/*")

# Sub-folders of the processed-data directory.
graph_dir = data_dir + "neigh_graphs/"
chars_dir = data_dir + "chars/"

In [3]:
# Building-id -> region lookup table, read with the pyarrow engine.
building_region_mapping = pd.read_parquet(
    regions_datadir + "regions/" + "id_to_region.parquet",
    engine="pyarrow",
)

# Series that maps every building id to its row position (0..n-1) in the
# lookup table.
typed_dict = pd.Series(
    data=np.arange(len(building_region_mapping)),
    index=building_region_mapping["id"].values,
)

# For each region, the array of distinct building ids assigned to it.
region_ids = building_region_mapping.groupby("region")["id"].unique()

# The lookup table is large (the original note puts it at "2/3 gb"), so it is
# released as soon as the two derived structures exist.
del building_region_mapping

# Per-region hull geometries (the "convex_hull" column is read further down).
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [5]:
# Select the single region processed by this notebook.
# A label lookup raises KeyError when the id is absent from `region_hulls`,
# instead of silently continuing with some other region's hull.
# Assumes region ids are unique labels in the `region_hulls` index — TODO confirm.
region_id = 69300
region_hull = region_hulls.loc[region_id, "convex_hull"]

In [8]:
# NOTE(review): `orig_buildings` is not assigned in any visible cell — the load
# call below is commented out, and `read_region_buildings` is neither defined
# nor imported in view. On a fresh kernel this cell raises NameError.
# TODO: restore the load step (and the import of its helper) before relying on
# Restart & Run All.
# orig_buildings = read_region_buildings(typed_dict, region_ids, region_hull, region_id)
orig_buildings.shape

(317934, 2)

In [20]:
# Work on a copy so the cleaning steps below never modify `orig_buildings`.
buildings = orig_buildings.copy()

In [21]:
# Overwrite the geometry column with the output of `make_valid()`.
# The last expression displays the frame's shape as a row-count check.
buildings["geometry"] = buildings.make_valid()
buildings.shape

(317934, 2)

In [22]:
# Explode multi-part geometries into one row per part; `ignore_index=True`
# renumbers the rows from 0 afterwards.
buildings = buildings.explode(ignore_index=True)
buildings.shape

(317936, 2)

In [23]:
# Retain only rows whose geometry type is exactly "Polygon", then renumber
# the rows from 0.
buildings = (
    buildings
    .loc[lambda gdf: gdf["geometry"].geom_type == "Polygon"]
    .reset_index(drop=True)
)
buildings.shape

(317934, 2)

In [24]:
# Set the geometry precision (grid size 0.001) to speed up the later
# calculations.
# NOTE(review): the grid size is expressed in CRS units — presumably metres,
# i.e. 1 mm; confirm the CRS of `buildings`.
buildings["geometry"] = buildings.set_precision(0.001)
buildings.shape

(317934, 2)

In [25]:
import geoplanar

In [26]:
# Merge overlapping buildings with geoplanar, using merge_limit=30 and
# overlap_limit=0.1.
# NOTE(review): an older note here read ".10 percent or 500sqm", which does not
# match the arguments passed. 0.1 is presumably a share of the polygon area
# (10 %) and 30 presumably an area threshold in squared CRS units — confirm
# both against the geoplanar documentation.
buildings = geoplanar.merge_overlaps(buildings, merge_limit=30, overlap_limit=0.1)
buildings.shape

(299209, 2)

In [27]:
# Trim away the overlaps that remain after the merge step.
# NOTE(review): `largest=False` presumably trims the overlap out of the smaller
# polygon of each pair — confirm against the geoplanar documentation.
buildings = geoplanar.trim_overlaps(buildings, largest=False)
buildings.shape

(299209, 2)

In [28]:
# The row count grows in this step in the recorded run, so the trimmed frame
# contains multi-part geometries; split them into single parts and renumber
# the rows from 0.
buildings = buildings.explode(ignore_index=True)
buildings.shape

(309771, 2)

In [29]:
# Keep rows whose geometry type is "Polygon" and discard every other part
# produced by the explode step, then renumber the rows from 0.
buildings = (
    buildings
    .loc[buildings.geom_type.eq("Polygon")]
    .reset_index(drop=True)
)
buildings.shape

(304020, 2)

In [30]:
# Merge touching buildings that collapse under a small negative buffer.
# `shrink` holds every footprint buffered by -0.4 (CRS units — presumably
# metres; confirm). Footprints whose shrunken geometry is empty are the
# candidates handed to geoplanar.
# `np.where(mask)` with a single argument returns a 1-tuple of positional index
# arrays, and that tuple is passed through as-is.
# NOTE(review): `largest=True` presumably merges each candidate into its
# largest touching neighbour — confirm against the geoplanar documentation.
shrink = buildings.buffer(-0.4, resolution=2)
buildings = geoplanar.merge_touching(buildings, np.where(shrink.is_empty), largest=True)
buildings.shape

(299191, 2)

In [32]:
# Split any multi-part geometries into single parts, keep only the parts whose
# type is "Polygon", and renumber the rows from 0.
buildings = (
    buildings
    .explode()
    .loc[lambda parts: parts.geom_type == "Polygon"]
    .reset_index(drop=True)
)
buildings.shape

(299206, 2)

In [33]:
# Finally, snap nearby buildings to each other; the geometries returned by
# `geoplanar.snap` replace the geometry column.
# NOTE(review): threshold=0.5 is presumably a distance in CRS units — confirm.
buildings["geometry"] = geoplanar.snap(buildings, threshold=0.5)
buildings.shape

(299206, 2)

In [34]:
# Final validity pass: overwrite the geometry column with `make_valid()`
# output, then keep only the rows that are still of type "Polygon" and
# renumber the rows from 0.
buildings["geometry"] = buildings.make_valid()
buildings = buildings[buildings.geom_type.eq("Polygon")].reset_index(drop=True)
buildings.shape

(298875, 2)

In [35]:
# Replace every geometry with its `normalize()` output so the stored
# geometries are in normalized form; the shape is displayed as a row-count check.
buildings["geometry"] = buildings.normalize()
buildings.shape

(298875, 2)

In [36]:
from lonboard import PolygonLayer

In [40]:
# Build a lonboard polygon layer from the cleaned buildings for visual
# inspection; outlines use get_line_color [255, 255, 255, 255] (presumably
# opaque white RGBA — confirm against the lonboard documentation).
layer = PolygonLayer.from_geopandas(gdf=buildings, get_line_color=[255, 255, 255, 255])



In [42]:
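# Optional interactive preview of `layer` (disabled).
# NOTE(review): `Map` is not imported in the visible cells (only `PolygonLayer`
# is); add `from lonboard import Map` before re-enabling the two lines below.
# m = Map(layer)
# m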

In [43]:
# Persist the cleaned buildings of this region to
# `<data_dir>buildings/buildings_<region_id>.parquet`.
buildings.to_parquet(data_dir + f"buildings/buildings_{region_id}.parquet")