# 1. Fix Overlapping Buildings

Overlapping buildings are either merged or trimmed. A building is merged with a neighbouring polygon if it is smaller than ``merge_limit``, or if the area it shares with the neighbour is larger than ``area * overlap_limit``. Overlaps that remain after merging are trimmed.

In [1]:
import geopandas as gpd
import geoplanar
import libpysal
import momepy
import numpy as np
from shapely.validation import explain_validity
from tqdm import tqdm

In [6]:
# Load the building footprints; only the geometry column is read.
buildings = gpd.read_parquet(
    'buildings_berlin_1.parquet',
    columns=['geometry'],
)

In [58]:
# Replace the stored index with a default 0..n-1 RangeIndex so that row labels
# equal row positions.
buildings=buildings.reset_index(drop=True)

In [51]:
# Preview the loaded building footprints.
buildings

Unnamed: 0,geometry
0,"POLYGON ((13.30277 52.51964, 13.30332 52.51964..."
1,"POLYGON ((13.35264 52.53331, 13.35276 52.53322..."
2,"POLYGON ((13.35397 52.52302, 13.35395 52.52295..."
3,"POLYGON ((13.35522 52.52727, 13.35512 52.52739..."
4,"POLYGON ((13.36014 52.53152, 13.35987 52.53184..."
...,...
1057246,"POLYGON ((13.4585 52.50118, 13.45827 52.50122,..."
1057247,"POLYGON ((13.11685 52.39058, 13.1174 52.39058,..."
1057248,"POLYGON ((13.2772 52.53379, 13.27694 52.53323,..."
1057249,"POLYGON ((13.43921 52.43791, 13.43934 52.43803..."


## Merge Overlapping Buildings

In [31]:
def merge_overlapping(gdf, merge_limit, overlap_limit):
    """Merge overlapping polygons based on a set of conditions.

    For every polygon, the candidate neighbors are the other polygons it overlaps
    or contains.

    * A polygon with an area smaller than ``merge_limit`` is linked to all of its
      candidates, no matter how small the shared area is.
    * A polygon with an area of at least ``merge_limit`` is linked only to the
      candidates whose intersection with it is larger than ``overlap_limit`` times
      the candidate's own area.

    Linked polygons form groups (connected components of the link graph) and every
    group is dissolved into a single geometry, so merging is transitive.

    Areas are planar areas in squared units of the coordinates of ``gdf``, so
    ``merge_limit`` has to be given in those units.

    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with polygon or multi polygon geometry
    merge_limit : float
        area below which an overlapping polygon is merged with its neighbors no
        matter the size of the overlap
    overlap_limit : float (0-1)
        ratio of the area of a neighboring polygon that has to be shared with the
        polygon to merge both into one

    Returns
    -------
    GeoDataFrame
        ``gdf`` dissolved by group, one row per group of merged polygons
    """
    geoms = gdf.geometry
    sindex = gdf.sindex

    # Everything below works on integer positions: that is what the spatial index
    # returns, and it keeps the result correct for any index on ``gdf``.
    neighbors = {}
    for pos, poly in tqdm(enumerate(geoms), total=len(gdf)):
        candidates = np.unique(
            np.concatenate(
                [
                    sindex.query(poly, predicate='overlaps'),
                    sindex.query(poly, predicate='contains'),
                ]
            )
        )
        # a polygon is never its own neighbor
        candidates = candidates[candidates != pos]

        if poly.area >= merge_limit and candidates.size:
            # large polygon: keep only candidates sharing enough of their own area
            others = geoms.iloc[candidates]
            shared = others.intersection(poly).area.to_numpy()
            threshold = others.area.to_numpy() * overlap_limit
            candidates = candidates[shared > threshold]

        neighbors[pos] = candidates.tolist()

    # Keys are inserted in row order, so the component labels line up with the
    # rows of ``gdf`` position by position.
    W = libpysal.weights.W(neighbors, silence_warnings=True)
    return gdf.dissolve(W.component_labels)

In [32]:
# merge_limit=500, overlap_limit=0.1: all overlapping buildings merged
merged = merge_overlapping(buildings, merge_limit=500, overlap_limit=0.1)

100%|██████████| 1057251/1057251 [04:38<00:00, 3793.28it/s]


In [65]:
# merge_limit=300, overlap_limit=0.2: some overlapping buildings are not merged
# and need to be trimmed afterwards
merged = merge_overlapping(buildings, merge_limit=300, overlap_limit=0.2)

100%|██████████| 1057251/1057251 [07:27<00:00, 2363.80it/s]


In [69]:
# Report how many collapsed, split and overlapping features remain after merging.
check=momepy.CheckTessellationInput(merged)

Collapsed features  : 200
Split features      : 411
Overlapping features: 26


In [33]:
# Preview the merged footprints.
merged

Unnamed: 0,geometry
0,"POLYGON ((13.30277 52.51964, 13.30332 52.51964..."
1,"POLYGON ((13.35264 52.53331, 13.35276 52.53322..."
2,"POLYGON ((13.35397 52.52302, 13.35395 52.52295..."
3,"POLYGON ((13.35522 52.52727, 13.35512 52.52739..."
4,"POLYGON ((13.36014 52.53152, 13.35987 52.53184..."
...,...
1056493,"POLYGON ((13.4585 52.50118, 13.45827 52.50122,..."
1056494,"POLYGON ((13.11685 52.39058, 13.1174 52.39058,..."
1056495,"POLYGON ((13.2772 52.53379, 13.27694 52.53323,..."
1056496,"POLYGON ((13.43921 52.43791, 13.43934 52.43803..."


## Trim Overlapping Buildings

In [9]:
# Trim the overlaps that are still present after merging.
buildings_trimmed = geoplanar.trim_overlaps(merged)

In [12]:
# Very slow on the full dataset. The warning printed below points at a
# `gdf.unary_union` call inside geoplanar, i.e. a union of all geometries.
# NOTE(review): the momepy check further down reports the overlap count as well
# and may be the cheaper way to verify the result - confirm before relying on it.
geoplanar.is_overlapping(buildings_trimmed)

  uu = gdf.unary_union


: 

In [11]:
import momepy

In [12]:
# Re-run the check on the trimmed footprints; "Overlapping features" should be 0.
check=momepy.CheckTessellationInput(buildings_trimmed)

Collapsed features  : 200
Split features      : 413
Overlapping features: 0


## Check: Validity and Geometry Types

In [13]:
# Count features per geometry type to spot multi-part geometries.
buildings_trimmed['geometry'].geom_type.value_counts()

Polygon         1056590
MultiPolygon          3
Name: count, dtype: int64

In [17]:
# Inspect the multi-part geometries on an interactive map.
buildings_trimmed.loc[buildings_trimmed.geom_type == 'MultiPolygon'].explore()

In [23]:
# Split multi-part geometries into single-part rows; ignore_index=True renumbers
# the rows 0..n-1 instead of keeping the original labels.
buildings_trimmed=buildings_trimmed.explode(ignore_index=True)

`geo_col_name = gdf.active_geometry_name; gdf.set_geometry(new_geo_col).drop(columns=geo_col_name).rename_geometry(geo_col_name)`.
  return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


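`geoplanar.trim_overlaps` can produce MultiPolygons, which is why the trimmed result is exploded into single-part polygons in the cell above.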

In [14]:
from shapely.validation import explain_validity

In [18]:
# NOTE(review): this binds a second name to the same GeoDataFrame, it does not
# copy it. Columns assigned to `merged` afterwards therefore also appear on
# `buildings_trimmed`, which is the frame written to parquet at the end.
merged = buildings_trimmed

In [20]:
# Describe the validity of every geometry. Applying over the geometry column calls
# explain_validity once per geometry without building a row object per feature.
merged['validity'] = merged['geometry'].apply(explain_validity)

In [21]:
# Summarise the validity messages; anything other than "Valid Geometry" needs fixing.
merged['validity'].value_counts()

validity
Valid Geometry    1056593
Name: count, dtype: int64

### Save to parquet

In [None]:
# NOTE(review): this writes to the same path the notebook reads its input from,
# so the raw input is overwritten and a re-run starts from already processed
# data. Confirm this is intended before running the cell.
buildings_trimmed.to_parquet('buildings_berlin_1.parquet')