# 2. Fix Collapsed Buildings

Collapsed buildings are buildings that are usually too small to represent an independent building. These collapse into empty polygons when shrinked to perform Voronoi tessellation and do not have a tessellation cell at the end.

Collapsed buildings should either be removed if they are not neighbouring another building, as they represent structures such as sheds and bike stands, or merged to a neighbouring polygon, in the case of garages, windows and passageways.

In [93]:
import geopandas as gpd
from tqdm import tqdm
import libpysal
import pandas as pd
import numpy as np
from folium.plugins import MousePosition

In [94]:
path = '/Users/lisawinkler/Documents/Prague/BuildingFootprints/buildings_berlin_1.parquet'

In [95]:
buildings=gpd.read_parquet(path)

In [96]:
buildings['geometry'].geom_type.value_counts()

Polygon    1056597
Name: count, dtype: int64

In [117]:
buildings=buildings.reset_index(drop=True)

In [6]:
buildings.cx[4602104.79012012:4604824.65732128,5810724.070977277:5812978.191857001].explore()

## Set Precision Grid

In [98]:
buildings = buildings.set_precision(0.001)

In [99]:
buildings=gpd.GeoDataFrame(geometry=buildings, crs=31468)

In [116]:
buildings['geometry'].geom_type.value_counts()

Polygon    1056435
Name: count, dtype: int64

## Merge Collapsed

In [118]:
def merge_collapsed(gdf, shrink_limit, largest=None):
    """Merge or remove collapsed polygons based on a set of conditions.

    If collapsed polygon is not touching any other polygon, remove. If touching a neighbouring polygon, join to that polygon. If ``largest=None`` it picks one randomly, otherwise it picks the largest (True) or the
    smallest (False).

    
    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with polygon or mutli polygon geometry
    shrink_limit : float
        distance to shrink polygon by
    largest : bool (default None)
        Merge collapsed polygon with its largest (True), or smallest (False) neighbor.
        If None, merge with any neighbor non-deterministically but performantly.
        
    Returns
    -------

    GeoDataFrame
    """
    shrink=gdf.buffer(-0.4)
    emptycheck = shrink.is_empty
    collapse = gdf[emptycheck]

    overlap_a, overlap_b = gdf.boundary.sindex.query(
            gdf.boundary, predicate="overlaps")

    self_mask = overlap_a != overlap_b
    overlap_a = overlap_a[self_mask]
    overlap_b = overlap_b[self_mask]

    overlap = pd.MultiIndex.from_arrays(
            [overlap_a, overlap_b], names=("source", "target")
        )

    neighbors={}
    delete = []
    for i, poly in tqdm(gdf.geometry.items(), total=len(gdf)):
        if i in collapse.index:
            if i in overlap.get_level_values('source'):
                if largest is None:
                    neighbors[i]=[overlap[overlap.get_level_values('source')==i].get_level_values('target').tolist()[0]]
                else:
                    sub = gdf.geometry.iloc[overlap[overlap.get_level_values('source')==i].get_level_values('target').tolist()]
                    inters = sub.intersection(poly.exterior)
                    if largest:
                        neighbors[i] = [inters.length.idxmax()]
                    else:
                        neighbors[i] = [inters.length.idxmin()]
            else:
                delete.append(i)
                neighbors[i]=[]
        else:
            neighbors[i]=[]
        
    gdf.drop(delete, inplace=True)
    [neighbors.pop(key) for key in delete]

    W = libpysal.weights.W(neighbors, silence_warnings=True)
    return gdf.dissolve(W.component_labels)


In [119]:
buildings_merged=merge_collapsed(buildings,1)

100%|██████████| 1056435/1056435 [00:13<00:00, 81012.99it/s]


In [135]:
buildings_merged['geometry'].geom_type.value_counts()

Polygon    1056395
Name: count, dtype: int64

In [122]:
check = momepy.CheckTessellationInput(buildings_merged)

Collapsed features  : 0
Split features      : 411
Overlapping features: 0


In [126]:
buildings_merged.cx[4602104.79012012:4604824.65732128,5810724.070977277:5813100.191857001].explore()

In [128]:
buildings_merged.to_crs(4326).cx[13.36:13.365,52.495:52.51].explore(column=None, popup_columns=['ID'])

In [125]:
buildings_merged.to_crs(4326).cx[13.444:13.446,52.4747:52.4752].explore(column=None, popup_columns=['ID'])

merge_collapsed() increases the number of split features

## Check: Validity and Geometry Types

In [131]:
buildings_merged['geometry'].geom_type.value_counts()

Polygon    1056395
Name: count, dtype: int64

In [136]:
buildings_merged.is_valid.value_counts()

True    1056395
Name: count, dtype: int64

## Save to parquet

In [132]:
buildings_merged.to_parquet('buildings_berlin_2.parquet')