# 2. Fix Collapsed Buildings

Collapsed buildings are buildings that are usually too small to represent an independent building. These collapse into empty polygons when shrinked to perform Voronoi tessellation and do not have a tessellation cell at the end.

Collapsed buildings should either be removed if they are not neighbouring another building, as they represent structures such as sheds and bike stands, or merged to a neighbouring polygon, in the case of garages, windows and passageways.

In [2]:
import geopandas as gpd
from tqdm import tqdm
import libpysal

In [34]:
buildings=gpd.read_parquet('buildings_berlin_1.parquet')

In [10]:
def merge_collapsed(gdf, shrink_limit, largest=None):
    """Merge or remove collapsed polygons based on a set of conditions.

    If collapsed polygon is not touching any other polygon, remove. If touching a neighbouring polygon, join to that polygon. If ``largest=None`` it picks one randomly, otherwise it picks the largest (True) or the
    smallest (False).

    
    Parameters
    ----------
    gdf : GeoDataFrame
        GeoDataFrame with polygon or mutli polygon geometry
    shrink_limit : float
        distance to shrink polygon by
    largest : bool (default None)
        Merge collapsed polygon with its largest (True), or smallest (False) neighbor.
        If None, merge with any neighbor non-deterministically but performantly.
        
    Returns
    -------

    GeoDataFrame
    """
    shrink=gdf.buffer(-shrink_limit)
    emptycheck = shrink.is_empty
    collapse = gdf[emptycheck]
    print(collapse)
    
    neighbors = {}
    delete = []
    for i, poly in tqdm(gdf.geometry.items(), total=len(gdf)):
        hits = gdf.sindex.query(poly, predicate='touches')
        hits = hits[hits != i]

        if i in collapse.index:
            if hits.size == 0:
                #remove
                #print('remove',i)
                delete.append(i)
                neighbors[i] = list(hits)
            else:
                if largest is None:
                    neighbors[i] = [hits[0]]
                    #print('neighbour of',i,'is',neighbors[i])
                else:
                    if poly.geom_type == 'Polygon': #poly.exterior only works for polygons
                        sub = gdf.geometry.iloc[hits]
                        inters = sub.intersection(poly.exterior)
                        if largest:
                            neighbors[i] = [inters.length.idxmax()]
                        else:
                            neighbors[i] = [inters.length.idxmin()]
                    else:
                        neighbors[i] = [hits[0]]
        else:
            neighbors[i] = []

    gdf.drop(delete, inplace=True)
    [neighbors.pop(key) for key in delete]
    
    W = libpysal.weights.W(neighbors, silence_warnings=True)
    return gdf.dissolve(W.component_labels)

## Check number of collapsed features

In [11]:
import momepy

In [35]:
check=momepy.CheckTessellationInput(buildings)

Collapsed features  : 202
Split features      : 411
Overlapping features: 0


In [36]:
buildings['geometry'].geom_type.value_counts()

Polygon    1056597
Name: count, dtype: int64

## Merge collapsed buildings

In [38]:
buildings=merge_collapsed(buildings,0.4)

               validity                                           geometry
250096   Valid Geometry  POLYGON ((4574970.024 5807552.343, 4574969.204...
255528   Valid Geometry  POLYGON ((4589133.618 5833747.873, 4589134.401...
260128   Valid Geometry  POLYGON ((4588696.855 5827179.714, 4588696.557...
260147   Valid Geometry  POLYGON ((4588773.145 5827856.791, 4588777.834...
260148   Valid Geometry  POLYGON ((4588608.294 5827641.74, 4588608.32 5...
...                 ...                                                ...
1054828  Valid Geometry  POLYGON ((4604333.249 5812020.376, 4604334.819...
1056198  Valid Geometry  POLYGON ((4574425.979 5803937.681, 4574435.095...
1056318  Valid Geometry  POLYGON ((4590243.415 5822744.295, 4590243.115...
1056340  Valid Geometry  POLYGON ((4574349.027 5807042.493, 4574355.244...
1056569  Valid Geometry  POLYGON ((4590535.699 5814740.967, 4590534.81 ...

[164 rows x 2 columns]


100%|██████████| 1056559/1056559 [02:17<00:00, 7661.71it/s]


In [39]:
check2=momepy.CheckTessellationInput(buildings)

Collapsed features  : 0
Split features      : 537
Overlapping features: 0


merge_collapsed() increases the number of split features

## Check: Validity and Geometry Types

In [40]:
from shapely.validation import explain_validity
buildings['validity'] = buildings.apply(lambda row: explain_validity(row.geometry), axis=1)

In [41]:
buildings['validity'].value_counts()

validity
Valid Geometry    1056395
Name: count, dtype: int64

In [20]:
buildings_valid=buildings.make_valid()

In [24]:
buildings=gpd.GeoDataFrame({'geometry':buildings_valid})

In [42]:
buildings['geometry'].geom_type.value_counts()

Polygon         1056269
MultiPolygon        126
Name: count, dtype: int64

### merge_collapsed() leads to MultiPolygons

In [31]:
buildings[buildings.geom_type=='MultiPolygon'].explore()

## Save to parquet

In [43]:
buildings.to_parquet('buildings_berlin_2.parquet')