## Building Preprocessing Pipeline

0. Make valid and keep only polygons  
1. Overlapping buildings  
    a. Merge overlapping buildings  
    b. Trim remaining overlapping buildings  
2. Merge collapsed buildings
3. Snap buildings

In [5]:
import geopandas as gpd
import geoplanar

In [6]:
# Load the Berlin building footprints, reading only the geometry column.
buildings = gpd.read_parquet(
    "buildings_berlin.parquet",
    columns=["geometry"],
)

In [7]:
# Preview the loaded footprints (only the geometry column was read).
buildings

Unnamed: 0,geometry
0,"POLYGON ((13.30277 52.51964, 13.30332 52.51964..."
1,"POLYGON ((13.35264 52.53331, 13.35276 52.53322..."
2,"POLYGON ((13.35397 52.52302, 13.35395 52.52295..."
3,"POLYGON ((13.35522 52.52727, 13.35512 52.52739..."
4,"POLYGON ((13.36014 52.53152, 13.35987 52.53184..."
...,...
1057114,"POLYGON ((13.4585 52.50118, 13.45827 52.50122,..."
1057115,"POLYGON ((13.11685 52.39058, 13.1174 52.39058,..."
1057116,"POLYGON ((13.2772 52.53379, 13.27694 52.53323,..."
1057117,"POLYGON ((13.43921 52.43791, 13.43934 52.43803..."


In [8]:
# Reproject to EPSG:31468, the CRS the precision-grid cells below rebuild the
# frame with (crs=31468).
buildings = buildings.to_crs(epsg=31468)

## 0. Make valid and keep only polygons

In [9]:
# Count valid vs. invalid geometries before repairing them.
buildings.is_valid.value_counts()

True     1057112
False          7
Name: count, dtype: int64

In [10]:
# Repair invalid geometries; the result holds geometries only and is wrapped
# back into a GeoDataFrame in the next cell.
buildings_valid = buildings.make_valid()

In [11]:
# Wrap the repaired geometries back into a GeoDataFrame with a single
# 'geometry' column.
buildings = gpd.GeoDataFrame(geometry=buildings_valid)

In [12]:
# Inspect which geometry types make_valid produced.
buildings.geom_type.value_counts()

Polygon               1056951
MultiPolygon               82
LineString                 49
MultiLineString            33
GeometryCollection          4
Name: count, dtype: int64

In [13]:
# First pass: split multi-part geometries and collections into one row per part.
buildings = buildings.explode(ignore_index=True)

`geo_col_name = gdf.active_geometry_name; gdf.set_geometry(new_geo_col).drop(columns=geo_col_name).rename_geometry(geo_col_name)`.
  return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


In [14]:
# Check which geometry types remain after the first explode.
buildings.geom_type.value_counts()

Polygon            1057241
LineString             169
MultiLineString          4
MultiPolygon             3
Name: count, dtype: int64

In [15]:
# Second pass: the previous count still listed MultiLineString and MultiPolygon
# rows, so explode once more.
buildings = buildings.explode(ignore_index=True)

`geo_col_name = gdf.active_geometry_name; gdf.set_geometry(new_geo_col).drop(columns=geo_col_name).rename_geometry(geo_col_name)`.
  return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


In [16]:
# Keep only rows whose geometry is a Polygon; line geometries are dropped.
buildings = buildings.loc[buildings.geom_type == "Polygon"]

In [17]:
# Confirm that only Polygons remain.
buildings.geom_type.value_counts()

Polygon    1057251
Name: count, dtype: int64

## 1. Overlapping buildings

### 1a. Merge overlapping buildings

In [18]:
# Renumber rows 0..n-1, since the polygon filter left gaps in the index.
buildings = buildings.reset_index(drop=True)

In [19]:
# Round coordinates onto a 0.001 precision grid and wrap the resulting
# geometries back into a GeoDataFrame in EPSG:31468.
buildings = gpd.GeoDataFrame(geometry=buildings.set_precision(0.001), crs=31468)

In [20]:
# Merge overlapping buildings with geoplanar.
# NOTE(review): 500 and 0.1 are passed positionally; presumably an area
# threshold and a shared-area ratio - confirm names and units against the
# geoplanar.merge_overlaps documentation.
merged=geoplanar.merge_overlaps(buildings,500,0.1)

In [21]:
# Preview the footprints after merging overlaps.
merged

Unnamed: 0,geometry
0,"POLYGON ((4588570.786 5821522.429, 4588570.911..."
1,"POLYGON ((4591929.045 5823139.268, 4591928.629..."
2,"POLYGON ((4592000.684 5821954.624, 4591985.462..."
3,"POLYGON ((4592070.997 5822450.123, 4592070.662..."
4,"POLYGON ((4592383.652 5822950.81, 4592401.503 ..."
...,...
1056529,"POLYGON ((4599129.491 5819674.681, 4599113.569..."
1056530,"POLYGON ((4576173.534 5806950.331, 4576177.445..."
1056531,"POLYGON ((4586824.286 5823051.312, 4586799.392..."
1056532,"POLYGON ((4597984.496 5812617.902, 4597996.368..."


In [22]:
# Check whether overlaps remain after merging (True in the recorded run, which
# is why the trimming step follows).
geoplanar.is_overlapping(merged)

True

### 1b. Trim remaining overlapping buildings

In [23]:
# Trim the overlaps that remain after merging.
# NOTE(review): largest=False presumably selects which polygon of an
# overlapping pair gets trimmed - confirm against the geoplanar docs.
buildings_trimmed = geoplanar.trim_overlaps(merged,largest=False)

In [24]:
# Preview the footprints after trimming.
buildings_trimmed

Unnamed: 0,geometry
0,"POLYGON ((4588570.786 5821522.429, 4588570.911..."
1,"POLYGON ((4591929.016 5823140.091, 4591933.465..."
2,"POLYGON ((4591986.823 5821964.593, 4592002.064..."
3,"POLYGON ((4592070.997 5822450.123, 4592070.662..."
4,"POLYGON ((4592383.652 5822950.81, 4592401.503 ..."
...,...
1056529,"POLYGON ((4599129.491 5819674.681, 4599113.569..."
1056530,"POLYGON ((4576173.534 5806950.331, 4576177.445..."
1056531,"POLYGON ((4586824.286 5823051.312, 4586799.392..."
1056532,"POLYGON ((4597984.496 5812617.902, 4597996.368..."


In [25]:
# Inspect the geometry types produced by trimming; anything other than Polygon
# has to be exploded before the next stage.
buildings_trimmed['geometry'].geom_type.value_counts()

Polygon               1056530
GeometryCollection          4
Name: count, dtype: int64

In [26]:
# NOTE(review): this repeats the earlier preview of buildings_trimmed with no
# change in between; candidate for removal.
buildings_trimmed

Unnamed: 0,geometry
0,"POLYGON ((4588570.786 5821522.429, 4588570.911..."
1,"POLYGON ((4591929.016 5823140.091, 4591933.465..."
2,"POLYGON ((4591986.823 5821964.593, 4592002.064..."
3,"POLYGON ((4592070.997 5822450.123, 4592070.662..."
4,"POLYGON ((4592383.652 5822950.81, 4592401.503 ..."
...,...
1056529,"POLYGON ((4599129.491 5819674.681, 4599113.569..."
1056530,"POLYGON ((4576173.534 5806950.331, 4576177.445..."
1056531,"POLYGON ((4586824.286 5823051.312, 4586799.392..."
1056532,"POLYGON ((4597984.496 5812617.902, 4597996.368..."


Some trimmed buildings are slightly overlapping due to floating point errors.

In [27]:
# Re-check for overlaps after trimming (still True in the recorded run).
geoplanar.is_overlapping(buildings_trimmed)

True

Trimming buildings results in some multi-part geometries (MultiPolygons or GeometryCollections) due to the underlying topology of the polygons. This is expected, and these geometries need to be exploded into single Polygons.

In [31]:
# Split the multi-part geometries left by trimming into one row per part.
buildings_trimmed = buildings_trimmed.explode(ignore_index=True)

`geo_col_name = gdf.active_geometry_name; gdf.set_geometry(new_geo_col).drop(columns=geo_col_name).rename_geometry(geo_col_name)`.
  return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


In [32]:
# Exploding a GeometryCollection can yield lines or points as well as polygons,
# so list every row that is not a plain Polygon instead of checking only for
# MultiPolygon. An empty result means the explode left nothing but Polygons.
buildings_trimmed[buildings_trimmed['geometry'].geom_type != 'Polygon']

Unnamed: 0,geometry


## 2. Merge collapsed buildings

In [33]:
# Continue with the trimmed, exploded footprints under the working name.
buildings = buildings_trimmed

In [34]:
# Renumber rows 0..n-1 so the collapsed-building indices collected below refer
# to this frame's row labels.
buildings = buildings.reset_index(drop=True)

Set Precision Grid

In [35]:
# Round coordinates onto a 0.001 precision grid and wrap the resulting
# geometries back into a GeoDataFrame in EPSG:31468.
buildings = gpd.GeoDataFrame(geometry=buildings.set_precision(0.001), crs=31468)

Find indices of collapsed buildings

In [36]:
# A building counts as "collapsed" when shrinking it with a -0.4 buffer leaves
# an empty geometry.
shrink = buildings.buffer(-0.4)
emptycheck = shrink.is_empty
collapse = buildings.loc[emptycheck]

In [37]:
# Preview the buildings flagged as collapsed.
collapse

Unnamed: 0,geometry
45607,"POLYGON ((4593663.599 5820915.698, 4593666.151..."
46256,"POLYGON ((4593697.057 5820923.562, 4593697.037..."
47852,"POLYGON ((4593635.027 5820965.599, 4593632.753..."
47853,"POLYGON ((4593636.126 5820966.41, 4593638.325 ..."
47854,"POLYGON ((4593641.7 5820965.748, 4593639.385 5..."
...,...
1056137,"POLYGON ((4574435.095 5803938.142, 4574425.98 ..."
1056149,"POLYGON ((4570493.729 5809459.369, 4570508.914..."
1056254,"POLYGON ((4590242.858 5822757.197, 4590243.115..."
1056276,"POLYGON ((4574349.027 5807042.493, 4574355.244..."


In [38]:
# Row labels of the collapsed buildings, as a plain Python list.
index = collapse.index.tolist()

Use `geoplanar.merge_touching()` with the indices of the collapsed buildings.

In [39]:
# Merge the collapsed buildings (given by their row labels) using geoplanar.
# NOTE(review): largest=True presumably selects which touching neighbour
# receives each collapsed building - confirm against the geoplanar docs.
buildings_collapsed = geoplanar.merge_touching(buildings,index,largest=True)

In [40]:
# Confirm which geometry types remain after merging the collapsed buildings.
buildings_collapsed['geometry'].geom_type.value_counts()

Polygon    1056332
Name: count, dtype: int64

## 3. Snap buildings

In [41]:
# Snap nearby buildings together with geoplanar.
# NOTE(review): threshold=0.5 is presumably a distance in CRS units - confirm
# against the geoplanar.snap documentation.
final = geoplanar.snap(buildings_collapsed, threshold=0.5)

  return lib.simplify_preserve_topology(geometry, tolerance, **kwargs)
  return lib.simplify_preserve_topology(geometry, tolerance, **kwargs)


In [42]:
# Wrap the snapped geometries in a GeoDataFrame for the validity checks below.
gdf = gpd.GeoDataFrame(geometry=final)

Snapping buildings can create invalid geometries. These need to be made valid again.

In [43]:
# Count valid vs. invalid geometries after snapping.
gdf.is_valid.value_counts()

True     1056031
False        301
Name: count, dtype: int64

In [44]:
# Repair the geometries invalidated by snapping.
buildings_valid = gdf.make_valid()
buildings = gpd.GeoDataFrame({'geometry': buildings_valid})

# make_valid can return multi-part or lower-dimensional geometries (in step 0 it
# produced LineStrings, MultiPolygons and GeometryCollections). Explode until no
# non-empty multi-part geometry is left; nested collections need more than one
# pass, as step 0 showed.
multi_part_types = ['MultiPolygon', 'MultiLineString', 'MultiPoint', 'GeometryCollection']
while (buildings.geom_type.isin(multi_part_types) & ~buildings.is_empty).any():
    buildings = buildings.explode(ignore_index=True)

# Keep only non-empty Polygons so the final output matches the
# "keep only polygons" rule of step 0.
buildings = buildings[(buildings.geom_type == 'Polygon') & ~buildings.is_empty]
buildings = buildings.reset_index(drop=True)

In [None]:
# Show an interactive map of the final buildings inside the bounding box
# x 4595000-4600000, y 5818788-5823292 (coordinates of the frame's CRS).
buildings.cx[4595000:4600000, 5818788:5823292].explore()