# 0. Shapely Validation and Checking Geometry Types

Two main issues in input data
- Invalid Geometry
- Shapes of type Linestring, Multilinestring, Multipolygon and GeometryCollection

In [16]:
import geopandas as gpd

Raw data found here https://drive.google.com/drive/folders/1v7xLIkI-Va7H_dUAJ1z9yBJgV3YuYp6T

In [21]:
buildings=gpd.read_parquet('buildings_berlin.parquet',columns=['geometry'])

In [27]:
from folium.plugins import MousePosition

In [29]:
m = buildings[buildings.geom_type=='MultiPolygon'].explore(column=None, popup_columns=['ID'])
MousePosition().add_to(m)

<folium.plugins.mouse_position.MousePosition at 0x153cf0e90>

In [30]:
m

In [35]:
buildings.cx[13.52:13.54,52.42:52.44].explore()

In [38]:
buildings = buildings.to_crs(31468)

In [39]:
buildings.cx[4602104.79012012:4604824.65732128,5810724.070977277:5812978.191857001].explore()

In [25]:
buildings

Unnamed: 0,geometry,ID
0,"POLYGON ((13.30277 52.51964, 13.30332 52.51964...",0
1,"POLYGON ((13.35264 52.53331, 13.35276 52.53322...",1
2,"POLYGON ((13.35397 52.52302, 13.35395 52.52295...",2
3,"POLYGON ((13.35522 52.52727, 13.35512 52.52739...",3
4,"POLYGON ((13.36014 52.53152, 13.35987 52.53184...",4
...,...,...
1057114,"POLYGON ((13.4585 52.50118, 13.45827 52.50122,...",1057114
1057115,"POLYGON ((13.11685 52.39058, 13.1174 52.39058,...",1057115
1057116,"POLYGON ((13.2772 52.53379, 13.27694 52.53323,...",1057116
1057117,"POLYGON ((13.43921 52.43791, 13.43934 52.43803...",1057117


In [24]:
# add ID column based on dataframe index
buildings['ID'] = buildings.index

In [11]:
buildings = buildings.to_crs(31468)

## Make valid

In [12]:
# check validity of geodataframe geometries
valid = buildings.is_valid

In [15]:
valid.value_counts()

True     1057112
False          7
Name: count, dtype: int64

In [17]:
buildings_valid=buildings.make_valid()

In [18]:
buildings=gpd.GeoDataFrame({'geometry':buildings_valid})

In [19]:
buildings

Unnamed: 0,geometry
0,"POLYGON ((4588533.734 5821521.726, 4588570.786..."
1,"POLYGON ((4591889.97 5823105.866, 4591898.467 ..."
2,"POLYGON ((4592002.064 5821961.774, 4592000.684..."
3,"POLYGON ((4592077.946 5822437.286, 4592070.997..."
4,"POLYGON ((4592403.073 5822915.869, 4592383.652..."
...,...
1057114,"POLYGON ((4599145.094 5819669.998, 4599129.491..."
1057115,"POLYGON ((4576136.11 5806949.507, 4576173.534 ..."
1057116,"POLYGON ((4586770.205 5823065.392, 4586753.742..."
1057117,"POLYGON ((4597976.059 5812604.354, 4597984.496..."


## Check Geometry Type

In [88]:
buildings['geometry'].geom_type.value_counts()

Polygon               1056951
MultiPolygon               82
LineString                 49
MultiLineString            33
GeometryCollection          4
Name: count, dtype: int64

In [20]:
buildings[buildings.geom_type=='MultiPolygon'].explore(column=None, popup_columns=['ID'])

### Explode dataframe to turn multipolygons into polygons and remove linestrings

In [90]:
buildings=buildings.explode(ignore_index=True)

`geo_col_name = gdf.active_geometry_name; gdf.set_geometry(new_geo_col).drop(columns=geo_col_name).rename_geometry(geo_col_name)`.
  return gf.set_geometry(col, drop=drop, inplace=False, crs=crs)


In [91]:
buildings

Unnamed: 0,geometry
0,"POLYGON ((4588533.734 5821521.726, 4588570.786..."
1,"POLYGON ((4591889.97 5823105.866, 4591898.467 ..."
2,"POLYGON ((4592002.064 5821961.774, 4592000.684..."
3,"POLYGON ((4592077.946 5822437.286, 4592070.997..."
4,"POLYGON ((4592403.073 5822915.869, 4592383.652..."
...,...
1057412,"POLYGON ((4599145.094 5819669.998, 4599129.491..."
1057413,"POLYGON ((4576136.11 5806949.507, 4576173.534 ..."
1057414,"POLYGON ((4586770.205 5823065.392, 4586753.742..."
1057415,"POLYGON ((4597976.059 5812604.354, 4597984.496..."


In [92]:
buildings['geometry'].geom_type.value_counts()

Polygon            1057241
LineString             169
MultiLineString          4
MultiPolygon             3
Name: count, dtype: int64

In [93]:
buildings=buildings[buildings['geometry'].geom_type=='Polygon']

In [94]:
buildings['geometry'].geom_type.value_counts()

Polygon    1057241
Name: count, dtype: int64

### Save to parquet

In [95]:
buildings.to_parquet('buildings_berlin_0.parquet')