In [None]:
import geopandas as gpd
import os

# WFS layer that this notebook treats as the protected ("Milieuschutz") zones.
# NOTE(review): the layer suffix `_es` and the `ES....` keys in the output may
# denote a different kind of preservation area than Milieuschutz — confirm
# against the service's layer list that this is the intended layer.
typename = "erhaltungsverordnungsgebiete:erhaltgeb_es"

# WFS endpoint of the Berlin geodata infrastructure
url = "https://gdi.berlin.de/services/wfs/erhaltungsverordnungsgebiete"

# Download the layer straight into a GeoDataFrame.
# SRSNAME pins the coordinate reference system to EPSG:25833 so the result
# does not depend on the server default: the address layer is requested with
# the same SRSNAME, and the CSV round trip later re-declares the geometries
# as EPSG:25833.
gdf = gpd.read_file(
    f"{url}?SERVICE=WFS&VERSION=2.0.0&REQUEST=GetFeature&TYPENAMES={typename}"
    "&SRSNAME=EPSG:25833&OUTPUTFORMAT=application/json"
)

# Make sure the output directory exists (no error if it already does)
os.makedirs("sources", exist_ok=True)

# Keep a raw copy of the download as GeoJSON
gdf.to_file("sources/milieuschutzgebiete.geojson", driver="GeoJSON")

# Preview the first rows
print(gdf.head())

                    id schluessel bezirk  \
0  erhaltgeb_es.ES0101     ES0101  Mitte   
1  erhaltgeb_es.ES0102     ES0102  Mitte   
2  erhaltgeb_es.ES0103     ES0103  Mitte   
3  erhaltgeb_es.ES0104     ES0104  Mitte   
4  erhaltgeb_es.ES0105     ES0105  Mitte   

                                         gebietsname  f_gvbl_dat  f_in_kraft  \
0                                        Poststadion  30.12.1988  31.12.1988   
1                                 Spandauer Vorstadt  25.06.1993  26.06.1993   
2  Südliche Brunnenstraße Teile der Rosenthaler V...  09.12.1995  10.12.1995   
3                            Friedrich-Wilhelm-Stadt  31.08.1996  01.09.1996   
4                     Dorotheenstadt, Friedrichstadt  10.04.1997  11.04.1997   

  ae_gvbldat ae_inkraft fl_in_ha  \
0       None       None     53.2   
1       None       None    109.1   
2       None       None     18.8   
3       None       None     69.5   
4       None       None     98.7   

                                     

In [None]:
# Translate the German WFS attribute names into English column names.
english_names = dict(
    schluessel="code",
    bezirk="neighborhood",
    gebietsname="zone_name",
    f_gvbl_dat="publication_date",
    f_in_kraft="effective_date",
    ae_gvbldat="alt_publication_date",
    ae_inkraft="alt_effective_date",
    fl_in_ha="area_ha",
)
gdf = gdf.rename(columns=english_names)

In [None]:
# Export the zones as a plain table whose geometry column holds WKT text.
#
# GeoDataFrame.to_wkt() returns a regular pandas DataFrame with every geometry
# column encoded as WKT, so `gdf` itself is not modified and no strings are
# assigned into an active geometry column (the assignment the previous
# version used is the line the cell's warning output pointed at).
# rounding_precision=-1 writes coordinates at full precision instead of the
# default of 6 decimals.
gdf_wkt = gdf.to_wkt(rounding_precision=-1)

# Save to CSV with geometry as text
gdf_wkt.to_csv("sources/milieuschutz_areas_with_geom.csv", index=False)

  gdf_wkt["geometry"] = gdf_wkt["geometry"].apply(lambda geom: geom.wkt)


In [None]:
# And if we want to make a GeoDataFrame from this CSV again:
import pandas as pd
from shapely import wkt

# Read the exported table back and parse the WKT text into geometry objects.
df = pd.read_csv("sources/milieuschutz_areas_with_geom.csv")
df["geometry"] = [wkt.loads(text) for text in df["geometry"]]

# The CRS is declared explicitly when the GeoDataFrame is rebuilt.
gdf_restored = gpd.GeoDataFrame(df, crs="EPSG:25833", geometry="geometry")

In [None]:
import geopandas as gpd

# Address layer of the Berlin WFS, requested as GeoJSON in EPSG:25833.
url = "https://gdi.berlin.de/services/wfs/adressen_berlin"
wfs_params = (
    "SERVICE=WFS",
    "VERSION=2.0.0",
    "REQUEST=GetFeature",
    "TYPENAMES=adressen_berlin",
    "SRSNAME=EPSG:25833",
    "OUTPUTFORMAT=application/json",
)
houses = gpd.read_file(url + "?" + "&".join(wfs_params))

In [None]:
# gdf_restored — protected areas (Milieuschutz) rebuilt from the CSV
# houses — address points to be tested against those areas

print(gdf_restored.crs)
print(houses.crs)

# Both layers must be in the same coordinate reference system for the spatial
# join to compare like with like; stop here instead of joining mismatched data.
assert gdf_restored.crs == houses.crs, "zones and houses must share the same CRS before joining"

EPSG:25833
EPSG:25833


In [None]:
# Spatial join: keep every address whose geometry intersects a protected zone,
# carrying that zone's attributes along (inner join drops unmatched addresses).
houses_in_zones = houses.sjoin(gdf_restored, how="inner", predicate="intersects")

# Persist the joined table as CSV
houses_in_zones.to_csv("sources/houses_in_milieuschutz.csv", index=False)

# Preview the result
print(houses_in_zones.head())

                    id_left adressid    hnr hnr_zusatz             str_name  \
2       adressen_berlin.100      100   51.0          B       Gollanczstraße   
3      adressen_berlin.1000     1000  458.0       None       Stadtbahnbogen   
111    adressen_berlin.1001     1001  460.0       None       Stadtbahnbogen   
140  adressen_berlin.100126   100126    2.0       None  Sobernheimer Straße   
141  adressen_berlin.100127   100127    4.0       None  Sobernheimer Straße   

    str_nr    plz          bez_name bez_nr      ort_name  ... index_right  \
2    01623  13465     Reinickendorf     12       Frohnau  ...          90   
3    09138  10555             Mitte     01  Hansaviertel  ...          15   
111  09138  10555             Mitte     01  Hansaviertel  ...          15   
140  42717  12559  Treptow-Köpenick     09    Müggelheim  ...          82   
141  42717  12559  Treptow-Köpenick     09    Müggelheim  ...          82   

                id_right    code      neighborhood            

In [None]:
# Load the joined table. The address id is an identifier, not a quantity, so
# it is read as text to give the column a single consistent type.
# NOTE(review): the warning printed by the earlier version of this cell looks
# like a mixed-type warning from read_csv — confirm which column triggered it.
df = pd.read_csv("sources/houses_in_milieuschutz.csv", dtype={"adressid": str})

# Source column -> output column. Entries that map a name to itself exist only
# so the column is included in `columns_to_keep` below.
column_mapping = {
    "adressid": "address_id",
    "hnr": "house_number",
    "hnr_zusatz": "house_number_extra",
    "str_name": "street_name",
    "plz": "postal_code",
    "code": "zone_code",
    # The zone table already names this column "neighborhood"; the former key
    # "district" matched no column and was silently ignored by rename().
    "neighborhood": "neighborhood",
    "zone_name": "zone_name",
    "publication_date": "publication_date",
    "effective_date": "effective_date",
    "area_ha": "area_ha"
}

# Apply the renaming
df = df.rename(columns=column_mapping)

# Keep only the mapped columns. .copy() makes df_cleaned an independent frame,
# so later column assignments do not raise SettingWithCopyWarning.
columns_to_keep = list(column_mapping.values())
df_cleaned = df[columns_to_keep].copy()

# Save as CSV
df_cleaned.to_csv("sources/cleaned_houses_in_zones.csv", index=False)

# Show the first 5 rows
df_cleaned.head()

  df = pd.read_csv("sources/houses_in_milieuschutz.csv")


Unnamed: 0,address_id,house_number,house_number_extra,street_name,postal_code,zone_code,neighborhood,zone_name,publication_date,effective_date,area_ha
0,100,51.0,B,Gollanczstraße,13465,ES1201,Reinickendorf,Frohnau,17.05.1997,18.05.1997,783.8
1,1000,458.0,,Stadtbahnbogen,10555,ES0117,Mitte,Hansaviertel,30.01.2020,31.01.2020,54.3
2,1001,460.0,,Stadtbahnbogen,10555,ES0117,Mitte,Hansaviertel,30.01.2020,31.01.2020,54.3
3,100126,2.0,,Sobernheimer Straße,12559,ES0911,Treptow-Köpenick,Dorfkern Alt-Müggelheim,06.03.2008,07.03.2008,7.0
4,100127,4.0,,Sobernheimer Straße,12559,ES0911,Treptow-Köpenick,Dorfkern Alt-Müggelheim,06.03.2008,07.03.2008,7.0


In [None]:
# Inspect the column types of the cleaned table as loaded from CSV
print(df_cleaned.dtypes)

address_id             object
house_number          float64
house_number_extra     object
street_name            object
postal_code             int64
zone_code              object
neighborhood           object
zone_name              object
publication_date       object
effective_date         object
area_ha               float64
dtype: object


In [None]:
# Convert the necessary columns to the correct types.
# .assign() returns a new DataFrame rather than writing into df_cleaned one
# column at a time, so no SettingWithCopyWarning is raised even when
# df_cleaned was created as a slice of another frame.
df_cleaned = df_cleaned.assign(
    address_id=df_cleaned["address_id"].astype(str),
    # Go through nullable Int64 first so 51.0 becomes "51" rather than "51.0"
    house_number=df_cleaned["house_number"].astype("Int64").astype(str),
    # Dates are day-first text (e.g. 30.12.1988); unparseable values become NaT
    publication_date=pd.to_datetime(df_cleaned["publication_date"], errors="coerce", dayfirst=True),
    effective_date=pd.to_datetime(df_cleaned["effective_date"], errors="coerce", dayfirst=True),
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["address_id"] = df_cleaned["address_id"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["house_number"] = df_cleaned["house_number"].astype("Int64").astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["publication_date"] = pd.to_datetime(df_cleane

In [None]:
# Show the column types of df_cleaned to verify the type conversions
print(df_cleaned.dtypes)

address_id                    object
house_number                  object
house_number_extra            object
street_name                   object
postal_code                    int64
zone_code                     object
neighborhood                  object
zone_name                     object
publication_date      datetime64[ns]
effective_date        datetime64[ns]
area_ha                      float64
dtype: object


In [None]:
# Write the type-converted table to CSV (no index column)
cleaned_csv_path = "sources/cleaned_houses_in_zones.csv"
df_cleaned.to_csv(cleaned_csv_path, index=False)

# Preview the first rows as a quick check
df_cleaned.head()

Unnamed: 0,address_id,house_number,house_number_extra,street_name,postal_code,zone_code,neighborhood,zone_name,publication_date,effective_date,area_ha
0,100,51,B,Gollanczstraße,13465,ES1201,Reinickendorf,Frohnau,1997-05-17,1997-05-18,783.8
1,1000,458,,Stadtbahnbogen,10555,ES0117,Mitte,Hansaviertel,2020-01-30,2020-01-31,54.3
2,1001,460,,Stadtbahnbogen,10555,ES0117,Mitte,Hansaviertel,2020-01-30,2020-01-31,54.3
3,100126,2,,Sobernheimer Straße,12559,ES0911,Treptow-Köpenick,Dorfkern Alt-Müggelheim,2008-03-06,2008-03-07,7.0
4,100127,4,,Sobernheimer Straße,12559,ES0911,Treptow-Köpenick,Dorfkern Alt-Müggelheim,2008-03-06,2008-03-07,7.0


In [None]:
# Number of rows in the cleaned table
len(df_cleaned)

38019

In [None]:
# Count missing values per column.
# The variable was previously assigned as `bmissing_values` (typo), so the
# print below only worked if `missing_values` was left over from an earlier run.
missing_values = df_cleaned.isna().sum()
print(missing_values)

address_id                0
house_number              0
house_number_extra    32071
street_name               0
postal_code               0
zone_code                 0
neighborhood              0
zone_name                 0
publication_date          0
effective_date            0
area_ha                   0
dtype: int64
