In [None]:
import pandas as pd
from geopy.geocoders import Nominatim
from time import sleep

# First, I load the previously enriched dataset that already contains the Stadtteil info
df = pd.read_csv("ubahn_with_stadtteil.csv")

# Then, I initialize a new Nominatim geocoder instance for retrieving the Bezirke.
# geopy's default timeout is only 1 second, which the public Nominatim server
# regularly exceeds; I allow more time so slow responses are not treated as
# failures and silently recorded as missing values.
geolocator = Nominatim(user_agent="berlin_bezirk_locator", timeout=10)

# Now I define a reverse geocoding function to extract the official Berlin district (Bezirk)
# I include a 1-second sleep to stay within Nominatim's API rate limit
def get_bezirk(lat, lon, *, geocoder=None, delay=1.0):
    """Reverse-geocode a coordinate and return its district (Bezirk) name.

    The lookup is best-effort: any error raised by the geocoder results in
    ``None`` rather than an exception, so one bad row cannot abort a batch.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    a long-running batch can still be stopped by the user.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        geocoder: Object with a geopy-style ``reverse()`` method. Defaults to
            the module-level ``geolocator``.
        delay: Seconds to pause after every request, successful or not, to
            throttle calls to the geocoding service. ``0`` or ``None``
            disables the pause.

    Returns:
        The first non-empty value among the ``city_district``, ``borough``
        and ``county`` address fields, or ``None`` if the lookup failed or
        none of these fields is present.
    """
    if geocoder is None:
        geocoder = geolocator

    try:
        location = geocoder.reverse((lat, lon), exactly_one=True, language='de')
    except Exception:
        # Best-effort lookup: a failed request yields a missing value.
        location = None

    # Pause after *every* request, including failed ones; otherwise a run of
    # errors would hit the service back-to-back without any throttling.
    if delay and delay > 0:
        sleep(delay)

    if location is None:
        return None

    # Parse defensively so an unexpected payload shape cannot raise.
    raw = getattr(location, "raw", None)
    address = raw.get("address") if isinstance(raw, dict) else None
    if not isinstance(address, dict):
        return None

    # Fall back through progressively coarser address fields.
    for field in ("city_district", "borough", "county"):
        name = address.get(field)
        if name:
            return name
    return None

# I look up the Bezirk for every row and store it in a new column.
# Rows missing either coordinate are skipped without sending a request.
# I build a plain list instead of using df.apply(axis=1), because apply returns
# a DataFrame (not a Series) for an empty frame, which breaks the assignment.
df["neighborhood"] = [
    get_bezirk(lat, lon) if pd.notnull(lat) and pd.notnull(lon) else None
    for lat, lon in zip(df["latitude"], df["longitude"])
]

# Just to be safe, I make sure the postcode stays in a clean numeric format
# (unparseable values become missing; the nullable Int64 dtype keeps the rest integral)
df["postcode"] = pd.to_numeric(df["postcode"], errors="coerce").astype("Int64")

# Finally, I save the fully enriched dataset with both Stadtteil and Bezirk columns
df.to_csv("ubahn_with_neighborhoods.csv", index=False)