# In [None]:
import pandas as pd
from geopy.geocoders import Nominatim
from time import sleep

# First, I load the station-line dataset I previously created and cleaned
df = pd.read_csv("merged_ubahn_line.csv")

# Then I initialize the Nominatim geocoder with a custom user agent to avoid being blocked.
# geopy's default request timeout is only 1 second and a timed-out lookup raises an
# exception, so I allow up to 10 seconds per request to avoid losing results on slow responses.
geolocator = Nominatim(user_agent="berlin_ubahn_locator", timeout=10)

# Now I define a reverse geocoding function that retrieves the Stadtteil from latitude and longitude
# I include a 1-second delay after each request to comply with Nominatim’s rate limit policy
def get_stadtteil(lat, lon):
    """Reverse-geocode a coordinate pair and return its Stadtteil.

    Args:
        lat: Latitude of the point to look up.
        lon: Longitude of the point to look up.

    Returns:
        The ``suburb`` entry of the address returned by Nominatim if present,
        otherwise its ``city_district`` entry. ``None`` when either
        coordinate is missing, the lookup fails, or the response contains
        neither key.
    """
    import logging

    # Nothing to look up: skip the network round-trip and the delay entirely.
    if pd.isnull(lat) or pd.isnull(lon):
        return None

    try:
        location = geolocator.reverse((lat, lon), exactly_one=True, language='de')
    except Exception as exc:
        # Best effort: one failed lookup must not abort the whole run, but it
        # should be visible. Catching Exception (not a bare except) still lets
        # KeyboardInterrupt / SystemExit stop a long-running job.
        logging.getLogger(__name__).warning(
            "Reverse geocoding failed for (%s, %s): %r", lat, lon, exc
        )
        location = None

    # Respect Nominatim API limits. The pause follows every attempted request,
    # including failed ones, so errors are not retried at full speed.
    sleep(1)

    if not location:
        return None

    address = location.raw.get('address', {})
    # Prefer the finer-grained 'suburb'; fall back to 'city_district'.
    for key in ('suburb', 'city_district'):
        if key in address:
            return address[key]
    return None

# I now extract the Stadtteil for each station.
# Each lookup costs a network request plus a delay, so I geocode every distinct
# coordinate pair only once (the same pair may appear on several rows) and then
# map the results back onto the rows. Rows with a missing latitude or longitude
# are never sent to the geocoder and receive None.
unique_coords = df[["latitude", "longitude"]].dropna().drop_duplicates()
stadtteil_by_coord = {
    (lat, lon): get_stadtteil(lat, lon)
    for lat, lon in unique_coords.itertuples(index=False, name=None)
}
df["stadtteil"] = [
    stadtteil_by_coord.get((lat, lon))
    for lat, lon in zip(df["latitude"], df["longitude"])
]

# Finally, I save the enriched dataset with the new Stadtteil column
df.to_csv("ubahn_with_stadtteil.csv", index=False)