## Berlin Pools Geocoding Script

#### This Python script enriches a dataset of Berlin swimming pools (berlin_pools_clean.csv) by performing the following tasks:

- Reverse geocoding: Uses each pool's latitude and longitude to look up its Berlin district (`district`)
  via the Nominatim API.

- District ID creation: Assigns each distinct district a unique integer identifier (`district_id`), numbered from 1 in order of first appearance.

In [1]:
import pandas as pd 
from geopy.geocoders import Nominatim
from time import sleep

In [9]:

# Set up the Nominatim client used for the reverse-geocoding lookups
geolocator = Nominatim(user_agent="berlin_bezirk_locator")

# Read the cleaned pool dataset into a DataFrame
df = pd.read_csv(filepath_or_buffer="berlin_pools_clean.csv")

# Function to get the district from lat/lon
def get_bezirk(lat, lon):
    """Return the district name for a coordinate pair, or None if unresolved.

    Reverse-geocodes ``(lat, lon)`` through the module-level ``geolocator``
    (German-language results) and returns the first non-empty address field
    among ``city_district``, ``borough`` and ``county``.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        The district name, or None when the lookup raised, returned no
        result, or the result's address has none of the fields above.
    """
    try:
        location = geolocator.reverse(
            (lat, lon),
            exactly_one=True,
            language='de',
            timeout=10  # increased timeout to avoid connection errors
        )
        if location and location.raw.get("address"):
            address = location.raw["address"]
            # Fall back through progressively coarser address fields.
            return (
                address.get("city_district") or
                address.get("borough") or
                address.get("county") or
                None
            )
        return None
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole run.
        print(f"Error for coords ({lat}, {lon}): {e}")
        return None
    finally:
        # Stay within API limits (1 request/sec). Throttling here rather than
        # only on the success path keeps the pause in place after a failed
        # request too, so errors do not trigger back-to-back calls.
        sleep(1)

# Apply geocoding to each row to get the district name.
# Rows missing a latitude or longitude are skipped (no API call) and get None.
df["district"] = df.apply(
    lambda row: get_bezirk(row["latitude"], row["longitude"])
    if pd.notnull(row["latitude"]) and pd.notnull(row["longitude"]) else None,
    axis=1
)

# Number the distinct districts 1..N in order of first appearance.
district_mapping = {
    district: idx
    for idx, district in enumerate(df["district"].dropna().unique(), start=1)
}

# Rows without a district map to NaN, which a plain int column cannot hold
# (astype(int) raises on NaN). The nullable "Int64" dtype keeps resolved IDs
# as integers and leaves unresolved rows as <NA> (empty cell in the CSV).
df["district_id"] = df["district"].map(district_mapping).astype("Int64")

# Save updated file
df.to_csv("berlin_pools.csv", index=False)

