In [2]:
import requests
import pandas as pd
import logging

# Logging setup: timestamped INFO-level messages on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

OVERPASS_URL = "https://overpass-api.de/api/interpreter"

# Client-side timeout (seconds) for the Overpass call. It is deliberately
# larger than the server-side ``[timeout:25]`` in the query so the server gets
# the chance to answer (or report its own timeout) before we give up.
OVERPASS_REQUEST_TIMEOUT = 60

# Query Berlin for restaurants, cafes, and bars.
# The amenity regex is anchored with ^...$ because Overpass regex filters match
# substrings; without anchors values such as "internet_cafe" would also match.
# NOTE(review): area[name="Berlin"] selects every area called "Berlin";
# confirm whether the query should be pinned to the German city explicitly.
query = """
[out:json][timeout:25];
area[name="Berlin"]->.searchArea;
(
  node["amenity"~"^(restaurant|cafe|bar)$"](area.searchArea);
  way["amenity"~"^(restaurant|cafe|bar)$"](area.searchArea);
  relation["amenity"~"^(restaurant|cafe|bar)$"](area.searchArea);
);
out center 10;   // only first 10 results for test
"""

logging.info("Fetching test venues from Overpass...")
response = requests.post(
    OVERPASS_URL,
    data={"data": query},
    headers={"User-Agent": "venue-data-tester/1.0"},
    # Without a timeout requests waits forever on a stalled connection.
    timeout=OVERPASS_REQUEST_TIMEOUT,
)
# Fail loudly on HTTP errors instead of trying to parse an error page.
response.raise_for_status()
data = response.json()

# "elements" holds the matched OSM objects; default to an empty list if absent.
elements = data.get("elements", [])
logging.info("Retrieved %d elements", len(elements))

# Parse into dataframe
def _format_address(tags):
    """Build a one-line postal address from OSM ``addr:*`` tags.

    Produces ``"<street> <housenumber>, <postcode> <city>"`` and leaves out
    any component that is missing, so no stray spaces or commas remain
    (e.g. ``"Knesebeckstraße, 10623 Berlin"`` when the house number is absent).

    Args:
        tags: Mapping of OSM tag names to values for one element.

    Returns:
        The formatted address, or an empty string when no address tag is set.
    """
    street_line = " ".join(
        part
        for part in (tags.get("addr:street"), tags.get("addr:housenumber"))
        if part
    )
    city_line = " ".join(
        part
        for part in (tags.get("addr:postcode"), tags.get("addr:city"))
        if part
    )
    return ", ".join(part for part in (street_line, city_line) if part)


records = []
for el in elements[:10]:
    tags = el.get("tags", {})
    # Elements without their own coordinates are expected to carry them under
    # "center" (the query requests ``out center``).
    center = el.get("center", {})

    # Compare against None explicitly so a legitimate 0.0 coordinate is not
    # mistaken for "missing".
    lat = el.get("lat")
    if lat is None:
        lat = center.get("lat")
    lon = el.get("lon")
    if lon is None:
        lon = center.get("lon")

    records.append({
        "name": tags.get("name", "Unknown"),
        "category": tags.get("amenity", "Unknown"),
        "cuisine": tags.get("cuisine"),
        "address": _format_address(tags),
        "latitude": lat,
        "longitude": lon,
        "website": tags.get("website"),
        "phone": tags.get("phone"),
        "opening_hours": tags.get("opening_hours"),
        "takeaway": tags.get("takeaway"),
        "wheelchair": tags.get("wheelchair"),
    })

df = pd.DataFrame(records)

# Reverse geolocation functions
def get_bezirk(lat, lon):
    """Reverse-geocode a coordinate pair and return its district name.

    Calls the Nominatim ``reverse`` endpoint and returns the first non-empty
    value among the ``suburb``, ``city_district`` and ``borough`` address
    fields, in that order.

    Args:
        lat: Latitude of the point; ``None`` or NaN means "unknown".
        lon: Longitude of the point; ``None`` or NaN means "unknown".

    Returns:
        The district name, or ``None`` when a coordinate is missing, the
        request fails, the response is not valid JSON, or none of the three
        address fields is present.
    """
    # Skip the network round-trip when there is nothing to look up.
    if pd.isna(lat) or pd.isna(lon):
        return None

    url = "https://nominatim.openstreetmap.org/reverse"
    params = {"lat": lat, "lon": lon, "format": "json", "addressdetails": 1}
    headers = {"User-Agent": "venue-data-tester/1.0"}
    try:
        r = requests.get(url, params=params, headers=headers, timeout=5)
        r.raise_for_status()
        payload = r.json()
    # ValueError covers undecodable JSON bodies on requests versions whose
    # JSON error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        logging.warning(f"Error fetching district for ({lat}, {lon}): {e}")
        return None

    address = payload.get("address", {})
    # NOTE(review): "suburb" is tried first; if the administrative Bezirk is
    # what callers want, confirm which field Nominatim uses for it.
    return (
        address.get("suburb")
        or address.get("city_district")
        or address.get("borough")
    )

def get_address(lat, lon):
    """Reverse-geocode a coordinate pair and return its full address string.

    Calls the Nominatim ``reverse`` endpoint and returns the response's
    ``display_name`` field.

    Args:
        lat: Latitude of the point; ``None`` or NaN means "unknown".
        lon: Longitude of the point; ``None`` or NaN means "unknown".

    Returns:
        The ``display_name`` value, or ``None`` when a coordinate is missing,
        the request fails, the response is not valid JSON, or the field is
        absent.
    """
    # Skip the network round-trip when there is nothing to look up.
    if pd.isna(lat) or pd.isna(lon):
        return None

    url = "https://nominatim.openstreetmap.org/reverse"
    params = {"lat": lat, "lon": lon, "format": "json", "addressdetails": 1}
    headers = {"User-Agent": "venue-data-tester/1.0"}
    try:
        r = requests.get(url, params=params, headers=headers, timeout=5)
        r.raise_for_status()
        payload = r.json()
    # ValueError covers undecodable JSON bodies on requests versions whose
    # JSON error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        logging.warning(f"Error fetching address for ({lat}, {lon}): {e}")
        return None

    return payload.get("display_name")

# Apply reverse geolocation
# Each derived column is filled by calling its lookup function once per row
# with that row's latitude and longitude, in the order listed below.
# NOTE(review): both lookups issue their own HTTP request per row; check the
# geocoding service's rate limits before running this on a larger frame.
_geo_columns = (
    ("district", get_bezirk),
    ("full_address", get_address),
)
for _column, _lookup in _geo_columns:
    df[_column] = df.apply(
        # Bind the lookup as a default so each lambda keeps its own function.
        lambda row, fn=_lookup: fn(row["latitude"], row["longitude"]),
        axis=1,
    )

# Show results
print(df.head(10))

2025-08-18 11:08:47,720 - INFO - Fetching test venues from Overpass...
2025-08-18 11:08:57,690 - INFO - Retrieved 10 elements


              name    category        cuisine  \
0             Aida  restaurant  italian;pizza   
1       Madame Ngo  restaurant          asian   
2        Nam Thuân  restaurant     vietnamese   
3          La Rose  restaurant        italian   
4  Eiscafe Eisberg        cafe           None   
5       Tibet Haus  restaurant        tibetan   
6      Coffee Star        cafe           None   
7           Paella  restaurant        mexican   
8           Cancún  restaurant        mexican   
9        Schatulle  restaurant       regional   

                              address   latitude  longitude  \
0      Knesebeckstraße , 10623 Berlin  52.506864  13.322859   
1         Kantstraße 30, 10623 Berlin  52.506212  13.318081   
2  Pestalozzistraße 106, 10625 Berlin  52.507320  13.320780   
3    Neue Kantstraße 19, 14057 Berlin  52.506318  13.284626   
4      Brunnenstraße 55, 13355 Berlin  52.538855  13.396097   
5    Zossener Straße 19, 10961 Berlin  52.490710  13.393981   
6      Müllerstraße