## Script to retrieve venues from OpenStreetMap
    * Retrieves bars, cafes and restaurants from the open-source OpenStreetMap database
    * Creates a data frame and saves it inside the root folder as "berlin_venues_raw.csv"
    * Types of amenities can be modified in amenities = ["...", "..."]

In [None]:
import requests
import pandas as pd
import time
import logging

In [None]:

# Logging setup: timestamped, INFO-level messages
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Overpass API endpoint used for the venue queries
OVERPASS_URL = "https://overpass-api.de/api/interpreter"

# OpenStreetMap "amenity" values of interest
amenities = ["restaurant", "cafe", "bar"]

# Function to fetch data with retries
def fetch_with_retries(query, retries=3, delay=5):
    """POST a query to OVERPASS_URL and return the decoded JSON response.

    Args:
        query: Query text, sent as the ``data`` form field.
        retries: Maximum number of attempts.
        delay: Base back-off in seconds; the wait after the n-th failed
            attempt is ``delay * n``.

    Returns:
        The parsed JSON body on success, or ``None`` once every attempt has
        failed (the failure is logged, not raised).
    """
    for attempt in range(retries):
        try:
            response = requests.post(OVERPASS_URL, data={'data': query}, timeout=120)
            response.raise_for_status()
            return response.json()
        # RequestException covers connection, timeout and HTTP status errors;
        # ValueError covers a body that is not valid JSON.
        except (requests.RequestException, ValueError) as e:
            logging.warning(f"Attempt {attempt+1} failed: {e}")
            # Back off only when another attempt will follow; sleeping after
            # the last failure would just delay reporting the error.
            if attempt + 1 < retries:
                time.sleep(delay * (attempt + 1))
    logging.error("All retries failed")
    return None

# Function to parse a single venue element
def parse_venue(el):
    """Flatten one element of the API response into a venue record.

    Args:
        el: Element dict. Reads the optional keys ``tags``, ``lat``, ``lon``
            and ``center`` (a dict with ``lat``/``lon``).

    Returns:
        A dict with the keys ``name``, ``category``, ``cuisine``, ``address``,
        ``lat``, ``lon``, ``website``, ``phone``, ``opening_hours``,
        ``takeaway`` and ``wheelchair``. ``name`` and ``category`` default to
        ``'Unknown'``; every other missing value is ``None``.
    """
    tags = el.get('tags', {})
    center = el.get('center', {})

    # Join only the address parts that are present, in a fixed order.
    address_keys = ('addr:street', 'addr:housenumber', 'addr:postcode', 'addr:city')
    address = ", ".join(filter(None, (tags.get(key) for key in address_keys)))

    def coordinate(key):
        # Prefer the element's own coordinate and fall back to its 'center'.
        # Compare against None explicitly: a coordinate of 0.0 is valid and
        # must not be treated as missing.
        value = el.get(key)
        return value if value is not None else center.get(key)

    return {
        'name': tags.get('name', 'Unknown'),
        'category': tags.get('amenity', 'Unknown'),
        'cuisine': tags.get('cuisine'),
        'address': address if address else None,
        'lat': coordinate('lat'),
        'lon': coordinate('lon'),
        'website': tags.get('website'),
        'phone': tags.get('phone'),
        'opening_hours': tags.get('opening_hours'),
        'takeaway': tags.get('takeaway'),
        'wheelchair': tags.get('wheelchair'),
    }

# Parsed venue records, one dict per element returned by the query
all_records = []

# Fetch venues using Berlin administrative boundary
logging.info("Fetching venues using Berlin administrative boundary...")

# Build the amenity filter from the `amenities` list so that editing the list
# changes what is fetched. Values are inserted into a regular expression
# as-is, so they are expected to be plain amenity names.
amenity_pattern = "|".join(amenities)

# NOTE(review): the area filter matches on name and boundary type only; it may
# select more than one area named "Berlin" -- confirm whether that is intended.
query = f'''
[out:json][timeout:120];
area["name"="Berlin"]["boundary"="administrative"]->.searchArea;
(
  node["amenity"~"^({amenity_pattern})$"](area.searchArea);
  way["amenity"~"^({amenity_pattern})$"](area.searchArea);
  relation["amenity"~"^({amenity_pattern})$"](area.searchArea);
);
out center;
'''

data = fetch_with_retries(query)
if data:
    elements = data.get('elements', [])
    logging.info(f"Retrieved {len(elements)} venues from Berlin administrative area")
    all_records.extend(parse_venue(el) for el in elements)
else:
    # fetch_with_retries returns None once all attempts have failed
    logging.error("Failed to fetch data")



2025-08-15 13:03:53,806 - INFO - Fetching venues using Berlin administrative boundary...
2025-08-15 13:04:00,002 - INFO - Retrieved 8194 venues from Berlin administrative area
2025-08-15 13:04:00,052 - INFO - Saved 8194 venues to berlin_places.csv


In [None]:
# Convert to DataFrame
df = pd.DataFrame(all_records)

# Save to CSV
output_path = 'berlin_venues_raw.csv'
df.to_csv(output_path, index=False, encoding='utf-8')
# Report the path that was actually written, so the log cannot drift from
# the real file name.
logging.info(f"Saved {len(df)} venues to {output_path}")