In [1]:
import requests
import json
import csv
import logging
import os
import time
from datetime import datetime
from tenacity import retry, wait_exponential, stop_after_attempt
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import Optional, List

# Read settings from a .env file into the process environment before any
# os.getenv lookups run (the Foursquare key is read that way further down).
load_dotenv()

class Venue(BaseModel):
    """One food-and-drink venue record.

    ``name``, ``latitude``, ``longitude`` and ``venue_type`` are required;
    every other field defaults to ``None`` when it is not supplied.
    """
    # Display name of the venue.
    name: str
    # Position of the venue; presumably decimal degrees as delivered by
    # OpenStreetMap — confirm if another source is ever plugged in.
    latitude: float
    longitude: float
    # Kind of venue; the main cell fills it from the OSM "amenity" tag.
    venue_type: str
    # Filled from the OSM "cuisine" tag by the main cell.
    cuisine_type: Optional[str] = None
    # Filled from the OSM "addr:street" tag by the main cell (street only).
    address: Optional[str] = None
    # Filled from the OSM "phone" tag by the main cell.
    contact_info: Optional[str] = None
    # List of opening-hours strings; not populated anywhere in this notebook.
    opening_hours: Optional[List[str]] = None
    # Rating added later by the optional Foursquare enrichment step.
    rating: Optional[float] = None
    # Name of the data source the record came from.
    source: Optional[str] = None


In [2]:
# --- Logging: INFO and above, timestamped, on a stream handler ---
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# --- Output folder: created up front so the save helpers can write into it ---
OUTPUT_DIR = "output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- API endpoints ---
OVERPASS_ENDPOINT = "https://overpass-api.de/api/interpreter"
FSQ_SEARCH_URL = "https://api.foursquare.com/v3/places/search"
FSQ_DETAILS_URL = "https://api.foursquare.com/v3/places/{fsq_id}"

# --- Foursquare credentials: optional, only used for ratings ---
FSQ_API_KEY = os.environ.get("FSQ_API_KEY")
FSQ_HEADERS = {}
if FSQ_API_KEY:
    # Only send an Authorization header when a key is actually configured.
    FSQ_HEADERS = {"Authorization": FSQ_API_KEY}


In [3]:
@retry(wait=wait_exponential(multiplier=2, min=4, max=60), stop=stop_after_attempt(5))
def fetch_osm(lat: float, lon: float, radius: int, timeout: int = 60):
    """Query the Overpass API for restaurant, cafe and bar nodes around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Search radius in metres, passed to the Overpass ``around`` filter.
        timeout: Seconds the Overpass server is allowed to spend on the query.
            The HTTP read timeout is derived from it so the client never
            gives up while the server is still inside its declared budget.

    Returns:
        The decoded JSON body of the Overpass response.

    Raises:
        Whatever the ``retry`` decorator raises once all five attempts have
        failed; an attempt fails on any request error or non-success status.
    """
    logging.info(f"Fetching OSM venues at {lat},{lon} within {radius}m")
    # The server-side limit used to be hard-coded to 300 s while the client
    # stopped waiting after 60 s, so a slow query was abandoned and re-sent by
    # the retry decorator while Overpass was still working on the first one.
    query = f"""
    [out:json][timeout:{timeout}];
    node["amenity"~"restaurant|cafe|bar"](around:{radius},{lat},{lon});
    out;
    """
    # (connect, read): fail fast on an unreachable host, and leave a small
    # margin over the server budget for the response to be transferred.
    resp = requests.post(
        OVERPASS_ENDPOINT,
        data={"data": query},
        timeout=(10, timeout + 10),
    )
    resp.raise_for_status()
    return resp.json()


In [4]:
def fsq_find_id(name: str, lat: float, lon: float):
    """Return the Foursquare id of the top search hit for a venue, if any.

    The search is restricted to 75 m around ``lat``/``lon`` and to a single
    result. The lookup is best-effort: a failure is logged and reported as
    ``None`` rather than raised, so one bad venue cannot stop a batch run.

    Args:
        name: Venue name used as the search text.
        lat: Latitude of the venue.
        lon: Longitude of the venue.

    Returns:
        The ``fsq_id`` of the first result, or ``None`` when no API key is
        configured, ``name`` is empty, nothing matched, or the request failed.
    """
    # Without a key or a search text there is nothing meaningful to ask for.
    if not FSQ_API_KEY or not name:
        return None
    try:
        params = {"ll": f"{lat},{lon}", "radius": 75, "query": name, "limit": 1}
        r = requests.get(FSQ_SEARCH_URL, headers=FSQ_HEADERS, params=params, timeout=15)
        r.raise_for_status()
        items = r.json().get("results", [])
        return items[0]["fsq_id"] if items else None
    except Exception as exc:
        # Still best-effort, but no longer silent: an invalid key or exhausted
        # quota would otherwise be indistinguishable from "no match".
        logging.warning(f"Foursquare search failed for {name!r}: {exc}")
        return None

def fsq_get_rating(fsq_id: str):
    """Fetch the rating of a Foursquare place.

    The lookup is best-effort: a failure is logged and reported as ``None``
    rather than raised.

    Args:
        fsq_id: Foursquare place id, as returned by the search endpoint.

    Returns:
        The value of the ``rating`` field of the place details, or ``None``
        when no API key is configured, ``fsq_id`` is empty, the place has no
        rating, or the request failed.
    """
    if not FSQ_API_KEY or not fsq_id:
        return None
    try:
        r = requests.get(
            FSQ_DETAILS_URL.format(fsq_id=fsq_id),
            headers=FSQ_HEADERS,
            # The details endpoint only returns its default field set unless
            # told otherwise; "rating" must be requested explicitly, or the
            # lookup below never finds it.
            params={"fields": "rating"},
            timeout=15,
        )
        r.raise_for_status()
        data = r.json()
        return data.get("rating")
    except Exception as exc:
        # Still best-effort, but surface the reason instead of hiding it.
        logging.warning(f"Foursquare details lookup failed for {fsq_id}: {exc}")
        return None


In [5]:
def save_json(data, filename):
    """Write ``data`` as indented UTF-8 JSON to ``filename`` inside OUTPUT_DIR.

    Non-ASCII characters are written as-is rather than escaped. The
    destination path is logged once the file has been written.
    """
    target = os.path.join(OUTPUT_DIR, filename)
    with open(target, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
    logging.info(f"Saved JSON: {target}")

def save_csv(data, filename):
    """Write venue dicts as a UTF-8 CSV named ``filename`` inside OUTPUT_DIR.

    The file starts with a header row, followed by one row per item of
    ``data`` in iteration order. The destination path is logged once the
    file has been written.
    """
    # Column order of the CSV; mirrors the keys of the venue dicts.
    columns = [
        "name", "latitude", "longitude", "venue_type", "cuisine_type",
        "address", "contact_info", "opening_hours", "rating", "source",
    ]
    target = os.path.join(OUTPUT_DIR, filename)
    with open(target, "w", newline="", encoding="utf-8") as handle:
        sheet = csv.DictWriter(handle, fieldnames=columns)
        sheet.writeheader()
        sheet.writerows(data)
    logging.info(f"Saved CSV: {target}")


In [6]:
# ---- Main Run Example (Berlin) ----
city = "Berlin"
lat = 52.5200
lon = 13.4050
radius = 1000
max_enrich = 50  # limit ratings enrichment for demo
unnamed_placeholder = "Unknown"  # name given to OSM nodes without a "name" tag

# Step 1: Fetch from OSM
osm = fetch_osm(lat, lon, radius)
venues = []
for el in osm.get("elements", []):
    # Read tags defensively so an element without a "tags" object cannot
    # abort the whole run with a KeyError.
    tags = el.get("tags") or {}
    v = Venue(
        name=tags.get("name", unnamed_placeholder),
        latitude=el["lat"],
        longitude=el["lon"],
        venue_type=tags.get("amenity", "unknown"),
        cuisine_type=tags.get("cuisine"),
        address=tags.get("addr:street"),
        contact_info=tags.get("phone"),
        source="OpenStreetMap"
    ).model_dump()
    venues.append(v)

logging.info(f"Collected {len(venues)} venues from OSM.")

# Step 2: Optional ratings enrichment
if FSQ_API_KEY:
    logging.info(f"Enriching up to {max_enrich} venues with Foursquare ratings...")
    enriched = 0
    for v in venues:
        # max_enrich caps the number of venues that received a rating,
        # not the number of lookups attempted.
        if enriched >= max_enrich:
            break
        if v["name"] == unnamed_placeholder:
            # Searching Foursquare for the placeholder text would attach the
            # rating of whatever unrelated place happens to match nearby.
            continue
        fsq_id = fsq_find_id(v["name"], v["latitude"], v["longitude"])
        if fsq_id:
            rating = fsq_get_rating(fsq_id)
            if rating is not None:
                v["rating"] = float(rating)
                enriched += 1
        time.sleep(0.12)  # polite rate limiting
    logging.info(f"Enriched ratings for {enriched} venues.")
else:
    logging.info("No FSQ_API_KEY provided — skipping ratings.")

# Step 3: Save results
# One timestamp shared by both files so the JSON and CSV of a run pair up.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_json(venues, f"{city.lower()}_venues_{timestamp}.json")
save_csv(venues, f"{city.lower()}_venues_{timestamp}.csv")


2025-08-12 16:59:06,558 [INFO] Fetching OSM venues at 52.52,13.405 within 1000m
2025-08-12 16:59:07,786 [INFO] Collected 326 venues from OSM.
2025-08-12 16:59:07,787 [INFO] No FSQ_API_KEY provided — skipping ratings.
2025-08-12 16:59:07,792 [INFO] Saved JSON: output/berlin_venues_20250812_165907.json
2025-08-12 16:59:07,794 [INFO] Saved CSV: output/berlin_venues_20250812_165907.csv
