# Boundaries

Get boundaries from OpenStreetMap. Note: for US cities we use Census tracts from the US Census Bureau instead.

Use Census FTP Server:
https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/

Example for New York: get the following file
- 36_NEW_YORK/36/tl_rd22_36_tract.zip

# Import modules

In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import requests

from osm2geojson import json2geojson
from shapely.geometry import Point
from geopy.geocoders import Nominatim
from pathlib import Path

ModuleNotFoundError: No module named 'geopandas'

# Helper functions

In [2]:
def get_city_id(city_name):
    """Return the Overpass area id of the OSM relation matching ``city_name``.

    The name is geocoded with Nominatim (at most three candidates are
    requested) and the first candidate whose ``osm_type`` is ``"relation"``
    is selected. Its ``osm_id`` is offset by 3600000000 to build the result.

    Parameters
    ----------
    city_name : str
        Free-text place name passed to the geocoder.

    Returns
    -------
    int
        ``osm_id`` of the selected relation plus 3600000000.

    Raises
    ------
    ValueError
        If the geocoder returns no candidates, or none of them is a relation.
    """
    geolocator = Nominatim(user_agent="get-city-id")
    # With exactly_one=False the geocoder returns None (not an empty list)
    # when nothing matches, so normalise it before iterating.
    geo_results = geolocator.geocode(city_name, exactly_one=False, limit=3) or []

    city = next(
        (r for r in geo_results if r.raw.get("osm_type") == "relation"),
        None,
    )
    if city is None:
        raise ValueError(f"No results found for city: {city_name}")

    return int(city.raw.get("osm_id")) + 3600000000

# Choose city

In [3]:
# Cities this notebook can be run for. Each name is used verbatim as the
# geocoding query, as the key in the query log and as the output file name,
# so entries must be unique and free of stray whitespace.
city_list = [
    'Melbourne',
    'Jerusalem',
    'Buenos Aires',
    'Paris',
    'Rotterdam',
    'Nashville',
    'Singapore',
    'Cape Town',
    'New York',
    'Los Angeles',
    'Chicago',
    'Boston',
    'Austin',
    'Seattle',
    'Philadelphia',
    'Pittsburgh',
    'Washington DC',
    'San Francisco',
    'SF Bay Area',
    'Raleigh',
    'Milwaukee',
    'Portland',
    'San Diego',
    'Denver',
    'Miami',
    'Saint Louis',
    'Houston',
    'Atlanta',
    'Phoenix',
    'Detroit',
    'Minneapolis',
    'Savannah',
    'Charlotte',
    'Las Vegas',
    'Cincinnati',
    'Kansas City',
]

# Select by name instead of by position so that editing the list cannot
# silently change which city is processed; an unknown name raises ValueError.
city = city_list[city_list.index('Los Angeles')]
print("City:", city)

City: Los Angeles


# Create query

In [4]:
# Resolve the selected city to the numeric id used in the Overpass
# `area(id:...)` filter below. The lookup goes through Nominatim and raises
# ValueError when no OSM relation is found for the name.
city_id = get_city_id(city)

In [5]:
# Overpass QL template, filled in two stages:
#   1. the f-string substitutes `city_id` immediately;
#   2. the doubled braces survive as a literal `{admin_level}` placeholder,
#      which the request cell fills via `query.format(admin_level=...)`.
# Lines starting with `//` inside the string are Overpass QL comments, so
# only relations (not nodes or ways) are requested.

query = f"""
[out:json][timeout:25];
// fetch area to search in
area(id:{city_id})->.searchArea;
(
  //node["admin_level"="{{admin_level}}"](area.searchArea);
  //way["admin_level"="{{admin_level}}"](area.searchArea);
  relation["admin_level"="{{admin_level}}"](area.searchArea);
);
out body;
>;
out skel qt;
"""

## Make request

In [6]:
url = 'http://overpass-api.de/api/interpreter'  # Overpass API URL

# Search for the most granular admin_level: try 10 down to 0 and stop at the
# first level for which the query returns any elements. `r` and `admin_level`
# are left holding the successful response and level for the cells below.
for admin_level in reversed(range(11)):
    print("admin_level:", admin_level)
    r = requests.get(
        url,
        params={'data': query.format(admin_level=admin_level)},
        # The query itself declares a 25 s server-side timeout; give the
        # client a little more so a stalled connection cannot hang the cell.
        timeout=60,
    )
    # Fail loudly on HTTP errors (e.g. rate limiting) instead of trying to
    # parse an error page as JSON.
    r.raise_for_status()
    if r.json()['elements']:
        print("Success!")
        break
else:
    # No level produced data: stop here rather than continuing with an empty
    # response and a misleading admin_level of 0.
    raise RuntimeError("No boundaries found at any admin_level (0-10)")

admin_level: 10
Success!


## Save query data

In [7]:
# Log the parameters of the successful query in query_data.csv, keeping one
# row per city: an existing row is updated in place, a new city is appended,
# and a missing file is created from scratch.
out_dict = dict(city=city, city_id=city_id, admin_level=admin_level)
out_df = pd.DataFrame([out_dict])

try:
    df = pd.read_csv('query_data.csv')
except FileNotFoundError:
    # First run: nothing to merge with, write the single new row.
    pass
else:
    is_city = df['city'] == city
    if is_city.any():
        df.loc[is_city, 'city_id'] = city_id
        df.loc[is_city, 'admin_level'] = admin_level
        out_df = df.copy()
    else:
        out_df = pd.concat([df, out_df])

out_df.to_csv('query_data.csv', index=False)

# Clean results

In [8]:
# Convert the Overpass JSON response to GeoJSON and load it as a
# GeoDataFrame in WGS84 (EPSG:4326). `from_features` is a classmethod, so it
# is called on the class directly and given the CRS up front.
geojson = json2geojson(r.json())
gdf = gpd.GeoDataFrame.from_features(geojson, crs='epsg:4326')

# Keep only the features that come from OSM relations, dropping any node or
# way features present in the converted output.
gdf = gdf.loc[gdf['type'] == 'relation']

# Save

In [9]:
# Write the boundaries to ../data/0_boundaries/<city>.gpkg as a GeoPackage.
data_folder = Path("../data")

out_file = data_folder / "0_boundaries" / (city + ".gpkg")
# Create the target folder if needed; writing fails when it does not exist.
out_file.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(out_file, driver="GPKG")
print("Saved:", out_file)

Saved: ../data/admin-levels/Los Angeles.gpkg
