In [None]:
import osmnx as ox
import pandas as pd
import time
import requests
import os
from pathlib import Path
import geopandas as gpd
from fiona import listlayers
from datetime import datetime

### Try "Los Angeles County, California"
- attempt at "California, USA" did not make any progress in 2+ hours, and said "attempt exceeded geographic limit by 222 times"

In [18]:
start = time.time()

place = "Los Angeles County, California"
tags = {"leisure":["pitch", "park", "sports_centre"], 
        'building':'school', 
        'amenity':'school'}
features_gdf = ox.features.features_from_place(place, tags)

end = time.time()

print(f"Execution time: {start - end:0.4f} seconds")

Execution time: -155.4614 seconds


 # 1. Get list of all US Counties 
 
The attempt to download data for a very large county - Los Angeles - was successful.
 
Now, i will attempt to obtain a list of all US Counties...

I should be able to get this data from the US Census API Geography Endpoint: https://www.census.gov/data/developers/data-sets/geo-info.html

In [19]:
census_api_key = " 349179f9505abb551eb4de4ab998d2a0f58d06aa"

In [None]:
url = 'https://api.census.gov/data/2023/geoinfo'

# needed! 
headers = {'User-Agent': 'Mozilla/5.0'}

params = {'get':'NAME', 
                        'for': 'county:*',
                      }

response = requests.get(url, headers=headers, params=params, timeout=10)

if response.status_code == 200:
    data = response.json()
    print("Success")
else:
    print(response.status_code)

Success


The data is in list form

In [41]:
data[:4]

[['NAME', 'state', 'county'],
 ['Autauga County, Alabama', '01', '001'],
 ['Baldwin County, Alabama', '01', '003'],
 ['Barbour County, Alabama', '01', '005']]

I can easily convert this to a Pandas dataframe

In [50]:
# extract county names
counties_list = [item[0] for item in data][1:]

counties_df = pd.DataFrame(counties_list, columns=['name_full'])

# add State column 
counties_df['state'] = counties_df.apply(lambda x: x['name_full'].split(",")[1].strip(), axis=1)

counties_df.head()

Unnamed: 0,name_full,state
0,"Autauga County, Alabama",Alabama
1,"Baldwin County, Alabama",Alabama
2,"Barbour County, Alabama",Alabama
3,"Bibb County, Alabama",Alabama
4,"Blount County, Alabama",Alabama


### Save the County names to file

In [55]:
counties_df.to_csv('us_counties.csv', index = False)

# 2. Download all US Counies

In [10]:
counties_df = pd.read_csv('us_counties.csv')

# df for results
results_df = pd.DataFrame(columns = ['state', 'name_full', 'name_abbrev', 'downloaded', 'file_path', 'size', 'date'])


In [11]:
counties_df.head()

Unnamed: 0,name_full,state
0,"Autauga County, Alabama",Alabama
1,"Baldwin County, Alabama",Alabama
2,"Barbour County, Alabama",Alabama
3,"Bibb County, Alabama",Alabama
4,"Blount County, Alabama",Alabama


In [7]:
def get_abbrev_name(county_row):
    county = county_row['name_full'].split(",")[0]

    # remove 'County' suffix
    suffix_loc = county.find("County")

    if suffix_loc != -1:
        county = county[:suffix_loc].strip()
    
    return county

In [None]:
tags = {"leisure":["pitch", "park", "sports_centre"], 
        'building':'school', 
        'amenity':'school'}

states = counties_df['state'].unique()

download_folder = Path("data")

for state in states[:1]:
    # create state folder
    state_path = download_folder / state
    os.makedirs(state_path, exist_ok=True)

    for _, county_row in counties_df[counties_df['state'] == state][:3].iterrows():

        county_full = county_row['name_full']
        county_abbrev = get_abbrev_name(county_row).replace(".","").replace(" ", "_")
        file_path = state_path / f"{county_abbrev}.gpkg"

        try:
            # call to OSMnX 
            features_gdf = ox.features.features_from_place(county_full, tags)

            # separate features
            parks = features_gdf[features_gdf['leisure'] == 'park']
            pitches = features_gdf[features_gdf['leisure'] == 'pitch']
            sports_centres = features_gdf[features_gdf['leisure'] == 'sports_centre']
            schools = features_gdf[features_gdf.apply(lambda x: True if "school" in [str(x['amenity']).lower(), str(x['building']).lower(), str(x['name']).lower()] else False, axis=1)] 

            # write geopackage to disk
            for gdf_obj, layer_name in [(parks, "parks"), (pitches, "pitches"), 
                            (sports_centres, "sports_centres"), (schools, "schools")]:

                # drop FID column (it's sepcial if GeoPackages, and not needed)
                if 'FID' in gdf_obj.columns:
                    gdf_obj = gdf_obj.drop(columns=['FID'])

                # save as layer in a GeoPackage
                gdf_obj.to_file(file_path, layer=layer_name, driver="GPKG")

            # get file size (decimal, not binary)
            size_kb = file_path.stat().st_size / 1000
            size_text = f"{size_kb} KB" if size_kb < 1000 else f"{size_kb / 1000:0.1f} MB"

            downloaded = 'Success'

            print(f"Success: {county_full}")

        except:
            downloaded = 'Fail'
            file_path = size_text = ''

            print(f"Fail: {county_full}")
        finally:
            date = datetime.today().strftime('%Y-%m-%d')
            
            # save results 
            results_data = {'state':state, 'name_full':county_full, 'name_abbrev':county_abbrev,
                             'downloaded':downloaded, 'file_path':file_path, 'size':size_text, 'date':date}
            
            results_df.loc[len(results_df)] = results_data

# write the results file
todays_date = datetime.strftime(datetime.now(), '%Y-%m-%d')

results_df.to_csv(f"results_{todays_date}.csv", index=False)

Success: Autauga County, Alabama
Success: Baldwin County, Alabama
Success: Barbour County, Alabama
