In [45]:
import json
import os
import random
import time

In [46]:
import cloudscraper
import geopandas as gpd
import pandas as pd
import h3

In [47]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [48]:
# Read the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [49]:
# Keep only the postal code, the state name and the boundary geometry.
wanted_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[wanted_columns]

## Get McDonald's Data

In [50]:
# Client object whose .get() is used by the scraping loop below to request
# the store-locator URL for every sampled coordinate.
scraper = cloudscraper.create_scraper()

In [56]:
# Scrape the store locator state by state. Each state's result is cached in
# data/states/<STUSPS>.gpkg so that an interrupted run can be resumed, and every
# record (cached or freshly fetched) is also collected in
# national_restaurant_list for the national export further down.
national_restaurant_list = []
res = 5  # H3 resolution of the sampling grid; also used in the national file name
excluded_codes = {"PR", "VI", "GU", "MP", "AS"}  # STUSPS codes that are not scraped
states_dir = "data/states"
os.makedirs(states_dir, exist_ok=True)  # make sure the cache folder exists before writing

for state_code, state_shape in tqdm(
    zip(states_df["STUSPS"], states_df["geometry"]),
    total=len(states_df),
    desc="States",
):
    if state_code in excluded_codes:
        continue

    state_file = f"{states_dir}/{state_code}.gpkg"
    if os.path.isfile(state_file):
        # Already scraped in an earlier run: reuse the cached rows so the
        # national list stays complete instead of silently missing this state.
        national_restaurant_list.extend(gpd.read_file(state_file).to_dict("records"))
        continue

    state_restaurant_list = []

    # Cover the state polygon with H3 cells and query the locator once per
    # cell centre.
    h3_shape = h3.geo_to_h3shape(state_shape)
    h3_cells = h3.h3shape_to_cells(h3_shape, res=res)
    for h3_cell in tqdm(h3_cells, desc=f"Parsing state: {state_code} in Resolution: {res}"):
        lat, lng = h3.cell_to_latlng(h3_cell)
        url = (
            "https://www.mcdonalds.com/googleappsv2/geolocation?latitude="
            + str(lat)
            + "&longitude="
            + str(lng)
            + "&radius=100&maxResults=1000&country=us&language=en-us"
        )
        locations = scraper.get(url).text
        if not locations:
            # Empty body: nothing to parse for this cell.
            continue

        # A payload without a "features" entry is treated as "no stores here"
        # rather than aborting the whole state with a KeyError.
        features = json.loads(locations).get("features") or []
        for feature in features:
            properties = feature["properties"]
            record = {
                "ID": properties["identifierValue"],
                "STATE": properties["subDivision"],
                "geometry": Point(feature["geometry"]["coordinates"]),
            }
            state_restaurant_list.append(record)
            national_restaurant_list.append(record)

        # Random pause between requests.
        time.sleep(random.uniform(0.01, 0.5))

    if not state_restaurant_list:
        # An empty list yields a frame without "ID"/"geometry" columns, which
        # the steps below cannot handle; leave the state uncached so a later
        # run tries it again.
        continue

    # One row per ID, restricted to stores the locator attributes to this state
    # (a query near the border also returns stores from neighbouring states).
    state_mcdonalds_gdf = gpd.GeoDataFrame(state_restaurant_list, crs=4326)
    state_mcdonalds_gdf = state_mcdonalds_gdf.drop_duplicates("ID")
    state_mcdonalds_gdf = state_mcdonalds_gdf[state_mcdonalds_gdf["STATE"] == state_code]
    state_mcdonalds_gdf.to_file(state_file)

States:   0%|          | 0/56 [00:00<?, ?it/s]

Parsing state: TN in Resolution: 5:   0%|          | 0/452 [00:00<?, ?it/s]

Parsing state: WY in Resolution: 5:   0%|          | 0/976 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Combine everything collected by the scraping loop, keep one row per ID value
# and save the result under a file name that includes the H3 resolution.
national_output_path = f"data/mcdonalds_h3_{res}.gpkg"
mcdonalds_gdf = gpd.GeoDataFrame(national_restaurant_list, crs=4326).drop_duplicates("ID")
mcdonalds_gdf.to_file(national_output_path)

In [112]:
# Build the GeoDataFrame from the list filled by the scraping loop.
# The former input, `restaurant_list`, is never defined in this notebook, so
# the cell only worked with a leftover kernel variable and failed on a fresh
# Restart & Run All.
mcdonalds_gdf = gpd.GeoDataFrame(national_restaurant_list, crs=4326)

In [113]:
# Remove rows that are exact duplicates, renumber the rows, and keep only the
# STATE and geometry columns.
deduplicated_gdf = mcdonalds_gdf.drop_duplicates()
mcdonalds_new_gdf = deduplicated_gdf.reset_index()[["STATE", "geometry"]]

In [114]:
# Persist the trimmed store table.
# NOTE(review): the file name mentions "va" and "6" while the scraping loop in
# this notebook uses res = 5 for all states -- confirm the name is still intended.
trimmed_output_path = "data/mcdonalds_va_h3_6.gpkg"
mcdonalds_new_gdf.to_file(trimmed_output_path)

## Get Population Data

In [126]:
# sheet_name=None makes pandas load every worksheet and return them as a
# dict keyed by sheet name; the sheet of interest is picked in the next cell.
population_workbook_path = "data/NST-EST2023-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path,
    sheet_name=None,
    engine="openpyxl",
)

In [127]:
# The worksheet is read without a clean header row, so the two columns of
# interest carry these raw labels.
# NOTE(review): "Unnamed: 5" is presumably the most recent estimate column --
# verify against the workbook.
raw_name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
raw_population_header = "Unnamed: 5"

# Pick the two columns and give them the names used by the rest of the notebook.
state_populations_df = state_populations["NST-EST2023-POP"][
    [raw_name_header, raw_population_header]
].rename(
    columns={raw_name_header: "NAME", raw_population_header: "POPULATION"}
)

# Drop the first character of every label (the raw header says sub-parts are
# marked with leading dots). This is unconditional, so labels that do not start
# with a dot lose their first letter as well.
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [128]:
# Attach the population figure to every state row (left join on the state
# name, so states without a match keep a missing POPULATION) and keep the four
# columns used downstream.
states_with_population_df = states_df.merge(
    state_populations_df,
    on="NAME",
    how="left",
)[["STUSPS", "POPULATION", "geometry", "NAME"]]

In [129]:
# Discard every row that has a missing value in any column, i.e. the rows for
# which the left join above found no population figure.
states_with_population_df = states_with_population_df.dropna(axis=0, how="any")

## Merge Data

In [136]:
# Count stores per state by testing each store point against the state
# polygons. When several polygons intersect a point, the first one in
# states_df order wins.
state_counts = {}
store_points = mcdonalds_new_gdf["geometry"]
for i, store_point in enumerate(tqdm(store_points, desc="Parsing McDonalds")):
    containing_states = states_df[states_df.intersects(store_point)]
    if len(containing_states) == 0:
        # No polygon contains this point: report its position and skip it.
        print(i)
        continue

    state_code = containing_states["STUSPS"].iloc[0]
    state_counts[state_code] = state_counts.get(state_code, 0) + 1

Parsing McDonalds:   0%|          | 0/13512 [00:00<?, ?it/s]

8825


In [139]:
# Turn the {state code: count} dict into a two-column frame:
# "STUSPS" (taken from the dict keys) and "stores" (the counts).
mcdonalds_per_state_df = (
    pd.Series(state_counts, name="stores")
    .rename_axis("STUSPS")
    .reset_index()
)

In [141]:
# Join the store counts onto the state/population table. The left join keeps
# states without any counted store; their missing values are replaced with 0.
# Note: this rebinds mcdonalds_per_state_df from the counts table to the merged
# table, so the cell must not be re-run without re-running the previous one.
mcdonalds_per_state_df = (
    states_with_population_df
    .merge(mcdonalds_per_state_df, on="STUSPS", how="left")
    .fillna(0)
)

In [142]:
# Stores per N residents, for several values of N. The column order follows
# the order of this mapping.
rate_denominators = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents_per_unit in rate_denominators.items():
    mcdonalds_per_state_df[rate_column] = mcdonalds_per_state_df["stores"] / (
        mcdonalds_per_state_df["POPULATION"] / residents_per_unit
    )

# Despite its name, this column is population divided by store count
# (residents per store). A state with 0 stores is divided by zero here.
mcdonalds_per_state_df["per_capita"] = (
    mcdonalds_per_state_df["POPULATION"] / mcdonalds_per_state_df["stores"]
)

In [146]:
# Reproject the geometries to EPSG:9311 before export.
# NOTE(review): presumably chosen as a US-wide projection suited to mapping --
# confirm the intended CRS.
mcdonalds_per_state_df = mcdonalds_per_state_df.to_crs(epsg=9311)

In [147]:
# Write the per-state table (geometry, population, store count and rates) to disk.
per_state_output_path = "data/mcdonalds_per_state.gpkg"
mcdonalds_per_state_df.to_file(per_state_output_path)