In [1]:
import json
import os
import random
import time
from urllib import parse

In [2]:
import cloudscraper
import geopandas as gpd
import pandas as pd
import h3

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Location of the state-boundary shapefile, relative to the notebook's
# working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load every feature of the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the columns the rest of the notebook reads: the postal
# abbreviation, the full state name and the boundary geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Popeyes Data

In [7]:
# Shared HTTP client; every store-locator request further down is issued
# through this object's .get().
scraper = cloudscraper.create_scraper()

In [12]:
# GraphQL gateway that the store-locator requests are sent to.
url_root = "https://use1-prod-plk-gateway.rbictg.com/graphql"

# Request headers copied from a browser session.
#
# The session-bound values (WAF token, Forter token, device id, session id)
# can be supplied through environment variables so that refreshing them does
# not require editing the notebook. When a variable is unset, the originally
# captured value is used, so behavior is unchanged by default.
# NOTE(review): the captured fallbacks are still committed here and were
# presumably short-lived — move them out of the file once the environment
# variables are in place.
headers = {
    "accept": "*/*",
    "accept-language": "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
    "content-type": "application/json",
    "priority": "u=1, i",
    "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "cross-site",
    "x-aws-waf-token": os.environ.get(
        "POPEYES_WAF_TOKEN",
        "592c2aa2-3ebb-41ba-81a3-d6e7912995a8:EQoAuocKRhbpAAAA:nJ7mLBJkONDux6Yf6B9WWlPwFugBs/8em5qO2lu/ekmCIAX6sZpW1d3ydpjXUCMj4DjpPR+rJ2i1WNMribj++SWhmkVhBor+RjLeNZtYZ6TcE7mJxjdYjnCRmoLEyiKoTkUDGaGrXJO7Hc5KQkGsKgcEji1mYYWtwB5JpvVNI3cY3sfSuf8weyOrUet62Mu99yhWq49jBSdt3BD37A7UCyojbaoIykfr950JC73fZUAXrunKHSf6PB/VBSOx",
    ),
    "x-client-name": "plk-rn-web",
    "x-client-version": "-no-uid-83d730e",
    "x-device-id": os.environ.get(
        "POPEYES_DEVICE_ID", "9d2816a2-0b8f-48b0-a15c-509ec3bed620"
    ),
    "x-forter-token": os.environ.get(
        "POPEYES_FORTER_TOKEN",
        "29bf6fba84f947a98ec7c5f8824d902b_1739237863565_396_dUAL9_13ck_tt",
    ),
    "x-platform-framework": "react-dom",
    "x-session-id": os.environ.get("POPEYES_SESSION_ID", "1739237863454"),
    "x-ui-language": "en",
    "x-ui-platform": "web",
    "x-ui-region": "US",
    "x-user-datetime": "2025-02-10T17:37:58-08:00",
}

In [13]:
# H3 resolution of the query grid; also shown in the progress-bar label of
# the scraping loop.
res = 4

# Dissolve every state outline except the listed territories into a single
# geometry to be covered with H3 cells.
excluded_territories = ["PR", "VI", "GU", "MP", "AS"]
usa_geom = states_df[~states_df["STUSPS"].isin(excluded_territories)][
    "geometry"
].union_all()
h3_cells = h3.geo_to_cells(usa_geom, res=res)

# Step up to each cell's parent and back down to all of that parent's
# children, so the result is a superset of h3_cells that also contains the
# siblings the polygon fill did not return.
parent_cells = list({h3.cell_to_parent(h3_cell) for h3_cell in h3_cells})
# Set comprehensions replace the former `sum(list_of_lists, [])`, which
# re-copied the growing list on every step (quadratic in the number of cells).
# Elements are inserted in the same order as before.
all_res_cells = list(
    {
        child_cell
        for parent_cell in parent_cells
        for child_cell in h3.cell_to_children(parent_cell)
    }
)
all_res_cells_sorted = sorted(all_res_cells)

# Pad the grid with every cell within grid distance 2 of any cell above.
all_cells_including_neighbors = list(
    {
        neighbor_cell
        for center_cell in all_res_cells_sorted
        for neighbor_cell in h3.grid_disk(center_cell, 2)
    }
)

In [None]:
# Save the padded query grid as GeoJSON so the coverage can be inspected.
# The original cell referenced `all_neighbors_cells`, which is never defined
# in this notebook; the grid padded with grid_disk(..., 2) is
# `all_cells_including_neighbors`. The context manager guarantees the file is
# flushed and closed.
with open("data/cells_neighbors_2x.geojson", "w") as geojson_file:
    json.dump(h3.cells_to_geo(all_cells_including_neighbors), geojson_file)

In [14]:
# The persisted-query descriptor is identical for every request, so it is
# built once instead of on each loop iteration.
persisted_query_extensions = '{"persistedQuery":{"version":1,"sha256Hash":"4f7636962d84eeab7b47b60f6eb2a1e527b8fbc656c881a179cfe4f847a641da"}}'
max_attempts = 3  # tries per cell before the run is aborted
request_timeout_s = 30  # without a timeout a stalled connection blocks forever


def _fetch_nearby_stores(lat, lng):
    """Return the store nodes the locator API reports around one coordinate.

    Parameters
    ----------
    lat, lng : float
        Coordinate sent as ``userLat`` / ``userLng``.

    Returns
    -------
    list
        The ``data.restaurantsV2.nearby.nodes`` entry of the JSON response.

    Raises
    ------
    RuntimeError
        If the request still fails, or the payload lacks the expected keys,
        after ``max_attempts`` tries. The last underlying error is chained.
    """
    variables = {
        "input": {
            "pagination": {"first": 100},
            "coordinates": {"userLat": lat, "userLng": lng},
            "radiusStrictMode": True,
        }
    }
    params = {
        "operationName": "GetNearbyRestaurants",
        "variables": json.dumps(variables),
        "extensions": persisted_query_extensions,
    }
    for attempt in range(1, max_attempts + 1):
        try:
            r = scraper.get(
                url_root, params=params, headers=headers, timeout=request_timeout_s
            )
            # Surface HTTP-level failures directly instead of letting them
            # show up later as a JSON or key error.
            r.raise_for_status()
            return r.json()["data"]["restaurantsV2"]["nearby"]["nodes"]
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # OSError: connection/HTTP errors; ValueError: body is not JSON;
            # KeyError/TypeError: JSON without the expected structure.
            if attempt == max_attempts:
                raise RuntimeError(
                    f"Store lookup failed for ({lat}, {lng}) "
                    f"after {max_attempts} attempts"
                ) from exc
            # Randomized, growing pause before retrying.
            time.sleep(random.uniform(0.5, 1.5) * attempt)


restaurant_list = []
for h3_cell in tqdm(
    all_cells_including_neighbors, desc=f"Parsing cells in Resolution: {res}"
):
    # Query around the center point of each grid cell.
    lat, lng = h3.cell_to_latlng(h3_cell)
    for store in _fetch_nearby_stores(lat, lng):
        # Neighboring cells may return the same store again; duplicates are
        # kept here and have to be removed by storeId afterwards.
        restaurant_list.append(
            {
                "geometry": Point(store["longitude"], store["latitude"]),
                "storeId": store["storeId"],
                "STUSPS": store["physicalAddress"]["stateProvinceShort"],
                "url": f"https://www.popeyes.com/store-locator/store/{store['_id']}",
            }
        )
    # No pause between successful requests (same as before); add a
    # time.sleep(...) here if the API starts throttling.

Parsing cells in Resolution: 4:   0%|          | 0/7612 [00:00<?, ?it/s]

In [16]:
# Build a GeoDataFrame from the scraped records (CRS given as code 4326).
gdf = gpd.GeoDataFrame(restaurant_list, crs=4326)

# Keep one row per storeId, renumber the rows, and save the result so later
# cells can reload it from disk.
unique_stores_gdf = gdf.drop_duplicates("storeId")
unique_stores_gdf = unique_stores_gdf.reset_index(drop=True)
unique_stores_gdf.to_file("data/popeyes_v2.gpkg")

In [6]:
# Load the store locations saved to disk.
popeyes_path = "data/popeyes_v2.gpkg"
popeyes_gdf = gpd.read_file(popeyes_path)

## Get Population Data

In [8]:
# sheet_name=None loads every sheet of the workbook, giving a dict of
# DataFrames keyed by sheet name.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path,
    sheet_name=None,
    engine="openpyxl",
)

In [9]:
# The sheet's first column header is a long caption rather than a field name,
# and the population column has only an auto-generated "Unnamed: 5" header.
# NOTE(review): presumably "Unnamed: 5" is the most recent estimate — confirm
# against the workbook.
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
population_header = "Unnamed: 5"

population_sheet = state_populations["NST-EST2024-POP"]
state_populations_df = population_sheet[[name_header, population_header]].rename(
    columns={name_header: "NAME", population_header: "POPULATION"}
)

# Drop the first character of every label; per the caption, sub-part rows
# carry a leading dot. Labels without a dot lose their first letter too.
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [10]:
# Attach the population figure to each state by name. The left join keeps
# every state row; a state whose name has no match gets a missing POPULATION.
states_joined_df = states_df.merge(state_populations_df, how="left", on="NAME")

# Narrow the result to the columns used downstream, in this order.
population_columns = ["STUSPS", "POPULATION", "geometry", "NAME"]
states_with_population_df = states_joined_df[population_columns]

In [11]:
# Drop every row that has a missing value in any column (dropna's defaults,
# spelled out explicitly).
states_with_population_df = states_with_population_df.dropna(axis=0, how="any")

## Merge Data

In [18]:
# Number of stores per state code, as a two-column frame: STUSPS, POPEYES.
stores_per_state = popeyes_gdf.groupby("STUSPS").size()
popeyes_counts_df = stores_per_state.reset_index(name="POPEYES")

In [19]:
# Attach the store count to every state. The left join keeps states that have
# no row in popeyes_counts_df, leaving their count missing.
popeyes_per_state_df = states_with_population_df.merge(
    popeyes_counts_df, on="STUSPS", how="left"
)
# A missing count means no stores were found for that state, so it becomes 0.
# Only the count column is filled: a blanket fillna(0) would also turn a
# missing name, population or geometry into 0 and hide the problem.
popeyes_per_state_df["POPEYES"] = popeyes_per_state_df["POPEYES"].fillna(0)

In [20]:
# Stores per N residents for several values of N. The columns are created in
# the order listed here.
rate_denominators = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
store_counts = popeyes_per_state_df["POPEYES"]
residents = popeyes_per_state_df["POPULATION"]
for rate_column, denominator in rate_denominators.items():
    popeyes_per_state_df[rate_column] = store_counts / (residents / denominator)

# Despite its name, "per_capita" holds residents per store (the inverse of
# the rates above).
# NOTE(review): a state with a store count of 0 divides by zero here —
# confirm how downstream consumers treat the result.
popeyes_per_state_df["per_capita"] = residents / store_counts

In [21]:
# Reproject the geometries to EPSG:9311.
# NOTE(review): presumably chosen as a US-wide projection for mapping — confirm.
popeyes_per_state_df = popeyes_per_state_df.to_crs(epsg=9311)

In [22]:
# Write the final per-state table, geometry included, to disk.
per_state_output_path = "data/popeyes_per_state.gpkg"
popeyes_per_state_df.to_file(per_state_output_path)