In [7]:
import json
import os
import random
import requests
import time

In [8]:
import geopandas as gpd
import h3
import pandas as pd

In [9]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [10]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [11]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Hunt Brothers Data

In [16]:
national_restaurant_list = []
res = 3
margin = 0.5
state_counts = {}

In [19]:
for i in tqdm(range(len(states_df)), desc="States"):
    state_restaurant_list = []

    state_code = states_df.iloc[i]["STUSPS"]
    if state_code in ["PR", "VI", "GU", "MP", "AS"]:
        continue

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    state_shape = states_df.iloc[i]["geometry"]
    h3_shape = h3.geo_to_h3shape(state_shape)
    h3_cells = h3.h3shape_to_cells(h3_shape, res=res)
    parents = [
        h3.cell_to_children(parent)
        for parent in [h3.cell_to_parent(h3_cell) for h3_cell in h3_cells]
    ]
    h3_cells = list(set(sum(parents, [])))

    for h3_cell in tqdm(
        h3_cells, desc=f"Parsing state: {state_code} in Resolution: {res}"
    ):
        boundary_points = h3.cell_to_boundary(h3_cell)
        lats = [point[0] for point in boundary_points]
        lngs = [point[1] for point in boundary_points]

        url = "https://api.huntbrotherspizza.com/location/wp_search_result"
        params = {
            "latitude:range[start]": str(min(lats) - margin),
            "latitude:range[end]": str(max(lats) + margin),
            "longitude:range[start]": str(min(lngs) - margin),
            "longitude:range[end]": str(max(lngs) + margin),
            "published": True,
        }
        locations = requests.get(url, params=params)

        if not locations.text:
            continue

        # pattern = r"\[id\] => (\d+).*?\[latitude\] => ([\d\.-]+).*?\[longitude\] => ([\d\.-]+)"
        # matches = re.findall(pattern, locations.text, re.DOTALL)

        # for id_val, lat, lon in matches:
        #    store_dict = {"id": id_val, "geometry": Point(lon, lat)}
        #    state_restaurant_list.append(store_dict)
        #    national_restaurant_list.append(store_dict)

        stores = locations.json()["payload"]
        for store in stores:
            store_dict = {
                "id": store["id"],
                "geometry": Point(store["longitude"], store["latitude"]),
            }
            state_restaurant_list.append(store_dict)
            national_restaurant_list.append(store_dict)

        time.sleep(random.uniform(0.01, 0.5))

    if not state_restaurant_list:
        print(state_code)
        continue

    state_hunt_brothers = gpd.GeoDataFrame(state_restaurant_list, crs=4326)
    state_hunt_brothers = state_hunt_brothers.drop_duplicates("id")
    state_hunt_brothers = state_hunt_brothers[
        state_hunt_brothers.intersects(state_shape)
    ]
    state_hunt_brothers = state_hunt_brothers.reset_index(drop=True)
    state_hunt_brothers.to_file(f"data/states/{state_code}.gpkg")
    state_counts[state_code] = len(state_hunt_brothers)

States:   0%|          | 0/56 [00:00<?, ?it/s]

Parsing state: MA in Resolution: 3:   0%|          | 0/7 [00:00<?, ?it/s]

MA


Parsing state: WA in Resolution: 3:   0%|          | 0/35 [00:00<?, ?it/s]

WA


Parsing state: CA in Resolution: 3:   0%|          | 0/70 [00:00<?, ?it/s]

CA


Parsing state: AK in Resolution: 3:   0%|          | 0/203 [00:00<?, ?it/s]

AK


Parsing state: NV in Resolution: 3:   0%|          | 0/42 [00:00<?, ?it/s]

NV


Parsing state: NH in Resolution: 3:   0%|          | 0/7 [00:00<?, ?it/s]

NH


Parsing state: DC in Resolution: 3: 0it [00:00, ?it/s]

DC


Parsing state: NJ in Resolution: 3:   0%|          | 0/7 [00:00<?, ?it/s]

Parsing state: MD in Resolution: 3:   0%|          | 0/14 [00:00<?, ?it/s]

Parsing state: ME in Resolution: 3:   0%|          | 0/21 [00:00<?, ?it/s]

ME


Parsing state: HI in Resolution: 3:   0%|          | 0/14 [00:00<?, ?it/s]

HI


Parsing state: DE in Resolution: 3: 0it [00:00, ?it/s]

DE


Parsing state: RI in Resolution: 3:   0%|          | 0/7 [00:00<?, ?it/s]

RI


Parsing state: KY in Resolution: 3:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing state: OH in Resolution: 3:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing state: WI in Resolution: 3:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing state: OR in Resolution: 3:   0%|          | 0/49 [00:00<?, ?it/s]

OR


Parsing state: ND in Resolution: 3:   0%|          | 0/35 [00:00<?, ?it/s]

Parsing state: AR in Resolution: 3:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing state: IN in Resolution: 3:   0%|          | 0/28 [00:00<?, ?it/s]

Parsing state: MN in Resolution: 3:   0%|          | 0/49 [00:00<?, ?it/s]

Parsing state: CT in Resolution: 3:   0%|          | 0/7 [00:00<?, ?it/s]

In [23]:
national_hunt_brothers = gpd.GeoDataFrame(national_restaurant_list, crs=4326)
national_hunt_brothers = national_hunt_brothers.drop_duplicates("id")
national_hunt_brothers = national_hunt_brothers.reset_index(drop=True)
national_hunt_brothers.to_file(f"data/locations.gpkg")

In [59]:
national_hunt_brothers_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        state_hunt_brother_gdf = gpd.read_file(constructed_path)
        national_hunt_brothers_gdfs.append(state_hunt_brother_gdf)

## Get Population Data

In [60]:
state_populations = pd.read_excel(
    "data/NST-EST2023-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [61]:
state_populations_df = state_populations["NST-EST2023-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [62]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "POPULATION", "geometry", "NAME"]
]

In [63]:
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [64]:
hunt_brothers_per_state_counts_df = pd.Series(state_counts, name="stores")
hunt_brothers_per_state_counts_df = hunt_brothers_per_state_counts_df.reset_index()
hunt_brothers_per_state_counts_df = hunt_brothers_per_state_counts_df.rename(
    columns={"index": "STUSPS"}
)

In [65]:
hunt_brothers_per_state_df = states_with_population_df.merge(
    hunt_brothers_per_state_counts_df, on="STUSPS", how="left"
)
hunt_brothers_per_state_df = national_hunt_brothers.fillna(0)

In [66]:
hunt_brothers_per_state_df["per_1000"] = hunt_brothers_per_state_df["stores"] / (
    hunt_brothers_per_state_df["POPULATION"] / 1000
)
hunt_brothers_per_state_df["per_10k"] = hunt_brothers_per_state_df["stores"] / (
    hunt_brothers_per_state_df["POPULATION"] / 10_000
)
hunt_brothers_per_state_df["per_100k"] = hunt_brothers_per_state_df["stores"] / (
    hunt_brothers_per_state_df["POPULATION"] / 100000
)
hunt_brothers_per_state_df["per_500k"] = hunt_brothers_per_state_df["stores"] / (
    hunt_brothers_per_state_df["POPULATION"] / 500_000
)
hunt_brothers_per_state_df["per_1m"] = hunt_brothers_per_state_df["stores"] / (
    hunt_brothers_per_state_df["POPULATION"] / 1_000_000
)
hunt_brothers_per_state_df["per_capita"] = (
    hunt_brothers_per_state_df["POPULATION"] / hunt_brothers_per_state_df["stores"]
)

In [67]:
hunt_brothers_per_state_df = hunt_brothers_per_state_df.to_crs(9311)

In [68]:
hunt_brothers_per_state_df.to_file("data/hunt_brothers_per_state.gpkg")