In [None]:
import os

In [None]:
import cloudscraper
import geopandas as gpd
import h3
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Path to the state-boundary shapefile; read it into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the postal code, the full name and the boundary geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [6]:
# sheet_name=None makes read_excel load every worksheet and return them as a
# dict keyed by sheet name.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path, engine="openpyxl", sheet_name=None
)

In [7]:
# The first worksheet row is used as the header, so the name column is labelled
# with the table caption and the remaining columns are "Unnamed: N".
census_name_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is a positional column label; confirm that it is
# the estimate year this analysis is meant to use.
census_population_column = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [census_name_column, census_population_column]
].rename(
    columns={
        census_name_column: "NAME",
        census_population_column: "POPULATION",
    }
)
# Sub-part rows are prefixed with leading dots (see the caption above). Strip
# only those dots: slicing off the first character unconditionally would also
# truncate names that carry no dot prefix, which then fail to match on "NAME".
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [8]:
# Left join on the state name so every boundary row is kept, then fix the
# column order used by the rest of the notebook.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Royal Farms Data

In [22]:
# HTTP client used for all store-locator requests below.
# NOTE(review): presumably cloudscraper is used instead of plain requests to get
# past the site's bot protection — confirm.
scraper = cloudscraper.create_scraper()

In [26]:
# Scrape the Royal Farms store locator state by state.
#
# For each selected state, the (buffered) state polygon is covered with H3
# cells, the store-search endpoint is queried at the centre of every cell, and
# the de-duplicated stores that belong to that state are written to
# data/states/<STUSPS>.gpkg. A state whose file already exists is skipped, so
# the cell can be re-run to resume an interrupted scrape.
url = "https://royalfarms.com/wp-admin/admin-ajax.php"
res = 4  # H3 resolution of the cells used as search centres

# Only these states are scraped; every other row of states_df is skipped.
target_states = ["MD", "DE", "VA", "PA", "NJ", "WV", "NC"]

# Make sure the output directory exists before any file is written into it.
states_path = "data/states"
os.makedirs(states_path, exist_ok=True)

for i in tqdm(range(len(states_df)), desc="States"):
    state_code = states_df.iloc[i]["STUSPS"]

    if state_code not in target_states:
        continue

    # A file from a previous run acts as a cache for this state.
    state_file = os.path.join(states_path, f"{state_code}.gpkg")
    if os.path.isfile(state_file):
        continue

    state_restaurant_list = []
    failed_cells = 0

    state_shape = states_df.iloc[i]["geometry"]
    # Buffer the polygon (0.2 in the layer's coordinate units) so that cells
    # along the state border are included as well.
    h3_shape = h3.geo_to_h3shape(state_shape.buffer(distance=0.2))
    h3_cells = h3.h3shape_to_cells(h3_shape, res=res)
    for h3_cell in tqdm(
        h3_cells, desc=f"Parsing state: {state_code} in Resolution: {res}"
    ):
        lat, lng = h3.cell_to_latlng(h3_cell)
        params = {
            "action": "store_search",
            "lat": lat,
            "lng": lng,
            "max_results": 50,
            "search_radius": 500,
        }
        r = scraper.get(url=url, params=params)
        if r.status_code != 200:
            failed_cells += 1
            continue
        try:
            json_data = r.json()
        except ValueError:
            # The body was not valid JSON; count it as a failed request
            # instead of aborting the whole scrape.
            failed_cells += 1
            continue
        if not json_data:
            continue

        for store_json in json_data:
            state_restaurant_list.append(
                {
                    "ID": store_json["id"],
                    # Point takes (x, y), i.e. (longitude, latitude).
                    "geometry": Point(
                        float(store_json["lng"]), float(store_json["lat"])
                    ),
                    "STUSPS": store_json["state"],
                }
            )

    if failed_cells:
        print(
            f"{state_code}: {failed_cells} of {len(h3_cells)} requests failed "
            "and were skipped; delete the state's file to retry."
        )

    # Without any record there is no geometry column to build a GeoDataFrame
    # from, so skip the write and let a later run retry this state.
    if not state_restaurant_list:
        print(f"{state_code}: no stores returned, nothing written.")
        continue

    state_rf_gdf = gpd.GeoDataFrame(state_restaurant_list, crs=4326)
    # Neighbouring search centres return overlapping results.
    state_rf_gdf = state_rf_gdf.drop_duplicates("ID")
    # Searches near the border also return stores from adjacent states.
    state_rf_gdf = state_rf_gdf[state_rf_gdf["STUSPS"] == state_code]
    state_rf_gdf.to_file(state_file)

States:   0%|          | 0/56 [00:00<?, ?it/s]

Parsing state: NC in Resolution: 4:   0%|          | 0/113 [00:00<?, ?it/s]

Parsing state: VA in Resolution: 4:   0%|          | 0/91 [00:00<?, ?it/s]

Parsing state: WV in Resolution: 4:   0%|          | 0/58 [00:00<?, ?it/s]

Parsing state: PA in Resolution: 4:   0%|          | 0/85 [00:00<?, ?it/s]

Parsing state: NJ in Resolution: 4:   0%|          | 0/20 [00:00<?, ?it/s]

Parsing state: MD in Resolution: 4:   0%|          | 0/34 [00:00<?, ?it/s]

Parsing state: DE in Resolution: 4:   0%|          | 0/8 [00:00<?, ?it/s]

In [34]:
# Load every per-state GeoPackage written by the scraping cell.
rf_gdfs = []
states_path = "data/states"
# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the combined store table the same from run to run.
for file in sorted(os.listdir(states_path)):
    if not file.endswith(".gpkg"):
        continue
    rf_gdfs.append(gpd.read_file(os.path.join(states_path, file)))

In [35]:
# Stack the per-state store tables into one GeoDataFrame, reproject it to
# EPSG:3968 and save the combined layer.
rf_gdf = gpd.GeoDataFrame(pd.concat(rf_gdfs, ignore_index=True))
rf_gdf = rf_gdf.to_crs(3968)
rf_gdf.to_file("data/stores.gpkg")

In [36]:
# Number of stores per state code, as a two-column frame: STUSPS, RFs.
rf_state_counts_df = rf_gdf.groupby("STUSPS").size().reset_index(name="RFs")

## Combine With States

In [37]:
# Attach the store counts to the state/population table. The left join keeps
# states that have no stores; their "RFs" value is missing after the merge.
rf_state_counts_gdf = states_with_population_df.merge(
    right=rf_state_counts_df, how="left", on="STUSPS"
)

In [None]:
# States with no scraped store come out of the left merge with a missing "RFs"
# count; treat that as zero stores. Only "RFs" is filled here, so a missing
# POPULATION is not replaced by 0 right before the per-capita division.
rf_state_counts_gdf["RFs"] = rf_state_counts_gdf["RFs"].fillna(0).astype(int)

In [39]:
# Stores per 100,000 and per 1,000,000 residents.
for rate_column, residents_per_unit in (
    ("per_100k", 100_000),
    ("per_1m", 1_000_000),
):
    rf_state_counts_gdf[rate_column] = rf_state_counts_gdf["RFs"] / (
        rf_state_counts_gdf["POPULATION"] / residents_per_unit
    )

In [40]:
# Replace every remaining missing value (e.g. rates that could not be
# computed) with 0.
rf_state_counts_gdf = rf_state_counts_gdf.fillna(value=0)

In [42]:
# Reproject the per-state table to EPSG:3968 (the same CRS the store layer is
# saved in) and write it out.
rf_state_counts_gdf = rf_state_counts_gdf.to_crs(3968)
rf_state_counts_gdf.to_file("data/royal_farms_per_state.gpkg")