In [1]:
import cloudscraper
import geopandas as gpd
import pandas as pd

In [2]:
from shapely.geometry import Point

## Get State Data

In [3]:
# Load the state boundary shapefile; the path is relative to the notebook's
# working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [4]:
# Keep only the columns used later in the notebook: STUSPS (joined against the
# stores' `stateInitials` counts), NAME (joined against the population table)
# and the geometry itself.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Crumbl Data

In [36]:
# HTTP client used for the store-list request.
# NOTE(review): cloudscraper is presumably used instead of plain `requests`
# to get past the site's anti-bot protection — confirm it is still needed.
scraper = cloudscraper.create_scraper()

In [37]:
# Fetch the store list from the site's JSON data endpoint.
# NOTE(review): the path segment after `_next/data/` looks like a Next.js
# build ID, which presumably changes when the site is redeployed — if this
# request starts failing, refresh the ID from the live site.
r = scraper.get(
    "https://crumblcookies.com/_next/data/Qfw251bnmrP52or8JVfsV/en-US/stores.json"
)
# Fail right here with a clear HTTP error on a 4xx/5xx response, rather than
# with a confusing JSON decode error or KeyError in a later cell.
r.raise_for_status()

In [38]:
# Decode the response body as JSON; the store list is read from it below.
json_data = r.json()

In [39]:
# Tabulate the payload found under pageProps -> allActiveStores
# (presumably a list of per-store records — verify against the response).
stores_df = pd.DataFrame(data=json_data["pageProps"]["allActiveStores"])

In [40]:
# Build the point geometries in one vectorized call instead of constructing a
# shapely Point per row through `DataFrame.apply`. The coordinate columns are
# cast to float first, mirroring the per-row `float(...)` conversion this
# replaces. Argument order matters: x is the longitude, y is the latitude.
stores_df["geometry"] = gpd.points_from_xy(
    stores_df["longitude"].astype(float),
    stores_df["latitude"].astype(float),
)

In [41]:
# Promote the plain DataFrame to a GeoDataFrame, naming the geometry column
# explicitly. The points were built from longitude/latitude values, so they
# are tagged with EPSG:4326.
stores_gdf = gpd.GeoDataFrame(stores_df, geometry="geometry", crs=4326)

In [42]:
# Reproject the stores to EPSG:9311 (the same CRS the state layer is written
# in at the end of the notebook) and save them to data/stores.gpkg.
stores_gdf = stores_gdf.to_crs(epsg=9311)
stores_gdf.to_file(filename="data/stores.gpkg")

## Get Population Data

In [43]:
# `sheet_name=None` makes pandas load every sheet and return a dict of
# DataFrames keyed by sheet name; the sheet needed is looked up by name later.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    sheet_name=None,
    engine="openpyxl",
)

In [44]:
# Extract a two-column (NAME, POPULATION) table from the "NST-EST2024-POP"
# sheet. The name column is addressed by the sheet's long descriptive header
# text and the population column by its positional label "Unnamed: 5".
# NOTE(review): confirm which estimate year "Unnamed: 5" corresponds to.
#
# Per the header text, leading dots mark sub-parts, so only a leading "." is
# removed from each label. The previous unconditional `.str[1:]` also chopped
# the first letter off any label that has no leading dot, which silently
# prevented such rows from ever matching a state NAME in the later merge;
# those rows now keep their full label.
state_populations_df = (
    state_populations["NST-EST2024-POP"]
    .rename(
        columns={
            "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
            "Unnamed: 5": "POPULATION",
        }
    )
    .loc[:, ["NAME", "POPULATION"]]
    .assign(NAME=lambda df: df["NAME"].str.removeprefix("."))
)

In [45]:
# Left-join the population figures onto the state geometries by state name
# (states without a match get a missing POPULATION), then pin the column
# selection and order in the same expression.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [46]:
# Discard every row with a missing value in any column — in particular the
# states that found no population match in the left join above.
states_with_population_df = states_with_population_df.dropna(
    axis="index", how="any"
)

## Merge Data

In [51]:
# Count stores per `stateInitials` value and present the result as a
# two-column frame: STUSPS (the grouping key, renamed so it lines up with the
# state layer's column) and COUNTS (number of stores).
crumble_cookies_state_df = (
    stores_gdf.groupby("stateInitials")
    .size()
    .rename_axis("STUSPS")
    .reset_index(name="COUNTS")
)

In [52]:
# Attach the per-state store counts to the state layer. The left join keeps
# states that have no entry in the counts table.
crumble_cookies_state_gdf = states_with_population_df.merge(
    crumble_cookies_state_df, on="STUSPS", how="left"
)
# A state missing from the counts table comes back with COUNTS = NaN, meaning
# it has no stores. Fill only that column: a blanket `fillna(0)` targets every
# column, including the geometry, and would turn any other missing value into
# a misleading 0.
crumble_cookies_state_gdf["COUNTS"] = crumble_cookies_state_gdf["COUNTS"].fillna(0)

In [53]:
# Derive per-capita store densities: stores per 100,000 residents and stores
# per 1,000,000 residents. Each lambda receives the frame being extended.
crumble_cookies_state_gdf = crumble_cookies_state_gdf.assign(
    per_100k=lambda gdf: gdf["COUNTS"] / (gdf["POPULATION"] / 100000),
    per_1m=lambda gdf: gdf["COUNTS"] / (gdf["POPULATION"] / 1_000_000),
)

In [54]:
# Reproject the state layer to EPSG:9311, the same CRS the store points were
# written in.
crumble_cookies_state_gdf = crumble_cookies_state_gdf.to_crs(epsg=9311)

In [55]:
# Persist the final per-state layer (counts, population, per-capita rates and
# geometry) to disk.
crumble_cookies_state_gdf.to_file(filename="data/crumble_cookies_state.gpkg")