In [15]:
import json
import os

In [16]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [17]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [18]:
# Location of the state-boundary shapefile, relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load every feature in the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

In [19]:
# Keep only the columns used later: the postal code (join key for the
# Waffle House counts), the state name (join key for population), and the shape.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Waffle House (WH) Data

In [48]:
# Session object used for the HTTP request to the Waffle House locator page.
scraper = cloudscraper.create_scraper()

In [49]:
# Fetch the store-locator landing page.
r = scraper.get("https://locations.wafflehouse.com/")

# Stop here on an HTTP error (e.g. a blocked request) instead of handing an
# error page to the parser and failing later with a confusing message.
r.raise_for_status()

# Parse the HTML so the embedded JSON payload can be located.
soup_state = BeautifulSoup(r.text, "html.parser")

In [50]:
# The page embeds its data as JSON inside a <script id="__NEXT_DATA__"> tag.
next_data_tag = soup_state.find(
    "script", {"id": "__NEXT_DATA__", "type": "application/json"}
)
if next_data_tag is None:
    # find() returns None when nothing matches; fail with a message that says
    # what is actually wrong rather than an AttributeError on None.
    raise ValueError(
        "Could not find the __NEXT_DATA__ script tag in the Waffle House "
        "locations page; the page layout may have changed or the request "
        "may have been blocked."
    )

# Drill down to the list of location records.
locations = json.loads(next_data_tag.text)["props"]["pageProps"]["locations"]

In [51]:
# Tabulate the scraped location records, one row per entry in `locations`.
waffle_house_df = pd.DataFrame(data=locations)

In [61]:
# Build one point per location from its longitude (x) and latitude (y) columns.
wh_points = gpd.points_from_xy(
    x=waffle_house_df["longitude"],
    y=waffle_house_df["latitude"],
)

# Attach the points as geometry, tagged with CRS 4326.
waffle_house_locations_gdf = gpd.GeoDataFrame(
    waffle_house_df, geometry=wh_points, crs=4326
)

In [62]:
# Persist the point layer as a GeoPackage (the format implied by the .gpkg suffix).
locations_output_path = "data/wafflehouse_locations.gpkg"
waffle_house_locations_gdf.to_file(locations_output_path, driver="GPKG")

In [75]:
# Count locations per state, producing a two-column frame:
#   STUSPS - the value of the scraped "state" field
#   WHS    - number of location rows with that value
wafflehouse_counts_df = (
    waffle_house_df.groupby("state")
    .size()
    .reset_index(name="WHS")
    .rename(columns={"state": "STUSPS"})
)

## Get Population Data

In [66]:
# sheet_name=None loads every sheet of the workbook, so the result is a
# dict mapping sheet name -> DataFrame rather than a single DataFrame.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    sheet_name=None,
    engine="openpyxl",
)

In [67]:
# Column labels exactly as pandas reads them from the sheet's first row.
# The first is the sheet's own description cell; its text notes that leading
# dots mark sub-parts, which is why the names are cleaned below.
area_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is the sixth column of the sheet - confirm it
# holds the estimate year this analysis is meant to use.
population_column = "Unnamed: 5"

# Keep just the area label and the population figure, under friendlier names.
state_populations_df = state_populations["NST-EST2024-POP"][
    [area_column, population_column]
].rename(columns={area_column: "NAME", population_column: "POPULATION"})

# Strip only the leading dot(s). Slicing off the first character
# unconditionally would also chop the first letter from any label that has
# no dot, so it could never match a state name in the later merge.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [68]:
# Left-join population onto the state shapes by state name, then keep the
# columns needed downstream in a fixed order. States without a matching
# population row get a missing POPULATION.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [69]:
# Discard every row that has a missing value in any column (for example a
# shape that found no population figure in the merge).
states_with_population_df = states_with_population_df.dropna(
    axis="index", how="any"
)

## Merge Data

In [77]:
# Left-join the per-state location counts onto the state/population frame so
# that states with no locations are kept (their WHS is missing after the join).
wafflehouse_counts_gdf = states_with_population_df.merge(
    wafflehouse_counts_df, on="STUSPS", how="left"
)

# A missing count means "no locations". Fill that column alone rather than
# every column, so an unexpected gap elsewhere is not silently turned into 0.
wafflehouse_counts_gdf["WHS"] = wafflehouse_counts_gdf["WHS"].fillna(0)

# Coerce POPULATION to a numeric dtype explicitly so the rate calculations do
# not depend on implicit dtype inference (assumes the column may arrive as
# object dtype from the spreadsheet - TODO confirm).
wafflehouse_counts_gdf["POPULATION"] = pd.to_numeric(
    wafflehouse_counts_gdf["POPULATION"]
)

In [78]:
# Locations per N residents: WHS divided by the population expressed in
# units of N people. Computed for N = 100,000 and N = 1,000,000.
whs = wafflehouse_counts_gdf["WHS"]
population = wafflehouse_counts_gdf["POPULATION"]

for rate_column, people_per_unit in (("per_100k", 100_000), ("per_1m", 1_000_000)):
    wafflehouse_counts_gdf[rate_column] = whs / (population / people_per_unit)

In [79]:
# Reproject the state shapes to EPSG:9311 before export.
# NOTE(review): presumably chosen as an equal-area projection for mapping the
# US - confirm this is the intended CRS.
wafflehouse_counts_gdf = wafflehouse_counts_gdf.to_crs(epsg=9311)

In [80]:
# Persist the per-state layer (counts, population, rates, reprojected shapes)
# as a GeoPackage (the format implied by the .gpkg suffix).
per_state_output_path = "data/whs_per_state.gpkg"
wafflehouse_counts_gdf.to_file(per_state_output_path, driver="GPKG")