In [1]:
import os
import random
import time

In [2]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Location of the state-boundary shapefile that supplies one geometry per state.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the postal code, the full name and the geometry of each state.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

## Get Whataburger Data

In [6]:
# One shared scraper session; every page request in this notebook goes through it.
scraper = cloudscraper.create_scraper()

In [8]:
# Download the top-level store directory and collect its directory links;
# the scraping loop further down treats each link as one state.
r = scraper.get("https://locations.whataburger.com/directory.html")
soup = BeautifulSoup(markup=r.text, features="html.parser")
state_as = soup.find_all("a", class_="Directory-listLink")

In [9]:
def get_coords(url: str) -> Point:
    """Fetch a store page and return the store location as a point.

    The coordinates are read from the page's ``<meta itemprop="latitude">``
    and ``<meta itemprop="longitude">`` tags.

    Args:
        url: Absolute URL of a single store page.

    Returns:
        ``Point(longitude, latitude)``.

    Raises:
        ValueError: If one of the two ``<meta>`` tags (or its ``content``
            attribute) is missing, or if its content is not a number.
    """
    response = scraper.get(url)
    page = BeautifulSoup(response.text, "html.parser")

    coords = {}
    for axis in ("latitude", "longitude"):
        tag = page.find("meta", {"itemprop": axis})
        # Fail with a message that names the page instead of an opaque
        # "'NoneType' object has no attribute 'attrs'".
        if tag is None or "content" not in tag.attrs:
            raise ValueError(f"No {axis} <meta> tag found at {url}")
        coords[axis] = float(tag.attrs["content"])

    # Points are built as (x, y), i.e. longitude first, then latitude.
    return Point(coords["longitude"], coords["latitude"])

In [10]:
url_root = "https://locations.whataburger.com/"

# Make sure the per-state cache directory exists so a finished scrape is not
# lost when its GeoPackage is written at the end of the iteration.
os.makedirs("data/states", exist_ok=True)

for state_a in tqdm(state_as, desc="Parsing State"):
    # First path segment of the link, without its extension, upper-cased.
    state_code = state_a.attrs["href"].split("/")[0].split(".")[0].upper()

    # A state that already has a file on disk was scraped by an earlier run;
    # skipping it lets the cell be resumed after an interruption.
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    state_store_list = []
    failed_store_count = 0

    # URLs are built by plain concatenation: os.path.join is meant for
    # filesystem paths, not for URLs (url_root already ends with "/").
    state_url = f"{url_root}{state_code.lower()}.html"

    r_state = scraper.get(state_url)
    state_soup = BeautifulSoup(r_state.text, "html.parser")
    city_as = state_soup.find_all("a", {"class": "Directory-listLink"})
    # Reduce every link to "<first segment>/<second segment>.html": drop
    # everything after the first "." and keep at most two path segments.
    city_hrefs = [
        "/".join(city_a.attrs["href"].split(".")[0].split("/")[:2]) + ".html"
        for city_a in city_as
    ]

    for city_href in tqdm(city_hrefs, desc=f"Parsing Cities in {state_code}"):
        city_url = f"{url_root}{city_href}"
        r_city = scraper.get(city_url)
        city_soup = BeautifulSoup(r_city.text, "html.parser")
        city_lis = city_soup.find_all(
            "li", {"class": "ResultList-item js-resultlist-item"}
        )
        for city_li in city_lis:
            try:
                store_url = city_li.attrs["data-url"].replace("../", url_root)
                point = get_coords(store_url)
            except Exception as exc:
                # Best-effort scrape: report the city page together with the
                # reason for the failure, then carry on with the next store.
                failed_store_count += 1
                print(city_url, repr(exc))
                continue

            state_store_list.append(
                {"STUSPS": state_code, "geometry": point, "url": store_url}
            )
            # Small random pause between store requests.
            time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        print(len(state_store_list), "stores")
        if failed_store_count:
            # The file written below makes later runs skip this state, so
            # point out that it is incomplete.
            print(
                f"{failed_store_count} stores in {state_code} could not be parsed; "
                f"delete data/states/{state_code}.gpkg to retry"
            )
        whataburger_state_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        whataburger_state_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing State:   0%|          | 0/17 [00:00<?, ?it/s]

Parsing Cities in AL:   0%|          | 0/34 [00:00<?, ?it/s]

42 stores


Parsing Cities in AZ:   0%|          | 0/16 [00:00<?, ?it/s]

42 stores


Parsing Cities in AR:   0%|          | 0/19 [00:00<?, ?it/s]

20 stores


Parsing Cities in CO:   0%|          | 0/3 [00:00<?, ?it/s]

8 stores


Parsing Cities in FL:   0%|          | 0/24 [00:00<?, ?it/s]

46 stores


Parsing Cities in GA:   0%|          | 0/27 [00:00<?, ?it/s]

32 stores


Parsing Cities in KS:   0%|          | 0/5 [00:00<?, ?it/s]

8 stores


Parsing Cities in LA:   0%|          | 0/19 [00:00<?, ?it/s]

30 stores


Parsing Cities in MS:   0%|          | 0/13 [00:00<?, ?it/s]

14 stores


Parsing Cities in MO:   0%|          | 0/15 [00:00<?, ?it/s]

20 stores


Parsing Cities in NV:   0%|          | 0/1 [00:00<?, ?it/s]

1 stores


Parsing Cities in NM:   0%|          | 0/6 [00:00<?, ?it/s]

16 stores


Parsing Cities in NC:   0%|          | 0/9 [00:00<?, ?it/s]

10 stores


Parsing Cities in OK:   0%|          | 0/29 [00:00<?, ?it/s]

48 stores


Parsing Cities in SC:   0%|          | 0/9 [00:00<?, ?it/s]

11 stores


Parsing Cities in TN:   0%|          | 0/17 [00:00<?, ?it/s]

26 stores


Parsing Cities in TX:   0%|          | 0/315 [00:00<?, ?it/s]

764 stores


In [11]:
# Read every per-state GeoPackage written by the scraping cell.
whataburger_gdfs = []
states_path = "data/states"
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the combined frame reproducible between runs.
for file in sorted(os.listdir(states_path)):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        whataburger_gdf = gpd.read_file(constructed_path)
        whataburger_gdfs.append(whataburger_gdf)

In [13]:
# Stack the per-state frames into a single table with a fresh 0..n-1 index.
whataburger_gdf = gpd.GeoDataFrame(
    pd.concat(objs=whataburger_gdfs, ignore_index=True)
)

In [14]:
# Persist the combined store locations before any reprojection.
whataburger_gdf.to_file(filename="data/whataburger_locations.gpkg")

In [15]:
# Reproject the store locations to EPSG:9311 and save them to a second file.
whataburger_gdf = whataburger_gdf.to_crs(9311)
# Plain string literal: the path has no placeholders, so no f-string is needed.
whataburger_gdf.to_file("data/whataburger.gpkg")

## Get Population Data

In [16]:
# sheet_name=None loads every sheet of the workbook into a dict keyed by
# sheet name; the sheet that is needed is picked out in the next cell.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [17]:
# The area names live in a column whose header is the sheet's long title
# text; the population figures are taken from the column "Unnamed: 5".
# NOTE(review): confirm which estimate year "Unnamed: 5" holds in this file.
state_populations_df = (
    state_populations["NST-EST2024-POP"][
        [
            "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
            "Unnamed: 5",
        ]
    ]
    .rename(
        columns={
            "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
            "Unnamed: 5": "POPULATION",
        }
    )
    # Strip only the leading dots that mark sub-parts. Slicing off the first
    # character unconditionally also mangles names that carry no dot, which
    # then silently fail to match in the merge on NAME.
    .assign(NAME=lambda df: df["NAME"].str.lstrip("."))
)

In [18]:
# Attach the population figure to every state geometry (states without a
# matching name keep a missing POPULATION) and fix the column order.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [19]:
# Discard every row that has a missing value in any column, e.g. areas for
# which the left merge found no population figure.
states_with_population_df = states_with_population_df.dropna(
    axis="index", how="any"
)

## Merge Data

In [20]:
# Number of stores per state code, as a two-column table
# (STUSPS, WHATABURGERs).
whataburger_state_counts_gdf = (
    whataburger_gdf.groupby("STUSPS")
    .size()
    .to_frame(name="WHATABURGERs")
    .reset_index()
)

In [21]:
# Left merge: every state with a population is kept, whether or not it has
# any stores.
whataburger_state_counts_population_gdf = states_with_population_df.merge(
    whataburger_state_counts_gdf, on="STUSPS", how="left"
)
# States without stores come out of the merge with a missing count. Zero-fill
# only that column; a blanket fillna(0) would also write 0 into any other
# column with a gap, including the geometry column.
whataburger_state_counts_population_gdf = (
    whataburger_state_counts_population_gdf.fillna({"WHATABURGERs": 0})
)

In [22]:
# Stores per 100,000 and per 1,000,000 residents: the store count divided by
# the population expressed in units of that size.
for rate_column, people_per_unit in (("per_100k", 100000), ("per_1m", 1_000_000)):
    whataburger_state_counts_population_gdf[rate_column] = (
        whataburger_state_counts_population_gdf["WHATABURGERs"]
        / (whataburger_state_counts_population_gdf["POPULATION"] / people_per_unit)
    )

In [23]:
# Reproject the state polygons to EPSG:9311, the same CRS that the store
# locations are reprojected to earlier in the notebook.
whataburger_state_counts_population_gdf = (
    whataburger_state_counts_population_gdf.to_crs(epsg=9311)
)

In [24]:
# Save the per-state table (counts, population, per-capita rates, geometry).
whataburger_state_counts_population_gdf.to_file(
    filename="data/whataburgers_per_state.gpkg"
)