In [9]:
import json
import os
import re
import random
import time

In [10]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [11]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [12]:
# Shapefile with one geometry per state.
# NOTE(review): the file name suggests the 2018 Census cartographic boundary
# set at 1:500k resolution — confirm the provenance.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Load the shapefile with geopandas.
states_df = gpd.read_file(file_path)

In [13]:
# Keep only the columns the later cells use: STUSPS and NAME are the join keys
# for the store counts and the population table, geometry is the state shape.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Denny's Data

In [14]:
# One cloudscraper session shared by every request in the cells below.
scraper = cloudscraper.create_scraper()

In [15]:
def get_coords(store_url: str) -> Point:
    """Fetch a store page and return its location as ``Point(lon, lat)``.

    The coordinates are read from the JSON held in the first ``<script>`` tag
    inside the page's ``<main>`` element, under ``geo.latitude`` and
    ``geo.longitude``.

    Args:
        store_url: URL of a single store page.

    Returns:
        A shapely ``Point`` with x = longitude and y = latitude.

    Raises:
        An HTTP error (from ``raise_for_status``) when the page answers with
        an error status.
        ValueError: the page has no ``<main>``/``<script>`` payload, the
            payload is not valid JSON, or a coordinate is not numeric.
        KeyError: the JSON has no ``geo`` latitude/longitude entries.
    """
    r = scraper.get(store_url)
    # Surface the HTTP status instead of an opaque parsing error further down.
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    main_tag = soup.find("main")
    script_tag = main_tag.find("script") if main_tag is not None else None
    if script_tag is None:
        raise ValueError(f"no <main>/<script> block found in {store_url}")

    store_json = json.loads(script_tag.text)
    geo = store_json["geo"]
    lat = float(geo["latitude"])
    lon = float(geo["longitude"])
    # shapely points are (x, y), i.e. longitude first.
    return Point(lon, lat)

In [16]:
# Scrape every store location, one GeoPackage per state.
#
# The per-state files double as a resume cache: a state whose file already
# exists is skipped, so an interrupted run can simply be started again.
# NOTE: a state file is written even when some of its stores failed, and the
# resume check then skips that state on later runs; delete the file to retry.

# to_file() does not create missing directories.
os.makedirs("data/states", exist_ok=True)

r = scraper.get("https://locations.dennys.com/")
if r.status_code != 200:
    # Same reporting style as the per-state check below; nothing will be
    # parsed from an error page, so say why instead of silently doing nothing.
    print("index page returned status", r.status_code)

soup = BeautifulSoup(r.text, "html.parser")
divs = soup.find_all("div", {"class": "states-group-list"})
# Flatten the per-group link lists in a single pass.
state_as = [state_link for div in divs for state_link in div.find_all("a")]

# Parse all locations in a state
for state_a in tqdm(state_as, desc="Parsing States"):
    state_store_list = []
    state_url = state_a.attrs["href"]
    # rstrip so a trailing slash cannot yield an empty state code.
    state_code = state_url.rstrip("/").split("/")[-1]

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    r_state = scraper.get(state_url)

    if r_state.status_code != 200:
        print(state_code)
        continue

    soup_state = BeautifulSoup(r_state.text, "html.parser")
    city_divs = soup_state.find_all(
        "div", {"class": "city-name col-xs-12 col-sm-6 col-md-4 col-lg-3"}
    )
    # Ignore city entries that carry no link at all.
    city_as = [
        city_link
        for city_link in (city_div.find("a") for city_div in city_divs)
        if city_link is not None
    ]

    for city_a in tqdm(city_as, desc=f"Parsing Locations in {state_code}"):
        city_url = city_a.attrs["href"]
        r_city = scraper.get(city_url)

        # Mirror the state-level check: report and skip a city page that
        # did not load rather than parsing an error page.
        if r_city.status_code != 200:
            print(city_url, r_city.status_code)
            continue

        soup_city = BeautifulSoup(r_city.text, "html.parser")
        store_divs = soup_city.find_all("div", {"class": "gtm-store shadow"})
        store_as = [store_div.find("a") for store_div in store_divs]

        for store_a in store_as:
            # Resolve the URL before the try block so the error message below
            # can never show a stale URL from a previous store.
            store_url = store_a.attrs.get("href") if store_a is not None else None
            if store_url is None:
                print("store entry without link in", city_url)
                continue

            try:
                point = get_coords(store_url)
            except Exception as e:
                # Best effort: report the store and move on. The store must
                # NOT be recorded, otherwise it would reuse the coordinates of
                # the previously parsed store (or hit an unbound `point`).
                print(store_url, e)
            else:
                store_dict = {"STUSPS": state_code, "geometry": point}
                state_store_list.append(store_dict)

            # Small random pause between store requests, success or not.
            time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        print(len(state_store_list), "stores")
        state_dennys_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        state_dennys_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing States:   0%|          | 0/50 [00:00<?, ?it/s]

Parsing Locations in AL:   0%|          | 0/3 [00:00<?, ?it/s]

3 stores


Parsing Locations in AK:   0%|          | 0/1 [00:00<?, ?it/s]

1 stores


Parsing Locations in AZ:   0%|          | 0/45 [00:00<?, ?it/s]

79 stores


Parsing Locations in AR:   0%|          | 0/7 [00:00<?, ?it/s]

8 stores


Parsing Locations in CA:   0%|          | 0/240 [00:00<?, ?it/s]

355 stores


Parsing Locations in CO:   0%|          | 0/14 [00:00<?, ?it/s]

18 stores


Parsing Locations in CT:   0%|          | 0/5 [00:00<?, ?it/s]

5 stores


Parsing Locations in DC:   0%|          | 0/1 [00:00<?, ?it/s]

2 stores


Parsing Locations in FL:   0%|          | 0/80 [00:00<?, ?it/s]

115 stores


Parsing Locations in GA:   0%|          | 0/10 [00:00<?, ?it/s]

10 stores


Parsing Locations in HI:   0%|          | 0/4 [00:00<?, ?it/s]

5 stores


Parsing Locations in ID:   0%|          | 0/7 [00:00<?, ?it/s]

7 stores


Parsing Locations in IL:   0%|          | 0/43 [00:00<?, ?it/s]

43 stores


Parsing Locations in IN:   0%|          | 0/20 [00:00<?, ?it/s]

29 stores


Parsing Locations in IA:   0%|          | 0/3 [00:00<?, ?it/s]

3 stores


Parsing Locations in KS:   0%|          | 0/4 [00:00<?, ?it/s]

4 stores


Parsing Locations in KY:   0%|          | 0/9 [00:00<?, ?it/s]

9 stores


Parsing Locations in LA:   0%|          | 0/6 [00:00<?, ?it/s]

6 stores


Parsing Locations in ME:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in MD:   0%|          | 0/21 [00:00<?, ?it/s]

22 stores


Parsing Locations in MA:   0%|          | 0/4 [00:00<?, ?it/s]

4 stores


Parsing Locations in MI:   0%|          | 0/12 [00:00<?, ?it/s]

12 stores


Parsing Locations in MN:   0%|          | 0/13 [00:00<?, ?it/s]

13 stores


Parsing Locations in MS:   0%|          | 0/5 [00:00<?, ?it/s]

5 stores


Parsing Locations in MO:   0%|          | 0/22 [00:00<?, ?it/s]

26 stores


Parsing Locations in MT:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in NE:   0%|          | 0/3 [00:00<?, ?it/s]

3 stores


Parsing Locations in NV:   0%|          | 0/13 [00:00<?, ?it/s]

40 stores


Parsing Locations in NH:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in NJ:   0%|          | 0/6 [00:00<?, ?it/s]

6 stores


Parsing Locations in NM:   0%|          | 0/22 [00:00<?, ?it/s]

29 stores


Parsing Locations in NY:   0%|          | 0/32 [00:00<?, ?it/s]

33 stores


Parsing Locations in NC:   0%|          | 0/16 [00:00<?, ?it/s]

17 stores


Parsing Locations in ND:   0%|          | 0/3 [00:00<?, ?it/s]

3 stores


Parsing Locations in OH:   0%|          | 0/23 [00:00<?, ?it/s]

24 stores


Parsing Locations in OK:   0%|          | 0/4 [00:00<?, ?it/s]

7 stores


Parsing Locations in OR:   0%|          | 0/15 [00:00<?, ?it/s]

18 stores


Parsing Locations in PA:   0%|          | 0/30 [00:00<?, ?it/s]

33 stores


Parsing Locations in RI:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in SC:   0%|          | 0/8 [00:00<?, ?it/s]

9 stores


Parsing Locations in SD:   0%|          | 0/1 [00:00<?, ?it/s]

1 stores


Parsing Locations in TN:   0%|          | 0/4 [00:00<?, ?it/s]

4 stores


Parsing Locations in TX:   0%|          | 0/115 [00:00<?, ?it/s]

195 stores


Parsing Locations in UT:   0%|          | 0/18 [00:00<?, ?it/s]

21 stores


Parsing Locations in VT:   0%|          | 0/1 [00:00<?, ?it/s]

1 stores


Parsing Locations in VA:   0%|          | 0/18 [00:00<?, ?it/s]

18 stores


Parsing Locations in WA:   0%|          | 0/35 [00:00<?, ?it/s]

41 stores


Parsing Locations in WV:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in WI:   0%|          | 0/20 [00:00<?, ?it/s]

23 stores


Parsing Locations in WY:   0%|          | 0/4 [00:00<?, ?it/s]

4 stores


In [17]:
# Load every per-state GeoPackage written by the scraping cell.
dennys_state_gdfs = []
states_path = "data/states"
# sorted(): os.listdir returns entries in arbitrary order, which would make the
# row order of the combined frame differ between machines and runs.
for file in sorted(os.listdir(states_path)):
    # Anything that is not a GeoPackage is ignored.
    if not file.endswith(".gpkg"):
        continue
    constructed_path = os.path.join(states_path, file)
    dennys_state_gdf = gpd.read_file(constructed_path)
    dennys_state_gdfs.append(dennys_state_gdf)

In [18]:
# Stack the per-state frames into a single GeoDataFrame; ignore_index=True
# replaces the per-file indexes with one continuous index.
dennys_gdf = gpd.GeoDataFrame(pd.concat(dennys_state_gdfs, ignore_index=True))

In [19]:
# Reproject the store points to EPSG:9311 (the per-state files were written
# with crs=4326) and persist the combined layer.
dennys_gdf = dennys_gdf.to_crs(9311)
# Plain string: the path has no placeholders, so no f-string is needed.
dennys_gdf.to_file("data/dennys.gpkg")

## Get Population Data

In [24]:
# sheet_name=None loads every sheet of the workbook, so the result is a dict
# of DataFrames keyed by sheet name (indexed by "NST-EST2024-POP" below).
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [25]:
# The sheet's first row becomes the header, so the name column is labelled
# with this long note instead of a real column name.
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is assumed to be the population estimate wanted
# here — confirm against the workbook layout.
population_header = "Unnamed: 5"

# Reduce the sheet to the two columns of interest and give them usable names.
state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, population_header]
].rename(columns={name_header: "NAME", population_header: "POPULATION"})

# Remove only the leading "." marker. Slicing with [1:] dropped the first
# character unconditionally and so mangled every name that has no dot.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [26]:
# Left-join the populations onto the state geometries by NAME, so every state
# row is kept, then select the columns in their final order.
population_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[population_columns]

In [27]:
# Drop every row with a missing value in any column, e.g. shapes for which the
# left join on NAME found no population entry.
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [28]:
# Count the scraped stores per state code; the result has one row per STUSPS
# value with the count in a DENNYS column.
dennys_state_counts_df = (
    dennys_gdf.groupby("STUSPS").size().reset_index(name="DENNYS")
)

In [29]:
# Attach the store counts to every state (left join on STUSPS) and replace the
# gaps left by states without a scraped store with 0.
# NOTE: the name is rebound to the merged frame, so re-running only this cell
# would merge onto an already merged frame; re-run the counting cell first.
dennys_state_counts_df = states_with_population_df.merge(
    dennys_state_counts_df, how="left", on="STUSPS"
).fillna(0)

In [30]:
# Stores per N residents, for several values of N (column name -> N).
rate_scales = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents in rate_scales.items():
    dennys_state_counts_df[rate_column] = dennys_state_counts_df["DENNYS"] / (
        dennys_state_counts_df["POPULATION"] / residents
    )

# Despite its name, this column is the inverse ratio: residents per store.
# NOTE(review): DENNYS is 0 for states without a scraped store, so this
# divides by zero for those rows — confirm the resulting value is acceptable
# for whatever reads the output file.
dennys_state_counts_df["per_capita"] = (
    dennys_state_counts_df["POPULATION"] / dennys_state_counts_df["DENNYS"]
)

In [31]:
# Reproject the state shapes to EPSG:9311, the same CRS the store points were
# converted to before being saved.
dennys_state_counts_df = dennys_state_counts_df.to_crs(9311)

In [32]:
# Persist the per-state table (geometry, population, store count and the
# derived rate columns) as a GeoPackage.
dennys_state_counts_df.to_file("data/dennys_per_state.gpkg")