In [9]:
import json
import os
import random
import time

In [10]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [11]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [12]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [13]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get IHOP's Data

In [14]:
scraper = cloudscraper.create_scraper()

In [15]:
def get_coords(store_url: str) -> Point:
    r = scraper.get(store_url)
    soup = BeautifulSoup(r.text, "html.parser")
    div = soup.find("div", {"id": "locator-content"})
    store_json = json.loads(
        div.find_all("script", {"type": "application/ld+json"})[0].text
    )[0]
    lat = float(store_json["geo"]["latitude"])
    lon = float(store_json["geo"]["longitude"])
    return Point(lon, lat)

In [16]:
url_root = "https://restaurants.ihop.com/en-us/"

# Parse all locations in a state
for i in tqdm(range(len(states_df)), desc="Parsing States"):
    state_code = states_df.iloc[i]["STUSPS"].lower()
    state_url = f"{url_root}{state_code}/"
    state_store_list = []

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    r_state = scraper.get(state_url)

    if r_state.status_code != 200:
        print(state_code)
        continue

    soup_state = BeautifulSoup(r_state.text, "html.parser")
    location_lis = soup_state.find_all("li", {"class": "map-list-item-wrap is-single"})
    city_as = [location_li.find("a") for location_li in location_lis]

    for city_a in tqdm(city_as, desc=f"Parsing Locations in {state_code}"):
        city_url = city_a.attrs["href"]
        r_city = scraper.get(city_url)
        soup_city = BeautifulSoup(r_city.text, "html.parser")
        store_divs = soup_city.find_all("div", {"class": "map-list-item-header"})
        store_as = [store_div.find("a") for store_div in store_divs]

        for store_a in store_as:
            try:
                store_url = store_url = store_a.attrs["href"]
                point = get_coords(store_url)
            except Exception as e:
                print(store_url, e)
                continue
            store_dict = {"STUSPS": state_code, "geometry": point}
            state_store_list.append(store_dict)
            time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        print(len(state_store_list), "stores")
        state_IHOP_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        state_IHOP_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing States:   0%|          | 0/56 [00:00<?, ?it/s]

Parsing Locations in ms:   0%|          | 0/12 [00:00<?, ?it/s]

13 stores


Parsing Locations in nc:   0%|          | 0/40 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/nc/garner/breakfast-1450-us-hwy-70-w-4434 list index out of range
https://restaurants.ihop.com/en-us/nc/kannapolis/breakfast-800-cloverleaf-plaza-4417 list index out of range
https://restaurants.ihop.com/en-us/nc/knightdale/breakfast-6707-knightdale-blvd-3180 list index out of range
https://restaurants.ihop.com/en-us/nc/rockingham/breakfast-714-w-us-74-hwy-3782 list index out of range
48 stores


Parsing Locations in ok:   0%|          | 0/23 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/ok/tulsa/breakfast-3130-s-memorial-dr-1488 list index out of range
34 stores


Parsing Locations in va:   0%|          | 0/48 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/va/chesapeake/breakfast-2501-taylor-rd-575 list index out of range
https://restaurants.ihop.com/en-us/va/chester/breakfast-12251-bermuda-cross-rd-ln-4507 list index out of range
https://restaurants.ihop.com/en-us/va/dumfries/breakfast-3914-fettler-park-dr-3289 list index out of range
https://restaurants.ihop.com/en-us/va/manassas/breakfast-8785-centreville-rd-564 list index out of range
63 stores


Parsing Locations in wv:   0%|          | 0/9 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/wv/charleston/breakfast-6308-maccorkle-ave-se-4502 list index out of range
9 stores


Parsing Locations in la:   0%|          | 0/21 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/la/baton-rouge/breakfast-3006-college-dr-1930 list index out of range
https://restaurants.ihop.com/en-us/la/baton-rouge/breakfast-3680-harding-blvd-2034 list index out of range
https://restaurants.ihop.com/en-us/la/kenner/breakfast-3400-williams-blvd-3112 list index out of range
https://restaurants.ihop.com/en-us/la/thibodaux/breakfast-632-n-canal-blvd-3779 list index out of range
25 stores


Parsing Locations in mi:   0%|          | 0/27 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/mi/detroit/breakfast-333-e.-jefferson-ave-3591 list index out of range
27 stores


Parsing Locations in ma:   0%|          | 0/21 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/ma/brighton/breakfast-1850-soldiers-field-rd-4725 list index out of range
https://restaurants.ihop.com/en-us/ma/north-dartmouth/breakfast-47-faunce-corner-rd-2012 list index out of range
https://restaurants.ihop.com/en-us/ma/northborough/breakfast-4102-shops-way-3426 list index out of range
https://restaurants.ihop.com/en-us/ma/revere/breakfast-105-squire-rd-4732 list index out of range
17 stores


Parsing Locations in id:   0%|          | 0/6 [00:00<?, ?it/s]

7 stores


Parsing Locations in fl:   0%|          | 0/96 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/fl/homestead/breakfast-35-south-homestead-blvd-360242 list index out of range
https://restaurants.ihop.com/en-us/fl/orlando/breakfast-11793-international-drive-360229 list index out of range
https://restaurants.ihop.com/en-us/fl/orlando/breakfast-12400-s-apopka-vineland-road-360114 list index out of range
https://restaurants.ihop.com/en-us/fl/pinellas-park/breakfast-4671-park-blvd-360227 list index out of range
https://restaurants.ihop.com/en-us/fl/tamarac/breakfast-6101-n-university-dr-360063 list index out of range
https://restaurants.ihop.com/en-us/fl/temple-terrace/breakfast-11710-morris-bridge-road-360180 list index out of range
137 stores


Parsing Locations in ne:   0%|          | 0/5 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/ne/bellevue/breakfast-1503-cornhusker-rd-5331 list index out of range
https://restaurants.ihop.com/en-us/ne/lincoln/breakfast-4501-n-27th-st-5326 list index out of range
5 stores


Parsing Locations in wa:   0%|          | 0/28 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/wa/kent/breakfast-24060-104th-ave-se-3708 list index out of range
36 stores


Parsing Locations in nm:   0%|          | 0/12 [00:00<?, ?it/s]

15 stores


Parsing Locations in pr:   0%|          | 0/6 [00:00<?, ?it/s]

6 stores


Parsing Locations in sd:   0%|          | 0/2 [00:00<?, ?it/s]

2 stores


Parsing Locations in tx:   0%|          | 0/124 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/tx/arlington/breakfast-1105-w-interstate-hwy-20-1432 list index out of range
https://restaurants.ihop.com/en-us/tx/beaumont/breakfast-5875-eastex-frwy-1900 list index out of range
https://restaurants.ihop.com/en-us/tx/grand-prairie/breakfast-5252-s.-state-hwy-360-3508 list index out of range
https://restaurants.ihop.com/en-us/tx/grand-prairie/breakfast-4103-s-carrier-pkwy-1931 list index out of range
https://restaurants.ihop.com/en-us/tx/harker-heights/breakfast-170-east-central-texas-expy-3028 list index out of range
https://restaurants.ihop.com/en-us/tx/harlingen/breakfast-2105-w-lincoln-st-1923 list index out of range
https://restaurants.ihop.com/en-us/tx/lufkin/breakfast-4400-s-medford-dr-1474 list index out of range
https://restaurants.ihop.com/en-us/tx/mc-allen/breakfast-4321-n-10th-st-1902 list index out of range
https://restaurants.ihop.com/en-us/tx/missouri-city/breakfast-9220-hwy-6-3598 list index out of range
https://restaurants.ihop.com/en

Parsing Locations in ca:   0%|          | 0/169 [00:00<?, ?it/s]

https://restaurants.ihop.com/en-us/ca/fontana/breakfast-9781-sierra-ave-3457 list index out of range
https://restaurants.ihop.com/en-us/ca/hayward/breakfast-22222-foothill-blvd-601 list index out of range
https://restaurants.ihop.com/en-us/ca/los-angeles/breakfast-5655-wilshire-blvd-796 list index out of range
https://restaurants.ihop.com/en-us/ca/los-angeles/breakfast-800-s-flower-st-797 list index out of range
https://restaurants.ihop.com/en-us/ca/menifee/breakfast-26035-newport-rd-2021 list index out of range
https://restaurants.ihop.com/en-us/ca/moreno-valley/breakfast-24318-hemlock-ave-760 list index out of range
https://restaurants.ihop.com/en-us/ca/riverside/breakfast-10301-magnolia-ave-941 list index out of range
https://restaurants.ihop.com/en-us/ca/sacramento/breakfast-3001-n-st-3378 list index out of range


KeyboardInterrupt: 

In [37]:
dennys_state_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        dennys_state_gdf = gpd.read_file(constructed_path)
        dennys_state_gdfs.append(dennys_state_gdf)

In [38]:
dennys_gdf = gpd.GeoDataFrame(pd.concat(dennys_state_gdfs, ignore_index=True))

In [46]:
dennys_gdf = dennys_gdf.to_crs(9311)
dennys_gdf.to_file(f"data/dennys.gpkg")

## Get Population Data

In [40]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [41]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [42]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

In [43]:
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [52]:
dennys_state_counts_df = pd.DataFrame(
    dennys_gdf.groupby("STUSPS").size(), columns=["DENNYS"]
).reset_index()

In [53]:
dennys_state_counts_df = states_with_population_df.merge(
    dennys_state_counts_df, on="STUSPS", how="left"
)
dennys_state_counts_df = dennys_state_counts_df.fillna(0)

In [54]:
dennys_state_counts_df["per_1000"] = dennys_state_counts_df["DENNYS"] / (
    dennys_state_counts_df["POPULATION"] / 1000
)
dennys_state_counts_df["per_10k"] = dennys_state_counts_df["DENNYS"] / (
    dennys_state_counts_df["POPULATION"] / 10_000
)
dennys_state_counts_df["per_100k"] = dennys_state_counts_df["DENNYS"] / (
    dennys_state_counts_df["POPULATION"] / 100000
)
dennys_state_counts_df["per_500k"] = dennys_state_counts_df["DENNYS"] / (
    dennys_state_counts_df["POPULATION"] / 500_000
)
dennys_state_counts_df["per_1m"] = dennys_state_counts_df["DENNYS"] / (
    dennys_state_counts_df["POPULATION"] / 1_000_000
)
dennys_state_counts_df["per_capita"] = (
    dennys_state_counts_df["POPULATION"] / dennys_state_counts_df["DENNYS"]
)

In [55]:
dennys_state_counts_df = dennys_state_counts_df.to_crs(9311)

In [56]:
dennys_state_counts_df.to_file("data/dennys_per_state.gpkg")