In [2]:
import json
import os
import re

In [3]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [4]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [5]:
# Read the state boundary shapefile into a GeoDataFrame.
# The path is relative, so the notebook must run from the directory that
# contains the "data/" folder.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [6]:
# Keep only the columns used later: STUSPS and NAME are the merge keys for the
# store counts and the population table respectively, plus the geometry.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Perkins Data

In [7]:
# Session-like client used for the page request in the next cell.
scraper = cloudscraper.create_scraper()

In [8]:
# Download the store-locator page. The timeout keeps a stalled connection from
# hanging the notebook indefinitely.
r = scraper.get("https://www.perkinsrestaurants.com/locations/", timeout=30)
# Stop here with a clear HTTP error on a 4xx/5xx response; otherwise an error
# page would be parsed and the failure would only show up later as a
# confusing IndexError or an empty result.
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

In [9]:
# Take the text of the third <script> element on the page; the regexes in the
# following cells are run against this text.
# NOTE(review): the fixed index 2 presumably targets the script that embeds the
# store payload. It raises IndexError if the page has fewer than three scripts
# and picks the wrong script if the page's script order changes.
stores_text = soup.find_all("script")[2].text

In [10]:
# Non-greedy capture of whatever sits between "content:{" and the next
# "},path:" marker. The result is a list with one captured string per match.
content_regex = r"content:\{(.*?)\},path:"
contents = re.compile(content_regex).findall(stores_text)

In [11]:
# Capture the quoted value of every path:"..." occurrence in the script text.
# NOTE(review): this matches any path:"..." in the script, not only the ones
# that follow a content block. Confirm the list lines up with `contents`.
path_regex = r'path:"(.*?)"'
paths = re.compile(path_regex).findall(stores_text)

In [13]:
# Build one record per store: a lon/lat point plus the state code taken from
# the third "/"-separated segment of the store's path (upper-cased).

# The pattern is the same for every store, so compile it once outside the loop.
# Each coordinate appears either quoted or bare; the back-references make the
# closing quote mirror the opening one, and the named groups return the bare
# number without any string splitting.
latlon_regex = re.compile(
    r'latitude:(?P<lat_q>"?)(?P<lat>-?[\d\.]+)(?P=lat_q),'
    r'longitude:(?P<lon_q>"?)(?P<lon>-?[\d\.]+)(?P=lon_q)'
)

# Records are paired with paths purely by position, so a length mismatch means
# some stores may be assigned the wrong state. Surface that instead of hiding it.
if len(paths) != len(contents):
    print(
        f"Warning: {len(contents)} content blocks but {len(paths)} paths; "
        "positional pairing may be misaligned"
    )

store_list = []
for i, content in enumerate(contents):
    match = latlon_regex.search(content)
    if not match:
        print("No Coords")
        continue
    lat = float(match.group("lat"))
    lon = float(match.group("lon"))
    # shapely points are (x, y), i.e. (longitude, latitude).
    geometry = Point(lon, lat)

    # Guard the positional lookup so a short `paths` list is reported as a
    # missing path rather than raising IndexError.
    path = paths[i] if i < len(paths) else ""
    if not path:
        print(i, "No Path")
        continue

    # The path may contain literal backslash escapes (e.g. an escaped "/");
    # decode them before splitting into segments.
    segments = path.encode("utf-8").decode("unicode_escape").split("/")
    if len(segments) < 3:
        print(i, "No State In Path")
        continue

    state_code = segments[2].upper()
    store_dict = {"geometry": geometry, "STUSPS": state_code}
    store_list.append(store_dict)

No Coords


In [14]:
# Turn the scraped records into a GeoDataFrame tagged as EPSG:4326, then
# remove exact duplicate rows and renumber the index from zero.
perkins_location_gdf = gpd.GeoDataFrame(store_list, crs=4326)
perkins_location_gdf = perkins_location_gdf.drop_duplicates()
perkins_location_gdf = perkins_location_gdf.reset_index(drop=True)

In [15]:
# Persist the de-duplicated store points to disk.
# NOTE(review): no driver is passed, so the output format presumably comes
# from the ".gpkg" extension. Confirm for the installed geopandas version.
perkins_location_gdf.to_file("data/perkins.gpkg")

## Get Population Data

In [16]:
# Load the population workbook. With sheet_name=None, read_excel returns a
# dict of DataFrames keyed by sheet name; the next cell selects one sheet.
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [17]:
# Pull the geography label and one population column out of the sheet and give
# them usable names. The label column's header is the sheet's title text, and
# the population column has no header of its own ("Unnamed: 5").
# NOTE(review): confirm which estimate year "Unnamed: 5" holds in this file.
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
# Per the sheet header, sub-part rows are prefixed with leading dots. Strip
# only those dots: slicing off the first character unconditionally would also
# mangle any label that has no dot prefix and stop it matching on NAME.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [18]:
# Attach the population figure to every state row (left join on NAME, so
# states with no matching label keep a null POPULATION) and fix the column
# order in the same expression.
states_with_population_df = states_df.merge(
    state_populations_df, on="NAME", how="left"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [19]:
# Drop rows with any null value (e.g. states that found no population match),
# then reproject to the same CRS as the store points.
states_with_population_df = states_with_population_df.dropna().to_crs(
    perkins_location_gdf.crs
)

## Merge Data

In [24]:
# Count store rows per state code; the result has two columns,
# "STUSPS" and "stores".
perkins_count_df = (
    perkins_location_gdf.groupby("STUSPS").size().reset_index(name="stores")
)

In [26]:
# Left-join the per-state store counts onto the state/population frame so that
# states without any store are kept.
perkins_count_gdf = states_with_population_df.merge(
    perkins_count_df, on="STUSPS", how="left"
)
# States with no store come out of the join with a null count; treat that as
# zero. Fill only the "stores" column: a frame-wide fillna(0) would also write
# 0 into any other null cell, including geometry, where 0 is not a valid value.
perkins_count_gdf["stores"] = perkins_count_gdf["stores"].fillna(0)

In [27]:
# Add store density per 100,000 and per 1,000,000 residents.
perkins_count_gdf = perkins_count_gdf.assign(
    per_100k=lambda frame: frame["stores"] / (frame["POPULATION"] / 100000),
    per_1m=lambda frame: frame["stores"] / (frame["POPULATION"] / 1_000_000),
)

In [28]:
# Reproject the per-state frame to EPSG:9311 before export.
# NOTE(review): presumably chosen as an equal-area projection for mapping the
# US. Confirm this is the intended CRS.
perkins_count_gdf = perkins_count_gdf.to_crs(epsg=9311)

In [29]:
# Write the per-state counts, per-capita rates and geometry to disk.
# NOTE(review): no driver is passed, so the output format presumably comes
# from the ".gpkg" extension. Confirm for the installed geopandas version.
perkins_count_gdf.to_file("data/perkins_per_state.gpkg")