In [1]:
import json
from typing import Optional

import requests

In [2]:
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [3]:
from shapely.geometry import Point

## Open GIS Data

In [4]:
# Load the US state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get State Population data

In [5]:
# sheet_name=None makes pandas read every sheet of the workbook and return a
# dict keyed by sheet name; the sheet of interest is picked out afterwards.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    sheet_name=None,
    engine="openpyxl",
)

In [6]:
# The first column of the sheet is headed by a long descriptive caption, and the
# remaining columns come through as pandas' positional "Unnamed: N" labels.
area_name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

# Keep only the area name and one population column, under friendlier names.
# NOTE(review): "Unnamed: 5" is the sixth column of the sheet -- confirm that it
# is the estimate year this analysis is meant to use.
state_populations_df = state_populations["NST-EST2024-POP"][
    [area_name_header, "Unnamed: 5"]
].rename(
    columns={
        area_name_header: "NAME",
        "Unnamed: 5": "POPULATION",
    }
)

# Per the sheet caption, leading dots mark sub-parts. Strip only those dots:
# slicing off the first character unconditionally would also mangle names that
# carry no dot (e.g. "Puerto Rico" -> "uerto Rico") and stop them from matching
# the shapefile's NAME column.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [7]:
# Attach the population figures to the state geometries by state name, then keep
# only the columns used downstream. The left join keeps every shapefile row;
# rows without a population match get NaN in POPULATION.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "POPULATION", "geometry"]]

In [8]:
# Discard every row that has a missing value in any of the kept columns
# (for example, shapefile rows that found no population match in the left join).
states_with_population_df = states_with_population_df.dropna(axis=0, how="any")

## Get Roy Rogers data

In [10]:
# Fetch the store-locator page. The timeout keeps the cell from hanging
# indefinitely if the site does not respond, and raise_for_status() stops the
# notebook on an HTTP error instead of letting an error page be parsed later.
r = requests.get("https://www.royrogersrestaurants.com/locations", timeout=30)
r.raise_for_status()

In [11]:
# Parse the downloaded page with the standard-library HTML parser backend.
soup = BeautifulSoup(markup=r.text, features="html.parser")

In [12]:
# The page embeds its settings as JSON inside a <script> tag identified by the
# "data-drupal-selector" attribute; locate that tag and decode its contents.
settings_tag = soup.find("script", {"data-drupal-selector": "drupal-settings-json"})
if settings_tag is None:
    # Fail with an explicit message rather than an AttributeError on None
    # when the page layout changes or an unexpected page was returned.
    raise ValueError(
        "Could not find the 'drupal-settings-json' <script> tag in the "
        "downloaded locations page"
    )
roy_rodgers_json = json.loads(settings_tag.text)

In [13]:
# Pull the list of GeoJSON-style features for the locations map out of the
# decoded settings, then build a GeoDataFrame from it. crs=4326 declares the
# coordinates as EPSG:4326.
roy_rodgers_features = roy_rodgers_json["geofield_google_map"][
    "geofield-map-view-locations-block-3"
]["data"]["features"]
roy_rodgers_gdf = gpd.GeoDataFrame.from_features(roy_rodgers_features, crs=4326)

In [14]:
# Write just the store point geometries (no attribute columns) to a GeoPackage.
roy_rodgers_gdf[["geometry"]].to_file(filename="data/roy_rodgers.gpkg")

In [15]:
def find_state(data: dict) -> Optional[str]:
    """Extract a store's state code from its scraped location record.

    The HTML fragment stored under ``data["field_phone_number"]`` is parsed and
    the text of its ``<span class="administrative-area">`` element is returned.

    Args:
        data: Mapping for one location; must contain the key
            ``"field_phone_number"``.

    Returns:
        The text of the ``administrative-area`` span, or ``None`` when the
        record is not subscriptable, the field cannot be parsed as markup, or
        the fragment has no such span.

    Raises:
        KeyError: If ``data`` is a mapping without ``"field_phone_number"``.
    """
    try:
        markup = data["field_phone_number"]
        address_soup = BeautifulSoup(markup, "html.parser")
    except TypeError:
        # Best effort: `data` is not subscriptable, or the field holds a value
        # BeautifulSoup rejects (presumably None for stores without this field).
        return None

    state_tag = address_soup.find("span", {"class": "administrative-area"})
    if state_tag is None:
        # No state span in the fragment: report "unknown" like the other
        # best-effort path instead of failing with AttributeError on None.
        return None
    return state_tag.text

In [16]:
# Derive each store's state code from its "data" record. Only that one column
# is needed, so map over it directly instead of building a row object per store
# with a row-wise DataFrame.apply.
roy_rodgers_gdf["STUSPS"] = roy_rodgers_gdf["data"].map(find_state)

In [17]:
# Hand-assigned state codes for two specific rows, keyed by index label.
# NOTE(review): the labels come from the order of the scraped features, so
# confirm they still point at the intended stores whenever the data is refreshed.
manual_state_overrides = {25: "NJ", 39: "VA"}
for row_label, state_code in manual_state_overrides.items():
    # A boolean mask (rather than .loc[row_label, ...]) makes this a no-op when
    # the label is absent instead of appending a new row.
    roy_rodgers_gdf.loc[roy_rodgers_gdf.index == row_label, "STUSPS"] = state_code

In [19]:
# Count stores per state code. Despite the `_gdf` suffix, the result is a plain
# two-column table (STUSPS, ROY_RODGERS) with no geometry.
roy_rodgers_counts_gdf = (
    roy_rodgers_gdf.groupby("STUSPS").size().reset_index(name="ROY_RODGERS")
)

In [21]:
# Display the per-state store counts for a quick visual check.
roy_rodgers_counts_gdf

Unnamed: 0,STUSPS,ROY_RODGERS
0,MD,22
1,NJ,3
2,PA,7
3,VA,8
4,WV,1


## Merge data

In [23]:
# Join the store counts onto the state geometries/populations by state code.
# The inner join keeps only states that appear in both tables, i.e. states with
# at least one counted store.
states_with_population_with_totals_df = states_with_population_df.merge(
    right=roy_rodgers_counts_gdf,
    how="inner",
    on="STUSPS",
)

In [25]:
# Pull out the two input columns once so the ratio formulas read clearly.
state_population = states_with_population_with_totals_df["POPULATION"]
state_store_count = states_with_population_with_totals_df["ROY_RODGERS"]

# Residents per store, rounded to a whole number.
states_with_population_with_totals_df["people_per_store"] = (
    (state_population / state_store_count).round(decimals=0).astype(int)
)

# Stores per one million residents.
states_with_population_with_totals_df["stores_per_1m"] = state_store_count / (
    state_population / 1_000_000
)

In [26]:
# Keep only the geometry and the derived metrics for the exported layer.
export_columns = ["geometry", "ROY_RODGERS", "people_per_store", "stores_per_1m"]
states_with_population_with_totals_df = states_with_population_with_totals_df[
    export_columns
]

## Export

In [28]:
# Reproject the state polygons to EPSG:32115 before exporting.
# NOTE(review): confirm this projected CRS is the one intended for the map.
states_with_population_with_totals_df = states_with_population_with_totals_df.to_crs(
    epsg=32115
)

In [29]:
# Write the reprojected per-state metrics layer to a GeoPackage.
states_with_population_with_totals_df.to_file(
    filename="data/states_with_counts.gpkg"
)