In [2]:
import json
import time

In [3]:
import cloudscraper
import geopandas as gpd
import pandas as pd

In [4]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

In [5]:
import h3

## Get State Data

In [43]:
# Read the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [44]:
# Keep only the postal code, the state name and the geometry.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

In [45]:
# Shorten the Virgin Islands label wherever the exact long form appears as a
# cell value.
states_df = states_df.replace(
    to_replace="United States Virgin Islands",
    value="Virgin Islands",
)

In [55]:
# Merge every state polygon except Alaska into a single geometry.
# NOTE(review): this shape drives the H3 query grid below, so no query is
# centred inside Alaska -- confirm Alaska locations are not expected in the
# final per-state counts.
is_not_alaska = states_df["STUSPS"] != "AK"
country_shape_minus_alaska = states_df.loc[is_not_alaska, "geometry"].union_all()

In [56]:
# Convert the merged polygon to an H3 shape, then list the resolution-3 cells
# for that shape. Each cell is later used as the centre of one locator query.
h3_shape = h3.geo_to_h3shape(country_shape_minus_alaska)
h3_cells = h3.h3shape_to_cells(h3_shape, res=3)

## Get Texas Roadhouse Data

In [58]:
# HTTP client used for every store-locator request below.
scraper = cloudscraper.create_scraper()

In [61]:
# Query the store-locator endpoint once per H3 cell centre and collect every
# restaurant it returns as a {"STATE", "geometry"} record.
# The same restaurant can be returned by several queries; duplicates are left
# in this list.
# NOTE(review): the nomnom_calendars_from/to dates in the URL are hard-coded
# (20241204-20241215) -- confirm whether the endpoint still accepts them.
REQUEST_TIMEOUT_SECONDS = 30

restaurant_list = []
for h3_cell in tqdm(h3_cells, desc="Parsing H3 Cells"):
    # cell_to_latlng returns (lat, lng): y is the latitude, x the longitude.
    y, x = h3.cell_to_latlng(h3_cell)
    url = (
        f"https://www.texasroadhouse.com/restaurants/near?lat={y}&long={x}"
        "&radius=500&limit=100&nomnom=calendars&nomnom_calendars_from=20241204&nomnom_calendars_to=20241215&nomnom_exclude_extref="
    )

    # A timeout keeps one hung connection from stalling the whole crawl, and
    # raise_for_status() reports an HTTP error directly instead of letting
    # json.loads fail on a non-JSON error page.
    response = scraper.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    locations = response.text
    restaurants = json.loads(locations)["restaurants"]
    for restaurant in restaurants:
        lng = restaurant["longitude"]
        lat = restaurant["latitude"]
        state_code = restaurant["state"]

        # Point takes (x, y), i.e. (longitude, latitude).
        restaurant_list.append({"STATE": state_code, "geometry": Point(lng, lat)})

Parsing H3 Cells:   0%|          | 0/609 [00:00<?, ?it/s]

In [106]:
# Build a GeoDataFrame from the scraped records; the (lng, lat) points are
# declared as EPSG:4326.
texas_roadhouse_gdf = gpd.GeoDataFrame(restaurant_list, crs=4326)

In [107]:
# Drop exact duplicate rows and renumber the remaining rows from zero,
# keeping just the STATE and geometry columns.
texas_roadhouse_gdf = (
    texas_roadhouse_gdf.drop_duplicates()
    .reset_index(drop=True)[["STATE", "geometry"]]
)

## Get Population Data

In [108]:
# Read every sheet of the population workbook; sheet_name=None returns a dict
# of DataFrames keyed by sheet name.
state_populations = pd.read_excel(
    "data/NST-EST2023-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [109]:
# Extract a two-column NAME / POPULATION table from the population sheet.
# The sheet was read without a clean header row, so the first column is named
# after the sheet's title text and the others are "Unnamed: N".
# NOTE(review): "Unnamed: 5" is presumably the latest (2023) estimate column --
# confirm against the workbook.
_NAME_HEADER = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
_POPULATION_HEADER = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2023-POP"][
    [_NAME_HEADER, _POPULATION_HEADER]
].rename(columns={_NAME_HEADER: "NAME", _POPULATION_HEADER: "POPULATION"})

# Strip only the leading dot(s) that mark sub-parts. Slicing off the first
# character unconditionally also mangles labels that have no dot, which then
# silently fail to match on NAME in the merge.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [110]:
# Attach the population figure to each state by name. The left join keeps
# every row of states_df; rows with no match get a missing POPULATION.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "POPULATION", "geometry", "NAME"]]

In [111]:
# Drop rows with any missing value, e.g. areas that found no population match
# in the left join.
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [112]:
# Count restaurants per state by testing each restaurant point against the
# state polygons. If several polygons intersect a point, the first one wins.
state_counts = {}
for i in tqdm(range(len(texas_roadhouse_gdf)), desc="Parsing Texas RoadHouses"):
    restaurant_point = texas_roadhouse_gdf.iloc[i]["geometry"]
    matching_codes = states_df.loc[states_df.intersects(restaurant_point), "STUSPS"]

    if matching_codes.empty:
        # No state polygon intersects this point: print its row position and
        # leave it out of the counts.
        print(i)
        continue

    state_code = matching_codes.iloc[0]
    state_counts[state_code] = state_counts.get(state_code, 0) + 1

Parsing Texas RoadHouses:   0%|          | 0/671 [00:00<?, ?it/s]

In [113]:
# Turn the {state code: count} dict into a two-column frame: STUSPS, stores.
texas_roadhouse_per_state_df = (
    pd.Series(state_counts, name="stores").rename_axis("STUSPS").reset_index()
)

In [114]:
# Join the store counts onto the state/population table. States with no count
# come out of the left join as missing and are filled with 0.
# NOTE: this rebinds texas_roadhouse_per_state_df, so re-running this cell on
# its own merges the already-merged frame; re-run the previous cell first.
texas_roadhouse_per_state_df = states_with_population_df.merge(
    texas_roadhouse_per_state_df,
    how="left",
    on="STUSPS",
).fillna(0)

In [115]:
# Stores per N residents, for several values of N.
stores = texas_roadhouse_per_state_df["stores"]
population = texas_roadhouse_per_state_df["POPULATION"]

for column_name, residents_per_unit in (
    ("per_1000", 1000),
    ("per_10k", 10_000),
    ("per_100k", 100000),
    ("per_500k", 500_000),
    ("per_1m", 1_000_000),
):
    texas_roadhouse_per_state_df[column_name] = stores / (
        population / residents_per_unit
    )

# Despite the column name, this is residents per store (population / stores).
# NOTE(review): stores can be 0 after the upstream fillna(0), so this divides
# by zero for those rows -- presumably yielding inf; verify the output.
texas_roadhouse_per_state_df["per_capita"] = population / stores

In [116]:
# Reproject the per-state table to EPSG:9311 before export.
texas_roadhouse_per_state_df = texas_roadhouse_per_state_df.to_crs(9311)

In [117]:
# Reproject the restaurant points to EPSG:9311, the same CRS as the per-state
# table.
texas_roadhouse_gdf = texas_roadhouse_gdf.to_crs(9311)

In [118]:
# Write the per-state table (geometry, population, counts, rates) to disk.
texas_roadhouse_per_state_df.to_file("data/roadhouses_per_state.gpkg")

In [119]:
# Write the de-duplicated restaurant points to disk.
texas_roadhouse_gdf.to_file("data/roadhouses.gpkg")