In [14]:
import os
import random
import time

In [15]:
from typing import Tuple

In [164]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import numpy as np
import pandas as pd

In [17]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [18]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [19]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get RestData

In [76]:
scraper = cloudscraper.create_scraper()

In [77]:
def get_coords(store_url: str) -> Tuple[Point, str]:
    r = scraper.get(store_url)
    soup = BeautifulSoup(r.text, "html.parser")
    lat = float(soup.find("meta", {"itemprop": "latitude"}).attrs["content"])
    lon = float(soup.find("meta", {"itemprop": "longitude"}).attrs["content"])
    name = soup.find("h1", {"class": "Hero-title"}).text.split(" ")[0]
    return (Point(lon, lat), name)

In [79]:
root_url = "https://locations.checkersandrallys.com/"
r = scraper.get(root_url)
soup = BeautifulSoup(r.text, "html.parser")
state_as = soup.find_all("a", {"class": "Directory-listLink"})

# Parse all locations in a state
for state_a in tqdm(state_as, desc="Parsing States"):
    state_store_list = []
    state_code = state_a["href"].split("/")[0]
    state_url = os.path.join(root_url, state_code)

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    r_state = scraper.get(state_url)

    if r_state.status_code != 200:
        print(state_code)
        continue

    soup_state = BeautifulSoup(r_state.text, "html.parser")
    city_as = soup_state.find_all("a", {"class": "Directory-listLink"})
    city_urls = [
        os.path.join(root_url, os.path.join("/".join(city_a["href"].split("/")[:2])))
        for city_a in city_as
    ]

    for city_url in tqdm(city_urls, desc=f"Parsing Locations in {state_code}"):
        r_city = scraper.get(city_url)
        soup_city = BeautifulSoup(r_city.text, "html.parser")
        restaurant_as = soup_city.find_all(
            "a", {"class": "Teaser-button", "data-ya-track": "moreinfo"}
        )
        restaurant_urls = [
            restaurant_a["href"].replace("../", root_url)
            for restaurant_a in restaurant_as
        ]
        for restaurant_url in restaurant_urls:
            try:
                point, name = get_coords(restaurant_url)
            except Exception as e:
                print(restaurant_url, e)
            store_dict = {"STUSPS": state_code.upper(), "geometry": point, "name": name}
            state_store_list.append(store_dict)
            time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        print(len(state_store_list), "stores")
        state_rest_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        state_rest_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing States:   0%|          | 0/29 [00:00<?, ?it/s]

Parsing Locations in al:   0%|          | 0/10 [00:00<?, ?it/s]

17 stores


Parsing Locations in az:   0%|          | 0/6 [00:00<?, ?it/s]

10 stores


Parsing Locations in ar:   0%|          | 0/7 [00:00<?, ?it/s]

12 stores


Parsing Locations in ca:   0%|          | 0/27 [00:00<?, ?it/s]

https://locations.checkersandrallys.com/ca/merced/3105-state-highway-59 'NoneType' object has no attribute 'text'
35 stores


Parsing Locations in de:   0%|          | 0/2 [00:00<?, ?it/s]

3 stores


Parsing Locations in fl:   0%|          | 0/85 [00:00<?, ?it/s]

144 stores


Parsing Locations in ga:   0%|          | 0/45 [00:00<?, ?it/s]

72 stores


Parsing Locations in il:   0%|          | 0/17 [00:00<?, ?it/s]

19 stores


Parsing Locations in in:   0%|          | 0/20 [00:00<?, ?it/s]

36 stores


Parsing Locations in ia:   0%|          | 0/1 [00:00<?, ?it/s]

2 stores


Parsing Locations in ky:   0%|          | 0/20 [00:00<?, ?it/s]

https://locations.checkersandrallys.com/ky/lexington/2975-richmond-road-1048 'NoneType' object has no attribute 'text'
31 stores


Parsing Locations in la:   0%|          | 0/23 [00:00<?, ?it/s]

39 stores


Parsing Locations in md:   0%|          | 0/28 [00:00<?, ?it/s]

35 stores


Parsing Locations in mi:   0%|          | 0/41 [00:00<?, ?it/s]

59 stores


Parsing Locations in ms:   0%|          | 0/20 [00:00<?, ?it/s]

23 stores


Parsing Locations in mo:   0%|          | 0/13 [00:00<?, ?it/s]

20 stores


Parsing Locations in nv:   0%|          | 0/1 [00:00<?, ?it/s]

https://locations.checkersandrallys.com/nv/las-vegas/3978-east-lake-mead-boulevard 'NoneType' object has no attribute 'text'
5 stores


Parsing Locations in nj:   0%|          | 0/11 [00:00<?, ?it/s]

12 stores


Parsing Locations in ny:   0%|          | 0/21 [00:00<?, ?it/s]

35 stores


Parsing Locations in nc:   0%|          | 0/8 [00:00<?, ?it/s]

https://locations.checkersandrallys.com/nc/greenville/703-s.e.-greenville-blvd.,-suite-#23 'NoneType' object has no attribute 'attrs'
9 stores


Parsing Locations in oh:   0%|          | 0/33 [00:00<?, ?it/s]

https://locations.checkersandrallys.com/oh/cincinnati/4115-glenway-avenue-#1 'NoneType' object has no attribute 'attrs'
67 stores


Parsing Locations in pa:   0%|          | 0/5 [00:00<?, ?it/s]

9 stores


Parsing Locations in sc:   0%|          | 0/5 [00:00<?, ?it/s]

8 stores


Parsing Locations in tn:   0%|          | 0/10 [00:00<?, ?it/s]

14 stores


Parsing Locations in tx:   0%|          | 0/14 [00:00<?, ?it/s]

21 stores


Parsing Locations in va:   0%|          | 0/12 [00:00<?, ?it/s]

15 stores


Parsing Locations in dc:   0%|          | 0/1 [00:00<?, ?it/s]

1 stores


Parsing Locations in wv:   0%|          | 0/3 [00:00<?, ?it/s]

3 stores


Parsing Locations in wi:   0%|          | 0/2 [00:00<?, ?it/s]

4 stores


## Fix Incorrect URLs
#### https://locations.checkersandrallys.com/ca/merced/3105-state-highway-59
#### https://locations.checkersandrallys.com/ky/lexington/2975-richmond-road-1048 
#### https://locations.checkersandrallys.com/nv/las-vegas/3978-east-lake-mead-boulevard 
#### https://locations.checkersandrallys.com/nc/greenville/703-s.e.-greenville-blvd.,-suite-#23 
#### https://locations.checkersandrallys.com/oh/cincinnati/4115-glenway-avenue-#1 

In [84]:
added_stores = [
    {"STUSPS": "CA", "geometry": Point(-120.5056439, 37.3198812), "name": "Rally's"},
    {"STUSPS": "KY", "geometry": Point(-84.44779, 38.003592), "name": "Rally's"},
    {"STUSPS": "NV", "geometry": Point(-115.087543, 36.196204), "name": "Rally's"},
    {"STUSPS": "NC", "geometry": Point(-77.370239, 35.588046), "name": "Checkers"},
    {"STUSPS": "OH", "geometry": Point(-84.582013, 39.114346), "name": "Rally's"},
]
added_gdf = gpd.GeoDataFrame(added_stores, crs=4326)

In [85]:
state_rest_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        state_rest_gdf = gpd.read_file(constructed_path)
        state_rest_gdfs.append(state_rest_gdf)

In [86]:
state_rest_gdfs.append(added_gdf)

In [92]:
rest_gdf = gpd.GeoDataFrame(pd.concat(state_rest_gdfs, ignore_index=True))

In [99]:
rest_gdf["name"] = rest_gdf["name"].str.replace("-", "")

In [165]:
rest_gdf = rest_gdf.to_crs(9311)
rest_gdf.to_file(f"data/locations.gpkg")

## Get Population Data

In [166]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [167]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [168]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

In [169]:
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [196]:
groupby_df = rest_gdf.groupby(["name", "STUSPS"]).size()
checkers_counts_df = pd.DataFrame(groupby_df["Checkers"]).rename(
    columns={0: "Checkers"}
)
rallys_counts_df = pd.DataFrame(groupby_df["Rally's"]).rename(columns={0: "Rally's"})

In [199]:
checkers_vs_rallys_df = rallys_counts_df.merge(
    checkers_counts_df, on="STUSPS", how="outer"
)

In [201]:
checkers_vs_rallys_df = checkers_vs_rallys_df.replace(np.nan, 0)
checkers_vs_rallys_df["Rally's"] = checkers_vs_rallys_df["Rally's"].astype(int)

In [202]:
checkers_vs_rallys_gdf = states_with_population_df.merge(
    checkers_vs_rallys_df, on="STUSPS", how="left"
)
checkers_vs_rallys_gdf = checkers_vs_rallys_gdf.fillna(0)

In [203]:
checkers_vs_rallys_gdf["rallys_per_100k"] = checkers_vs_rallys_gdf["Rally's"] / (
    checkers_vs_rallys_gdf["POPULATION"] / 100_000
)
checkers_vs_rallys_gdf["rallys_per_1m"] = checkers_vs_rallys_gdf["Rally's"] / (
    checkers_vs_rallys_gdf["POPULATION"] / 1_000_000
)
checkers_vs_rallys_gdf["checkers_per_100k"] = checkers_vs_rallys_gdf["Checkers"] / (
    checkers_vs_rallys_gdf["POPULATION"] / 100_000
)
checkers_vs_rallys_gdf["checkers_per_1m"] = checkers_vs_rallys_gdf["Checkers"] / (
    checkers_vs_rallys_gdf["POPULATION"] / 1_000_000
)

In [204]:
checkers_vs_rallys_gdf = checkers_vs_rallys_gdf.to_crs(9311)

In [205]:
checkers_vs_rallys_gdf.to_file("data/checkers_vs_rallys.gpkg")