In [10]:
import os
import re
import random
import time

In [11]:
from urllib.parse import urljoin

In [12]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [13]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [14]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [15]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get CFA Data

In [16]:
scraper = cloudscraper.create_scraper()

In [17]:
def get_coords(store_url: str) -> Point:
    r = scraper.get(store_url)
    soup = BeautifulSoup(r.text, "html.parser")
    coords = (
        soup.find("div", {"class": "button-grouping flex"})
        .find("a")
        .attrs["href"]
        .split("/")[-1]
        .split(",")
    )
    lat = float(coords[0])
    lon = float(coords[1])
    return Point(lon, lat)

In [20]:
r = scraper.get("https://www.chick-fil-a.com/locations/browse")
soup = BeautifulSoup(r.text, "html.parser")
state_as = soup.find_all("ul")[2].find_all("a")

# Parse all locations in a state
for state_a in tqdm(state_as, desc="Parsing States"):
    state_store_list = []
    state_code = state_a.text.split("(")[1][:2]
    state_url = urljoin("https://www.chick-fil-a.com/", state_a.attrs["href"])

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    r_state = scraper.get(state_url)

    if r_state.status_code != 200:
        print(state_code)
        continue

    stores_soup = BeautifulSoup(r_state.text, "html.parser")
    store_as = stores_soup.find("div", {"class": "state-locations-list"}).find_all("a")
    store_urls = [
        urljoin("https://www.chick-fil-a.com/", store_a.attrs["href"])
        for store_a in store_as
    ]

    for store_url in tqdm(store_urls, desc=f"Parsing Locations in {state_code}"):
        try:
            point = get_coords(store_url)
        except Exception as e:
            print(store_url, e)
        store_dict = {"STUSPS": state_code, "geometry": point}
        state_store_list.append(store_dict)
        time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        chick_fil_a_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        chick_fil_a_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing States:   0%|          | 0/49 [00:00<?, ?it/s]

Parsing Locations in AZ:   0%|          | 0/54 [00:00<?, ?it/s]

Parsing Locations in AR:   0%|          | 0/38 [00:00<?, ?it/s]

Parsing Locations in CA:   0%|          | 0/206 [00:00<?, ?it/s]

Parsing Locations in CO:   0%|          | 0/60 [00:00<?, ?it/s]

Parsing Locations in CT:   0%|          | 0/16 [00:00<?, ?it/s]

Parsing Locations in DE:   0%|          | 0/17 [00:00<?, ?it/s]

Parsing Locations in FL:   0%|          | 0/274 [00:00<?, ?it/s]

https://www.chick-fil-a.com/locations/fl/maitland-boulevard 'NoneType' object has no attribute 'find'


Parsing Locations in GA:   0%|          | 0/272 [00:00<?, ?it/s]

https://www.chick-fil-a.com/locations/ga/relo-larry-mulkey-memorial-rd 'NoneType' object has no attribute 'find'
https://www.chick-fil-a.com/locations/ga/relo-wesley-chapel-road 'NoneType' object has no attribute 'find'


Parsing Locations in HI:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing Locations in ID:   0%|          | 0/11 [00:00<?, ?it/s]

Parsing Locations in IL:   0%|          | 0/73 [00:00<?, ?it/s]

Parsing Locations in IN:   0%|          | 0/61 [00:00<?, ?it/s]

Parsing Locations in IA:   0%|          | 0/20 [00:00<?, ?it/s]

Parsing Locations in KS:   0%|          | 0/24 [00:00<?, ?it/s]

Parsing Locations in KY:   0%|          | 0/53 [00:00<?, ?it/s]

Parsing Locations in LA:   0%|          | 0/59 [00:00<?, ?it/s]

Parsing Locations in ME:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing Locations in MD:   0%|          | 0/89 [00:00<?, ?it/s]

Parsing Locations in MA:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing Locations in MI:   0%|          | 0/36 [00:00<?, ?it/s]

Parsing Locations in MN:   0%|          | 0/27 [00:00<?, ?it/s]

Parsing Locations in MS:   0%|          | 0/45 [00:00<?, ?it/s]

Parsing Locations in MO:   0%|          | 0/50 [00:00<?, ?it/s]

Parsing Locations in MT:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing Locations in NE:   0%|          | 0/15 [00:00<?, ?it/s]

Parsing Locations in NV:   0%|          | 0/19 [00:00<?, ?it/s]

Parsing Locations in NH:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing Locations in NJ:   0%|          | 0/65 [00:00<?, ?it/s]

Parsing Locations in NM:   0%|          | 0/15 [00:00<?, ?it/s]

Parsing Locations in NY:   0%|          | 0/70 [00:00<?, ?it/s]

Parsing Locations in NC:   0%|          | 0/196 [00:00<?, ?it/s]

Parsing Locations in ND:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing Locations in OH:   0%|          | 0/86 [00:00<?, ?it/s]

Parsing Locations in OK:   0%|          | 0/59 [00:00<?, ?it/s]

Parsing Locations in OR:   0%|          | 0/15 [00:00<?, ?it/s]

Parsing Locations in PA:   0%|          | 0/110 [00:00<?, ?it/s]

https://www.chick-fil-a.com/locations/pa/bartonsville 'NoneType' object has no attribute 'find'


Parsing Locations in RI:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing Locations in SC:   0%|          | 0/104 [00:00<?, ?it/s]

Parsing Locations in SD:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing Locations in TN:   0%|          | 0/107 [00:00<?, ?it/s]

Parsing Locations in TX:   0%|          | 0/499 [00:00<?, ?it/s]

Parsing Locations in UT:   0%|          | 0/36 [00:00<?, ?it/s]

Parsing Locations in VA:   0%|          | 0/145 [00:00<?, ?it/s]

Parsing Locations in DC:   0%|          | 0/13 [00:00<?, ?it/s]

Parsing Locations in WA:   0%|          | 0/24 [00:00<?, ?it/s]

Parsing Locations in WV:   0%|          | 0/23 [00:00<?, ?it/s]

Parsing Locations in WI:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing Locations in WY:   0%|          | 0/2 [00:00<?, ?it/s]

In [29]:
chick_fil_a_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        chick_fil_a_gdf = gpd.read_file(constructed_path)
        chick_fil_a_gdfs.append(chick_fil_a_gdf)

## Manually Add The Ones that Failed

In [30]:
added_stores_df = gpd.GeoDataFrame(
    [
        {"STUSPS": "FL", "geometry": Point(-81.403531, 28.636014)},
        {"STUSPS": "GA", "geometry": Point(-84.1461867164634, 34.18484034999439)},
        {"STUSPS": "GA", "geometry": Point(-84.2149281954882, 33.71594003742288)},
        {"STUSPS": "PA", "geometry": Point(-75.2598390581711, 40.99934674317709)},
    ]
)

In [31]:
chick_fil_a_gdfs.append(added_stores_df)

In [33]:
chick_fil_a_gdf = gpd.GeoDataFrame(
    pd.concat(chick_fil_a_gdfs, ignore_index=True), crs=4326
)

In [34]:
chick_fil_a_gdf = chick_fil_a_gdf.to_crs(9311)
chick_fil_a_gdf.to_file(f"data/stores.gpkg")

## Get Population Data

In [36]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [37]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [38]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

In [39]:
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [43]:
chick_fil_a_state_counts_df = pd.DataFrame(
    chick_fil_a_gdf.groupby("STUSPS").size(), columns=["CFAs"]
).reset_index()

In [44]:
chick_fil_a_state_counts_gdf = states_with_population_df.merge(
    chick_fil_a_state_counts_df, on="STUSPS", how="left"
)
chick_fil_a_state_counts_gdf = chick_fil_a_state_counts_gdf.fillna(0)

In [45]:
chick_fil_a_state_counts_gdf["per_100k"] = chick_fil_a_state_counts_gdf["CFAs"] / (
    chick_fil_a_state_counts_gdf["POPULATION"] / 100000
)
chick_fil_a_state_counts_gdf["per_1m"] = chick_fil_a_state_counts_gdf["CFAs"] / (
    chick_fil_a_state_counts_gdf["POPULATION"] / 1_000_000
)

In [46]:
chick_fil_a_state_counts_gdf = chick_fil_a_state_counts_gdf.to_crs(9311)

In [47]:
chick_fil_a_state_counts_gdf.to_file("data/chick_fil_a_per_state.gpkg")