In [1]:
import json
import os
import random
import re
import time

In [2]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd
import h3

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Location of the state boundary shapefile, relative to the notebook.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Read the layer into a GeoDataFrame (one row per state-level entity).
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the postal abbreviation, the full name and the boundary geometry;
# the remaining shapefile attributes are not used by this notebook.
kept_state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[kept_state_columns]

## Get Taco Bell Data

In [6]:
# One shared cloudscraper session, reused for every request made by the
# scraping loop below.
scraper = cloudscraper.create_scraper()

In [7]:
# Base URL of the store locator; state, city and store paths are appended to it.
# The trailing slash is required because paths are concatenated directly.
url_root = "https://locations.tacobell.com/"

# Accumulates one {"STUSPS", "geometry"} dict per store across all states.
national_store_list = []

In [16]:
# Scrape every Taco Bell location, one state at a time.
#
# The crawl walks three levels of pages: state page -> city pages -> store
# pages. Each finished state is written to data/states/<code>.gpkg, and states
# whose file already exists are skipped, so an interrupted run can be resumed.

states_dir = "data/states"
# Create the output folder up front instead of failing at the first to_file()
# call after a whole state has already been scraped.
os.makedirs(states_dir, exist_ok=True)


def _fetch_soup(url):
    """Request ``url`` with the shared scraper.

    Returns a ``(soup, response)`` pair. ``soup`` is ``None`` when the server
    did not answer with HTTP 200, so callers never parse an error page.
    """
    response = scraper.get(url)
    if response.status_code != 200:
        return None, response
    return BeautifulSoup(response.text, "html.parser"), response


def _all_hours_closed(store_soup):
    """Return True when every hours entry on a store page reads "Closed".

    The hours are read from the <span> inside every second <td> (starting
    with the second one) of the first ``table.w-full`` on the page. A page
    without such a table is treated as not closed.
    """
    tables = store_soup.find_all("table", {"class": "w-full"})
    if not tables:
        return False
    hour_texts = set()
    for cell in tables[0].find_all("td")[1::2]:
        span = cell.find("span")
        # A cell without a <span> counts as "not Closed" rather than crashing.
        hour_texts.add(span.text if span is not None else None)
    return hour_texts == {"Closed"}


def _store_point(store_soup):
    """Build a Point(lng, lat) from the page's latitude/longitude <meta> tags.

    Returns ``None`` when either tag is missing or its content is not a number.
    """
    lat_meta = store_soup.find("meta", {"itemprop": "latitude"})
    lng_meta = store_soup.find("meta", {"itemprop": "longitude"})
    if lat_meta is None or lng_meta is None:
        return None
    try:
        lat = float(lat_meta.attrs["content"])
        lng = float(lng_meta.attrs["content"])
    except (KeyError, ValueError):
        return None
    return Point(lng, lat)


for state_code in tqdm(states_df["STUSPS"].str.lower().tolist(), desc="Parsing States"):
    state_store_list = []
    state_file = f"{states_dir}/{state_code}.gpkg"

    # Already scraped in an earlier run: skip.
    if os.path.isfile(state_file):
        continue

    soup, _ = _fetch_soup(f"{url_root}{state_code}.html")
    if soup is None:
        # No directory page for this state/territory; log its code and move on.
        print(state_code)
        continue

    city_as = soup.find_all("a", {"class": "Link DirLinks"})
    for city_a in tqdm(city_as, desc=f"Parsing Locations in {state_code}"):
        href = city_a.attrs["href"]
        city_url = f"{url_root}{href}.html"

        city_soup, r_city = _fetch_soup(city_url)
        if city_soup is None:
            print(f"Skipping city page {city_url} (HTTP {r_city.status_code})")
            taco_bell_locations_as = []
        else:
            taco_bell_locations_as = city_soup.find_all(
                "a",
                {
                    "class": "Link font-bold text-2xl text-purple underline uppercase hover:font-black"
                },
            )

        for taco_bell_locations_a in taco_bell_locations_as:
            # Store links are relative ("../..."); rebase them on the site root.
            taco_bell_url = taco_bell_locations_a.attrs["href"].replace("../", url_root)
            taco_bell_soup, taco_bell_r = _fetch_soup(taco_bell_url)
            if taco_bell_soup is None:
                print(f"Skipping store page {taco_bell_url} (HTTP {taco_bell_r.status_code})")
                continue

            # Stores whose hours are "Closed" for every entry are logged and excluded.
            if _all_hours_closed(taco_bell_soup):
                print(taco_bell_r.url)
                continue

            location = _store_point(taco_bell_soup)
            if location is None:
                print(f"Skipping store page {taco_bell_url} (no coordinates found)")
                continue

            store_dict = {"STUSPS": state_code.upper(), "geometry": location}
            national_store_list.append(store_dict)
            state_store_list.append(store_dict)

        # Small random pause between cities to avoid hammering the server.
        time.sleep(random.uniform(0.01, 0.5))

    # Persist the state (WGS84 lon/lat) so it is not re-scraped on the next run.
    if state_store_list:
        state_taco_bell = gpd.GeoDataFrame(state_store_list, crs=4326)
        state_taco_bell.to_file(state_file)

Parsing States:   0%|          | 0/56 [00:00<?, ?it/s]

Parsing Locations in nc:   0%|          | 0/159 [00:00<?, ?it/s]

https://locations.tacobell.com/nc/clinton/1410-sunset-avenue.html
https://locations.tacobell.com/nc/williamston/1471-washington-street.html
https://locations.tacobell.com/nc/wilson/901-highway-301.html


Parsing Locations in ok:   0%|          | 0/76 [00:00<?, ?it/s]

https://locations.tacobell.com/ok/tinker-afb/3360-n-avenue.html


Parsing Locations in va:   0%|          | 0/118 [00:00<?, ?it/s]

https://locations.tacobell.com/va/chantilly/4418-chantilly-place.html
https://locations.tacobell.com/va/emporia/107-cloverleaf-drive.html
https://locations.tacobell.com/va/franklin/1290-armory-drive.html
https://locations.tacobell.com/va/reston/11257-roger-bacon-dr.html


Parsing Locations in wv:   0%|          | 0/57 [00:00<?, ?it/s]

Parsing Locations in la:   0%|          | 0/74 [00:00<?, ?it/s]

https://locations.tacobell.com/la/gonzales/1202-n--airline-hwy-.html


Parsing Locations in mi:   0%|          | 0/216 [00:00<?, ?it/s]

https://locations.tacobell.com/mi/detroit/5221-gullen-mall.html
https://locations.tacobell.com/mi/east-lansing/601-e-grand-river-ave.html


Parsing Locations in ma:   0%|          | 0/61 [00:00<?, ?it/s]

Parsing Locations in id:   0%|          | 0/23 [00:00<?, ?it/s]

Parsing Locations in fl:   0%|          | 0/205 [00:00<?, ?it/s]

https://locations.tacobell.com/fl/orlando/4225-e-colonial-drive.html
https://locations.tacobell.com/fl/orlando/5109-international-dr-.html
https://locations.tacobell.com/fl/orlando/7853-colonial-drive.html
https://locations.tacobell.com/fl/winter-springs/1345-tuskawilla-rd.html


Parsing Locations in ne:   0%|          | 0/19 [00:00<?, ?it/s]

Parsing Locations in wa:   0%|          | 0/88 [00:00<?, ?it/s]

https://locations.tacobell.com/wa/colville/1520-n--highway-395.html


Parsing Locations in nm:   0%|          | 0/27 [00:00<?, ?it/s]

pr


Parsing Locations in sd:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations in tx:   0%|          | 0/306 [00:00<?, ?it/s]

https://locations.tacobell.com/tx/anthony/1811-antonio-street.html
https://locations.tacobell.com/tx/bedford/1505-brown-trail.html
https://locations.tacobell.com/tx/carrollton/4117-north-josey-lane.html
https://locations.tacobell.com/tx/dallas/8001-forest-lane.html
https://locations.tacobell.com/tx/dallas/3001-n--hampton-rd-.html
https://locations.tacobell.com/tx/mckinney/2920-w--eldorado-parkway.html
https://locations.tacobell.com/tx/midland/4727-n--midkiff-drive.html
https://locations.tacobell.com/tx/midland/1814-n--midland-drive.html
https://locations.tacobell.com/tx/odessa/1713-n--county-road-w-.html
https://locations.tacobell.com/tx/plano/401-coit-road.html
https://locations.tacobell.com/tx/plano/7100-corporate-drive.html
https://locations.tacobell.com/tx/southlake/101-north-kimball-avenue.html


Parsing Locations in ca:   0%|          | 0/416 [00:00<?, ?it/s]

https://locations.tacobell.com/ca/oakland/6035-telegraph-road.html
https://locations.tacobell.com/ca/phelan/9619-sheep-creek-road.html
https://locations.tacobell.com/ca/santa-ana/2720-w-edinger.html


Parsing Locations in al:   0%|          | 0/104 [00:00<?, ?it/s]

https://locations.tacobell.com/al/bessemer/710-9th-ave-n.html


Parsing Locations in ga:   0%|          | 0/156 [00:00<?, ?it/s]

https://locations.tacobell.com/ga/atlanta/3385-buford-hwy.html


Parsing Locations in pa:   0%|          | 0/164 [00:00<?, ?it/s]

https://locations.tacobell.com/pa/philadelphia/1500-jfk-boulevard.html


Parsing Locations in mo:   0%|          | 0/140 [00:00<?, ?it/s]

Parsing Locations in co:   0%|          | 0/63 [00:00<?, ?it/s]

Parsing Locations in ut:   0%|          | 0/44 [00:00<?, ?it/s]

Parsing Locations in tn:   0%|          | 0/137 [00:00<?, ?it/s]

https://locations.tacobell.com/tn/atoka/11200-highway-51-south.html
https://locations.tacobell.com/tn/johnson-city/1276-gilbreath-drive.html
https://locations.tacobell.com/tn/knoxville/1900-w-cumberland.html
https://locations.tacobell.com/tn/waverly/310-west-main-street.html


Parsing Locations in wy:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing Locations in ny:   0%|          | 0/172 [00:00<?, ?it/s]

https://locations.tacobell.com/ny/new-york/1884-third-ave.html


Parsing Locations in ks:   0%|          | 0/54 [00:00<?, ?it/s]

Parsing Locations in ak:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations in nv:   0%|          | 0/19 [00:00<?, ?it/s]

https://locations.tacobell.com/nv/las-vegas/333-w-st-louis-ave.html


Parsing Locations in il:   0%|          | 0/206 [00:00<?, ?it/s]

https://locations.tacobell.com/il/benton/634-west-main-street.html


Parsing Locations in vt:   0%|          | 0/5 [00:00<?, ?it/s]

https://locations.tacobell.com/vt/brattleboro/1007-putney-road.html


Parsing Locations in mt:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing Locations in ia:   0%|          | 0/42 [00:00<?, ?it/s]

Parsing Locations in sc:   0%|          | 0/78 [00:00<?, ?it/s]

https://locations.tacobell.com/sc/charleston/2040-savannah-hwy.html


Parsing Locations in nh:   0%|          | 0/17 [00:00<?, ?it/s]

https://locations.tacobell.com/nh/seabrook/306-lafayette-road.html


Parsing Locations in az:   0%|          | 0/64 [00:00<?, ?it/s]

https://locations.tacobell.com/az/phoenix/3507-w--peoria-ave-.html
https://locations.tacobell.com/az/sierra-vista/3920-east-fry-blvd.html


Parsing Locations in dc:   0%|          | 0/1 [00:00<?, ?it/s]

as
vi


Parsing Locations in nj:   0%|          | 0/107 [00:00<?, ?it/s]

https://locations.tacobell.com/nj/columbia/2-simpson-road.html
https://locations.tacobell.com/nj/newark/150-bleeker-street.html


Parsing Locations in md:   0%|          | 0/77 [00:00<?, ?it/s]

Parsing Locations in me:   0%|          | 0/19 [00:00<?, ?it/s]

Parsing Locations in hi:   0%|          | 0/18 [00:00<?, ?it/s]

https://locations.tacobell.com/hi/lahaina/127-a-hinau-st.html


Parsing Locations in de:   0%|          | 0/15 [00:00<?, ?it/s]

https://locations.tacobell.com/de/rehoboth-beach/19010-coastal-hwy.html
gu
mp


Parsing Locations in ri:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing Locations in ky:   0%|          | 0/104 [00:00<?, ?it/s]

https://locations.tacobell.com/ky/cadiz/57-hospitality-lane.html
https://locations.tacobell.com/ky/cynthiana/905-u-s--highway-27-south.html
https://locations.tacobell.com/ky/dry-ridge/71-broadway-street.html
https://locations.tacobell.com/ky/fort-campbell/3001-bastogne-avenue.html
https://locations.tacobell.com/ky/louisville/one-arena-plaza-307695.html
https://locations.tacobell.com/ky/louisville/1441-gardiner-lane.html
https://locations.tacobell.com/ky/louisville/one-arena-plaza-307696.html


Parsing Locations in oh:   0%|          | 0/249 [00:00<?, ?it/s]

Parsing Locations in wi:   0%|          | 0/103 [00:00<?, ?it/s]

Parsing Locations in or:   0%|          | 0/70 [00:00<?, ?it/s]

https://locations.tacobell.com/or/silverton/605-west-c-street.html
https://locations.tacobell.com/or/wasco/91444-biggs-rufus-hwy.html


Parsing Locations in nd:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations in ar:   0%|          | 0/79 [00:00<?, ?it/s]

Parsing Locations in in:   0%|          | 0/137 [00:00<?, ?it/s]

Parsing Locations in mn:   0%|          | 0/72 [00:00<?, ?it/s]

Parsing Locations in ct:   0%|          | 0/43 [00:00<?, ?it/s]

In [18]:
# Load every cached per-state GeoPackage written by the scraping loop.
states_path = "data/states"

# List the directory through `states_path` (rather than repeating the literal)
# and sort the names: os.listdir() returns entries in arbitrary order, which
# would make the row order of the combined frame differ between runs.
taco_bells_state_gdfs = [
    gpd.read_file(os.path.join(states_path, file_name))
    for file_name in sorted(os.listdir(states_path))
    if file_name.endswith(".gpkg")
]

In [20]:
# Stack the per-state frames into one table with a fresh 0..n-1 index, then
# wrap the result as a GeoDataFrame.
all_state_rows = pd.concat(taco_bells_state_gdfs, ignore_index=True)
taco_bell_gdf = gpd.GeoDataFrame(all_state_rows)

In [21]:
# Reproject the store points to EPSG:9311 and save the national layer.
taco_bell_gdf = taco_bell_gdf.to_crs(9311)
# Plain string literal: the path has no placeholders, so no f-string is needed.
taco_bell_gdf.to_file("data/stores.gpkg")

## Get Population Data

In [23]:
# sheet_name=None loads every sheet of the workbook into a dict keyed by
# sheet name; the next cell picks the sheet it needs from that dict.
population_workbook = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook,
    engine="openpyxl",
    sheet_name=None,
)

In [24]:
# The workbook has no clean header row, so pandas names the first column after
# the sheet's descriptive title text and the others "Unnamed: <n>".
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): confirm which estimate year "Unnamed: 5" corresponds to in this workbook.
population_header = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, population_header]
].rename(columns={name_header: "NAME", population_header: "POPULATION"})

# Per the sheet's own header, sub-part names are prefixed with leading dots.
# Strip only those dots: slicing off the first character unconditionally would
# corrupt any name that has no dot prefix and silently break the later merge
# on NAME.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [25]:
# Attach the population figure to each state boundary by full state name.
# A left join keeps every row of states_df, matched or not; the trailing
# selection fixes the column order.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [26]:
# Drop every row with a missing value in any column, i.e. the entities that
# found no population match in the left join.
states_with_population_df = states_with_population_df.dropna(axis="index", how="any")

## Merge Data

In [27]:
# Number of scraped stores per state, as a two-column frame (STUSPS, stores).
#
# `state_counts` was previously read here without being defined anywhere in
# the notebook, so the cell failed on a fresh kernel. It is now derived from
# the combined store layer.
# NOTE(review): assumes the intended count is "rows per STUSPS in
# taco_bell_gdf" - confirm against the original analysis.
state_counts = taco_bell_gdf["STUSPS"].value_counts()

# rename_axis + reset_index(name=...) yields the same column names whether or
# not this pandas version names the value_counts() index itself.
taco_bell_per_state_df = state_counts.rename_axis("STUSPS").reset_index(name="stores")

In [29]:
# Join the store counts onto the state/population frame by postal code.
# With a left join, states with no counted stores get NaN, which becomes 0.
# NOTE: this cell rebinds taco_bell_per_state_df from itself, so it should be
# run once per kernel session (after the counts cell above).
states_with_store_counts = states_with_population_df.merge(
    taco_bell_per_state_df,
    how="left",
    on="STUSPS",
)
taco_bell_per_state_df = states_with_store_counts.fillna(0)

In [30]:
# Store density: stores divided by (population / N) for several values of N.
density_columns = (
    ("per_1000", 1000),
    ("per_10k", 10_000),
    ("per_100k", 100000),
    ("per_500k", 500_000),
    ("per_1m", 1_000_000),
)
for density_column, residents in density_columns:
    taco_bell_per_state_df[density_column] = taco_bell_per_state_df["stores"] / (
        taco_bell_per_state_df["POPULATION"] / residents
    )

# Despite its name, "per_capita" is the inverse ratio: residents per store.
# A state with zero stores divides by zero here.
taco_bell_per_state_df["per_capita"] = (
    taco_bell_per_state_df["POPULATION"] / taco_bell_per_state_df["stores"]
)

In [31]:
# Reproject the state polygons to EPSG:9311, the same CRS used for the store layer.
taco_bell_per_state_df = taco_bell_per_state_df.to_crs(epsg=9311)

In [32]:
# Write the per-state summary (geometry, population, store counts, ratios) to disk.
taco_bell_per_state_df.to_file(filename="data/taco_bell_per_state.gpkg")