In [15]:
import os
import re
import random
import json
import time

In [16]:
from urllib.parse import urljoin

In [17]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [18]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [19]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [20]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Chipotle Data

In [21]:
scraper = cloudscraper.create_scraper()

In [22]:
def get_coords(store_url: str) -> Point:
    r = scraper.get(store_url)
    soup = BeautifulSoup(r.text, "html.parser")
    store_dict = json.loads(soup.find("script", {"type": "application/ld+json"}).text)
    lat = round(float(store_dict["@graph"][1]["geo"]["latitude"]), 6)
    lon = round(float(store_dict["@graph"][1]["geo"]["longitude"]), 6)
    return Point(lon, lat)

In [23]:
def get_urls(url: str) -> list:
    r = scraper.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    ul = soup.find(
        "ul", {"class": "lg:columns-4 md:columns-3 sm:columns-2 columns-1 lg:-m-3"}
    )
    urls = [urljoin(url, a.attrs["href"]) for a in ul.find_all("a")]
    return urls

In [24]:
def find_stores(url: str) -> list:
    base_url = "https://locations.qdoba.com/"
    r = scraper.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    h3s = soup.find_all("h3", {"class": "locationProjectName"})
    return [h3.find_next("a").attrs["href"].replace("../../", base_url) for h3 in h3s]

In [26]:
state_urls = get_urls("https://locations.qdoba.com/us")

# Parse all locations in a state
for state_url in tqdm(state_urls, desc="Parsing States"):
    state_store_list = []
    state_code = state_url.split("/")[-1].upper()

    # If File exists continue
    if os.path.isfile(f"data/states/{state_code}.gpkg"):
        continue

    city_urls = get_urls(state_url)

    if not city_urls:
        print(state_code)
        continue

    for city_url in tqdm(city_urls, desc=f"Parsing Locations in {state_code}"):
        store_urls = find_stores(city_url)
        if not store_urls:
            print(city_url)
            continue

        for store_url in store_urls:
            try:
                point = get_coords(store_url)
            except Exception as e:
                print(store_url, e)
            store_dict = {"STUSPS": state_code, "geometry": point, "url": store_url}
            state_store_list.append(store_dict)
            time.sleep(random.uniform(0.01, 0.5))

    if state_store_list:
        store_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        store_gdf.to_file(f"data/states/{state_code}.gpkg")

Parsing States:   0%|          | 0/46 [00:00<?, ?it/s]

Parsing Locations in FL:   0%|          | 0/19 [00:00<?, ?it/s]

Parsing Locations in GA:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations in ID:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing Locations in IL:   0%|          | 0/19 [00:00<?, ?it/s]

https://locations.qdoba.com/us/il/east-peoria/1040-w-camp-st 'NoneType' object has no attribute 'text'


Parsing Locations in IN:   0%|          | 0/32 [00:00<?, ?it/s]

Parsing Locations in IA:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing Locations in KS:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing Locations in KY:   0%|          | 0/13 [00:00<?, ?it/s]

Parsing Locations in LA:   0%|          | 0/3 [00:00<?, ?it/s]

Parsing Locations in MD:   0%|          | 0/19 [00:00<?, ?it/s]

https://locations.qdoba.com/us/md/fort-meade/fort-meade 'NoneType' object has no attribute 'text'


Parsing Locations in MA:   0%|          | 0/16 [00:00<?, ?it/s]

Parsing Locations in MI:   0%|          | 0/58 [00:00<?, ?it/s]

Parsing Locations in MN:   0%|          | 0/26 [00:00<?, ?it/s]

Parsing Locations in MS:   0%|          | 0/3 [00:00<?, ?it/s]

Parsing Locations in MO:   0%|          | 0/28 [00:00<?, ?it/s]

Parsing Locations in MT:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing Locations in NE:   0%|          | 0/11 [00:00<?, ?it/s]

Parsing Locations in NV:   0%|          | 0/7 [00:00<?, ?it/s]

Parsing Locations in NH:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing Locations in NJ:   0%|          | 0/25 [00:00<?, ?it/s]

https://locations.qdoba.com/us/nj/patterson/186-218-maple-st 'NoneType' object has no attribute 'text'


Parsing Locations in NY:   0%|          | 0/15 [00:00<?, ?it/s]

https://locations.qdoba.com/us/ny/new-york


Parsing Locations in NC:   0%|          | 0/11 [00:00<?, ?it/s]

Parsing Locations in ND:   0%|          | 0/6 [00:00<?, ?it/s]

Parsing Locations in OH:   0%|          | 0/16 [00:00<?, ?it/s]

Parsing Locations in OK:   0%|          | 0/15 [00:00<?, ?it/s]

Parsing Locations in OR:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations in PA:   0%|          | 0/23 [00:00<?, ?it/s]

https://locations.qdoba.com/us/pa/bryn-mawr/1112-w-lancaster-ave 'NoneType' object has no attribute 'text'


Parsing Locations in SC:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing Locations in SD:   0%|          | 0/7 [00:00<?, ?it/s]

Parsing Locations in TN:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing Locations in TX:   0%|          | 0/18 [00:00<?, ?it/s]

Parsing Locations in UT:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing Locations in VA:   0%|          | 0/28 [00:00<?, ?it/s]

https://locations.qdoba.com/us/va/mclean/mclean 'NoneType' object has no attribute 'text'


Parsing Locations in WA:   0%|          | 0/20 [00:00<?, ?it/s]

Parsing Locations in DC:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing Locations in WV:   0%|          | 0/6 [00:00<?, ?it/s]

Parsing Locations in WI:   0%|          | 0/42 [00:00<?, ?it/s]

Parsing Locations in WY:   0%|          | 0/5 [00:00<?, ?it/s]

## Manually Add 1 in FL that wasn't picked up

In [28]:
store_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        qdoba_gdf = gpd.read_file(constructed_path)
        store_gdfs.append(qdoba_gdf)

In [30]:
qdoba_gdf = pd.concat(store_gdfs, ignore_index=True)

In [31]:
qdoba_gdf.head()

Unnamed: 0,STUSPS,url,geometry
0,AK,https://locations.qdoba.com/us/ak/anchorage/70...,POINT (-149.87024 61.19319)
1,AK,https://locations.qdoba.com/us/ak/anchorage/19...,POINT (-149.8475 61.1431)
2,AK,https://locations.qdoba.com/us/ak/anchorage/11...,POINT (-149.73958 61.22703)
3,AK,https://locations.qdoba.com/us/ak/wasilla/1515...,POINT (-149.41159 61.5781)
4,AL,https://locations.qdoba.com/us/al/birmingham/4...,POINT (-86.80123 33.50858)


In [32]:
qdoba_gdf = qdoba_gdf.to_crs(9311)
qdoba_gdf.to_file(f"data/stores.gpkg")

## Get Population Data

In [33]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [34]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [35]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

In [36]:
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [37]:
qdoba_count_df = pd.DataFrame(
    qdoba_gdf.groupby("STUSPS").size(), columns=["QDOBAs"]
).reset_index()

In [38]:
qdoba_count_gdf = states_with_population_df.merge(
    qdoba_count_df, on="STUSPS", how="left"
)
qdoba_count_gdf = qdoba_count_gdf.fillna(0)

In [39]:
qdoba_count_gdf["per_100k"] = (
    qdoba_count_gdf["QDOBAs"] / (qdoba_count_gdf["POPULATION"] / 100000)
).round(decimals=2)
qdoba_count_gdf["per_1m"] = (
    qdoba_count_gdf["QDOBAs"] / (qdoba_count_gdf["POPULATION"] / 1_000_000)
).round(decimals=1)

In [40]:
qdoba_count_gdf = qdoba_count_gdf.to_crs(9311)

In [41]:
qdoba_count_gdf.to_file("data/QDOBA_Per_State.gpkg")

In [43]:
qdoba_count_gdf[["QDOBAs", "per_100k", "per_1m", "STUSPS"]].sort_values(
    "per_100k", ascending=False
)

Unnamed: 0,QDOBAs,per_100k,per_1m,STUSPS
21,90.0,1.53,15.3,CO
14,11.0,1.2,12.0,SD
24,7.0,1.2,12.0,WY
10,21.0,1.06,10.6,NE
45,59.0,0.99,9.9,WI
47,7.0,0.89,8.9,ND
6,72.0,0.71,7.1,MI
49,46.0,0.67,6.7,IN
43,29.0,0.64,6.4,KY
20,40.0,0.64,6.4,MO
