In [None]:
import os
import random
import re
import time

In [2]:
from bs4 import BeautifulSoup
import cloudscraper
import geopandas as gpd
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get Province Data

In [65]:
# Shapefile that provides the geometries used throughout the notebook; a later
# cell keeps only its "PRENAME" and "geometry" columns.
file_path = "data/lecu000e21a_e/lpr_000e21a_e.shp"
# Load the shapefile into a GeoDataFrame.
states_df = gpd.read_file(file_path)

In [78]:
# Keep only the name and geometry columns, exposing the name as "Geography" so
# it can be used as the merge key against the population table.
#
# The rename runs *before* the column selection on purpose: DataFrame.rename
# silently ignores labels that are absent, so re-running this cell after
# "PRENAME" has already been renamed is a no-op instead of a KeyError.
states_df = states_df.rename(columns={"PRENAME": "Geography"})[
    ["Geography", "geometry"]
]

## Get Population Data

In [91]:
# Population table. The following cell joins it on its "Geography" column and
# reads the "Q1 2025" column, which it treats as comma-formatted text.
state_populations = pd.read_csv("data/1710000901-noSymbol.csv")

In [104]:
# Attach the population figures to the geometries (left join on "Geography"),
# expose the "Q1 2025" column as "POPULATION", and turn its comma-formatted
# text into integers.
states_population_gdf = (
    states_df.merge(state_populations, how="left", on="Geography")
    .rename(columns={"Q1 2025": "POPULATION"})
    .assign(
        POPULATION=lambda frame: frame["POPULATION"].str.replace(",", "").astype(int)
    )
)

## Get Subway Data

In [15]:
def get_coords(soup) -> Point:
    """Extract a store's coordinates from its parsed page.

    Reads the ``content`` attribute of the ``<meta itemprop="latitude">`` and
    ``<meta itemprop="longitude">`` tags.

    Args:
        soup: Parsed page; only its ``find`` method is used.

    Returns:
        ``Point(lon, lat)`` -- x is the longitude, y is the latitude.

    Raises:
        ValueError: If either tag or its ``content`` attribute is missing, or
            if the content cannot be converted to ``float``.
    """
    coords = {}
    for itemprop in ("latitude", "longitude"):
        meta = soup.find("meta", {"itemprop": itemprop})
        content = meta.attrs.get("content") if meta is not None else None
        if content is None:
            # Name the missing tag instead of failing with an opaque
            # "'NoneType' object has no attribute 'attrs'".
            raise ValueError(f"page has no usable <meta itemprop={itemprop!r}> tag")
        coords[itemprop] = float(content)
    return Point(coords["longitude"], coords["latitude"])

In [18]:
# Site root that replaces the relative "../" prefixes found in directory links.
url_root = "https://restaurants.subway.com"
# One scraper instance is shared by every request in the notebook.
scraper = cloudscraper.create_scraper()

In [55]:
count_dict = {}  # never populated in this cell; kept so existing references still resolve
store_list = []  # every open store scraped during this run, across all provinces

canada_url = "https://restaurants.subway.com/canada"
states_dir = "data/states"
# Create the output folder up front; otherwise the first to_file call fails
# only after a whole province has already been scraped.
os.makedirs(states_dir, exist_ok=True)


def _parse_store(store_url, state_code):
    """Fetch one store page and turn it into a store record.

    Args:
        store_url: Absolute URL of the store page.
        state_code: Province code taken from the directory link (e.g. "ab").

    Returns:
        ``{"STATE": state_code, "geometry": <point>}``, or ``None`` (after
        printing the URL) when every row of the hours table reads "Closed".

    Raises:
        ValueError: If the page has no hours table. Errors raised by
            ``get_coords`` or by the HTTP request propagate unchanged.
    """
    store_soup = BeautifulSoup(scraper.get(store_url).text, "html.parser")
    tbody = store_soup.find("tbody")
    if tbody is None:
        raise ValueError("no hours table found")
    # Collapse the per-day opening hours into their distinct values.
    schedule = {
        td.text
        for td in tbody.find_all("td", {"class": "c-hours-details-row-intervals"})
    }
    if schedule == {"Closed"}:
        print(store_url)
        return None
    return {"STATE": state_code, "geometry": get_coords(store_soup)}


def _city_store_urls(city_url):
    """Return the store-page URLs listed on a city page that holds several stores."""
    city_soup = BeautifulSoup(scraper.get(city_url).text, "html.parser")
    store_urls = []
    for teaser in city_soup.find_all("div", {"class": "Teaser-innerWrapper"}):
        link = teaser.find("a")
        if link is None or "href" not in link.attrs:
            print("store teaser without a link", city_url)
            continue
        store_urls.append(link.attrs["href"].replace("../..", url_root))
    return store_urls


r = scraper.get(canada_url)
soup = BeautifulSoup(r.text, "html.parser")
# NOTE(review): the last directory link is skipped; the reason is not recorded
# in the notebook -- confirm it is still the entry that should be excluded.
province_as = soup.find_all("a", {"class": "Directory-listLink"})[:-1]

for province_a in tqdm(province_as, desc="Parsing States"):
    state_code = province_a["href"].split("/")[1]
    state_file = f"{states_dir}/{state_code}.gpkg"

    # A saved file marks the province as done, so an interrupted run can resume.
    if os.path.isfile(state_file):
        continue

    # URLs always use "/", so build them as strings; os.path.join would insert
    # the platform separator (a backslash on Windows).
    province_url = f"{canada_url}/{state_code}"
    state_r = scraper.get(province_url)
    if state_r.status_code != 200:
        print(province_url)
        continue

    state_store_list = []
    soup_state = BeautifulSoup(state_r.text, "html.parser")
    locations_as = soup_state.find_all("a", {"class": "Directory-listLink"})

    # Parse all locations in a state
    for locations_a in tqdm(locations_as, desc=f"Parsing Locations In {state_code}"):
        location_url = locations_a.attrs["href"].replace("..", url_root)
        count = int(re.findall(r"\d+", locations_a.attrs["data-count"])[0])
        # A count of 1 means the link is the store page itself; otherwise it is
        # a city page listing the individual stores.
        store_urls = [location_url] if count == 1 else _city_store_urls(location_url)

        for store_url in store_urls:
            # Both kinds of location get the same best-effort handling: a single
            # malformed page is reported and skipped instead of aborting the run.
            try:
                store_dict = _parse_store(store_url, state_code)
            except Exception as e:
                print(e, store_url)
                continue
            if store_dict is not None:
                store_list.append(store_dict)
                state_store_list.append(store_dict)

    if state_store_list:
        subway_state_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        subway_state_gdf.to_file(state_file)

    # Short random pause between provinces.
    time.sleep(random.uniform(0.01, 0.25))

Parsing States:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing Locations In ab:   0%|          | 0/119 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/ab/calgary/5125-126th-avenue-se
https://restaurants.subway.com/canada/ab/edmonton/2-10507-104-ave-nw
https://restaurants.subway.com/canada/ab/edmonton/10551-kingsway-avenue
https://restaurants.subway.com/canada/ab/edmonton/2020-101-street-sw
https://restaurants.subway.com/canada/ab/edmonton/302-1100-westmount-shop-centre
https://restaurants.subway.com/canada/ab/jasper/626-a-connaught-drive
https://restaurants.subway.com/canada/ab/lethbridge/4401-university-drive-w
https://restaurants.subway.com/canada/ab/swan-hills/4619-federated-road


Parsing Locations In bc:   0%|          | 0/113 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/bc/burnaby/3294-production-way
https://restaurants.subway.com/canada/bc/burnaby/6569-kingsway
https://restaurants.subway.com/canada/bc/burnaby/7215-canada-way
https://restaurants.subway.com/canada/bc/burnaby/8888-university-dr
https://restaurants.subway.com/canada/bc/coquitlam/602-clarke-rd
https://restaurants.subway.com/canada/bc/kimberley/460-ross-st
https://restaurants.subway.com/canada/bc/north-vancouver/2916-lonsdale-ave
https://restaurants.subway.com/canada/bc/parksville/826-west-island-hwy
https://restaurants.subway.com/canada/bc/richmond/unit-105-3675-westminster-hwy
https://restaurants.subway.com/canada/bc/surrey/14313-cresent-rd
https://restaurants.subway.com/canada/bc/vancouver/110-510-w-hastings-st
https://restaurants.subway.com/canada/bc/vancouver/1368-w-georgia-st
https://restaurants.subway.com/canada/bc/vancouver/4665-arbutus-street
https://restaurants.subway.com/canada/bc/vancouver/505-burrard-st
https://restaurants.subway.com/canad

Parsing Locations In mb:   0%|          | 0/46 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/mb/winnipeg/393-portage-ave
https://restaurants.subway.com/canada/mb/winnipeg/747-corydon-ave
https://restaurants.subway.com/canada/mb/winnipeg/can-west-globel-park


Parsing Locations In nb:   0%|          | 0/41 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/nb/bathurst/310-vanier-blvd
https://restaurants.subway.com/canada/nb/miramichi/186-king-st
https://restaurants.subway.com/canada/nb/moncton/2731-mountain-road


Parsing Locations In nl:   0%|          | 0/27 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/nl/st-johns/430-top-sail-rd
https://restaurants.subway.com/canada/nl/st-anthony/46-48-west-street


Parsing Locations In nt:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing Locations In ns:   0%|          | 0/46 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/ns/dartmouth/100-ilsley-ave
https://restaurants.subway.com/canada/ns/eastern-passage/71-cow-bay-rd
https://restaurants.subway.com/canada/ns/eskasoni/4716-shore-road
https://restaurants.subway.com/canada/ns/sydney/1102-kings-road


Parsing Locations In on:   0%|          | 0/291 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/on/arnprior/100-madawaska-blvd
https://restaurants.subway.com/canada/on/brampton/2084-steeles-ave-e
https://restaurants.subway.com/canada/on/brampton/490-bramalea-road
https://restaurants.subway.com/canada/on/caledon/3005-charleston-side-road
https://restaurants.subway.com/canada/on/cornwall/33-9th-street-east
https://restaurants.subway.com/canada/on/etobicoke/300-browns-line
https://restaurants.subway.com/canada/on/etobicoke/500-rexdale-blvd
https://restaurants.subway.com/canada/on/glouester/1616-cyrville-road
https://restaurants.subway.com/canada/on/guelph/765-woolwich-st
https://restaurants.subway.com/canada/on/hamilton/114-york-blvd
https://restaurants.subway.com/canada/on/hamilton/135-fennell-ave-w
https://restaurants.subway.com/canada/on/hampton/2363-taunton-road-east
https://restaurants.subway.com/canada/on/kaladar/10201-highway-41
https://restaurants.subway.com/canada/on/kemptville/2600-county-road-43
https://restaurants.subway.com/canada/o

Parsing Locations In pe:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing Locations In qc:   0%|          | 0/269 [00:00<?, ?it/s]

https://restaurants.subway.com/canada/qc/alma/65-st-joseph
https://restaurants.subway.com/canada/qc/bedford/77-principal-st
https://restaurants.subway.com/canada/qc/boisbriand/4-boul-des-entreprises
https://restaurants.subway.com/canada/qc/gatineau/120-blvd-de-l'hopital
https://restaurants.subway.com/canada/qc/gatineau/177-boul-st-joseph
https://restaurants.subway.com/canada/qc/gatineau/2335-rue-saint-louis
https://restaurants.subway.com/canada/qc/gatineau/320-st-joseph-blvd
https://restaurants.subway.com/canada/qc/hull/40-promenade-du-portage
https://restaurants.subway.com/canada/qc/kirkland/2899-boul-st-charles
https://restaurants.subway.com/canada/qc/lasalle/7814-boul-champlain
https://restaurants.subway.com/canada/qc/laval/1120-le-corbusier-boulevard
https://restaurants.subway.com/canada/qc/levis/1655-blvd-alphonse-desjardins
https://restaurants.subway.com/canada/qc/longueuil/3137-boul-taschereau
https://restaurants.subway.com/canada/qc/mont-royal/2305-chemin-rockland
https://resta

Parsing Locations In sk:   0%|          | 0/71 [00:00<?, ?it/s]

Parsing Locations In yt:   0%|          | 0/1 [00:00<?, ?it/s]

In [56]:
# Load every per-province GeoPackage written by the scraping cell.
states_path = "data/states"
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the combined layer identical from one run to the next.
subway_gdfs = [
    gpd.read_file(os.path.join(states_path, file_name))
    for file_name in sorted(os.listdir(states_path))
    if file_name.endswith(".gpkg")
]

In [57]:
# Stack the per-province layers into a single frame, reproject it to CRS code
# 9311 and save the result.
subway_gdf = gpd.GeoDataFrame(pd.concat(subway_gdfs, ignore_index=True)).to_crs(9311)
subway_gdf.to_file("data/stores.gpkg")

In [95]:
# Number of stores per province code. The series was previously referenced
# without being defined in any cell, so the notebook failed on a fresh kernel.
# NOTE(review): derived here from the "STATE" column of subway_gdf -- confirm
# this matches how the counts were originally produced.
subway_by_state_series = subway_gdf.groupby("STATE").size()

subways_by_state_df = pd.DataFrame(
    {
        "STUSPS": subway_by_state_series.index,
        "COUNT": subway_by_state_series.values,
    }
)

## Merge Data

In [97]:
# Lookup from province/territory name to the lower-case two-letter code.
# The next cell turns it into a frame whose columns are "Geography" (the names)
# and "STUSPS" (the codes), which is what links the store counts to the
# population/geometry table.
province_ref_dict = {
    "Newfoundland and Labrador": "nl",
    "Prince Edward Island": "pe",
    "Nova Scotia": "ns",
    "New Brunswick": "nb",
    "Quebec": "qc",
    "Ontario": "on",
    "Manitoba": "mb",
    "Saskatchewan": "sk",
    "Alberta": "ab",
    "British Columbia": "bc",
    "Yukon": "yt",
    "Northwest Territories": "nt",
    "Nunavut": "nu",
}

In [102]:
# Two-column lookup table: province name ("Geography") -> province code ("STUSPS").
ref_df = pd.DataFrame(
    {
        "Geography": list(province_ref_dict.keys()),
        "STUSPS": list(province_ref_dict.values()),
    }
)

In [107]:
# Step 1: give each per-code store count its province name.
store_counts_by_province = subways_by_state_df.merge(ref_df, how="inner", on="STUSPS")
# Step 2: join the named counts onto the population/geometry table. Both joins
# are inner joins, so only provinces present on every side remain.
subway_population_gdf = states_population_gdf.merge(
    store_counts_by_province, how="inner", on="Geography"
)

In [18]:
# Stores per 100,000 and per 1,000,000 inhabitants, each rounded to a whole number.
for rate_column, people_per_unit in (("per_100k", 100000), ("per_1m", 1_000_000)):
    rate = subway_population_gdf["COUNT"] / (
        subway_population_gdf["POPULATION"] / people_per_unit
    )
    subway_population_gdf[rate_column] = rate.round(decimals=0).astype(int)

In [19]:
# `subways_by_states_gdf` was previously read here without ever being created,
# so the cell raised NameError on a fresh kernel. Build it from the merged
# per-province table instead, which also makes the cell safe to re-run.
# NOTE(review): assumes the merged table is the intended source -- confirm.
subways_by_states_gdf = subway_population_gdf.dropna()

In [21]:
# Reproject the per-province table to EPSG code 9311 and write it to disk.
subways_by_states_gdf = subways_by_states_gdf.to_crs(epsg=9311)
subways_by_states_gdf.to_file("data/stores_by_states.gpkg")