In [1]:
import re
import json
import requests

In [2]:
import geopandas as gpd
import pandas as pd

In [3]:
from bs4 import BeautifulSoup
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Read the state boundary shapefile from the local data folder.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the state abbreviation, the state name and the boundary geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [6]:
# sheet_name=None loads every worksheet and returns a dict keyed by sheet name.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    sheet_name=None,
    engine="openpyxl",
)

In [7]:
# The first spreadsheet row is used as the header, so the name column is
# labelled with the sheet's descriptive text and the remaining columns get
# pandas' positional "Unnamed: N" placeholders.
name_header = (
    "table with row headers in column A and column headers in rows 3 "
    "through 4. (leading dots indicate sub-parts)"
)
# NOTE(review): "Unnamed: 5" is the sixth column of the sheet — confirm that it
# holds the estimate year this analysis is meant to use.
population_header = "Unnamed: 5"

population_sheet = state_populations["NST-EST2024-POP"]
state_populations_df = population_sheet[[name_header, population_header]].rename(
    columns={name_header: "NAME", population_header: "POPULATION"}
)

# Per the sheet header, leading dots only mark sub-part rows. Strip the dots
# themselves instead of blindly dropping the first character, which would
# mangle any label that has no leading dot and make it unmatchable on "NAME".
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [8]:
# Left join keeps every state boundary; rows whose NAME has no match in the
# population table end up with a missing POPULATION.
merged_states = states_df.merge(right=state_populations_df, how="left", on="NAME")
output_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = merged_states[output_columns]

## Get Store Data

In [9]:
def get_urls(url: str, timeout: float = 30.0) -> list:
    """Return the link targets listed on a store-locator page.

    Downloads ``url`` and, for every ``<div>`` whose class attribute is
    ``"map-list-item is-single"``, collects the ``href`` of the first
    ``<a>`` inside it.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        A list of ``href`` strings in page order. List items that contain
        no link (or a link without ``href``) are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx: parsing an error page would silently yield an
    # empty list and undercount stores downstream.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    entries = soup.find_all("div", {"class": "map-list-item is-single"})

    hrefs = []
    for entry in entries:
        anchor = entry.find("a")
        if anchor is None:
            continue
        href = anchor.get("href")
        if href is not None:
            hrefs.append(href)
    return hrefs

In [11]:
def get_coords(url: str, timeout: float = 30.0) -> Point:
    """Fetch a store page and return its map centre as ``Point(lng, lat)``.

    The page is expected to contain a ``<div id="gmap">`` holding a
    ``text/javascript`` script in which an object literal is assigned
    (``= {...};``) with numeric ``"centerLat"`` and ``"centerLng"`` entries.

    Args:
        url: Address of the store page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A shapely ``Point`` with x = ``centerLng`` and y = ``centerLat``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the map container, its script, the object literal or
            either coordinate cannot be found or parsed. The message names
            the offending URL.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    gmap = soup.find("div", {"id": "gmap"})
    if gmap is None:
        raise ValueError(f"No map container found on store page: {url}")
    script = gmap.find("script", {"type": "text/javascript"})
    if script is None:
        raise ValueError(f"No map script found on store page: {url}")

    # re.S lets the object literal span several lines.
    match = re.search(r"=\s*({.*});", script.text, flags=re.S)
    if match is None:
        raise ValueError(f"No map configuration found on store page: {url}")
    js_text = match.group(1)

    lat_match = re.search(r'"centerLat"\s*:\s*([0-9\.\-]+)', js_text)
    lng_match = re.search(r'"centerLng"\s*:\s*([0-9\.\-]+)', js_text)
    # Previously a miss here only printed the URL and then crashed with an
    # UnboundLocalError on the return line; report the real cause instead.
    if lat_match is None or lng_match is None:
        raise ValueError(f"No centre coordinates found on store page: {url}")

    try:
        lat = float(lat_match.group(1))
        lng = float(lng_match.group(1))
    except ValueError as err:
        # The character class also accepts strings such as "1.2.3" or "-".
        raise ValueError(f"Malformed centre coordinates on store page: {url}") from err

    # shapely points are (x, y), i.e. (longitude, latitude).
    return Point(lng, lat)

In [12]:
# Entry page of the store locator; the links found on it are treated as the
# per-state listing pages in the next step.
url = "https://stores.spirithalloween.com/christmas/"
state_urls = get_urls(url=url)

In [13]:
# Walk every state listing page and pool the store links it exposes.
store_urls_usa = []
state_progress = tqdm(state_urls, desc="Parsing States")
for state_url in state_progress:
    store_urls = get_urls(state_url)
    store_urls_usa += store_urls

Parsing States:   0%|          | 0/13 [00:00<?, ?it/s]

In [14]:
# Build one record per store: state code, page URL and map location.
store_dicts = []
store_progress = tqdm(store_urls_usa, desc="Parsing Stores")
for store_url in store_progress:
    location = get_coords(store_url)
    # The third-from-last "/"-separated piece of the URL, upper-cased, is used
    # as the state abbreviation that later joins against STUSPS.
    state_code = store_url.split("/")[-3].upper()
    store_dicts.append(
        {"STUSPS": state_code, "url": store_url, "geometry": location}
    )

Parsing Stores:   0%|          | 0/30 [00:00<?, ?it/s]

In [28]:
# The scraped points are (lng, lat) pairs, so they are declared as EPSG:4326
# first and then reprojected to EPSG:26917 before being written to disk.
stores_lonlat_gdf = gpd.GeoDataFrame(store_dicts, crs=4326)
store_gdf = stores_lonlat_gdf.to_crs(epsg=26917)
store_gdf.to_file("data/stores.gpkg")

In [29]:
# Number of scraped stores per state abbreviation, as a two-column frame
# (STUSPS, COUNT).
store_counts_df = (
    store_gdf.groupby("STUSPS")
    .size()
    .rename("COUNT")
    .reset_index()
)

## Combine With States

In [30]:
# Attach the per-state store counts to the state polygons. The left join
# keeps states without any store; their COUNT is missing at this point.
store_count_gdf = states_with_population_df.merge(
    right=store_counts_df,
    how="left",
    on="STUSPS",
)

In [31]:
# Stores per 100,000 and per 1,000,000 residents, rounded to 2 and 1 decimals.
store_totals = store_count_gdf["COUNT"]
residents = store_count_gdf["POPULATION"]
store_count_gdf["per_100k"] = store_totals.div(residents.div(100_000)).round(2)
store_count_gdf["per_1m"] = store_totals.div(residents.div(1_000_000)).round(1)

In [34]:
# States without any scraped store have a missing COUNT (and therefore missing
# rates) after the left join; for those columns 0 is the correct value.
# Fill only these columns: a blanket fillna(0) would also record a population
# of 0 for rows that simply had no population match, and would push a
# non-geometry fill value at the geometry column.
store_count_gdf = store_count_gdf.fillna({"COUNT": 0, "per_100k": 0, "per_1m": 0})

In [35]:
# Reproject the state polygons to EPSG:26917 (the same CRS the store points
# were saved in) and write them out as a GeoPackage.
store_count_gdf = store_count_gdf.to_crs(epsg=26917)
output_path = "data/Spirit_Christmas_Locations_Per_State.gpkg"
store_count_gdf.to_file(output_path)

In [36]:
# Show the states ranked by stores per million residents, highest first.
store_count_gdf.sort_values(by="per_1m", ascending=False)

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,COUNT,per_100k,per_1m
43,DE,Delaware,1036423.0,"MULTIPOLYGON (((967283.744 4388036.33, 967572....",1.0,0.1,1.0
34,NH,New Hampshire,1402199.0,"MULTIPOLYGON (((1346926.116 4810894.355, 13470...",1.0,0.07,0.7
39,NJ,New Jersey,9379642.0,"POLYGON ((967068.404 4400755.516, 967033.756 4...",6.0,0.06,0.6
55,CT,Connecticut,3643023.0,"MULTIPOLYGON (((1190642.294 4598514.422, 11907...",2.0,0.05,0.5
7,MA,Massachusetts,7066568.0,"MULTIPOLYGON (((1402184.105 4626749.517, 14031...",2.0,0.03,0.3
19,PA,Pennsylvania,13017721.0,"POLYGON ((540434.878 4528506.569, 540493.467 4...",4.0,0.03,0.3
53,IN,Indiana,6880131.0,"POLYGON ((-124368.376 4218994.073, -124062.676...",2.0,0.03,0.3
6,MI,Michigan,10083356.0,"MULTIPOLYGON (((318593.539 4656026.917, 318733...",2.0,0.02,0.2
29,IL,Illinois,12642259.0,"POLYGON ((-395840.263 4501208.149, -395574.947...",3.0,0.02,0.2
25,NY,New York,19737367.0,"MULTIPOLYGON (((1251358.969 4605412.599, 12514...",3.0,0.02,0.2
