In [1]:
import json
import requests

In [2]:
import geopandas as gpd
import pandas as pd

In [3]:
from bs4 import BeautifulSoup
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Path to the state-boundary shapefile, relative to the notebook's working directory.
# NOTE(review): the file name suggests the Census Bureau 2018 1:500k cartographic
# boundary set -- confirm the provenance and record it in the markdown.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Load the shapefile into a GeoDataFrame; later cells use its STUSPS, NAME and geometry columns.
states_df = gpd.read_file(file_path)

In [5]:
# Keep only the state abbreviation, the full state name and the boundary geometry;
# every other attribute in the shapefile is unused by this notebook.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [6]:
# sheet_name=None makes pandas read every worksheet and return a dict of
# DataFrames keyed by sheet name; the next cell picks the sheet it needs.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [7]:
# Map the raw spreadsheet column labels onto the names used by the rest of the notebook.
# The first label is the sheet's title text, which pandas used as the header of the
# column holding the geographic area names.
# NOTE(review): "Unnamed: 5" is simply the sixth column of the sheet -- confirm that it
# holds the estimate year you intend to report.
population_columns = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}
state_populations_df = (
    state_populations["NST-EST2024-POP"][list(population_columns)]
    .rename(columns=population_columns)
    # Per the sheet header, only sub-part rows carry leading dots. Strip the dots
    # themselves instead of unconditionally dropping the first character, which would
    # mangle every undotted name and stop it from matching on NAME in the merge below.
    .assign(NAME=lambda df: df["NAME"].str.lstrip("."))
)

In [8]:
# Left-join the population figures onto the state shapes by full state name, so every
# shape is kept even when no population row matches, then fix the column order.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
).loc[:, ["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Store Data

In [9]:
# Store-locator endpoint queried in the next cell.
url = "https://maps.spirithalloween.com/api/getAsyncLocations"
# Query-string arguments sent with the request.
# NOTE(review): lat/lng sit near Kansas City, MO; together with the large radius and
# limit this is presumably meant to return every store in a single response -- confirm
# the radius units and that the result set is not being capped by `limit`.
params = dict(
    template="search",
    level="search",
    radius=5000,
    lat=39.0997,
    lng=-94.5786,
    limit=2000,
)

In [10]:
# Always pass a timeout: without one, requests can wait on an unresponsive server forever
# and stall the whole notebook run.
r = requests.get(url, params=params, timeout=30)
# Surface HTTP errors (4xx/5xx) here rather than as a confusing JSON/KeyError failure
# in the next cell.
r.raise_for_status()

In [11]:
# Decode the JSON body of the store-locator response.
data = r.json()
# The per-store records live under the "markers" key; each one is read below for its
# "info" HTML snippet and its "lat"/"lng" coordinates.
locations = data["markers"]

In [12]:
def _state_code_from_address(address_text):
    """Pick the state/province token out of the city line of a store address.

    The line is split on single spaces and the second-to-last token is taken;
    when that token is longer than two characters, the third-to-last token is
    used instead.

    NOTE(review): splitting on a single space is sensitive to doubled or trailing
    whitespace in the scraped text -- verify against the live markup.
    """
    tokens = address_text.split(" ")
    candidate = tokens[-2]
    return candidate if len(candidate) <= 2 else tokens[-3]


def _store_record(location):
    """Turn one locator marker into a row dict with url, geometry and STUSPS."""
    soup = BeautifulSoup(location["info"], "html.parser")
    store_url = soup.find("a", {"class": "store-info"}).attrs["href"]

    # The state code is parsed from the second-to-last <div> inside the address block.
    address_lines = soup.find("div", {"class": "address"}).find_all("div")
    state_code = _state_code_from_address(address_lines[-2].text)

    return {
        "url": store_url,
        # shapely points are (x, y), i.e. (longitude, latitude).
        "geometry": Point(location["lng"], location["lat"]),
        "STUSPS": state_code,
    }


store_dicts = [_store_record(location) for location in locations]

In [13]:
# Assemble the scraped rows into a GeoDataFrame; the points were built from raw
# lng/lat values, so they are tagged as EPSG:4326.
store_gdf = gpd.GeoDataFrame(data=store_dicts, crs=4326)
# Persist the raw store points so the scrape does not have to be repeated.
store_gdf.to_file(filename="data/stores.gpkg")

In [39]:
# Count stores per state code: one row per STUSPS value, with the tally in COUNT.
store_counts_df = store_gdf.groupby("STUSPS").size().rename("COUNT").reset_index()

## Combine With States

In [41]:
# Attach the per-state store counts to every state row (the left join keeps states that
# have no stores). Such states are absent from store_counts_df and would come back with
# a missing COUNT; they have zero stores, so record 0 instead of leaving the count and
# the per-capita rates derived from it undefined.
store_count_gdf = states_with_population_df.merge(
    store_counts_df, on="STUSPS", how="left"
).assign(COUNT=lambda df: df["COUNT"].fillna(0).astype(int))

In [42]:
def _stores_per(count, population, per):
    """Return the number of stores per `per` residents, rounded to two decimals."""
    return (count / (population / per)).round(decimals=2)


# Same rate expressed at two scales so either can be used downstream.
store_count_gdf["per_100k"] = _stores_per(
    store_count_gdf["COUNT"], store_count_gdf["POPULATION"], 100_000
)
store_count_gdf["per_1m"] = _stores_per(
    store_count_gdf["COUNT"], store_count_gdf["POPULATION"], 1_000_000
)

In [43]:
# Reproject to EPSG:9311 before exporting.
# NOTE(review): presumably chosen as an equal-area projection for mapping the US --
# confirm against the EPSG registry.
store_count_gdf = store_count_gdf.to_crs(9311)
# Plain string literal: the path has no placeholders, so the f-prefix was unnecessary.
store_count_gdf.to_file("data/Spirit_Halloween_Locations_Per_State.gpkg")

In [44]:
# Rank states from most to fewest stores per million residents (displayed, not stored).
store_count_gdf.sort_values(by="per_1m", ascending=False)

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,COUNT,per_100k,per_1m
34,NH,New Hampshire,1402199.0,"MULTIPOLYGON (((2326343.717 202530.281, 232642...",12.0,0.86,8.56
2,OK,Oklahoma,4063882.0,"POLYGON ((-268912.756 -936547.904, -268600.891...",29.0,0.71,7.14
24,WY,Wyoming,585067.0,"POLYGON ((-865730.986 59352.206, -864993.905 5...",4.0,0.68,6.84
43,DE,Delaware,1036423.0,"MULTIPOLYGON (((2060773.554 -301785.14, 206090...",7.0,0.68,6.75
21,CO,Colorado,5901339.0,"POLYGON ((-786602.67 -668516.216, -785169.939 ...",38.0,0.64,6.44
35,AZ,Arizona,7473027.0,"POLYGON ((-1389045.258 -1265605.408, -1388845....",48.0,0.64,6.42
22,UT,Utah,3443222.0,"POLYGON ((-1233486.447 -719056.636, -1233235.8...",22.0,0.64,6.39
50,OR,Oregon,4253653.0,"MULTIPOLYGON (((-1781988.816 403834.726, -1781...",25.0,0.59,5.88
19,PA,Pennsylvania,13017721.0,"POLYGON ((1619177.315 -262060.235, 1618879.478...",75.0,0.58,5.76
11,WA,Washington,7857320.0,"MULTIPOLYGON (((-1634509.823 627551.89, -16343...",45.0,0.57,5.73
