In [6]:
import json
import requests

In [7]:
import geopandas as gpd
import pandas as pd

In [8]:
from bs4 import BeautifulSoup
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State (Province/Territory) Data

In [54]:
# Path to the boundary shapefile (presumably the Statistics Canada 2021
# province/territory boundary file, judging by its name — TODO confirm).
file_path = "data/lecu000e21a_e/lpr_000e21a_e.shp"
# Load the boundaries into a GeoDataFrame; later cells rely on its
# "PRENAME" and "geometry" columns.
states_df = gpd.read_file(file_path)

In [55]:
# Lookup from the English province/territory name to its two-letter postal
# abbreviation. The abbreviation is the join key used against the store data.
canada_provinces = dict(
    [
        ("Alberta", "AB"),
        ("British Columbia", "BC"),
        ("Manitoba", "MB"),
        ("New Brunswick", "NB"),
        ("Newfoundland and Labrador", "NL"),
        ("Northwest Territories", "NT"),
        ("Nova Scotia", "NS"),
        ("Nunavut", "NU"),
        ("Ontario", "ON"),
        ("Prince Edward Island", "PE"),
        ("Quebec", "QC"),
        ("Saskatchewan", "SK"),
        ("Yukon", "YT"),
    ]
)

In [56]:
# Add a "STUSPS" column holding each row's two-letter abbreviation, looked up
# from its English name; names missing from the lookup become NaN.
states_df = states_df.assign(STUSPS=states_df["PRENAME"].map(canada_provinces))

In [57]:
# Keep only the columns needed downstream and expose the English name as
# "Geography", the key the population table is merged on.
states_df = states_df[["geometry", "PRENAME", "STUSPS"]].rename(
    columns={"PRENAME": "Geography"}
)

## Get Population Data

In [58]:
# Population table; later cells use its "Geography" and "Q1 2025" columns.
# NOTE(review): the file name suggests an export of Statistics Canada table
# 17-10-0009-01 (quarterly population estimates) — confirm the provenance.
state_populations = pd.read_csv("data/1710000901-noSymbol.csv")

In [59]:
# Join the population table onto the boundaries by name, then turn the
# "Q1 2025" figures (strings with thousands separators, e.g. "1,234,567")
# into an integer "POPULATION" column.
states_population_gdf = (
    states_df.merge(state_populations, on="Geography", how="left")
    .rename(columns={"Q1 2025": "POPULATION"})
    .assign(
        POPULATION=lambda frame: frame["POPULATION"].str.replace(",", "").astype(int)
    )
)

## Get Store Data

In [9]:
# Store-locator endpoint and the query sent to it.
# NOTE(review): lat/lng look like a point in the central US (Kansas City
# area), and the large radius/limit look intended to return every store in
# one response — confirm the radius units and the server-side cap.
url = "https://maps.spirithalloween.com/api/getAsyncLocations"
params = dict(
    template="search",
    level="search",
    radius=5000,
    lat=39.0997,
    lng=-94.5786,
    limit=2000,
)

In [10]:
# Query the store-locator endpoint.
# The timeout stops the cell from hanging indefinitely if the server never
# responds. raise_for_status() reports an HTTP error (4xx/5xx) here instead
# of letting it surface later as a confusing JSON-decoding or KeyError failure.
r = requests.get(url, params=params, timeout=30)
r.raise_for_status()

In [11]:
# Decode the JSON response body and pull out the list stored under "markers".
# Each entry is later read for its "info", "lat" and "lng" fields.
data = r.json()
locations = data["markers"]

In [12]:
def _store_record(location):
    """Convert one locator marker into a store record.

    Args:
        location: A marker dict with an ``"info"`` HTML snippet plus ``"lat"``
            and ``"lng"`` coordinates.

    Returns:
        A dict with the store page ``"url"``, a ``"geometry"`` point built as
        (lng, lat), and the ``"STUSPS"`` state/province abbreviation.

    The abbreviation is read from the second-to-last ``<div>`` inside the
    address block. Its second-to-last space-separated token is used, unless
    that token is longer than two characters, in which case the third-to-last
    token is used instead (presumably because Canadian postal codes span two
    tokens — confirm against the live markup).
    """
    info_soup = BeautifulSoup(location["info"], "html.parser")
    store_url = info_soup.find("a", {"class": "store-info"}).attrs["href"]

    address_div = info_soup.find("div", {"class": "address"})
    tokens = address_div.find_all("div")[-2].text.split(" ")
    region_code = tokens[-2]
    if len(region_code) > 2:
        region_code = tokens[-3]

    return {
        "url": store_url,
        "geometry": Point(location["lng"], location["lat"]),
        "STUSPS": region_code,
    }


# One record per marker, in the order the API returned them.
store_dicts = [_store_record(location) for location in locations]

In [13]:
# Build a GeoDataFrame from the scraped records; crs=4326 declares the
# lng/lat points as EPSG:4326 (WGS 84) coordinates.
store_gdf = gpd.GeoDataFrame(store_dicts, crs=4326)
# Save to disk so the scrape does not have to be repeated.
store_gdf.to_file("data/stores.gpkg")

## Load Store Data (If You Already Have It From a Previous Project)

In [30]:
# Reload the store points from the GeoPackage written by the scraping
# section, so the network request can be skipped.
store_gdf = gpd.read_file("data/stores.gpkg")

In [31]:
# Count stores per state/province abbreviation, then flatten the result into
# a two-column frame ("STUSPS", "COUNT") ready to merge.
stores_per_code = store_gdf.groupby("STUSPS").size()
store_counts_df = stores_per_code.reset_index(name="COUNT")

In [38]:
# Display the province/population table as a visual sanity check.
states_population_gdf

Unnamed: 0,Geography,geometry,POPULATION
0,Newfoundland and Labrador,"MULTIPOLYGON (((8514140.177 2242113.643, 85141...",545579
1,Prince Edward Island,"MULTIPOLYGON (((8361759.451 1609870.034, 83617...",179280
2,Nova Scotia,"MULTIPOLYGON (((8566602.126 1595861.269, 85665...",1079627
3,New Brunswick,"MULTIPOLYGON (((8114947.354 1376014.071, 81149...",858963
4,Quebec,"MULTIPOLYGON (((7632374.02 1242855.226, 763236...",9111629
5,Ontario,"MULTIPOLYGON (((7574268.543 1177482.237, 75742...",16182641
6,Manitoba,"POLYGON ((6039436.26 2637017.429, 6039446.631 ...",1504023
7,Saskatchewan,"POLYGON ((5248633.914 2767057.263, 5249285.64 ...",1250909
8,Alberta,"POLYGON ((5228304.177 2767597.891, 5228098.463...",4960097
9,British Columbia,"MULTIPOLYGON (((3825396.686 2287453.014, 38253...",5722318


## Combine With States

In [60]:
# Attach the per-abbreviation store counts to every province/territory.
# The left merge keeps regions that have no stores at all; for those rows the
# merge yields NaN (and turns COUNT into floats), so fill them with 0 and
# restore an integer dtype — "no stores" is a real zero, not missing data.
store_count_gdf = states_population_gdf.merge(
    store_counts_df, on="STUSPS", how="left"
).assign(COUNT=lambda frame: frame["COUNT"].fillna(0).astype(int))

In [61]:
# Stores per 100,000 and per 1,000,000 residents, rounded to two decimals.
for rate_column, residents in (("per_100k", 100_000), ("per_1m", 1_000_000)):
    store_count_gdf[rate_column] = (
        store_count_gdf["COUNT"] / (store_count_gdf["POPULATION"] / residents)
    ).round(decimals=2)

In [62]:
# Reproject to EPSG:3348 and write the per-province result to a GeoPackage.
province_output_path = "data/Spirit_Halloween_Locations_Per_Canadian_Province.gpkg"
store_count_gdf = store_count_gdf.to_crs(3348)
store_count_gdf.to_file(province_output_path)

In [63]:
# Rank regions by stores per million residents, highest first.
store_count_gdf.sort_values("per_1m", ascending=False)

Unnamed: 0,geometry,Geography,STUSPS,POPULATION,COUNT,per_100k,per_1m
1,"MULTIPOLYGON (((8361759.451 1609870.034, 83617...",Prince Edward Island,PE,179280,1.0,0.56,5.58
0,"MULTIPOLYGON (((8514140.177 2242113.643, 85141...",Newfoundland and Labrador,NL,545579,2.0,0.37,3.67
3,"MULTIPOLYGON (((8114947.354 1376014.071, 81149...",New Brunswick,NB,858963,3.0,0.35,3.49
6,"POLYGON ((6039436.26 2637017.429, 6039446.631 ...",Manitoba,MB,1504023,4.0,0.27,2.66
5,"MULTIPOLYGON (((7574268.543 1177482.237, 75742...",Ontario,ON,16182641,39.0,0.24,2.41
7,"POLYGON ((5248633.914 2767057.263, 5249285.64 ...",Saskatchewan,SK,1250909,3.0,0.24,2.4
9,"MULTIPOLYGON (((3825396.686 2287453.014, 38253...",British Columbia,BC,5722318,13.0,0.23,2.27
8,"POLYGON ((5228304.177 2767597.891, 5228098.463...",Alberta,AB,4960097,11.0,0.22,2.22
2,"MULTIPOLYGON (((8566602.126 1595861.269, 85665...",Nova Scotia,NS,1079627,2.0,0.19,1.85
4,"MULTIPOLYGON (((7632374.02 1242855.226, 763236...",Quebec,QC,9111629,8.0,0.09,0.88


In [69]:
# Keep only the stores whose abbreviation belongs to a Canadian
# province/territory, reproject them to EPSG:3348 to match the province
# layer, and save them as their own GeoPackage.
canadian_codes = store_count_gdf["STUSPS"].unique()
is_canadian_store = store_gdf["STUSPS"].isin(canadian_codes)
canadian_stores_gdf = (
    store_gdf[is_canadian_store].reset_index(drop=True).to_crs(3348)
)
canadian_stores_gdf.to_file("data/canadian_stores.gpkg")