In [1]:
from xml.etree import ElementTree

In [2]:
import cloudscraper
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

## Get State Data

In [23]:
# Read the state boundary shapefile (cb_2018_us_state_500k) into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [24]:
# Keep only the postal code, the state name and the geometry; the rest of the
# shapefile attributes are not used further down.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

## Get Winchell's Donut Locations

In [25]:
# HTTP client used for the store-locator request below.
# NOTE(review): presumably cloudscraper is needed because the site sits behind
# an anti-bot challenge that a plain HTTP client would not pass — confirm.
scraper = cloudscraper.create_scraper()

In [26]:
# Download the store-locator XML feed.
# A timeout keeps Run All from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here on an HTTP error instead of
# handing an error page to the XML parser in the next cell.
r = scraper.get("https://winchells.com/maps_xml", timeout=30)
r.raise_for_status()

In [27]:
# Parse the raw response bytes; `tree` is the root element and the loop below
# iterates over its direct children (one per store).
# NOTE(review): xml.etree is not hardened against maliciously constructed XML;
# this is only appropriate while the feed is trusted.
tree = ElementTree.fromstring(r.content)

In [28]:
# Build one record per store element of the feed: point geometry, state or
# territory postal code ("STUSPS") and the raw address string.
store_dicts = []
for child in tree:
    address = child.attrib["Address"]
    # Reset for every store so that an address that cannot be parsed never
    # inherits the state code of the previously processed store.
    state_code = None
    try:
        # Usual shape is "<street>, <city>, <ST> <zip>": take the first token
        # of the third comma-separated field.
        state_code = address.split(",")[2].strip().split(" ")[0]
    except IndexError:
        # Fewer than three comma-separated fields: handle the known one-off,
        # otherwise print the address so it can be inspected and leave the
        # state code empty.
        if address == "603 E University Dr. Ste D Carson, CA":
            state_code = "CA"
        else:
            print(address)
    else:
        # The Guam store is written in a different order, so the generic parse
        # yields either "Guam" or the postal code; map both to "GU".
        if state_code == "Guam" or address == "143 4, Hagåtña, 96910, Guam":
            state_code = "GU"

    # Attribute values arrive as text; convert explicitly before building the
    # geometry.
    # NOTE(review): assumes Xcoord is longitude and Ycoord is latitude — confirm
    # against the feed.
    point = Point(float(child.attrib["Xcoord"]), float(child.attrib["Ycoord"]))
    store_dicts.append({"geometry": point, "STUSPS": state_code, "address": address})

In [29]:
# Turn the store records into a GeoDataFrame and declare the coordinates as
# EPSG:4326.
winchells_gdf = gpd.GeoDataFrame(data=store_dicts, crs="EPSG:4326")

In [30]:
# Reproject the store points to EPSG:9311 and write them out as a GeoPackage.
winchells_gdf = winchells_gdf.to_crs(9311)
winchells_gdf.to_file("data/locations.gpkg")

## Get Population Data

In [39]:
# sheet_name=None loads every worksheet, so the result is a dict of DataFrames
# keyed by sheet name rather than a single DataFrame.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [40]:
# The first spreadsheet column is labelled with a long description cell and the
# others come through as "Unnamed: N", so the two columns of interest are
# renamed to NAME / POPULATION and everything else is dropped.
# NOTE(review): "Unnamed: 5" is assumed to be the intended estimate year —
# confirm against the header rows of the workbook.
state_populations_df = (
    state_populations["NST-EST2024-POP"]
    .rename(
        columns={
            "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
            "Unnamed: 5": "POPULATION",
        }
    )[["NAME", "POPULATION"]]
    # Per the column header, only sub-part rows carry a leading dot. Strip just
    # the dot instead of unconditionally dropping the first character, which
    # would truncate every name that has no dot and break the later join on
    # NAME for those rows.
    .assign(NAME=lambda df: df["NAME"].str.lstrip("."))
)

### Manually Add Guam

In [43]:
# Prepend a Guam row to the population table.
# NOTE(review): the source and reference year of the 167777 figure are not
# recorded in this notebook — confirm and cite.
# Any existing Guam row is filtered out first so that re-running this cell does
# not add a duplicate; on a first run the result is the same as before: Guam in
# the first row and a fresh 0..n index.
guam_row = pd.DataFrame([{"NAME": "Guam", "POPULATION": 167777}])
state_populations_df = pd.concat(
    [guam_row, state_populations_df[state_populations_df["NAME"] != "Guam"]],
    ignore_index=True,
)

In [45]:
# Left-join the population figures onto the state geometries by state name and
# put the columns in their final order in the same expression.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [46]:
# Drop every row that has a missing value in any column. After the left merge
# above, that removes states/territories with no matching population row.
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [53]:
# Number of stores per state code, as a one-column frame named "Winchells"
# indexed by STUSPS.
groupby_df = winchells_gdf.groupby("STUSPS").size()
winchells_counts_df = groupby_df.to_frame(name="Winchells")

In [54]:
# Attach the per-state store counts to the state/population table. The left
# join keeps states without any store, which come through with a missing count.
winchells_counts_gdf = states_with_population_df.merge(
    winchells_counts_df, on="STUSPS", how="left"
)
# Only the count column is filled: a frame-wide fillna(0) would also target the
# text and geometry columns, where 0 is not a meaningful value.
winchells_counts_gdf = winchells_counts_gdf.fillna({"Winchells": 0})

In [57]:
# Stores per 100,000 and per 1,000,000 residents, then the count itself as an
# integer. The keyword arguments are evaluated in order, so both rates are
# computed from the count before it is cast.
winchells_counts_gdf = winchells_counts_gdf.assign(
    Per_100k=lambda df: df["Winchells"] / (df["POPULATION"] / 100_000),
    Per_1m=lambda df: df["Winchells"] / (df["POPULATION"] / 1_000_000),
    Winchells=lambda df: df["Winchells"].astype(int),
)

In [58]:
# Reproject the state polygons to EPSG:9311, the same CRS the store points were
# converted to before being saved.
winchells_counts_gdf = winchells_counts_gdf.to_crs(9311)

In [59]:
# Write the per-state table (counts, rates, geometry) to disk.
# NOTE(review): no driver is passed, so the output format presumably follows
# from the .gpkg extension — confirm with the installed geopandas version.
winchells_counts_gdf.to_file("data/Winchells.gpkg")