In [36]:
import json
import os
import random
import re
import time

In [3]:
from bs4 import BeautifulSoup
import cloudscraper
import geopandas as gpd
import pandas as pd

In [4]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [5]:
# State boundary shapefile, addressed relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Read the shapefile into a GeoDataFrame (one row per feature in the file).
states_df = gpd.read_file(file_path)

In [6]:
# Narrow the frame to the columns the rest of the notebook uses:
# STUSPS and NAME are the join keys for later merges, geometry is the shape.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [7]:
# sheet_name=None makes read_excel load every worksheet and return a dict
# keyed by sheet name; the sheet of interest is picked out of it afterwards.
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [8]:
# The workbook's header row holds a descriptive note rather than real column
# names, so column A is labelled with that note and the other columns come
# through as "Unnamed: N".
raw_name_column = (
    "table with row headers in column A and column headers in rows 3 through 4. "
    "(leading dots indicate sub-parts)"
)
# NOTE(review): "Unnamed: 5" is the sixth spreadsheet column -- confirm that it
# is the estimate year this analysis is meant to use.
raw_population_column = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [raw_name_column, raw_population_column]
].rename(columns={raw_name_column: "NAME", raw_population_column: "POPULATION"})

# Per the header text, leading dots mark sub-part rows. Strip only those dots:
# unconditionally slicing off the first character would also truncate names
# that have no dot prefix, and those rows could then never match on NAME.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [9]:
# Left-join the population figures onto the state boundaries by NAME (every
# boundary row is kept), then pin the column order in the same expression.
population_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[population_columns]

## Get Cold Stone Data

In [60]:
# HTTP client from cloudscraper; it is used below to fetch the locator page.
scraper = cloudscraper.create_scraper()

In [61]:
# Store locator page; the location data is later pulled out of a <script>
# element embedded in this page.
url = "https://www.coldstonecreamery.com/locator/"

In [62]:
# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error status so an error page is never parsed as if it
# were the locator page.
r = scraper.get(url, timeout=30)
r.raise_for_status()

In [63]:
# Parse the page and pull out the text of the first <script> inside
# <div id="main">, which is where the location data is read from.
soup = BeautifulSoup(r.text, "html.parser")

div = soup.find("div", {"id": "main"})
if div is None:
    # find() returns None when nothing matches; report that explicitly instead
    # of failing later with an opaque AttributeError on None.
    raise ValueError(
        'Locator page has no <div id="main">; the page layout may have changed.'
    )

script_tag = div.find("script")
if script_tag is None:
    raise ValueError(
        'No <script> element found inside <div id="main">; '
        "the page layout may have changed."
    )

script_text = script_tag.text

In [64]:
# Each location in the script carries a "Latitude"/"Longitude" pair; capture
# both as strings (group 0 = latitude, group 1 = longitude).
pattern_coords = r'"Latitude":([-+]?\d+\.\d+),"Longitude":([-+]?\d+\.\d+)'
# Use the pattern defined on the line above. The previous code referenced an
# undefined name `pattern`, which raises NameError on a fresh kernel.
matches = re.findall(pattern_coords, script_text)
# Point takes (x, y), i.e. (longitude, latitude), hence the swapped indices.
points = [Point(float(match[1]), float(match[0])) for match in matches]

# State codes are extracted independently of the coordinates, in document order.
pattern_state = r'"State":"(.*?)"'
matches_state = re.findall(pattern_state, script_text)

# The two lists are paired positionally when the GeoDataFrame is built, so a
# length mismatch means at least one record failed to match one of the patterns.
if len(points) != len(matches_state):
    raise ValueError(
        f"Found {len(points)} coordinate pairs but {len(matches_state)} states; "
        "the locator data format may have changed."
    )

In [65]:
# One row per scraped location: its point geometry and its state code.
# The coordinates are plain latitude/longitude values, declared as EPSG:4326.
cold_stone_columns = {"geometry": points, "STUSPS": matches_state}
cold_stone_gdf = gpd.GeoDataFrame(cold_stone_columns, crs=4326)

In [66]:
# Reproject the store points to EPSG:9311 before saving.
# NOTE(review): 9311 is presumably a US-wide equal-area projection -- confirm
# that it is the CRS intended for the final maps.
cold_stone_gdf = cold_stone_gdf.to_crs(9311)
# Persist the reprojected points to disk.
cold_stone_gdf.to_file("data/cold_stone.gpkg")

In [67]:
# Number of stores per state code, as a two-column frame (STUSPS, COLD_STONE).
# Despite the `_gdf` suffix this is a plain table with no geometry column.
cold_stone_counts_gdf = (
    cold_stone_gdf.groupby("STUSPS").size().rename("COLD_STONE").reset_index()
)

## Combine With States

In [68]:
# Attach the store counts to every state row. With a left join, states that
# have no scraped stores are kept and get a missing COLD_STONE value.
cold_stone_per_state_gdf = states_with_population_df.merge(
    right=cold_stone_counts_gdf,
    how="left",
    on="STUSPS",
)

In [70]:
# Default only the store count to zero: a state absent from the scrape simply
# has no stores. A missing POPULATION is deliberately left as NaN -- filling it
# with 0 would make the per-capita ratios divide by zero and produce inf,
# which dropna() does not remove.
cold_stone_per_state_gdf["COLD_STONE"] = (
    cold_stone_per_state_gdf["COLD_STONE"].fillna(0).astype(int)
)

In [71]:
# Stores per N residents for several values of N. The dict's insertion order
# determines the order in which the columns are appended.
population_scales = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents in population_scales.items():
    cold_stone_per_state_gdf[rate_column] = cold_stone_per_state_gdf["COLD_STONE"] / (
        cold_stone_per_state_gdf["POPULATION"] / residents
    )

In [72]:
# Discard every row that still contains a missing value in any column
# (for example a per-capita ratio that evaluated to NaN).
cold_stone_per_state_gdf = cold_stone_per_state_gdf.dropna()

In [73]:
# Reproject the state polygons to EPSG:9311, the same CRS the store points
# were saved in, so both layers line up.
cold_stone_per_state_gdf = cold_stone_per_state_gdf.to_crs(9311)
# Persist the per-state table, geometry included, to disk.
cold_stone_per_state_gdf.to_file("data/cold_stones_per_state.gpkg")