In [35]:
from collections import Counter

In [11]:
import cloudscraper
import geopandas as gpd
import pandas as pd

## Get State Data

In [13]:
# Location of the state boundary shapefile, relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Load the shapefile into a GeoDataFrame (one row per feature, shapes in "geometry").
states_df = gpd.read_file(file_path)

In [14]:
# Keep only the columns used further down: "STUSPS" and "NAME" serve as join
# keys in the merges below, and "geometry" carries the shapes that get exported.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [15]:
# sheet_name=None makes read_excel load every sheet in the workbook and return
# a dict keyed by sheet name; the sheet that is needed is picked out of that
# dict in the next cell.
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [16]:
# The column holding the area names is labelled with the sheet's descriptive
# banner text, and the population column has a pandas placeholder label
# (presumably because the workbook's title rows were read as the header row).
# NOTE(review): "Unnamed: 5" is the sixth column of the sheet — confirm that is
# the estimate year intended for this analysis.
area_name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
population_header = "Unnamed: 5"

state_populations_df = (
    state_populations["NST-EST2024-POP"][[area_name_header, population_header]]
    .rename(columns={area_name_header: "NAME", population_header: "POPULATION"})
    # Per the column header, only sub-part rows carry a leading dot. Strip dots
    # specifically (rather than blindly dropping the first character) so that
    # names without a dot are left intact and can still match in the name join.
    .assign(NAME=lambda frame: frame["NAME"].str.lstrip("."))
)

In [17]:
# Attach the population figure to each state by name. The left join keeps every
# row of states_df; rows with no matching name get a missing POPULATION.
# The trailing selection fixes the column order of the result.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Sam's Club Data

In [51]:
# HTTP client from cloudscraper; it is used to issue the GET request for the
# club list in the next cell.
scraper = cloudscraper.create_scraper()

In [52]:
# Club-finder endpoint; the query string restricts the result to active clubs.
url_root = "https://www.samsclub.com/api/node/vivaldi/browse/v2/clubfinder/search?isActive=true"
# Bound the wait so a stalled connection cannot hang the notebook, and fail
# right here on an HTTP error status instead of later, when the body is
# decoded as JSON and indexed as a list of stores.
r = scraper.get(url_root, timeout=30)
r.raise_for_status()

In [None]:
# Decode the response body as JSON. The following cell iterates the result as
# a sequence of store records and reads each record's ["address"]["state"].
stores_json = r.json()

In [54]:
# Order the store records by the state in their address, then pull out just
# that state value for every store (one entry per store, so states repeat).
state_sorted_list = sorted(stores_json, key=lambda club: club["address"]["state"])
states_by_stores = [club["address"]["state"] for club in state_sorted_list]

In [55]:
# Tally stores per state and turn the tally into a two-column table:
# the state value ("STUSPS") and how many stores were seen for it ("SAMS_CLUBS").
sams_club_state_counts_df = (
    pd.DataFrame.from_dict(Counter(states_by_stores), orient="index")
    .reset_index()
    .rename(columns={"index": "STUSPS", 0: "SAMS_CLUBS"})
)

## Combine With States

In [57]:
# Join the per-state store counts onto the state/population table by "STUSPS".
# how="left" keeps every state row; a state with no entry in the counts table
# ends up with a missing SAMS_CLUBS value.
sam_club_state_counts_gdf = states_with_population_df.merge(
    sams_club_state_counts_df, on="STUSPS", how="left"
)

In [58]:
# Rows that found no partner in the left joins hold missing values; record
# those as 0 (in particular: no clubs found for that state).
sam_club_state_counts_gdf = sam_club_state_counts_gdf.fillna(0)
# The missing values forced SAMS_CLUBS to float; store it back as whole
# numbers. The result must go into "SAMS_CLUBS" itself — writing it to a
# "BJS" column left the real count column as float and added a stray,
# mislabelled column to the exported layer.
sam_club_state_counts_gdf["SAMS_CLUBS"] = sam_club_state_counts_gdf["SAMS_CLUBS"].astype(int)

In [59]:
# Clubs per N residents, for several values of N.
# Maps each output column name to the number of residents the rate is per.
rate_scales = {
    "per_1000": 1_000,
    "per_10k": 10_000,
    "per_100k": 100_000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}

# A population of 0 here means "no population figure was matched" (missing
# values were zero-filled earlier). Dividing by it would give inf for any area
# that has clubs, and inf is not removed by the fillna(0) that follows this
# cell. Mask such rows so the rate comes out as NaN instead.
population = sam_club_state_counts_gdf["POPULATION"]
population = population.where(population > 0)

for rate_column, residents_per_unit in rate_scales.items():
    sam_club_state_counts_gdf[rate_column] = sam_club_state_counts_gdf["SAMS_CLUBS"] / (
        population / residents_per_unit
    )

In [60]:
# Replace any missing values that remain (for example NaN rates produced by
# the divisions above) with 0 before export. Note that fillna does not touch
# infinite values.
sam_club_state_counts_gdf = sam_club_state_counts_gdf.fillna(0)

In [61]:
# Reproject the layer to EPSG:3968, then write it out as a GeoPackage.
sam_club_state_counts_gdf = sam_club_state_counts_gdf.to_crs(epsg=3968)
sam_club_state_counts_gdf.to_file("data/sams_clubs_per_states.gpkg")