In [1]:
import json
import os

In [2]:
from bs4 import BeautifulSoup
import cloudscraper
import geopandas as gpd
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Read the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the state code (STUSPS), the state name, and the geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [6]:
# Load every sheet of the population workbook: with sheet_name=None,
# read_excel returns a dict of DataFrames keyed by sheet name.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path, engine="openpyxl", sheet_name=None
)

In [7]:
# The two columns of interest are addressed by the labels pandas assigned on
# load: the first column carries the sheet's long title text, the other is an
# auto-generated "Unnamed: N" label.
# NOTE(review): "Unnamed: 5" picks a column purely by position — confirm it is
# the estimate year you intend to use.
name_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
population_column = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [name_column, population_column]
].rename(columns={name_column: "NAME", population_column: "POPULATION"})

# Strip only the leading dots that mark sub-parts. Slicing off the first
# character unconditionally (the previous `.str[1:]`) also mangled names that
# have no leading dot, so those rows could never match in the merge on NAME.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [8]:
# Left-join the population figures onto the state geometries by state name,
# then fix the column order with geometry last.
merged_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[merged_columns]

## Get Costco Data

In [9]:
# Create the cloudscraper session that is used for the Costco request below.
scraper = cloudscraper.create_scraper()

In [10]:
# Costco's warehouse-list-by-state page.
url = "https://www.costco.com/WarehouseListByStateDisplayView"
# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() fails here on an HTTP error status instead of letting an
# error page reach the parsing cells, where it would surface as a confusing
# index or JSON error.
r = scraper.get(url, timeout=30)
r.raise_for_status()

In [11]:
# Parse the response body as HTML using Python's built-in "html.parser".
soup = BeautifulSoup(r.text, "html.parser")

In [14]:
# The store data is decoded as JSON from the text of one inline script tag.
# The tag position (11) and the slice bounds (30, -6) are hard-coded.
# NOTE(review): the slice presumably trims a JavaScript wrapper around the
# JSON payload and depends on the current page layout — re-check these
# offsets if this cell starts failing.
script_tags = soup.find_all("script", {"type": "text/javascript"})
embedded_script = script_tags[11].text
stores_list = json.loads(embedded_script[30:-6])

In [61]:
# One entry per warehouse: the state's name and code plus the store location.
store_dict = {"NAME": [], "STUSPS": [], "geometry": []}
for state_entry in tqdm(stores_list, desc="Parsing States"):
    for warehouse in state_entry["warehouseList"]:
        store_dict["NAME"].append(state_entry["state"])
        store_dict["STUSPS"].append(state_entry["stateCode"])
        # Point takes (x, y), i.e. longitude first, then latitude.
        lon = float(warehouse["longitude"])
        lat = float(warehouse["latitude"])
        store_dict["geometry"].append(Point(lon, lat))

Parsing States:   0%|          | 0/52 [00:00<?, ?it/s]

#### If the cell above breaks, keep re-running it until it completes. Then run the cell below.

In [62]:
# Build a GeoDataFrame from the collected store records; crs=4326 declares
# the point coordinates as EPSG:4326.
costco_gdf = gpd.GeoDataFrame(store_dict, crs=4326)

In [63]:
# Reproject the store points to EPSG:9311 and write them to a GeoPackage.
costco_gdf = costco_gdf.to_crs(9311)
# Plain string (the f-string had no placeholders). The driver is named
# explicitly so the output format does not rely on file-extension inference.
costco_gdf.to_file("data/costco_stores.gpkg", driver="GPKG")

## Combine With States

In [75]:
# Count the stores in each state code.
costco_state_counts_series = costco_gdf.groupby("STUSPS").size()

# Turn the counts into a two-column frame: the state code and its store count.
state_codes = costco_state_counts_series.index
store_totals = costco_state_counts_series.values
costco_state_counts_df = pd.DataFrame(
    {"STUSPS": state_codes, "stores": store_totals}
)

In [76]:
# Attach the store counts to the state/population table by state code.
# how="left" keeps every state row; states absent from the counts get a
# missing "stores" value.
costco_state_counts_gdf = states_with_population_df.merge(
    costco_state_counts_df, on="STUSPS", how="left"
)

In [77]:
# States without any stores come out of the left merge with a missing count;
# treat that as zero stores. Only "stores" is filled: a blanket fillna(0)
# would also turn a missing POPULATION into 0, and the per-capita rates would
# then divide by zero.
costco_state_counts_gdf["stores"] = (
    costco_state_counts_gdf["stores"].fillna(0).astype(int)
)

In [78]:
# Stores per N residents, computed as stores / (POPULATION / N), with one
# output column for each N.
rate_denominators = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents in rate_denominators.items():
    costco_state_counts_gdf[rate_column] = costco_state_counts_gdf["stores"] / (
        costco_state_counts_gdf["POPULATION"] / residents
    )

In [79]:
# Replace any remaining missing values, in every column, with 0.
# NOTE(review): fillna leaves infinite values untouched, so a rate computed
# from a POPULATION of 0 may still be inf after this cell — confirm.
costco_state_counts_gdf = costco_state_counts_gdf.fillna(0)

In [81]:
# Reproject the state table to EPSG:9311 (the same CRS used for the store
# points) and write it to a GeoPackage.
costco_state_counts_gdf = costco_state_counts_gdf.to_crs(9311)
# Plain string (the f-string had no placeholders). The driver is named
# explicitly so the output format does not rely on file-extension inference.
costco_state_counts_gdf.to_file("data/costcos_by_states.gpkg", driver="GPKG")