In [118]:
import json
import os
import random
import requests
import time

In [107]:
import geopandas as gpd
import pandas as pd

In [108]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [109]:
# Load the state boundary shapefile (one row per feature, geometry included).
# NOTE(review): the file name suggests the 2018 Census cartographic boundary
# set at 1:500k resolution -- confirm against the data source.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [110]:
# Keep only the postal abbreviation, the full name, and the geometry.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [111]:
# sheet_name=None makes pandas return a dict of every sheet keyed by sheet
# name; the sheet of interest is picked out of that dict afterwards.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [112]:
# The sheet's title row is parsed as the header, so the area-name column is
# labelled with the title text and the population column used here is the
# unnamed sixth column ("Unnamed: 5").
# NOTE(review): presumably "Unnamed: 5" is the 2024 estimate -- confirm
# against the workbook.
raw_name_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

state_populations_df = state_populations["NST-EST2024-POP"][
    [raw_name_column, "Unnamed: 5"]
].rename(columns={raw_name_column: "NAME", "Unnamed: 5": "POPULATION"})

# Per the header text, only sub-part rows carry leading dots. Strip just
# those dots rather than blindly dropping the first character, which would
# corrupt any row that has no dot and stop it from matching the shapefile's
# NAME column in the later merge.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [113]:
# Left-join so every boundary row survives even when no population row shares
# its NAME; then pin the column order used downstream.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
).loc[:, ["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get BJ's Data

In [114]:
# Fetch the state -> towns listing that drives the per-store scraping below.
url = "https://api.bjs.com/digital/live/apis/v1.0/clublocatorpage/statetowns/10201"
# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON/KeyError failure in a later cell.
r = requests.get(url, timeout=30)
r.raise_for_status()
stores_json = r.json()

In [115]:
# Per-state entries from the locator response; the scraping loop reads each
# entry's "StateCode" and "Towns" keys.
states_list = stores_json["clubLocatorStateTownList"]

In [119]:
# Club-search endpoint; the scraping loop POSTs one JSON body per town to it.
search_url = "https://api.bjs.com/digital/live/api/v1.2/club/search/10201"

# Headers sent with every club-search POST. The sec-ch-ua / sec-fetch-* and
# Referer values present the request as a same-site call issued by Chrome
# from www.bjs.com.
# NOTE(review): presumably copied from a real browser session; which of these
# the API actually requires is not established here.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
    "cache-control": "no-cache",
    "content-type": "application/json",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "Referer": "https://www.bjs.com/",
    "Referrer-Policy": "strict-origin-when-cross-origin",
}

In [120]:
# Scrape the coordinates of every club, one state at a time. Each finished
# state is written to data/states/<STATE>.gpkg, which doubles as a cache: a
# state whose file already exists is skipped, so an interrupted run resumes
# where it stopped.
os.makedirs("data/states", exist_ok=True)  # to_file() does not create folders

for state in tqdm(states_list, desc="Parsing States"):
    state_code = state["StateCode"]
    towns = state["Towns"]
    state_file = f"data/states/{state_code}.gpkg"

    # Already scraped on a previous run -- nothing to do for this state.
    if os.path.isfile(state_file):
        continue

    state_store_list = []
    state_complete = True

    for town in tqdm(towns, desc=f"Parsing stores in {state_code}"):
        # Only the part of the entry before the first "|" is sent as "Town".
        store_id = town.split("|")[0]
        body = {
            "Town": store_id,
            "latitude": "",
            "longitude": "",
            "radius": "",
            "zipCode": "",
        }
        try:
            r = requests.post(
                search_url, data=json.dumps(body), headers=headers, timeout=30
            )
            r.raise_for_status()
            store_json = r.json()["Stores"]["PhysicalStore"][0]
            point = Point(store_json["longitude"], store_json["latitude"])
        except (
            requests.RequestException,
            ValueError,
            KeyError,
            IndexError,
            TypeError,
        ) as exc:
            # A failed or malformed response must not be parsed, and the state
            # must not be cached half-finished: the "file exists" check above
            # would then hide the missing stores forever. Abandon this state
            # so the next run retries it from scratch.
            print(f"Failed to fetch store {store_id} in {state_code}: {exc!r}")
            state_complete = False
            break

        store_dict = {"STATE": state_code.upper(), "geometry": point}
        state_store_list.append(store_dict)
        # Small random pause between requests to avoid hammering the API.
        time.sleep(random.uniform(0.01, 0.5))

    # Persist only fully scraped, non-empty states (coordinates are lon/lat,
    # hence EPSG:4326).
    if state_complete and state_store_list:
        bjs_state_gdf = gpd.GeoDataFrame(state_store_list, crs=4326)
        bjs_state_gdf.to_file(state_file)

Parsing States:   0%|          | 0/21 [00:00<?, ?it/s]

Parsing stores in AL:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing stores in CT:   0%|          | 0/13 [00:00<?, ?it/s]

Parsing stores in DE:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing stores in FL:   0%|          | 0/41 [00:00<?, ?it/s]

Parsing stores in GA:   0%|          | 0/6 [00:00<?, ?it/s]

Parsing stores in IN:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing stores in KY:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing stores in ME:   0%|          | 0/3 [00:00<?, ?it/s]

Parsing stores in MD:   0%|          | 0/13 [00:00<?, ?it/s]

Parsing stores in MA:   0%|          | 0/30 [00:00<?, ?it/s]

Parsing stores in MI:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing stores in NH:   0%|          | 0/7 [00:00<?, ?it/s]

Parsing stores in NJ:   0%|          | 0/25 [00:00<?, ?it/s]

Parsing stores in NY:   0%|          | 0/49 [00:00<?, ?it/s]

Parsing stores in NC:   0%|          | 0/10 [00:00<?, ?it/s]

Parsing stores in OH:   0%|          | 0/8 [00:00<?, ?it/s]

Parsing stores in PA:   0%|          | 0/19 [00:00<?, ?it/s]

Parsing stores in RI:   0%|          | 0/5 [00:00<?, ?it/s]

Parsing stores in SC:   0%|          | 0/2 [00:00<?, ?it/s]

Parsing stores in TN:   0%|          | 0/4 [00:00<?, ?it/s]

Parsing stores in VA:   0%|          | 0/15 [00:00<?, ?it/s]

In [157]:
# Read every cached per-state GeoPackage back into memory.
bjs_gdfs = []
states_path = "data/states"
# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined frame (and of the file written from it) reproducible.
for file in sorted(os.listdir(states_path)):
    # Skip anything that is not a GeoPackage (e.g. stray or temporary files).
    if not file.endswith(".gpkg"):
        continue
    constructed_path = os.path.join(states_path, file)
    bjs_gdf = gpd.read_file(constructed_path)
    bjs_gdfs.append(bjs_gdf)

In [158]:
# Stack the per-state frames into one, reproject it to EPSG:3968, and save it.
# NOTE(review): EPSG:3968 should be NAD83 / Virginia Lambert -- confirm this
# is the intended projection for the whole study area.
bjs_gdf = gpd.GeoDataFrame(
    pd.concat(bjs_gdfs, ignore_index=True)
).to_crs(crs=3968)
bjs_gdf.to_file("data/bjs.gpkg")

In [159]:
# Count stores per state and key the result by STUSPS so it can be merged
# onto the state boundaries.
bjs_counts_gdf = (
    bjs_gdf.groupby("STATE")
    .size()
    .reset_index(name="BJS")
    .rename(columns={"STATE": "STUSPS"})
)

## Combine With States

In [160]:
# Attach the store counts to every state; the left join keeps states that
# have no stores (their BJS value is missing until it is filled afterwards).
bjs_state_counts_gdf = states_with_population_df.merge(
    right=bjs_counts_gdf,
    how="left",
    on="STUSPS",
)

In [161]:
# The left merges leave BJS missing for states without stores and POPULATION
# missing for rows without a population match. Fill only those two numeric
# columns: a frame-wide fillna(0) would also try to put 0 into the geometry
# and name columns.
bjs_state_counts_gdf = bjs_state_counts_gdf.fillna({"BJS": 0, "POPULATION": 0})
# Counts are whole numbers; the merge turned them into floats to hold NaN.
bjs_state_counts_gdf["BJS"] = bjs_state_counts_gdf["BJS"].astype(int)

In [162]:
# Stores per N residents, for several values of N.
# Column name -> number of residents the rate is expressed per.
rate_scales = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100_000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}

# Coerce to numbers and blank out non-positive populations so that rows
# without usable population data yield NaN (zeroed by the following fillna)
# instead of inf or a division error.
population = pd.to_numeric(bjs_state_counts_gdf["POPULATION"], errors="coerce")
population = population.where(population > 0)

for rate_column, residents_per_unit in rate_scales.items():
    bjs_state_counts_gdf[rate_column] = bjs_state_counts_gdf["BJS"] / (
        population / residents_per_unit
    )

In [163]:
# Rows without usable population data produce NaN (0 / 0) or inf (n / 0)
# rates; report those as 0. Only the per_* columns are touched so the
# geometry and name columns are never handed a numeric fill value.
rate_columns = [
    column
    for column in bjs_state_counts_gdf.columns
    if str(column).startswith("per_")
]
bjs_state_counts_gdf[rate_columns] = (
    bjs_state_counts_gdf[rate_columns]
    .replace([float("inf"), float("-inf")], float("nan"))
    .fillna(0)
)

In [164]:
# Reproject to EPSG:3968 (the same CRS the store points are saved in) and
# write the per-state result. The path is a plain string: the original
# f-string had no placeholders.
bjs_state_counts_gdf = bjs_state_counts_gdf.to_crs(3968)
bjs_state_counts_gdf.to_file("data/bjs_per_states.gpkg")