In [68]:
import httpx
import json
import os
import re

In [69]:
import demjson3
import geopandas as gpd
import pandas as pd

## Get State Data

In [70]:
# Read the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [71]:
# Keep only the postal abbreviation, the state name and the geometry.
kept_state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[kept_state_columns]

## Get Population Data

In [72]:
# sheet_name=None loads every sheet of the workbook and returns a dict of
# DataFrames keyed by sheet name.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [73]:
# Column labels as pandas produced them for this sheet: the geography labels sit
# under the sheet's caption text, and the population figures under an unnamed
# column (presumably the most recent estimate -- confirm against the workbook).
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
population_header = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, population_header]
].rename(columns={name_header: "NAME", population_header: "POPULATION"})

# Per the caption, sub-part rows are prefixed with dots. Strip only those dots:
# slicing off the first character unconditionally would also mangle labels that
# have no dot prefix, so they could never match on NAME in the later merge.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [74]:
# Left-join the population figures onto the state geometries by state name,
# then fix the column order.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Walmart Data

In [50]:
# Download the JavaScript file that holds Walmart's location facts.
url = "https://corporate.walmart.com/content/dam/corporate/documents/about/location-facts/locations.js"
# httpx does not follow redirects unless asked to; without this a moved file
# would come back as an empty redirect body.
r = httpx.get(url, follow_redirects=True)
# Stop here on an HTTP error rather than handing an error page to the parser.
r.raise_for_status()
js_text = r.text

In [75]:
# Capture the object literal assigned to `locationFacts`; DOTALL lets `.` span
# the newlines inside the literal.
location_facts_pattern = re.compile(
    r"var\s+locationFacts\s*=\s*(\{.*\});?", re.DOTALL
)
match = location_facts_pattern.search(js_text)

In [76]:
# The search yields None when the assignment is not found (for example when the
# upstream file changes shape); fail with a clear message instead of an
# AttributeError on `None.group`.
if match is None:
    raise ValueError(
        "Could not find the `locationFacts` object in the downloaded JavaScript"
    )

js_object_text = match.group(1)
# Turn backtick template literals into ordinary double-quoted strings before
# decoding: embedded double quotes are escaped and raw newlines become "\n".
js_object_text = re.sub(
    r"`([\s\S]*?)`",  # Match multiline content inside backticks
    lambda m: '"' + m.group(1).replace('"', '\\"').replace("\n", "\\n") + '"',
    js_object_text,
)
location_facts = demjson3.decode(js_object_text)

In [77]:
# Hoist each record's nested "stats" mapping into the record itself.
for record in location_facts.values():
    nested_stats = record.pop("stats", None)
    if not nested_stats:
        continue
    record.update(nested_stats)

In [78]:
# Flatten the two footprint sub-mappings into each record, prefixing every key
# with its section name so entries from different sections cannot collide.
for record in location_facts.values():
    for section in ("footprintTable", "footprintGrid"):
        nested = record.pop(section, None)
        if not nested:
            continue
        for key, value in nested.items():
            record[f"{section}_{key}"] = value

In [93]:
# Columns kept from the flattened records.
wanted_columns = [
    "state",
    "footprintTable_totalRetailUnits",
    "footprintTable_samsClub",
    "footprintGrid_numberOfAssociates",
    "footprintGrid_averageWage",
]
# Regex clean-up so the values can be cast to numbers later: "N/A" becomes "0",
# thousands separators and dollar signs are removed.
cleanup_patterns = {"N/A": "0", ",": "", r"\$": ""}

walmart_df = (
    pd.DataFrame(list(location_facts.values()))
    .loc[:, wanted_columns]
    .replace(to_replace=cleanup_patterns, regex=True)
)

In [95]:
# Short, upper-case column names; "NAME" lines up with the join key used by
# the state frame.
walmart_column_names = {
    "state": "NAME",
    "footprintTable_totalRetailUnits": "UNITS",
    "footprintTable_samsClub": "SAMS_CLUBS",
    "footprintGrid_numberOfAssociates": "WORKERS",
    "footprintGrid_averageWage": "AVG_WAGE",
}
walmart_df = walmart_df.rename(columns=walmart_column_names)

In [101]:
# Cast the cleaned strings to numbers: counts to int, the wage to float.
count_columns = ["UNITS", "SAMS_CLUBS", "WORKERS"]
dtype_by_column = {**dict.fromkeys(count_columns, int), "AVG_WAGE": float}

# WALMARTS is the total unit count minus the Sam's Club count.
walmart_df = walmart_df.astype(dtype_by_column).assign(
    WALMARTS=lambda frame: frame["UNITS"] - frame["SAMS_CLUBS"]
)

## Combine With States

In [123]:
# Left join keeps every state row, even those with no Walmart record.
walmart_gdf = states_with_population_df.merge(
    walmart_df,
    how="left",
    on="NAME",
)

In [124]:
# Rows without a match in the left joins have missing values; treat them as 0.
walmart_gdf = walmart_gdf.fillna(value=0)

In [125]:
# Denominators with zeros masked out (turned into NaN). Dividing a nonzero
# count by 0 would produce inf, which a later fillna(0) does not remove; with
# the mask, undefined ratios come out as NaN instead.
population = walmart_gdf["POPULATION"].mask(walmart_gdf["POPULATION"] == 0)
units = walmart_gdf["UNITS"].mask(walmart_gdf["UNITS"] == 0)

# Walmart stores per 100,000 and per 1,000,000 residents.
walmart_gdf["per_100k"] = (
    walmart_gdf["WALMARTS"] / (population / 100_000)
).round(decimals=1)
walmart_gdf["per_1m"] = (
    walmart_gdf["WALMARTS"] / (population / 1_000_000)
).round(decimals=1)
# Average number of workers per retail unit.
walmart_gdf["workers_per_unit"] = (walmart_gdf["WORKERS"] / units).round(
    decimals=1
)

In [126]:
# Ratios that came out as NaN (for example 0 / 0) are reported as 0.
walmart_gdf = walmart_gdf.fillna(value=0)

In [127]:
# Reproject to EPSG:9311, then write the result to disk.
output_path = "data/walmarts_per_state.gpkg"
walmart_gdf = walmart_gdf.to_crs(epsg=9311)
walmart_gdf.to_file(output_path)

In [128]:
# Preview the first rows only rather than rendering the whole frame.
walmart_gdf.head(10)

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,UNITS,SAMS_CLUBS,WORKERS,AVG_WAGE,WALMARTS,per_100k,per_1m,workers_per_unit
0,MS,Mississippi,2943172.0,"MULTIPOLYGON (((1109997.773 -1565578.875, 1111...",86.0,7.0,25955.0,18.71,79.0,2.7,26.8,301.8
1,NC,North Carolina,10881189.0,"MULTIPOLYGON (((2154808.027 -692253.992, 21554...",214.0,22.0,62396.0,18.95,192.0,1.8,17.6,291.6
2,OK,Oklahoma,4063882.0,"POLYGON ((-268912.756 -936547.904, -268600.891...",134.0,13.0,35417.0,18.04,121.0,3.0,29.8,264.3
3,VA,Virginia,8734685.0,"MULTIPOLYGON (((2098153.247 -490400.387, 20981...",148.0,15.0,44936.0,19.32,133.0,1.5,15.2,303.6
4,WV,West Virginia,1770495.0,"POLYGON ((1506482.53 -601393.056, 1506489.516 ...",44.0,5.0,12667.0,17.42,39.0,2.2,22.0,287.9
5,LA,Louisiana,4588071.0,"MULTIPOLYGON (((1079322.442 -1609024.878, 1079...",137.0,14.0,37022.0,18.27,123.0,2.7,26.8,270.2
6,MI,Michigan,10083356.0,"MULTIPOLYGON (((1376542.504 -187676.574, 13766...",117.0,23.0,35257.0,18.3,94.0,0.9,9.3,301.3
7,MA,Massachusetts,7066568.0,"MULTIPOLYGON (((2420778.194 34524.899, 2421697...",48.0,0.0,13405.0,19.54,48.0,0.7,6.8,279.3
8,ID,Idaho,1971122.0,"POLYGON ((-1357016.205 78008.02, -1356337.908 ...",27.0,1.0,8971.0,18.33,26.0,1.3,13.2,332.3
9,FL,Florida,22904868.0,"MULTIPOLYGON (((1997726.172 -1929592.915, 1997...",387.0,46.0,119545.0,19.16,341.0,1.5,14.9,308.9
