In [1]:
import geopandas as gpd
import pandas as pd

## Get State Data

In [2]:
# Load the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [3]:
# Keep only the postal abbreviation, the full state name, and the geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [4]:
# sheet_name=None loads every sheet of the workbook; the result is indexed by
# sheet name when a single sheet is picked out afterwards.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [5]:
# Map the column labels as they were parsed from the workbook to usable names.
# The first column's label is the sheet's caption text; the estimate columns
# come through without a name.
# NOTE(review): confirm which estimate year "Unnamed: 5" corresponds to -- it is
# selected by position, not by year.
population_column_names = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}
state_populations_df = state_populations["NST-EST2024-POP"][
    list(population_column_names)
].rename(columns=population_column_names)

# Per the caption, leading dots mark sub-parts. Strip only those dots:
# slicing off the first character unconditionally (`.str[1:]`) would also
# mangle any label that has no leading dot.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [6]:
# Left-join the population figures onto the state geometries by state name,
# then keep the columns in a fixed order.
output_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    right=state_populations_df, how="left", on="NAME"
)[output_columns]

## Read Meijer Data

In [7]:
# Load every sheet of the Meijer workbook, then pick the address sheet.
meijer_locations = pd.read_excel(
    io="data/Meijer Location Addresses with DF Association.xlsx",
    engine="openpyxl",
    sheet_name=None,
)
meijer_df = meijer_locations["ADDRESSES"]

In [8]:
# Promote the sheet's second row to the header and keep the rows below it.
# Always start from the untouched sheet and build a new frame (set_axis does
# not modify its input), so re-running this cell gives the same result instead
# of promoting a data row to the header and dropping two more rows each time.
meijer_sheet = meijer_locations["ADDRESSES"]
meijer_df = meijer_sheet.iloc[2:].set_axis(meijer_sheet.iloc[1], axis="columns")

In [9]:
# Keep only rows whose "Type" is one of the retail store formats.
store_types = ["MEXPRESS", "Store", "Grocery"]
is_store = meijer_df["Type"].isin(store_types)

# drop=True discards the old row labels instead of inserting them as an extra
# column; without it, re-running this cell fails because that column already
# exists.
meijer_df = meijer_df[is_store].reset_index(drop=True)

In [10]:
# Count rows per state, then list the states from most to fewest rows.
stores_per_state = meijer_df.groupby("STATE").size()
meijer_count_df = stores_per_state.reset_index(name="COUNT").sort_values(
    by="COUNT", ascending=False
)

In [11]:
# Rename the state column to "STUSPS" so it can serve as a join key.
state_key_rename = {"STATE": "STUSPS"}
meijer_count_df = meijer_count_df.rename(columns=state_key_rename)

## Combine With States

In [20]:
# Attach the per-state counts to the state/population frame; the left join
# keeps every state, leaving COUNT empty where there is no match.
meijer_count_gdf = states_with_population_df.merge(
    right=meijer_count_df,
    how="left",
    on="STUSPS",
)

In [21]:
# Discard every row that has a missing value in any column.
meijer_count_gdf = meijer_count_gdf.dropna(axis="index", how="any")

In [22]:
# Add per-capita rates: COUNT divided by the population expressed in units of
# 100,000 and 1,000,000 residents, each rounded to one decimal place.
for rate_column, residents_per_unit in (("per_100k", 100_000), ("per_1m", 1_000_000)):
    population_units = meijer_count_gdf["POPULATION"] / residents_per_unit
    meijer_count_gdf[rate_column] = (
        meijer_count_gdf["COUNT"] / population_units
    ).round(decimals=1)

In [25]:
# Reproject to EPSG:3174 before writing the result to disk.
# NOTE(review): 3174 is presumably a projected CRS suited to the Great Lakes
# region -- confirm it is the intended CRS for the map.
meijer_count_gdf = meijer_count_gdf.to_crs(3174)

# Plain string literal: the previous f-string had no placeholders.
meijer_count_gdf.to_file("data/Meijers_Per_State.gpkg")

In [26]:
# Show the final table as the cell's output (bare last expression).
meijer_count_gdf

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,COUNT,per_100k,per_1m
6,MI,Michigan,10083356.0,"MULTIPOLYGON (((1104700.306 607760.091, 110483...",131.0,1.3,13.0
29,IL,Illinois,12642259.0,"POLYGON ((398308.248 427674.488, 398544.995 42...",26.0,0.2,2.1
47,KY,Kentucky,4550595.0,"MULTIPOLYGON (((552890.411 11775.685, 553614.8...",11.0,0.2,2.4
48,OH,Ohio,11824034.0,"MULTIPOLYGON (((1143472.966 560503.925, 114361...",56.0,0.5,4.7
49,WI,Wisconsin,5930405.0,"MULTIPOLYGON (((804470.533 979269.907, 804601....",12.0,0.2,2.0
53,IN,Indiana,6880131.0,"POLYGON ((677800.593 156825.79, 678100.6 15702...",43.0,0.6,6.2
