In [60]:
import geopandas as gpd
import pandas as pd

In [61]:
from shapely.geometry import Point

## Get State Data

In [62]:
# Path to the state-boundary shapefile; read into a GeoDataFrame below.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [63]:
# Keep only the postal abbreviation, the state name and the geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [64]:
# sheet_name=None makes pandas return a dict of DataFrames keyed by sheet
# name, so every sheet in the workbook is loaded.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    sheet_name=None,
    engine="openpyxl",
)

In [65]:
# Map the raw spreadsheet headers onto tidy column names. The first header is
# the sheet's own descriptive text for column A.
# NOTE(review): "Unnamed: 5" is a positional, auto-generated header; confirm it
# is the estimate year this analysis is meant to use.
population_columns = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}

# Select and rename the two columns, then remove the leading dots that the
# sheet uses to mark sub-part rows. lstrip(".") only removes dots, so a row
# whose name has no leading dot keeps its full name (a blind [1:] slice would
# chop its first letter and break the later merge on NAME).
state_populations_df = (
    state_populations["NST-EST2024-POP"][list(population_columns)]
    .rename(columns=population_columns)
    .assign(NAME=lambda df: df["NAME"].str.lstrip("."))
)

In [66]:
# Left-join the population figures onto the state geometries by state name,
# keeping every state row, and put the columns in a fixed order.
states_with_population_df = states_df.merge(
    right=state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Kwik Trip Data

In [67]:
# Load the raw store list.
kwik_trip_df = pd.read_csv(
    filepath_or_buffer="data/Store List - Kwik Trip  Kwik Star.csv"
)

In [68]:
# Build one point per store from its longitude/latitude columns using the
# vectorized geopandas constructor instead of a Python-level loop over rows.
geometry = gpd.points_from_xy(kwik_trip_df["Longitude"], kwik_trip_df["Latitude"])

# The coordinates now live in the geometry, so drop the raw columns.
# NOTE: this rebinds kwik_trip_df, so re-running this cell without reloading
# the CSV will fail because the coordinate columns are gone.
kwik_trip_df = kwik_trip_df.drop(columns=["Longitude", "Latitude"])

# The points are declared as EPSG:4326 (longitude/latitude).
kwik_trip_gdf = gpd.GeoDataFrame(kwik_trip_df, geometry=geometry, crs="EPSG:4326")

In [69]:
# Reproject the store points to EPSG:9311, then write them to a GeoPackage.
kwik_trip_gdf = kwik_trip_gdf.to_crs(crs=9311)
kwik_trip_gdf.to_file(filename="data/stores.gpkg")

In [70]:
# Count the store rows per value of "State".
stores_per_state = kwik_trip_gdf.groupby("State").size()

# Turn the counts into a two-column table and rename the key column so it
# matches the "STUSPS" column used by the state boundaries.
kwik_trip_state_counts_df = pd.DataFrame(stores_per_state, columns=["stores"])
kwik_trip_state_counts_df = kwik_trip_state_counts_df.reset_index()
kwik_trip_state_counts_df = kwik_trip_state_counts_df.rename(
    columns={"State": "STUSPS"}
)

## Combine With States

In [71]:
# Left-join the per-state store counts onto the state table so that states
# without any matching count row are still kept (their "stores" is missing).
kwik_trip_state_counts_gdf = states_with_population_df.merge(
    right=kwik_trip_state_counts_df,
    how="left",
    on="STUSPS",
)

In [72]:
# A state with no row in the store counts has a missing "stores" value after
# the left merge; treat that as zero stores and store the count as an integer.
# Only "stores" is filled in this cell: filling the whole frame would also turn
# a missing POPULATION into 0 and make the per-capita rates divide by zero.
kwik_trip_state_counts_gdf = kwik_trip_state_counts_gdf.assign(
    stores=lambda gdf: gdf["stores"].fillna(0).astype(int)
)

In [73]:
# Stores per 100,000 and per 1,000,000 residents: the store count divided by
# the population expressed in units of 100k / 1M people.
kwik_trip_state_counts_gdf = kwik_trip_state_counts_gdf.assign(
    per_100k=lambda gdf: gdf["stores"] / (gdf["POPULATION"] / 100000),
    per_1m=lambda gdf: gdf["stores"] / (gdf["POPULATION"] / 1_000_000),
)

In [74]:
# Replace every remaining missing value in the table with 0 before export.
kwik_trip_state_counts_gdf = kwik_trip_state_counts_gdf.fillna(value=0)

In [75]:
# Reproject the per-state table to EPSG:9311 (the same CRS the store points
# are written in) and write it to a GeoPackage.
kwik_trip_state_counts_gdf = kwik_trip_state_counts_gdf.to_crs(9311)
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
kwik_trip_state_counts_gdf.to_file("data/kwik_trip_per_state.gpkg")