In [10]:
import geopandas as gpd
import pandas as pd

## Open State data

In [11]:
# Path to the state-boundary shapefile, relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Read the boundaries into a GeoDataFrame.
states_gdf = gpd.read_file(filename=file_path)

In [12]:
# Keep only the columns used later: the "STUSPS" and "NAME" join keys and the geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_gdf = states_gdf[state_columns]

## Open population data

In [13]:
# sheet_name=None makes read_excel load every sheet; the result is indexed by
# sheet name in the next cell.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path, engine="openpyxl", sheet_name=None
)

In [14]:
# The sheet is read with its banner row as the header, so the area-name column
# carries the banner text as its name and the other columns are "Unnamed: N".
# NOTE(review): "Unnamed: 5" is presumably the most recent estimate column —
# confirm against the workbook.
area_name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

state_populations_df = (
    state_populations["NST-EST2024-POP"][[area_name_header, "Unnamed: 5"]]
    .rename(columns={area_name_header: "NAME", "Unnamed: 5": "POPULATION"})
    # Strip only the leading dots that mark sub-parts. Slicing off the first
    # character unconditionally would also eat the first letter of any label
    # that has no leading dot, and that row could then never match on "NAME".
    .assign(NAME=lambda table: table["NAME"].str.lstrip("."))
)

In [15]:
# Attach the population figure to each state by name. The left join keeps every
# state row; states with no matching name get a missing POPULATION.
# The trailing selection fixes the column order.
states_with_population_gdf = states_gdf.merge(
    right=state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Open 7-Eleven and Speedway locations

In [16]:
# Load all store locations and reproject them to CRS code 9311 in one step.
stores_gdf = gpd.read_file("data/711_national.gpkg").to_crs(9311)

# Split the stores by the value in their "brand" column.
is_speedway = stores_gdf["brand"] == "speedway"
is_seven_eleven = stores_gdf["brand"] == "7-eleven"
speedway_gdf = stores_gdf[is_speedway]
seven_eleven_gdf = stores_gdf[is_seven_eleven]

In [17]:
# Count Speedway stores per "state" value and expose the key as "STUSPS" so the
# table can be joined to the state boundaries.
# NOTE(review): assumes "state" holds the same codes as "STUSPS" — confirm.
speedway_counts_df = (
    speedway_gdf.groupby("state")
    .size()
    .reset_index(name="speedway")
    .rename(columns={"state": "STUSPS"})
)

In [18]:
# Count 7-Eleven stores per "state" value and expose the key as "STUSPS" so the
# table can be joined to the state boundaries.
# NOTE(review): assumes "state" holds the same codes as "STUSPS" — confirm.
eleven_counts_df = (
    seven_eleven_gdf.groupby("state")
    .size()
    .reset_index(name="7-elevens")
    .rename(columns={"state": "STUSPS"})
)

## Merge the store counts into the state data

In [19]:
# Left join on the state code: every state is kept, and a state missing from
# the 7-Eleven counts gets a missing "7-elevens" value.
states_with_eleven_gdf = states_with_population_gdf.merge(
    right=eleven_counts_df, how="left", on="STUSPS"
)

In [20]:
# Left join on the state code: every state is kept, and a state missing from
# the Speedway counts gets a missing "speedway" value.
states_with_eleven_and_speedway_gdf = states_with_eleven_gdf.merge(
    right=speedway_counts_df, how="left", on="STUSPS"
)

In [21]:
# The left merges leave a missing count for any state without stores of a brand;
# those mean "zero stores". Fill only the two count columns: a frame-wide fill
# would also turn a missing POPULATION into 0, which is not a real population
# and would later be used as a divisor.
states_with_eleven_and_speedway_gdf = states_with_eleven_and_speedway_gdf.fillna(
    {"7-elevens": 0, "speedway": 0}
)

In [22]:
# Store counts are whole numbers; cast both count columns to int.
for count_column in ("7-elevens", "speedway"):
    states_with_eleven_and_speedway_gdf[count_column] = (
        states_with_eleven_and_speedway_gdf[count_column].astype(int)
    )

In [26]:
def speedway_seven_elevens_classification(row) -> str:
    """Label a row by which brand has more stores.

    Args:
        row: Mapping-like object with "7-elevens" and "speedway" counts.

    Returns:
        "No Stores" when both counts are zero, "Same Amount" when they are
        equal and non-zero, otherwise "More Speedways" or "More 7-Elevens".
        If no comparison holds (e.g. a NaN count), the function falls through
        and returns None.
    """
    seven_eleven_count, speedway_count = row["7-elevens"], row["speedway"]

    # A tie is either "no stores at all" or "the same non-zero amount".
    if seven_eleven_count == speedway_count:
        return "No Stores" if seven_eleven_count == 0 else "Same Amount"

    if speedway_count > seven_eleven_count:
        return "More Speedways"
    if speedway_count < seven_eleven_count:
        return "More 7-Elevens"
    return None

In [27]:
# Classify each state row (axis=1 hands the function one row at a time) and
# store the result in the "label" column.
states_with_eleven_and_speedway_gdf["label"] = states_with_eleven_and_speedway_gdf.apply(
    speedway_seven_elevens_classification, axis=1
)

In [28]:
# Population in units of 100,000 people, computed once and shared by both rates.
# Rows without a usable population (missing, or 0 after an earlier zero-fill)
# are masked to NaN so their rates come out as NaN instead of dividing by zero.
population = states_with_eleven_and_speedway_gdf["POPULATION"]
population_in_100k = population.where(population > 0) / 100000

states_with_eleven_and_speedway_gdf["7-elevens_per_100k"] = (
    states_with_eleven_and_speedway_gdf["7-elevens"] / population_in_100k
)
states_with_eleven_and_speedway_gdf["speedways_per_100k"] = (
    states_with_eleven_and_speedway_gdf["speedway"] / population_in_100k
)

In [30]:
# Reproject the state table to EPSG code 9311, the same code used for the store
# locations, before exporting.
states_with_eleven_and_speedway_gdf = states_with_eleven_and_speedway_gdf.to_crs(
    epsg=9311
)

# Write the combined table to disk.
states_with_eleven_and_speedway_gdf.to_file(filename="data/eleven_vs_speedway.gpkg")