In [50]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [51]:
# Load the state boundary shapefile and drop the rows whose STATEFP is on the
# exclusion list below.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# FIPS codes for American Samoa (60), Guam (66), Northern Mariana Islands (69),
# Puerto Rico (72) and the U.S. Virgin Islands (78).
excluded_statefp = ["60", "66", "69", "72", "78"]

all_areas_df = gpd.read_file(file_path)
is_excluded = all_areas_df["STATEFP"].isin(excluded_statefp)
# Re-number the rows so the index is contiguous after filtering.
states_df = all_areas_df.loc[~is_excluded].reset_index(drop=True)

## Open Population Data

In [52]:
# sheet_name=None makes read_excel return a dict of DataFrames keyed by sheet
# name, so individual sheets are looked up by name in later cells.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path,
    sheet_name=None,
    engine="openpyxl",
)

In [53]:
# Build a two-column (NAME, POPULATION) table from the "NST-EST2024-POP" sheet.
#
# The sheet was read with its default header row, so column A is labelled with
# a long caption string and the other columns carry positional
# "Unnamed: N" labels.
name_header = (
    "table with row headers in column A and column headers in rows 3 through 4. "
    "(leading dots indicate sub-parts)"
)
# NOTE(review): "Unnamed: 5" is a positional label; confirm it is the estimate
# year this analysis is meant to use.
population_header = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, population_header]
].rename(columns={name_header: "NAME", population_header: "POPULATION"})

# Remove only the leading dot(s) that mark sub-part rows (e.g. ".Alabama").
# Slicing with .str[1:] would also chop the first letter off any name that has
# no leading dot. Non-string cells stay NaN, exactly as with the slice.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [54]:
# Left-join the population figures onto the state geometries by state name,
# then keep only the columns used downstream.
output_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[output_columns]

## Get Hospital Data

In [55]:
# Raw hospital table; later cells read its "STATE" and
# "NUMBER OF AT-RISK RURAL HOSPITALS" columns.
hospitals_csv_path = "data/hospitals.csv"
hospitals_df = pd.read_csv(hospitals_csv_path)

In [56]:
# Derive the join key and make the hospital count an integer column.
at_risk_column = "NUMBER OF AT-RISK RURAL HOSPITALS"

# str.title() capitalises every word, including "Of", so a value such as
# "DISTRICT OF COLUMBIA" would become "District Of Columbia" and fail to match
# a NAME spelled "District of Columbia" in the later merge. Lower-case the
# connective again so the key lines up.
# NOTE(review): assumes STATE holds full state names; confirm against the CSV.
hospitals_df["NAME"] = (
    hospitals_df["STATE"].str.title().str.replace(" Of ", " of ", regex=False)
)
hospitals_df[at_risk_column] = hospitals_df[at_risk_column].astype(int)

In [57]:
# Keep only the join key and the hospital count.
kept_columns = ["NAME", "NUMBER OF AT-RISK RURAL HOSPITALS"]
hospitals_df = hospitals_df.loc[:, kept_columns]

## Merge Data

In [58]:
# Left join keeps every state row; states with no match in the hospital table
# get NaN in the hospital-count column.
hospitals_gdf = states_with_population_df.merge(
    hospitals_df,
    how="left",
    on="NAME",
)

In [59]:
# Per-capita rates: hospital count divided by population expressed in units of
# `per_people` residents.
at_risk_counts = hospitals_gdf["NUMBER OF AT-RISK RURAL HOSPITALS"]
population = hospitals_gdf["POPULATION"]
for rate_column, per_people in (
    ("HOSPITALS_PER_100k", 100_000),
    ("HOSPITALS_PER_1m", 1_000_000),
):
    hospitals_gdf[rate_column] = at_risk_counts / (population / per_people)

In [61]:
# Rows with no match in the hospital table have NaN in the count and in both
# derived rates; treat those as zero.
# The fill is limited to these columns so a missing POPULATION, STUSPS or
# geometry stays visibly missing instead of being silently written out as 0.
zero_fill_columns = [
    "NUMBER OF AT-RISK RURAL HOSPITALS",
    "HOSPITALS_PER_100k",
    "HOSPITALS_PER_1m",
]
hospitals_gdf = hospitals_gdf.fillna({column: 0 for column in zero_fill_columns})

In [62]:
# Reproject to EPSG:9311 and write the result to disk; the output format is
# taken from the ".gpkg" file extension.
output_epsg = 9311
hospitals_gdf = hospitals_gdf.to_crs(epsg=output_epsg)
hospitals_gdf.to_file("data/hospitals.gpkg")

In [63]:
"NUMBER OF AT-RISK RURAL HOSPITALS PER STATE".title()

'Number Of At-Risk Rural Hospitals Per State'