In [42]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [43]:
# Location of the state shapefile, relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Read the shapefile; later cells join on its NAME and STUSPS columns and keep
# its geometry column.
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [44]:
# sheet_name=None makes read_excel return every worksheet as a dict keyed by
# sheet name; the sheet of interest is picked out of that dict afterwards.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [45]:
# The label of the name column is the sheet's long descriptive note, used by
# pandas verbatim as the column header. Keep the literal in one place so the
# selection and the rename cannot drift apart.
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

# Keep only the name column and the "Unnamed: 5" column, and give both
# meaningful labels. rename() returns a new frame, so the column assignment
# below does not write into a slice of the original sheet.
state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, "Unnamed: 5"]
].rename(columns={name_header: "NAME", "Unnamed: 5": "POPULATION"})

# Per the header note, leading dots mark sub-parts. Strip only those dots:
# unconditionally dropping the first character (the previous `.str[1:]`) also
# mangled any name that has no leading dot, which then could never match in a
# later join on NAME. Non-string cells still become NaN, as before.
state_populations_df["NAME"] = state_populations_df["NAME"].str.replace(
    r"^\.+", "", regex=True
)

In [46]:
# Left join on NAME keeps every row of states_df even when no population row
# matches; the result is then trimmed to the columns used further down.
states_with_population_df = states_df.merge(
    right=state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Open Bankruptcy Data

In [59]:
# Load just the "Table F-2." worksheet of the bankruptcy workbook.
bankruptcy_df = pd.read_excel(
    io="data/bf_f2_1231.2024.xlsx",
    engine="openpyxl",
    sheet_name="Table F-2.",
)

In [60]:
# Keep the two columns of interest, give them readable labels, and normalise
# the one cell value that carries a footnote marker ("AR ²" -> "AR").
bankruptcy_df = (
    bankruptcy_df[["Unnamed: 0", "Unnamed: 11"]]
    .rename(columns={"Unnamed: 0": "NAME", "Unnamed: 11": "BANKRUPTCIES"})
    .replace("AR ²", "AR")
)

In [61]:
# The grouping key is whatever precedes the first comma in NAME (the whole
# value when there is no comma).
bankruptcy_df["STATE_CODES"] = bankruptcy_df["NAME"].str.split(",").str.get(0)

In [62]:
# Sum BANKRUPTCIES over all rows sharing a STATE_CODES value, then expose the
# key as STUSPS so it lines up with the join column used in the merge step.
bankruptcy_states_df = (
    bankruptcy_df.groupby("STATE_CODES")
    .agg({"BANKRUPTCIES": "sum"})
    .reset_index()
    .rename(columns={"STATE_CODES": "STUSPS"})
)

## Merge Data

In [63]:
# Attach the per-state totals with a left join on STUSPS, then discard every
# row that is missing a value in any column.
bankruptcy_states_gdf = (
    states_with_population_df
    .merge(right=bankruptcy_states_df, how="left", on="STUSPS")
    .dropna()
)

In [64]:
# Columns carried forward, in output order (STUSPS is no longer needed).
output_columns = ["NAME", "geometry", "POPULATION", "BANKRUPTCIES"]
bankruptcy_states_gdf = bankruptcy_states_gdf[output_columns]

In [65]:
# Derive the final numeric columns.
#
# * BANKRUPTCIES is cast to int.
# * Per_100k = BANKRUPTCIES / (POPULATION / 100_000), rounded to one decimal.
#
# POPULATION is cast to float *before* the division so the quotient is a real
# float column when .round() runs. Previously the float cast came after the
# rounding; if POPULATION arrives as object dtype (presumably the case, since
# it comes from a spreadsheet column that pandas labelled "Unnamed: 5" --
# TODO confirm), rounding a non-numeric column is not reliably applied.
#
# assign() builds a new frame rather than setting columns on the result of a
# column selection, so the cell is safe to re-run and avoids chained-assignment
# warnings. The second lambda sees the already-converted BANKRUPTCIES column.
bankruptcy_states_gdf = bankruptcy_states_gdf.assign(
    BANKRUPTCIES=lambda gdf: gdf["BANKRUPTCIES"].astype(int),
    Per_100k=lambda gdf: (
        gdf["BANKRUPTCIES"] / (gdf["POPULATION"].astype(float) / 100_000)
    ).round(decimals=1),
)

In [66]:
# Reproject to EPSG code 9311, then write the result to disk; the output path
# ends in .gpkg.
bankruptcy_states_gdf = bankruptcy_states_gdf.to_crs(epsg=9311)
bankruptcy_states_gdf.to_file(filename="data/bankruptcies_per_state.gpkg")