In [1]:
import geopandas as gpd
import pandas as pd

## Open state boundary data

In [2]:
# State boundary shapefile (a Census cartographic boundary file, judging by
# the file name — confirm provenance).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [3]:
# sheet_name=None makes read_excel load every worksheet and return a dict
# keyed by sheet name; the sheet of interest is picked out afterwards.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [4]:
# The workbook has no clean header row, so pandas names the first column after
# the sheet's descriptive title text and the remaining ones "Unnamed: N".
# NOTE(review): confirm which estimate year "Unnamed: 5" actually holds.
population_column_names = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}
state_populations_df = state_populations["NST-EST2024-POP"][
    list(population_column_names)
].rename(columns=population_column_names)

# Sub-part rows are prefixed with a leading dot (see the column title above).
# Strip only that dot: blindly dropping the first character would also mangle
# labels that have no dot prefix, so they could never match the shapefile NAME.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [5]:
# Left join keeps every state geometry even when no population row matches
# its NAME; only the columns needed downstream are retained.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Open 2024 Bankruptcy data

In [6]:
# Raw 2024 bankruptcy worksheet; columns are selected and renamed in the next cell.
bankruptcy_2024_df = pd.read_excel(
    io="data/bf_f2_1231.2024.xlsx",
    engine="openpyxl",
    sheet_name="Table F-2.",
)

In [7]:
# Keep the label column and the count column, give them readable names, and
# normalise the one label that carries a trailing superscript marker.
# NOTE(review): "Unnamed: 11" is assumed to be the count of interest — confirm
# against the worksheet layout.
bankruptcy_2024_df = (
    bankruptcy_2024_df[["Unnamed: 0", "Unnamed: 11"]]
    .rename(columns={"Unnamed: 0": "NAME", "Unnamed: 11": "BANKRUPTCIES"})
    .replace("AR ²", "AR")
)

In [8]:
# Keep the text before the first comma of each NAME value (presumably the
# state abbreviation of a district label — verify against the sheet).
bankruptcy_2024_df["STATE_CODES"] = (
    bankruptcy_2024_df["NAME"].str.split(",").str.get(0)
)

In [9]:
# Sum the counts of all rows sharing a state code, then name the key column
# STUSPS so it lines up with the state boundary data.
bankruptcy_2024_df = (
    bankruptcy_2024_df.groupby("STATE_CODES")
    .agg({"BANKRUPTCIES": "sum"})
    .reset_index()
    .rename(columns={"STATE_CODES": "STUSPS"})
)

## Open 2023 Bankruptcy data

In [10]:
# Raw 2023 bankruptcy worksheet; columns are selected and renamed in the next cell.
bankruptcy_2023_df = pd.read_excel(
    io="data/bf_f2_1231.2023.xlsx",
    engine="openpyxl",
    sheet_name="Table F-2.",
)

In [11]:
# Keep the label column and the count column, give them readable names, and
# normalise the one label that carries a trailing superscript marker.
# NOTE(review): "Unnamed: 11" is assumed to be the count of interest — confirm
# against the worksheet layout.
bankruptcy_2023_df = (
    bankruptcy_2023_df[["Unnamed: 0", "Unnamed: 11"]]
    .rename(columns={"Unnamed: 0": "NAME", "Unnamed: 11": "BANKRUPTCIES"})
    .replace("AR ²", "AR")
)

In [12]:
# Keep the text before the first comma of each NAME value (presumably the
# state abbreviation of a district label — verify against the sheet).
bankruptcy_2023_df["STATE_CODES"] = (
    bankruptcy_2023_df["NAME"].str.split(",").str.get(0)
)

In [13]:
# Sum the counts of all rows sharing a state code, then name the key column
# STUSPS so it lines up with the state boundary data.
bankruptcy_2023_df = (
    bankruptcy_2023_df.groupby("STATE_CODES")
    .agg({"BANKRUPTCIES": "sum"})
    .reset_index()
    .rename(columns={"STATE_CODES": "STUSPS"})
)

## Merge data, compute changes, and export

In [25]:
# Inner join: only state codes present in both years survive. The shared
# BANKRUPTCIES column is disambiguated with a per-year suffix.
bankruptcy_diff_df = pd.merge(
    bankruptcy_2024_df,
    bankruptcy_2023_df,
    on="STUSPS",
    how="inner",
    suffixes=("_2024", "_2023"),
)

In [26]:
# Attach both years of counts to the state geometries, then discard every row
# that has a missing value in any column (no population or no counts).
bankruptcy_states_gdf = (
    states_with_population_df
    .merge(bankruptcy_diff_df, how="left", on="STUSPS")
    .dropna()
)

In [27]:
# Absolute year-over-year change in filings.
bankruptcy_states_gdf["diff"] = (
    bankruptcy_states_gdf["BANKRUPTCIES_2024"]
    - bankruptcy_states_gdf["BANKRUPTCIES_2023"]
)

# Relative change expressed as an actual percentage. The ratio is scaled by
# 100 before rounding: rounding the raw fraction to one decimal would collapse
# e.g. a 4% rise to 0.0 and a 14% rise to 0.1, and would not be a percent.
# Casting to float first ensures the rounding runs on a numeric dtype even if
# the spreadsheet-derived columns were read in as object.
bankruptcy_states_gdf["PERCENT_CHANGE"] = (
    (
        bankruptcy_states_gdf["diff"]
        / bankruptcy_states_gdf["BANKRUPTCIES_2023"]
        * 100
    )
    .astype(float)
    .round(decimals=1)
)

In [28]:
# Keep only the columns that go into the export. The explicit .copy() makes
# the result an independent frame, so the column assignments that follow do
# not operate on a slice of the merged frame (SettingWithCopyWarning).
bankruptcy_states_gdf = bankruptcy_states_gdf[
    [
        "NAME",
        "geometry",
        "POPULATION",
        "BANKRUPTCIES_2023",
        "BANKRUPTCIES_2024",
        "PERCENT_CHANGE",
        "diff",
    ]
].copy()

In [29]:
# Store the filing counts and their difference as integers.
for count_column in ("BANKRUPTCIES_2023", "BANKRUPTCIES_2024", "diff"):
    bankruptcy_states_gdf[count_column] = bankruptcy_states_gdf[
        count_column
    ].astype(int)
bankruptcy_states_gdf["PERCENT_CHANGE"] = bankruptcy_states_gdf[
    "PERCENT_CHANGE"
].astype(float)

# Filings per 100,000 residents for each year. The quotient is cast to float
# *before* rounding so the rounding always runs on a numeric dtype; if
# POPULATION came out of the spreadsheet as object dtype, rounding first
# would be skipped or raise depending on the pandas version.
population_in_100k = bankruptcy_states_gdf["POPULATION"] / 100_000
for year in ("2023", "2024"):
    bankruptcy_states_gdf[f"Per_100k_{year}"] = (
        (bankruptcy_states_gdf[f"BANKRUPTCIES_{year}"] / population_in_100k)
        .astype(float)
        .round(decimals=1)
    )

In [30]:
# Reproject to EPSG:9311, then write the result to disk.
bankruptcy_states_gdf = bankruptcy_states_gdf.to_crs(epsg=9311)
bankruptcy_states_gdf.to_file(filename="data/bankruptcies_per_state.gpkg")