In [1]:
import geopandas as gpd
import pandas as pd

## Open State data

In [2]:
# Read the state boundary shapefile; later cells join against its NAME column.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get Homeownership Percents

In [3]:
# Load the ACS table export (the file name points to table S2502, 2023 5-year data).
# thousands="," tells pandas to treat commas as digit-group separators when parsing numbers.
acs_csv_path = "data/ACSST5Y2023.S2502-2025-04-12T212317.csv"
acs_homeownership = pd.read_csv(acs_csv_path, thousands=",")

In [11]:
# Build the column-rename mapping used to slim down the ACS table:
#   * the label column becomes "NAME";
#   * every column whose header splits on "!!" into exactly three parts with
#     "Percent owner-occupied housing units" in the middle is renamed to its
#     first part (whitespace-stripped). These first parts are what later get
#     merged against the states' NAME column.
# Columns that do not match are simply left out of the mapping.
rename_columns_percent = {"Label (Grouping)": "NAME"}
for column in acs_homeownership.columns:
    str_split = column.split("!!")
    is_owner_percent = (
        len(str_split) == 3
        and str_split[1] == "Percent owner-occupied housing units"
    )
    if is_owner_percent:
        rename_columns_percent[column] = str_split[0].strip()

In [16]:
# Keep only the label column and the selected percent columns, apply their short
# names, then transpose so that each selected column becomes a row.
selected_columns = list(rename_columns_percent)
transposed_df = (
    acs_homeownership[selected_columns]
    .rename(columns=rename_columns_percent)
    .T.reset_index()
)

# After the transpose the first row holds the headers ("NAME" followed by the
# source table's row labels); promote it to the column names, stripping the
# surrounding whitespace from each label.
header_row = transposed_df.iloc[0]
transposed_df.columns = [label.strip() for label in header_row]

# Drop the header row. The explicit copy makes percents_df an independent frame,
# so later cells can add columns to it without writing into a slice of
# transposed_df.
percents_df = transposed_df.iloc[1:].copy()

In [20]:
# Parse the "Under 35 years" percent strings into floats, stored as column "U35".
# regex=False forces a literal "%" removal regardless of the installed pandas
# version's default for `regex`. Using .assign returns a new frame rather than
# writing a column in place into the row slice produced by the previous cell.
percents_df = percents_df.assign(
    U35=percents_df["Under 35 years"].str.replace("%", "", regex=False).astype(float)
)

In [21]:
# Narrow the table to the join key and the parsed under-35 percentage.
kept_columns = ["NAME", "U35"]
percents_df = percents_df[kept_columns]

## Merge Data

In [23]:
# Attach the under-35 percentage to each state's geometry, matching on NAME.
# how="inner" keeps only names present in both tables; dropna() then discards
# any row with a missing value in *any* column, not just U35.
# NOTE(review): despite the "poverty" prefix, this frame holds the
# owner-occupied (homeownership) percentages built above.
poverty_columns_percents_gdf = (
    states_df
    .merge(percents_df, how="inner", on="NAME")
    .dropna()
)

In [24]:
# Keep just the name, the geometry, and the under-35 percentage for export.
output_columns = ["NAME", "geometry", "U35"]
poverty_columns_percents_gdf = poverty_columns_percents_gdf[output_columns]

In [25]:
# Reproject to EPSG:9311 and write the result as a GeoPackage.
# The driver is passed explicitly so the output format does not depend on
# geopandas inferring it from the ".gpkg" file extension.
poverty_columns_percents_gdf = poverty_columns_percents_gdf.to_crs(epsg=9311)
poverty_columns_percents_gdf.to_file("data/homeownership.gpkg", driver="GPKG")