In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

## Open Housing Data

In [39]:
# Load the "2024" and "2023" worksheets of the PIT-count workbook in one call.
# Passing a list as `sheet_name` makes pandas return a dict of DataFrames
# keyed by sheet name; the .xlsb file is parsed with the pyxlsb engine.
pit_counts_path = "data/2007-2024-PIT-Counts-by-State.xlsb"
pit_sheet_names = ["2024", "2023"]
homeless_change_df = pd.read_excel(
    pit_counts_path, sheet_name=pit_sheet_names, engine="pyxlsb"
)

In [66]:
def _extract_homeless_counts(sheet):
    """Return the per-state homeless counts from one PIT-count worksheet.

    Parameters
    ----------
    sheet : pandas.DataFrame
        A worksheet holding at least the "State" and "Overall Homeless" columns.

    Returns
    -------
    pandas.DataFrame
        Two columns: "STUSPS" (renamed from "State") and "homeless_population"
        (renamed from "Overall Homeless", stored as int64). Rows whose count is
        not a whole number (blank cells, text, notes) are dropped; the original
        row index of the kept rows is preserved.
    """
    counts = sheet.rename(
        columns={"State": "STUSPS", "Overall Homeless": "homeless_population"}
    )[["STUSPS", "homeless_population"]]
    # Coerce instead of testing isinstance(x, int): if pandas infers a float64
    # column (integers mixed with blank cells) no element is a Python int and
    # an isinstance filter would silently discard every row.
    numeric_counts = pd.to_numeric(counts["homeless_population"], errors="coerce")
    # NaN (non-numeric or blank) and fractional values are both rejected here.
    is_whole_number = numeric_counts.notna() & (numeric_counts % 1 == 0)
    # Store a real integer dtype so later arithmetic and the file export do not
    # have to deal with an object-dtype column.
    return counts.loc[is_whole_number].assign(
        homeless_population=numeric_counts[is_whole_number].astype("int64")
    )


# Same extraction for both years; only the worksheet differs.
homeless_change_2023_df = _extract_homeless_counts(homeless_change_df["2023"])
homeless_change_2024_df = _extract_homeless_counts(homeless_change_df["2024"])

In [71]:
# Pair each state's 2024 count with its 2023 count. The inner join on the
# "STUSPS" key keeps only states present in both frames; the clashing
# "homeless_population" columns receive the "_2024" / "_2023" suffixes.
homeless_change_2023_2024_df = pd.merge(
    homeless_change_2024_df,
    homeless_change_2023_df,
    on="STUSPS",
    how="inner",
    suffixes=("_2024", "_2023"),
)

In [85]:
# Year-over-year change in the homeless count: absolute ("difference") and
# relative to the 2023 count, expressed in percent ("percent_change").
count_2024 = homeless_change_2023_2024_df["homeless_population_2024"]
count_2023 = homeless_change_2023_2024_df["homeless_population_2023"]
homeless_change_2023_2024_df["difference"] = count_2024 - count_2023
# Both operands are cast to float before dividing so the ratio is computed in
# floating point regardless of the dtype the count columns arrived with.
homeless_change_2023_2024_df["percent_change"] = (
    homeless_change_2023_2024_df["difference"]
    .astype(float)
    .div(count_2023.astype(float))
    .mul(100)
)

In [88]:
# Keep two decimal places of the percentage change.
percent_change_rounded = homeless_change_2023_2024_df["percent_change"].round(2)
homeless_change_2023_2024_df["percent_change"] = percent_change_rounded

## Open GIS Data

In [89]:
# Read the state boundary shapefile into a GeoDataFrame. Later cells use its
# "STUSPS", "NAME" and "geometry" columns.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [90]:
# sheet_name=None loads every worksheet of the population workbook, so the
# result is a dict of DataFrames keyed by sheet name (openpyxl parses .xlsx).
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path, engine="openpyxl", sheet_name=None
)

In [91]:
# Column labels as pandas parsed them from the "NST-EST2024-POP" sheet: the
# area-name column carries the sheet's long descriptive caption as its label.
_area_name_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is a positional label generated by pandas; confirm
# it is the estimate year intended for POPULATION.
_population_column = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [_area_name_column, _population_column]
].rename(columns={_area_name_column: "NAME", _population_column: "POPULATION"})
# Strip only the leading dots that mark sub-parts. Slicing off the first
# character unconditionally (`.str[1:]`) would also truncate names that have no
# leading dot, corrupting the "NAME" join key for those rows.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [92]:
# Left join on "NAME" keeps every row of the boundary layer even when no
# population row matches (POPULATION is then missing), then narrow the result
# to the identifier, name, population and geometry columns.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Merge Data for Homeless Changes

In [93]:
# Attach the 2023-to-2024 change figures to the state geometries. The inner
# join on "STUSPS" drops any row that appears on only one side.
homeless_per_state_changes_df = states_with_population_df.merge(
    right=homeless_change_2023_2024_df,
    how="inner",
    on="STUSPS",
)

In [94]:
# Reproject the geometries to the coordinate reference system EPSG:5070
# (presumably chosen as an equal-area projection for US maps; confirm).
homeless_per_state_changes_df = homeless_per_state_changes_df.to_crs(epsg=5070)

In [98]:
# Keep only the columns that are written to the output layer; the population
# and name columns brought in by the earlier merges are dropped here.
export_columns = ["STUSPS", "difference", "percent_change", "geometry"]
homeless_per_state_changes_df = homeless_per_state_changes_df[export_columns]

In [99]:
# Persist the per-state change layer to disk.
homeless_changes_path = "data/homeless_changes.gpkg"
homeless_per_state_changes_df.to_file(homeless_changes_path)