In [93]:
import geopandas as gpd
import numpy as np
import pandas as pd

In [30]:
from pyxlsb import open_workbook as open_xlsb

## Open Housing Data

In [107]:
# Raw cell values from sheet 2 of the PIT-counts workbook, one inner list per
# spreadsheet row. The first collected row is later used as the column header.
df = []

with open_xlsb("data/2007-2023-PIT-Counts-by-State.xlsb") as workbook:
    with workbook.get_sheet(2) as worksheet:
        # `.v` is the value attribute of each pyxlsb cell object.
        df.extend([cell.v for cell in sheet_row] for sheet_row in worksheet.rows())

In [108]:
# The first collected row supplies the column names; every later row is data.
header_row, data_rows = df[0], df[1:]
total_homeless_df = pd.DataFrame(data_rows, columns=header_row)

In [109]:
# Preview only the first rows: rendering the entire frame bloats the saved
# notebook without adding information.
total_homeless_df.head(10)

Unnamed: 0,State,Number of CoCs,Overall Homeless,Overall Homeless - Under 18,Overall Homeless - Age 18 to 24,Overall Homeless - Age 25 to 34,Overall Homeless - Age 35 to 44,Overall Homeless - Age 45 to 54,Overall Homeless - Age 55 to 64,Overall Homeless - Over 64,...,Overall Homeless Parenting Youth Age 18-24,Sheltered ES Homeless Parenting Youth Age 18-24,Sheltered TH Homeless Parenting Youth Age 18-24,Sheltered Total Homeless Parenting Youth Age 18-24,Unsheltered Homeless Parenting Youth Age 18-24,Overall Homeless Children of Parenting Youth,Sheltered ES Homeless Children of Parenting Youth,Sheltered TH Homeless Children of Parenting Youth,Sheltered Total Homeless Children of Parenting Youth,Unsheltered Homeless Children of Parenting Youth
0,AK,2.0,2614.0,361.0,227.0,501.0,609.0,459.0,346.0,111.0,...,18.0,13.0,5.0,18.0,0.0,17.0,12.0,5.0,17.0,0.0
1,AL,8.0,3304.0,560.0,178.0,514.0,684.0,608.0,555.0,205.0,...,21.0,12.0,1.0,13.0,8.0,24.0,15.0,1.0,16.0,8.0
2,AR,5.0,2609.0,304.0,177.0,483.0,550.0,557.0,399.0,139.0,...,10.0,6.0,3.0,9.0,1.0,17.0,11.0,5.0,16.0,1.0
3,AS,0.0,,,,,,,,,...,,,,,,,,,,
4,AZ,3.0,14237.0,1585.0,996.0,2776.0,3072.0,2511.0,2234.0,1063.0,...,58.0,25.0,30.0,55.0,3.0,70.0,30.0,37.0,67.0,3.0
5,CA,44.0,181399.0,15499.0,11840.0,26443.0,33036.0,29056.0,27063.0,10965.0,...,1040.0,410.0,261.0,671.0,369.0,1173.0,595.0,309.0,904.0,269.0
6,CO,4.0,14439.0,2291.0,771.0,2480.0,3043.0,2804.0,2264.0,786.0,...,70.0,26.0,40.0,66.0,4.0,88.0,37.0,48.0,85.0,3.0
7,CT,2.0,3015.0,583.0,236.0,469.0,528.0,489.0,533.0,177.0,...,56.0,37.0,19.0,56.0,0.0,68.0,45.0,23.0,68.0,0.0
8,DC,1.0,4922.0,715.0,521.0,835.0,785.0,740.0,923.0,403.0,...,119.0,35.0,84.0,119.0,0.0,121.0,36.0,85.0,121.0,0.0
9,DE,1.0,1245.0,335.0,66.0,195.0,189.0,193.0,211.0,56.0,...,10.0,9.0,1.0,10.0,0.0,14.0,12.0,2.0,14.0,0.0


In [71]:
# Keep just the state code and the overall count, then discard any row where
# either of the two values is missing.
wanted_columns = ["State", "Overall Homeless"]
total_homeless_df = total_homeless_df.loc[:, wanted_columns].dropna(how="any")

In [72]:
# Rename to the keys used downstream: "STUSPS" is the merge key shared with the
# state boundaries frame, and "HOMELESS" is the count column used for the rates.
# The source has to be `total_homeless_df` (filtered in the previous cell):
# `homeless_change_df` is never defined in this notebook, so the original line
# raised NameError on a fresh kernel (Restart & Run All).
total_homeless_df = total_homeless_df.rename(
    columns={"State": "STUSPS", "Overall Homeless": "HOMELESS"}
)

## Open GIS Data

In [73]:
# State boundary shapefile; downstream cells rely on its "STUSPS", "NAME" and
# "geometry" columns.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [74]:
# sheet_name=None loads every worksheet, giving a dict keyed by sheet name;
# the next cell picks the "NST-EST2023-POP" sheet out of it.
state_populations = pd.read_excel(
    io="data/NST-EST2023-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [75]:
# Column label pandas assigned to the first spreadsheet column (the sheet's own
# descriptive header text), which holds the geography names.
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

# Select the name column and "Unnamed: 5" (presumably the 2023 estimate --
# TODO confirm against the workbook), give both short names, then drop the
# first character of every name. That removes the leading dot on sub-part rows;
# note that rows without a leading dot lose their first letter as well.
state_populations_df = (
    state_populations["NST-EST2023-POP"][[name_header, "Unnamed: 5"]]
    .rename(columns={name_header: "NAME", "Unnamed: 5": "POPULATION"})
    .assign(NAME=lambda frame: frame["NAME"].str[1:])
)

In [76]:
# Attach the population figures to the state geometries by state name. The left
# join keeps every boundary row, so unmatched names end up with a missing
# POPULATION. Only the columns needed later are retained.
kept_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[kept_columns]

## Merge Data for Homeless Rates per State

In [99]:
# Join the homeless counts onto the state/population frame by postal code
# ("STUSPS"); the left join keeps every state row even without a count.
homeless_per_state = states_with_population_df.merge(
    total_homeless_df,
    how="left",
    on="STUSPS",
)

In [100]:
# Drop the merge key; keep the name, both numeric inputs, and the geometry.
output_columns = ["NAME", "POPULATION", "HOMELESS", "geometry"]
homeless_per_state = homeless_per_state[output_columns]

In [101]:
# Keep only rows that received a population value from the earlier left join.
has_population = homeless_per_state["POPULATION"].notna()
homeless_per_state = homeless_per_state[has_population]

In [102]:
# Cast both numeric inputs to integers before computing the rates.
for count_column in ("POPULATION", "HOMELESS"):
    homeless_per_state[count_column] = homeless_per_state[count_column].astype(int)

In [104]:
# Homeless count per N residents, for several values of N. Each output column
# maps to the N it is normalised by; insertion order fixes the column order.
rate_scales = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents in rate_scales.items():
    homeless_per_state[rate_column] = homeless_per_state["HOMELESS"] / (
        homeless_per_state["POPULATION"] / residents
    )

In [105]:
# Reproject the geometries to EPSG:5070 before export.
# NOTE(review): EPSG:5070 should be NAD83 / Conus Albers -- confirm that this
# is the intended projection for the map.
homeless_per_state = homeless_per_state.to_crs(epsg=5070)

In [106]:
# Persist the final frame (names, counts, rates, geometry) next to the inputs.
homeless_per_state.to_file(filename="data/homeless.gpkg")