In [108]:
import requests

In [109]:
import geopandas as gpd
import pandas as pd

## Open State data

In [110]:
# Path to the state-boundary shapefile, relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Load the shapefile into a GeoDataFrame; its NAME column is the join key used
# when the housing data is merged in further down.
states_df = gpd.read_file(file_path)

## Get 2010 Data

In [111]:
# Request every variable in the DP04 group from the 2010 ACS 5-year
# "sptprofile" endpoint of the Census API.
table = "DP04"
url = "https://api.census.gov/data/2010/acs/acs5/sptprofile"
params = {
    "get": f"group({table})",
    "POPGROUP": "001",
    # NOTE(review): presumably selects every state within the US — confirm.
    "ucgid": "pseudo(0100000US$0400000)",
}
# Without a timeout, requests waits forever if the API stalls; 60 s is an
# arbitrary but generous ceiling.
response = requests.get(url, params=params, timeout=60)
# Stop here with a clear HTTP error rather than failing later while decoding
# an error page as JSON.
response.raise_for_status()

In [112]:
# The decoded payload is a list of rows: the first row holds the column names,
# every row after it is a data record.
payload = response.json()
housing_2010_df = pd.DataFrame(payload[1:], columns=payload[0])

In [113]:
# Fetch the group's metadata, which supplies a label for each variable code.
metadata_url = (
    f"https://api.census.gov/data/2010/acs/acs5/sptprofile/groups/{table}.json"
)
# Without a timeout, requests waits forever if the API stalls; 60 s is an
# arbitrary but generous ceiling.
meta_response = requests.get(metadata_url, timeout=60)
# Stop here with a clear HTTP error rather than failing later while decoding
# an error page as JSON.
meta_response.raise_for_status()

In [114]:
meta = meta_response.json()

# Map each variable code to its label, then relabel the data columns.
# Columns whose name is not a key in the lookup are left unchanged by rename.
lookup = {code: info["label"] for code, info in meta["variables"].items()}
housing_2010_df = housing_2010_df.rename(columns=lookup)

In [115]:
# Turn each "!!"-delimited label into a (stat_type, topic, characteristic)
# tuple so the columns can be addressed through a three-level MultiIndex.
new_cols = []
for label in housing_2010_df.columns:
    parts = label.split("!!")
    if len(parts) not in (2, 3):
        # No delimiter, or more than three parts: keep the whole label as the
        # first level.
        parts = [label]
    # Pad the missing trailing levels with empty strings.
    new_cols.append(tuple(parts + [""] * (3 - len(parts))))

housing_2010_df.columns = pd.MultiIndex.from_tuples(new_cols)

In [116]:
# Keep only the area name and the median gross rent estimate, then flatten
# the three-level column labels to simple names.
name_key = ("Geographic Area Name", "", "")
rent_key = ("Estimate", "GROSS RENT", "Median (dollars)")
housing_2010_df = housing_2010_df.loc[:, [name_key, rent_key]]
housing_2010_df.columns = ["NAME", "RENT"]

## Get 2023 Housing Data (Median Rent)

#### The 2023 data is read from a downloaded CSV file because the Census API yields wrong results

In [117]:
# thousands="," lets pandas parse figures written like "1,234" as numbers.
csv_path = "data/ACSDP5Y2023.DP04-2025-06-27T193539.csv"
housing_2023_df = pd.read_csv(csv_path, thousands=",")

In [118]:
# Build the rename map: the label column becomes NAME, and every column named
# "<something>!!Estimate" is shortened to the stripped "<something>" part.
# Columns that do not match are left out of the map.
rename_columns_home_percent = {"Label (Grouping)": "NAME"}
for header in housing_2023_df.columns:
    parts = header.split("!!")
    if len(parts) == 2 and parts[1] == "Estimate":
        rename_columns_home_percent[header] = parts[0].strip()

In [119]:
# Keep only the columns named in the rename map and apply the short names.
selected = housing_2023_df[rename_columns_home_percent.keys()].rename(
    columns=rename_columns_home_percent
)
# Transpose so each former column becomes a row; reset_index moves the former
# column names into the first column.
flipped = selected.T.reset_index()
# The first row now holds "NAME" followed by the row labels of the CSV: use
# it (whitespace-stripped) as the header, then drop it from the data.
flipped.columns = [label.strip() for label in flipped.iloc[0]]
housing_2023_df = flipped[1:]

In [121]:
# "Median (dollars)" occurs more than once among the column labels, so
# selecting it returns a DataFrame; its last column is used as the rent value.
# NOTE(review): assumes the last "Median (dollars)" entry in the CSV is the
# gross-rent median — confirm against the file.
# assign() works on a copy, so the new column is not written into the row
# slice produced by the previous cell (which can raise SettingWithCopyWarning).
housing_2023_df = housing_2023_df.assign(
    RENT=housing_2023_df["Median (dollars)"].iloc[:, -1]
)[["RENT", "NAME"]]

## Merge Data

In [132]:
# Inner-join the two years on NAME; the clashing RENT columns come out as
# RENT_2023 (left frame) and RENT_2010 (right frame).
housing_df = pd.merge(
    housing_2023_df,
    housing_2010_df,
    how="inner",
    on="NAME",
    suffixes=("_2023", "_2010"),
)

In [133]:
# Cast both rent columns to integers so they can be subtracted and divided.
rent_columns = ["RENT_2023", "RENT_2010"]
housing_df[rent_columns] = housing_df[rent_columns].astype(int)

# Absolute change, and that change as a percentage of the 2010 rent
# (rounded to one decimal place).
rent_change = housing_df["RENT_2023"] - housing_df["RENT_2010"]
housing_df["DIFF"] = rent_change
housing_df["PERCENT"] = (rent_change / housing_df["RENT_2010"] * 100).round(1)

In [134]:
# Attach the rent figures to the state geometries (inner join on NAME), then
# drop every row that contains a missing value in any column.
merged_gdf = states_df.merge(housing_df, how="inner", on="NAME")
home_columns_percents_gdf = merged_gdf.dropna()

In [135]:
# Reproject to EPSG:9311 and write the result to disk.
output_path = "data/housing.gpkg"
home_columns_percents_gdf = home_columns_percents_gdf.to_crs(epsg=9311)
home_columns_percents_gdf.to_file(output_path)