In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [3]:
# Read the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(filename=file_path)

## Get Ethnicity Data (ACS table B03001)

In [35]:
# ACS table (group) whose columns are downloaded.
table = "B03001"

url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    # group(...) asks the API for every variable in the table at once.
    "get": f"group({table})",
    # NOTE(review): this ucgid pseudo-predicate appears to select all
    # state-level geographies within the United States — confirm against the
    # Census API documentation.
    "ucgid": "pseudo(0100000US$0400000)",
}
# The timeout stops the kernel from hanging forever on a stalled connection;
# raise_for_status() surfaces HTTP errors here instead of as a confusing JSON
# decoding failure when the body is parsed later.
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()

In [36]:
# The payload is a list of lists: the first entry holds the column names and
# every remaining entry is one data row.
data = response.json()
columns, rows = data[0], data[1:]
ethnicity_df = pd.DataFrame(data=rows, columns=columns)

In [37]:
# Download the variable metadata for the same dataset.
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"
# Bound the wait and fail fast on HTTP errors rather than on a later KeyError
# or JSON decoding error.
response = requests.get(url, timeout=60)
response.raise_for_status()
variables = response.json()["variables"]

In [38]:
# Map each variable ID of the selected table to its descriptive label.
# Matching on "<table>_" (not just "<table>") keeps out variables from any
# other table whose ID merely begins with the same characters.
# NOTE(review): assumes variable IDs have the form "<table>_<suffix>" — confirm.
col_vars = {
    var_id: meta["label"]
    for var_id, meta in variables.items()
    if var_id.startswith(f"{table}_")
}

In [39]:
# Swap variable IDs for their labels, then keep only the labelled columns
# together with the NAME column.
ethnicity_df = ethnicity_df.rename(columns=col_vars)[[*col_vars.values(), "NAME"]]

In [40]:
# Shorten each hierarchical label ("Estimate!!Total:!!...") to its last
# "!!"-separated segment.
# The mapping is built from the label lookup rather than from the frame's
# current columns, and the rename returns a new frame instead of mutating in
# place. Re-running this cell therefore yields the same rename_dict and the
# same columns, instead of an empty mapping the second time around.
rename_dict = {
    label: label.split("!!")[-1]
    for label in col_vars.values()
    if label.startswith("Estimate!!Total")
}
ethnicity_df = ethnicity_df.rename(columns=rename_dict)

In [66]:
# Convert every shortened estimate column to integers so they can be compared
# numerically.
ethnicity_cols = [*rename_dict.values()]
ethnicity_df = ethnicity_df.astype({col: int for col in ethnicity_cols})

In [67]:
# Exclude the roll-up columns so they do not take part in the per-row maximum.
# Filtering into a new list (instead of list.remove) keeps this cell safe to
# re-run: a second execution is a no-op rather than a ValueError.
# NOTE(review): the subtotal columns "South American:", "Central American:" and
# "Other Hispanic or Latino:" are still kept as candidates, exactly as before
# (their removal was commented out) — confirm that this is intended.
aggregate_cols = {"Hispanic or Latino:", "Not Hispanic or Latino", "Total:"}
ethnicity_cols = [col for col in ethnicity_cols if col not in aggregate_cols]

In [68]:
# For every row, record the name of the candidate column holding the largest
# value.
ethnicity_df = ethnicity_df.assign(
    ancestry=ethnicity_df[ethnicity_cols].idxmax(axis="columns")
)

## Merge Data

In [69]:
# Attach the tabular columns to the state geometries by name. The inner join
# keeps only names present on both sides. validate="one_to_one" makes the
# merge raise if a name is duplicated on either side, instead of silently
# duplicating rows.
ethnicity_gdf = states_gdf.merge(
    ethnicity_df,
    on="NAME",
    how="inner",
    validate="one_to_one",
)

In [70]:
# Reproject before export.
# NOTE(review): EPSG:9311 is presumably the US National Atlas Equal Area
# projection — confirm against the EPSG registry.
ethnicity_gdf = ethnicity_gdf.to_crs(epsg=9311)
# Name the driver explicitly so the output format does not depend on the
# library inferring it from the ".gpkg" extension.
ethnicity_gdf.to_file("data/hispanic_state.gpkg", driver="GPKG")