In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Load state boundary data

In [3]:
# Read the state shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

## Get Asian population data (ACS 5-year 2023, table B02015)

In [4]:
# Request every variable of the table group from the 2023 ACS 5-year endpoint.
table = "B02015"

url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    "get": f"group({table})",
    # NOTE(review): presumably selects every state within the US — confirm
    # against the Census API documentation for the `ucgid` predicate.
    "ucgid": "pseudo(0100000US$0400000)",
}
# requests applies no timeout by default, so a stalled connection would hang
# the kernel indefinitely without one.
response = requests.get(url, params=params, timeout=60)
# Surface HTTP errors here rather than as an opaque JSON decoding failure
# when the body is parsed in a later cell.
response.raise_for_status()

In [5]:
# The payload is a list of rows whose first row holds the column names.
data = response.json()
columns, *rows = data
asian_df = pd.DataFrame(data=rows, columns=columns)

In [7]:
# Download the variable dictionary of the 2023 ACS 5-year dataset; its
# "variables" entry maps variable codes to metadata (including labels).
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the kernel.
response = requests.get(url, timeout=60)
# Fail with the HTTP error itself instead of a JSON/KeyError on an error body.
response.raise_for_status()
variables = response.json()["variables"]

In [8]:
# Map each B02015 variable code to its label, and map the geography
# identifier column "ucgid" to "AFFGEOID" (the key used for the later merge).
b02015_vars = {
    **{
        code: meta["label"]
        for code, meta in variables.items()
        if code.startswith("B02015")
    },
    "ucgid": "AFFGEOID",
}

In [9]:
# Swap variable codes for their labels, then keep only the relabelled columns.
asian_df = (
    asian_df
    .rename(columns=b02015_vars)
    .loc[:, list(b02015_vars.values())]
)

In [10]:
# Shorten every estimate label to its last "!!"-separated segment
# (e.g. "Estimate!!Total:" becomes "Total:").
#
# The mapping is derived from the labels in `b02015_vars` — the exact column
# list selected for `asian_df` in the previous cell — rather than from the
# live `asian_df.columns`. Once the columns have been renamed, none of them
# starts with "Estimate!!Total" any more, so deriving the mapping from the
# frame would yield an empty `rename_dict` when this cell is re-run.
rename_dict = {
    col: col.split("!!")[-1]
    for col in b02015_vars.values()
    if col.startswith("Estimate!!Total")
}
# Rebind instead of `inplace=True`; on a re-run the rename is simply a no-op.
asian_df = asian_df.rename(columns=rename_dict)

In [11]:
# The shortened estimate labels are the count columns; cast them all to int.
ethnicity_cols = [*rename_dict.values()]
counts_as_int = asian_df[ethnicity_cols].astype(int)
asian_df[ethnicity_cols] = counts_as_int

In [12]:
# Exclude the overall total so that only the individual groups remain.
# A filtering rebind is used instead of list.remove so the cell can be re-run:
# list.remove raises ValueError once "Total:" is no longer in the list.
ethnicity_cols = [col for col in ethnicity_cols if col != "Total:"]

In [13]:
# For each row, store the label of the column holding the largest count.
asian_df["ancestry_countries"] = asian_df[ethnicity_cols].idxmax(axis="columns")

## Merge Data

In [15]:
# Join the ACS columns onto the state records; the inner join keeps only rows
# whose AFFGEOID is present in both tables.
asian_gdf = states_df.merge(
    asian_df,
    how="inner",
    on="AFFGEOID",
)

In [17]:
# Reproject to EPSG:9311, then write the result to the .gpkg output path.
asian_gdf = asian_gdf.to_crs(epsg=9311)
asian_gdf.to_file("data/asians_group_per_state.gpkg")

In [18]:
# Count how many rows fall under each largest-group label.
asian_df.groupby(by="ancestry_countries").size()

ancestry_countries
Asian Indian                 28
Chinese, except Taiwanese    11
Filipino                      7
Hmong                         2
Vietnamese                    4
dtype: int64