In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Open County data

In [3]:
# Read the county boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
counties_gdf = gpd.read_file(filename=file_path)

In [4]:
# Expose the fully-qualified GEOID under the same column name ("AFFGEOID")
# that the census tables use below, so the frames can be joined on it.
counties_gdf = counties_gdf.rename({"GEOIDFQ": "AFFGEOID"}, axis="columns")

## Get Total Population Data

In [5]:
# Request every column of the DP05 group from the 2023 ACS 5-year
# Data Profile endpoint, using the ucgid predicate below to select geographies.
table = "DP05"
url = "https://api.census.gov/data/2023/acs/acs5/profile"
params = {
    "get": f"group({table})",
    "ucgid": "pseudo(0100000US$0500000)",
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, params=params, timeout=60)
# Fail here on an HTTP error instead of later, when an error page would
# be handed to response.json() and raise a confusing decode error.
response.raise_for_status()

ERROR! Session/line number was not unique in database. History logging moved to new session 80


In [6]:
# Decode the JSON body of the DP05 response; the next cell uses the first
# element as the header row and the remaining elements as data rows.
data = response.json()

In [7]:
# First element of the payload is the header; everything after it is data.
columns, rows = data[0], data[1:]
pop_df = pd.DataFrame(data=rows, columns=columns)

In [8]:
# Fetch the variable dictionary for the Data Profile endpoint; it maps
# variable codes to metadata that includes a "label" entry.
url = "https://api.census.gov/data/2023/acs/acs5/profile/variables.json"
# Bounded wait plus an explicit status check so a failed download is reported
# as an HTTP error rather than as a JSON/KeyError on the next line.
response = requests.get(url, timeout=60)
response.raise_for_status()
variables = response.json()["variables"]

In [9]:
# Map DP05 variable codes to their labels.
# The prefix is spelled out instead of read from the global `table`, because
# `table` is rebound to "B02015" further down; re-running this cell afterwards
# would otherwise silently build a B02015 mapping. This also matches how the
# B02015 mapping is built later in the notebook.
DP05_vars = {
    code: meta["label"]
    for code, meta in variables.items()
    if code.startswith("DP05")
}
# The geography identifier column is renamed to the shared join key.
DP05_vars["ucgid"] = "AFFGEOID"

In [10]:
# Swap variable codes for their labels, then keep only the columns that
# received a label (plus the AFFGEOID key).
labelled_columns = list(DP05_vars.values())
pop_df = pop_df.rename(columns=DP05_vars)[labelled_columns]

In [11]:
# Reduce the profile table to the total-population estimate and the join key,
# give the estimate a short name, and cast it to integers.
pop_df = (
    pop_df[["Estimate!!SEX AND AGE!!Total population", "AFFGEOID"]]
    .rename(columns={"Estimate!!SEX AND AGE!!Total population": "TOTAL_POP"})
    .astype({"TOTAL_POP": int})
)

## Get Asian Data

In [12]:
# Request every column of the B02015 group from the 2023 ACS 5-year
# detailed-tables endpoint, using the same ucgid predicate as the DP05 request.
table = "B02015"

url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    "get": f"group({table})",
    "ucgid": "pseudo(0100000US$0500000)",
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, params=params, timeout=60)
# Fail here on an HTTP error instead of later, when an error page would
# be handed to response.json() and raise a confusing decode error.
response.raise_for_status()

In [13]:
# Decode the B02015 payload: header row first, data rows after it.
data = response.json()
columns, rows = data[0], data[1:]
asian_df = pd.DataFrame(data=rows, columns=columns)

In [14]:
# Fetch the variable dictionary for the detailed-tables endpoint; it maps
# variable codes to metadata that includes a "label" entry.
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"
# Bounded wait plus an explicit status check so a failed download is reported
# as an HTTP error rather than as a JSON/KeyError on the next line.
response = requests.get(url, timeout=60)
response.raise_for_status()
variables = response.json()["variables"]

In [15]:
# Map B02015 variable codes to their labels, and send the geography
# identifier column to the shared join key.
b02015_vars = {
    code: meta["label"]
    for code, meta in variables.items()
    if code.startswith("B02015")
}
b02015_vars.update({"ucgid": "AFFGEOID"})

In [16]:
# Swap variable codes for their labels, then keep only the columns that
# received a label (plus the AFFGEOID key).
labelled_columns = list(b02015_vars.values())
asian_df = asian_df.rename(columns=b02015_vars)[labelled_columns]

In [17]:
# Shorten each "Estimate!!Total..." label to its last "!!"-separated segment.
# The mapping is derived from the label dictionary rather than from the
# frame's current column names: after the first run those columns are already
# shortened, so deriving it from the frame would produce an empty mapping on a
# re-run and leave the following cells with no group columns.
rename_dict = {
    label: label.split("!!")[-1]
    for label in b02015_vars.values()
    if label.startswith("Estimate!!Total")
}
# Reassign instead of inplace=True so the cell can be re-run safely
# (labels that are no longer present are simply ignored by rename).
asian_df = asian_df.rename(columns=rename_dict)

In [18]:
# Every shortened estimate column; the API delivers their values as strings,
# so cast them all to integers.
ethnicity_cols = list(rename_dict.values())
asian_df = asian_df.astype(dict.fromkeys(ethnicity_cols, int))

In [19]:
# Keep only the detailed groups. Labels ending in ":" (such as "Total:") are
# dropped: summing them together with the detailed rows would double count
# people, and such a row could win the idxmax below.
# NOTE(review): presumably any other ":"-terminated label in B02015 is a
# header/subtotal row - confirm against the table's variable list.
# A filter is used instead of list.remove so the cell can be re-run without
# raising ValueError once "Total:" is already gone.
ethnicity_cols = [col for col in ethnicity_cols if not col.endswith(":")]

In [20]:
# Attach TOTAL_POP to each row; rows without a matching AFFGEOID are dropped.
asian_df = pd.merge(asian_df, pop_df, how="inner", on="AFFGEOID")

In [29]:
# Row-wise sum of the group columns, then that sum as a share of total
# population. Despite the name, "asian_percent" is a fraction (0.01 == 1%),
# rounded to three decimals.
asian_df["asian_total"] = asian_df[ethnicity_cols].sum(axis="columns")
asian_share = asian_df["asian_total"].div(asian_df["TOTAL_POP"])
asian_df["asian_percent"] = asian_share.round(3).astype(float)

In [53]:
# For each cutoff from 1% to 5%, record the group column with the largest
# count in every row whose Asian share meets the cutoff. Rows below the cutoff
# are absent from the filtered frame, so index alignment leaves them as NaN.
# One loop replaces five copy-pasted statements that differed only in the
# threshold, producing the same "asian_counties_<n>_percent" columns.
for pct in range(1, 6):
    meets_cutoff = asian_df["asian_percent"] >= pct / 100
    asian_df[f"asian_counties_{pct}_percent"] = asian_df.loc[
        meets_cutoff, ethnicity_cols
    ].idxmax(axis=1)

In [54]:
# Number of rows assigned a dominant group at the 1% cutoff, followed by the
# per-group tally.
print(asian_df["asian_counties_1_percent"].notna().sum())
asian_df.groupby("asian_counties_1_percent").size()

1112


asian_counties_1_percent
Asian Indian                  328
Bangladeshi                     4
Burmese                        34
Cambodian                      11
Chinese, except Taiwanese     193
Filipino                      266
Hmong                          54
Indonesian                      1
Japanese                       13
Korean                         41
Laotian                        24
Nepalese                        7
Other Asian, not specified      3
Other Asian, specified          2
Other Central Asian             3
Pakistani                       7
Singaporean                     1
Sri Lankan                      3
Taiwanese                       1
Thai                            3
Two or more Asian               6
Vietnamese                    107
dtype: int64

In [55]:
# Number of rows assigned a dominant group at the 2% cutoff, followed by the
# per-group tally.
print(asian_df["asian_counties_2_percent"].notna().sum())
asian_df.groupby("asian_counties_2_percent").size()

547


asian_counties_2_percent
Asian Indian                  203
Burmese                        22
Cambodian                       1
Chinese, except Taiwanese      97
Filipino                      111
Hmong                          31
Indonesian                      1
Japanese                        1
Korean                         14
Laotian                         9
Nepalese                        1
Other Asian, not specified      2
Other Central Asian             2
Pakistani                       2
Thai                            1
Two or more Asian               2
Vietnamese                     47
dtype: int64

In [56]:
# Number of rows assigned a dominant group at the 3% cutoff, followed by the
# per-group tally.
print(asian_df["asian_counties_3_percent"].notna().sum())
asian_df.groupby("asian_counties_3_percent").size()

350


asian_counties_3_percent
Asian Indian                  143
Burmese                        13
Cambodian                       1
Chinese, except Taiwanese      66
Filipino                       66
Hmong                          22
Korean                          5
Laotian                         3
Nepalese                        1
Other Asian, not specified      1
Other Central Asian             2
Pakistani                       1
Two or more Asian               1
Vietnamese                     25
dtype: int64

In [57]:
# Number of rows assigned a dominant group at the 4% cutoff, followed by the
# per-group tally.
print(asian_df["asian_counties_4_percent"].notna().sum())
asian_df.groupby("asian_counties_4_percent").size()

257


asian_counties_4_percent
Asian Indian                  113
Burmese                         8
Cambodian                       1
Chinese, except Taiwanese      53
Filipino                       44
Hmong                          16
Korean                          2
Laotian                         1
Nepalese                        1
Other Asian, not specified      1
Vietnamese                     17
dtype: int64

In [58]:
# Number of rows assigned a dominant group at the 5% cutoff, followed by the
# per-group tally.
print(asian_df["asian_counties_5_percent"].notna().sum())
asian_df.groupby("asian_counties_5_percent").size()

187


asian_counties_5_percent
Asian Indian                  83
Burmese                        5
Cambodian                      1
Chinese, except Taiwanese     41
Filipino                      35
Hmong                         10
Korean                         1
Laotian                        1
Other Asian, not specified     1
Vietnamese                     9
dtype: int64

## Merge Data

In [59]:
# Join the tabular results onto the county geometries via the shared key;
# the geometry frame is on the left so the result stays a GeoDataFrame.
asian_gdf = counties_gdf.merge(asian_df, how="inner", on="AFFGEOID")

In [60]:
# Reproject to EPSG:9311 and write the result out as a GeoPackage.
asian_gdf = asian_gdf.to_crs(epsg=9311)
asian_gdf.to_file("data/asians_group_per_county.gpkg")