In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Open County data

In [3]:
# Location of the county boundary shapefile, relative to the notebook's working directory.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"

# Read the shapefile into a GeoDataFrame; it is joined to the ethnicity table later on.
counties_gdf = gpd.read_file(filename=file_path)

In [4]:
# Expose the shapefile's GEOIDFQ column under the name AFFGEOID, which is the
# key used for the merge with the ethnicity table.
geoid_column_map = {"GEOIDFQ": "AFFGEOID"}
counties_gdf = counties_gdf.rename(geoid_column_map, axis="columns")

## Get Ethnic Data

In [5]:
# ACS table (variable group) to download; also used later to pick out the
# matching variable labels.
table = "B04006"

# 2023 ACS 5-year endpoint. `group(...)` asks for every variable in the table.
# NOTE(review): the ucgid pseudo predicate is expected to return one row per
# US county -- confirm against the Census API documentation.
url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    "get": f"group({table})",
    "ucgid": "pseudo(0100000US$0500000)",
}

# Without a timeout `requests` can wait forever on a stalled connection.
response = requests.get(url, params=params, timeout=60)
# Fail here on an HTTP error instead of later, when the error body cannot be
# parsed as JSON.
response.raise_for_status()

In [6]:
# The payload is a list of rows whose first row holds the column names.
data = response.json()
columns, rows = data[0], data[1:]

# One record per remaining row, labelled with the header row.
ethnicity_df = pd.DataFrame(data=rows, columns=columns)

In [7]:
# Variable metadata for the same dataset; the "variables" entry maps each
# variable code to a dict that includes its human-readable "label".
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"

# Without a timeout `requests` can wait forever on a stalled connection.
response = requests.get(url, timeout=60)
# Surface HTTP errors here rather than as a JSON decoding failure below.
response.raise_for_status()
variables = response.json()["variables"]

In [8]:
# Map every variable code belonging to `table` to its label, then map the
# geography identifier column to the merge key name.
col_vars = {
    **{
        code: meta["label"]
        for code, meta in variables.items()
        if code.startswith(table)
    },
    "ucgid": "AFFGEOID",
}

In [9]:
# Replace variable codes with their labels, then keep only the labelled
# columns (plus AFFGEOID) in the order defined by `col_vars`.
labelled_columns = list(col_vars.values())
ethnicity_df = ethnicity_df.rename(columns=col_vars)[labelled_columns]

In [10]:
# Shorten the West Indian columns to the last segment of their label,
# e.g. "...:!!Bahamian" -> "Bahamian". The aggregate column itself becomes
# "West Indian (except Hispanic groups):".
#
# The mapping is derived from the full labels in `col_vars` (the frame's column
# names before this rename) rather than from the frame's current columns.
# Re-running this cell therefore rebuilds the same mapping; previously a re-run
# produced an empty dict, which emptied `ethnicity_cols` downstream.
west_indian_prefix = "Estimate!!Total:!!West Indian (except Hispanic groups):"
rename_dict = {
    label: label.split("!!")[-1]
    for label in col_vars.values()
    if label.startswith(west_indian_prefix)
}

# Reassign instead of `inplace=True` so the step is an explicit, re-runnable
# transformation (renaming already-renamed columns is a no-op).
ethnicity_df = ethnicity_df.rename(columns=rename_dict)

In [11]:
# Short names of all West Indian columns (aggregate plus individual groups).
ethnicity_cols = [*rename_dict.values()]

# Cast the total and every West Indian count to integers so they can be used
# in arithmetic (presumably they arrive as strings from the JSON payload).
count_columns = ["Estimate!!Total:", *ethnicity_cols]
ethnicity_df[count_columns] = ethnicity_df[count_columns].astype(int)

In [12]:
# Share of each county's total that is West Indian. Despite the column name
# this is a fraction (count / total), rounded to three decimals.
west_indian_count = ethnicity_df["West Indian (except Hispanic groups):"]
total_count = ethnicity_df["Estimate!!Total:"]
west_indian_share = west_indian_count / total_count
ethnicity_df["PERCENT_WEST_INDIAN"] = west_indian_share.round(decimals=3).astype(float)

In [13]:
# Drop the aggregate column so only the individual groups remain as candidates
# for the "largest group" lookup. Filtering into a new list (instead of
# `list.remove`) keeps the cell re-runnable: a second run is a no-op rather
# than a ValueError.
ethnicity_cols = [
    col for col in ethnicity_cols if col != "West Indian (except Hispanic groups):"
]

In [14]:
# Percent label used in the column name -> minimum West Indian share required.
west_indian_thresholds = {
    1: 0.01,
    2: 0.02,
    3: 0.03,
    4: 0.04,
    5: 0.05,
    10: 0.10,
    15: 0.15,
}

# For each threshold, record the largest individual West Indian group in every
# county whose share meets it. Counties below the threshold are absent from the
# right-hand side and therefore end up as NaN after index alignment.
# Note: PERCENT_WEST_INDIAN is rounded to three decimals where it is computed,
# so the comparison is made on the rounded share.
for pct_label, min_share in west_indian_thresholds.items():
    meets_threshold = ethnicity_df["PERCENT_WEST_INDIAN"] >= min_share
    ethnicity_df[f"west_indian_counties_{pct_label}_percent"] = ethnicity_df.loc[
        meets_threshold, ethnicity_cols
    ].idxmax(axis=1)

In [31]:
threshold_col = "west_indian_counties_1_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

170


Unnamed: 0,west_indian_counties_1_percent,COUNT
6,Jamaican,92
5,Haitian,54
4,Dutch West Indian,13
8,West Indian,3
0,Bahamian,2
3,British West Indian,2
7,Trinidadian and Tobagonian,2
1,Belizean,1
2,Bermudan,1


In [32]:
threshold_col = "west_indian_counties_2_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

59


Unnamed: 0,west_indian_counties_2_percent,COUNT
3,Jamaican,31
2,Haitian,26
1,Dutch West Indian,1
0,Bahamian,1


In [33]:
threshold_col = "west_indian_counties_3_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

30


Unnamed: 0,west_indian_counties_3_percent,COUNT
1,Haitian,16
2,Jamaican,13
0,Bahamian,1


In [34]:
threshold_col = "west_indian_counties_4_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

18


Unnamed: 0,west_indian_counties_4_percent,COUNT
0,Haitian,12
1,Jamaican,6


In [35]:
threshold_col = "west_indian_counties_5_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

10


Unnamed: 0,west_indian_counties_5_percent,COUNT
0,Haitian,7
1,Jamaican,3


In [36]:
threshold_col = "west_indian_counties_10_percent"

# Number of counties that received a label at this threshold.
has_label = ~ethnicity_df[threshold_col].isna()
print(len(ethnicity_df[has_label]))

# Counties per largest group, most common first.
(
    ethnicity_df.groupby(threshold_col)
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

2


Unnamed: 0,west_indian_counties_10_percent,COUNT
0,Haitian,1
1,Jamaican,1


## Merge Data

In [37]:
# Attach the ethnicity columns to the county geometries. The inner join keeps
# only counties whose AFFGEOID appears in both tables.
ethnicity_gdf = counties_gdf.merge(
    right=ethnicity_df,
    how="inner",
    on="AFFGEOID",
)

In [38]:
# Reproject to the CRS identified by EPSG code 9311, then write the result to
# disk as a GeoPackage.
output_path = "data/west_indian_per_county.gpkg"
ethnicity_gdf = ethnicity_gdf.to_crs(crs=9311)
ethnicity_gdf.to_file(output_path)