In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Open county boundary data

In [3]:
# Path to the county shapefile (relative to the notebook's working directory).
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
# Read every county feature into a GeoDataFrame.
counties_gdf = gpd.read_file(file_path)

In [4]:
# Expose the fully-qualified GEOID under the name used as the join key later on,
# then drop the features whose state FIPS code is "72" and renumber the rows.
counties_gdf = (
    counties_gdf.rename(columns={"GEOIDFQ": "AFFGEOID"})
    .loc[lambda gdf: gdf["STATEFP"] != "72"]
    .reset_index(drop=True)
)

## Get Hispanic or Latino origin data (ACS table B03001)

In [5]:
# ACS table id whose full variable group is downloaded.
table = "B03001"

# 2023 ACS 5-year endpoint of the Census Data API.
url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    # group(...) asks for every variable that belongs to the table.
    "get": f"group({table})",
    # Geography predicate; presumably "all counties within the United States"
    # -- confirm against the Census API ucgid documentation.
    "ucgid": "pseudo(0100000US$0500000)",
}
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing JSON
# decoding failure in a later cell.
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()

In [6]:
# The payload is a list of rows whose first row holds the column names.
data = response.json()
columns, *rows = data
ethnicity_df = pd.DataFrame(data=rows, columns=columns)

In [7]:
# Variable metadata for the same dataset; used to translate variable codes
# into their human-readable labels.
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"
# Fail fast on network stalls and HTTP errors rather than trying to parse an
# error page as JSON.
response = requests.get(url, timeout=120)
response.raise_for_status()
variables = response.json()["variables"]

In [8]:
# Map each variable code of the requested table to its descriptive label.
# The prefix includes the "_" separator so that only "<table>_NNN..." codes
# match; a bare `startswith(table)` would also accept any other table id that
# merely begins with the same characters and could yield duplicate labels.
col_vars = {
    code: meta["label"]
    for code, meta in variables.items()
    if code.startswith(f"{table}_")
}
# The geography identifier becomes the key used to join with the county shapes.
col_vars["ucgid"] = "AFFGEOID"

In [9]:
# Swap variable codes for their labels and keep only the labelled columns
# (in the order given by col_vars), discarding everything else the API returned.
ethnicity_df = ethnicity_df.rename(columns=col_vars)[list(col_vars.values())]

In [10]:
# Shorten every estimate column to the last "!!"-separated segment of its
# label, e.g. "Estimate!!Total:!!<...>!!<name>" -> "<name>".
# Columns that do not start with "Estimate!!Total" are left as they are.
rename_dict = {
    label: label.split("!!")[-1]
    for label in ethnicity_df.columns
    if label.startswith("Estimate!!Total")
}
# Reassign instead of using inplace=True: the cell then yields a fresh frame
# rather than mutating one that was produced by a column selection.
ethnicity_df = ethnicity_df.rename(columns=rename_dict)

In [11]:
# Short names of all estimate columns; the API delivers their values as
# strings, so convert them to integers before doing arithmetic.
ethnicity_cols = [*rename_dict.values()]
ethnicity_df = ethnicity_df.astype(dict.fromkeys(ethnicity_cols, int))

In [12]:
# Columns that must not take part in the "largest group" comparison.
# NOTE(review): "Central American:" carries the same trailing ":" as the
# excluded subtotals but is kept in the list -- confirm that this is intended.
excluded_cols = {
    "Hispanic or Latino:",
    "Not Hispanic or Latino",
    "South American:",
    "Other Hispanic or Latino:",
    "Total:",
}

# Same failure mode as list.remove(): complain loudly if an expected label is
# absent (e.g. because the table's labels changed).
missing_cols = excluded_cols.difference(rename_dict.values())
if missing_cols:
    raise ValueError(f"expected columns not found: {sorted(missing_cols)}")

# Rebuild the list from rename_dict rather than removing items in place, so
# the cell can be re-run without raising on already-removed entries.
ethnicity_cols = [col for col in rename_dict.values() if col not in excluded_cols]

In [13]:
# Share of each county's total population that is Hispanic or Latino, stored
# as a fraction (not multiplied by 100) rounded to three decimals.
# NOTE: later comparisons against this column see the rounded value.
ethnicity_df["PERCENT_HISPANIC"] = (
    ethnicity_df["Hispanic or Latino:"]
    .div(ethnicity_df["Total:"])
    .round(3)
    .astype(float)
)

In [14]:
# Minimum Hispanic share (in whole percent) for each derived column.
HISPANIC_THRESHOLDS_PERCENT = (1, 2, 3, 4, 5, 10, 15)

# For every threshold, record the name of the column in `ethnicity_cols` with
# the highest count, but only for counties whose PERCENT_HISPANIC reaches the
# threshold. The assignment aligns on the index, so counties below the
# threshold are left as NaN in that column.
for threshold_percent in HISPANIC_THRESHOLDS_PERCENT:
    meets_threshold = ethnicity_df["PERCENT_HISPANIC"] >= threshold_percent / 100
    ethnicity_df[f"hispanic_counties_{threshold_percent}_percent"] = ethnicity_df.loc[
        meets_threshold, ethnicity_cols
    ].idxmax(axis=1)

In [15]:
# Number of counties that received a label at the 1% threshold.
print(int(ethnicity_df["hispanic_counties_1_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_1_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

3079


Unnamed: 0,hispanic_counties_1_percent,COUNT
9,Mexican,2533
11,Puerto Rican,318
3,Central American:,117
0,All other Hispanic or Latino,48
6,Cuban,16
13,Spanish,12
7,Dominican (Dominican Republic),12
12,Spaniard,8
10,Peruvian,5
5,Colombian,3


In [16]:
# Number of counties that received a label at the 2% threshold.
print(int(ethnicity_df["hispanic_counties_2_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_2_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

2722


Unnamed: 0,hispanic_counties_2_percent,COUNT
9,Mexican,2255
11,Puerto Rican,283
3,Central American:,106
0,All other Hispanic or Latino,34
7,Dominican (Dominican Republic),12
6,Cuban,11
13,Spanish,6
12,Spaniard,5
10,Peruvian,2
5,Colombian,2


In [17]:
# Number of counties that received a label at the 3% threshold.
print(int(ethnicity_df["hispanic_counties_3_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_3_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

2257


Unnamed: 0,hispanic_counties_3_percent,COUNT
8,Mexican,1872
10,Puerto Rican,237
3,Central American:,95
0,All other Hispanic or Latino,20
6,Dominican (Dominican Republic),12
5,Cuban,8
11,Spaniard,3
12,Spanish,2
9,Peruvian,2
1,Argentinean,1


In [18]:
# Number of counties that received a label at the 4% threshold.
print(int(ethnicity_df["hispanic_counties_4_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_4_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

1886


Unnamed: 0,hispanic_counties_4_percent,COUNT
8,Mexican,1538
10,Puerto Rican,217
3,Central American:,87
0,All other Hispanic or Latino,16
6,Dominican (Dominican Republic),11
5,Cuban,7
12,Spanish,2
11,Spaniard,2
1,Argentinean,1
2,Bolivian,1


In [19]:
# Number of counties that received a label at the 5% threshold.
print(int(ethnicity_df["hispanic_counties_5_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_5_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

1633


Unnamed: 0,hispanic_counties_5_percent,COUNT
8,Mexican,1325
10,Puerto Rican,195
3,Central American:,75
0,All other Hispanic or Latino,14
6,Dominican (Dominican Republic),10
5,Cuban,7
4,Colombian,1
1,Argentinean,1
2,Bolivian,1
7,Ecuadorian,1


In [20]:
# Number of counties that received a label at the 10% threshold.
print(int(ethnicity_df["hispanic_counties_10_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_10_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

943


Unnamed: 0,hispanic_counties_10_percent,COUNT
6,Mexican,750
7,Puerto Rican,135
2,Central American:,35
4,Dominican (Dominican Republic),10
0,All other Hispanic or Latino,6
3,Cuban,5
1,Bolivian,1
5,Ecuadorian,1


In [21]:
# Number of counties that received a label at the 15% threshold.
print(int(ethnicity_df["hispanic_counties_15_percent"].notna().sum()))
# Tally of counties per largest group, biggest tally first.
(
    ethnicity_df.groupby("hispanic_counties_15_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

654


Unnamed: 0,hispanic_counties_15_percent,COUNT
5,Mexican,500
6,Puerto Rican,111
1,Central American:,23
3,Dominican (Dominican Republic),10
2,Cuban,5
0,All other Hispanic or Latino,4
4,Ecuadorian,1


## Merge Data

In [22]:
# Attach the ACS columns to the county geometries. The inner join keeps only
# counties whose AFFGEOID appears in both tables.
ethnicity_gdf = counties_gdf.merge(
    right=ethnicity_df,
    how="inner",
    on="AFFGEOID",
)

In [23]:
# Reproject to EPSG:9311 and write the result out as a GeoPackage.
ethnicity_gdf = ethnicity_gdf.to_crs(epsg=9311)
ethnicity_gdf.to_file("data/hispanic_county.gpkg")