In [53]:
import requests

In [54]:
import geopandas as gpd
import pandas as pd

## Open County data

In [55]:
# County boundary shapefile, read relative to the notebook's working directory.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"

# Load the counties and expose the GEOIDFQ identifier under the name AFFGEOID,
# which is the key used to join with the ancestry table further down.
counties_gdf = gpd.read_file(file_path).rename(columns={"GEOIDFQ": "AFFGEOID"})

## Get ancestry counts per county (ACS table B04006)

In [56]:
# Census table whose whole variable group is downloaded.
table = "B04006"

# ACS 5-year (2023) data endpoint.
url = "https://api.census.gov/data/2023/acs/acs5"
params = {
    # group(...) asks for every variable belonging to the table.
    "get": f"group({table})",
    # NOTE(review): this ucgid predicate is understood to select all
    # county-level geographies within the United States - confirm against the
    # Census API documentation.
    "ucgid": "pseudo(0100000US$0500000)",
}

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing JSON
# decoding failure in the next cell.
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()

In [57]:
# The payload is a list of rows in which the first row holds the column names.
data = response.json()
columns, rows = data[0], data[1:]

# All values are kept exactly as delivered; numeric casting happens later.
ethnicity_df = pd.DataFrame(data=rows, columns=columns)

In [58]:
# Variable metadata for the same dataset; used to translate variable codes
# into their human-readable labels.
url = "https://api.census.gov/data/2023/acs/acs5/variables.json"

# Same safeguards as the data request: bounded wait and an explicit failure on
# a non-success HTTP status. The timeout is generous because this document
# covers the whole dataset rather than a single table.
response = requests.get(url, timeout=120)
response.raise_for_status()
variables = response.json()["variables"]

In [59]:
# Map every variable code of the selected table to its descriptive label.
col_vars = {
    var_code: var_meta["label"]
    for var_code, var_meta in variables.items()
    if var_code.startswith(table)
}

# The geography identifier is renamed to the key used by the county layer.
# It is added last, so it ends up as the final selected column.
col_vars["ucgid"] = "AFFGEOID"

In [60]:
# Replace variable codes with their labels and keep only the mapped columns
# (the table's labelled variables plus AFFGEOID).
# The explicit .copy() makes the result an independent frame: later cells add
# and overwrite columns on it, which on a bare column subset is the pattern
# pandas flags with SettingWithCopyWarning.
ethnicity_df = ethnicity_df.rename(columns=col_vars)[list(col_vars.values())].copy()

In [61]:
# Shorten "Estimate!!Total:!!<group>" style labels to their last "!!" segment
# (for example "Estimate!!Total:" becomes "Total:").
rename_dict = {
    col: col.split("!!")[-1]
    for col in ethnicity_df.columns
    if col.startswith("Estimate!!Total")
}

# Keeping only the last segment drops the parent categories, so two different
# labels could collapse to the same short name. Duplicate column names would
# silently corrupt the column selections made in later cells, so fail loudly.
assert len(set(rename_dict.values())) == len(rename_dict), (
    "shortened ancestry labels are not unique"
)

# Rebind instead of inplace=True: same result, but the cell produces a new
# frame rather than mutating the one created earlier.
ethnicity_df = ethnicity_df.rename(columns=rename_dict)

In [62]:
# Every shortened estimate column holds a count and is converted to int.
ethnicity_cols = list(rename_dict.values())

# One astype call with a per-column mapping; columns not listed (such as
# AFFGEOID) keep their current dtype.
ethnicity_df = ethnicity_df.astype({col: int for col in ethnicity_cols})

In [63]:
# Ancestry columns that are added up into the Slavic total.
# The order is significant: idxmax (used below to pick the largest group per
# county) returns the first column in this list when several share the maximum.
slavic_cols = [
    "Bulgarian", "Carpatho Rusyn", "Croatian", "Czech",
    "Czechoslovakian", "Eastern European", "Macedonian", "Polish",
    "Russian", "Serbian", "Slavic", "Slovak",
    "Slovene", "Soviet Union", "Ukrainian", "Yugoslavian",
]

In [64]:
# Row-wise total of all Slavic ancestry counts.
# assign() returns a new frame instead of inserting a column into the existing
# one; the in-place insert drew a pandas warning in the recorded run of this
# cell. NOTE(review): the exact warning class is not visible in the saved
# output - confirm the warning is gone after re-running.
ethnicity_df = ethnicity_df.assign(
    SLAVIC_SUM=ethnicity_df[slavic_cols].sum(axis=1)
)

  ethnicity_df['SLAVIC_SUM'] = ethnicity_df[slavic_cols].sum(axis=1)


In [65]:
# Share of the Slavic total relative to the "Total:" column, rounded to three
# decimals. Despite the column name this is a fraction (0.05 means 5 %), which
# is how the thresholds in the following cell interpret it.
# The division already yields floats, so the former .astype(float) was a no-op
# and is dropped; assign() replaces the in-place column insert that drew a
# pandas warning in the recorded run.
ethnicity_df = ethnicity_df.assign(
    PERCENT_SLAVIC=(ethnicity_df["SLAVIC_SUM"] / ethnicity_df["Total:"]).round(
        decimals=3
    )
)

  ethnicity_df["PERCENT_SLAVIC"] = (


In [66]:
# Thresholds, in percent, for which a "largest Slavic group" column is built.
slavic_thresholds_pct = (1, 2, 3, 4, 5, 10, 15)

# The largest Slavic ancestry column per row does not depend on the threshold,
# so it is computed once instead of once per threshold.
dominant_slavic = ethnicity_df[slavic_cols].idxmax(axis=1)

# For each threshold, keep the dominant group where PERCENT_SLAVIC reaches the
# threshold and leave NaN elsewhere - the same result the former
# filter-then-assign statements produced through index alignment.
# NOTE(review): PERCENT_SLAVIC is already rounded to three decimals, so shares
# slightly below a threshold can round up onto it; kept as is so the counts
# recorded below remain valid.
# All columns are added in a single assign() call instead of seven separate
# in-place inserts, each of which drew a pandas warning in the recorded run.
ethnicity_df = ethnicity_df.assign(
    **{
        f"slavic_counties_{pct}_percent": dominant_slavic.where(
            ethnicity_df["PERCENT_SLAVIC"] >= pct / 100
        )
        for pct in slavic_thresholds_pct
    }
)

  ethnicity_df["slavic_counties_1_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_2_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_3_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_4_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_5_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_10_percent"] = ethnicity_df[
  ethnicity_df["slavic_counties_15_percent"] = ethnicity_df[


In [67]:
# Number of rows with a value in the 1 % column (PERCENT_SLAVIC >= 0.01).
print(ethnicity_df["slavic_counties_1_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_1_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

2443


Unnamed: 0,slavic_counties_1_percent,COUNT
5,Polish,1932
2,Czech,304
6,Russian,130
4,Eastern European,26
11,Ukrainian,23
1,Croatian,8
3,Czechoslovakian,7
9,Slovak,5
8,Slavic,3
10,Slovene,2


In [68]:
# Number of rows with a value in the 2 % column (PERCENT_SLAVIC >= 0.02).
print(ethnicity_df["slavic_counties_2_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_2_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

1747


Unnamed: 0,slavic_counties_2_percent,COUNT
4,Polish,1372
1,Czech,249
5,Russian,86
8,Ukrainian,17
3,Eastern European,10
0,Croatian,4
6,Slovak,4
2,Czechoslovakian,3
7,Slovene,1
9,Yugoslavian,1


In [69]:
# Number of rows with a value in the 3 % column (PERCENT_SLAVIC >= 0.03).
print(ethnicity_df["slavic_counties_3_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_3_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

1285


Unnamed: 0,slavic_counties_3_percent,COUNT
4,Polish,1004
1,Czech,186
5,Russian,66
7,Ukrainian,12
3,Eastern European,7
6,Slovak,4
0,Croatian,3
2,Czechoslovakian,2
8,Yugoslavian,1


In [70]:
# Number of rows with a value in the 4 % column (PERCENT_SLAVIC >= 0.04).
print(ethnicity_df["slavic_counties_4_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_4_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

956


Unnamed: 0,slavic_counties_4_percent,COUNT
4,Polish,748
1,Czech,140
5,Russian,48
7,Ukrainian,10
6,Slovak,3
3,Eastern European,3
0,Croatian,2
2,Czechoslovakian,1
8,Yugoslavian,1


In [71]:
# Number of rows with a value in the 5 % column (PERCENT_SLAVIC >= 0.05).
print(ethnicity_df["slavic_counties_5_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_5_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

701


Unnamed: 0,slavic_counties_5_percent,COUNT
4,Polish,555
1,Czech,104
5,Russian,28
7,Ukrainian,6
6,Slovak,3
0,Croatian,2
3,Eastern European,2
2,Czechoslovakian,1


In [72]:
# Number of rows with a value in the 10 % column (PERCENT_SLAVIC >= 0.10).
print(ethnicity_df["slavic_counties_10_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_10_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

201


Unnamed: 0,slavic_counties_10_percent,COUNT
1,Polish,161
0,Czech,29
2,Russian,8
4,Ukrainian,2
3,Slovak,1


In [73]:
# Number of rows with a value in the 15 % column (PERCENT_SLAVIC >= 0.15).
print(ethnicity_df["slavic_counties_15_percent"].count())

# Rows per largest Slavic group at this threshold, most frequent first.
(
    ethnicity_df.groupby("slavic_counties_15_percent")
    .size()
    .reset_index(name="COUNT")
    .sort_values("COUNT", ascending=False)
)

54


Unnamed: 0,slavic_counties_15_percent,COUNT
1,Polish,42
0,Czech,8
2,Russian,3
3,Ukrainian,1


## Merge Data

In [74]:
# Attach the ancestry table to the county geometries via the shared AFFGEOID.
# The inner join keeps only counties present on both sides.
# validate="one_to_one" makes the merge raise if either side contains a
# repeated AFFGEOID, instead of silently multiplying rows.
ethnicity_gdf = counties_gdf.merge(
    ethnicity_df,
    on="AFFGEOID",
    how="inner",
    validate="one_to_one",
)

In [76]:
# Reproject to EPSG:9311 before export.
# NOTE(review): presumably chosen as an equal-area projection for US-wide
# maps - confirm.
ethnicity_gdf = ethnicity_gdf.to_crs(epsg=9311)

# Write the merged layer to disk; the path is relative to the working directory.
ethnicity_gdf.to_file("data/slavic_ancestry_per_county.gpkg")