In [1]:
import geopandas as gpd
import pandas as pd

## Open County data

In [2]:
# Location of the county boundary shapefile, relative to the notebook's
# working directory (file name suggests the 2024 1:500k county release).
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
# Load the county geometries and attributes; later cells join on this
# frame's "GEOIDFQ" column.
counties_gdf = gpd.read_file(file_path)

## Load ancestry counts (ACS table B04006)

In [3]:
# Raw ACS B04006 export. low_memory=False makes pandas read the file in one
# pass instead of chunk-by-chunk dtype inference.
acs_data_path = "data/ACSDT5Y2023.B04006_2025-05-02T185520/ACSDT5Y2023.B04006-Data.csv"
acs_ethnicity = pd.read_csv(acs_data_path, low_memory=False)

#### Get Column names

In [4]:
# Column metadata that accompanies the ACS export; the next cell reads its
# "Column Name" column to pick the column codes to keep.
acs_metadata_path = "data/ACSDT5Y2023.B04006_2025-05-02T185520/ACSDT5Y2023.B04006-Column-Metadata.csv"
acs_column_names = pd.read_csv(acs_metadata_path)

In [5]:
# Column codes to keep from the ACS export. The resulting list follows the
# row order of the metadata file, not the order written here.
wanted_codes = ["B04006_005E", "B04006_001E"]
is_wanted = acs_column_names["Column Name"].isin(wanted_codes)
ethnicity_cols = acs_column_names.loc[is_wanted, "Column Name"].tolist()

#### Back to Data

In [6]:
# Maps the descriptive ACS column labels (used as headers once the label row
# is promoted) to the short names used in the rest of the notebook.
# Keep the two non-ancestry entries (geography key and overall total) at the
# end: code that derives the ancestry column list from this mapping treats
# them specially.
rename_cols = {
    "Estimate!!Total:!!American": "American",
    # Renamed to match the join key used when merging with the county shapes.
    "Geography": "GEOIDFQ",
    "Estimate!!Total:": "TOTAL",
}

In [7]:
# The first data row of the ACS export is used as the header: promote it,
# drop it from the data, then shorten the labels via rename_cols.
selected_acs = acs_ethnicity[["GEO_ID", *ethnicity_cols]]
label_row = selected_acs.iloc[0]
ethnicity_per_county_df = (
    selected_acs.iloc[1:]
    .set_axis(label_row, axis=1)
    .rename(columns=rename_cols)
)

In [8]:
# Ancestry count columns are every renamed column except the geography key
# and the overall total. Selecting by name rather than by position keeps the
# result correct if entries in rename_cols are added or reordered.
non_ancestry_cols = ("GEOIDFQ", "TOTAL")
ancestry_cols = [
    column for column in rename_cols.values() if column not in non_ancestry_cols
]

In [9]:
# Cast every count column to integers. TOTAL is added explicitly because it
# is not part of ancestry_cols; without it the column would stay as text in
# the merged frame and in the exported file. dict.fromkeys de-duplicates
# while preserving order, in case TOTAL is ever listed in ancestry_cols.
count_cols = list(dict.fromkeys([*ancestry_cols, "TOTAL"]))
ethnicity_per_county_df[count_cols] = ethnicity_per_county_df[count_cols].astype(
    int
)

In [15]:
# Percentage of the TOTAL count found in the American column, rounded to one
# decimal place. Both operands are cast here so the cell does not depend on
# the dtypes left behind by earlier cells.
# NOTE(review): a TOTAL of 0 yields inf/NaN instead of raising - confirm that
# is acceptable for the exported layer.
american_count = ethnicity_per_county_df["American"].astype(int)
total_count = ethnicity_per_county_df["TOTAL"].astype(int)
ethnicity_per_county_df["PERCENT_AMERICAN"] = (
    american_count.div(total_count).mul(100).round(decimals=1)
)

## Merge Data

In [17]:
# Left join on the shared GEOIDFQ key: every row of counties_gdf is kept, and
# rows without a match in the ACS table get missing values.
ethnicity_per_county_gdf = counties_gdf.merge(
    right=ethnicity_per_county_df,
    how="left",
    on="GEOIDFQ",
)

In [18]:
# Derive id columns from fixed character positions of GEOIDFQ.
# Assumes a 9-character prefix precedes the code, with the state part in the
# first two characters after it - TODO confirm against the data.
id_start = 9
state_end = 11
geoidfq = ethnicity_per_county_gdf["GEOIDFQ"]
ethnicity_per_county_gdf["county_id"] = geoidfq.str.slice(start=id_start)
ethnicity_per_county_gdf["state_id"] = geoidfq.str.slice(
    start=id_start, stop=state_end
)

In [19]:
# Reproject to EPSG:9311 before export (presumably chosen as an equal-area
# projection for US-wide mapping - confirm), then write the result to disk.
output_path = "data/americans_per_county.gpkg"
ethnicity_per_county_gdf = ethnicity_per_county_gdf.to_crs(epsg=9311)
ethnicity_per_county_gdf.to_file(output_path)