In [198]:
import geopandas as gpd
import pandas as pd

## Open county data

In [199]:
# Read the county boundary shapefile, keep only the geometry and AFFGEOID
# columns, and reproject to EPSG:4326. Rows with STATEFP "09" are left out
# here; Connecticut geometries come from the planning-region GeoJSON instead.
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
counties_gdf = (
    gpd.read_file(file_path)
    .loc[lambda gdf: gdf["STATEFP"] != "09", ["geometry", "AFFGEOID"]]
    .to_crs(4326)
)

#### Format for CT

In [200]:
# Load the Connecticut planning regions and derive an AFFGEOID by prefixing
# the planning-region GEOID with "0500000US", so the key has the same shape
# as the AFFGEOID column of the county shapefile.
ct_gdf = gpd.read_file("data/CT_Planning_Regions.geojson").assign(
    AFFGEOID=lambda gdf: "0500000US" + gdf["PlanningRegionFIPS_GEOID"]
)

In [201]:
# Put the Connecticut planning regions in front of the remaining county
# geometries, producing a single frame with a fresh 0..n-1 index.
#
# `counties_gdf` is both the input and the output of this cell, so rows that
# already carry a planning-region AFFGEOID are filtered out first. On a first
# run the filter removes nothing; on a re-run it stops Connecticut from being
# appended a second time.
counties_gdf = pd.concat(
    [
        ct_gdf[["geometry", "AFFGEOID"]],
        counties_gdf[~counties_gdf["AFFGEOID"].isin(ct_gdf["AFFGEOID"])],
    ],
    ignore_index=True,
)

## Get Ethnicity amounts

In [202]:
# Raw ACS B04006 data export. low_memory=False makes pandas parse the file
# in a single pass instead of in chunks.
acs_ethnicity = pd.read_csv(
    filepath_or_buffer="data/ACSDT5Y2023.B04006_2025-05-02T185520/ACSDT5Y2023.B04006-Data.csv",
    low_memory=False,
)

#### Get Column names

In [203]:
# Column metadata file that ships alongside the ACS B04006 data export.
acs_column_names = pd.read_csv(
    filepath_or_buffer="data/ACSDT5Y2023.B04006_2025-05-02T185520/ACSDT5Y2023.B04006-Column-Metadata.csv"
)

#### Back to Data

In [204]:
# Row 0 of the raw frame carries the descriptive labels ("A!!B!!C" paths);
# promote it, whitespace-trimmed, to the column header.
acs_ethnicity.columns = acs_ethnicity.iloc[0].str.strip()

# Keep labelled columns whose leading "!!" segment is not "Margin of Error".
# The selection uses a positional boolean mask built from the trimmed header
# (rather than a list of the untrimmed row-0 values), so it always agrees with
# the header assigned above, never duplicates a column, and leaves out any
# column whose label is missing.
header_labels = pd.Series(acs_ethnicity.columns, dtype="object")
is_kept = header_labels.notna() & (
    header_labels.str.split("!!").str[0] != "Margin of Error"
)

# Drop the label row itself and take an explicit copy so that later column
# assignments act on an independent frame rather than a slice of the raw data.
acs_ethnicity_totals_df = acs_ethnicity.loc[:, is_kept.to_numpy()].iloc[1:].copy()

# Shorten each label to its final "!!" segment.
acs_ethnicity_totals_df.columns = acs_ethnicity_totals_df.columns.str.split("!!").str[
    -1
]

In [205]:
# Characters 9-10 of the "Geography" identifier are stored as the state code.
acs_ethnicity_totals_df["state_id"] = acs_ethnicity_totals_df["Geography"].str.slice(
    9, 11
)

# Discard rows whose state code is one of the listed values, then renumber.
# NOTE(review): these look like the FIPS codes of the US territories - confirm.
acs_ethnicity_totals_df = acs_ethnicity_totals_df.loc[
    lambda df: ~df["state_id"].isin(["60", "66", "69", "72", "78"])
].reset_index(drop=True)

In [206]:
# For every row, record the name of the ancestry column with the highest
# count. Identifier, total and catch-all columns are removed before the
# comparison; the remaining columns are cast to int so idxmax compares
# numbers rather than strings.
#
# "LARGEST_ANCESTRY" and "AFFGEOID" are also excluded (errors="ignore" makes
# that a no-op when they are absent) so the cell can be re-run after it has
# already added its own output column, or after "Geography" has been renamed,
# without feeding text into astype(int).
#
# NOTE(review): labels ending in ":" (e.g. "Subsaharan African:") appear to be
# group subtotals and compete here with individual ancestries - confirm that
# this is intended.
acs_ethnicity_totals_df["LARGEST_ANCESTRY"] = (
    acs_ethnicity_totals_df.drop(
        columns=[
            "state_id",
            "Geography",
            "AFFGEOID",
            "Geographic Area Name",
            "Total:",
            "Unclassified or not reported",
            "Other groups",
            "LARGEST_ANCESTRY",
        ],
        errors="ignore",
    )
    .astype(int)
    .idxmax(axis=1)
)

In [207]:
# Number of rows per winning ancestry, most frequent first.
(
    acs_ethnicity_totals_df.groupby("LARGEST_ANCESTRY")
    .size()
    .reset_index(name="Count")
    .sort_values("Count", ascending=False)
)

Unnamed: 0,LARGEST_ANCESTRY,Count
7,German,1719
2,English,600
0,American,528
8,Irish,185
5,French (except Basque),32
9,Italian,29
10,Norwegian,19
14,Subsaharan African:,9
15,West Indian (except Hispanic groups):,6
4,Finnish,5


## Merge Data

In [208]:
# Give the ACS identifier column the same name as the key column of the
# geometry frame so the two can be joined on it.
acs_ethnicity_totals_df = acs_ethnicity_totals_df.rename(
    {"Geography": "AFFGEOID"}, axis="columns"
)

In [209]:
# Attach the ACS columns to the geometries by AFFGEOID. A left join keeps
# every geometry row, including those with no matching ACS record.
ethnicity_per_county_gdf = counties_gdf.merge(
    right=acs_ethnicity_totals_df,
    how="left",
    on="AFFGEOID",
)

In [210]:
# Reproject the merged layer to EPSG:9311 and write it out as a GeoPackage.
ethnicity_per_county_gdf = ethnicity_per_county_gdf.to_crs(epsg=9311)
ethnicity_per_county_gdf.to_file(filename="data/largest_group.gpkg")