In [53]:
import requests

In [54]:
import geopandas as gpd
import pandas as pd

In [55]:
from tqdm.notebook import tqdm

## Open State data

In [69]:
# Read the state boundary shapefile into a GeoDataFrame; later cells join
# the tabular data onto it by the "NAME" column.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
state_gdf = gpd.read_file(filename=file_path)

## Get Tribal Population Data (ACS table B02017)

In [70]:
# Fetch the variable metadata for group B02017 of the 2022 ACS 5-year
# dataset from the Census API and decode the JSON payload.
# - `timeout` stops a stalled connection from hanging the notebook forever.
# - `raise_for_status()` surfaces HTTP errors (4xx/5xx) immediately instead of
#   letting an error body be decoded (or fail to decode) as if it were metadata.
r = requests.get(
    "https://api.census.gov/data/2022/acs/acs5/groups/B02017.json",
    timeout=30,
)
r.raise_for_status()
columns_obj = r.json()

In [71]:
# Load the downloaded ACS B02017 data export.
# NOTE(review): the next cell treats row 0 as a second header row, so the
# columns presumably load as strings and `thousands` may have no effect here
# - confirm before relying on it.
df = pd.read_csv(
    filepath_or_buffer=(
        "data/ACSDT5Y2022.B02017_2025-09-02T223826/ACSDT5Y2022.B02017-Data.csv"
    ),
    thousands=",",
)

In [72]:
# Row 0 of the freshly loaded frame holds the descriptive column labels
# (e.g. "Geographic Area Name", which a later cell renames to "NAME").
# Promote that row to the header and drop it from the data.
#
# The guard makes the cell safe to re-run: without it, executing the cell a
# second time would silently promote the first real data row to the header
# and discard it.
if "Geographic Area Name" not in df.columns:
    df.columns = list(df.iloc[0])
    df = df.iloc[1:]

In [75]:
# Map verbose column labels to short names.
# Labels are "!!"-separated paths; only those whose first segment is
# "Estimate" are kept, and each is renamed to its stripped last segment.
# The final column is skipped, exactly as before.
rename_columns = {"Geographic Area Name": "NAME"}
rename_columns.update(
    {
        label: label.rpartition("!!")[2].strip()
        for label in tqdm(list(df.columns)[:-1], desc="Parsing Columns")
        if label.partition("!!")[0] == "Estimate"
    }
)

Parsing Columns:   0%|          | 0/110 [00:00<?, ?it/s]

In [84]:
# Apply the short names, then keep only the renamed columns
# (in the order they were registered in `rename_columns`).
df = df.rename(columns=rename_columns).loc[:, list(rename_columns.values())]

In [85]:
# Collect the columns to compare across tribes: everything except the "NAME"
# key and any label ending in ":" (those are excluded from the comparison).
estimate_cols = [
    name
    for name in rename_columns.values()
    if name != "NAME" and name[-1] != ":"
]

In [87]:
# Convert every estimate column to integers so they can be compared numerically.
df = df.astype(dict.fromkeys(estimate_cols, int))

In [88]:
# For each row, record the name of the estimate column holding the largest value.
df["most_tribe"] = df.loc[:, estimate_cols].idxmax(axis="columns")

In [92]:
# Count how many rows fall under each "most_tribe" value, largest first.
(
    df.groupby("most_tribe")
    .size()
    .reset_index(name="COUNT")
    .sort_values(by="COUNT", ascending=False)
)

Unnamed: 0,most_tribe,COUNT
5,Cherokee,14
1,"American Indian or Alaska Native tribes, not s...",13
0,All other American Indian tribes (with only on...,7
6,Chippewa,4
2,"American Indian tribes, not specified",4
10,Navajo,3
3,Arapaho,1
4,Central American Indian,1
7,Choctaw,1
8,Lumbee,1


## Merge Data

In [94]:
# Attach the tabular columns to the state geometries by matching on "NAME".
# An inner join keeps only names present in both frames.
gdf = state_gdf.merge(right=df, how="inner", on="NAME")

In [95]:
# Reproject to EPSG:9311 and write the result to disk.
# NOTE(review): 9311 is presumably a US-wide equal-area projection chosen for
# mapping - confirm and consider naming it as a constant.
gdf = gdf.to_crs(epsg=9311)
gdf.to_file(filename="data/Native_American_Tribe_Per_State.gpkg")