In [82]:
import geopandas as gpd
import pandas as pd

## Open County data

In [91]:
# Location of the county boundary shapefile.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
# Read the county polygons and reproject them to EPSG:4326 in a single chain.
counties_gdf = gpd.read_file(file_path).to_crs(epsg=4326)

## Get Language Data

In [92]:
# Raw ACS S1603 export. Row 0 of the parsed frame is later used as the column
# header, so the file is read as-is here; low_memory=False makes pandas parse
# it in one pass instead of in chunks.
language_df = pd.read_csv(
    filepath_or_buffer="data/ACSST5Y2023.S1603_2025-06-16T011229/ACSST5Y2023.S1603-Data.csv",
    low_memory=False,
)

#### Get Column names

In [93]:
# Column metadata shipped alongside the ACS data file; its "Column Name" field
# is what the column selection further down filters on.
acs_column_names = pd.read_csv(
    filepath_or_buffer="data/ACSST5Y2023.S1603_2025-06-16T011229/ACSST5Y2023.S1603-Column-Metadata.csv"
)

In [94]:
# Short names for the verbose ACS header labels, given as ordered
# (label, short name) pairs.
col_renames = dict(
    [
        ("Estimate!!Total!!Total population 5 years and over", "POPULATION"),
        (
            "Estimate!!Total!!Speak a language other than English at home!!Total population 5 years and over",
            "POPULATION_NOT_ENGLISH_AT_HOME",
        ),
        (
            "Estimate!!Speak Spanish at home!!Speak a language other than English at home!!Total population 5 years and over",
            "POPULATION_SPANISH_AT_HOME",
        ),
        # Renamed to match the key the county merge joins on.
        ("Geography", "GEOIDFQ"),
    ]
)

In [95]:
# Keep the three S1603 column codes of interest, in the order they appear in
# the metadata table.
language_cols = acs_column_names.loc[
    acs_column_names["Column Name"].isin(
        ["S1603_C01_001E", "S1603_C04_001E", "S1603_C06_001E"]
    ),
    "Column Name",
].tolist()

#### Back to Data

In [96]:
# Narrow the export to the geography id plus the selected estimate columns.
selected_df = language_df[["GEO_ID", *language_cols]]
# Row 0 carries the labels for each column: promote it to the header ...
label_row = selected_df.iloc[0]
selected_df.columns = label_row
# ... then drop that row from the data and switch to the short names.
language_df = selected_df.iloc[1:].rename(columns=col_renames)

In [97]:
# Cast the three count columns to integers in one call so they can be used in
# the arithmetic that follows the merge.
language_df = language_df.astype(
    {
        "POPULATION": int,
        "POPULATION_NOT_ENGLISH_AT_HOME": int,
        "POPULATION_SPANISH_AT_HOME": int,
    }
)

## Merge Data

In [110]:
# Left join on GEOIDFQ: every county geometry is kept, and counties with no
# matching row in the language table get NaN in the language columns.
language_gdf = counties_gdf.merge(
    right=language_df,
    how="left",
    on="GEOIDFQ",
)

In [111]:
# Speakers of a language that is neither English nor Spanish: everyone who
# speaks a non-English language at home minus those who speak Spanish.
other_language_count = language_gdf["POPULATION_NOT_ENGLISH_AT_HOME"].sub(
    language_gdf["POPULATION_SPANISH_AT_HOME"]
)
language_gdf["POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH"] = other_language_count

# Same figure as a percentage of the POPULATION column, to two decimal places.
other_language_percent = (
    language_gdf["POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH"]
    .div(language_gdf["POPULATION"])
    .mul(100)
    .round(2)
)
language_gdf[
    "PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH"
] = other_language_percent

In [112]:
# Drop only the counties that have no language figures. The left merge leaves
# NaN in these columns for any county without a matching GEOIDFQ; limiting the
# check to them means a null in an unrelated shapefile attribute no longer
# silently removes a county from the result.
language_gdf = language_gdf.dropna(
    subset=[
        "POPULATION",
        "POPULATION_NOT_ENGLISH_AT_HOME",
        "POPULATION_SPANISH_AT_HOME",
        "POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH",
        "PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH",
    ]
)

In [113]:
# Reproject to EPSG:9311 before exporting.
language_gdf = language_gdf.to_crs(epsg=9311)
# Write the result to disk; the output format follows from the file extension.
language_gdf.to_file(filename="data/language.gpkg")

In [114]:
# Display the counties ranked from the highest to the lowest share of
# residents speaking a language other than English or Spanish at home.
language_gdf.sort_values(
    by="PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH",
    ascending=False,
)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOIDFQ,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry,POPULATION,POPULATION_NOT_ENGLISH_AT_HOME,POPULATION_SPANISH_AT_HOME,POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH,PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH
886,02,050,01419966,0500000US02050,02050,Bethel,Bethel Census Area,AK,Alaska,05,105310429084,12678050559,"MULTIPOLYGON (((-3101902.04 2893602.272, -3101...",16530.0,9875.0,66.0,9809.0,59.34
319,04,001,00025441,0500000US04001,04001,Apache,Apache County,AZ,Arizona,06,29003487955,54140333,"POLYGON ((-887636.263 -836070.038, -887635.984...",61741.0,32320.0,1420.0,30900.0,50.05
2350,39,075,01074050,0500000US39075,39075,Holmes,Holmes County,OH,Ohio,06,1094566305,3694365,"POLYGON ((1486161.522 -320411.558, 1486197.987...",40624.0,20371.0,149.0,20222.0,49.78
2387,02,016,01419965,0500000US02016,02016,Aleutians West,Aleutians West Census Area,AK,Alaska,05,11377949308,25183571468,"MULTIPOLYGON (((-4288407.995 3426181.085, -428...",5059.0,2839.0,478.0,2361.0,46.67
2999,35,031,00929107,0500000US35031,35031,McKinley,McKinley County,NM,New Mexico,06,14118189295,12689169,"POLYGON ((-821272.444 -1026257.454, -821097.93...",66961.0,34007.0,3640.0,30367.0,45.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3050,72,149,01804555,0500000US72149,72149,Villalba,Villalba Municipio,PR,Puerto Rico,13,92200653,3595301,"POLYGON ((3539506.685 -2301734.089, 3539531.75...",20893.0,20090.0,20090.0,0.0,0.00
3061,72,027,01804493,0500000US72027,72027,Camuy,Camuy Municipio,PR,Puerto Rico,13,120259577,40304880,"POLYGON ((3491696.17 -2276406.736, 3492711.031...",31672.0,30496.0,30496.0,0.0,0.00
3208,49,031,01448029,0500000US49031,49031,Piute,Piute County,UT,Utah,06,1963753824,20286511,"POLYGON ((-1086336.197 -642608.539, -1086014.3...",1649.0,83.0,83.0,0.0,0.00
3198,72,085,01804523,0500000US72085,72085,Las Piedras,Las Piedras Municipio,PR,Puerto Rico,13,87748419,32509,"POLYGON ((3598485.329 -2274064.031, 3598671.26...",33897.0,33078.0,33078.0,0.0,0.00
