In [14]:
import geopandas as gpd
import pandas as pd

## Open County data

In [15]:
# County boundary shapefile, reprojected to EPSG:4326 right after loading.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
counties_gdf = gpd.read_file(file_path).to_crs(epsg=4326)

## Get Language data

In [16]:
# ACS S1603 data export. low_memory=False makes pandas parse the file in a
# single pass instead of chunk by chunk, so dtype inference is consistent.
language_data_path = (
    "data/ACSST5Y2023.S1603_2025-06-16T011229/ACSST5Y2023.S1603-Data.csv"
)
language_df = pd.read_csv(language_data_path, low_memory=False)

#### Get Column names

In [17]:
# Column metadata that ships alongside the ACS S1603 data export.
column_metadata_path = (
    "data/ACSST5Y2023.S1603_2025-06-16T011229/ACSST5Y2023.S1603-Column-Metadata.csv"
)
acs_column_names = pd.read_csv(column_metadata_path)

In [18]:
# Maps the descriptive ACS column labels to the short column names used in the
# rest of the notebook.
col_renames = {
    # Geography identifier; renamed to match the county join key.
    "Geography": "GEOIDFQ",
    # Population counts.
    "Estimate!!Total!!Total population 5 years and over": "POPULATION",
    "Estimate!!Total!!Speak a language other than English at home!!Total population 5 years and over": "POPULATION_NOT_ENGLISH_AT_HOME",
    "Estimate!!Speak Spanish at home!!Speak a language other than English at home!!Total population 5 years and over": "POPULATION_SPANISH_AT_HOME",
}

In [19]:
# ACS S1603 variable codes to keep from the language table.
wanted_codes = ["S1603_C01_001E", "S1603_C04_001E", "S1603_C06_001E"]

# Select the codes through the column metadata so the resulting list follows
# the order in which the metadata file lists them.
is_wanted = acs_column_names["Column Name"].isin(wanted_codes)
language_cols = acs_column_names.loc[is_wanted, "Column Name"].tolist()

# Fail here with a clear message rather than letting a code that is absent
# from the metadata surface later as a KeyError on a renamed column.
missing_codes = sorted(set(wanted_codes) - set(language_cols))
if missing_codes:
    raise ValueError(
        f"ACS column metadata is missing expected codes: {missing_codes}"
    )

#### Back to Data

In [20]:
# Keep the geography id plus the selected ACS variables.
selected_df = language_df.loc[:, ["GEO_ID", *language_cols]]
# The first row carries the descriptive column labels: promote it to the
# header, drop it from the data, then shorten the labels.
selected_df.columns = selected_df.iloc[0]
language_df = selected_df.iloc[1:].rename(columns=col_renames)

In [21]:
# Cast the three population count columns to integers in one call.
count_columns = (
    "POPULATION",
    "POPULATION_NOT_ENGLISH_AT_HOME",
    "POPULATION_SPANISH_AT_HOME",
)
language_df = language_df.astype({column: int for column in count_columns})

## Merge Data

In [22]:
# Left join on GEOIDFQ: every county row is kept, and counties without a
# matching language row get missing values in the language columns.
language_gdf = counties_gdf.merge(
    right=language_df,
    how="left",
    on="GEOIDFQ",
)

In [23]:
# Non-English speakers at home who do not speak Spanish: all non-English
# speakers minus the Spanish speakers.
other_language_count = (
    language_gdf["POPULATION_NOT_ENGLISH_AT_HOME"]
    - language_gdf["POPULATION_SPANISH_AT_HOME"]
)
language_gdf["POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH"] = other_language_count

# Shares of POPULATION, expressed as percentages rounded to two decimals.
total_population = language_gdf["POPULATION"]
language_gdf["PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH"] = (
    other_language_count.div(total_population).mul(100).round(decimals=2)
)
language_gdf["PERCENT_POPULATION_LANGUAGE_SPANISH_AT_HOME"] = (
    language_gdf["POPULATION_SPANISH_AT_HOME"]
    .div(total_population)
    .mul(100)
    .round(decimals=2)
)

In [28]:
# Drop every row that has a missing value in any column (this includes
# counties that found no language row in the left merge).
language_gdf = language_gdf.dropna(axis="index", how="any")

In [29]:
# Reproject to EPSG:9311, then export every row whose STUSPS is not "PR".
language_gdf = language_gdf.to_crs(epsg=9311)
export_mask = language_gdf["STUSPS"] != "PR"
language_gdf.loc[export_mask].to_file("data/language.gpkg")

In [32]:
# Rows whose STUSPS is not "PR", ranked from the highest to the lowest share
# of people speaking Spanish at home.
display_mask = language_gdf["STUSPS"] != "PR"
language_gdf.loc[display_mask].sort_values(
    by="PERCENT_POPULATION_LANGUAGE_SPANISH_AT_HOME", ascending=False
)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOIDFQ,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry,POPULATION,POPULATION_NOT_ENGLISH_AT_HOME,POPULATION_SPANISH_AT_HOME,POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH,PERCENT_POPULATION_LANGUAGE_NOT_ENGLISH_NOT_SPANISH,PERCENT_POPULATION_LANGUAGE_SPANISH_AT_HOME
497,48,427,01383999,0500000US48427,48427,Starr,Starr County,TX,Texas,06,3168019235,15386849,"POLYGON ((83531.908 -2042353.274, 83703.676 -2...",59754.0,55260.0,55241.0,19.0,0.03,92.45
264,48,261,01383916,0500000US48261,48261,Kenedy,Kenedy County,TX,Texas,06,3777657843,1261927310,"MULTIPOLYGON (((261305.708 -2003968.269, 26196...",52.0,47.0,46.0,1.0,1.92,88.46
1331,48,323,01383944,0500000US48323,48323,Maverick,Maverick County,TX,Texas,06,3313828190,31862966,"POLYGON ((-65458.283 -1763836.238, -61370.81 -...",52728.0,46911.0,46271.0,640.0,1.21,87.75
2004,48,479,01384025,0500000US48479,48479,Webb,Webb County,TX,Texas,06,8706239656,36502178,"POLYGON ((-20981.285 -1861741.168, -11229.076 ...",245610.0,216461.0,214430.0,2031.0,0.83,87.31
3072,48,505,01384038,0500000US48505,48505,Zapata,Zapata County,TX,Texas,06,2585876042,154371160,"POLYGON ((54674.439 -1964034.942, 57501.664 -1...",12761.0,10839.0,10758.0,81.0,0.63,84.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,46,069,01266984,0500000US46069,46069,Hyde,Hyde County,SD,South Dakota,06,2229019409,14541548,"POLYGON ((25701.394 -40545.573, 25697.542 -365...",1210.0,12.0,0.0,12.0,0.99,0.00
1587,39,163,01074093,0500000US39163,39163,Vinton,Vinton County,OH,Ohio,06,1068008170,6781751,"POLYGON ((1473414.27 -486529.21, 1473214.302 -...",11978.0,179.0,0.0,179.0,1.49,0.00
2259,46,017,01265767,0500000US46017,46017,Buffalo,Buffalo County,SD,South Dakota,06,1220933806,42362357,"POLYGON ((33803.985 -89691.581, 36614.63 -8962...",1544.0,25.0,0.0,25.0,1.62,0.00
1958,38,091,01034220,0500000US38091,38091,Steele,Steele County,ND,North Dakota,06,1844441513,8435674,"POLYGON ((151782.787 262166.366, 151757.595 26...",1626.0,23.0,0.0,23.0,1.41,0.00
