In [23]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [24]:
# Location of the state boundary shapefile (relative to the notebook).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [25]:
# Raw population export; the file name indicates ACS 5-year 2021, table B01003.
acs_pop = pd.read_csv(
    filepath_or_buffer="data/ACSDT5Y2021.B01003-2025-01-03T214138.csv"
)

In [26]:
# Data columns are labelled "<name>!!Estimate"; map each of them to the bare
# name in front of the "!!" separator.
rename_columns = {}
for column in list(acs_pop.columns):
    str_split = column.split("!!")
    if len(str_split) == 2 and str_split[1] == "Estimate":
        rename_columns[column] = str_split[0].strip()

# Drop every other "!!"-qualified column (presumably margin-of-error columns,
# when the export includes them -- confirm against the CSV). Left in place they
# would turn into rows after the transpose and break the integer cast on
# "Total" as well as the NAME join. This is a no-op when there are none.
acs_pop = acs_pop.drop(
    columns=[
        name
        for name in acs_pop.columns
        if "!!" in name and name not in rename_columns
    ]
)

states = list(rename_columns.values())
# The label column becomes "group"; after the transpose it supplies the header.
rename_columns["Label (Grouping)"] = "group"
states.append("group")
acs_pop = acs_pop.rename(columns=rename_columns)

In [27]:
# Turn columns into rows; reset_index() moves the former column labels into
# the first column of the transposed table.
acs_pop = acs_pop.transpose().reset_index()

# The first row carries the titles: promote it to the header, then drop it
# from the data.
acs_pop = acs_pop.set_axis(acs_pop.iloc[0], axis="columns").drop(
    index=acs_pop.index[0]
)

In [28]:
# Add the two columns used downstream:
#   POPULATION - "Total" with the commas removed, cast to int
#   NAME       - copy of "group", the key the state geometries are joined on
acs_pop = acs_pop.assign(
    POPULATION=lambda table: table["Total"].str.replace(",", "").astype(int),
    NAME=lambda table: table["group"],
)

In [29]:
# Keep only the join key and the population figure.
acs_pop = acs_pop.loc[:, ["NAME", "POPULATION"]]

## Merge Population With States Data

In [30]:
# Inner join on NAME: only states present in both the boundary file and the
# population table survive. Keep the geometry plus the two attributes, and
# renumber the rows from zero.
states_population_gdf = (
    states_df.merge(acs_pop, how="inner", on="NAME")[
        ["geometry", "NAME", "POPULATION"]
    ]
    .reset_index(drop=True)
)

## Get Ethnic Data

In [31]:
# Raw export for the Cuban population group; the file name indicates the ACS
# 5-year 2021 selected population tables, table B05001.
acs_cubans = pd.read_csv(
    filepath_or_buffer="data/ACSDT5YSPT2021.B05001-2025-01-03T213506.csv"
)

In [32]:
# Build a mapping from each three-part "<name>!!<...>!!Estimate" column label
# to the bare name in front of the first "!!".
rename_columns = {}
for column in acs_cubans.columns:
    str_split = column.split("!!")
    # Skip anything that is not a three-part label ending in "Estimate".
    if len(str_split) != 3 or str_split[2] != "Estimate":
        continue
    rename_columns[column] = str_split[0].strip()

In [33]:
# Names taken from the estimate columns, in column order.
states = [*rename_columns.values()]

In [34]:
# The label column is renamed to "group" and added to the keep-list as its
# last entry.
rename_columns.update({"Label (Grouping)": "group"})
states += ["group"]

# Apply the new column names.
acs_cubans = acs_cubans.rename(rename_columns, axis="columns")

In [35]:
# Keep only the renamed estimate columns plus "group" (which ends up last).
acs_cubans = acs_cubans.loc[:, states]

# Take "group" back out so `states` holds just the names again.
del states[states.index("group")]

#### The last row holds the column titles: promote it to the header, then drop it from the data.

In [36]:
# Flip the table so each former column becomes a row; reset_index() moves the
# former column labels (the names plus "group") into the first column.
acs_cubans = acs_cubans.T.reset_index()

# The row labelled "group" carries the titles that should become the column
# names. Locate it by label instead of a fixed position so the cell keeps
# working when the number of estimate columns in the export changes.
header_rows = acs_cubans.index[(acs_cubans.iloc[:, 0] == "group").to_numpy()]
if len(header_rows) != 1:
    raise ValueError(
        f"expected exactly one 'group' header row, found {len(header_rows)}"
    )
header_row = header_rows[0]

# Promote that row to the header, then remove it from the data.
acs_cubans.columns = acs_cubans.loc[header_row]
acs_cubans = acs_cubans.drop(index=header_row)

In [38]:
# Derive the two columns the merge needs and discard everything else:
#   CUBANS - the "Total:" figures as text with commas removed, cast to int
#   NAME   - copy of "group", the key shared with the state table
acs_cubans = acs_cubans.assign(
    CUBANS=lambda table: (
        table["Total:"].astype(str).str.replace(",", "", regex=True).astype(int)
    ),
    NAME=lambda table: table["group"],
)[["NAME", "CUBANS"]]

## Merge Cuban And Population

In [39]:
# Left join: every state from the population table is kept, and states with no
# matching row in the Cuban table get a missing CUBANS value.
cubans_per_state_gdf = states_population_gdf.merge(
    acs_cubans,
    how="left",
    on="NAME",
)

In [55]:
# States without a row in the Cuban table come out of the left merge with a
# missing CUBANS value. Treat those as zero *before* deriving the rates: the
# rounded columns below are cast to int, and pandas refuses to cast NaN to an
# integer dtype, so the cell would fail on a fresh kernel otherwise.
cubans_per_state_gdf["CUBANS"] = cubans_per_state_gdf["CUBANS"].fillna(0)

# Unrounded rate per 1,000 residents.
cubans_per_state_gdf["per_1000"] = cubans_per_state_gdf["CUBANS"] / (
    cubans_per_state_gdf["POPULATION"] / 1000
)

# Whole-number rates at coarser population scales.
for rate_column, residents in (
    ("per_10k", 10_000),
    ("per_100k", 100000),
    ("per_500k", 500_000),
    ("per_1m", 1_000_000),
):
    cubans_per_state_gdf[rate_column] = (
        (
            cubans_per_state_gdf["CUBANS"]
            / (cubans_per_state_gdf["POPULATION"] / residents)
        )
        .round()
        .astype(int)
    )

In [57]:
# Replace any remaining missing values with 0.
cubans_per_state_gdf = cubans_per_state_gdf.fillna(value=0)

In [58]:
# Reproject the geometries to EPSG:9311.
cubans_per_state_gdf = cubans_per_state_gdf.to_crs(crs=9311)

In [59]:
# Write the finished layer to disk.
cubans_per_state_gdf.to_file(filename="data/cubans.gpkg")