In [1]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [36]:
# Location of the state-boundary shapefile (presumably the 2018 Census
# cartographic boundary file, judging by its name -- confirm provenance).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load the shapefile with geopandas; `states_df` is merged with the age
# table on its "NAME" column further down.
states_df = gpd.read_file(filename=file_path)

## Get Age Data

In [37]:
# Read the ACS S0101 export. `thousands=","` lets pandas treat commas as
# digit-group separators in columns that parse cleanly as numbers; columns
# that also hold non-numeric text stay as strings (later cells strip commas
# themselves).
acs_age = pd.read_csv(
    filepath_or_buffer="data/ACSST1Y2023.S0101-2024-12-12T233736.csv",
    thousands=",",
)

In [62]:
# Build one {original column name -> geography name} mapping per sex category.
# Columns of interest are named "<geography>!!<Male|Female|Total>!!Estimate";
# anything that does not split into exactly three "!!"-separated parts, or
# whose last part is not "Estimate", is ignored.
rename_columns_female = {}
rename_columns_male = {}
rename_columns_totals = {}

# Dispatch table: middle name component -> mapping that collects it.
mapping_by_category = {
    "Male": rename_columns_male,
    "Female": rename_columns_female,
    "Total": rename_columns_totals,
}

for column in acs_age.columns:
    parts = column.split("!!")
    if len(parts) != 3 or parts[2] != "Estimate":
        continue
    target_mapping = mapping_by_category.get(parts[1])
    if target_mapping is not None:
        # Geography names can carry stray surrounding whitespace.
        target_mapping[column] = parts[0].strip()

In [63]:
# Column order for the totals table: every geography name first, then the
# "group" label column last (a later cell relies on "group" being the final
# row after transposing).
#
# A later cell adds "Label (Grouping)" -> "group" to `rename_columns_totals`
# in place. Filtering "group" out here keeps this cell safe to re-run after
# that one; otherwise "group" would be listed twice and the column selection
# downstream would pick up a duplicated column.
states = [name for name in rename_columns_totals.values() if name != "group"]
states.append("group")

In [116]:
# Also rename the row-label column to "group" (this mutates the mapping in
# place), then apply the whole mapping to the raw table's columns.
rename_columns_totals.update({"Label (Grouping)": "group"})
acs_age_totals = acs_age.rename(rename_columns_totals, axis="columns")

### Totals Data

In [117]:
# Keep only the per-geography total columns plus the "group" label column,
# in the order given by `states`.
acs_age_totals = acs_age_totals.loc[:, states]

In [118]:
# Transpose so that each row is one former column (a geography, or "group")
# and each column is one row of the source table. `reset_index` moves the
# former column names into the first column.
acs_age_totals = acs_age_totals.T.reset_index()

# The row that came from the "group" column holds the labels that become the
# header. Locate it by value rather than by a hard-coded position (it was
# `52`), so the cell keeps working if the number of geographies changes.
# After `reset_index` the index is 0..n-1, so label and position coincide.
header_hits = acs_age_totals.index[
    (acs_age_totals.iloc[:, 0] == "group").to_numpy()
]
if len(header_hits) == 0:
    raise ValueError('no "group" row found after transposing; cannot build header')
header_pos = header_hits[0]

# Promote that row to the header, then drop it (and anything after it) from
# the data rows.
acs_age_totals.columns = acs_age_totals.iloc[header_pos]
acs_age_totals = acs_age_totals.drop(acs_age_totals.index[header_pos:])

# Normalise labels into identifier-like names: spaces become underscores and
# any remaining surrounding whitespace is stripped.
acs_age_totals.columns = [
    col.replace(" ", "_").strip() for col in acs_age_totals.columns
]

In [119]:
# Columns kept for the analysis, in order: the geography label, the overall
# total, then the five-year age brackets from "Under 5" up to "85 and over".
# The regular "<lo>_to_<lo+4>_years" brackets (5-9 through 80-84) are
# generated rather than spelled out.
cat_cols = (
    ["group", "Total_population", "Under_5_years"]
    + [f"{lower}_to_{lower + 4}_years" for lower in range(5, 85, 5)]
    + ["85_years_and_over"]
)

In [120]:
# Convert every numeric column (everything except the "group" label) from
# comma-grouped text such as "1,234,567" to int.
#
# `astype(str)` first makes the cell safe to re-run: on a second execution the
# columns are already int, and calling `.str` on them directly would raise.
# `regex=False` makes the comma a literal match regardless of the pandas
# version's default for `str.replace`.
for col in cat_cols[1:]:
    acs_age_totals[col] = (
        acs_age_totals[col]
        .astype(str)
        .str.replace(",", "", regex=False)
        .astype(int)
    )

In [121]:
# Keep only the label, total and age-bracket columns. Take an explicit copy:
# later cells assign new columns to this frame, and assigning into a
# column-subset result can trigger pandas' SettingWithCopyWarning.
acs_age_totals = acs_age_totals[cat_cols].copy()

In [122]:
# For each row, record the name of the age-bracket column holding the largest
# count. The first two entries of `cat_cols` ("group" and "Total_population")
# are not brackets and are skipped.
age_bracket_cols = cat_cols[2:]
acs_age_totals["most_populated_group"] = acs_age_totals[age_bracket_cols].idxmax(
    axis="columns"
)

In [123]:
# Human-readable label for the winning bracket: underscores become spaces and
# the " years" token is removed ("5_to_9_years" -> "5 to 9").
#
# The token is removed by value rather than by slicing off the last six
# characters: slicing is only correct when the name ends in " years" and would
# turn "85 years and over" into "85 years an". Removing the token yields
# "85 and over" and gives the same result as before for every other bracket.
acs_age_totals["most_populated_group_label"] = (
    acs_age_totals["most_populated_group"]
    .str.replace("_", " ", regex=False)
    .str.replace(" years", "", regex=False)
)

In [124]:
# Population of the winning bracket. "most_populated_group" was computed as
# the idxmax over the age-bracket columns (`cat_cols[2:]`), so the value in
# that column is simply the row-wise maximum over the same columns. Using
# the vectorised `max` avoids a Python-level lookup per row.
acs_age_totals["most_populated_group_population"] = acs_age_totals[
    cat_cols[2:]
].max(axis=1)

## Combine Data

In [125]:
# Rename the label column to "NAME" so it can serve as the join key in the
# merge with the boundary data.
acs_age_totals = acs_age_totals.rename({"group": "NAME"}, axis="columns")

In [130]:
# Left-join the age table onto the boundaries by name. Geographies with no
# matching age row keep their geometry and get missing values in the age
# columns.
age_gdf = states_df.merge(right=acs_age_totals, how="left", on="NAME")

In [131]:
# Columns needed in the output layer.
output_columns = [
    "NAME",
    "geometry",
    "most_populated_group",
    "most_populated_group_label",
    "most_populated_group_population",
]

# Keep just those columns and discard any row with a missing value in them
# (e.g. boundaries that found no match in the left join).
age_gdf = age_gdf[output_columns].dropna(axis="index", how="any")

In [132]:
# Reproject the geometries to the coordinate reference system EPSG:9311.
age_gdf = age_gdf.to_crs(epsg=9311)

In [133]:
# Write the final layer to disk; the output format is inferred by geopandas
# from the ".gpkg" extension.
output_path = "data/age_group.gpkg"
age_gdf.to_file(output_path)