In [1]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [2]:
# Location of the US state boundary shapefile, relative to the notebook.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Read the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

## Get Age Data

In [3]:
# Load the ACS S0101 (age and sex) export. `thousands=","` lets pandas parse
# comma-grouped numbers in columns that are entirely numeric.
acs_age = pd.read_csv(
    filepath_or_buffer="data/ACSST1Y2023.S0101-2024-12-12T233736.csv",
    thousands=",",
)

In [4]:
# Map raw ACS column headers to the bare state name, one mapping per sex
# category. Only headers shaped like "<state>!!<category>!!Estimate" are kept;
# every other column is skipped.
rename_columns_female, rename_columns_male, rename_columns_totals = {}, {}, {}
for header in acs_age.columns:
    parts = header.split("!!")
    # Guard clause: skip anything that is not a three-part "Estimate" header.
    if len(parts) != 3 or parts[2] != "Estimate":
        continue
    state_name = parts[0].strip()
    category = parts[1]
    if category == "Male":
        rename_columns_male[header] = state_name
    elif category == "Female":
        rename_columns_female[header] = state_name
    elif category == "Total":
        rename_columns_totals[header] = state_name

In [5]:
# Columns to keep for the totals table: every state name, then the "group"
# label column last.
# "group" is filtered out before being appended so the list stays free of
# duplicates if this cell is re-run after "group" has been added to
# rename_columns_totals (a duplicate would select the label column twice).
states = [name for name in rename_columns_totals.values() if name != "group"]
states.append("group")

In [6]:
# Also rename the row-label column to "group", then apply the whole mapping so
# each "<state>!!Total!!Estimate" column is named after its state.
rename_columns_totals.update({"Label (Grouping)": "group"})
acs_age_totals = acs_age.rename(rename_columns_totals, axis="columns")

### Totals Data

In [7]:
# Keep only the per-state total columns plus the "group" label column,
# in the order given by `states`.
acs_age_totals = acs_age_totals.loc[:, states]

In [8]:
# The "group" column holds the age-group labels and must be the last column:
# after the transpose it becomes the last row, which is promoted to the header.
# Deriving its position replaces a hard-coded row number (52) that was only
# correct for exactly 52 state columns. The check also makes an accidental
# re-run of this cell fail loudly instead of mangling the table.
if acs_age_totals.columns[-1] != "group":
    raise ValueError(
        'expected "group" to be the last column before transposing; '
        "re-run the notebook from the top"
    )
label_row = len(acs_age_totals.columns) - 1

# One row per state; the former column names move into the first column.
acs_age_totals = acs_age_totals.T.reset_index()
# Promote the label row to column headers (the first column becomes "group").
acs_age_totals.columns = acs_age_totals.iloc[label_row]
# Remove the label row itself, leaving only the state rows.
acs_age_totals = acs_age_totals.drop(acs_age_totals.index[label_row:])

In [9]:
# Normalize headers: trim surrounding whitespace and replace spaces with
# underscores (e.g. "Total population" -> "Total_population").
acs_age_totals.columns = list(
    map(lambda label: label.strip().replace(" ", "_"), acs_age_totals.columns)
)

In [10]:
# Summary age-category columns: a few explicit ranges followed by the
# "<age>_years_and_over" thresholds.
cat_1_cols = [
    "5_to_14_years",
    "15_to_17_years",
    "Under_18_years",
    "18_to_24_years",
    "15_to_44_years",
    *(f"{age}_years_and_over" for age in (16, 18, 21, 60, 62, 65, 75)),
]

In [11]:
# Convert the summary-category columns from comma-grouped strings
# (e.g. "1,234") to integers.
acs_age_totals = acs_age_totals.assign(
    **{
        name: acs_age_totals[name].str.replace(",", "").astype(int)
        for name in cat_1_cols
    }
)

In [12]:
# Total population plus the detailed age buckets: "Under_5_years", the
# five-year bands from 5-9 through 80-84, and the open-ended 85+ band.
cat_2_cols = (
    ["Total_population", "Under_5_years"]
    + [f"{start}_to_{start + 4}_years" for start in range(5, 85, 5)]
    + ["85_years_and_over"]
)

In [13]:
# Convert the total and five-year-band columns from comma-grouped strings
# (e.g. "1,234") to integers.
acs_age_totals = acs_age_totals.assign(
    **{
        name: acs_age_totals[name].str.replace(",", "").astype(int)
        for name in cat_2_cols
    }
)

In [14]:
# Working-age population per state: everyone 18 and over, minus the sum of the
# five-year bands from 65-69 upward.
acs_age_totals["working_age"] = acs_age_totals["18_years_and_over"].sub(
    acs_age_totals[
        [
            "65_to_69_years",
            "70_to_74_years",
            "75_to_79_years",
            "80_to_84_years",
            "85_years_and_over",
        ]
    ].sum(axis=1)
)

In [15]:
# Share of each state's total population aged 18 and over.
# Stored as a fraction (not multiplied by 100).
acs_age_totals["percent_over_18"] = acs_age_totals["18_years_and_over"].div(
    acs_age_totals["Total_population"]
)

In [16]:
# Share of each state's total population that is working age.
# Stored as a fraction (not multiplied by 100).
acs_age_totals["working_age_percent"] = acs_age_totals["working_age"].div(
    acs_age_totals["Total_population"]
)

In [17]:
# Share of the 18-and-over population that is working age.
# Stored as a fraction (not multiplied by 100).
acs_age_totals["percent_over_18_working_age"] = acs_age_totals["working_age"].div(
    acs_age_totals["18_years_and_over"]
)

## Combine Data

In [93]:
# Name the state-name column "NAME" so it can serve as the key for the merge
# with the state geometries.
acs_age_totals = acs_age_totals.rename({"group": "NAME"}, axis="columns")

In [94]:
# Left-join the age table onto the state geometries by state name; every row of
# states_df is kept, with missing values where no age data matches.
gender_gdf = states_df.merge(
    right=acs_age_totals,
    how="left",
    on="NAME",
)

In [98]:
# Keep only the columns needed for the export and drop rows that have a missing
# value in any of them (e.g. geometries with no matching age data).
gender_gdf = gender_gdf.loc[
    :, ["NAME", "geometry", "working_age", "working_age_percent"]
].dropna()

In [99]:
# Reproject the geometries to EPSG:9311.
gender_gdf = gender_gdf.to_crs(epsg=9311)

In [100]:
# Write the result as a GeoPackage. The driver is named explicitly so the
# output format does not depend on GeoPandas inferring it from the ".gpkg"
# extension (older releases are believed to default to Shapefile instead).
gender_gdf.to_file("data/population_18.gpkg", driver="GPKG")