In [17]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [18]:
# Location of the state-boundary shapefile (presumably the 2018 Census
# cartographic-boundary file, judging by its name -- confirm provenance).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load the boundaries as a GeoDataFrame; its "NAME" column is the join key
# used when the age data is merged in later.
states_df = gpd.read_file(
    file_path,
)

## Get Age Data

In [19]:
# Read the ACS S0101 export. `thousands=","` lets pandas parse
# comma-grouped numbers in any column that is entirely numeric; columns that
# also hold non-numeric text stay as strings.
acs_age = pd.read_csv(
    filepath_or_buffer="data/ACSST1Y2023.S0101-2024-12-12T233736.csv",
    thousands=",",
)

In [20]:
# Build one rename mapping per sex: original CSV header -> geography name.
# Only headers with exactly three "!!"-separated fields whose last field is
# "Estimate" and whose middle field is "Male" or "Female" are collected.
rename_columns_female = {}
rename_columns_male = {}
mapping_by_sex = {"Male": rename_columns_male, "Female": rename_columns_female}
for header in acs_age.columns:
    fields = header.split("!!")
    if len(fields) != 3 or fields[2] != "Estimate":
        continue  # not a "<geography>!!<sex>!!Estimate" header
    sex_mapping = mapping_by_sex.get(fields[1])
    if sex_mapping is not None:
        # The first field is the geography name; trim surrounding whitespace.
        sex_mapping[header] = fields[0].strip()

In [21]:
# Columns to keep from the renamed ACS table: every geography name collected
# from the female "Estimate" headers, followed by the "group" label column.
#
# "group" is filtered out before being appended so the list stays correct when
# this cell is re-run after "Label (Grouping)" -> "group" has already been
# added to the mapping; otherwise "group" would appear twice and the column
# selection downstream would return a duplicated label column.
states = [name for name in rename_columns_female.values() if name != "group"]
states.append("group")

In [22]:
# Both mappings also rename the row-label column to the short name "group".
for sex_mapping in (rename_columns_female, rename_columns_male):
    sex_mapping["Label (Grouping)"] = "group"

#### Female Data

In [23]:
# Relabel the female estimate columns with their geography names, then keep
# only those columns plus the "group" label column (in `states` order).
acs_age_female = acs_age.rename(columns=rename_columns_female).loc[:, states]

In [24]:
# Pivot the female table so each geography is a row and each age-group label
# is a column.
#
# The labels in "group" are moved to the index *before* transposing, so they
# become the column headers directly. This replaces the previous approach of
# reading the header from a hard-coded row position (52) after the transpose,
# which silently produced a wrong header whenever the CSV did not contain
# exactly 52 geography columns.
acs_age_female = (
    acs_age_female.set_index("group")
    .T
    # Geography names become the "group" column (the merge key used later);
    # the columns axis itself is left unnamed.
    .rename_axis(index="group", columns=None)
    .reset_index()
)

#### Male Data

In [25]:
# Relabel the male estimate columns with their geography names, then keep
# only those columns plus the "group" label column (in `states` order).
acs_age_male = acs_age.rename(columns=rename_columns_male).loc[:, states]

In [26]:
# Pivot the male table so each geography is a row and each age-group label is
# a column.
#
# The labels in "group" are moved to the index *before* transposing, so they
# become the column headers directly. This replaces the previous approach of
# reading the header from a hard-coded row position (52) after the transpose,
# which silently produced a wrong header whenever the CSV did not contain
# exactly 52 geography columns.
acs_age_male = (
    acs_age_male.set_index("group")
    .T
    # Geography names become the "group" column (the merge key used later);
    # the columns axis itself is left unnamed.
    .rename_axis(index="group", columns=None)
    .reset_index()
)

#### Merge Gender Data

In [27]:
# Join the male and female tables row-by-row on the geography name held in
# "group". Every other column exists in both inputs, so each one is
# disambiguated with a "_male" / "_female" suffix.
gender_table = pd.merge(
    left=acs_age_male,
    right=acs_age_female,
    how="inner",
    on="group",
    suffixes=("_male", "_female"),
)

In [28]:
# Normalise column names: trim surrounding whitespace and replace inner
# spaces with underscores so the columns are easy to reference by name.
gender_table.columns = list(
    map(lambda label: label.strip().replace(" ", "_"), gender_table.columns)
)

In [29]:
# Convert the 65-and-over population counts from comma-grouped text
# (e.g. "1,234,567") to integers.
#
# Values are passed through `astype(str)` first so the conversion also works
# when a column already holds integers -- for example when this cell is
# re-run -- instead of failing on the `.str` accessor. `regex=False` makes the
# comma a literal match on every pandas version.
for count_column in ("65_years_and_over_male", "65_years_and_over_female"):
    gender_table[count_column] = (
        gender_table[count_column]
        .astype(str)
        .str.replace(",", "", regex=False)
        .astype(int)
    )

In [30]:
# Ratio of men to women aged 65 and over, per geography (male / female).
males_65_over = gender_table["65_years_and_over_male"]
females_65_over = gender_table["65_years_and_over_female"]
gender_table["gender_ratio_m_to_f_65_over"] = males_65_over.div(females_65_over)

In [31]:
# Express the 65+ male-to-female ratio as a whole number of men per 100 women.
#
# The value is scaled to a percentage first and rounded afterwards, because
# `astype(int)` truncates: rounding to two decimals and then multiplying by
# 100 can leave a float such as 56.99999999999999, which would be stored as 56
# instead of 57.
#
# It is computed from the count columns rather than by rescaling the existing
# ratio column, so re-running this cell does not multiply by 100 a second time.
gender_table["gender_ratio_m_to_f_65_over"] = (
    gender_table["65_years_and_over_male"]
    .div(gender_table["65_years_and_over_female"])
    .mul(100)
    .round()
    .astype(int)
)

## Combine Data

In [32]:
# Rename the geography column to "NAME" so it matches the key column used
# when merging with the boundary data.
gender_table = gender_table.rename({"group": "NAME"}, axis="columns")

In [33]:
# Attach the age/sex table to the boundaries. A left join keeps every row of
# `states_df`; boundaries without a matching "NAME" in `gender_table` get
# missing values in the joined columns.
gender_gdf = states_df.merge(
    right=gender_table,
    how="left",
    on="NAME",
)

In [34]:
# Keep only the columns needed for the output layer, then drop rows with any
# missing value among them (e.g. boundaries that found no match in the join).
output_columns = ["NAME", "geometry", "gender_ratio_m_to_f_65_over"]
gender_gdf = gender_gdf[output_columns]
gender_gdf = gender_gdf.dropna()

In [35]:
# Reproject to EPSG:9311 (presumably a US National Atlas equal-area
# projection -- TODO confirm the intended CRS).
gender_gdf = gender_gdf.to_crs(epsg=9311)

In [36]:
# Write the final layer to disk; the output format is left for geopandas to
# determine from the ".gpkg" file name.
output_path = "data/gender_65_and_over.gpkg"
gender_gdf.to_file(output_path)