In [1]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [2]:
# Path to the shapefile (presumably US state boundaries, judging by the file
# name), relative to the notebook's working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Read the shapefile with GeoPandas; the result is kept in `states_df`.
states_df = gpd.read_file(file_path)

In [3]:
# Keep only the NAME column (used later as the merge key) and the geometry;
# every other attribute column from the shapefile is dropped.
states_df = states_df[["NAME", "geometry"]]

## Get Veteran Status data (ACS S2101)

In [4]:
# Raw ACS table export. `thousands=","` lets pandas parse comma-grouped
# numbers in any column that is purely numeric.
vet_csv_path = "data/ACSST5Y2023.S2101-2025-01-14T235917.csv"
vet_df = pd.read_csv(vet_csv_path, thousands=",")

##### Get Veteran totals

In [5]:
# Build the rename map for the "Veterans" columns. Headers look like
# "<first>!!<group>!!<last>"; each kept column is renamed to its stripped
# first part (presumably the state name), and the label column becomes NAME.
rename_columns_vet_totals = {"Label (Grouping)": "NAME"}
for acs_column in vet_df.columns:
    header_parts = acs_column.split("!!")
    # Skip anything that is not a three-part header in the "Veterans" group.
    if len(header_parts) != 3 or header_parts[1] != "Veterans":
        continue
    rename_columns_vet_totals[acs_column] = header_parts[0].strip()

In [6]:
# Select the label column plus the chosen "Veterans" columns, rename them, and
# transpose so that every selected column becomes one row.
# `list(...)` hands pandas a plain list of column names rather than a
# dict_keys view.
vet_totals_df = (
    vet_df[list(rename_columns_vet_totals)]
    .rename(columns=rename_columns_vet_totals)
    .T.reset_index()
)
# After the transpose, row 0 holds "NAME" followed by the original row labels;
# promote it (with surrounding whitespace stripped) to the column header.
vet_totals_df.columns = [label.strip() for label in vet_totals_df.iloc[0]]
# Drop the promoted header row. `.copy()` detaches the result from the sliced
# parent frame so that later column assignments do not raise
# SettingWithCopyWarning.
vet_totals_df = vet_totals_df.iloc[1:].copy()

In [7]:
# Output column -> source row label whose comma-grouped count string is parsed.
vet_count_sources = {
    "vet_population": "Civilian population 18 years and over",
    "vet_gulf_war": "Gulf War (8/1990 to 8/2001) veterans",
    "vet_korean_war": "Korean War veterans",
}
# `.assign` returns a new frame, so this is safe even when `vet_totals_df` is a
# slice of another frame. `regex=False` makes the comma removal a literal
# replacement regardless of the pandas version's default.
vet_totals_df = vet_totals_df.assign(
    **{
        target: vet_totals_df[source].str.replace(",", "", regex=False).astype(int)
        for target, source in vet_count_sources.items()
    }
)

In [8]:
# Keep the join key and the three parsed integer columns; the original
# string-valued label columns are discarded here.
vet_output_columns = ["NAME", "vet_population", "vet_gulf_war", "vet_korean_war"]
vet_totals_df = vet_totals_df.loc[:, vet_output_columns]

##### Get Total-population figures (stored under `nonvet_*` names)

In [9]:
# Build the rename map for the "Total" columns. Headers look like
# "<first>!!<group>!!<last>"; each kept column is renamed to its stripped
# first part (presumably the state name), and the label column becomes NAME.
rename_columns_nonvet_total = {"Label (Grouping)": "NAME"}
for acs_column in vet_df.columns:
    header_parts = acs_column.split("!!")
    # Skip anything that is not a three-part header in the "Total" group.
    if len(header_parts) != 3 or header_parts[1] != "Total":
        continue
    rename_columns_nonvet_total[acs_column] = header_parts[0].strip()

In [10]:
# Select the label column plus the chosen "Total" columns, rename them, and
# transpose so that every selected column becomes one row.
# `list(...)` hands pandas a plain list of column names rather than a
# dict_keys view.
nonvet_total_df = (
    vet_df[list(rename_columns_nonvet_total)]
    .rename(columns=rename_columns_nonvet_total)
    .T.reset_index()
)
# After the transpose, row 0 holds "NAME" followed by the original row labels;
# promote it (with surrounding whitespace stripped) to the column header.
nonvet_total_df.columns = [label.strip() for label in nonvet_total_df.iloc[0]]
# Drop the promoted header row. `.copy()` detaches the result from the sliced
# parent frame so that later column assignments do not raise
# SettingWithCopyWarning.
nonvet_total_df = nonvet_total_df.iloc[1:].copy()

In [11]:
# Parse the comma-grouped count strings into integers. `.assign` returns a new
# frame, so this is safe even when `nonvet_total_df` is a slice of another
# frame; `regex=False` makes the comma removal a literal replacement.
# NOTE(review): the source columns come from the "Total" group, so this value
# looks like the whole 18+ civilian population rather than non-veterans only.
# The column name is kept for downstream compatibility -- confirm intent.
population_counts = (
    nonvet_total_df["Civilian population 18 years and over"]
    .str.replace(",", "", regex=False)
    .astype(int)
)
nonvet_total_df = nonvet_total_df.assign(
    nonvet_population_over_18=population_counts
)[["NAME", "nonvet_population_over_18"]]

In [12]:
# Combine the veteran counts with the population counts, one row per NAME.
# `validate="one_to_one"` makes pandas raise MergeError if NAME is duplicated
# on either side, instead of silently multiplying rows in the join.
# Note: this rebinds `vet_df`, which until now held the raw CSV table.
vet_df = vet_totals_df.merge(
    nonvet_total_df, on="NAME", how="inner", validate="one_to_one"
)

In [21]:
def _vets_per(count, population, per):
    """Return `count` per `per` units of `population` as an integer Series.

    Args:
        count: Series of veteran counts (numerator).
        population: Series of population counts (denominator).
        per: Size of one population unit, e.g. 10_000 for "per 10k".

    Returns:
        The rate rounded to zero decimals and cast to int.
    """
    return (count / (population / per)).round(decimals=0).astype(int)


# All three rates share the `nonvet_population_over_18` denominator.
# NOTE(review): despite its name, that column appears to be built from the
# "Total" column group earlier in the notebook -- confirm the intended base.
population_18_plus = vet_df["nonvet_population_over_18"]
vet_df = vet_df.assign(
    gulf_war_vets_per_10k=_vets_per(
        vet_df["vet_gulf_war"], population_18_plus, 10_000
    ),
    gulf_war_vets_per_1000=_vets_per(
        vet_df["vet_gulf_war"], population_18_plus, 1000
    ),
    korean_war_vets_per_10k=_vets_per(
        vet_df["vet_korean_war"], population_18_plus, 10_000
    ),
)

## Merge data with GIS Data

In [22]:
# Columns carried into the spatial merge, in output order. `vet_population`
# is not in this list, so it is dropped at this point.
export_columns = [
    "NAME",
    "vet_gulf_war",
    "vet_korean_war",
    "nonvet_population_over_18",
    "korean_war_vets_per_10k",
    "gulf_war_vets_per_10k",
    "gulf_war_vets_per_1000",
]
vet_df = vet_df.loc[:, export_columns]

In [23]:
# Attach the veteran columns to the state geometries. The inner join on NAME
# keeps only rows whose NAME appears in both frames; unmatched rows on either
# side are dropped.
vet_population_gdf = states_df.merge(vet_df, on="NAME", how="inner")

In [24]:
# Reproject to EPSG:9311 (presumably a US National Atlas equal-area CRS, per
# the EPSG registry -- confirm). The `epsg=` keyword makes it explicit that
# the integer is an EPSG code.
vet_population_gdf = vet_population_gdf.to_crs(epsg=9311)

In [25]:
# Write the result as a GeoPackage. The driver is named explicitly so the
# output format does not depend on extension-based driver inference, which
# varies between GeoPandas versions.
vet_population_gdf.to_file("data/vets.gpkg", driver="GPKG")