In [None]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [2]:
# Location of the state-boundary shapefile, relative to the notebook's
# working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Read the boundaries into a GeoDataFrame; later cells join on its NAME
# column and keep its STUSPS and geometry columns.
states_gdf = gpd.read_file(filename=file_path)

## Open Population Data

In [3]:
# sheet_name=None loads every worksheet and returns a dict of DataFrames
# keyed by sheet name; the sheet of interest is picked out in the next cell.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [4]:
# Raw column labels as pandas read them -> the short names used downstream.
# The first label appears to be the sheet's title text, picked up as a header;
# "Unnamed: 5" is pandas' placeholder for a column with no header cell.
# NOTE(review): confirm which estimate year "Unnamed: 5" holds — TODO verify
# against the workbook. The column may also contain the sheet's own header
# rows, so its dtype may not be purely numeric.
column_names = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}

state_populations_df = (
    state_populations["NST-EST2024-POP"][list(column_names)]
    .rename(columns=column_names)
    # Per the column title, only sub-part rows are prefixed with dots. Strip
    # just those dots: slicing off the first character unconditionally would
    # also truncate names that have no leading dot, so they could never match
    # in the later merge on NAME.
    .assign(NAME=lambda df: df["NAME"].str.lstrip("."))
)

In [5]:
# Columns carried forward to the rest of the notebook.
keep_columns = ["STUSPS", "NAME", "POPULATION", "geometry"]

# Left join on NAME: every row of states_gdf is kept, and rows with no
# matching population entry get a missing POPULATION. The merge is called on
# the geo frame (not the plain frame) so the geometry column stays attached.
states_with_population_gdf = states_gdf.merge(
    state_populations_df, how="left", on="NAME"
)[keep_columns]

## Get Vehicle Data

In [6]:
# Raw vehicle table; it is filtered down to the rows of interest in the next cell.
vehicle_df = pd.read_csv(filepath_or_buffer="data/table_data.csv")

In [7]:
# Keep only the "Vehicles" measure for 2023, then rename the state column to
# NAME so it lines up with the join key used for the boundaries.
is_vehicle_measure = vehicle_df["Measures"].eq("Vehicles")
is_year_2023 = vehicle_df["Year of Year"].eq(2023)
vehicle_df = vehicle_df.loc[is_vehicle_measure & is_year_2023].rename(
    columns={"State": "NAME"}
)

## Merge Data

In [8]:
# Inner join on NAME: only states present in both the boundary/population
# frame and the filtered vehicle table survive.
vehicle_gdf = states_with_population_gdf.merge(
    right=vehicle_df,
    how="inner",
    on="NAME",
)

In [9]:
# Ratio of the vehicle table's "Values" column to POPULATION, rounded to two
# decimal places and stored as a new column.
vehicles_per_capita = vehicle_gdf["Values"].div(vehicle_gdf["POPULATION"])
vehicle_gdf["vehicles_per_person"] = vehicles_per_capita.round(2)

In [10]:
# Reproject to EPSG:9311 before exporting.
# NOTE(review): presumably a US-wide equal-area projection chosen for
# mapping — confirm the intended CRS.
vehicle_gdf = vehicle_gdf.to_crs(epsg=9311)

# Name the driver explicitly so the output is a GeoPackage regardless of
# whether the installed geopandas infers the format from the file extension.
vehicle_gdf.to_file("data/Vehicles_Per_State.gpkg", driver="GPKG")