In [1]:
import geopandas as gpd
import pandas as pd
import wikipedia as wp

## Open State Data

In [2]:
# Location of the state boundary shapefile, relative to the working directory.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Read the shapefile with geopandas; its "NAME" and "geometry" columns are
# used further down for the merge and for the exported layer.
states_df = gpd.read_file(file_path)

## Get GDP Data

In [49]:
# Download the rendered HTML of the Wikipedia article and encode it to UTF-8
# bytes for the table parser. `auto_suggest=False` asks the wikipedia package
# for exactly this title rather than a suggested alternative.
html = wp.page(
    title="List_of_U.S._states_and_territories_by_GDP",
    auto_suggest=False,
).html().encode("UTF-8")

In [64]:
# `pd.read_html` returns one DataFrame per table found in the document.
# The first table is taken as the per-state GDP table; this depends on the
# current layout of the Wikipedia page.
gdp_tables = pd.read_html(html)
states_gdp_df = gdp_tables[0]

In [65]:
# Map the Wikipedia header labels to short names and keep only those columns.
# NOTE: despite its name, "NOMINAL_CHANGE" holds the "Nominal GDP per capita"
# figures; the change between years is only computed further down.
# The source label includes footnote markers ("[1][3]"), so it has to match
# the page's header text exactly.
wiki_label_to_short = {
    "State or federal district": "NAME",
    "Nominal GDP per capita[1][3]": "NOMINAL_CHANGE",
}
states_gdp_df = states_gdp_df.rename(columns=wiki_label_to_short)[
    list(wiki_label_to_short.values())
]

In [66]:
# Flatten multi-level column labels into single strings by joining the levels
# with a space (e.g. ("NOMINAL_CHANGE", "2022") -> "NOMINAL_CHANGE 2022").
# Labels that are already plain strings are left untouched so that re-running
# this cell does not mangle them (" ".join("NAME") would yield "N A M E").
states_gdp_df.columns = [
    " ".join(col).strip() if isinstance(col, tuple) else col
    for col in states_gdp_df.columns.values
]

In [67]:
# Give the flattened labels their final names and fix the column order.
flattened_to_final = {
    "NAME NAME": "NAME",
    "NOMINAL_CHANGE 2022": "NOMINAL_CHANGE_2022",
    "NOMINAL_CHANGE 2024": "NOMINAL_CHANGE_2024",
}
states_gdp_df = states_gdp_df.rename(columns=flattened_to_final)
# dict preserves insertion order, so this selects NAME, 2022, 2024 in that order.
states_gdp_df = states_gdp_df[list(flattened_to_final.values())]

In [68]:
# Peek at the first 2022 value to see the raw formatting before type conversion.
states_gdp_df.iloc[0]["NOMINAL_CHANGE_2022"]

'$93,460'

In [69]:
# Convert the currency-formatted strings (e.g. '$93,460') to integers.
#
# * `regex=False` makes both replacements literal on every pandas version.
#   Under regex semantics "$" is the end-of-string anchor, not a dollar sign,
#   and the default value of `regex` has changed between pandas releases.
# * `astype(str)` is a no-op for the original string values and keeps this
#   cell re-runnable once the columns have already been converted to integers.
for gdp_column in ("NOMINAL_CHANGE_2022", "NOMINAL_CHANGE_2024"):
    states_gdp_df[gdp_column] = (
        states_gdp_df[gdp_column]
        .astype(str)
        .str.replace(",", "", regex=False)
        .str.replace("$", "", regex=False)
        .astype(int)
    )

In [70]:
# Absolute change between the two years: 2024 value minus 2022 value.
gdp_2022 = states_gdp_df["NOMINAL_CHANGE_2022"]
gdp_2024 = states_gdp_df["NOMINAL_CHANGE_2024"]
states_gdp_df["NOMINAL_CHANGE_DIFF"] = gdp_2024 - gdp_2022

In [71]:
# Relative change with respect to the 2022 value.
# NOTE: the result is a fraction (0.10 means 10 %), not multiplied by 100,
# even though the column is called "..._PERCENT".
absolute_change = states_gdp_df["NOMINAL_CHANGE_DIFF"]
baseline_2022 = states_gdp_df["NOMINAL_CHANGE_2022"]
states_gdp_df["NOMINAL_CHANGE_PERCENT"] = absolute_change / baseline_2022

## Merge Data

In [72]:
# Attach the GDP columns to the state geometries by state name.
# The inner join keeps only names present in both tables; rows with a missing
# value in any column are then dropped.
states_gdp_gdf = states_df.merge(states_gdp_df, how="inner", on="NAME")
states_gdp_gdf = states_gdp_gdf.dropna()

In [73]:
# Keep only the columns needed in the exported layer.
export_columns = [
    "NAME",
    "geometry",
    "NOMINAL_CHANGE_DIFF",
    "NOMINAL_CHANGE_PERCENT",
]
states_gdp_gdf = states_gdp_gdf[export_columns]

In [74]:
# Reproject to EPSG:9311 (presumably a US equal-area projection chosen for
# mapping; verify) and write the result to disk.
states_gdp_gdf = states_gdp_gdf.to_crs(epsg=9311)
states_gdp_gdf.to_file("data/gdp.gpkg")