In [1]:
import geopandas as gpd
import pandas as pd
import wikipedia as wp

## Open State data

In [2]:
# Location of the state-boundary shapefile, relative to the notebook.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Load the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

## Get GDP Data

In [3]:
# Fetch the rendered HTML of the Wikipedia article and keep it as UTF-8 bytes
# for the table parser.
# NOTE(review): handing raw bytes to an HTML parser leaves charset detection
# to the parser; non-ASCII header text (e.g. an en dash) may come back
# mis-decoded — confirm before relying on any non-ASCII text from this page.
gdp_article = wp.page(
    "List_of_U.S._states_and_territories_by_GDP", auto_suggest=False
)
html = gdp_article.html().encode("UTF-8")

In [4]:
# Parse every table found in the article HTML and keep the first one.
gdp_tables = pd.read_html(html)
states_gdp_df = gdp_tables[0]

In [5]:
def _gdp_header_alias(label):
    """Map one raw table header label to its short working name.

    ``DataFrame.rename`` calls this once per label on every column level, so
    any label that is not recognised is returned unchanged.

    Matching is by stable prefix rather than exact text. The full headers
    also carry a year range and a footnote marker, and the dash inside the
    year range has been observed arriving mis-decoded (``â\\x80\\x93``), so an
    exact-match key silently stops matching whenever any of those drift.
    """
    # "Real GDP" is written with a non-breaking space; compare on plain spaces.
    text = str(label).replace("\xa0", " ")
    if text == "State or federal district":
        return "NAME"
    # Repeated headers are de-duplicated with a ".1", ".2", ... suffix. Only
    # the first occurrence is renamed; suffixed copies keep their raw text.
    if text.rpartition(".")[2].isdigit():
        return label
    if text.startswith("Annual GDP change at current prices"):
        return "PERCENT_CHANGE"
    if text.startswith("Real GDP growth rate"):
        return "REAL_GDP"
    return label


states_gdp_df = states_gdp_df.rename(columns=_gdp_header_alias)

# Fail here with a clear message rather than with a bare KeyError when the
# renamed columns are selected later.
_missing_headers = {"NAME", "PERCENT_CHANGE", "REAL_GDP"}.difference(
    states_gdp_df.columns.get_level_values(0)
)
if _missing_headers:
    raise KeyError(
        f"GDP table is missing expected columns: {sorted(_missing_headers)}"
    )

In [6]:
# Keep only the three renamed column groups; everything else is dropped.
kept_columns = ["NAME", "PERCENT_CHANGE", "REAL_GDP"]
states_gdp_df = states_gdp_df[kept_columns]

In [7]:
# Collapse each multi-level header tuple into one space-joined string, e.g.
# ("NAME", "NAME") -> "NAME NAME". Labels that are already plain strings are
# left untouched, so re-running this cell cannot turn "NAME" into "N A M E".
states_gdp_df.columns = [
    " ".join(label).strip() if isinstance(label, tuple) else label
    for label in states_gdp_df.columns
]

In [8]:
# Flattened names whose text is fully under this notebook's control.
_FLAT_GDP_ALIASES = {
    "NAME NAME": "NAME",
    "PERCENT_CHANGE PERCENT_CHANGE": "RAW_CHANGE",
    "REAL_GDP REAL_GDP": "REAL_GDP",
}


def _flat_gdp_alias(column):
    """Map one flattened column name to its final name.

    The percentage sub-column still carries the raw page header in its second
    half (year range, footnote marker, possibly mis-decoded dash), so it is
    recognised by its stable prefix and its ".1" duplicate suffix instead of
    by exact text. Unrecognised names are returned unchanged.
    """
    if column in _FLAT_GDP_ALIASES:
        return _FLAT_GDP_ALIASES[column]
    text = str(column)
    if text.startswith(
        "PERCENT_CHANGE Annual GDP change at current prices"
    ) and text.endswith(".1"):
        return "PERCENT_CHANGE"
    return column


states_gdp_df = states_gdp_df.rename(columns=_flat_gdp_alias)

# Fail here with a clear message rather than with a bare KeyError when the
# final columns are used later.
_missing_columns = {"NAME", "RAW_CHANGE", "PERCENT_CHANGE", "REAL_GDP"}.difference(
    states_gdp_df.columns
)
if _missing_columns:
    raise KeyError(
        f"GDP table is missing expected columns: {sorted(_missing_columns)}"
    )

In [9]:
def _percent_to_float(values):
    """Return ``values`` as floats, dropping any "%" characters first.

    A column that is already numeric is passed straight to ``astype(float)``,
    so re-running this cell does not fail on the ``.str`` accessor.
    """
    if not pd.api.types.is_numeric_dtype(values):
        # Literal replacement; "%" is never meant as a regular expression.
        values = values.str.replace("%", "", regex=False)
    return values.astype(float)


states_gdp_df["REAL_GDP"] = _percent_to_float(states_gdp_df["REAL_GDP"])
states_gdp_df["PERCENT_CHANGE"] = _percent_to_float(states_gdp_df["PERCENT_CHANGE"])
states_gdp_df["RAW_CHANGE"] = states_gdp_df["RAW_CHANGE"].astype(int)

## Merge Data

In [10]:
# Join the GDP figures onto the state boundaries by NAME. The right join
# keeps every GDP row; dropna() then removes any merged row that holds a null
# in any column (for example a GDP row whose NAME matched no boundary).
gdp_with_boundaries = states_df.merge(states_gdp_df, on="NAME", how="right")
states_gdp_gdf = gdp_with_boundaries.dropna()

In [11]:
# Columns carried into the exported layer, in output order.
output_columns = ["NAME", "geometry", "RAW_CHANGE", "PERCENT_CHANGE", "REAL_GDP"]
states_gdp_gdf = states_gdp_gdf[output_columns]

In [12]:
# Reproject to EPSG:9311, then write the result to data/gdp.gpkg.
target_crs = 9311
output_path = "data/gdp.gpkg"

states_gdp_gdf = states_gdp_gdf.to_crs(target_crs)
states_gdp_gdf.to_file(output_path)