In [24]:
import re
import requests

In [25]:
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [35]:
from tqdm.notebook import tqdm

## Get Corrections Data

In [79]:
# HTTP headers sent with the page request: a desktop-browser User-Agent.
# NOTE(review): presumably needed because the site rejects the default
# python-requests agent — confirm.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/50.0.2661.102 Safari/537.36"
    ),
}

In [80]:
# Page whose HTML tables are scraped for the corrections-spending figures.
url = (
    "https://www.moneygeek.com/financial-planning/taxes/"
    "state-policing-corrections-spending/#methodology"
)

In [81]:
# Download the page using the browser-like headers.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the cell forever without it.
# - raise_for_status(): fail immediately on an HTTP error status instead of
#   letting an error page flow into the HTML parser.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

In [82]:
# Parse the response body into a navigable tree using the "html.parser" backend.
soup = BeautifulSoup(markup=r.text, features="html.parser")

In [83]:
# Collect the <table> elements matching the class string below and keep the
# third match as the corrections table.
# NOTE(review): both the class string (it looks build-generated) and the
# positional index depend on the page's current markup — confirm they still
# select the intended table if the scrape starts failing.
candidate_tables = soup.find_all(
    name="table",
    attrs={"class": "w-full lining-nums tabular-nums style_table__H8eRl"},
)
corrections_table = candidate_tables[2]

In [84]:
# Every <tr> element found inside the selected table.
table_rows = corrections_table.find_all(name="tr")

In [85]:
def _parse_number(cell_text):
    """Return the first number found in ``cell_text`` as a float.

    Thousands separators are removed and surrounding symbols (for example
    ``$``, ``%`` or ``#``) are ignored, so ``"$1,234"`` gives ``1234.0`` and
    ``"5.2%"`` gives ``5.2``.

    Raises:
        ValueError: if ``cell_text`` contains no digits.
    """
    match = re.search(r"\d[\d,]*(?:\.\d+)?", cell_text)
    if match is None:
        raise ValueError(f"No number found in table cell: {cell_text!r}")
    return float(match.group().replace(",", ""))


# Build one record per data row of the table. Columns are read positionally:
# rank, state, corrections % of total spend, per-capita spend, expenditures.
corrections_spend = []
for table_row in tqdm(table_rows, desc="Parsing Table"):
    tds = table_row.find_all("td")
    if not tds:
        # Rows without any <td> cell (presumably the header row) hold no data.
        continue
    row = {
        "Rank (Highest Corrections % of total Spend)": int(
            _parse_number(tds[0].text)
        ),
        # Strip surrounding whitespace so the name can match the "NAME"
        # column it is later merged on.
        "State": tds[1].text.strip(),
        "Corrections % of Total Spend": _parse_number(tds[2].text),
        # Rounded (not digit-concatenated) so a value such as "$123.45"
        # becomes 123 rather than 12345, while the column stays integer.
        "Corrections Per Capita Spend": int(round(_parse_number(tds[3].text))),
        "Corrections Expenditures ($ Millions)": int(
            round(_parse_number(tds[4].text))
        ),
    }
    corrections_spend.append(row)

Parsing Table:   0%|          | 0/52 [00:00<?, ?it/s]

In [90]:
# Turn the list of per-row dicts into a DataFrame (dict keys become columns).
corrections_df = pd.DataFrame(data=corrections_spend)

In [92]:
# Rename the state column to "NAME", the key the merge with the state
# geometries is performed on.
corrections_df = corrections_df.rename({"State": "NAME"}, axis="columns")

## Get State Data

In [88]:
# Shapefile location, relative to the notebook's working directory.
file_path = (
    "data/cb_2018_us_state_500k/"
    "cb_2018_us_state_500k.shp"
)
# Load the shapefile into a GeoDataFrame.
states_df = gpd.read_file(filename=file_path)

In [89]:
# Keep only the columns carried into the merged output.
kept_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[kept_columns]

## Merge Data

In [93]:
# Left join on "NAME": every row of states_df is kept, and rows without a
# matching entry in corrections_df get NaN in the corrections columns.
corrections_gdf = states_df.merge(
    right=corrections_df,
    how="left",
    on="NAME",
)

In [97]:
# Drop every row containing a missing value (e.g. rows of states_df that found
# no match in the left join) and renumber the remaining rows from 0.
# drop=True discards the old index instead of inserting it as an "index"
# column: that column would otherwise be written to the output file, and it
# made re-running this cell add "level_0" and eventually raise.
corrections_gdf = corrections_gdf.dropna().reset_index(drop=True)

In [98]:
# Reproject the geometries to EPSG:9311.
# NOTE(review): presumably a US equal-area projection chosen for mapping —
# confirm the code against the EPSG registry.
corrections_gdf = corrections_gdf.to_crs(epsg=9311)

In [99]:
# Write the merged, reprojected layer to disk. The driver is named explicitly
# so the output is a GeoPackage regardless of whether the installed geopandas
# infers the format from the ".gpkg" extension.
corrections_gdf.to_file("data/corrections_per_state.gpkg", driver="GPKG")