In [8]:
import geopandas as gpd
import pandas as pd
import wikipedia as wp

## Open Congress GIS Data

In [33]:
file_path = "data/tlgpkg_2024_a_us_legislative.gpkg/tlgpkg_2024_a_us_legislative.gpkg"
congressional_gdf = gpd.read_file(file_path, layer="Congressional_Districts")

In [34]:
congressional_gdf["STATEFP"] = congressional_gdf["GEOID"].str[:2]
congressional_gdf["District"] = congressional_gdf["GEOID"].str[2:]

## Open Up State Data

In [35]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [36]:
congressional_seats_per_state_series = (
    states_df.merge(congressional_gdf, how="right", on="STATEFP")
    .groupby(["STATEFP"])
    .size()
)
congressional_seats_per_state_df = pd.DataFrame(
    {
        "STATEFP": congressional_seats_per_state_series.index,
        "SEATS": congressional_seats_per_state_series.values,
    }
)

## Get Progressive Data

In [38]:
html = wp.page("Congressional_Progressive_Caucus").html().encode("UTF-8")

In [72]:
progressives_df = pd.read_html(html)[4]

In [73]:
progressives_df[["STUSPS", "District"]] = progressives_df["District"].str.split(
    "-", n=1, expand=True
)

In [74]:
progressives_df["District"] = progressives_df["District"].str.zfill(2)

In [75]:
progressives_df

Unnamed: 0,State,District,CPVI[57],Member,STUSPS
0,Arizona,07,D+15,RaÃºl Grijalva,AZ
1,California,02,D+23,Jared Huffman,CA
2,California,08,D+26,John Garamendi,CA
3,California,10,D+18,Mark DeSaulnier,CA
4,California,12,D+40,Barbara Lee,CA
...,...,...,...,...,...
92,Virginia,08,D+26,Don Beyer,VA
93,Washington,07,D+36,Pramila Jayapal,WA
94,Washington,09,D+21,Adam Smith,WA
95,Wisconsin,02,D+19,Mark Pocan,WI


In [76]:
progressives_df = progressives_df[["State", "District", "STUSPS"]]

In [77]:
progressives_df_with_geoid = progressives_df.merge(states_df, on="STUSPS", how="left")
progressives_df_with_geoid = progressives_df_with_geoid[
    ["State", "District", "STUSPS", "GEOID"]
]

In [78]:
progressives_df["GEOID"] = progressives_df_with_geoid["GEOID"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  progressives_df["GEOID"] = progressives_df_with_geoid["GEOID"]


In [79]:
progressives_df["District"] = progressives_df["District"].replace("AL", "00")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  progressives_df["District"] = progressives_df["District"].replace("AL", "00")


In [80]:
progressives_df["GEOID"] = progressives_df["GEOID"] + progressives_df["District"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  progressives_df["GEOID"] = progressives_df["GEOID"] + progressives_df["District"]


## Merge With Congressional

In [81]:
progressives_gdf = congressional_gdf.merge(progressives_df, on="GEOID", how="right")

In [82]:
usa_projected = states_df.to_crs(progressives_gdf.crs).union_all()

In [83]:
progressives_gdf = gpd.clip(gdf=progressives_gdf, mask=usa_projected)

In [84]:
progressives_gdf = progressives_gdf.to_crs(9311)
progressives_gdf.to_file("data/progressive_house_districts.gpkg")

## Get Counts

In [85]:
progressive_counts_series = (
    congressional_gdf.merge(progressives_df, on="GEOID", how="right")[["STUSPS"]]
    .groupby(["STUSPS"])
    .size()
)
progressive_counts_df = pd.DataFrame(
    {
        "STUSPS": progressive_counts_series.index,
        "SEATS": progressive_counts_series.values,
    }
)

In [86]:
congressional_state_counts_series = (
    congressional_gdf.merge(states_df, on="STATEFP", how="left")
    .groupby(["STUSPS"])
    .size()
)
congressional_state_counts_df = pd.DataFrame(
    {
        "STUSPS": congressional_state_counts_series.index,
        "SEATS": congressional_state_counts_series.values,
    }
)

In [87]:
comparison_df = congressional_state_counts_df.merge(
    progressive_counts_df, on="STUSPS", how="left"
)

In [88]:
comparison_df = comparison_df.rename(
    columns={"SEATS_x": "total", "SEATS_y": "progressives"}
)
comparison_df["progressives"] = comparison_df["progressives"].fillna(0).astype(int)

In [89]:
comparison_df["percent_progressive"] = round(
    comparison_df["progressives"] / comparison_df["total"], 2
)

## Merge With States Shapefile and Export

In [90]:
progressives_per_state_gdf = states_df.merge(comparison_df, on="STUSPS", how="left")[
    ["total", "progressives", "percent_progressive", "geometry"]
]

In [91]:
progressives_per_state_gdf = progressives_per_state_gdf.to_crs(9311)
progressives_per_state_gdf.to_file("data/progressives_per_state.gpkg")