In [1]:
import geopandas as gpd
import pandas as pd
import wikipedia as wp

## Open Congress GIS Data

In [2]:
# Read the congressional-district shapefile into a GeoDataFrame
# (the "cd116" in the file name suggests the 116th Congress -- confirm).
file_path = "data/cb_2018_us_cd116_5m/cb_2018_us_cd116_5m.shp"
congressional_gdf = gpd.read_file(filename=file_path)

## Open Up State Data

In [3]:
# Read the state-boundary shapefile; despite the `_df` suffix the result of
# gpd.read_file is a GeoDataFrame. `file_path` is rebound to this second path.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [4]:
# Count district rows per state FIPS code. The right join keeps every row of
# congressional_gdf, so each district contributes exactly one to its state.
districts_with_states = states_df.merge(congressional_gdf, how="right", on="STATEFP")
congressional_seats_per_state_series = districts_with_states.groupby("STATEFP").size()

# Flatten the indexed counts into a two-column frame: STATEFP, SEATS.
congressional_seats_per_state_df = congressional_seats_per_state_series.reset_index(
    name="SEATS"
)

## Get Progressive Caucus Membership Data

In [5]:
# Fetch the rendered HTML of the caucus article as UTF-8 bytes.
# auto_suggest=False makes the library request exactly this title; with the
# default (True) the title is first passed through Wikipedia's suggestion
# search, which can silently resolve to a different article.
# NOTE: this is live data -- the result changes whenever the article is edited.
html = (
    wp.page("Congressional_Progressive_Caucus", auto_suggest=False)
    .html()
    .encode("UTF-8")
)

In [6]:
# Parse every HTML table on the page and pick the membership table.
# The table at position 4 is preferred (the historical location), but a live
# Wikipedia page can gain or lose tables, so the choice is verified against
# the columns the following cells need instead of trusting the position.
REQUIRED_COLUMNS = {"District", "State"}
wiki_tables = pd.read_html(html)
candidate_tables = wiki_tables[4:5] + wiki_tables[:4] + wiki_tables[5:]
progressives_df = next(
    (table for table in candidate_tables if REQUIRED_COLUMNS.issubset(table.columns)),
    None,
)
if progressives_df is None:
    raise ValueError(
        "No table with columns %s found on the Wikipedia page; "
        "the page layout has probably changed." % sorted(REQUIRED_COLUMNS)
    )

In [7]:
# Split "District" at the first hyphen: the part before it becomes the state
# abbreviation (STUSPS), the part after it the district code (CD116FP).
district_parts = progressives_df["District"].str.split("-", n=1, expand=True)
progressives_df["STUSPS"] = district_parts[0]
progressives_df["CD116FP"] = district_parts[1]

In [8]:
# Left-pad district codes with zeros to two characters ("3" -> "03");
# values that are already two characters long, such as "AL", are unchanged.
district_codes = progressives_df["CD116FP"]
progressives_df["CD116FP"] = district_codes.str.zfill(2)

In [9]:
# Keep only the columns needed downstream. The explicit .copy() makes this an
# independent frame, so the column assignments in later cells do not raise
# pandas' SettingWithCopyWarning.
progressives_df = progressives_df[["State", "CD116FP", "STUSPS"]].copy()

In [10]:
# Look up each member's state row by postal abbreviation to obtain the
# state-level GEOID. validate="many_to_one" asserts that STUSPS is unique in
# states_df; without that guarantee the left join could emit extra rows and
# the row-for-row correspondence with progressives_df that the next cell
# relies on would silently break.
progressives_df_with_geoid = progressives_df.merge(
    states_df, on="STUSPS", how="left", validate="many_to_one"
)
progressives_df_with_geoid = progressives_df_with_geoid[
    ["State", "CD116FP", "STUSPS", "GEOID"]
]

In [11]:
# Copy the state-level GEOID back onto progressives_df. The assignment aligns
# on index labels, so it relies on both frames sharing the same row index.
state_geoids = progressives_df_with_geoid["GEOID"]
progressives_df["GEOID"] = state_geoids

In [12]:
# Translate the "AL" (at-large) marker into the Census district code.
# States with a single district use "00", but the Census congressional
# district files code non-voting delegate / resident commissioner districts
# (DC, Puerto Rico and the island territories) as "98". Mapping those to "00"
# would leave them without a matching GEOID in the district shapefile.
DELEGATE_STUSPS = ["DC", "PR", "GU", "VI", "AS", "MP"]
is_at_large = progressives_df["CD116FP"] == "AL"
is_delegate = progressives_df["STUSPS"].isin(DELEGATE_STUSPS)
at_large_codes = is_delegate.map({True: "98", False: "00"})
progressives_df["CD116FP"] = progressives_df["CD116FP"].mask(
    is_at_large, at_large_codes
)

In [13]:
# Build the district GEOID as <state GEOID><district code>.
# The state part is read from progressives_df_with_geoid, which is never
# modified, rather than from progressives_df["GEOID"] itself. That makes the
# cell idempotent: re-running it no longer appends the district code a second
# time (e.g. "06" + "13" -> "0613" -> "061313").
progressives_df["GEOID"] = (
    progressives_df_with_geoid["GEOID"] + progressives_df["CD116FP"]
)

## Merge With Congressional Districts

In [14]:
# Attach district attributes and geometry to each caucus member by GEOID.
# how="right" keeps every row of progressives_df, including any whose GEOID
# has no match in congressional_gdf (those rows get missing district columns).
progressives_gdf = congressional_gdf.merge(
    right=progressives_df,
    how="right",
    on="GEOID",
)

In [15]:
# Reproject to EPSG code 9311 and write the layer as a GeoPackage.
# The driver is named explicitly: older geopandas releases defaulted to
# "ESRI Shapefile" regardless of the file extension, which would produce a
# shapefile with a .gpkg name.
progressives_gdf = progressives_gdf.to_crs(9311)
progressives_gdf.to_file("data/progressive_house_districts.gpkg", driver="GPKG")

## Get Counts

In [63]:
# Count caucus members per state abbreviation. The right join keeps one row
# per progressives_df entry; only the STUSPS column is needed for counting.
members_with_districts = congressional_gdf.merge(
    progressives_df, on="GEOID", how="right"
)
progressive_counts_series = (
    members_with_districts[["STUSPS"]].groupby("STUSPS").size()
)

# Flatten the indexed counts into a two-column frame: STUSPS, SEATS.
progressive_counts_df = progressive_counts_series.reset_index(name="SEATS")

In [64]:
# Count all district rows per state abbreviation. The left join keeps every
# row of congressional_gdf and adds the state's STUSPS via STATEFP.
districts_with_abbrev = congressional_gdf.merge(states_df, on="STATEFP", how="left")
congressional_state_counts_series = districts_with_abbrev.groupby("STUSPS").size()

# Flatten the indexed counts into a two-column frame: STUSPS, SEATS.
congressional_state_counts_df = congressional_state_counts_series.reset_index(
    name="SEATS"
)

In [65]:
# Put total seats and progressive seats side by side, one row per state.
# Both inputs carry a "SEATS" column, so pandas suffixes them as
# SEATS_x (left: all districts) and SEATS_y (right: caucus members).
# The left join keeps states that have no caucus members at all.
comparison_df = pd.merge(
    congressional_state_counts_df,
    progressive_counts_df,
    how="left",
    on="STUSPS",
)

In [66]:
# Give the suffixed merge columns meaningful names.
seat_column_labels = {"SEATS_x": "total", "SEATS_y": "progressives"}
comparison_df = comparison_df.rename(columns=seat_column_labels)

# States absent from the progressive counts came through the left join as
# NaN; treat them as zero members and restore an integer dtype.
progressive_seats = comparison_df["progressives"].fillna(0)
comparison_df["progressives"] = progressive_seats.astype(int)

In [67]:
# Share of each state's seats held by caucus members, rounded to two
# decimals. Despite the column name, the value is a fraction in [0, 1],
# not a number scaled to 0-100.
progressive_share = comparison_df["progressives"].div(comparison_df["total"])
comparison_df["percent_progressive"] = progressive_share.round(2)

## Merge With States Shapefile and Export

In [74]:
# Attach the per-state counts to the state geometries. The left join keeps
# every row of states_df, so states without counts get missing values.
# NOTE(review): only the count columns and geometry are kept, so the exported
# layer carries no state identifier (STUSPS is dropped) -- confirm intended.
export_columns = ["total", "progressives", "percent_progressive", "geometry"]
states_with_counts = states_df.merge(comparison_df, on="STUSPS", how="left")
progressives_per_state_gdf = states_with_counts[export_columns]

In [75]:
# Reproject to EPSG code 9311 and write the layer as a GeoPackage.
# The driver is named explicitly: older geopandas releases defaulted to
# "ESRI Shapefile" regardless of the file extension, which would produce a
# shapefile with a .gpkg name.
progressives_per_state_gdf = progressives_per_state_gdf.to_crs(9311)
progressives_per_state_gdf.to_file(
    "data/progressives_per_state.gpkg", driver="GPKG"
)