In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Step 1: Load the data
df = pd.read_csv("coauthorship_matrix.csv")  # replace with your actual path

# Keep only rows that satisfy all three conditions at once (party is D or R,
# leg_id is "89R", chamber is "Senate") and project down to the four columns
# used by the rest of the analysis.
# NOTE(review): whether "party" describes row_author or col_author is not
# visible here -- confirm against the CSV schema.
df = df.loc[
    df["party"].isin(["D", "R"])
    & df["leg_id"].eq("89R")
    & df["chamber"].eq("Senate"),
    ["row_author", "col_author", "party", "coauthored_bills"],
]

# Step 2: Pivot to create n x n matrix
# One row per row_author, one column per col_author, each cell holding
# coauthored_bills. Author pairs that never appear in the table become 0.
# Repeated (row_author, col_author) pairs, if any, are combined by
# pivot_table's default aggregation.
matrix = pd.pivot_table(
    df,
    values="coauthored_bills",
    index="row_author",
    columns="col_author",
    fill_value=0,
)
# print(matrix)

# Step 3: Run PCA
# Project each author's row of the matrix onto the first two principal
# components; `components` has one row per matrix row and two columns.
pca = PCA(n_components=2)
components = pca.fit_transform(matrix)

# Step 4: Min-max scaling to 0–100%
# Each component (column) is rescaled independently, hence axis=0.
comp_min = components.min(axis=0)
comp_range = components.max(axis=0) - comp_min
# A component whose values are all identical has a range of exactly zero;
# dividing by it would produce NaN coordinates. Use 1 as the divisor in that
# case so the component maps to 0 for every author instead.
comp_range = np.where(comp_range == 0, 1.0, comp_range)
scaled = (components - comp_min) / comp_range * 100

# Step 5: Create a DataFrame for plotting, and merge in party info
# The scaled coordinates are in the same order as matrix.index, so the author
# names can be attached positionally before looking up each author's party.
# A left merge keeps every author even when no party row matches.
# NOTE(review): if the same row_author ever appears with more than one party
# value, the merge will emit one row per party for that author.
pca_df = pd.merge(
    pd.DataFrame(scaled, columns=["Influence", "Ideology"]).assign(
        Author=matrix.index
    ),
    df[["row_author", "party"]].drop_duplicates(),
    how="left",
    left_on="Author",
    right_on="row_author",
)


# Step 6: Assign colors by party
def assign_color(party):
    """Return the hex color used to draw a legislator of the given party.

    "R" maps to red and "D" maps to blue; any other value (including
    missing values) falls back to gray.
    """
    palette = {
        "R": "#e41a1c",  # Red
        "D": "#377eb8",  # Blue
    }
    return palette.get(party, "#6f6f6f")  # Gray for anything else


# Translate each author's party into the marker color used in the scatter plot.
pca_df["color"] = pca_df["party"].apply(assign_color)

# Step 7: Plotting
plt.figure(figsize=(12, 8))
plt.scatter(pca_df["Influence"], pca_df["Ideology"], c=pca_df["color"], alpha=0.9)

# Add labels
# Walk the three columns in lockstep; this avoids building a Series per row.
for x_pos, y_pos, author in zip(
    pca_df["Influence"], pca_df["Ideology"], pca_df["Author"]
):
    plt.text(x_pos, y_pos, author, fontsize=8, ha="right")

plt.title("Texas Legislature 89R – PCA of Coauthorships (Scaled & Colored by Party)")
# Axis labels must name the column actually drawn on that axis: the scatter
# above puts "Influence" on x and "Ideology" on y.
plt.xlabel("Influence")
plt.ylabel("Ideology")
plt.grid(True)
plt.tight_layout()
# plt.show()
pca_df