In [27]:
import pandas as pd
import numpy as np
import requests
from dotenv import load_dotenv
import os
from rich import print
from functools import lru_cache
from tqdm import tqdm

load_dotenv(".env")
DATA_GOV_API_KEY = os.getenv("DATA_GOV_API_KEY")

In [39]:
@lru_cache(maxsize=10000)
def load_employer_contributions(employer_name: str, election_cycle: str, debug: bool = False):
    try:
        employer_data_url = f"https://api.open.fec.gov/v1/schedules/schedule_a"
        employer_data_params = {
            "api_key": DATA_GOV_API_KEY,
            "contributor_employer": employer_name,
            "two_year_transaction_period": election_cycle,
            "per_page": 100,
            "sort": "-contribution_receipt_amount",
            "page": 1
        }
        employer_response = requests.get(employer_data_url, params=employer_data_params)
        employer_data = employer_response.json()
        if debug:
            print(employer_data)
        partisan_score = 0
        partisan_donations = 0
        for contribution in employer_data["results"]:
            party_name = contribution["committee"]["party"]
            amount = contribution["contribution_receipt_amount"]
            if party_name:
                if "democratic" in party_name.lower():
                    partisan_score += amount * 1
                    partisan_donations += amount
                elif "republican" in party_name.lower():
                    partisan_score += amount * -1
                    partisan_donations += amount
            # otherwise, we ignore the donation

        return ((partisan_score / partisan_donations) if partisan_donations > 0 else 0), employer_data
    except Exception as e:
        if debug:
            print(f"Error fetching data from API: {e}")
        return None, None

In [40]:
website_df = pd.read_csv("website_data.csv")
scores_dict = {"names": [], "scores": [], "year": [], "employer_data_json": []}

  website_df = pd.read_csv("website_data.csv")


In [42]:
import json

names_column = website_df["company_name"]
years = ["2020", "2016", "2012", "2008"]

for name in tqdm(names_column, desc="Processing names", unit="name"):
    for year in tqdm(years, desc="Processing years", unit="year", leave=False):
        name = name.strip()
        if name:
            score, json_data = load_employer_contributions(name, year, debug=False)
            if score is not None:
                scores_dict["names"].append(name)
                scores_dict["scores"].append(score)
                scores_dict["year"].append(year)
                scores_dict["employer_data_json"].append(json.dumps(json_data))
            else:
                scores_dict["names"].append(name)
                scores_dict["scores"].append(np.nan)
                scores_dict["year"].append(year)
                scores_dict["employer_data_json"].append(json.dumps(json_data))
                # print(f"Failed to fetch score for {name} in {year}")

Processing names: 100%|██████████| 5189/5189 [2:05:32<00:00,  1.45s/name]   


In [None]:
scores_dict

{'names': ['11 Industries',
  '11 Industries',
  '11 Industries',
  '11 Industries',
  '11 Wells Spirits',
  '11 Wells Spirits',
  '11 Wells Spirits',
  '11 Wells Spirits',
  '14th Rose',
  '14th Rose',
  '14th Rose',
  '14th Rose',
  '1919 Cookware',
  '1919 Cookware'],
 'scores': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'year': ['2020',
  '2016',
  '2012',
  '2008',
  '2020',
  '2016',
  '2012',
  '2008',
  '2020',
  '2016',
  '2012',
  '2008',
  '2020',
  '2016']}

In [43]:
scores_df = pd.DataFrame(scores_dict)
scores_df["score"] = scores_df["scores"].astype(float)
scores_df.to_csv("employer_partisanship_scores.csv", index=False)