In [2]:
# Data Analysis Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Apply matplotlib's built-in 'ggplot' style sheet to every figure drawn below.
plt.style.use('ggplot')

import os
import pickle
import itertools

In [3]:
import warnings
# Silence every warning category for the rest of the session so cell output
# stays clean.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning)
# raised by the column assignments further down — consider narrowing the
# filter to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# util funcs to create bioguide_id - party - congress
import match_bioguide_party_utils

# Load in ProPub Data 

In [6]:
# Relative path of the pickle file used to cache the ProPublica data between runs.
fp = "data/propub_data.pickle"

In [7]:
# Load the ProPublica data, using the pickle at `fp` as a local cache so the
# loader in match_bioguide_party_utils only has to run when no cache exists.
# NOTE(review): pickle.load can execute arbitrary code from the file — only
# trust this cache if it was written by this notebook.
if os.path.exists(fp):
    with open(fp, 'rb') as f:
        propub_data = pickle.load(f)
else:
    # Take the API key from the environment rather than pasting it into the
    # notebook; falls back to the previous empty string when the variable is
    # not set, so behaviour is unchanged without it.
    key = os.environ.get('PROPUBLICA_API_KEY', '')
    propub_data = match_bioguide_party_utils.load_propublica_congress_api(key)

    # Create the cache directory first so a missing folder does not throw
    # away the freshly fetched data with a FileNotFoundError.
    cache_dir = os.path.dirname(fp)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    with open(fp, 'wb') as f:
        pickle.dump(propub_data, f)

# Load in House Exp. Data

In [8]:
# House data CSV; this file is generated by the R cleaning scripts, not by this notebook.
df_house = pd.read_csv("data/data.csv")

In [9]:
# Normalise column labels to lower case (a no-op when they already are).
df_house.columns = df_house.columns.str.lower()

In [10]:
# Staffers currently fall under the "personnel compensation" category, so
# keep only those rows.
df_pc = df_house.loc[df_house["category"].eq("personnel compensation")]


# Generate Matching Between ID - Party - Congress

In [108]:
# Build the bioguide_id - party - congress matching from the personnel rows
# and the ProPublica data; the helper returns a (lookup, matched) pair.
(df_bioid_party_lookup, df_bioid_party) = (
    match_bioguide_party_utils.generate_bioid_party_lookup(
        df_pc,
        propub_data,
    )
)

In [109]:
# Cast congress and year to int so they are ordered chronologically
# (numerically) rather than as strings.
for int_col in ("congress", "year_clean"):
    df_bioid_party[int_col] = df_bioid_party[int_col].astype(int)

In [110]:
# Keep only the columns needed to follow a payee across offices and periods.
df_sliced = df_bioid_party.loc[
    :,
    [
        "payee",
        "category",
        "purpose",
        "office_cc",
        "amount",
        "party",
        "congress",
        "quarter_clean",
        "year_clean",
    ],
]

In [111]:
# Sum the amounts so there is one row per
# payee / congress / party / purpose / office / quarter / year combination.
df_unique_id = (
    df_sliced
    .groupby(
        [
            "payee",
            "congress",
            "party",
            "purpose",
            "office_cc",
            "quarter_clean",
            "year_clean",
        ]
    )["amount"]
    .sum()
    .reset_index()
)

# Tracking Staffers Over Time

We want to see which staffers appear more than once in the House exp. data.

In [112]:
# keep=False flags every occurrence of a repeated payee, not just the later
# ones, so all rows of anyone appearing more than once are retained.
df_more_than_one = df_unique_id.loc[
    df_unique_id["payee"].duplicated(keep=False)
]

In [93]:
# Per payee, count the distinct congresses and distinct parties they appear under.
df_multi = (
    df_more_than_one
    .groupby("payee")
    .agg({"congress": "nunique", "party": "nunique"})
)

Here we can look at which staffers have also served in multiple congress sessions

In [117]:
# Ten payees with the most distinct congresses, then the most distinct parties.
(
    df_multi
    .sort_values(by=["congress", "party"], ascending=False)
    .iloc[:10]
)

Unnamed: 0_level_0,congress,party
payee,Unnamed: 1_level_1,Unnamed: 2_level_1
anfinson susan,5,2
anfinson t e,5,2
awan abid a,5,2
baugh r p,5,2
blankenship april l,5,2
donches michelle m,5,2
elliott jennifer l,5,2
fratter bonnie b,5,2
fredericks autumn,5,2
henry bryant heather,5,2


Interestingly enough, it seems some staffers have even jumped across parties!

# Looking at Top Repeating

We'll look more closely at the people who've served the longest and for different parties.

In [94]:
# Payees that appear in more than four distinct congresses.
df_five = df_multi.loc[df_multi["congress"].gt(4)]

In [99]:
# Pull every (un-aggregated) row for those payees; df_five is indexed by
# payee, so its index is the list of names to match.
df_five_house = df_sliced.loc[
    df_sliced["payee"].isin(df_five.index)
]

In [118]:
# Count the distinct payees in the long-serving subset and report it.
n_long_serving = len(df_five_house["payee"].unique())
"Number of unique individiuals who have served in more than four sessions {}".format(n_long_serving)

'Number of unique individiuals who have served in more than four sessions 22'

In [107]:
# List each long-serving payee's rows chronologically. Year must be sorted
# before quarter; sorting on quarter first would place q1 of a later year
# ahead of q2 of an earlier one.
df_five_house.sort_values(["payee", "year_clean", "quarter_clean"])

Unnamed: 0,payee,category,purpose,office_cc,amount,party,congress,quarter_clean,year_clean
1244,anfinson susan,personnel compensation,shared employee,hon marsha blackburn,57.89,R,115,q1,2010
2293,anfinson susan,personnel compensation,shared employee,hon john campbell,12.50,R,113,q1,2010
2414,anfinson susan,personnel compensation,shared employee,hon shelley moore capito,13.33,R,113,q1,2010
3253,anfinson susan,personnel compensation,shared employee,hon mike coffman,80.00,R,115,q1,2010
5532,anfinson susan,personnel compensation,shared employee,hon j randy forbes,93.33,R,114,q1,2010
5576,anfinson susan,personnel compensation,shared employee,hon jeff fortenberry,56.73,R,115,q1,2010
5986,anfinson susan,personnel compensation,shared employee,hon jim gerlach,13.33,R,113,q1,2010
8712,anfinson susan,personnel compensation,shared employee,hon mary jo kilroy,171.11,D,111,q1,2010
8713,anfinson susan,personnel compensation,shared employee other compensation,hon mary jo kilroy,2395.56,D,111,q1,2010
10369,anfinson susan,personnel compensation,shared employee,hon betsy markey,62.60,D,111,q1,2010
