This notebook will just look at a list of bills that had a large number of party flips. See if there is anything in common with the bills.

In [63]:
import pandas as pd
import numpy as np
import pickle
from congress import Congress
pd.options.display.max_columns = 100

In [71]:
def get_full_set():
    for dataset in ['train','dev','test']:
        df = pd.read_csv('../data/model/' + dataset + '.csv', encoding = 'latin1')
        if dataset == 'train':
            df_votes = df
        else:
            df_votes = pd.concat([df_votes, df])
    return df_votes
#df_votes = get_full_set()

In [4]:
# get bills with at least one flip, sorted
df_votes = df_votes.groupby('bill_id').broke_from_party.sum().sort_values(ascending = False)
df_votes = pd.DataFrame(df_votes)
df_votes = df_votes[df_votes > 0]

In [9]:
# merge with bill info
df_bills = pd.read_csv('../data/propublica/billsfull.csv')
df_bills.drop_duplicates(subset = ['bill_id'], inplace = True)
df_bills.set_index('bill_id', inplace = True)

  interactivity=interactivity, compiler=compiler, result=result)


In [19]:
df_votes = pd.concat([df_votes, df_bills], axis = 1).dropna(subset = ['broke_from_party']).sort_values('broke_from_party', ascending = False)

In [20]:
df_votes.to_csv('../data/model/bill_exam.csv')

In [25]:
test = propublica.bills.get('hr2997', congress=114)

In [28]:
test = propublica.bills.get('hr4600', congress=107)

In [34]:
test = propublica.bills.get('hr4809', congress=113)

In [35]:
test

{'actions': [{'action_type': 'President',
   'chamber': None,
   'datetime': '2014-09-26',
   'description': 'Became Public Law No: 113-172.',
   'id': 16},
  {'action_type': 'President',
   'chamber': None,
   'datetime': '2014-09-26',
   'description': 'Signed by President.',
   'id': 15},
  {'action_type': 'Floor',
   'chamber': 'House',
   'datetime': '2014-09-19',
   'description': 'Presented to President.',
   'id': 14},
  {'action_type': 'Floor',
   'chamber': 'Senate',
   'datetime': '2014-09-18',
   'description': 'Message on Senate action sent to the House.',
   'id': 13},
  {'action_type': 'Floor',
   'chamber': 'Senate',
   'datetime': '2014-09-17',
   'description': 'Passed Senate without amendment by Unanimous Consent. (consideration: CR S5720)',
   'id': 12},
  {'action_type': 'IntroReferral',
   'chamber': 'Senate',
   'datetime': '2014-07-30',
   'description': 'Received in the Senate, read twice.',
   'id': 11},
  {'action_type': 'Floor',
   'chamber': 'House',
   'da

## Download Missing Bills

In [72]:
# a fucking ton of bills are missing
df_votes = get_full_set()
df_votes = df_votes.groupby('bill_id').broke_from_party.sum().sort_values(ascending = False)
df_votes = pd.DataFrame(df_votes)

df_bills = pd.read_csv('../data/propublica/billsfull.csv')
df_bills.drop_duplicates(subset = ['bill_id'], inplace = True)
df_bills.set_index('bill_id', inplace = True)

  interactivity=interactivity, compiler=compiler, result=result)


In [73]:
missing = pd.concat([df_votes, df_bills], axis = 1).dropna(subset = ['broke_from_party']).sort_values('broke_from_party', ascending = False)
missing['is_missing'] = missing.chamber.isnull() == True
missing['session2'] = missing.index.str.split('-').str[-1]

In [88]:
missing[missing.session2 == '114']

Unnamed: 0.1,broke_from_party,Unnamed: 0,chamber,session,latest_major_action_date,sponsor_party,rsponsor,dsponsor,cosponsor,primary_subject,sponsor_id,number,title,bill_slug,summary,committees,committee_codes,bill_uri,short_title,sponsor_title,sponsor_name,sponsor_state,sponsor_uri,gpo_pdf_uri,congressdotgov_url,govtrack_url,introduced_date,active,last_vote,house_passage,senate_passage,enacted,summary_short,latest_major_action,is_missing,session2
hr5606-114,259.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr4498-114,177.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr4820-114,170.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr3537-114,167.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr5278-114,127.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr3038-114,119.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr1560-114,116.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr5055-114,112.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr2048-114,112.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114
hr4889-114,103.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,114


In [74]:
missing.groupby(['session2', 'is_missing']).size()

session2  is_missing
105       False         346
          True            1
106       False         264
          True            4
107       False         339
          True            7
108       False         124
          True           13
109       False         299
          True           10
110       False         610
111       False         465
          True            3
112       False         436
          True            4
113       False         152
          True          295
114       True          463
115       False         215
          True           11
dtype: int64

In [52]:
295 / (152 + 295)

0.6599552572706935

In [75]:
missing_bill_ids = missing[missing.is_missing == True].index

In [76]:
missing_bill_ids[0]

'hr5606-114'

In [None]:
propublica = Congress('NGLoQNiF7aiAHB6vL1XGvdSQ7KB0CbPWxkLNv5Cz')

In [83]:
errors = []
path = "../data/propublica/missing_bills/"
for bill_id in missing_bill_ids:
    try:
        slug, session = bill_id.split("-")
    except:
        continue
    
    try:
        response = propublica.bills.get(slug, congress=int(session))
        f_name = "billinfo_" + bill_id + ".pickle"
        with open(path + f_name, 'wb') as f:
            pickle.dump(response, f)
    except Exception as e:
        errors.append([bill_id, "bill endpoint", e])
        
    try:
        response = propublica.bills.cosponsors(slug, congress=int(session))
        f_name = "cosponsors_" + bill_id + ".pickle"
        with open(path + f_name, 'wb') as f:
            pickle.dump(response, f)
    except Exception as e:
        errors.append([bill_id, "cosponsor endpoint", e])        

In [85]:
response

{'bill_id': 'hr3628-113',
 'bill_slug': 'hr3628',
 'bill_type': 'hr',
 'bill_uri': 'https://api.propublica.org/congress/v1/113/bills/hr3628.json',
 'committees': 'Senate Commerce, Science, and Transportation Committee',
 'congress': '113',
 'cosponsors': [{'cosponsor_id': 'R000011',
   'cosponsor_party': 'D',
   'cosponsor_state': 'WV',
   'cosponsor_title': 'Rep.',
   'cosponsor_uri': 'https://api.propublica.org/congress/v1/members/R000011.json',
   'date': '2013-12-02',
   'name': 'Nick J. Rahall II'}],
 'cosponsors_by_party': [{'party': {'id': 'D', 'sponsors': '1'}}],
 'house_passage_vote': '2014-01-08',
 'introduced_date': '2013-12-02',
 'latest_major_action': 'Received in the Senate and Read twice and referred to the Committee on Commerce, Science, and Transportation.',
 'latest_major_action_date': '2014-01-09',
 'number': 'H.R.3628',
 'number_of_cosponsors': 1,
 'senate_passage_vote': None,
 'sponsor_id': 'S001154',
 'sponsor_name': 'Bill Shuster',
 'sponsor_party': 'R',
 'sponso

In [84]:
pd.DataFrame(errors)

Unnamed: 0,0,1,2
0,-112,bill endpoint,112/bills/.json
1,-112,cosponsor endpoint,{'message': 'Internal server error'}
2,hr8-112,cosponsor endpoint,"Expecting ',' delimiter: line 24 column 85 (ch..."
3,-105,bill endpoint,105/bills/.json
4,-105,cosponsor endpoint,{'message': 'Internal server error'}
5,-106,bill endpoint,106/bills/.json
6,-106,cosponsor endpoint,{'message': 'Internal server error'}
7,-108,bill endpoint,108/bills/.json
8,-108,cosponsor endpoint,{'message': 'Internal server error'}
9,motion-109,bill endpoint,109/bills/motion.json


In [77]:
propublica.bills.cosponsors?

## Remove pre 105 votes from model sets

In [61]:
for dataset in ['train','dev','test']:
    df = pd.read_csv('../data/model/' + dataset + '.csv', encoding = 'latin1')
    df = df[df.congress >= 105]
    path = '../data/model/' + dataset + '2.csv'
    df.to_csv(path, index = False)

In [55]:
df_votes = get_full_set()

In [58]:
df_votes.shape

(1940619, 12)

In [60]:
df_votes[df_votes.congress > 105]

Unnamed: 0,full_set_id,member_id,party,vote_position,congress,roll_call,chamber,session,bill_id,majority_pos_rep,majority_pos_dem,broke_from_party
0,1688860,P000449,R,Yes,114.0,179.0,Senate,1.0,hr644-114,Yes,Yes,0
2,1805766,W000812,R,Yes,114.0,398.0,House,2.0,hr5485-114,Yes,No,0
4,515194,J000032,D,Yes,106.0,295.0,House,2.0,hr4201-106,Yes,No,1
6,1877718,W000813,R,Yes,115.0,223.0,House,1.0,hr876-115,Yes,Yes,0
7,1003396,B000575,R,No,110.0,96.0,House,2.0,hres1014-110,No,Yes,0
9,1890431,Y000062,D,Yes,115.0,248.0,House,1.0,hr1665-115,Yes,Yes,0
10,1038758,H001030,D,Yes,110.0,445.0,House,2.0,hconres379-110,No,Yes,0
11,1384055,B000574,D,No,112.0,945.0,House,1.0,hres502-112,Yes,No,0
12,928079,B001255,R,Yes,110.0,846.0,House,1.0,hr3222-110,Yes,Yes,0
13,1694673,C001072,D,Yes,114.0,377.0,House,1.0,hr805-114,Yes,Yes,0
