In [1]:
import numpy as np
import pandas as pd
import os
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
import psycopg2
from sqlalchemy import create_engine

In [2]:
bills =pd.read_csv('Data/bills.csv')
committees =pd.read_csv('Data/committees.csv')
leadership =pd.read_csv('Data/leadership.csv')
os_legislators =pd.read_csv('Data/os_legislators.csv')
votingaffinity=pd.read_csv('Data/votingaffinity.csv')
committee_members =pd.read_csv('Data/committee_members.csv')
congress_members =pd.read_csv('Data/congress_members.csv')
os_contributions =pd.read_csv('Data/os_contributions.csv')
terms =pd.read_csv('Data/terms.csv')

## Bills

In [3]:
bills.columns = [x.lower() for x in bills.columns]
bills.columns = [x.replace('.', '_') for x in bills.columns]
bills.columns

Index(['congress', 'introduceddate', 'number', 'title', 'type', 'url',
       'latestaction_actiondate', 'latestaction_text', 'policyarea_name',
       'amendmentnumber', 'latestaction', 'latestaction_actiontime'],
      dtype='object')

## Congress Members

In [4]:
congress_members.columns = [x.lower() for x in congress_members.columns]
congress_members.columns = [x.replace('.', '_') for x in congress_members.columns]
congress_members.columns

Index(['bioguideid', 'birthyear', 'currentmember', 'directordername',
       'firstname', 'honorificname', 'invertedordername', 'lastname',
       'officialwebsiteurl', 'partyhistory', 'state', 'terms', 'updatedate',
       'addressinformation_city', 'addressinformation_district',
       'addressinformation_officeaddress', 'addressinformation_phonenumber',
       'addressinformation_zipcode', 'cosponsoredlegislation_count',
       'cosponsoredlegislation_url', 'depiction_attribution',
       'depiction_imageurl', 'sponsoredlegislation_count',
       'sponsoredlegislation_url', 'middlename', 'suffixname', 'nickname',
       'leadership', 'district'],
      dtype='object')

In [5]:
congress_members = congress_members.drop(['terms', 'leadership', 'partyhistory'], axis = 1)

In [6]:
congress_members.head(3).T

Unnamed: 0,0,1,2
bioguideid,B000944,C000127,C000141
birthyear,1952,1958,1943
currentmember,True,True,True
directordername,Sherrod Brown,Maria Cantwell,Benjamin L. Cardin
firstname,Sherrod,Maria,Ben
honorificname,Mr.,Ms.,Mr.
invertedordername,"Brown, Sherrod","Cantwell, Maria","Cardin, Benjamin L."
lastname,Brown,Cantwell,Cardin
officialwebsiteurl,https://www.brown.senate.gov/,https://www.cantwell.senate.gov,https://www.cardin.senate.gov/
state,Ohio,Washington,Maryland


## Terms

In [7]:
terms.columns = [x.lower() for x in terms.columns]
terms.columns = [x.replace('.', '_') for x in terms.columns]
terms.head(3).T

Unnamed: 0,0,1,2
chamber,Senate,Senate,Senate
congress,107,108,109
endyear,2003.0,2005.0,2007.0
membertype,Senator,Senator,Senator
startyear,2002,2003,2005
statecode,TX,TX,TX
statename,Texas,Texas,Texas
bioguideid,C001056,C001056,C001056
district,,,


## Leadership

In [8]:
leadership.columns = [x.lower() for x in leadership.columns]
leadership.columns = [x.replace('.', '_') for x in leadership.columns]
leadership.head(3).T

Unnamed: 0,0,1,2
congress,113,114,115
type,Assistant Democratic Leader,Assistant Majority Leader,Majority Whip
bioguideid,C001056,C001056,C001056
current,,,


## os_legislators

In [9]:
os_legislators.columns = [x.lower() for x in os_legislators.columns]
os_legislators.columns = [x.replace('.', '_') for x in os_legislators.columns]
os_legislators = os_legislators[['cid', 'party', 'oguide_id']]
os_legislators = os_legislators.rename({'oguided_id': 'bioguideid'}, axis = 1)
os_legislators.head(10)

Unnamed: 0,cid,party,oguide_id
0,N00050780,D,
1,N00035774,R,S001198
2,N00026050,R,M001153
3,N00044245,R,C001054
4,N00041295,R,M001212
5,N00024759,R,R000575
6,N00003028,R,A000055
7,N00048145,R,S001220
8,N00035691,R,P000609
9,N00030622,D,S001185


In [10]:
## merging congress_members and os_legislators to create members

In [11]:
members = pd.merge(congress_members, os_legislators,
                  on = 'bioguideid',
                  how = 'inner')

KeyError: 'bioguideid'

In [None]:
#fixing NaN
bio = congress_members.query("lastname=='Peltola'").reset_index()['bioguideid'][0]

## os_contributions

In [12]:
os_contributions.columns = [x.lower() for x in os_contributions.columns]
os_contributions.columns = [x.replace('.', '_') for x in os_contributions.columns]
os_contributions.head(3)

Unnamed: 0,org_nam,otal,pac,ndiv
0,State of Alaska,19541,0,19541
1,"Sonosky, Chambers et al",16050,0,16050
2,General Communication Inc,15150,2500,12650


## committees

In [14]:
committees.columns = [x.lower() for x in committees.columns]
committees.columns = [x.replace('.', '_') for x in committees.columns]
committees.head(3).T

Unnamed: 0,0,1,2
chamber,House,House,House
committeetypecode,Other,Standing,Select
name,Bicentenary Committee,Energy (Ad Hoc) Committee,U.S. Role in Iranian Arms Committee
systemcode,hcza00,hhah00,hlbz00
url,https://api.congress.gov/v3/committee/house/hc...,https://api.congress.gov/v3/committee/house/hh...,https://api.congress.gov/v3/committee/house/hl...
parent_name,,,
parent_systemcode,,,
parent_url,,,
subcommittees,,,


In [18]:
#subcommittees are dictionaries, need to deal with that
committees = committees.drop(['subcommittees'], axis = 1)

## committee_members

In [20]:
committee_members.columns = [x.lower() for x in committee_members.columns]
committee_members.columns = [x.replace('.', '_') for x in committee_members.columns]
committee_members.head(10)

Unnamed: 0,committee_code,rank,title,bioguide
0,HSII,1.0,Chair,W000821
1,HSII,1.0,Ranking Member,G000551
2,HSII,2.0,,L000564
3,HSII,2.0,,N000179
4,HSII,3.0,,W000804
5,HSII,3.0,,S001177
6,HSII,4.0,,M001177
7,HSII,4.0,,H001068
8,HSII,5.0,,G000565
9,HSII,5.0,,G000574
