In [1]:
import os

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')

In [2]:
# Load every raw CSV extract from the Data/ directory into its own DataFrame.
# Each frame is cleaned in its own section further down.
bills = pd.read_csv('Data/bills.csv')
congress_members = pd.read_csv('Data/congress_members.csv')
os_legislators = pd.read_csv('Data/os_legislators.csv')
committee_members = pd.read_csv('Data/committee_members.csv')
leadership = pd.read_csv('Data/leadership.csv')
terms = pd.read_csv('Data/terms.csv')
committees = pd.read_csv('Data/committees.csv')
os_contributions = pd.read_csv('Data/os_contributions.csv')
votingaffinity = pd.read_csv('Data/votingaffinity.csv')

## Bills

In [3]:
# Normalise the column labels in one pass: lower-case them and turn the
# dots in dotted names into underscores.
bills.columns = [label.lower().replace('.', '_') for label in bills.columns]
bills.columns

Index(['congress', 'introduceddate', 'number', 'title', 'type', 'url',
       'latestaction_actiondate', 'latestaction_text', 'policyarea_name',
       'amendmentnumber', 'latestaction', 'latestaction_actiontime'],
      dtype='object')

In [4]:
# Preview the first three bills, transposed so each column reads as a row.
bills.head(3).transpose()

Unnamed: 0,0,1,2
congress,118,118,118
introduceddate,2023-10-04,2023-09-29,2023-09-27
number,405.0,2998.0,373.0
title,A resolution expressing support for the design...,Land-Grant Research Equity and Accountability Act,A resolution designating the week of September...
type,SRES,S,SRES
url,https://api.congress.gov/v3/bill/118/sres/405?...,https://api.congress.gov/v3/bill/118/s/2998?fo...,https://api.congress.gov/v3/bill/118/sres/373?...
latestaction_actiondate,2023-10-04,2023-09-29,2023-09-27
latestaction_text,"Submitted in the Senate, considered, and agree...",Read twice and referred to the Committee on Ag...,"Submitted in the Senate, considered, and agree..."
policyarea_name,Armed Forces and National Security,Agriculture and Food,
amendmentnumber,,,


In [5]:
# Remove the `congress` column from the bills table.
bills = bills.drop(columns=['congress'])

In [40]:
# Inspect the first bill (transposed) with the remaining columns.
bills.head(1).transpose()

Unnamed: 0,0
introduceddate,2023-10-04
number,405.0
title,A resolution expressing support for the design...
type,SRES
url,https://api.congress.gov/v3/bill/118/sres/405?...
latestaction_actiondate,2023-10-04
latestaction_text,"Submitted in the Senate, considered, and agree..."
policyarea_name,Armed Forces and National Security
amendmentnumber,
latestaction,


## congress_members

In [6]:
# Lower-case the column labels and swap '.' for '_' in a single pass.
congress_members.columns = [
    label.lower().replace('.', '_') for label in congress_members.columns
]
congress_members.columns

Index(['bioguideid', 'birthyear', 'currentmember', 'directordername',
       'firstname', 'honorificname', 'invertedordername', 'lastname',
       'officialwebsiteurl', 'partyhistory', 'state', 'terms', 'updatedate',
       'addressinformation_city', 'addressinformation_district',
       'addressinformation_officeaddress', 'addressinformation_phonenumber',
       'addressinformation_zipcode', 'cosponsoredlegislation_count',
       'cosponsoredlegislation_url', 'depiction_attribution',
       'depiction_imageurl', 'sponsoredlegislation_count',
       'sponsoredlegislation_url', 'middlename', 'suffixname', 'nickname',
       'leadership', 'district'],
      dtype='object')

In [7]:
# Drop the `terms`, `leadership` and `partyhistory` columns; separate
# `terms` and `leadership` frames are loaded from their own CSV files.
congress_members = congress_members.drop(
    columns=['terms', 'leadership', 'partyhistory']
)

In [8]:
# Preview the first three members, transposed for readability.
congress_members.head(3).transpose()

Unnamed: 0,0,1,2
bioguideid,B000944,C000127,C000141
birthyear,1952,1958,1943
currentmember,True,True,True
directordername,Sherrod Brown,Maria Cantwell,Benjamin L. Cardin
firstname,Sherrod,Maria,Ben
honorificname,Mr.,Ms.,Mr.
invertedordername,"Brown, Sherrod","Cantwell, Maria","Cardin, Benjamin L."
lastname,Brown,Cantwell,Cardin
officialwebsiteurl,https://www.brown.senate.gov/,https://www.cantwell.senate.gov,https://www.cardin.senate.gov/
state,Ohio,Washington,Maryland


## Terms

In [9]:
# Lower-case the column labels and replace '.' with '_', then preview.
terms.columns = [label.lower().replace('.', '_') for label in terms.columns]
terms.head(3).transpose()

Unnamed: 0,0,1,2
chamber,Senate,Senate,Senate
congress,107,108,109
endyear,2003.0,2005.0,2007.0
membertype,Senator,Senator,Senator
startyear,2002,2003,2005
statecode,TX,TX,TX
statename,Texas,Texas,Texas
bioguideid,C001056,C001056,C001056
district,,,


## Leadership

In [10]:
# Lower-case the column labels and replace '.' with '_', then preview.
leadership.columns = [label.lower().replace('.', '_') for label in leadership.columns]
leadership.head(3).transpose()

Unnamed: 0,0,1,2
congress,113,114,115
type,Assistant Democratic Leader,Assistant Majority Leader,Majority Whip
bioguideid,C001056,C001056,C001056
current,,,


## os_legislators

In [11]:
# Lower-case the column labels and replace '.' with '_', discard fully
# duplicated rows, then preview the first three legislators.
os_legislators.columns = [
    label.lower().replace('.', '_') for label in os_legislators.columns
]
os_legislators = os_legislators.drop_duplicates()
os_legislators.head(3).transpose()

Unnamed: 0,0,1,2
cid,N00050780,N00035774,N00026050
firstl,Mary Peltola,Dan Sullivan,Lisa Murkowski
lastnam,Peltola,Sullivan,Murkowski
party,D,R,R
offic,AK01,AKS1,AKS2
gend,F,M,F
first_elected,2022,2014,2002
xit_cod,0,0,0
commen,,,
phon,,202-224-3004,202-224-6665


In [12]:
# Manual bioguide-ID overrides: newbio[i] is the ID written for the
# legislator whose `firstl` value equals legs[i].
newbio = [
    'B001311', 'B001313', 'B001314', 'B001315', 'C001125', 'C001126',
    'C001127', 'C001129', 'E000071', 'F000475', 'G000061', 'G000551',
    'G000595', 'H001093', 'L000595', 'L000597', 'M000687', 'M001210',
    'M001217', 'M001227', 'R000579', 'R000618', 'S001218', 'T000165',
    'T000488', 'P000619',
]
legs = [
    'Dan Bishop', 'Shontel Brown', 'Aaron Bean', 'Nikki Budzinski',
    'Troy Carter', 'Mike Carey', 'Sheila Cherfilus-McCormick',
    'Mike Collins', 'Jake Ellzey', 'Brad Finstad', 'Mike Garcia',
    'Raul M Grijalva', 'Bob Good', 'Erin Houchin', 'Julia Letlow',
    'Laurel Lee', 'Kweisi Mfume', 'Greg Murphy', 'Jared Moskowitz',
    'Jennifer McClellan', 'Pat Ryan', 'Pete Ricketts', 'Melanie Stansbury',
    'Tom Tiffany', 'Shri Thanedar', 'Mary Peltola',
]


def replacebio(b, n):
    """Write bioguide ID `b` into `oguide_id` for rows whose `firstl` equals `n`.

    Operates in place on the module-level `os_legislators` frame. Rows are
    matched by exact string equality; when no row matches, nothing changes.
    """
    name_matches = os_legislators['firstl'] == n
    os_legislators.loc[name_matches, 'oguide_id'] = b


# Apply every override pair in order.
for bioguide, legislator in zip(newbio, legs):
    replacebio(bioguide, legislator)

In [13]:
# Keep only the OpenSecrets id, party and bioguide id, and rename the
# bioguide column to `bioguideid` (the merge key used against congress_members).
os_legislators = (
    os_legislators
    .loc[:, ['cid', 'party', 'oguide_id']]
    .rename(columns={'oguide_id': 'bioguideid'})
)
os_legislators

Unnamed: 0,cid,party,bioguideid
0,N00050780,D,P000619
1,N00035774,R,S001198
2,N00026050,R,M001153
3,N00044245,R,C001054
4,N00041295,R,M001212
...,...,...,...
532,N00032838,D,M001183
533,N00009771,R,C001047
534,N00049197,R,H001096
535,N00006236,R,B001261


In [14]:
# Diagnostic outer join on bioguideid. `validate` raises if the key is
# duplicated on either side, and the `matched` column records whether each
# row came from both frames or only one.
members = congress_members.merge(
    os_legislators,
    how='outer',
    on='bioguideid',
    validate='one_to_one',
    indicator='matched',
)

In [15]:
# Count rows per merge outcome (both / left_only / right_only).
members.loc[:, 'matched'].value_counts()

matched
both          530
left_only       7
right_only      6
Name: count, dtype: int64

In [16]:
# Rows present in congress_members with no os_legislators counterpart.
members[members['matched'] == 'left_only']

Unnamed: 0,bioguideid,birthyear,currentmember,directordername,firstname,honorificname,invertedordername,lastname,officialwebsiteurl,state,...,depiction_imageurl,sponsoredlegislation_count,sponsoredlegislation_url,middlename,suffixname,nickname,district,cid,party,matched
120,N000147,1937.0,True,Eleanor Holmes Norton,ELEANOR,,"Norton, Eleanor Holmes",NORTON,https://norton.house.gov/,District of Columbia,...,https://www.congress.gov/img/member/116_dg_dc_...,975.0,https://api.congress.gov/v3/member/N000147/spo...,HOLMES,,,,,,left_only
133,S001177,1955.0,True,Gregorio Kilili Camacho Sablan,Gregorio,,"Sablan, Gregorio Kilili Camacho",Sablan,https://sablan.house.gov/,Northern Mariana Islands,...,https://www.congress.gov/img/member/s001177_20...,149.0,https://api.congress.gov/v3/member/S001177/spo...,Kilili Camacho,,,,,,left_only
248,P000610,1966.0,True,Stacey E. Plaskett,Stacey,Ms.,"Plaskett, Stacey E.",Plaskett,https://plaskett.house.gov/,Virgin Islands,...,https://www.congress.gov/img/member/116_dg_vi_...,112.0,https://api.congress.gov/v3/member/P000610/spo...,E.,,,,,,left_only
252,R000600,1947.0,True,Aumua Amata Coleman Radewagen,Aumua Amata,Mrs.,"Radewagen, Aumua Amata Coleman",Radewagen,https://radewagen.house.gov/,American Samoa,...,https://www.congress.gov/img/member/r000600_20...,35.0,https://api.congress.gov/v3/member/R000600/spo...,Coleman,,,,,,left_only
293,G000582,1976.0,True,Jenniffer González-Colón,Jenniffer,Miss,"González-Colón, Jenniffer",Gonzalez-Colon,https://gonzalez-colon.house.gov,Puerto Rico,...,https://www.congress.gov/img/member/g000582_20...,161.0,https://api.congress.gov/v3/member/G000582/spo...,,,,,,,left_only
478,M001219,1962.0,True,James C. Moylan,James,,"Moylan, James C.",Moylan,https://moylan.house.gov,Guam,...,https://www.congress.gov/img/member/m001219_20...,12.0,https://api.congress.gov/v3/member/M001219/spo...,C.,,,,,,left_only
536,B001320,1979.0,True,Laphonza R. Butler,Laphonza,Ms.,"Butler, Laphonza R.",Butler,https://www.butler.senate.gov/,California,...,,,,,,,,,,left_only


In [17]:
# Rows present in os_legislators with no congress_members counterpart.
members[members['matched'] == 'right_only']

Unnamed: 0,bioguideid,birthyear,currentmember,directordername,firstname,honorificname,invertedordername,lastname,officialwebsiteurl,state,...,depiction_imageurl,sponsoredlegislation_count,sponsoredlegislation_url,middlename,suffixname,nickname,district,cid,party,matched
537,G000551,,,,,,,,,,...,,,,,,,,N00025284,D,right_only
538,F000062,,,,,,,,,,...,,,,,,,,N00007364,D,right_only
539,C001129,,,,,,,,,,...,,,,,,,,N00035370,R,right_only
540,S001197,,,,,,,,,,...,,,,,,,,N00035544,R,right_only
541,C001084,,,,,,,,,,...,,,,,,,,N00032019,D,right_only
542,S001192,,,,,,,,,,...,,,,,,,,N00033932,R,right_only


In [18]:
# Final join: keep only the legislators found in both sources.
members = congress_members.merge(os_legislators, how='inner', on='bioguideid')

In [19]:
# Display the inner-joined members frame.
members

Unnamed: 0,bioguideid,birthyear,currentmember,directordername,firstname,honorificname,invertedordername,lastname,officialwebsiteurl,state,...,depiction_attribution,depiction_imageurl,sponsoredlegislation_count,sponsoredlegislation_url,middlename,suffixname,nickname,district,cid,party
0,B000944,1952,True,Sherrod Brown,Sherrod,Mr.,"Brown, Sherrod",Brown,https://www.brown.senate.gov/,Ohio,...,"<a href=""http://www.senate.gov/artandhistory/h...",https://www.congress.gov/img/member/b000944_20...,1389.0,https://api.congress.gov/v3/member/B000944/spo...,,,,,N00003535,D
1,C000127,1958,True,Maria Cantwell,Maria,Ms.,"Cantwell, Maria",Cantwell,https://www.cantwell.senate.gov,Washington,...,"<a href=""http://www.senate.gov/artandhistory/h...",https://www.congress.gov/img/member/c000127_20...,882.0,https://api.congress.gov/v3/member/C000127/spo...,,,,,N00007836,D
2,C000141,1943,True,Benjamin L. Cardin,Ben,Mr.,"Cardin, Benjamin L.",Cardin,https://www.cardin.senate.gov/,Maryland,...,"<a href=""http://www.senate.gov/artandhistory/h...",https://www.congress.gov/img/member/c000141_20...,1265.0,https://api.congress.gov/v3/member/C000141/spo...,,,,,N00001955,D
3,C000174,1947,True,Thomas R. Carper,Thomas,Mr.,"Carper, Thomas R.",Carper,https://www.carper.senate.gov,Delaware,...,"<a href=""http://www.senate.gov/artandhistory/h...",https://www.congress.gov/img/member/c000174_20...,769.0,https://api.congress.gov/v3/member/C000174/spo...,R.,,,,N00012508,D
4,C001070,1960,True,Robert P. Casey Jr.,Bob,Mr.,"Casey, Robert P., Jr.",Casey,https://www.casey.senate.gov/,Pennsylvania,...,"<a href=""http://www.senate.gov/artandhistory/h...",https://www.congress.gov/img/member/c001070_20...,1180.0,https://api.congress.gov/v3/member/C001070/spo...,,JR.,,,N00027503,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,G000600,1988,True,Marie Gluesenkamp Perez,Marie,,"Perez, Marie Gluesenkamp",Perez,https://gluesenkampperez.house.gov,Washington,...,Image courtesy of the Member,https://www.congress.gov/img/member/g000600_20...,13.0,https://api.congress.gov/v3/member/G000600/spo...,Gluesenkamp,,,3.0,N00050490,D
526,V000135,1969,True,Derrick Van Orden,Derrick,Mr.,"Van Orden, Derrick",Van Orden,https://vanorden.house.gov,Wisconsin,...,Image courtesy of the Member,https://www.congress.gov/img/member/v000135_20...,14.0,https://api.congress.gov/v3/member/V000135/spo...,,,,3.0,N00046843,R
527,H001096,1962,True,Harriet M. Hageman,Harriet,,"Hageman, Harriet M.",Hageman,https://hageman.house.gov,Wyoming,...,Image courtesy of the Member,https://www.congress.gov/img/member/h001096_20...,28.0,https://api.congress.gov/v3/member/H001096/spo...,M.,,,,N00049197,R
528,R000618,1964,True,Pete Ricketts,Pete,Mr.,"Ricketts, Pete",Ricketts,https://www.ricketts.senate.gov,Nebraska,...,Official U.S. Senate Photo,https://www.congress.gov/img/member/r000618_20...,17.0,https://api.congress.gov/v3/member/R000618/spo...,,,,,N00027695,R


In [20]:
# Load the ideology table and rename its key to `bioguideid`, the column
# name used as the join key on `members`.
ideology = pd.read_csv('Data/ideology.csv').rename(
    columns={'bioguide_id': 'bioguideid'}
)

In [21]:
# Attach the ideology columns; members without an ideology row are dropped.
members = members.merge(ideology, how='inner', on='bioguideid')

## Voting similarity

In [22]:
# Data/votingaffinity.csv was already parsed into `votingaffinity` in the
# loading cell at the top of the notebook, so reuse that frame rather than
# reading the ~200k-row file a second time. `.copy()` keeps `votesim`
# independent of the original frame.
votesim = votingaffinity.copy()
votesim

Unnamed: 0,icpsr,member_compare,affinity_score
0,14226,14435,0.305147
1,14226,14858,0.375000
2,14226,14871,0.352941
3,14226,14921,0.772059
4,14226,15015,0.356618
...,...,...,...
204133,91980,31101,0.202532
204134,91980,31102,0.830018
204135,91980,31103,0.169982
204136,91980,39301,0.198915


## os_contributions

In [23]:
# Lower-case the column labels and replace '.' with '_', then repair the
# two truncated labels ('otal' -> 'total', 'org_nam' -> 'org_name').
os_contributions.columns = [
    label.lower().replace('.', '_') for label in os_contributions.columns
]
os_contributions = os_contributions.rename(
    columns={'otal': 'total', 'org_nam': 'org_name'}
)
os_contributions.head(30)

Unnamed: 0,org_name,total,pac,ndiv,cid
0,State of Alaska,19541,0,19541,N00050780
1,"Sonosky, Chambers et al",16050,0,16050,N00050780
2,General Communication Inc,15150,2500,12650,N00050780
3,Google Inc,14164,0,14164,N00050780
4,University of Alaska/Anchorage,13938,0,13938,N00050780
5,Swing Left,13400,0,13400,N00050780
6,Apple Inc,10783,0,10783,N00050780
7,Service Employees International Union,10010,5000,5010,N00050780
8,National Education Assn,10000,10000,0,N00050780
9,Sealaska Corp,9950,0,9950,N00050780


## committees

In [24]:
# Lower-case the column labels and replace '.' with '_', then drop the
# `subcommittees` column.
committees.columns = [label.lower().replace('.', '_') for label in committees.columns]
committees = committees.drop(columns=['subcommittees'])

In [25]:
# Strip the last two characters from each system code and upper-case the
# rest (e.g. a value like 'sssb00' becomes 'SSSB').
committees['systemcode'] = committees['systemcode'].map(
    lambda code: code[:-2].upper()
)

In [26]:
# Display the cleaned committees frame.
committees

Unnamed: 0,chamber,committeetypecode,name,systemcode,url,parent_name,parent_systemcode,parent_url
0,House,Other,Bicentenary Committee,HCZA,https://api.congress.gov/v3/committee/house/hc...,,,
1,House,Standing,Energy (Ad Hoc) Committee,HHAH,https://api.congress.gov/v3/committee/house/hh...,,,
2,House,Select,U.S. Role in Iranian Arms Committee,HLBZ,https://api.congress.gov/v3/committee/house/hl...,,,
3,House,Select,"Children, Youth, and Families (Select) Committee",HLCF,https://api.congress.gov/v3/committee/house/hl...,,,
4,House,Select,Select Committee on the Climate Crisis,HLCN,https://api.congress.gov/v3/committee/house/hl...,,,
...,...,...,...,...,...,...,...,...
709,Senate,Standing,"Competitiveness, Capital Formation and Economi...",SSSB,https://api.congress.gov/v3/committee/senate/s...,Small Business and Entrepreneurship Committee,sssb00,https://api.congress.gov/v3/committee/senate/s...
710,Senate,Standing,Advocacy and The Future of Small Business Subc...,SSSB,https://api.congress.gov/v3/committee/senate/s...,Small Business and Entrepreneurship Committee,sssb00,https://api.congress.gov/v3/committee/senate/s...
711,Senate,Standing,Small Business: Family Farm Subcommittee,SSSB,https://api.congress.gov/v3/committee/senate/s...,Small Business and Entrepreneurship Committee,sssb00,https://api.congress.gov/v3/committee/senate/s...
712,Senate,Standing,Entrepreneurship and Special Problems Facing S...,SSSB,https://api.congress.gov/v3/committee/senate/s...,Small Business and Entrepreneurship Committee,sssb00,https://api.congress.gov/v3/committee/senate/s...


## committee_members

In [27]:
# Lower-case the column labels and replace '.' with '_', then preview.
committee_members.columns = [
    label.lower().replace('.', '_') for label in committee_members.columns
]
committee_members.head(30)

Unnamed: 0,committee_code,rank,title,bioguide
0,HSII,1.0,Chair,W000821
1,HSII,1.0,Ranking Member,G000551
2,HSII,2.0,,L000564
3,HSII,2.0,,N000179
4,HSII,3.0,,W000804
5,HSII,3.0,,S001177
6,HSII,4.0,,M001177
7,HSII,4.0,,H001068
8,HSII,5.0,,G000565
9,HSII,5.0,,G000574


## Create the database

In [28]:
# Connect to the PostgreSQL server at host `postgres` as user `postgres`.
# No database name is given, so this is a server-level connection used for
# database administration rather than for loading tables.
dbserver = psycopg2.connect(
    user='postgres',
    password=POSTGRES_PASSWORD,
    host='postgres',
    port=5432,
)
# Commit each statement immediately; PostgreSQL does not allow
# CREATE/DROP DATABASE inside a transaction block.
dbserver.autocommit = True

In [29]:
# Cursor on the server-level connection, used to run the database DDL.
cursor = dbserver.cursor()

In [30]:
# Start from an empty `congress` database on every run: drop any existing
# one, then create it afresh. `IF EXISTS` makes the drop a no-op on the
# first run. Unlike a bare `except:`, this does not mistake connection or
# permission errors for "database already exists" -- real failures
# propagate with their original message.
cursor.execute('DROP DATABASE IF EXISTS congress')
cursor.execute('CREATE DATABASE congress')

In [31]:
# SQLAlchemy engine pointing at the `congress` database. The URL is built
# with URL.create rather than string formatting so that a password
# containing characters such as '@', '/' or ':' is escaped correctly, and a
# missing password (POSTGRES_PASSWORD is None) is omitted instead of being
# embedded as the literal text 'None'.
engine = create_engine(URL.create(
    drivername='postgresql+psycopg2',
    username='postgres',
    password=POSTGRES_PASSWORD,
    host='postgres',
    port=5432,
    database='congress',
))

We need to upload the following tables:

* members
* bills
* committees
* committee members
* terms
* leadership
* contributions
* voting similarity (votesim)

In [32]:
# Print the row count, then write the frame to the `members` table,
# replacing the table if it already exists. The value returned by to_sql
# is displayed as the cell output.
print(len(members))
members.to_sql(name='members', con=engine, if_exists='replace',
               index=False, chunksize=1000)

530


530

In [33]:
# Print the row count, then write the frame to the `bills` table,
# replacing the table if it already exists.
print(len(bills))
bills.to_sql(name='bills', con=engine, if_exists='replace',
             index=False, chunksize=1000)

38325


38325

In [34]:
# Print the row count, then write the frame to the `committees` table,
# replacing the table if it already exists.
print(len(committees))
committees.to_sql(name='committees', con=engine, if_exists='replace',
                  index=False, chunksize=1000)

714


714

In [35]:
# Print the row count, then write the frame to the `committee_members`
# table, replacing the table if it already exists.
print(len(committee_members))
committee_members.to_sql(name='committee_members', con=engine,
                         if_exists='replace', index=False, chunksize=1000)

3866


3866

In [36]:
# Print the row count, then write the frame to the `terms` table,
# replacing the table if it already exists.
print(len(terms))
terms.to_sql(name='terms', con=engine, if_exists='replace',
             index=False, chunksize=1000)

278


278

In [37]:
# Print the row count, then write the frame to the `leadership` table,
# replacing the table if it already exists.
print(len(leadership))
leadership.to_sql(name='leadership', con=engine, if_exists='replace',
                  index=False, chunksize=1000)

79


79

In [38]:
# Print the row count, then write os_contributions to the `contributions`
# table (note the shorter table name), replacing the table if it exists.
print(len(os_contributions))
os_contributions.to_sql(name='contributions', con=engine,
                        if_exists='replace', index=False, chunksize=1000)

1990


1990

In [39]:
# Print the row count, then write the frame to the `votesim` table in
# 1000-row chunks, replacing the table if it already exists.
print(len(votesim))
votesim.to_sql(name='votesim', con=engine, if_exists='replace',
               index=False, chunksize=1000)

204138


204138

In [41]:
# Display votesim again; this repeats the display in the
# "Voting similarity" section.
votesim

Unnamed: 0,icpsr,member_compare,affinity_score
0,14226,14435,0.305147
1,14226,14858,0.375000
2,14226,14871,0.352941
3,14226,14921,0.772059
4,14226,15015,0.356618
...,...,...,...
204133,91980,31101,0.202532
204134,91980,31102,0.830018
204135,91980,31103,0.169982
204136,91980,39301,0.198915
