# Data Acquisition Notebook
- Notebook detailing `database.py`
- The database will hold the two most recent Congresses

## ProPublica Data
- Functions and parameters

In [1]:
# API Key
import configparser

# Read/Scrape data
import requests
import json
import re
from bs4 import BeautifulSoup

In [2]:
# Load credentials from a local ini file so the API key is not written in the notebook
config = configparser.ConfigParser()
# NOTE: ConfigParser.read() skips files it cannot open instead of raising, so a
# missing auth/config.ini only shows up as an error on the config.get() call below
config.read('auth/config.ini')
PROPUBLICA_API_KEY = config.get('propublica', 'PROPUBLICA_API_KEY')

In [3]:
# Base URL that every request function below prepends to its endpoint path
API_ROOT = 'https://api.propublica.org/congress/v1/'
# Headers passed to each requests.get() call; carries the API key read from the config file
header = {'X-API-Key': f'{PROPUBLICA_API_KEY}'}

In [4]:
def get_house_ids(congress, timeout=30):
    '''
    Function to get house members' ProPublica ID by congress number

    Parameters:
        congress: Congress number (e.g. 116) inserted into the request URL
        timeout: Seconds to wait for the API before giving up (default 30)

    Returns:
        List with the 'id' of every member in the response

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
        requests.Timeout: If no response arrives within `timeout` seconds
    '''

    call_string = API_ROOT + f'{congress}/house/members.json'
    # Without a timeout, requests waits indefinitely on a stalled connection
    r = requests.get(call_string, headers=header, timeout=timeout)
    # Fail on HTTP errors here rather than with an obscure KeyError below
    r.raise_for_status()
    result = r.json()['results'][0]['members']

    return [member['id'] for member in result]

In [8]:
def get_member_data(member, timeout=30):
    '''
    Function to get house member's data

    Parameters:
        member: Member ID inserted into the request URL
        timeout: Seconds to wait for the API before giving up (default 30)

    Returns:
        First entry of the response's 'results' list

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
        requests.Timeout: If no response arrives within `timeout` seconds
    '''

    call_string = API_ROOT + f'members/{member}.json'
    # Without a timeout, requests waits indefinitely on a stalled connection;
    # this function is called once per member, so one stall would block the whole run
    r = requests.get(call_string, headers=header, timeout=timeout)
    # Fail on HTTP errors here rather than with an obscure KeyError below
    r.raise_for_status()

    return r.json()['results'][0]

## Get Initial Congress and Clean Data/Select Features

In [9]:
# House members of the 116th Congress (2019-2021)
ids_116 = get_house_ids(116)
members_116 = [ get_member_data(member) for member in ids_116 ]

In [18]:
from pprint import pprint

In [27]:
# Sample dictionary function
def sample_dict(dictionary, _range=(0,5)):
    '''
    Function to print a dictionary's size and a slice of its items

    _range is a (start, stop) pair used to slice the dictionary's keys;
    start must be smaller than stop
    '''
    start, stop = _range[0], _range[1]
    assert start < stop

    # Rebuild a small dictionary from the selected keys only
    preview = {}
    for key in [*dictionary.keys()][start:stop]:
        preview[key] = dictionary[key]

    print(f'*Total Length: {len(dictionary)}')
    print('*Sample Dictionary:')
    pprint(preview)

In [28]:
sample_dict(members_116[0])

*Total Length: 26
*Sample Dictionary:
{'first_name': 'Ralph',
 'id': 'A000374',
 'last_name': 'Abraham',
 'member_id': 'A000374',
 'middle_name': None}


In [40]:
def role_cleaner(role):
    '''
    Function to keep relevant role information

    Parameters:
        role: One entry of a member's 'roles' list

    Returns:
        Dictionary with 'details', 'voting' and 'committees' keys. Every
        committee is a copy of the input committee plus a 'subcommittees'
        list holding the role's subcommittees whose 'parent_committee_id'
        equals the committee's 'code'

    Raises:
        KeyError: If the role lacks one of the copied fields
    '''

    # Group subcommittees by the code of their parent committee.
    # The previous loop compared every committee dictionary with the string
    # 'subcommittees', which never matches, so nothing was ever nested.
    # NOTE(review): assumes the subcommittees are listed under
    # role['subcommittees'] - confirm against the API response
    by_parent = {}
    for subcom in role.get('subcommittees') or []:
        parent = subcom.get('parent_committee_id')
        if parent is not None:
            by_parent.setdefault(parent, []).append(subcom)

    # Copy each committee so the caller's dictionaries are left untouched
    committees = [
        {**com, 'subcommittees': list(by_parent.get(com.get('code'), ()))}
        for com in role['committees'] or []
    ]

    role_dict = {
        'details': {
            'congress': role['congress'],
            'state': role['state'],
            'party': role['party'],
            'seniority': role['seniority'],
            'district': role['district'],
            'start_date': role['start_date'],
            'end_date': role['end_date'],
            'at_large': role['at_large'],
        },
        'voting': {
            'sponsored': role['bills_sponsored'],
            'cosponsored': role['bills_cosponsored'],
            'total_votes': role['total_votes'],
            'missed_votes': role['missed_votes'],
        },
        'committees': committees,
    }

    return role_dict
    
def member_cleaner(member):
    '''
    Function to keep relevant information on congress member

    Parameters:
        member: Member dictionary holding the fields copied below

    Returns:
        Dictionary with '_id', 'bio', 'activity', 'other_ids' and 'roles'
        keys; 'roles' holds the cleaned roles, newest congress first

    Raises:
        KeyError: If the member or its newest role lacks a copied field
    '''

    # Newest congress first. The numbers are compared as integers so that a
    # value such as '99' cannot sort ahead of '116' when they arrive as
    # strings, and sorted() leaves the caller's list in its original order
    roles = sorted(
        member['roles'],
        key=lambda role: int(role['congress']),
        reverse=True,
    )
    # The FEC id is taken from the newest role; None when there are no roles
    fec_id = roles[0]['fec_candidate_id'] if roles else None

    mem_dict = {
        '_id': member['id'],
        'bio': {
            'first_name': member['first_name'],
            'middle_name': member['middle_name'],
            'last_name': member['last_name'],
            'dob': member['date_of_birth'],
            'gender': member['gender'],
            'current_party': member['current_party'],
        },
        'activity': {
            'most_recent_vote': member['most_recent_vote'],
            'last_updated': member['last_updated'],
            'in_office': member['in_office'],
        },
        'other_ids': {
            'google_id': member['google_entity_id'],
            'votesmart_id': member['votesmart_id'],
            'govtrack_id': member['govtrack_id'],
            'cspan_id': member['cspan_id'],
            'crp_id': member['crp_id'],
            'fec_id': fec_id,
        },
        'roles': [
            role_cleaner(role) for role in roles
        ],
    }

    return mem_dict

In [41]:
sample = member_cleaner(members_116[0])

In [42]:
pprint(sample)

{'_id': 'A000374',
 'activity': {'in_office': False,
              'last_updated': '2020-12-31 18:30:50 -0500',
              'most_recent_vote': '2020-10-01'},
 'bio': {'current_party': 'R',
         'dob': '1954-09-16',
         'first_name': 'Ralph',
         'gender': 'M',
         'last_name': 'Abraham',
         'middle_name': None},
 'other_ids': {'crp_id': 'N00036633',
               'cspan_id': '76236',
               'fec_id': 'H4LA05221',
               'google_id': '/m/012dwd7_',
               'govtrack_id': '412630',
               'votesmart_id': '155414'},
 'roles': [{'committees': [{'api_uri': 'https://api.propublica.org/congress/v1/116/house/committees/HSAG.json',
                            'begin_date': '2019-01-23',
                            'code': 'HSAG',
                            'end_date': '2021-01-03',
                            'name': 'Committee on Agriculture',
                            'rank_in_party': 13,
                            'side': 'minor

## Get Number of Current Congress

In [None]:
from datetime import datetime

In [None]:
# Year of 1st Congress
begin = 1789


def congress_number(on_date):
    '''
    Function to get the number of the congress in session on a given date

    A congress lasts two years and (since the 20th Amendment) begins on
    January 3rd of an odd-numbered year, so January 1st and 2nd still belong
    to the congress of the previous year.

    Parameters:
        on_date: Object with year, month and day attributes (e.g. a date)

    Returns:
        Congress number as an int
    '''
    year = on_date.year
    # Compare month AND day; testing the day of the month alone would push
    # every date before the 20th of any month back into the previous year
    if (on_date.month, on_date.day) < (1, 3):
        year -= 1

    # Integer arithmetic: two years per congress, counted from the first one
    return (year - begin) // 2 + 1


# Determine year to use
now = datetime.now().date()
year = now.year if (now.month, now.day) >= (1, 3) else now.year - 1

# Determine current congress
current_cong_num = congress_number(now)

In [None]:
print('Current Congress:', current_cong_num)

In [None]:
# House members of current congress
current_members = get_house_ids(current_cong_num)
members = [ get_member_data(mem) for mem in current_members ]

In [None]:
member = members[0]
sample_dict(member)

In [None]:
# Database connection and authentication
import firebase_admin
from firebase_admin import credentials, firestore
from google.cloud.firestore_v1.batch import WriteBatch

In [None]:
GCP_AUTH_PATH = config.get('firebase', 'GCP_AUTH_PATH')
cred = credentials.Certificate(GCP_AUTH_PATH)
app = firebase_admin.initialize_app(cred)

In [None]:
# Database
db = firestore.client()

In [None]:
def batch_insert_members(members, db):
    '''
    Function to write house members to the "reps" collection in one batch

    Every member is queued under a document named after its 'member_id',
    then the batch is committed once.

    Returns the number of entries in the batch's write results
    '''

    writer = db.batch()
    for record in members:
        doc_id = record['member_id']
        target = db.collection("reps").document(f"{doc_id}")
        # Queue the write; nothing is sent until commit()
        writer.set(target, record)

    writer.commit()

    return len(writer.write_results)

In [None]:
batch_insert_members(members, db)

In [None]:
rep_ref = db.collection('reps').document('A000370')
result = rep_ref.get().to_dict()

In [None]:
result['roles'].sort(key=lambda x: x['next_election'], reverse=True)

In [None]:
# House members of the 116th Congress (2019-2021)
ids_116 = get_house_ids(116)
members_116 = [ get_member_data(mem) for mem in ids_116 ]

In [None]:
def get_roll_call_vote(congress, session_number, roll_call_num, timeout=30):
    '''
    Function to get a single house roll call vote

    Parameters:
        congress: Congress number inserted into the request URL
        session_number: Session number inserted into the request URL
        roll_call_num: Roll call number inserted into the request URL
        timeout: Seconds to wait for the API before giving up (default 30)

    Returns:
        The 'votes' entry of the response's 'results'

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
        requests.Timeout: If no response arrives within `timeout` seconds
    '''
    query = f'{congress}/house/sessions/{session_number}/votes/{roll_call_num}.json'
    call_string = API_ROOT + query
    # Without a timeout, requests waits indefinitely on a stalled connection
    r = requests.get(call_string, headers=header, timeout=timeout)
    # Fail on HTTP errors here rather than with an obscure KeyError below
    r.raise_for_status()

    return r.json()['results']['votes']