In [52]:
# Data Visualization
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Config Reader
import configparser

# Database Connection
import firebase_admin
from firebase_admin import credentials, firestore

In [2]:
# Read config.ini file
CONFIG_PATH = './auth/config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail here with a clear message instead of a later
# NoSectionError from config.get().
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Config file not found or unreadable: {CONFIG_PATH}')

# Get Google Firebase Auth
GCP_AUTH_PATH = config.get('firebase', 'GCP_AUTH_PATH')
cred = credentials.Certificate(GCP_AUTH_PATH)

# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in a live kernel. Reuse the existing app
# in that case so the cell is safe to execute more than once.
try:
    app = firebase_admin.get_app()
except ValueError:
    app = firebase_admin.initialize_app(cred)

# Instantiate connection to database
db = firestore.client()

In [3]:
# Build one Firestore collection reference per collection used in this notebook
reps_ref, edu_ref, votes_ref = (
    db.collection(collection_name)
    for collection_name in ("reps", "edu", "votes")
)

In [267]:
def _collection_to_frame(collection_ref):
    """Fetch every document of ``collection_ref`` and return them as a DataFrame.

    Each document is converted with ``to_dict()``, giving one row per document.
    """
    records = [snapshot.to_dict() for snapshot in collection_ref.get()]
    return pd.DataFrame(records)


degrees = _collection_to_frame(edu_ref)
reps = _collection_to_frame(reps_ref)

In [268]:
# Strip every period from the degree abbreviation (e.g. 'B.A.' -> 'BA')
degrees['degree'] = degrees['degree'].map(lambda label: label.replace('.', ''))

In [269]:
# Degree-group label -> degree abbreviations (written without periods) that
# belong to that group. Key order is preserved and drives `degree_groups`.
cc_dict = {
    # Undergraduate and below
    "Associates": ["AAS", "AS", "AA"],
    "Bachelors": [
        "BS", "BA", "SB", "AB", "BDiv", "BBA", "BEng",
        "BM", "ALB", "BSN", "BGS", "BPA", "BSBA", "LLB",
    ],
    "High School": ["HS"],
    # Law
    "JD": ["JD"],
    # Masters
    "Masters - General": ["MA", "MS", "SM", "MSc", "MFA", "MAcc"],
    "Masters - Public": [
        "MIA", "MPA", "MUP", "MPP", "MSW", "MSS", "MPH", "MHS",
    ],
    "Masters - Education": ["MEd", "SYC"],
    "Masters - Law": ["LLM"],
    "Masters - Theology": ["MDiv", "ThM"],
    "MBA": ["MBA", "MSEM"],
    # Doctoral and professional
    "PHD": ["PhD"],
    "Veterinary": ["DVM"],
    "Dental": ["DDS", "DMD"],
    "MD": ["MD", "DPM"],
    "PHD - Education": ["EdD"],
    "PHD - Theology": ["DMin"],
    "PHD - Public": ["DPA"],
    "Nursing": ["MSN"],
}

In [270]:
# Label given to degrees that are not listed under any group in cc_dict.
# The previous np.select call had no `default`, so it mixed the implicit
# integer 0 with string choices: unmatched degrees became the string '0' on
# older NumPy and the call raises TypeError (no common dtype) on recent NumPy.
UNMAPPED_DEGREE_GROUP = 'Unknown'

# Invert cc_dict (group -> abbreviations) into abbreviation -> group so each
# degree is resolved with a single dictionary lookup.
degree_to_group = {
    abbreviation: group
    for group, abbreviations in cc_dict.items()
    for abbreviation in abbreviations
}

# Series.map yields NaN for abbreviations missing from the lookup; replace
# those with the explicit fallback label.
degrees['degree_group'] = (
    degrees['degree']
    .map(degree_to_group)
    .fillna(UNMAPPED_DEGREE_GROUP)
)

In [271]:
# Left-join representative details onto every degree row, ordered by '_id'
df = degrees.merge(reps, on='_id', how='left').sort_values('_id')

# Missing middle names become empty strings
df['middle_name'] = df['middle_name'].fillna('')

# 'in_office' is True when '117' appears among the row's 'congresses'
df['in_office'] = df['congresses'].map({'117'}.issubset)

In [273]:
# Collapse to one row per representative, collecting their degree groups in a list
rep_keys = [
    '_id', 'first_name', 'middle_name', 'last_name',
    'current_party', 'dob', 'gender', 'in_office',
]
df = (
    df.groupby(rep_keys)['degree_group']
    .apply(list)
    .reset_index()
)

In [274]:
# Check results: summing the boolean 'in_office' column counts the rows flagged
# as in office. Expected number of US Reps in office: 435.
df['in_office'].sum()

435

In [275]:
def query_edu(df, deg_type=(), in_office=True):
    """Return the rows whose ``degree_group`` contains every requested group.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``degree_group`` column whose cells are iterables of
        degree-group labels and, when ``in_office`` is true, a boolean
        ``in_office`` column.
    deg_type : str or iterable of str, optional
        Degree-group label(s) a row must hold. A single string is treated as
        one label. An empty value (the default) matches every row.
    in_office : bool, optional
        When true (the default), keep only rows where ``in_office`` is true
        and drop that column from the result. When false, rows are not
        filtered on it and the column is kept.

    Returns
    -------
    pandas.DataFrame
        The matching subset of ``df``.
    """
    # set('JD') would split a bare string into characters ({'J', 'D'}) and
    # silently match nothing, so wrap a single label instead.
    required = {deg_type} if isinstance(deg_type, str) else set(deg_type)

    # astype(bool) keeps the mask a boolean indexer even when df is empty and
    # map() returns an object-dtype Series.
    mask = df['degree_group'].map(required.issubset).astype(bool)
    result = df[mask]
    if in_office:
        result = result[result['in_office']].drop(columns='in_office')

    return result

In [276]:
# Degree-group labels, in the order they are declared in cc_dict
degree_groups = [group_label for group_label in cc_dict]

In [277]:
# Display the degree-group labels that can be passed to query_edu
degree_groups

['Associates',
 'Bachelors',
 'High School',
 'JD',
 'Masters - General',
 'Masters - Public',
 'Masters - Education',
 'Masters - Law',
 'Masters - Theology',
 'MBA',
 'PHD',
 'Veterinary',
 'Dental',
 'MD',
 'PHD - Education',
 'PHD - Theology',
 'PHD - Public',
 'Nursing']

In [281]:
# In-office rows whose degree groups include 'High School', earliest 'dob' first
high_school_reps = query_edu(df, ['High School'])
high_school_reps.sort_values('dob')

Unnamed: 0,_id,first_name,middle_name,last_name,current_party,dob,gender,degree_group
344,O000171,Tom,,O'Halleran,D,1946-01-24 00:00:00+00:00,M,[High School]
497,W000827,Ron,,Wright,R,1953-04-08 00:00:00+00:00,M,[High School]
419,S001177,Gregorio,Kilili Camacho,Sablan,I,1955-01-19 00:00:00+00:00,M,[High School]
62,C001054,Jerry,,Carl,R,1958-06-17 00:00:00+00:00,M,[High School]
357,P000604,Donald,M.,Payne,D,1958-12-17 00:00:00+00:00,M,[High School]
370,R000103,Matt,,Rosendale,R,1960-07-07 00:00:00+00:00,M,[High School]
37,B001295,Mike,,Bost,R,1960-12-30 00:00:00+00:00,M,[High School]
202,H001077,Clay,,Higgins,R,1961-08-24 00:00:00+00:00,M,[High School]
69,C001067,Yvette,D.,Clarke,D,1964-11-21 00:00:00+00:00,F,[High School]
250,K000395,Fred,,Keller,R,1965-10-23 00:00:00+00:00,M,[High School]
