In [1]:
import numpy as np
import pandas as pd
import dotenv
import os
import pymongo
import psycopg
from sqlalchemy import create_engine
import json


## 1) Connect to the Postgres DB


In [2]:
dotenv.load_dotenv()

POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
MONGO_INITDB_ROOT_USERNAME = os.getenv('MONGO_INITDB_ROOT_USERNAME')
MONGO_INITDB_ROOT_PASSWORD = os.getenv('MONGO_INITDB_ROOT_PASSWORD')

In [3]:
dbms = 'postgresql'
package = 'psycopg'
user = 'postgres'
password = POSTGRES_PASSWORD
host = 'localhost'
port = '5432'
db = 'contrans'
engine = create_engine(f'{dbms}+{package}://{user}:{password}@{host}:{port}/{db}')
engine

Engine(postgresql+psycopg://postgres:***@localhost:5432/contrans)

In [4]:
myquery = '''
SELECT *
FROM members
'''

pd.read_sql_query(myquery, con=engine)

Unnamed: 0,bioguide_id,full_name,chamber,birthyear,image,office_address,phone,website,fec_id,left_right_ideology,state_abbrev,district_code,icpsr,party
0,A000055,Robert B. Aderholt,House of Representatives,1965.0,https://www.congress.gov/img/member/a000055_20...,"272 Cannon House Office Building, Washington, ...",(202) 225-4876,https://aderholt.house.gov/,H6AL04098,0.405,AL,4,29701,Republican
1,A000148,Jake Auchincloss,House of Representatives,1988.0,https://www.congress.gov/img/member/67817e391f...,"1524 Longworth House Office Building, Washingt...",(202) 225-5931,https://auchincloss.house.gov,H0MA04192,-0.288,MA,4,22100,Democrat
2,A000369,Mark E. Amodei,House of Representatives,1958.0,https://www.congress.gov/img/member/a000369_20...,"104 Cannon House Office Building, Washington, ...",(202) 225-6155,https://amodei.house.gov,H2NV02395,0.384,NV,2,21196,Republican
3,A000370,Alma S. Adams,House of Representatives,1946.0,https://www.congress.gov/img/member/a000370_20...,"2436 Rayburn House Office Building, Washington...",(202) 225-1510,https://adams.house.gov,H4NC12100,-0.462,NC,12,21545,Democrat
4,A000371,Pete Aguilar,House of Representatives,1979.0,https://www.congress.gov/img/member/a000371_20...,"108 Cannon House Office Building, Washington, ...",(202) 225-3201,https://aguilar.house.gov/,H2CA31125,-0.324,CA,33,21506,Democrat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,W000830,George Whitesides,House of Representatives,1974.0,https://www.congress.gov/img/member/68dc43db19...,"1504 Longworth House Office Building, Washingt...",(202) 225-1956,https://whitesides.house.gov/,H4CA27111,-0.189,CA,27,22559,Democrat
541,W000831,James R. Walkinshaw,House of Representatives,1982.0,https://www.congress.gov/img/member/68c1bd4ca9...,"2265 Rayburn House Office Building, Washington...",(202) 225-1492,https://walkinshaw.house.gov/,H6VA11066,-0.503,VA,11,22564,Democrat
542,Y000064,Todd Young,Senate,1972.0,https://www.congress.gov/img/member/y000064_20...,185 Dirksen Senate Office Building Washington...,(202) 224-5623,https://www.young.senate.gov,S0IN00194,0.438,IN,0,21133,Republican
543,Y000067,Rudy Yakym III,House of Representatives,1984.0,https://www.congress.gov/img/member/y000067_20...,"349 Cannon House Office Building, Washington, ...",(202) 225-3915,https://yakym.house.gov,H2IN02295,0.513,IN,2,22171,Republican


In [5]:
# from bill_versions: billtype, bill_number, versioncode, chamber, text
# from bills: bill_title (bioguide_id for the link)
# from members: full_name, state, party

myquery = '''
SELECT v.bill_type,
    v.bill_number, 
    v.versioncode, 
    v.currentchamber, 
    v.text,
    b.bill_title,
    m.full_name,
    m.state_abbrev,
    m.party
FROM bill_versions v
INNER JOIN bills b
    ON v.bill_type = b.bill_type AND v.bill_number = b.bill_number
INNER JOIN members m
    ON b.bioguide_id = m.bioguide_id
'''

df = pd.read_sql_query(myquery, con=engine)

In [6]:
json_recoded = df.to_json(orient='records')

In [7]:
myjson = json.loads(json_recoded)

myjson  

[{'bill_type': 'S',
  'bill_number': 3012,
  'versioncode': 0,
  'currentchamber': 'Senate',
  'text': '<p><strong>Shutdown Fairness Act</strong></p><p>This bill provides appropriations to pay federal employees who work during a government shutdown.</p><p>Specifically, the bill provides appropriations for federal agencies to provide standard rates of pay, allowances, pay differentials, benefits, and other payments to excepted employees for work performed during any period in which interim continuing appropriations or full-year appropriations are not in effect for a fiscal year (i.e., a government shutdown). An excepted employee is an employee who is required to work during a government shutdown.</p><p>Under current law, excepted employees are not paid until the government shutdown is over. This bill provides appropriations to pay excepted employees during a government shutdown. The bill also specifies that the term&nbsp;<em>excepted employee </em>includes certain contractors who suppor

## 2) Build the table using SQL that we want to copy into the Mongo DB

## 3) Load the data into Mongo


In [8]:
host = 'localhost'
port = '27017'
myclient = pymongo.MongoClient(f'mongodb://{MONGO_INITDB_ROOT_USERNAME}:{MONGO_INITDB_ROOT_PASSWORD}@{host}:{port}')

In [9]:
mymongoDBs = myclient['mymongoDBs'] # create a database on the Mongo server called "mymongoDBs"

In [10]:
bills = mymongoDBs['bills'] # creates a collection called "bills"
bills.insert_many(myjson)   

InsertManyResult([ObjectId('6914b2fb62b6c3e20fe7402a'), ObjectId('6914b2fb62b6c3e20fe7402b'), ObjectId('6914b2fb62b6c3e20fe7402c'), ObjectId('6914b2fb62b6c3e20fe7402d'), ObjectId('6914b2fb62b6c3e20fe7402e'), ObjectId('6914b2fb62b6c3e20fe7402f'), ObjectId('6914b2fb62b6c3e20fe74030'), ObjectId('6914b2fb62b6c3e20fe74031'), ObjectId('6914b2fb62b6c3e20fe74032'), ObjectId('6914b2fb62b6c3e20fe74033'), ObjectId('6914b2fb62b6c3e20fe74034'), ObjectId('6914b2fb62b6c3e20fe74035'), ObjectId('6914b2fb62b6c3e20fe74036'), ObjectId('6914b2fb62b6c3e20fe74037'), ObjectId('6914b2fb62b6c3e20fe74038'), ObjectId('6914b2fb62b6c3e20fe74039'), ObjectId('6914b2fb62b6c3e20fe7403a'), ObjectId('6914b2fb62b6c3e20fe7403b'), ObjectId('6914b2fb62b6c3e20fe7403c'), ObjectId('6914b2fb62b6c3e20fe7403d'), ObjectId('6914b2fb62b6c3e20fe7403e'), ObjectId('6914b2fb62b6c3e20fe7403f'), ObjectId('6914b2fb62b6c3e20fe74040'), ObjectId('6914b2fb62b6c3e20fe74041'), ObjectId('6914b2fb62b6c3e20fe74042'), ObjectId('6914b2fb62b6c3e20fe740

## 4) Query the Mongo DB, build a search engine on the text of the bill summaries

In [11]:
from bson.json_util import dumps, loads

In [12]:
row_query = {'state_abbrev': 'VA'}
col_query = {'bill_title': 1,
             'full_name': 1,
             'text': 1,
             '_id': 0}
myquery =bills.find(row_query, col_query)
myquery = dumps(myquery)
myquery = loads(myquery)
pd.DataFrame(myquery)

Unnamed: 0,text,bill_title,full_name
0,<p><strong>Shutdown Guidance for Financial Ins...,Shutdown Guidance for Financial Institutions Act,Suhas Subramanyam
1,<p><strong>Pay Our Troops Act of 2026</strong>...,Pay Our Troops Act of 2026,Jennifer A. Kiggans
2,<p><strong>No Pay for Congress During Default ...,No Pay for Congress During Default or Shutdown...,Eugene Simon Vindman
3,<p><strong>Freedom to Invest in Tomorrow’s Wor...,Freedom to Invest in Tomorrow’s Workforce Act,Robert J. Wittman
4,<p>This joint resolution terminates the nation...,A joint resolution terminating the national em...,Tim Kaine
...,...,...,...
72,<p><strong>Barriers and Regulatory Obstacles A...,BROADBAND Leadership Act,H. Morgan Griffith
73,<p><strong>Reclaiming Congress’s Constitutiona...,Reclaiming Congress’s Constitutional Mandate i...,H. Morgan Griffith
74,<p><b>New Source Review Permitting Improvement...,New Source Review Permitting Improvement Act,H. Morgan Griffith
75,<p><strong>Halt All Lethal Trafficking of Fent...,HALT Fentanyl Act,H. Morgan Griffith


In [13]:

bills.create_index([('text', 'text')])

'text_text'

In [14]:
row_query = {'$text': {'$search': 'tax forest', '$caseSensitive': False}}
col_query = {'bill_title': 1,
             'full_name': 1,
             'text': 1,
             '_id': 0}
myquery =bills.find(row_query, col_query)
myquery = dumps(myquery)
myquery = loads(myquery)
results = pd.DataFrame(myquery)
[print(x) for x in results[['text']]['text'][0:5]]

<p><strong>Proven Forest Management Act of 2025</strong></p><p>This bill sets forth provisions to expedite the approval and implementation of forest management activities and establishes related requirements.</p><p>First, the bill categorically excludes a forest management activity conducted on National Forest System land&nbsp;for reducing forest fuels from certain environmental review requirements under the National Environmental Policy Act of 1969 if the activity (1) does not exceed 10,000 acres (including not more than 3,000 acres of mechanical thinning), (2) is developed in a collaborative manner, and (3) is consistent with the forest plan developed for the relevant National Forest System land. </p><p>Next, the bill directs the Forest Service to conduct&nbsp;forest management activities  in a manner that attains multiple ecosystem benefits unless the costs associated with attaining such benefits are excessive.</p><p>Additionally, the Forest Service must (1) establish any post-progr

[None, None, None, None, None]