# Lobbying Data Formatting
- Get names/IDs of lobbyists ("registrant") and who they contributed to ("contribution_items")
    - Both in contribution data
- Save the PACS info as well if applicable

## Initial File Exploration

In [111]:
import json
import os

import pandas as pd

In [112]:
# Path of the lobbying-filings JSON export that the cells below load.
lobbyist_json = "2021_lobbyists.json"

In [113]:
# Load the raw filings export. JSON text is UTF-8 by specification, so decode
# it explicitly instead of relying on the platform's default encoding (which
# differs between operating systems and locales).
with open(lobbyist_json, encoding="utf-8") as f:
    info = json.load(f)

In [114]:
# The filing records are stored under the top-level "results" key.
lobbyist_data = info['results']

In [115]:
# Inspect one raw filing record to see its nested structure.
print(lobbyist_data[0])

{'url': 'https://lda.senate.gov/api/v1/filings/2da8c97a-0c80-4137-a257-e760cacb7c67/', 'filing_uuid': '2da8c97a-0c80-4137-a257-e760cacb7c67', 'filing_type': 'RR', 'filing_type_display': 'Registration', 'filing_year': 2021, 'filing_period': 'first_quarter', 'filing_period_display': '1st Quarter (Jan 1 - Mar 31)', 'filing_document_url': 'https://lda.senate.gov/filings/public/filing/2da8c97a-0c80-4137-a257-e760cacb7c67/print/', 'filing_document_content_type': 'text/html', 'income': None, 'expenses': None, 'expenses_method': None, 'expenses_method_display': None, 'posted_by_name': 'Amanda L. Wood', 'dt_posted': '2021-01-04T12:51:26-05:00', 'termination_date': None, 'registrant': {'id': 283696, 'url': 'https://lda.senate.gov/api/v1/registrants/283696/', 'house_registrant_id': 36844, 'name': 'BECKER & POLIAKOFF, P.A.', 'description': 'Law firm', 'address_1': '1275 K Street, NW', 'address_2': 'Suite 850', 'address_3': None, 'address_4': None, 'city': 'Washington', 'state': 'DC', 'state_displa

In [116]:
# One row per filing record; nested objects such as registrant, client and
# lobbying_activities remain dict/list values inside their cells.
lobbyist_df = pd.DataFrame(lobbyist_data)

In [117]:
# Preview the first filings.
lobbyist_df.head()

Unnamed: 0,url,filing_uuid,filing_type,filing_type_display,filing_year,filing_period,filing_period_display,filing_document_url,filing_document_content_type,income,...,expenses_method_display,posted_by_name,dt_posted,termination_date,registrant,client,lobbying_activities,conviction_disclosures,foreign_entities,affiliated_organizations
0,https://lda.senate.gov/api/v1/filings/2da8c97a...,2da8c97a-0c80-4137-a257-e760cacb7c67,RR,Registration,2021,first_quarter,1st Quarter (Jan 1 - Mar 31),https://lda.senate.gov/filings/public/filing/2...,text/html,,...,,Amanda L. Wood,2021-01-04T12:51:26-05:00,,"{'id': 283696, 'url': 'https://lda.senate.gov/...","{'id': 207872, 'url': 'https://lda.senate.gov/...","[{'general_issue_code': 'ENV', 'general_issue_...",[],[],[]
1,https://lda.senate.gov/api/v1/filings/afd3c75b...,afd3c75b-de52-47ba-b262-1700d3667796,1T,1st Quarter - Termination,2021,first_quarter,1st Quarter (Jan 1 - Mar 31),https://lda.senate.gov/filings/public/filing/a...,text/html,15000.0,...,,Jeff Hogg,2021-01-05T11:16:52.930000-05:00,2021-01-01,"{'id': 401105690, 'url': 'https://lda.senate.g...","{'id': 205894, 'url': 'https://lda.senate.gov/...","[{'general_issue_code': 'TRD', 'general_issue_...",[],[],[]
2,https://lda.senate.gov/api/v1/filings/187c32ea...,187c32ea-632b-42a5-8536-6cacc7c68ff4,RR,Registration,2021,first_quarter,1st Quarter (Jan 1 - Mar 31),https://lda.senate.gov/filings/public/filing/1...,text/html,,...,,Donald R. Bramer,2021-01-04T18:58:15-05:00,,"{'id': 401103355, 'url': 'https://lda.senate.g...","{'id': 207882, 'url': 'https://lda.senate.gov/...","[{'general_issue_code': 'MAN', 'general_issue_...",[],[],[]
3,https://lda.senate.gov/api/v1/filings/58097464...,58097464-e7ec-4001-b12b-489948eabc82,RR,Registration,2021,first_quarter,1st Quarter (Jan 1 - Mar 31),https://lda.senate.gov/filings/public/filing/5...,text/html,,...,,Ronald K Hamm,2021-01-04T15:24:01-05:00,,"{'id': 400850504, 'url': 'https://lda.senate.g...","{'id': 207880, 'url': 'https://lda.senate.gov/...","[{'general_issue_code': 'TAX', 'general_issue_...",[],[],[]
4,https://lda.senate.gov/api/v1/filings/e60921f4...,e60921f4-3501-4b58-b33d-36b223457417,RR,Registration,2021,first_quarter,1st Quarter (Jan 1 - Mar 31),https://lda.senate.gov/filings/public/filing/e...,text/html,,...,,Ronald Hamm,2021-01-04T22:18:52-05:00,,"{'id': 400850504, 'url': 'https://lda.senate.g...","{'id': 207881, 'url': 'https://lda.senate.gov/...","[{'general_issue_code': 'URB', 'general_issue_...",[],[],[]


In [118]:
#lobbyist_df["lobbying_activities"][1]

In [119]:
# Flatten the nested lobbying activities into four parallel lists with one
# entry per (activity, lobbyist) pair: the lobbyist's name and id plus the
# activity's general issue code and its display text.
l_names = []
l_ids = []
issue_codes = []
issue_descs = []

for activities in lobbyist_df["lobbying_activities"]:
    for activity in activities:
        code = activity["general_issue_code"]
        description = activity["general_issue_code_display"]
        people = [entry["lobbyist"] for entry in activity["lobbyists"]]

        # The issue fields are repeated once for every lobbyist on the activity.
        issue_codes.extend(code for _ in people)
        issue_descs.extend(description for _ in people)

        for person in people:
            l_ids.append(person["id"])
            l_names.append(person["first_name"] + " " + person["last_name"])

In [120]:
# One row per (lobbyist, issue) pair, assembled from the four parallel lists.
lobby_df = pd.DataFrame({"name": l_names, "id": l_ids, "issue_code": issue_codes, "issue_desc":issue_descs})

In [121]:
# Preview the flattened lobbyist/issue table.
lobby_df.head()

Unnamed: 0,name,id,issue_code,issue_desc
0,CLARENCE WILLIAMS,59711,ENV,Environment/Superfund
1,AMANDA WOOD,59585,ENV,Environment/Superfund
2,OMAR FRANCO,53996,ENV,Environment/Superfund
3,CLARENCE WILLIAMS,59711,TAX,Taxation/Internal Revenue Code
4,AMANDA WOOD,59585,TAX,Taxation/Internal Revenue Code


In [248]:
# Name the export after the year prefix of the input file ("<year>_lobbyists.json")
# so the CSV label cannot drift from the data it holds. The input configured
# above is the 2021 file, while the previously hard-coded name said 2019.
lobbyist_year = os.path.basename(lobbyist_json).split("_")[0]
lobby_df.to_csv(f"lobbyists_{lobbyist_year}.csv")

In [249]:
# List the filing-level columns available in the raw frame.
lobbyist_df.columns

Index(['url', 'filing_uuid', 'filing_type', 'filing_type_display',
       'filing_year', 'filing_period', 'filing_period_display',
       'filing_document_url', 'filing_document_content_type', 'income',
       'expenses', 'expenses_method', 'expenses_method_display',
       'posted_by_name', 'dt_posted', 'termination_date', 'registrant',
       'client', 'lobbying_activities', 'conviction_disclosures',
       'foreign_entities', 'affiliated_organizations'],
      dtype='object')

In [250]:
# Look at the nested lobbying_activities list of the second filing.
lobbyist_df["lobbying_activities"].iloc[1]

[{'general_issue_code': 'TRD',
  'general_issue_code_display': 'Trade (domestic/foreign)',
  'description': 'International wildlife trade.',
  'foreign_entity_issues': '',
  'lobbyists': [{'lobbyist': {'id': 73661,
     'prefix': None,
     'prefix_display': None,
     'first_name': 'JEFF',
     'nickname': None,
     'middle_name': None,
     'last_name': 'HOGG',
     'suffix': None,
     'suffix_display': None},
    'covered_position': None,
    'new': False}],
  'government_entities': [{'id': 2, 'name': 'HOUSE OF REPRESENTATIVES'},
   {'id': 1, 'name': 'SENATE'}]},
 {'general_issue_code': 'ANI',
  'general_issue_code_display': 'Animals',
  'description': 'International animal trade.',
  'foreign_entity_issues': '',
  'lobbyists': [{'lobbyist': {'id': 73661,
     'prefix': None,
     'prefix_display': None,
     'first_name': 'JEFF',
     'nickname': None,
     'middle_name': None,
     'last_name': 'HOGG',
     'suffix': None,
     'suffix_display': None},
    'covered_position': No

In [251]:
# (rows, columns) of the filings frame.
lobbyist_df.shape

(25, 22)

In [253]:
# Path of the contribution-report JSON export to load.
contributions_json = "2019_contributions.json"

# JSON text is UTF-8 by specification; decode it explicitly rather than
# depending on the platform's default encoding.
with open(contributions_json, encoding="utf-8") as j:
    contrib_data = json.load(j)

In [254]:
# One row per record in the loaded contributions export.
contrib_df = pd.DataFrame(contrib_data)

In [255]:
# Preview the first ten contribution reports.
contrib_df.head(10)

Unnamed: 0,url,filing_uuid,filing_type,filing_type_display,filing_year,filing_period,filing_period_display,filing_document_url,filing_document_content_type,filer_type,...,state,state_display,zip,country,country_display,registrant,lobbyist,no_contributions,pacs,contribution_items
0,https://lda.senate.gov/api/v1/contributions/42...,42f421fe-4d3f-4c0f-8658-ab278f555279,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,VA,Virginia,22314,US,United States of America,"{'id': 65191, 'url': 'https://lda.senate.gov/a...","{'id': 69168, 'prefix': None, 'prefix_display'...",True,[],[]
1,https://lda.senate.gov/api/v1/contributions/3a...,3a2a92b1-68b1-40ab-88ff-ad233fec6bdf,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20001,US,United States of America,"{'id': 21027, 'url': 'https://lda.senate.gov/a...","{'id': 7484, 'prefix': 'mr', 'prefix_display':...",True,[],[]
2,https://lda.senate.gov/api/v1/contributions/29...,2994032e-ef18-40c7-bbc6-15124d12e6a4,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,MD,Maryland,20850,US,United States of America,"{'id': 25055, 'url': 'https://lda.senate.gov/a...","{'id': 57861, 'prefix': 'mr', 'prefix_display'...",True,[],[]
3,https://lda.senate.gov/api/v1/contributions/8a...,8a1002b1-a7fa-4597-96ef-adae9bd9ec78,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20005,US,United States of America,"{'id': 320793, 'url': 'https://lda.senate.gov/...","{'id': 63507, 'prefix': None, 'prefix_display'...",True,[],[]
4,https://lda.senate.gov/api/v1/contributions/c1...,c1837954-6d4c-4711-a20d-f2b56d533770,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20004,US,United States of America,"{'id': 37948, 'url': 'https://lda.senate.gov/a...","{'id': 41484, 'prefix': None, 'prefix_display'...",True,[],[]
5,https://lda.senate.gov/api/v1/contributions/ec...,ec5079b4-853d-4eae-9d5f-5e65548de67b,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20005,US,United States of America,"{'id': 7257, 'url': 'https://lda.senate.gov/ap...","{'id': 62502, 'prefix': None, 'prefix_display'...",True,[],[]
6,https://lda.senate.gov/api/v1/contributions/31...,31c8c692-0d53-4e14-bfc3-0f606fa86e3d,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20002,US,United States of America,"{'id': 5506, 'url': 'https://lda.senate.gov/ap...","{'id': 68338, 'prefix': None, 'prefix_display'...",True,[],[]
7,https://lda.senate.gov/api/v1/contributions/f0...,f0f2fcde-a99a-43c9-b75a-97bfb8fd820c,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,organization,...,DC,District of Columbia,20003,US,United States of America,"{'id': 48805, 'url': 'https://lda.senate.gov/a...",,False,[],"[{'contribution_type': 'feca', 'contribution_t..."
8,https://lda.senate.gov/api/v1/contributions/04...,04433f09-b34f-4e42-990b-3981d5dc1921,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,VA,Virginia,22203,US,United States of America,"{'id': 301573, 'url': 'https://lda.senate.gov/...","{'id': 65598, 'prefix': None, 'prefix_display'...",True,[],[]
9,https://lda.senate.gov/api/v1/contributions/f7...,f784c0fd-6500-45bc-af46-125ffeba86a2,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,lobbyist,...,DC,District of Columbia,20001,US,United States of America,"{'id': 2877, 'url': 'https://lda.senate.gov/ap...","{'id': 2879, 'prefix': 'mr', 'prefix_display':...",True,[],[]


In [256]:
# Keep only the reports whose no_contributions flag is False, as an
# independent copy so later column additions do not touch contrib_df.
reported_contributions = contrib_df["no_contributions"].eq(False)
made_contrib = contrib_df.loc[reported_contributions].copy()

In [257]:
# reset_index() renumbers the rows from 0 and keeps the previous row labels
# as a new "index" column (the first column in the preview below).
made_contrib = made_contrib.reset_index()
made_contrib.head()

Unnamed: 0,index,url,filing_uuid,filing_type,filing_type_display,filing_year,filing_period,filing_period_display,filing_document_url,filing_document_content_type,...,state,state_display,zip,country,country_display,registrant,lobbyist,no_contributions,pacs,contribution_items
0,7,https://lda.senate.gov/api/v1/contributions/f0...,f0f2fcde-a99a-43c9-b75a-97bfb8fd820c,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,...,DC,District of Columbia,20003,US,United States of America,"{'id': 48805, 'url': 'https://lda.senate.gov/a...",,False,[],"[{'contribution_type': 'feca', 'contribution_t..."
1,27,https://lda.senate.gov/api/v1/contributions/1d...,1dded0ed-8b35-4c1f-ad31-7c2d241ccba8,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,...,DC,District of Columbia,20036,US,United States of America,"{'id': 6256, 'url': 'https://lda.senate.gov/ap...","{'id': 62131, 'prefix': None, 'prefix_display'...",False,[],"[{'contribution_type': 'feca', 'contribution_t..."
2,29,https://lda.senate.gov/api/v1/contributions/7b...,7b01d95b-3b44-481c-8caa-9da46f65d110,MA,Mid-Year Amendment,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,...,DC,District of Columbia,20003,US,United States of America,"{'id': 38687, 'url': 'https://lda.senate.gov/a...","{'id': 7922, 'prefix': 'ms', 'prefix_display':...",False,[],"[{'contribution_type': 'feca', 'contribution_t..."
3,30,https://lda.senate.gov/api/v1/contributions/c8...,c8d7f1c6-c357-4116-baa9-6388013a9a6a,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,...,DC,District of Columbia,20003,US,United States of America,"{'id': 38687, 'url': 'https://lda.senate.gov/a...","{'id': 7922, 'prefix': 'ms', 'prefix_display':...",False,[],"[{'contribution_type': 'feca', 'contribution_t..."
4,32,https://lda.senate.gov/api/v1/contributions/fd...,fdea3d75-044c-4dea-8f4f-061d58d4d289,MM,Mid-Year Report,2019,mid_year,Mid-Year (Jan 1 - Jun 30),https://lda.senate.gov/filings/public/contribu...,text/html,...,VA,Virginia,22205,US,United States of America,"{'id': 400745228, 'url': 'https://lda.senate.g...","{'id': 69446, 'prefix': None, 'prefix_display'...",False,[],"[{'contribution_type': 'feca', 'contribution_t..."


In [258]:
# Pull the individual lobbyist's id and full name out of the nested
# "lobbyist" object and store them as flat columns on made_contrib.
lobbyist_id = []
lobbyist_name = []
for person in made_contrib["lobbyist"]:
    if person is None:
        # No lobbyist object on this report: keep the row aligned with
        # empty-string placeholders.
        lobbyist_id.append("")
        lobbyist_name.append("")
        continue
    lobbyist_id.append(person["id"])
    lobbyist_name.append(person["first_name"] + " " + person["last_name"])

made_contrib["lobbyist_name"] = lobbyist_name
made_contrib["lobbyist_id"] = lobbyist_id

In [259]:
# Accumulators filled by the flattening loop in the next cell: each receives
# one entry per contribution item so they line up row-for-row with con_items.
registrant_names = []
house_registrant_id = []
senate_registrant_id = []
pacs = []
con_items = []
lobbyist_id = []
lobbyist_name = []

In [260]:
# Flatten the reports into one entry per contribution item, repeating the
# report-level fields (registrant, PACs, lobbyist) for every item on a report.
#
# The accumulators are (re)created here so that re-running this cell does not
# append a second copy of every record.
registrant_names = []
house_registrant_id = []
senate_registrant_id = []
pacs = []
con_items = []
lobbyist_id = []
lobbyist_name = []

# Columns are read by name rather than by tuple position: positions silently
# shift whenever a column is added to or removed from made_contrib.
report_fields = zip(
    made_contrib["registrant"],
    made_contrib["pacs"],
    made_contrib["contribution_items"],
    made_contrib["lobbyist_id"],
    made_contrib["lobbyist_name"],
)
for registrant, pac, contribution_items, filer_id, filer_name in report_fields:
    item_count = len(contribution_items)
    con_items += contribution_items
    registrant_names += [registrant["name"]] * item_count
    house_registrant_id += [registrant["house_registrant_id"]] * item_count
    # The registrant's own "id" is what ends up in the senate_registrant_id column.
    senate_registrant_id += [registrant["id"]] * item_count
    lobbyist_id += [filer_id] * item_count
    lobbyist_name += [filer_name] * item_count
    # Reports without PACs get the string placeholder "None", not a null value.
    pacs += [pac if pac != [] else "None"] * item_count

In [261]:
# One row per individual contribution item collected from all reports.
con_df = pd.DataFrame(con_items)

In [262]:
# Preview the flattened contribution items.
con_df.head()

Unnamed: 0,contribution_type,contribution_type_display,contributor_name,payee_name,honoree_name,amount,date
0,feca,FECA,WAYNE STATE UNIVERSITY DETROIT MICHIGAN,"JAMES M. WILLIAMS, JR.",Kamala Harris,110.0,2019-01-22
1,feca,FECA,"LISTER, JAMES HARDWICK",ALASKANS FOR DON YOUNG,Don Young,500.0,2019-03-05
2,feca,FECA,"LOWE, JOCELYN HONG",XOCHITL FOR NEW MEXICO,Xochitl Torres Small,100.0,2019-03-13
3,feca,FECA,"LOWE, JOCELYN HONG",STEPHANIE MURPHY FOR CONGRESS,Stephanie Murphy,100.0,2019-03-13
4,feca,FECA,"LOWE, JOCELYN HONG",AT THE TABLE,PAC,100.0,2019-03-13


In [246]:
# Attach the report-level fields gathered during flattening; every list holds
# one value per contribution item, in the same order as con_df's rows.
#
# NOTE(review): in the saved notebook this cell carries execution count 246,
# lower than the cell that creates con_df (261) and the one that writes the
# CSV (263). Unless it is re-run after con_df is rebuilt, the exported file
# will not contain these columns.
report_level_columns = {
    "pac": pacs,
    "registrant_house_id": house_registrant_id,
    "senate_registrant_id": senate_registrant_id,
    "registrant": registrant_names,
    "lobbyist_id": lobbyist_id,
    "lobbyist_name": lobbyist_name,
}
for column_name, column_values in report_level_columns.items():
    con_df[column_name] = column_values

In [263]:
# Write the flattened contributions table to disk.
con_df.to_csv("contributions_2019.csv")

## Contributor JSON File Downloads

In [None]:
import requests
import json

In [None]:
# Endpoint and filing years for the contribution-report downloads.
contributions_url = "https://lda.senate.gov/api/v1/contributions/"
filing_years = [2019, 2020, 2021]

# Credentials must not live in the notebook: the key is read from the
# LDA_API_KEY environment variable (None when it is not set). The key that
# used to be hard-coded here has been exposed and should be revoked.
api_key = os.environ.get("LDA_API_KEY")

In [None]:
# Make initial request for given year
# Write obtained info to the corresponding file
# Get the value for the next page
# If no value exists (next = null), then break
# Otherwise, make new request and repeat

In [None]:
#params = {"filing_year": 2021, "page": 1}
#all_results = []
#while True:
#    r = requests.get(contributions_url, params, headers={"api_key":api_key})
#    data = r.json()
#    #print(data)
#    if "results" in list(data.keys()):
#        all_results += data["results"]
#    if "next" not in list(data.keys()):
#        break
#    params["page"] += 1
#    if params["page"] % 10 == 0:
#        print(params["page"])

In [None]:
#with open("2021_contributions.json", mode="w+") as f:
#    f.write("[")
#    for ind, item in enumerate(all_results):
#        f.write(json.dumps(item))
#        if ind != len(all_results) - 1:
#            f.write(",")
#        f.write("\n")
#    f.write("]")