In [1]:
import pandas as pd
import requests
import time

### Intended output
- Project Number
- Agency
- Title
- Department
- Financial Year
- Total Cost Amount
- Abstract


In [2]:
# Search criteria for the award query: funding agency, recipient
# organisation states, and fiscal year.
agency = 'NIH'
states = [
    'NY', 'DE', 'MD', 'NJ', 'PA', 'CT',
    'RI', 'MA', 'VT', 'NH', 'ME',
]
fy = '2019'

In [3]:
# Column names of the award table, in display order.
cols = [
    'project_num',
    'agency',
    'title',
    'department',
    'fy',
    'total_cost',
    'abstract',
    'org_state',
    'cong_district',
]

In [4]:
# Empty frame carrying only the target column layout. A later cell rebinds
# `award_df` to a frame built from the collected rows.
award_df = pd.DataFrame(columns=cols)

In [99]:
# Endpoint pieces for a one-off, hand-built search URL.
base_url = 'https://api.federalreporter.nih.gov/v1/projects/'
search_start = 'search?query='

# Criteria are `field:value` fragments separated by '$'; several values for one
# field are comma-separated.
query_p1 = f'fy:{fy}'
query_p2 = f'$agency:{agency}'
query_p3 = f"$orgstate:{','.join(states)}$"

# Paging arguments appended after the query.
offset = '&offset=1'
limit = '&limit=50'

full_url = ''.join([base_url, search_start, query_p1, query_p2, query_p3, offset, limit])

In [100]:
# Echo the assembled URL so it can be checked by eye.
full_url

'https://api.federalreporter.nih.gov/v1/projects/search?query=fy:2019$agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$&offset=1&limit=50'

In [101]:
# Fetch the first page of results. The timeout stops the cell from hanging on a
# stalled connection, and raise_for_status() reports an HTTP error here instead
# of letting it surface later as a confusing JSON decoding failure.
r = requests.get(full_url, timeout=60)
r.raise_for_status()

In [103]:
# Inspect the decoded payload: paging metadata (totalCount, offset, limit,
# totalPages) plus the list of award records under 'items'.
r.json()

{'totalCount': 24803,
 'offset': 1,
 'limit': 50,
 'totalPages': 497,
 'items': [{'projectNumber': '1R21AT010515-01',
   'fy': 2019,
   'title': 'MINDFULNESS AND ROMANTIC RELATIONSHIP QUALITY',
   'department': 'HHS',
   'agency': 'NIH',
   'ic': 'NCCIH',
   'totalCostAmount': 202689,
   'nihApplId': 9808833,
   'smApplId': 1171386,
   'budgetStartDate': '2019-08-09T00:00:00',
   'budgetEndDate': '2020-07-31T00:00:00',
   'contactPi': 'JAREMKA, LISA',
   'otherPis': None,
   'congressionalDistrict': '00',
   'dunsId': '059007500',
   'latitude': 39.714507,
   'longitude': -75.738715,
   'orgName': 'UNIVERSITY OF DELAWARE',
   'orgCity': 'NEWARK',
   'orgState': 'DE',
   'orgCountry': 'UNITED STATES',
   'orgZipCode': '197160000',
   'projectStartDate': '2019-08-09T00:00:00',
   'projectEndDate': '2021-07-31T00:00:00',
   'cfdaCode': '213',
   'abstract': 'One of the most robust findings in health psychology is that poor quality relationships place people at risk for an array of disease

In [26]:
# Search criteria keyed by API field name; each value is the list of accepted
# values for that field.
param_dict = dict(
    agency=['NIH'],
    orgstate=['NY', 'DE', 'MD', 'NJ', 'PA', 'CT', 'RI', 'MA', 'VT', 'NH', 'ME'],
    fy=['2019'],
)

In [37]:
# Preview every criterion in the `field:value1,value2` form used in the query.
for field, values in param_dict.items():
    print(field + ':' + ','.join(values))

agency:NIH
orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME
fy:2019


In [124]:
def url_construct(param_dict, offset=1, limit=50):
    """Build a search URL from a mapping of criteria.

    Parameters
    ----------
    param_dict : dict
        Maps a field name (str) to an iterable of string values. Each entry is
        rendered as ``field:v1,v2$`` in the mapping's iteration order.
    offset : int, default 1
        Value for the ``offset`` query argument.
    limit : int, default 50
        Value for the ``limit`` query argument.

    Returns
    -------
    str
        ``base_url + search_start`` followed by the rendered criteria and the
        ``&offset=...&limit=...`` suffix. ``base_url`` and ``search_start`` are
        read from module scope.
    """
    criteria = ''.join(
        field + ':' + ','.join(values) + '$'
        for field, values in param_dict.items()
    )
    paging = '&offset=' + str(offset) + '&limit=' + str(limit)
    return base_url + search_start + criteria + paging

In [125]:
def get_pages(url, timeout=60):
    """Return the ``totalPages`` value reported by the search endpoint.

    Parameters
    ----------
    url : str
        Fully built search URL.
    timeout : float, default 60
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    int
        The ``totalPages`` field of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to decode an error body.
    r.raise_for_status()
    return r.json()['totalPages']

In [126]:
def construct_list(result_list, param_dict, limit=50, pause=1):
    """Download every page of matching awards and append one row dict per award.

    Parameters
    ----------
    result_list : list
        Mutated in place: a dict with the keys ``project_num``, ``agency``,
        ``title``, ``department``, ``fy``, ``total_cost``, ``abstract``,
        ``org_state`` and ``cong_district`` is appended for each parsed award.
    param_dict : dict
        Search criteria, passed through to ``url_construct``.
    limit : int, default 50
        Page size; also the amount the offset advances after each page.
    pause : float, default 1
        Seconds to sleep between consecutive requests.

    Returns
    -------
    None
        Results are delivered through ``result_list`` so that calling this as
        the last statement of a cell does not echo thousands of rows.

    Raises
    ------
    requests.HTTPError
        If any page request returns an error status.
    """
    offset = 1
    curr_page = 1
    # The page count is read from the first response, which avoids a separate
    # request made only to learn it.
    pages = None
    while pages is None or curr_page <= pages:
        call = url_construct(param_dict, offset=offset, limit=limit)
        r = requests.get(call, timeout=60)
        r.raise_for_status()
        payload = r.json()
        if pages is None:
            pages = payload['totalPages']
        for award in payload['items']:
            try:
                df_row = {'project_num': award['projectNumber'],
                          'agency': award['agency'],
                          'title': award['title'],
                          'department': award['department'],
                          'fy': award['fy'],
                          'total_cost': award['totalCostAmount'],
                          'abstract': award['abstract'],
                          # Previously referenced an undefined name, so every
                          # row failed and was silently discarded.
                          'org_state': award['orgState'],
                          'cong_district': award['congressionalDistrict']
                          }
            except KeyError:
                # Only a missing field is expected here; anything else should
                # propagate. .get() keeps the report itself from raising.
                print('Could not parse project ' + str(award.get('projectNumber')))
                continue
            result_list.append(df_row)
        offset += limit
        curr_page += 1
        # Throttle between requests, but do not wait after the final page.
        if curr_page <= pages:
            time.sleep(pause)

In [127]:
# Accumulator for the parsed award rows; construct_list appends to it in place.
result_list=[]
construct_list(result_list, param_dict)

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=1&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=1&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=51&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=101&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=151&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=201&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NI

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2651&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2701&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2751&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2801&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2851&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=2901&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?quer

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5301&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5351&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5401&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5451&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5501&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=5551&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?quer

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=7951&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=8001&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=8051&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=8101&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=8151&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=8201&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?quer

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10601&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10651&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10701&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10751&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10801&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=10851&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13251&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13301&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13351&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13401&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13451&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=13501&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=15901&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=15951&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=16001&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=16051&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=16101&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=16151&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18551&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18601&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18651&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18701&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18751&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=18801&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21201&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21251&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21301&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21351&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21401&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=21451&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=23851&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=23901&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=23951&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=24001&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=24051&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/search?query=agency:NIH$orgstate:NY,DE,MD,NJ,PA,CT,RI,MA,VT,NH,ME$fy:2019$&offset=24101&limit=50
URL used:  https://api.federalreporter.nih.gov/v1/projects/searc

In [149]:
def cong_district_fix(award_row):
    """Return the congressional district to use for one award row.

    Rows whose ``org_state`` is 'DE' or 'VT' always get '01'; every other row
    keeps its existing ``cong_district`` value (which may be missing).
    NOTE(review): presumably these states are special-cased because the API's
    district code for them (e.g. '00' for DE in the sample response) does not
    match the legislators table - confirm.

    Parameters
    ----------
    award_row : mapping
        Anything indexable by 'org_state' and 'cong_district', such as a
        DataFrame row or a dict.
    """
    forced_first_district = ('DE', 'VT')
    if award_row['org_state'] not in forced_first_district:
        return award_row['cong_district']
    return '01'

In [151]:
# Build the award table from the collected rows, then overwrite the district
# column with the value chosen by cong_district_fix for each row.
award_df = pd.DataFrame(result_list)
award_df['cong_district'] = award_df.apply(cong_district_fix, axis=1)

display(award_df.head())
# DataFrame.info() prints its report and returns None, so it is called directly;
# wrapping it in display() would also render a stray "None".
award_df.info()

Unnamed: 0,project_num,agency,title,department,fy,total_cost,abstract,org_state,cong_district
0,1R21AT010515-01,NIH,MINDFULNESS AND ROMANTIC RELATIONSHIP QUALITY,HHS,2019,202689.0,One of the most robust findings in health psyc...,DE,1
1,5R01AT009720-02,NIH,ASSOCIATION BETWEEN CERVICAL SPINAL MANIPULATI...,HHS,2019,609263.0,"Project Summary:Over the last 30 years, spine ...",NH,2
2,4R33AT010117-02,NIH,MINDFUL MOMS IN RECOVERY: YOGA-BASED MINDFULNE...,HHS,2019,818003.0,Project Summary/AbstractNew Hampshire has seco...,NH,2
3,5R01LM012527-03,NIH,INCORPORATING IMAGE-BASED FEATURES INTO BIOMED...,HHS,2019,463024.0,The proposed research aims to develop and adva...,DE,1
4,1R21AT010366-01A1,NIH,DIETARY FIBER TO MITIGATE ANTIBIOTIC-INDUCED M...,HHS,2019,243750.0,To enhance the use of currently available anti...,RI,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24803 entries, 0 to 24802
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   project_num    24803 non-null  object 
 1   agency         24803 non-null  object 
 2   title          24803 non-null  object 
 3   department     24803 non-null  object 
 4   fy             24803 non-null  int64  
 5   total_cost     24796 non-null  float64
 6   abstract       24803 non-null  object 
 7   org_state      24803 non-null  object 
 8   cong_district  24786 non-null  object 
dtypes: float64(1), int64(1), object(7)
memory usage: 1.7+ MB


None

In [145]:
# Per-state count of the awards that still have no congressional district.
award_df[award_df['cong_district'].isna()].groupby('org_state').count()

Unnamed: 0_level_0,project_num,agency,title,department,fy,total_cost,abstract,cong_district
org_state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MA,2,2,2,2,2,2,2,0
MD,9,9,9,9,9,9,9,0
NY,4,4,4,4,4,4,4,0
PA,2,2,2,2,2,2,2,0


In [129]:
# Show rows that repeat an earlier row across all columns; an empty frame means none.
award_df[award_df.duplicated()]

Unnamed: 0,project_num,agency,title,department,fy,total_cost,abstract,org_state,cong_district


In [130]:
# Persist the award table to a pickle file in the working directory.
award_df.to_pickle('award_df.pkl')

In [133]:
# Award counts per (state, district) group; the [50:] slice skips the first 50
# groups and shows the remainder.
award_df[['org_state', 'cong_district', 'project_num']].groupby(['org_state', 'cong_district']).count()[50:]

Unnamed: 0_level_0,Unnamed: 1_level_0,project_num
org_state,cong_district,Unnamed: 2_level_1
NY,14,473
NY,15,4
NY,16,2
NY,17,69
NY,18,5
NY,19,1
NY,20,178
NY,21,5
NY,22,36
NY,23,272


In [152]:
def district_to_str(district_int):
    """Render a district number as text, prefixing '0' when it is below 10.

    Parameters
    ----------
    district_int : int
        District number; it is compared against 10 and converted with ``str``.

    Returns
    -------
    str
        For example ``5 -> '05'`` and ``12 -> '12'``.
    """
    padding = '0' if district_int < 10 else ''
    return padding + str(district_int)

In [153]:
# Load the legislator table and convert the district number to the zero-padded
# string form, since the merge further down joins it against the award
# table's cong_district column.
legislator_df = pd.read_csv('legislators.csv')
legislator_df['congressional_district'] = legislator_df['congressional_district'].apply(district_to_str)

display(legislator_df.head())
# DataFrame.info() prints its report and returns None, so it is called directly;
# wrapping it in display() would also render a stray "None".
legislator_df.info()

Unnamed: 0,state,congressional_district,legislator_name,party_code
0,CT,5,Jahana Hayes,D
1,CT,3,Rosa DeLauro,D
2,CT,2,Joe Courtney,D
3,CT,1,John Larson,D
4,CT,4,Jim Himes,D


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   state                   90 non-null     object
 1   congressional_district  90 non-null     object
 2   legislator_name         90 non-null     object
 3   party_code              90 non-null     object
dtypes: object(4)
memory usage: 2.9+ KB


None

In [160]:
# Attach every award to the legislator(s) of its state and district. This is a
# left join from legislators, so a legislator with no matching award is kept
# with empty award columns.
# NOTE: legislator_df holds more than one row for some (state, district) pairs
# (e.g. MD 07), so awards in those districts appear once per listed legislator
# and the merged frame has more rows than award_df.
leg_award_df = legislator_df.merge(award_df,
                                   how='left',
                                   left_on=['state', 'congressional_district'],
                                   right_on=['org_state', 'cong_district'])

display(leg_award_df.head())
# DataFrame.info() prints its report and returns None, so it is called directly;
# wrapping it in display() would also render a stray "None".
leg_award_df.info()

Unnamed: 0,state,congressional_district,legislator_name,party_code,project_num,agency,title,department,fy,total_cost,abstract,org_state,cong_district
0,CT,5,Jahana Hayes,D,5R01MD013550-02,NIH,A REINFORCEMENT INTERVENTION FOR INCREASING HI...,HHS,2019.0,570241.0,"Abstract African American and Latina women, as...",CT,5
1,CT,5,Jahana Hayes,D,5U41HG009889-02 (5328),NIH,A COMPREHENSIVE FUNCTIONAL MAP OF HUMAN PROTEI...,HHS,2019.0,1154631.0,PRODUCTION CORE – PROJECT SUMMARYThe objective...,CT,5
2,CT,5,Jahana Hayes,D,5U41HG009889-02 (5329),NIH,A COMPREHENSIVE FUNCTIONAL MAP OF HUMAN PROTEI...,HHS,2019.0,512412.0,RESOURCE PROJECT - PROJECT SUMMARYThe overall ...,CT,5
3,CT,5,Jahana Hayes,D,5U41HG009889-02 (5327),NIH,A COMPREHENSIVE FUNCTIONAL MAP OF HUMAN PROTEI...,HHS,2019.0,602668.0,"MANAGEMENT, DISSEMINATION AND TRAINING – PROJE...",CT,5
4,CT,5,Jahana Hayes,D,5U41HG009889-02,NIH,A COMPREHENSIVE FUNCTIONAL MAP OF HUMAN PROTEI...,HHS,2019.0,2269711.0,OVERALL – PROJECT SUMMARYThe objective of the ...,CT,5


<class 'pandas.core.frame.DataFrame'>
Int64Index: 27331 entries, 0 to 27330
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   state                   27331 non-null  object 
 1   congressional_district  27331 non-null  object 
 2   legislator_name         27331 non-null  object 
 3   party_code              27331 non-null  object 
 4   project_num             27327 non-null  object 
 5   agency                  27327 non-null  object 
 6   title                   27327 non-null  object 
 7   department              27327 non-null  object 
 8   fy                      27327 non-null  float64
 9   total_cost              27320 non-null  float64
 10  abstract                27327 non-null  object 
 11  org_state               27327 non-null  object 
 12  cong_district           27327 non-null  object 
dtypes: float64(2), object(11)
memory usage: 2.9+ MB


None

In [164]:
# Spot-check one project number to see how many merged rows it produced.
leg_award_df[leg_award_df.project_num == '1R01AT010333-01A1']

Unnamed: 0,state,congressional_district,legislator_name,party_code,project_num,agency,title,department,fy,total_cost,abstract,org_state,cong_district
8253,MD,7,Kweisi Mfume,D,1R01AT010333-01A1,NIH,NEURAL CORRELATES OF HYPOALGESIA DRIVEN BY OBS...,HHS,2019.0,736899.0,Project SummaryPlacebo effects held an ambival...,MD,7
10570,MD,7,Elijah Cummings,D,1R01AT010333-01A1,NIH,NEURAL CORRELATES OF HYPOALGESIA DRIVEN BY OBS...,HHS,2019.0,736899.0,Project SummaryPlacebo effects held an ambival...,MD,7


In [162]:
# Write the merged legislator/award table to CSV in the working directory.
# With default arguments the frame's index is written as the first column.
leg_award_df.to_csv('legislators_awards.csv')

In [167]:
# List every legislator row whose (state, district) pair occurs more than once;
# keep=False marks all members of each duplicated group, not just the repeats.
legislator_df[legislator_df[['state', 'congressional_district']].duplicated(keep=False)]

Unnamed: 0,state,congressional_district,legislator_name,party_code
17,MD,7,Kweisi Mfume,D
18,MD,7,Elijah Cummings,D
53,NY,27,Chris Jacobs,R
66,NY,27,Chris Collins,R
82,PA,12,Fred Keller,R
86,PA,12,Tom Marino,R
