# Querying OpenAlex Database with author ids

In [1]:
import requests, json
import csv
import ast



In [3]:
#List of names to test
test_name_list = [
    "william pao",
    "frederick suchy",
    "malcolm cox",
    "nancy cooke",
    "allan sniderman",
    #"vincent dennis",
    #"alessandra pernis",
    #"john minna",
    #"dennis bier",
    #"roger pomerantz"
] #names from asci/aap data

In [4]:
#this cell is an example of how we are calling OpenAlex data
#-----------------TESTING--------------

author_id = "A2242856272"
# specify endpoint
endpoint = 'works'

# build the 'filter' parameter
page = 'page={}'

# put the URL together
filtered_works_url = f'https://api.openalex.org/{endpoint}?filter=author.id:{author_id}&{page}'
print(f'complete URL with filters:\n{filtered_works_url}')

complete URL with filters:
https://api.openalex.org/works?filter=author.id:A2242856272&page={}


In [6]:
#general formula to call data
#endpoint,filterInfo gives acces to table in database, look at documentation and example below
base_url = 'https://api.openalex.org/'
def set_query_call_name(endpoint, filterInfo, name):
    fullquery = base_url+endpoint+'?'+filterInfo+name
    response = requests.get(fullquery)
    visualize_data = response.json()
    return visualize_data
    #print(json.dumps(visualize_data, indent=2)

In [7]:
#finds all work ids by author ids 
def work_id(givenAuthorID):
        page = 'page={}'
        filtered_works_url = f'https://api.openalex.org/works?filter=author.id:{givenAuthorID}&{page}'
        page = 1
        has_more_pages = True
        all_worksID = set()

        # loop through pages
        while has_more_pages:

            # set page value and request page from OpenAlex
            url = filtered_works_url.format(page)
            page_with_results = requests.get(url).json()

            # loop through partial list of results
            results = page_with_results['results']
            for i,work in enumerate(results):
                openalex_id = work['id'].replace("https://openalex.org/", "")
                all_worksID.add(openalex_id)
            # next page
            page += 1

            # end loop when either there are no more results on the requested page 
            # or the next request would exceed 15 results
            per_page = page_with_results['meta']['per_page']
            has_more_pages = len(results) == per_page
        return (all_worksID)
#list_output = work_id('A5044648110')
#print(f'{len(list_output)}')

In [8]:
#gets each publications by its workid
def findWork(workId):
    fullquery = base_url+'works/'+workId
    response = requests.get(fullquery)
    visualize_data = response.json()
    visualize_data.pop("abstract_inverted_index")
    visualize_data.pop("related_works")
    visualize_data.pop("ngrams_url")
    #clean the unicode
    #visualize_data[""]
    return visualize_data
#findWork('W2044177650')
#dont need abstract_inverted_index,related_works data

In [10]:
#opens a csv with name and associated ids, then puts it into a dictionary
#ie. {"alan d'andrea": ['A5045114091', 'A5044648110'], 'james fagin': ['A5080716045', 'A5026026919', 'A5079286906']}
names = {}

with open('openAlex_Ids.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  
    for row in reader:
        name = row[0]
        openIds_str = row[1]  
        openIds_list = ast.literal_eval(openIds_str)
        names[name] = openIds_list

print(names)


{'Sarki Abdulkadir': ['A5066860794', 'A5089778892', 'A5035943760', 'A5112766814', 'A5099551396'], 'Ash Alizadeh': ['A5007420876', 'A5032767433'], 'Vineet Arora': ['A5038649068', 'A5005082660', 'A5049096869', 'A5102461964', 'A5051552508', 'A5100106033', 'A5114228778', 'A5094607975', 'A5094607926', 'A5037079188', 'A5102707254', 'A5113981273', 'A5085002321'], 'Andrew Auerbach': ['A5045146638', 'A5111240651', 'A5101417559', 'A5008611431', 'A5007217896', 'A5035140943', 'A5051070875'], 'Adam Bass': ['A5079706186', 'A5054530341', 'A5035044470', 'A5046924167', 'A5001175067', 'A5063699613', 'A5019966595', 'A5038002304', 'A5039213176', 'A5113217880'], 'Edward Behrens': ['A5032723566', 'A5067266858', 'A5109243802', 'A5108830810', 'A5078438726', 'A5111105240', 'A5103958677', 'A5114006739', 'A5091847681', 'A5083304548', 'A5007280326', 'A5035708524', 'A5105841414', 'A5106355753'], 'Rameen Beroukhim': ['A5041980697', 'A5112504487', 'A5110664853', 'A5069852085', 'A5103945735'], 'Karl Bilimoria': ['A50

In [None]:
#combined work_id(authorID) and findWork(workID) to find publications by author ids saved in a separte csv file

#testing = {"alan d'andrea": ['A5045114091', 'A5044648110'], 'james fagin': ['A5080716045', 'A5026026919', 'A5079286906']}

personDict = {}
count = 0
error_authors = []

for name, openIds in names.items():
    count += 1
    print(f'{count}. Working to find works by: {name}')
    tempList = []
    for authorID in openIds:
        print(authorID)
        try:
            all_workids = work_id(authorID)
            print(f'{len(all_workids)}')
            for workID in all_workids:
                work_details = findWork(workID)
                tempList.append(work_details)

        except Exception as e:  # Broader exception handling; replace Exception with more specific exceptions as needed
            print(f"Error processing authorID {authorID}: {str(e)}")
            error_authors.append({'name': name, 'authorID': authorID, 'error': str(e)})
    personDict[name] = tempList  # Storing all work details for each author

print(f'=============ERRORS===============')
if len(error_authors) == 0:
    print(f'+++++++++++NONE++++++++++++')
else:
    for error in error_authors:
        print(error)


with open('openAlex_Output.json', 'w', encoding="utf-8") as jsonFile:
    json.dump(personDict, jsonFile, indent=2)      
       

1. Working to find works by: Sarki Abdulkadir
A5066860794
197
Error processing authorID A5066860794: 'ngrams_url'
A5089778892
1
Error processing authorID A5089778892: 'ngrams_url'
A5035943760
2
Error processing authorID A5035943760: 'ngrams_url'
A5112766814
2
Error processing authorID A5112766814: 'ngrams_url'
A5099551396
1
Error processing authorID A5099551396: 'ngrams_url'
2. Working to find works by: Ash Alizadeh
A5007420876
551
Error processing authorID A5007420876: 'ngrams_url'
A5032767433
10
Error processing authorID A5032767433: 'ngrams_url'
3. Working to find works by: Vineet Arora
A5038649068
545
Error processing authorID A5038649068: 'ngrams_url'
A5005082660
6
Error processing authorID A5005082660: 'ngrams_url'
A5049096869
2
Error processing authorID A5049096869: 'ngrams_url'
A5102461964
6
Error processing authorID A5102461964: 'ngrams_url'
A5051552508
1
Error processing authorID A5051552508: 'ngrams_url'
A5100106033
1
Error processing authorID A5100106033: 'ngrams_url'
A5114