In [1]:
import requests
import json
from nameparser import HumanName

In [None]:
# Put your list of names in here.
# Each entry must be a dict with "first_name" and "last_name" string keys,
# e.g. {"first_name": "Evan", "last_name": "Abel"}; author_ids() reads
# exactly those two keys from every entry.
names = []

In [4]:
# Build a dictionary of the OpenAlex author IDs found for each physician-scientist.
# Key:
# - "{first_name} {last_name}" (dict): a dict of candidate author records keyed on id
#     Each record keeps the fields returned by OpenAlex (minus the ones removed below), including:
#     - "id" (str): OpenAlex author id (without the URL prefix)
#     - "counts_by_year" (list): a list of dictionaries documenting the author's activity
#         Each dictionary contains:
#         - "year" (int): the year
#         - "works_count" (int): author's works count that year
#         - "cited_by_count" (int): number of papers that cite the author that year
#     - "name_first" (str or None): author's first name (None if shorter than 2 characters)
#     - "name_middle" (str or None): author's middle name (None if shorter than 2 characters)
#     - "name_last" (str or None): author's last name (None if shorter than 2 characters)
#     - "created_date" (str): author's first seen date
#     - "works_count" (int): author's total number of papers
#     - "works" (list): initialised to an empty list
#     - "api" (str): "openAlex"
def author_ids(names, timeout=30):
    """Search OpenAlex for every author record matching each person's name.

    For each person the OpenAlex ``/authors`` endpoint is searched with
    "<first_name> <last_name>" and all result pages are followed via cursor
    paging. Every returned author is trimmed (see ``_clean_author_record``)
    and stored under the queried name, keyed by its short OpenAlex id.

    Args:
        names: iterable of dicts, each with "first_name" and "last_name"
            string keys. Leading/trailing periods are stripped from both.
        timeout: seconds to wait for each HTTP request before giving up
            (passed to ``requests.get``). Defaults to 30.

    Returns:
        A tuple ``(ids_dictionary, failed_queries)`` where
        ``ids_dictionary`` maps "<first_name> <last_name>" to a dict of
        author records keyed by author id, and ``failed_queries`` is a set
        of ``(author_name, cursor)`` tuples identifying the page request
        that failed. Pages fetched before a failure are kept.

    Raises:
        KeyError: if an entry of ``names`` lacks "first_name" or
            "last_name". Errors while querying or parsing a page are not
            raised; they are printed and recorded in ``failed_queries``.
    """
    ids_dictionary = {}
    failed_queries = set()

    for person in names:
        # Only first and last name are searched; adapt here if the input
        # carries a middle name.
        first_name = person["first_name"].strip(".")
        last_name = person["last_name"].strip(".")
        author_name = first_name + " " + last_name

        # "*" asks OpenAlex for the first page; a falsy cursor means we are done.
        cursor = "*"
        while cursor:
            try:
                # Passing `params` lets requests URL-encode the name, so
                # characters such as "&" or "#" cannot corrupt the query.
                response = requests.get(
                    _OPENALEX_AUTHORS_URL,
                    params={"search": author_name, "per_page": 100, "cursor": cursor},
                    timeout=timeout,
                )
                if response.status_code != 200:
                    raise RuntimeError(
                        f"OpenAlex returned HTTP {response.status_code} for {author_name!r}"
                    )

                payload = response.json()
                authors = payload["results"]

                for author in authors:
                    record = _clean_author_record(author)
                    ids_dictionary.setdefault(author_name, {})[record["id"]] = record

                # An empty page or a null next_cursor both mark the end of the results.
                next_cursor = payload["meta"]["next_cursor"] if authors else None

            # Best effort: log the problem, remember which page failed, move on.
            except Exception as e:
                print(e)
                failed_queries.add((author_name, cursor))
                break

            cursor = next_cursor

        print(author_name + " done")

    # Print the failed queries
    print(f"Failed queries: {failed_queries}")
    return ids_dictionary, failed_queries


# --- private helpers for author_ids -------------------------------------

# Endpoint searched by author_ids.
_OPENALEX_AUTHORS_URL = "https://api.openalex.org/authors"

# OpenAlex ids are returned as full URLs; only the trailing short id is kept.
_OPENALEX_ID_PREFIX = "https://openalex.org/"

# Fields of the raw author object that are not needed downstream.
_DROPPED_AUTHOR_FIELDS = (
    "display_name_alternatives",
    "orcid",
    "summary_stats",
    "x_concepts",
    "works_api_url",
)


def _clean_author_record(author):
    """Trim one raw OpenAlex author dict in place and add parsed-name fields.

    Removes the display name and the fields in ``_DROPPED_AUTHOR_FIELDS``
    (tolerating fields that are absent), shortens "id" to the part after
    the OpenAlex URL prefix, and adds "name_first", "name_middle",
    "name_last" (``None`` when the parsed part is shorter than two
    characters), an empty "works" list and "api" == "openAlex".

    Returns the same dict. Raises ``KeyError`` if "id" is missing.
    """
    name = HumanName(author.pop("display_name", None) or "")

    for field in _DROPPED_AUTHOR_FIELDS:
        author.pop(field, None)

    if author["id"].startswith(_OPENALEX_ID_PREFIX):
        author["id"] = author["id"][len(_OPENALEX_ID_PREFIX):]

    # Single-character parts (bare initials) are treated as missing.
    author["name_first"] = name.first if len(name.first) > 1 else None
    author["name_middle"] = name.middle if len(name.middle) > 1 else None
    author["name_last"] = name.last if len(name.last) > 1 else None
    author["works"] = []
    author["api"] = "openAlex"
    return author

Kjersti Aagaard done
Derek Abbott done
Francois Abboud done
Hanna Abboud done
Omar Abdel-Wahab done
Sarki Abdulkadir done
E Abel done
Evan Abel done
John Abel done
Walter Abelmann done
Failed queries: set()


In [5]:
# Query OpenAlex for every name, then persist both results as pretty-printed JSON.
ids_dictionary, failed_queries = author_ids(names)

# (destination file, JSON-serialisable payload); the set of failed queries
# is converted to a list because JSON has no set type.
for out_path, payload in (
    ("openAlex_ids.json", ids_dictionary),
    ("openAlex_failed_queries.json", list(failed_queries)),
):
    with open(out_path, "w") as out_file:
        json.dump(payload, out_file, indent=4)
