In [2]:
import requests
import json
from nameparser import HumanName
import csv

In [3]:
# Put your list of names in here (one string per person).
# NOTE(review): the lookup call further down passes `all_names`, not `names`,
# so nothing visible in this notebook reads this list — confirm which variable
# is meant to be used.
names = []

In [6]:
# Run this cell only if your names are in a CSV file; otherwise skip to the next cell.
def read_names_from_csv(file_path, encoding='utf-8', first_name_col=1, last_name_col=3):
    """Read "<first> <last>" name strings from a CSV file.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding used to open the file. Bytes that cannot be
            decoded are silently dropped (``errors='ignore'``).
        first_name_col: Zero-based, non-negative index of the column that
            holds first names. Adjust to match the layout of your CSV.
        last_name_col: Zero-based, non-negative index of the column that
            holds last names. Adjust to match the layout of your CSV.

    Returns:
        list[str]: One ``first + " " + last`` string for every row that is
        wide enough to contain both columns, in file order. Rows that are too
        short (including blank lines) are skipped. No row is treated as a
        header, so a header line is returned like any other row and must be
        dropped by the caller.
    """
    # A row is usable only if it reaches the right-most column we read.
    required_width = max(first_name_col, last_name_col) + 1
    all_names = []

    # newline='' hands line-ending handling to the csv module, as its
    # documentation recommends (matters for quoted fields with line breaks).
    with open(file_path, 'r', encoding=encoding, errors='ignore', newline='') as csv_file:
        for row in csv.reader(csv_file):
            if len(row) < required_width:
                continue
            all_names.append(row[first_name_col] + " " + row[last_name_col])

    return all_names

# Example usage
file_path = r'150physicianscientists.csv'
# Discard the first parsed entry straight away (presumably the CSV header row —
# confirm against your file).
all_names = read_names_from_csv(file_path)[1:]

# `all_names` is now a list of "first last" strings; show how many were loaded
# and what they are.
print(
    f'ALL NAMES({len(all_names)}) :',
    "======================================================",
    all_names,
    sep="\n",
)

ALL NAMES(150) :
['Sarki Abdulkadir', 'Ash Alizadeh', 'Vineet Arora', 'Andrew Auerbach', 'Adam Bass', 'Edward Behrens', 'Rameen Beroukhim', 'Karl Bilimoria', 'Ron Bose', 'Russell Bowler', 'Nicole Calakos', 'Sarat Chandarlapaty', 'Ping Chi', 'Ryan Corcoran', 'Peter Crompton', 'Aaron Cypess', 'Maximilian Diehn', 'Justin Dimick', 'Mark Dransfield', 'Brian Feldman', 'Ariel Feldstein', 'Martin Fernandez-Zapico', 'Alessia Fornoni', 'Terry Fry', 'Rene Galindo', 'Anupam Agarwal', 'Jayakrishna Ambati', 'Suneel Apte', 'Kyong-Mi Chang', 'Herbert Cohen', 'Laurence Cooper', 'George Cotsarelis', 'Kenneth Covinsky', 'Maurizio Del Poeta', 'Phillip Dennis', 'Michael Diamond', 'Jay Edelberg', 'David Engman', 'Daniel Fowler', 'Vance Fowler', 'John Frangioni', 'Bin Gao', 'Michael Glickman', 'Stephen Hammes', 'Hal Hoffman', 'Sam Hwang', 'S. Karumanchi', 'Douglas Kerr', 'Tejvir Khurana', 'Fadlo Khuri', 'Maria Abreu', 'Michael Ackerman', 'David Andes', 'Scott Armstrong', 'R. Barr', 'Marcel Behr', 'Daniel Ben

In [7]:
# Endpoint queried for author searches.
_OPENALEX_AUTHORS_URL = "https://api.openalex.org/authors"

# Fields of a raw author record that are discarded before the record is stored.
_UNUSED_AUTHOR_FIELDS = (
    "display_name_alternatives",
    "orcid",
    "summary_stats",
    "x_concepts",
    "works_api_url",
)


def _clean_author_record(author):
    """Trim one raw author record in place, add the derived fields, and return it."""
    name = HumanName(author.pop("display_name"))

    # Drop unneeded fields. A default is supplied so that a record lacking one
    # of these optional fields does not abort the whole lookup with a KeyError.
    for field in _UNUSED_AUTHOR_FIELDS:
        author.pop(field, None)

    # IDs are returned as URLs ("https://openalex.org/<id>"); keep the last segment.
    author["id"] = author["id"].rsplit("/", 1)[-1]
    # Name parts of a single character (e.g. a bare initial) are stored as None.
    author["name_first"] = name.first if len(name.first) > 1 else None
    author["name_middle"] = name.middle if len(name.middle) > 1 else None
    author["name_last"] = name.last if len(name.last) > 1 else None
    author["works"] = []
    author["api"] = "openAlex"
    return author


def author_ids(names, timeout=30):
    """Search OpenAlex for every name and collect the candidate author records.

    Each name is sent to the authors endpoint as a ``search`` term and the
    result pages are followed with cursor paging (100 results per page) until
    an empty page or a missing ``next_cursor`` is returned.

    Args:
        names: Iterable of name strings to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        tuple: ``(ids_dictionary, failed_queries)`` where

        - ``ids_dictionary`` maps each searched name that produced at least
          one result to a dict keyed by OpenAlex author id. Each value is the
          author record returned by the API with the fields listed in
          ``_UNUSED_AUTHOR_FIELDS`` and ``display_name`` removed, and with
          these keys set by this function:

          - ``"id"`` (str): author id without the URL prefix
          - ``"name_first"``, ``"name_middle"``, ``"name_last"`` (str or None):
            name parts parsed from the display name; None when the part is
            shorter than two characters
          - ``"works"`` (list): initialised to an empty list
          - ``"api"`` (str): ``"openAlex"``

          Any other keys are whatever the API returned.
        - ``failed_queries`` is a set of ``(name, cursor)`` tuples, one for
          each name whose lookup was abandoned, with the cursor of the page
          that failed. Results gathered before the failure are kept.
    """
    ids_dictionary = {}
    failed_queries = set()

    for author_name in names:
        cursor = "*"

        # Follow result pages until there is nothing left or a request fails.
        while True:
            try:
                # Passing params lets requests URL-encode the search term and
                # the cursor (cursors may contain characters such as '+' or '=').
                response = requests.get(
                    _OPENALEX_AUTHORS_URL,
                    params={"search": author_name, "per_page": 100, "cursor": cursor},
                    timeout=timeout,
                )

                # A non-200 reply carries no usable page: record it and stop,
                # instead of falling through with missing or stale results.
                if response.status_code != 200:
                    print(f"HTTP {response.status_code} for {author_name}")
                    failed_queries.add((author_name, cursor))
                    break

                payload = response.json()
                authors = payload["results"]

                for author in authors:
                    record = _clean_author_record(author)
                    ids_dictionary.setdefault(author_name, {})[record["id"]] = record

                # An empty page means every result has been consumed.
                if not authors:
                    break

                cursor = payload["meta"]["next_cursor"]
                # No further cursor: stop rather than send "None" as a cursor.
                if not cursor:
                    break

            # Best effort: report the problem, remember where it happened,
            # and move on to the next name.
            except Exception as e:
                print(e)
                failed_queries.add((author_name, cursor))
                break

        print(author_name + " done")

    # Print the failed queries
    print(f"Failed queries: {failed_queries}")
    return ids_dictionary, failed_queries

# Look up every loaded name: `ids_passed` receives the per-name results and
# `ids_failed` the set of (name, cursor) queries that could not be completed.
ids_passed, ids_failed = author_ids(all_names)

Sarki Abdulkadir done
Ash Alizadeh done
Vineet Arora done
Andrew Auerbach done
Adam Bass done
Edward Behrens done
Rameen Beroukhim done
Karl Bilimoria done
Ron Bose done
Russell Bowler done
Nicole Calakos done
Sarat Chandarlapaty done
Ping Chi done
Ryan Corcoran done
Peter Crompton done
Aaron Cypess done
Maximilian Diehn done
Justin Dimick done
Mark Dransfield done
Brian Feldman done
Ariel Feldstein done
Martin Fernandez-Zapico done
Alessia Fornoni done
Terry Fry done
Rene Galindo done
Anupam Agarwal done
Jayakrishna Ambati done
Suneel Apte done
Kyong-Mi Chang done
Herbert Cohen done
Laurence Cooper done
George Cotsarelis done
Kenneth Covinsky done
Maurizio Del Poeta done
Phillip Dennis done
Michael Diamond done
Jay Edelberg done
David Engman done
Daniel Fowler done
Vance Fowler done
John Frangioni done
Bin Gao done
Michael Glickman done
Stephen Hammes done
Hal Hoffman done
Sam Hwang done
S. Karumanchi done
Douglas Kerr done
Tejvir Khurana done
Fadlo Khuri done
Maria Abreu done
Michael

In [9]:
def write_set_to_csv(file_path, my_set, encoding='utf-8'):
    """Write one CSV row per name, listing the IDs collected for that name.

    Args:
        file_path: Path of the CSV file to create or overwrite.
        my_set: Mapping of name -> iterable of IDs. Despite the parameter
            name, a mapping is required because ``.items()`` is called on it;
            when a value is itself a dict, its keys are what gets written.
        encoding: Text encoding of the output file. Set explicitly so that
            names with non-ASCII characters are written the same way on every
            platform instead of depending on the locale default.

    The file starts with a ``Name,ID`` header. In each following row the
    second cell holds the text form of a Python list of the IDs,
    e.g. ``['A1', 'A2']``.
    """
    with open(file_path, 'w', newline='', encoding=encoding) as csv_file:
        writer = csv.writer(csv_file)

        # Write the header
        writer.writerow(['Name', 'ID'])

        # Write the data: the csv writer stringifies the list for the ID cell.
        writer.writerows([name, list(ids)] for name, ids in my_set.items())
            
# Save the IDs collected for each name to openAlex_Ids.csv.
write_set_to_csv("openAlex_Ids.csv", ids_passed)