In [68]:
import dimcli
from dimcli.utils import *
import re
import psycopg2
import requests
import os, sys, time, json
from tqdm.notebook import tqdm as progressbar
from nameparser import HumanName

import pandas as pd

import plotly.express as px
from plotly.offline import plot
if not 'google.colab' in sys.modules:
  # make js dependencies local / needed by html exports
    from plotly.offline import init_notebook_mode
    init_notebook_mode(connected=True)

print("==\nLogging in..")
# https://digital-science.github.io/dimcli/getting-started.html#authentication
ENDPOINT = "https://app.dimensions.ai"
if 'google.colab' in sys.modules:
    import getpass
    KEY = getpass.getpass(prompt='API Key: ')  
    dimcli.login(key=KEY, endpoint=ENDPOINT)
else:
    KEY = "9F8D648F0D7E437CB1736BEBDF007F02"
    dimcli.login(key=KEY, endpoint=ENDPOINT)
dsl = dimcli.Dsl()

==
Logging in..
[2mDimcli - Dimensions API Client (v1.2)[0m
[2mConnected to: <https://app.dimensions.ai/api/dsl> - DSL v2.10[0m
[2mMethod: manual login[0m
====
Heads up! The latest Dimcli version is  1.3
You have installed:  1.2
====
Please upgrade: `pip install dimcli -U`


In [69]:
# Specify the path to your JSON file
id_path = '/Users/alishali/Desktop/physician-scientists copy/openAlex_scraper/asci_aap_dataJSON.json'

# Initialize an empty dictionary
id_dictionary = {}

try:
    # Open the JSON file in read mode
    with open(id_path, 'r') as json_file:
        # Load the JSON data into the dictionary
        id_dictionary = json.load(json_file)
except FileNotFoundError:
    print("JSON file not found.")
except json.JSONDecodeError:
    print("Error decoding JSON data.")

print(id_dictionary)

{'people': [{'year': '2019', 'first_name': 'Kjersti', 'middle_name': 'M.', 'last_name': 'Aagaard', 'phone': '8012307893', 'email': 'aagaardt@bcm.edu', 'affiliation': "['Baylor College of Medicine']", 'original specialization': "['Obstetrics and Gynecology', 'Clinical research', 'Clinical trials']", 'modified specialization': "['Obstetrics and Gynecology']", 'unactive': 'False', 'organization': 'False', 'email_affiliation': "['ASCI']", 'umbrella_aff': "['bcm']", 'related_aff': '[]', 'umbrella_spec': '[]', 'related_spec': '[]', 'id_num': '[]', 'kumu_num': 'P1'}, {'year': '2016', 'first_name': 'Derek', 'middle_name': 'W.', 'last_name': 'Abbott', 'phone': '2163688564', 'email': 'dwa4@case.edu', 'affiliation': "['Case Western Reserve University School of Medicine']", 'original specialization': "['Molecular Biology', 'Immunology', 'Pathology']", 'modified specialization': "['Pathology']", 'unactive': 'False', 'organization': 'False', 'email_affiliation': "['ASCI']", 'umbrella_aff': "['case']

In [70]:
def execute_command(query):
    try:
        connection = psycopg2.connect(
            "postgresql://selina04_mit_edu:tcM97azb3HqLeOIlV6HGpA@livid-dibbler-6457.g8z.cockroachlabs.cloud:26257/livid-dibbler-6457.researchers?sslmode=verify-full"
        )

        # Create a cursor and execute the insert statement
        cursor = connection.cursor()
        # print('This is current command: ',query)
        cursor.execute(query)
        connection.commit()  # Commit the transaction
        # print("Data inserted successfully")
        cursor.close()
        connection.close()

    except psycopg2.Error as error:
        print(f"Error: {query}")
        print("------")
        print(error)

In [71]:
def researcherIds(name):
    try:
        res = dsl.query_iterative(f"""search researchers for "{name}" return researchers""")
        listDict = res.json['researchers']
        allIds = set()
        for idv in listDict:
            allIds.add(idv['id'])

        allIds = list(allIds)  # Convert set back to a list if needed
        
        if len(allIds) > 512:    
            allIds = allIds[0:512]
        print(f'Total of {len(allIds)} unique ids for {name} ')

        # Constructing the DSL query with the list of IDs
        query_ids = ', '.join([f'"{id_val}"' for id_val in allIds])
        query = f'search researchers where id in [{query_ids}] return researchers[id+obsolete+redirect]'

        # Execute the DSL query
        res2 = dsl.query_iterative(query)
        listDict2 = res2.json['researchers']
        allWorkingIDs = set()
        for person in listDict2:
            if person['obsolete'] == 0:  # Current Working Id(s)
                allWorkingIDs.add(person["id"])
            else:
                for ids in person['redirect']:
                    allWorkingIDs.add(ids)
        
        print(f'++++++++ FINAL {len(allWorkingIDs)} Working ID(s) +++++++++++')  
        print(allWorkingIDs)
    except:
        print(f'No IDs found for the {name}')
        return {}
    
    return allWorkingIDs
testIDs = researcherIds("Leslie Schoenfield")

Starting iteration with limit=1000 skip=0 ...[0m
0-2 / 2 (0.25s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 2 unique ids for Leslie Schoenfield 


0-2 / 2 (0.39s)[0m
===
Records extracted: 2[0m


++++++++ FINAL 2 Working ID(s) +++++++++++
{'ur.014424615212.32', 'ur.0102031123.62'}


In [72]:
base_url = 'https://api.openalex.org/'

In [73]:
def author_ids(author_name):

    # Initialize a dictionary to store the queried IDs for each physician-scientist
    ids_dictionary = {}

    # Initialize a set to store the failed queries
    failed_queries = set()

    cursor = "*"

    # Keep making queries until the request is successful or the maximum number of queries is reached
    while True:
        query_author = f'https://api.openalex.org/authors?search={author_name}&per_page=100&cursor={cursor}'
        
        try:
            # Make the API request
            response = requests.get(query_author)

            # Check if the request was successful
            if response.status_code == 200:
                # Get the list of inventors from the response
                authors = response.json()["results"]

                # Iterate over the author and add them to the dictionary
                for author in authors:
                    name = HumanName(author.pop("display_name"))
    
                    # remove unnecessary features
                    author.pop("display_name_alternatives")
                    author.pop("orcid")
                    author.pop("summary_stats")
                    author.pop("x_concepts")
                    author.pop("works_api_url")
                    
                    author["id"] = author["id"][21:]
                    author["name_first"] = name.first if len(name.first) > 1 else None
                    author["name_middle"] = name.middle if len(name.middle) > 1 else None
                    author["name_last"] = name.last if len(name.last) > 1 else None
                    author["works"] = []
                    
                    author["api"] = "openAlex"
                    
                    if author_name not in ids_dictionary:
                        ids_dictionary[author_name] = {}
                    
                    if author["id"] not in ids_dictionary[author_name]:
                        ids_dictionary[author_name][author["id"]] = {}

                    ids_dictionary[author_name][author["id"]] = author

            # Break out of the loop if there are no more authors
            if not authors:
                break

            # Update the query parameters with the next `cursor` value
            cursor = response.json()["meta"]["next_cursor"]
                
        # If the request is unsuccessful, add the query name and `after` value to the failed queries set
        except Exception as e:
            print(e)
            failed_queries.add((author_name, cursor))
            break

    print(author_name + " done")

    # Print the failed queries
    print(f"Failed queries: {failed_queries}")
    return ids_dictionary, failed_queries

In [74]:
#gets each publications by its workid

def findAuthor(authorID):
    fullquery = base_url+'authors/'+authorID
    response = requests.get(fullquery)
    data = response.json()

    # Specify the keys you're interested in
    keys = [
        "affiliations",
        "cited_by_count",
        "display_name",
        "display_name_alternatives",
        "id",
        "ids",
        "last_known_institutions",
        "summary_stats",
        "works_api_url",
        "works_count",
        "topics"
    ]   

    # Create a new dictionary with only the specified keys
    visualize_data = {key: data.get(key, None) for key in keys}

    return visualize_data

In [75]:
# Function to query Dimensions
def query_dimensions(ids):
    ids_str = ', '.join([f'"{id}"' for id in ids])
    q = f"""search researchers where id in [{ids_str}] return researchers[id+current_research_org+dimensions_url+first_grant_year+first_publication_year+last_grant_year+last_name+last_publication_year+nih_ppid+obsolete+orcid_id+redirect+research_orgs+score+total_grants+total_publications]"""
    result = dsl.query_iterative(q, show_results=None, limit=100, skip=0, pause=1.5, force=False, maxlimit=0, verbose=None, _tot_count_prev_query=0, _warnings_tot=None)
    if not "researchers" in result.data:
        return []
    return result.data["researchers"]

# Function to query OpenAlex
def query_openalex(author_name):
    result_dict = [] #Storing a list of dictionaries, each representing an author with the keys as the characteristics (i.e. id, title, pub_date)
    authorIds, failed = author_ids(author_name)
    print(author_name + ": "+ str(authorIds))
    for author_name, author_data in authorIds.items():
        for authorId_dict in author_data.values():
            authorId = authorId_dict['id']  # Extract the ID from the dictionary
            author_details = findAuthor(authorId)
            result_dict.append(author_details)
    print("openalex result dict: ", result_dict)
    return result_dict



# Main loop to process each author
for author in id_dictionary["people"][2300:4732]:
    first_name = author["first_name"].strip(".")
    middle_name = author["middle_name"].strip(".")
    last_name = author["last_name"].strip(".")
    asci_year_value = author["year"]

    if author["middle_name"]: 
        author_name = first_name + " " + middle_name + " " + last_name
    else: 
        author_name = first_name + " " + last_name

    cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '_', author_name)
    cleaned_name = re.sub(r'^\d', '_', cleaned_name)

    execute_command(f"""CREATE TABLE IF NOT EXISTS {cleaned_name} (
        id STRING PRIMARY KEY,
        current_research_org STRING,
        dimensions_url STRING,
        first_grant_year STRING,
        first_name STRING,
        first_publication_year STRING,
        last_grant_year STRING,
        last_name STRING,
        last_publication_year STRING,
        nih_ppid STRING,
        obsolete STRING,
        orcid_id STRING,
        redirect STRING,
        research_orgs STRING,
        score STRING,
        total_grants STRING,
        total_publications STRING,
        affiliations STRING,
        cited_by_count STRING,
        display_name STRING,
        display_name_alternatives STRING,
        last_known_institutions STRING,
        summary_stats STRING,
        works_api_url STRING,
        works_count STRING,
        topics STRING,
        asci_year STRING
    );
    """)
    
    ids = list(researcherIds(author_name))

    print(f"-------------querying for {author_name}----------------------")

    dimensions_data = query_dimensions(ids)
    openalex_data = query_openalex(author_name)
    
    combined_data = dimensions_data + openalex_data
    print("THIIS IS THE COMBIIEND DATA: ", combined_data)
    
    columns = [
        "id",
        "current_research_org",
        "dimensions_url",
        "first_grant_year",
        "first_name",
        "first_publication_year",
        "last_grant_year",
        "last_name",
        "last_publication_year",
        "nih_ppid",
        "obsolete",
        "orcid_id",
        "redirect",
        "research_orgs",
        "score",
        "total_grants",
        "total_publications",
        "affiliations",
        "cited_by_count",
        "display_name",
        "display_name_alternatives",
        "last_known_institutions",
        "summary_stats",
        "works_api_url",
        "works_count",
        "topics",
        "asci_year"
    ]
    

    for researcher in combined_data: 
        print("-----------researcher-------------", researcher)
        fields = []
        values = []
        update_fields = []   
        for field, value in researcher.items():
            if not isinstance(value, str):
                value = json.dumps(value)
            value = value.replace("'", "''")
            if field in columns:
                fields.append(field)
                values.append(f"'{value}'")
                update_fields.append(f"{field} = EXCLUDED.{field}")

        columns_str = ', '.join(fields)
        values_str = ', '.join(values)
        update_str = ', '.join(update_fields)

        # insert_query = f"INSERT INTO {cleaned_name} ({columns_str}) VALUES({values_str});"
        insert_query = f"""INSERT INTO {cleaned_name} ({columns_str}) VALUES({values_str}) ON CONFLICT (id) DO UPDATE SET {update_str};"""
        execute_command(insert_query)

        # Handle ORCID IDs from Dimensions
        if 'orcid_id' in researcher and researcher['orcid_id']:
            orcid_id = str(researcher['orcid_id']).strip('["]').strip('\'"')
            print("-----------researcherid-------------", researcher['id'])
            print("-----------orcid_id-------------", orcid_id)
            # Insert a new row with the ORCID ID, duplicating other values
            orcid_insert_query = f"INSERT INTO {cleaned_name} ({columns_str}) VALUES({values_str.replace(researcher['id'], str(orcid_id))}) ON CONFLICT (id) DO UPDATE SET {values_str.replace(researcher['id'], str(orcid_id))};"
            execute_command(orcid_insert_query)

        # Handle ORCID IDs from OpenAlex
        if 'ids' in researcher and 'orcid' in researcher['ids']:
            orcid_id = researcher['ids']['orcid']
            print("-----------researcherid-------------", researcher['id'])
            print("-----------orcid_id-------------", orcid_id)
            # Insert a new row with the ORCID ID, duplicating other values
            orcid_insert_query = f"INSERT INTO {cleaned_name} ({columns_str}) VALUES({values_str.replace(researcher['id'], str(orcid_id))}) ON CONFLICT (id) DO UPDATE SET {values_str.replace(researcher['id'], str(orcid_id))};"
            execute_command(orcid_insert_query)
    

    
    # Update the asci_year column
    execute_command(f"""UPDATE {cleaned_name} SET asci_year = '{asci_year_value}';""")

Starting iteration with limit=1000 skip=0 ...[0m
0-7 / 7 (0.32s)[0m
===
Records extracted: 7[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 7 unique ids for Jonathan Lindner 


0-7 / 7 (0.28s)[0m
===
Records extracted: 7[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 7 Working ID(s) +++++++++++
{'ur.011755311476.25', 'ur.01242700051.26', 'ur.016017457303.89', 'ur.01175006530.14', 'ur.014144320643.55', 'ur.0670046312.82', 'ur.010011051350.54'}
-------------querying for Jonathan Lindner----------------------


0-7 / 7 (2.71s)[0m
===
Records extracted: 7[0m


Jonathan Lindner done
Failed queries: set()
Jonathan Lindner: {'Jonathan Lindner': {'A5037936416': {'id': 'A5037936416', 'relevance_score': 22497.271, 'works_count': 444, 'cited_by_count': 25381, 'ids': {'openalex': 'https://openalex.org/A5037936416', 'orcid': 'https://orcid.org/0000-0003-2604-5277'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I165690674', 'ror': 'https://ror.org/009avj582', 'display_name': 'Oregon Health & Science University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I165690674']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210134211', 'ror': 'https://ror.org/046kb4y45', 'display_name': 'University of Virginia Medical Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I2799765794', 'https://openalex.org/I4210134211']}, 'years': [2024, 2023, 2022, 2005, 2004, 2002, 2000, 1998, 1997, 1996]}, {'institution'

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.24s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Pamela Ling 


0-1 / 1 (0.24s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.0772067763.56'}
-------------querying for Pamela Ling----------------------


0-1 / 1 (4.56s)[0m
===
Records extracted: 1[0m


Pamela Ling done
Failed queries: set()
Pamela Ling: {'Pamela Ling': {'A5030374908': {'id': 'A5030374908', 'relevance_score': 10768.206, 'works_count': 4488, 'cited_by_count': 150313, 'ids': {'openalex': 'https://openalex.org/A5030374908', 'orcid': 'https://orcid.org/0009-0004-6334-051X'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I4210135723', 'ror': 'https://ror.org/02txedb84', 'display_name': 'Shanghai Institute of Technical Physics', 'country_code': 'CN', 'type': 'facility', 'lineage': ['https://openalex.org/I19820366', 'https://openalex.org/I4210135723']}, 'years': [2024]}, {'institution': {'id': 'https://openalex.org/I19820366', 'ror': 'https://ror.org/034t30j35', 'display_name': 'Chinese Academy of Sciences', 'country_code': 'CN', 'type': 'government', 'lineage': ['https://openalex.org/I19820366']}, 'years': [2024, 2022, 2020, 2018, 2017, 2014, 2013, 2012, 2011, 2010]}, {'institution': {'id': 'https://openalex.org/I136199984', 'ror': 'https://ror.org/03vek6s52

Starting iteration with limit=1000 skip=0 ...[0m
===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 0 unique ids for Vishwanath Rao Lingappa 



>>>[Dimcli tip] An error occurred with the batch '0-1000'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m
Starting iteration with limit=100 skip=0 ...[0m


Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'
No IDs found for the Vishwanath Rao Lingappa
-------------querying for Vishwanath Rao Lingappa----------------------
Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'



>>>[Dimcli tip] An error occurred with the batch '0-100'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m


Vishwanath Rao Lingappa done
Failed queries: set()
Vishwanath Rao Lingappa: {}
openalex result dict:  []
THIIS IS THE COMBIIEND DATA:  []


Starting iteration with limit=1000 skip=0 ...[0m
0-2 / 2 (0.32s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 2 unique ids for Daniel C Link 


0-2 / 2 (0.27s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 2 Working ID(s) +++++++++++
{'ur.016375226437.33', 'ur.010633200331.89'}
-------------querying for Daniel C Link----------------------


0-2 / 2 (4.54s)[0m
===
Records extracted: 2[0m


Daniel C Link done
Failed queries: set()
Daniel C Link: {'Daniel C Link': {'A5049284725': {'id': 'A5049284725', 'relevance_score': 29339.521, 'works_count': 799, 'cited_by_count': 35254, 'ids': {'openalex': 'https://openalex.org/A5049284725', 'orcid': 'https://orcid.org/0000-0002-3170-7581'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I204465549', 'ror': 'https://ror.org/01yc7t268', 'display_name': 'Washington University in St. Louis', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I204465549']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210135078', 'ror': 'https://ror.org/036c27j91', 'display_name': 'Washington University Medical Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I4210135078']}, 'years': [2019, 1992, 1991]}, {'institution': {'id': 'https://openalex.org/I4210119077', 'ror': 'https://ror.org/02kb97560', 'displa

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.30s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for MacRae F Linton 


0-1 / 1 (0.29s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.01326031522.43'}
-------------querying for MacRae F Linton----------------------


0-1 / 1 (0.32s)[0m
===
Records extracted: 1[0m


MacRae F Linton done
Failed queries: set()
MacRae F Linton: {'MacRae F Linton': {'A5089000198': {'id': 'A5089000198', 'relevance_score': 28053.514, 'works_count': 363, 'cited_by_count': 18894, 'ids': {'openalex': 'https://openalex.org/A5089000198', 'orcid': 'https://orcid.org/0000-0002-9277-0453'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I901861585', 'ror': 'https://ror.org/05dq2gs74', 'display_name': 'Vanderbilt University Medical Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I4210162197', 'https://openalex.org/I901861585']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I200719446', 'ror': 'https://ror.org/02vm5rt34', 'display_name': 'Vanderbilt University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I200719446']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://o

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.31s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Michail S Lionakis 


0-1 / 1 (0.25s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.01044511164.71'}
-------------querying for Michail S Lionakis----------------------


0-1 / 1 (0.31s)[0m
===
Records extracted: 1[0m


Michail S Lionakis done
Failed queries: set()
Michail S Lionakis: {'Michail S Lionakis': {'A5022799608': {'id': 'A5022799608', 'relevance_score': 27813.691, 'works_count': 445, 'cited_by_count': 16763, 'ids': {'openalex': 'https://openalex.org/A5022799608', 'orcid': 'https://orcid.org/0000-0003-4994-9500', 'scopus': 'http://www.scopus.com/inward/authorDetails.url?authorID=6507497145&partnerID=MN8TOARS'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I1299303238', 'ror': 'https://ror.org/01cwqze88', 'display_name': 'National Institutes of Health', 'country_code': 'US', 'type': 'government', 'lineage': ['https://openalex.org/I1299022934', 'https://openalex.org/I1299303238']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210134534', 'ror': 'https://ror.org/043z4tv69', 'display_name': 'National Institute of Allergy and Infectious Diseases', 'country_code': 'US', 'type': 'facility', 'lineage': ['https:

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.30s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Lance A Liotta 


0-1 / 1 (0.95s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.014136153365.29'}
-------------querying for Lance A Liotta----------------------


0-1 / 1 (0.44s)[0m
===
Records extracted: 1[0m


Lance A Liotta done
Failed queries: set()
Lance A Liotta: {'Lance A Liotta': {'A5086886026': {'id': 'A5086886026', 'relevance_score': 60881.3, 'works_count': 1063, 'cited_by_count': 93200, 'ids': {'openalex': 'https://openalex.org/A5086886026', 'orcid': 'https://orcid.org/0000-0001-5155-7907'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I162714631', 'ror': 'https://ror.org/02jqj7156', 'display_name': 'George Mason University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I162714631']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I79576946', 'ror': 'https://ror.org/00b30xv10', 'display_name': 'University of Pennsylvania', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I79576946']}, 'years': [2019, 2012, 2007]}, {'institution': {'id': 'https://openalex.org/I32971472', 'ror': 'https://ror.org/03v76x132', 'display_name': 'Yale University'

Starting iteration with limit=1000 skip=0 ...[0m
===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 0 unique ids for Steven M Lipkin 
Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'



>>>[Dimcli tip] An error occurred with the batch '0-1000'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m
Starting iteration with limit=100 skip=0 ...[0m


No IDs found for the Steven M Lipkin
-------------querying for Steven M Lipkin----------------------
Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'



>>>[Dimcli tip] An error occurred with the batch '0-100'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m


Steven M Lipkin done
Failed queries: set()
Steven M Lipkin: {'Steven M Lipkin': {'A5040643638': {'id': 'A5040643638', 'relevance_score': 16927.283, 'works_count': 315, 'cited_by_count': 8916, 'ids': {'openalex': 'https://openalex.org/A5040643638', 'orcid': 'https://orcid.org/0000-0002-0603-9139'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I205783295', 'ror': 'https://ror.org/05bnh6r87', 'display_name': 'Cornell University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I205783295']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210130527', 'ror': 'https://ror.org/03aeycp46', 'display_name': 'Meyer (China)', 'country_code': 'CN', 'type': 'company', 'lineage': ['https://openalex.org/I4210130527']}, 'years': [2017]}, {'institution': {'id': 'https://openalex.org/I145220665', 'ror': 'https://ror.org/01wvxpc32', 'display_name': 'Cornell College', 'country_code': 'US', 

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.34s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Marc E Lippman 


0-1 / 1 (0.29s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.01242102471.41'}
-------------querying for Marc E Lippman----------------------


0-1 / 1 (2.75s)[0m
===
Records extracted: 1[0m


Marc E Lippman done
Failed queries: set()
Marc E Lippman: {'Marc E Lippman': {'A5028539104': {'id': 'A5028539104', 'relevance_score': 37407.88, 'works_count': 612, 'cited_by_count': 42632, 'ids': {'openalex': 'https://openalex.org/A5028539104', 'orcid': 'https://orcid.org/0000-0001-5280-4084'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I2799903593', 'ror': 'https://ror.org/00hjz7x27', 'display_name': 'Georgetown University Medical Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I2799903593']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2004, 2003, 2002, 2001]}, {'institution': {'id': 'https://openalex.org/I184565670', 'ror': 'https://ror.org/05vzafd60', 'display_name': 'Georgetown University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I184565670']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2017, 2005, 2004, 2003]}, {'institution': {'id': 'https://openalex.org/I145608581', 'ror': 'https

Starting iteration with limit=1000 skip=0 ...[0m
===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 0 unique ids for Howard L Lippton 
Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'



>>>[Dimcli tip] An error occurred with the batch '0-1000'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m
Starting iteration with limit=100 skip=0 ...[0m


No IDs found for the Howard L Lippton
-------------querying for Howard L Lippton----------------------



>>>[Dimcli tip] An error occurred with the batch '0-100'. Consider using the 'limit' argument to retrieve fewer records per iteration, or use 'force=True' to ignore errors and continue the extraction.[0m


Query Error
1 QuerySyntaxError found

1 ParserError found
  * [Line 1:32] (']') no viable alternative at input '[]'
Howard L Lippton done
Failed queries: set()
Howard L Lippton: {'Howard L Lippton': {'A5028376304': {'id': 'A5028376304', 'relevance_score': 9512.51, 'works_count': 112, 'cited_by_count': 3484, 'ids': {'openalex': 'https://openalex.org/A5028376304'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I121820613', 'ror': 'https://ror.org/05ect4e57', 'display_name': 'Louisiana State University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I121820613']}, 'years': [2001, 1998, 1997, 1995, 1994, 1993, 1992, 1991, 1990, 1989]}, {'institution': {'id': 'https://openalex.org/I75420490', 'ror': 'https://ror.org/01qv8fp92', 'display_name': 'Louisiana State University Health Sciences Center New Orleans', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I75420490']}, 'years': [2001, 1998, 1995]}, {'institution': {

Starting iteration with limit=1000 skip=0 ...[0m
0-2 / 2 (4.20s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 2 unique ids for Peter E Lipsky 


0-2 / 2 (0.23s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 2 Working ID(s) +++++++++++
{'ur.01365642471.01', 'ur.01044277536.75'}
-------------querying for Peter E Lipsky----------------------


0-2 / 2 (0.24s)[0m
===
Records extracted: 2[0m


Peter E Lipsky done
Failed queries: set()
Peter E Lipsky: {'Peter E Lipsky': {'A5043874873': {'id': 'A5043874873', 'relevance_score': 40845.754, 'works_count': 895, 'cited_by_count': 53895, 'ids': {'openalex': 'https://openalex.org/A5043874873', 'orcid': 'https://orcid.org/0000-0002-9287-1676'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I4210110460', 'ror': 'https://ror.org/01z71je29', 'display_name': 'Ampel BioSolutions (United States)', 'country_code': 'US', 'type': 'company', 'lineage': ['https://openalex.org/I4210110460']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210091245', 'ror': 'https://ror.org/00cm7z053', 'display_name': 'Health First', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I4210091245']}, 'years': [2024]}, {'institution': {'id': 'https://openalex.org/I53276908', 'ror': 'https://ror.org/0008kv292', 'display_name': 'Marymount University', 'c

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.61s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Conor M Liston 


0-1 / 1 (0.25s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.0643554311.14'}
-------------querying for Conor M Liston----------------------


0-1 / 1 (2.62s)[0m
===
Records extracted: 1[0m


Conor M Liston done
Failed queries: set()
Conor M Liston: {'Conor M Liston': {'A5052319154': {'id': 'A5052319154', 'relevance_score': 19322.045, 'works_count': 182, 'cited_by_count': 16007, 'ids': {'openalex': 'https://openalex.org/A5052319154', 'orcid': 'https://orcid.org/0000-0002-1639-5401'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I205783295', 'ror': 'https://ror.org/05bnh6r87', 'display_name': 'Cornell University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I205783295']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210097825', 'ror': 'https://ror.org/00we1gw23', 'display_name': 'MIND Research Institute', 'country_code': 'US', 'type': 'nonprofit', 'lineage': ['https://openalex.org/I4210097825']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]}, {'institution': {'id': 'https://openalex.org/I4210107408', 'ror': 'https://ror.org/01m7

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.40s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Piro Lito 


0-1 / 1 (0.27s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.01116324012.77'}
-------------querying for Piro Lito----------------------


0-1 / 1 (0.30s)[0m
===
Records extracted: 1[0m


Piro Lito done
Failed queries: set()
Piro Lito: {'Piro Lito': {'A5055707551': {'id': 'A5055707551', 'relevance_score': 16453.055, 'works_count': 102, 'cited_by_count': 6755, 'ids': {'openalex': 'https://openalex.org/A5055707551', 'orcid': 'https://orcid.org/0000-0003-2196-3503'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I205783295', 'ror': 'https://ror.org/05bnh6r87', 'display_name': 'Cornell University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I205783295']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2017, 2016]}, {'institution': {'id': 'https://openalex.org/I1334819555', 'ror': 'https://ror.org/02yrq0923', 'display_name': 'Memorial Sloan Kettering Cancer Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I1334819555']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2014]}, {'institution': {'id': 'https://openalex.org/I39965400', 'ror': 'https://ror.org/03rcspa57', 'displ

Starting iteration with limit=1000 skip=0 ...[0m
0-9 / 9 (0.34s)[0m
===
Records extracted: 9[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 9 unique ids for John R Little 


0-9 / 9 (0.33s)[0m
===
Records extracted: 9[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 9 Working ID(s) +++++++++++
{'ur.0636135105.53', 'ur.066630517.30', 'ur.01262476675.36', 'ur.061552725.56', 'ur.057015623.80', 'ur.01070773017.25', 'ur.016603066674.03', 'ur.0764207171.45', 'ur.060135525.59'}
-------------querying for John R Little----------------------


0-9 / 9 (0.39s)[0m
===
Records extracted: 9[0m


John R Little done
Failed queries: set()
John R Little: {'John R Little': {'A5091328673': {'id': 'A5091328673', 'relevance_score': 15689.71, 'works_count': 264, 'cited_by_count': 10956, 'ids': {'openalex': 'https://openalex.org/A5091328673', 'orcid': 'https://orcid.org/0000-0003-2034-2684'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I170897317', 'ror': 'https://ror.org/00py81415', 'display_name': 'Duke University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I170897317']}, 'years': [2024]}, {'institution': {'id': 'https://openalex.org/I78577930', 'ror': 'https://ror.org/00hj8s172', 'display_name': 'Columbia University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex.org/I78577930']}, 'years': [2022, 2020]}, {'institution': {'id': 'https://openalex.org/I57206974', 'ror': 'https://ror.org/0190ak572', 'display_name': 'New York University', 'country_code': 'US', 'type': 'education', 'lineage': ['https://openalex

Starting iteration with limit=1000 skip=0 ...[0m
0-5 / 5 (0.53s)[0m
===
Records extracted: 5[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 5 unique ids for William C Little 


0-5 / 5 (0.26s)[0m
===
Records extracted: 5[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 5 Working ID(s) +++++++++++
{'ur.01211352071.18', 'ur.0754244072.30', 'ur.01107307222.13', 'ur.013025754251.54', 'ur.011304441212.10'}
-------------querying for William C Little----------------------


0-5 / 5 (2.77s)[0m
===
Records extracted: 5[0m


William C Little done
Failed queries: set()
William C Little: {'William C Little': {'A5013401681': {'id': 'A5013401681', 'relevance_score': 21699.582, 'works_count': 361, 'cited_by_count': 19432, 'ids': {'openalex': 'https://openalex.org/A5013401681'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I1289110261', 'ror': 'https://ror.org/02tdmfk69', 'display_name': 'Belfast Health and Social Care Trust', 'country_code': 'GB', 'type': 'healthcare', 'lineage': ['https://openalex.org/I1289110261']}, 'years': [2021]}, {'institution': {'id': 'https://openalex.org/I29606459', 'ror': 'https://ror.org/044pcn091', 'display_name': 'University of Mississippi Medical Center', 'country_code': 'US', 'type': 'healthcare', 'lineage': ['https://openalex.org/I29606459', 'https://openalex.org/I4210141039']}, 'years': [2018, 2016, 2015, 2014]}, {'institution': {'id': 'https://openalex.org/I2802864460', 'ror': 'https://ror.org/02y070a55', 'display_name': 'Jackson Memorial Hospital', 'country_c

Starting iteration with limit=1000 skip=0 ...[0m
0-2 / 2 (0.34s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 2 unique ids for Stephen D Litwin 


0-2 / 2 (0.25s)[0m
===
Records extracted: 2[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 2 Working ID(s) +++++++++++
{'ur.013644270424.26', 'ur.01061160231.43'}
-------------querying for Stephen D Litwin----------------------


0-2 / 2 (0.29s)[0m
===
Records extracted: 2[0m


Stephen D Litwin done
Failed queries: set()
Stephen D Litwin: {'Stephen D Litwin': {'A5003294432': {'id': 'A5003294432', 'relevance_score': 9699.039, 'works_count': 134, 'cited_by_count': 2361, 'ids': {'openalex': 'https://openalex.org/A5003294432'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I4210113115', 'ror': 'https://ror.org/024hqjk04', 'display_name': 'Administración Nacional de Laboratorios e Institutos de Salud', 'country_code': 'AR', 'type': 'government', 'lineage': ['https://openalex.org/I4210113115']}, 'years': [2021, 2020, 2019, 2017, 2014, 2010, 2004, 2003, 2000, 1999]}, {'institution': {'id': 'https://openalex.org/I4210110275', 'ror': 'https://ror.org/024h8p458', 'display_name': 'Ministerio de Salud', 'country_code': 'AR', 'type': 'government', 'lineage': ['https://openalex.org/I4210110275']}, 'years': [2021, 2020, 2019, 2001]}, {'institution': {'id': 'https://openalex.org/I4210087537', 'ror': 'https://ror.org/004wh0w58', 'display_name': 'Guthrie Founda

Starting iteration with limit=1000 skip=0 ...[0m
0-1 / 1 (0.31s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 1 unique ids for Edison Tak-Bun Liu 


0-1 / 1 (0.30s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 1 Working ID(s) +++++++++++
{'ur.01244760140.44'}
-------------querying for Edison Tak-Bun Liu----------------------


0-1 / 1 (2.73s)[0m
===
Records extracted: 1[0m


Edison Tak-Bun Liu done
Failed queries: set()
Edison Tak-Bun Liu: {'Edison Tak-Bun Liu': {'A5061163700': {'id': 'A5061163700', 'relevance_score': 5614.1577, 'works_count': 8, 'cited_by_count': 443, 'ids': {'openalex': 'https://openalex.org/A5061163700'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I66068411', 'ror': 'https://ror.org/05k8wg936', 'display_name': 'Genome Institute of Singapore', 'country_code': 'SG', 'type': 'facility', 'lineage': ['https://openalex.org/I115228651', 'https://openalex.org/I2801752549', 'https://openalex.org/I66068411']}, 'years': [2012, 2008, 2007, 2005, 2004]}], 'last_known_institutions': [{'id': 'https://openalex.org/I66068411', 'ror': 'https://ror.org/05k8wg936', 'display_name': 'Genome Institute of Singapore', 'country_code': 'SG', 'type': 'facility', 'lineage': ['https://openalex.org/I115228651', 'https://openalex.org/I2801752549', 'https://openalex.org/I66068411']}], 'topics': [{'id': 'https://openalex.org/T11482', 'display_name': '

Starting iteration with limit=1000 skip=0 ...[0m
0-6 / 6 (3.09s)[0m
===
Records extracted: 6[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 6 unique ids for Fu-Tong Liu 


0-6 / 6 (0.40s)[0m
===
Records extracted: 6[0m
Starting iteration with limit=100 skip=0 ...[0m


++++++++ FINAL 6 Working ID(s) +++++++++++
{'ur.015444377011.83', 'ur.01056510446.59', 'ur.010370561012.75', 'ur.01371445004.50', 'ur.015336541304.73', 'ur.063157767.08'}
-------------querying for Fu-Tong Liu----------------------


0-6 / 6 (0.33s)[0m
===
Records extracted: 6[0m


Fu-Tong Liu done
Failed queries: set()
Fu-Tong Liu: {'Fu-Tong Liu': {'A5100394072': {'id': 'A5100394072', 'relevance_score': 33581.855, 'works_count': 29415, 'cited_by_count': 498873, 'ids': {'openalex': 'https://openalex.org/A5100394072', 'orcid': 'https://orcid.org/0000-0002-4213-2883'}, 'affiliations': [{'institution': {'id': 'https://openalex.org/I182386381', 'ror': 'https://ror.org/00g5b0g93', 'display_name': 'Zunyi Medical University', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I182386381']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2013]}, {'institution': {'id': 'https://openalex.org/I4800084', 'ror': 'https://ror.org/00hn7w693', 'display_name': 'Southwest Jiaotong University', 'country_code': 'CN', 'type': 'education', 'lineage': ['https://openalex.org/I4800084']}, 'years': [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2013]}, {'institution': {'id': 'https://openalex.org/I195019228', 'ror': 'https://ror.org/04dpa

KeyboardInterrupt: 

In [None]:
# #Drop Table (If you want to delete a table from the database)
# for author in id_dictionary["people"][10:35]:
    
#     first_name = author["first_name"].strip(".")
#     middle_name = author["middle_name"].strip(".")
#     last_name = author["last_name"].strip(".")

#     if author["middle_name"]: 
#         author_name = first_name + " " + middle_name + " " + last_name
#     else: 
#         author_name = first_name + " " + last_name

#     cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '_', author_name)
#     cleaned_name = re.sub(r'^\d', '_', cleaned_name)

#     execute_command(f"""DROP TABLE IF EXISTS {cleaned_name};""")
