In [1]:
from tqdm.notebook import tqdm as progress
import pandas as pd
import plotly.express as px
from plotly.offline import plot
import dimcli
from dimcli.utils import *
import os, sys, time, json
from IPython.display import Image
from IPython.core.display import HTML 

In [2]:
# Log into Dimensions.
# Authentication docs: https://digital-science.github.io/dimcli/getting-started.html#authentication
#
# SECURITY: the API key must never be written into the notebook (neither as a
# literal nor in a comment) -- notebooks get shared and committed. Any key that
# was previously embedded in this cell should be treated as compromised and rotated.
#!pip install dimcli -U --quiet

import getpass

print("==\nLogging in..")
ENDPOINT = "https://app.dimensions.ai"

# Take the key from the DIMENSIONS_API_KEY environment variable when it is set;
# otherwise prompt for it interactively (works in Colab and in local Jupyter)
# so the secret is never stored in the notebook source or echoed to the output.
KEY = os.environ.get("DIMENSIONS_API_KEY") or getpass.getpass(prompt='API Key: ')
dimcli.login(key=KEY, endpoint=ENDPOINT)

# Shared query client used by every cell below.
dsl = dimcli.Dsl()

==
Logging in..
[2mDimcli - Dimensions API Client (v1.1)[0m
[2mConnected to: <https://app.dimensions.ai/api/dsl> - DSL v2.8[0m
[2mMethod: manual login[0m
====
Heads up! The latest Dimcli version is  1.2
You have installed:  1.1
====
Please upgrade: `pip install dimcli -U`


In [3]:
# Investigators to look up, written as upper-case "FIRST LAST".
names = [
    "MICHAEL AGUS", "DOROTHY BAINTON", "CHARLES BALCH", "JEFFREY BALSER",
    "KEVIN CATT", "HENRY CHAMBERS", "RICHARD CLARK", "STEPHANIE EISENBARTH",
    "PENG JI", "PETER FECCI", "TIPPI MACKENZIE", "PURNA KASHYAP",
]

# Short subset of `names` for quick trial runs.
namesTest = ["MICHAEL AGUS", "PETER FECCI", "TIPPI MACKENZIE", "PURNA KASHYAP"]

# Grant fields requested from the API, in the order they appear in the query.
# Field descriptions: https://docs.dimensions.ai/dsl/datasource-grants.html#grants
_GRANT_FIELDS = (
    "abstract", "active_year",
    "category_bra", "category_for", "category_for_2020", "category_hra",
    "category_hrcs_hc", "category_hrcs_rac", "category_icrp_cso", "category_icrp_ct",
    "category_rcdc", "category_sdg", "category_uoa",
    "concepts", "concepts_scores", "date_inserted", "dimensions_url",
    "end_date", "foa_number",
    "funder_countries", "funder_org_acronym", "funder_org_cities",
    "funder_org_countries", "funder_org_name", "funder_org_states",
    "funder_orgs", "funders",
    "funding_aud", "funding_cad", "funding_chf", "funding_cny", "funding_currency",
    "funding_eur", "funding_gbp", "funding_jpy", "funding_nzd",
    "funding_org_acronym", "funding_org_cities", "funding_org_city",
    "funding_org_name", "funding_org_states", "funding_schemes", "funding_usd",
    "grant_number", "id", "investigators", "keywords",
    "language", "language_title", "linkout", "original_title", "project_numbers",
    "research_org_cities", "research_org_countries", "research_org_names",
    "research_org_state_codes", "research_orgs", "researchers",
    "score", "start_date", "start_year", "title",
)

# Bracketed "[a + b + ...]" field selector appended after `return grants` in the queries below.
search_Var = "[" + " + ".join(_GRANT_FIELDS) + "]"

In [4]:
def researcherIds(name, chunk_size=400):
    """Resolve a person's name to the set of current Dimensions researcher IDs.

    Runs a full-text researcher search for ``name``, then looks the matching
    IDs up again requesting ``id``, ``obsolete`` and ``redirect``. Records whose
    ``obsolete`` value is 0 contribute their own ID; any other record contributes
    the IDs listed in its ``redirect`` field instead.

    Args:
        name: Name to search for; it is interpolated verbatim into the DSL query.
        chunk_size: Maximum number of IDs placed in a single ``id in [...]``
            filter. Long ID lists are looked up in several batches.
            NOTE(review): 400 is a conservative guess to stay under the API's
            filter/query size limits -- confirm against the Dimensions docs.

    Returns:
        A set of researcher ID strings; empty when the search finds nobody.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    res = dsl.query_iterative(f"""search researchers for "{name}" return researchers""")
    # .get(): do not assume the 'researchers' key is present in the payload
    # (e.g. if the query came back without results).
    allIds = list({rec['id'] for rec in res.json.get('researchers', [])})
    print(f'Total of {len(allIds)} unique ids for {name}: ')
    print(allIds)

    allWorkingIDs = set()
    # With no IDs the loop body never runs, so we never send `where id in []`.
    for start in range(0, len(allIds), chunk_size):
        batch = allIds[start:start + chunk_size]
        query_ids = ', '.join(f'"{id_val}"' for id_val in batch)
        query = f'search researchers where id in [{query_ids}] return researchers[id+obsolete+redirect]'

        res2 = dsl.query_iterative(query)
        for person in res2.json.get('researchers', []):
            # A record without an `obsolete` flag is treated as current.
            if person.get('obsolete', 0) == 0:  # Current working id
                allWorkingIDs.add(person['id'])
            else:
                # Obsolete record: follow its redirects. The field may be
                # absent or empty, in which case there is nothing to add.
                allWorkingIDs.update(person.get('redirect') or [])

    print(f'++++++++ FINAL {len(allWorkingIDs)} Working ID(s) +++++++++++')
    print(allWorkingIDs)

    return allWorkingIDs
# Trial run for a single name; this issues live API queries through `dsl`.
testIDs = researcherIds("David Sankey")

Starting iteration with limit=1000 skip=0 ...[0m
0-193 / 193 (6.09s)[0m
===
Records extracted: 193[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 193 unique ids for David Sankey: 
['ur.011325562211.34', 'ur.013256511441.79', 'ur.015731176027.95', 'ur.01054242413.45', 'ur.014172422321.14', 'ur.01116742271.23', 'ur.014320114311.06', 'ur.07433744221.78', 'ur.015012240060.68', 'ur.016240520521.24', 'ur.010444417475.96', 'ur.015450214527.05', 'ur.01140270701.69', 'ur.016612025002.93', 'ur.0714036640.60', 'ur.01124375426.12', 'ur.015366255521.68', 'ur.016063673701.58', 'ur.015621235575.61', 'ur.01315671130.33', 'ur.0605462501.51', 'ur.015756715251.04', 'ur.015470321621.20', 'ur.013043034641.00', 'ur.015102477771.79', 'ur.011553606451.89', 'ur.01030361026.32', 'ur.01101444145.85', 'ur.011022604501.09', 'ur.011657124747.33', 'ur.01044206131.26', 'ur.014072773667.31', 'ur.015525235716.30', 'ur.010630635107.77', 'ur.016214721561.67', 'ur.010075412767.97', 'ur.013514112141.39', 'ur.014157553341.49', 'ur.0742162372.99', 'ur.01240027465.25', 'ur.011007424357.96', 'ur.012350012623.03', 'ur.01000634025.05', 'ur.015664650234.51', 'ur.0

0-193 / 193 (0.64s)[0m
===
Records extracted: 193[0m


++++++++ FINAL 2 Working ID(s) +++++++++++
{'ur.012304571623.47', 'ur.016251502631.77'}


In [5]:
#using researcher ids to find grants
def findingGrants(ids):
    """Fetch the grants linked to each researcher ID.

    Args:
        ids: Sized collection of researcher ID strings.

    Returns:
        Dict mapping each ID to the list of grant records returned for it.
        When ``ids`` is empty a notice is printed and an empty dict is returned.
    """
    # Guard clause: nothing to query without at least one ID.
    if len(ids) < 1:
        print('+++++++++++There are no working ids+++++++++++')
        return {}

    grants_by_id = {}
    for researcher_id in ids:
        response = dsl.query_iterative(
            f"""search grants where researchers = "{researcher_id}" return grants{search_Var}"""
        )
        total = response.json['_stats']['total_count']
        print(f'{researcher_id} id has {total} grants')
        # Store a fresh list rather than the response's own list object.
        grants_by_id[researcher_id] = list(response.json['grants'])
    return grants_by_id
        
#findingGrants(testIDs)

In [6]:
#Finds grants based on active researcher ids

def idGrants(nameList):
    """Look up grants for every name via its working researcher IDs.

    Args:
        nameList: Iterable of names passed one at a time to ``researcherIds``.

    Returns:
        Dict mapping each name to whatever ``findingGrants`` returns for the
        IDs resolved from that name.
    """
    # Names are processed in order: resolve the IDs, then fetch grants for them.
    return {person: findingGrants(researcherIds(person)) for person in nameList}
#idGrants(namesTest)

In [7]:
#search index queries
def grantSearch(namesTest):
    """Print the grants found by a search-index query on the investigators field.

    For each name, runs ``search grants in investigators for "<name>"``, prints
    the reported total and then every returned grant record followed by a blank
    line. Nothing is returned.

    NOTE: a later cell defines another function with this same name; once that
    cell has run, ``grantSearch`` refers to that later definition.

    Args:
        namesTest: Iterable of investigator names to search for.
    """
    for investigator in namesTest:
        response = dsl.query_iterative(
            f"""search grants in investigators for "{investigator}" return grants{search_Var}"""
        )
        total = response.json['_stats']['total_count']
        print(f'{investigator} has {total} grants')
        for record in response.json['grants']:
            # One record per paragraph: the record, then an empty line.
            print(record, end="\n\n")

#grantSearch(namesTest)

In [8]:
# Exact-name searches (filter with `where investigators = "<name>"`)
def grantSearch(namesList):
    """Collect grants per name using the filter ``where investigators = "<name>"``.

    Args:
        namesList: Iterable of investigator names.

    Returns:
        Dict mapping each name to the list of grant records returned for it.
    """
    grants_by_name = {}
    for investigator in namesList:
        response = dsl.query_iterative(
            f"""search grants where investigators = "{investigator}" return grants{search_Var}"""
        )
        total = response.json['_stats']['total_count']
        print(f'{investigator} has {total} grants')
        # Copy the records into a new list owned by the result dict.
        grants_by_name[investigator] = [record for record in response.json['grants']]
    return grants_by_name



#grantSearch(namesTest)

In [None]:
# Use researcher ids when any are found; otherwise fall back to a search-index query on the name
def findallGrants(names):
    """Gather grants for each name, preferring researcher IDs over name search.

    For every name the working researcher IDs are resolved first. If at least
    one ID is found, grants are fetched per ID with ``findingGrants``. If none
    is found, a search-index query on the investigators field is used instead
    and its records are stored under the key ``"No ids"``.

    Args:
        names: Iterable of names to look up.

    Returns:
        Dict mapping each name to a dict of ``{researcher_id: [grants]}`` or,
        for the fallback path, ``{"No ids": [grants]}``.
    """
    results = {}
    for person in names:
        working_ids = researcherIds(person)

        if len(working_ids) == 0:
            # Fallback: no usable IDs, so search the investigators index by name.
            response = dsl.query_iterative(
                f"""search grants in investigators for "{person}" return grants{search_Var}"""
            )
            total = response.json['_stats']['total_count']
            print(f'{person} has {total} grants')
            results[person] = {"No ids": response.json['grants']}
            continue

        results[person] = findingGrants(working_ids)
    return results


# Example run for a single name; this issues live API queries through `dsl`.
findallGrants(["John Abel"])
    

Starting iteration with limit=1000 skip=0 ...[0m
0-27 / 27 (0.73s)[0m
===
Records extracted: 27[0m
Starting iteration with limit=1000 skip=0 ...[0m


Total of 27 unique ids for John Abel: 
['ur.07370032243.26', 'ur.013564276423.55', 'ur.013145453621.07', 'ur.073430540.26', 'ur.010740401257.50', 'ur.074673247.88', 'ur.0113601506.45', 'ur.015511503762.83', 'ur.01100100145.61', 'ur.012756721200.18', 'ur.0731023010.97', 'ur.011411730413.84', 'ur.013152145273.78', 'ur.011373457143.88', 'ur.0660057507.79', 'ur.01370644616.03', 'ur.013667277461.72', 'ur.012317776113.57', 'ur.014364356720.76', 'ur.011660271405.54', 'ur.053245756.22', 'ur.014464515022.13', 'ur.07754467115.07', 'ur.0105542203.93', 'ur.053660030.94', 'ur.014455657146.15', 'ur.01350722170.79']


0-27 / 27 (4.58s)[0m
===
Records extracted: 27[0m
Starting iteration with limit=1000 skip=0 ...[0m


++++++++ FINAL 17 Working ID(s) +++++++++++
{'ur.07370032243.26', 'ur.01100100145.61', 'ur.013564276423.55', 'ur.012317776113.57', 'ur.012756721200.18', 'ur.073430540.26', 'ur.010740401257.50', 'ur.011373457143.88', 'ur.074673247.88', 'ur.013152145273.78', 'ur.0660057507.79', 'ur.01370644616.03', 'ur.053660030.94', 'ur.011660271405.54', 'ur.015511503762.83', 'ur.013667277461.72', 'ur.01350722170.79'}


===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


ur.07370032243.26 id has 0 grants


===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


ur.01100100145.61 id has 0 grants


===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


ur.013564276423.55 id has 0 grants


===
Records extracted: 0[0m
Starting iteration with limit=1000 skip=0 ...[0m


ur.012317776113.57 id has 0 grants
