In [2]:
import openalexapi
import requests
import json
import csv
import time
import math
# Getting all author ids through search of first and last name (see get_authorIDs below).
# Root of the OpenAlex REST API; findWork appends 'works/<id>' to this prefix.
base_url = 'https://api.openalex.org/'

In [3]:
# finds all author ids given name
#%%time
def get_authorIDs(name):
    """Return every OpenAlex author id whose record matches a name search.

    Parameters
    ----------
    name : str
        Free-text author name passed to the ``search`` query parameter.

    Returns
    -------
    list of str
        Short author ids (for example ``'A4334433008'``) in the order the
        API returned them, with the ``https://openalex.org/`` prefix removed.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    endpoint = 'https://api.openalex.org/authors'
    listofIDs = []
    page = 1

    while True:
        # Let requests build the query string so that spaces and characters
        # such as '&' or '#' in the name are percent-encoded correctly.
        response = requests.get(endpoint, params={'search': name, 'page': page})
        response.raise_for_status()
        payload = response.json()

        results = payload['results']
        listofIDs.extend(
            result['id'].replace("https://openalex.org/", "") for result in results
        )

        # Derive the number of pages from the page size the API reports
        # rather than assuming it; fall back to 25 if it is not reported.
        meta = payload['meta']
        per_page = meta.get('per_page') or 25
        last_page = math.ceil(meta['count'] / per_page)

        # An empty page means there is nothing more to read, whatever the
        # reported count says; this also prevents an endless loop.
        if not results or page >= last_page:
            break
        page += 1

    print(f'There are {len(listofIDs)} author ids for {name}')
    return listofIDs

#get_authorIDs("William pao")

In [4]:
#finds all work ids given author ids
def work_id(givenAuthorID):
    """Collect the short ids of every work attributed to an author.

    Pages through the OpenAlex ``works`` endpoint filtered by
    ``author.id`` and stops at the first page that holds a different number
    of results than the reported page size.

    Parameters
    ----------
    givenAuthorID : str
        Short OpenAlex author id, e.g. ``'A4334433008'``.

    Returns
    -------
    list of str
        Work ids with the ``https://openalex.org/`` prefix stripped.
    """
    id_prefix = "https://openalex.org/"
    collected = []
    page_number = 1

    while True:
        payload = requests.get(
            f'https://api.openalex.org/works?filter=author.id:{givenAuthorID}&page={page_number}'
        ).json()
        batch = payload['results']

        for entry in batch:
            collected.append(entry['id'].replace(id_prefix, ""))

        # A full page means there may be another one to fetch.
        if len(batch) == payload['meta']['per_page']:
            page_number += 1
            continue
        break

    print(f'There are {len(collected)} works for {givenAuthorID}')
    return collected

#work_id('A4334433008')

In [5]:
#finds entire work summary given work id
def findWork(workId):
    """Fetch one work record and return it without its bulkiest fields.

    Parameters
    ----------
    workId : str
        Short OpenAlex work id, e.g. ``'W1986121817'``.

    Returns
    -------
    dict
        The decoded JSON record minus ``abstract_inverted_index``,
        ``related_works`` and ``ngrams_url``; remaining keys keep their
        original order.
    """
    record = requests.get(base_url + 'works/' + workId).json()

    # Fields dropped from the summary that is handed back to callers.
    dropped = ("abstract_inverted_index", "related_works", "ngrams_url")
    return {field: record[field] for field in record if field not in dropped}
#findWork('W1986121817')

In [6]:
#finds all work concepts given author id
def findWorkConcepts(aID):
    """Gather the concept names attached to all works of one author.

    A concept is kept only when its ``score`` is at least 0.3 and its
    ``level`` is at least 1; names are lower-cased before being stored.

    Parameters
    ----------
    aID : str
        Short OpenAlex author id.

    Returns
    -------
    set of str
        Lower-cased display names of the qualifying concepts.
    """
    concept_names = set()
    for work_identifier in work_id(aID):
        work = findWork(work_identifier)
        concept_names.update(
            entry['display_name'].lower()
            for entry in work['concepts']
            if float(entry['score']) >= 0.3 and float(entry['level']) >= 1
        )
    return concept_names
# Demo: concept names found for a single author id (the result is shown below).
findWorkConcepts('A4372739318')

There are 1 works for A4372739318


{'curriculum',
 'diversity (politics)',
 'mathematics education',
 'narrative',
 'pedagogy',
 'qualitative research'}

In [7]:
#find institution given author id
def findAuthorInstitutions(aID):
    """Return the display name of an author's last known institution.

    Reads the single-object ``last_known_institution`` field when present
    and otherwise falls back to the first entry of the
    ``last_known_institutions`` list that newer OpenAlex responses provide.

    Parameters
    ----------
    aID : str
        Short OpenAlex author id.

    Returns
    -------
    str or None
        The institution's ``display_name``, or ``None`` when the record
        lists no institution.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    response = requests.get(f'https://api.openalex.org/authors/{aID}')
    response.raise_for_status()
    author = response.json()

    # .get() instead of indexing: the singular field may be missing entirely.
    institution = author.get("last_known_institution")
    if institution is None:
        recent = author.get("last_known_institutions") or []
        institution = recent[0] if recent else None

    return institution["display_name"] if institution is not None else None

#findAuthorInstitutions('A4334433008')  

In [8]:
#finds coAuthors of all works given author id
#%%time
def findCoAuthors(aID):
    """Collect the normalised names of everyone listed on an author's works.

    Every authorship on every work of ``aID`` contributes one name. Names
    are lower-cased and split on whitespace; a name made of exactly three
    tokens loses its middle token, any other name is kept whole.

    Parameters
    ----------
    aID : str
        Short OpenAlex author id.

    Returns
    -------
    set of str
        Normalised author names; this includes the author's own name
        whenever it appears in the authorships.
    """
    collaborators = set()
    for work_identifier in work_id(aID):
        work = findWork(work_identifier)
        for authorship in work['authorships']:
            tokens = authorship['author']['display_name'].lower().split()
            # Three tokens are read as first / middle / last: keep first and last.
            kept = [tokens[0], tokens[2]] if len(tokens) == 3 else tokens
            collaborators.add(" ".join(kept))
    return collaborators

#findCoAuthors('A4327582801')       
    

In [22]:
# Builds a dict keyed by author id; each value is the list of that author's requested variables (institution, concepts, co-authors)
def combination(name, listFilter):
    """Build the comparison features for every author id matching a name.

    Parameters
    ----------
    name : str
        Author name to search for.
    listFilter : list of str, or str
        Which features to collect, in the order they should appear in each
        value list. Recognised names (case-insensitive) are
        ``"institution"``, ``"concept"`` and ``"coauthor"``. A single string
        is treated as a one-element list.

    Returns
    -------
    dict
        Maps each retained author id to a list holding one entry per
        requested feature. When ``"coauthor"`` is requested, ids whose works
        never list ``name`` among the authors are dropped, and the name
        itself is removed from the co-author set of the remaining ids.

    Raises
    ------
    ValueError
        If ``listFilter`` contains a name that is not recognised. This is
        checked before any request is made, so value lists can never end up
        silently misaligned.
    """
    known_filters = ("institution", "concept", "coauthor")

    # A bare string would otherwise be iterated character by character and
    # select nothing.
    if isinstance(listFilter, str):
        listFilter = [listFilter]
    filters = [var.lower() for var in listFilter]
    unknown = [var for var in filters if var not in known_filters]
    if unknown:
        raise ValueError(
            f'Unknown filter(s) {unknown}; expected any of {list(known_filters)}'
        )

    # Normalise the searched name exactly as findCoAuthors normalises names
    # (lower-case, single spaces, middle token of a three-token name
    # dropped); otherwise such a name could never be found in the set.
    parts = name.lower().split()
    if len(parts) == 3:
        parts.pop(1)
    lookup_name = " ".join(parts)

    totalDict = {}
    removeIds = set()
    ids = get_authorIDs(name)
    for i in ids:
        indvList = []
        for var in filters:
            if var == "institution":
                indvList.append(findAuthorInstitutions(i))
            elif var == "concept":
                indvList.append(findWorkConcepts(i))
            else:  # "coauthor"
                coauthors = findCoAuthors(i)
                if lookup_name not in coauthors:
                    print(f'THERE IS NO {name} IN SET FOR {i}')
                    removeIds.add(i)
                else:
                    # The author is not their own co-author.
                    coauthors.discard(lookup_name)
                indvList.append(coauthors)
        totalDict[i] = indvList
    for i2 in removeIds:
        totalDict.pop(i2)
    print(f'THERE ARE {len(totalDict.keys())} KEYS LEFT SINCE {len(removeIds)} IDS DID NOT HAVE {name} IN AUTHORSHIPS')
    return totalDict

# Demo: institution and co-author features for every id matching the name (the result is shown below).
combination("John Adamson", ["institution", "coauthor"])


There are 38 author ids for John Adamson
There are 164 works for A4338988036
There are 223 works for A4347349823
There are 49 works for A4334433008
There are 113 works for A4345940772
There are 112 works for A4342530884
There are 10 works for A4340970251
There are 19 works for A4335235559
There are 18 works for A4349866829
There are 15 works for A4349785327
There are 1 works for A2052798532
There are 16 works for A4331103553
There are 1 works for A4372489382
There are 1 works for A4336431390
There are 1 works for A2585988028
There are 1 works for A4372485827
There are 4 works for A4350165947
There are 15 works for A4342077751
There are 1 works for A4304423741
There are 1 works for A4346282386
There are 1 works for A4327582801
There are 6 works for A4335240014
There are 4 works for A4343479288
There are 1 works for A2406690590
There are 3 works for A4349232974
There are 2 works for A4350492330
There are 1 works for A3083458673
There are 1 works for A3198642075
There are 1 works for A320

{'A4338988036': ['Africa Health Research Institute',
  {'a. bellingham',
   'a.l. martín de francisco',
   'ada h. c. kung',
   'adel yunis',
   'ajay singh',
   'akira hayashi',
   'alan collins',
   "alan d'andrea",
   'alan gewirtz',
   'alan samuels',
   'alan saven',
   'albert gordon',
   'allan collins',
   'allan erslev',
   'amy pai',
   'anatole besarab',
   'andreas bircher',
   'andrew morrow',
   'andrew schafer',
   'andrzej wi¿cek',
   'andré lascari',
   'anna migliaccio',
   'annamaria kausz',
   'annette drygalski',
   'arthur nienhuis',
   'asher chanan-khan',
   'ashraf mikhail',
   'ayalew tefferi',
   'bayard clarkson',
   'bertram kasiske',
   'beverly torok-storb',
   'bharat aggarwal',
   'brenda sandmaier',
   'brigitte durand',
   'bruce chabner',
   'bruce cheson',
   'bruce johnson',
   'bruce spinowitz',
   'c. wise',
   'carlo a. j. m. gaillard',
   'carlos barron',
   'carlos bozzini',
   'carmelita carrier',
   'carol pollock',
   'caroline finch',
   '

In [15]:
#%%time
def scores(value1, value2):
    """Average similarity between two authors' feature lists.

    The lists are compared position by position (extra items in the longer
    list are ignored), and each pair contributes as follows:

    * either side is ``None``: 0;
    * the first item is a string: 1 when both are equal, and nothing at all
      when they differ;
    * otherwise both are treated as sets: ``shared / (len(a) + len(b))``
      when at least two items are shared, else 0. Identical sets therefore
      score 0.5, not 1.

    Parameters
    ----------
    value1, value2 : list
        Feature lists whose items are ``None``, strings or sets.

    Returns
    -------
    float
        Mean of the contributions, or ``0.0`` when no pair contributed
        (empty input, or only mismatching strings).

    Raises
    ------
    AttributeError, TypeError
        If an item is neither ``None``, a string nor set-like. Errors
        propagate instead of being returned as a message string.
    """
    finalScores = []
    for v1, v2 in zip(value1, value2):
        if v1 is None or v2 is None:
            finalScores.append(0)
        elif isinstance(v1, str):
            if v1 == v2:
                finalScores.append(1)
            # NOTE(review): a mismatching string is skipped rather than
            # counted as 0, so it does not lower the average, whereas a
            # missing (None) value does. Kept as-is; confirm it is intended.
        else:
            shared = len(v1.intersection(v2))
            finalScores.append(shared / (len(v1) + len(v2)) if shared >= 2 else 0)

    # Nothing comparable: report "no similarity" instead of dividing by zero.
    if not finalScores:
        return 0.0
    return sum(finalScores) / len(finalScores)
# Demo: the institutions differ and neither pair of sets shares two items, so the score is 0.0.
scores(['Africa Health Research Institute', {'pace', 'kingdom'}, {'norbert mietus', 'edward conry'}], ['University of Niigata Prefecture', {'syllabus'}, {'julia christmas', 'li-hsin tu'}])    

0.0

In [21]:
#%%time
def merge(dictGiven, threshold=0.40):
    """Group author ids whose feature lists are similar enough to be one person.

    Ids are visited in dictionary order. Each id that has not already been
    absorbed is compared with every other unabsorbed id via ``scores``; ids
    scoring above ``threshold`` are absorbed into it.

    Parameters
    ----------
    dictGiven : dict
        Maps author id to its feature list (as built by ``combination``).
    threshold : float, optional
        Minimum score, exclusive, for two ids to be merged. Defaults to
        0.40.

    Returns
    -------
    dict
        Maps each surviving id to a set of ``(absorbed_id, score)`` tuples;
        the set is empty when nothing was merged into that id.
    """
    print(f'Starting with {len(dictGiven)} ids')
    finalDict = {}
    removeKeys = set()
    for key, value in dictGiven.items():
        # An id already absorbed elsewhere cannot absorb others.
        if key in removeKeys:
            continue
        tempSet = set()
        for key2, value2 in dictGiven.items():
            if key2 == key or key2 in removeKeys:
                continue
            numCheck = scores(value, value2)
            # scores may hand back a non-numeric value when it could not
            # compare the two lists; treat that as "no match" instead of
            # failing on the comparison.
            if isinstance(numCheck, (int, float)) and numCheck > threshold:
                tempSet.add((key2, numCheck))
                removeKeys.add(key2)
        finalDict[key] = tempSet
    # An id may be absorbed after it was already stored; drop those as well.
    for absorbed in removeKeys:
        finalDict.pop(absorbed, None)
    # Report the size of the merged result, not of the input.
    print(f'Ending with {len(finalDict)} ids')
    return finalDict
# Demo: run merge on a hand-written sample of 15 author ids. Each value list
# appears to hold [institution name or None, concept set, co-author set].
merge({
 'A4335240014': ['Africa Health Research Institute',
  {'kingdom', 'pace'},
  {'edward conry', 'norbert mietus'}],
 'A4343479288': ['Africa Health Research Institute',
  {'context (archaeology)', 'relevance (law)', 'translanguaging'},
  {'naoki fujimoto-adamson'}],
 'A2406690590': ['Africa Health Research Institute',
  {'acre', 'transect'},
  {'ben dameron', 'daniel steinhurst', 'frank amorosanna', 'glenn harbaugh'}],
 'A4349232974': ['Africa Health Research Institute', set(), set()],
 'A4350492330': ['Africa Health Research Institute', {'politics'}, set()],
 'A3083458673': [None, {'finch'}, set()],
 'A3198642075': [None, set(), set()],
 'A3200964741': [None,
  {'coronavirus disease 2019 (covid-19)', 'pandemic'},
  {'i-chin chiang', 'sviatlana karpava', 'xiuping li'}],
 'A4318638943': [None, {'architecture'}, {'jean-sébastien cluzel'}],
 'A4323480629': ['Africa Health Research Institute',
  {'neurocognitive'},
  {'alex sigal',
   'anne derache',
   'dami collier',
   'farina karim',
   'jay brijkumar',
   'jennifer giandhari',
   'khadija khan',
   'nirmala perumal',
   'ravindra gupta',
   'steven kemp',
   'tasneem naidoo',
   'theresa smit',
   'tulio oliveira'}],
 'A4339183015': ['Africa Health Research Institute',
  set(),
  {'dun liu', 'peter bennett'}],
 'A4346260474': [None, set(), set()],
 'A4350323753': ['Africa Health Research Institute', {'football'}, set()],
 'A4365775785': ['University of Niigata Prefecture',
  {'syllabus'},
  {'julia christmas', 'li-hsin tu'}],
 'A4372739318': [None,
  {'curriculum'},
  {'howard brown', 'naoki fujimoto-adamson'}]})

Starting with 15 ids
Ending with 15 ids


{'A4335240014': set(),
 'A4343479288': set(),
 'A2406690590': set(),
 'A4349232974': set(),
 'A4350492330': set(),
 'A3083458673': set(),
 'A3198642075': set(),
 'A3200964741': set(),
 'A4318638943': set(),
 'A4323480629': set(),
 'A4339183015': set(),
 'A4346260474': set(),
 'A4350323753': set(),
 'A4365775785': set(),
 'A4372739318': set()}

In [None]:
def finalMerge(name, listFilter):
    """Look up every author id for a name and merge the likely duplicates.

    Parameters
    ----------
    name : str
        Author name to search for.
    listFilter : list of str
        Feature names forwarded unchanged to ``combination``.

    Returns
    -------
    dict
        Whatever ``merge`` returns for the features built by ``combination``.
    """
    return merge(combination(name, listFilter))
# Request all three features explicitly. An empty string selects none, which
# leaves every author with an empty feature list and nothing to compare.
finalMerge("John Adamson", ["institution", "concept", "coauthor"])