## Definitions

In [1]:
import requests
import pandas as pd
import os
import pdb
from dotenv import load_dotenv
import math
from tqdm import tqdm
import time

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Scopus API key used by every request below; os.getenv yields None when SCOPUS_API_KEY is unset.
API_KEY = os.getenv('SCOPUS_API_KEY')

## ORCIDs

In [2]:
def scopus_json_to_df(d):
    """Flatten the entries of one Scopus search response page into a DataFrame.

    Parameters
    ----------
    d : dict
        Decoded JSON body of a Scopus search response; must contain the
        'search-results' key.

    Returns
    -------
    pandas.DataFrame
        One row per item of ``d['search-results']['entry']`` with nested
        objects flattened by ``pd.json_normalize``. An empty DataFrame is
        returned (and a message printed) when the page has no 'entry' key.
    """
    results = d['search-results']
    if 'entry' not in results:
        # Report the anomaly instead of dropping into a debugger, which would
        # stall a long unattended batch run.
        print("Scopus response page has no 'entry' key; returning an empty DataFrame.")
        return pd.DataFrame()
    return pd.json_normalize(results['entry'])
    
def perform_single_scopus_request(start, query, api_key, count_per_page, timeout=60):
    """Fetch one page of Scopus search results.

    Parameters
    ----------
    start : int
        Offset of the first result to return.
    query : str
        Scopus search expression, e.g. ``'orcid(0000-0000-0000-0000)'``.
    api_key : str
        Value sent as the ``apiKey`` parameter.
    count_per_page : int
        Value sent as the ``count`` parameter (results per page).
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot block a batch run forever.

    Returns
    -------
    requests.Response or None
        The response when the status code is 200. ``None`` when the request
        raised or returned another status; the reason is printed.
    """
    url = 'https://api.elsevier.com/content/search/scopus'
    # Let requests build the query string so characters such as spaces, '&'
    # or '#' inside the search expression are escaped instead of corrupting the URL.
    params = {
        'start': start,
        'count': count_per_page,
        'query': query,
        'apiKey': api_key,
        'httpAccept': 'application/json',
    }
    try:
        r = requests.get(url, params=params, timeout=timeout)
    except Exception as e:
        # Best effort: report and let the caller decide how to continue.
        print(f'Query: "{query}" failed. Reason: {str(e)}')
        return None

    if r.status_code == 200:
        return r

    print(f'Query: "{query}" returned wrong status code. Status code: {r.status_code} Reason: {str(r.reason)}')
    return None
            
def request_scopus_search(query, api_key, count_per_page=25):
    """Run a Scopus search and collect all result pages into one DataFrame.

    The first page is requested at offset 0; its 'opensearch:totalResults'
    value decides how many further pages are fetched.

    Parameters
    ----------
    query : str
        Scopus search expression.
    api_key : str
        Scopus API key.
    count_per_page : int, optional
        Number of results requested per page.

    Returns
    -------
    pandas.DataFrame
        All retrieved entries, one row each. Empty when the search has no
        results or when the first request failed (the failure is printed by
        ``perform_single_scopus_request``). The index is not reset across pages.
    """
    r = perform_single_scopus_request(0, query, api_key, count_per_page)
    if r is None:
        # Return an empty frame rather than None so callers can keep using
        # DataFrame methods on the result.
        return pd.DataFrame()

    d = r.json()
    total_results = int(d['search-results']['opensearch:totalResults'])

    if total_results < 0:
        print(f'Total results should NEVER be negative. Query: "{query}"')
        return pd.DataFrame()

    if total_results == 0:
        return pd.DataFrame()  # If no results, send empty DF

    if total_results <= count_per_page:
        return scopus_json_to_df(d)

    # More results than fit on one page: fetch the remaining pages in order.
    page_ds = [d]
    for page_start in range(count_per_page, total_results, count_per_page):
        r = perform_single_scopus_request(page_start, query, api_key, count_per_page)
        if r is None:
            # A failed page must not abort the pages already collected.
            print(f'Query: "{query}" is missing the page starting at {page_start}; results are incomplete.')
            continue
        page_ds.append(r.json())

    return pd.concat([scopus_json_to_df(page_d) for page_d in page_ds])

def request_scopus_orcid(orcid, api_key):
    """Search Scopus for publications linked to one ORCID.

    Returns the result of ``request_scopus_search`` for the query
    ``orcid(<orcid>)`` with two extra columns: ``searched_orcid`` (the ORCID
    that was queried) and ``timestamp`` (time of the lookup). The frame is
    empty when nothing was found or the request failed.
    """
    df = request_scopus_search(f'orcid({orcid})', api_key)
    if df is None:
        # A failed request yields no frame; substitute an empty one so a
        # single bad lookup does not abort the whole batch.
        df = pd.DataFrame()
    return df.assign(searched_orcid=orcid, timestamp=pd.Timestamp.now())

def search_all_orcids(orcids, api_key):
    """Look up every ORCID in Scopus and combine the results.

    Parameters
    ----------
    orcids : iterable of str
        ORCIDs to search for.
    api_key : str
        Scopus API key.

    Returns
    -------
    (pandas.DataFrame, list)
        The concatenated results of all ORCIDs that returned rows (an empty
        DataFrame if none did), and the list of ORCIDs whose lookup came
        back empty.
    """
    dfs = []
    no_results = []
    for orcid in tqdm(orcids):
        time.sleep(.5)  # pause between requests -- presumably to stay within API rate limits
        df = request_scopus_orcid(orcid, api_key)
        if df.empty:
            no_results.append(orcid)
        else:
            dfs.append(df)

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly.
    combined = pd.concat(dfs) if dfs else pd.DataFrame()
    return combined, no_results

# ORCIDs to look up: first column of a header-less Excel sheet, as a Series named 'orcids'.
orcids = pd.read_excel('input/list_orcid.xls',header=None)[0].rename('orcids')

# Query Scopus for every ORCID (the recorded run below took roughly 23 minutes for 489 ORCIDs).
df_orcids, no_results_orcids = search_all_orcids(orcids, API_KEY)
df_orcids

100%|██████████| 489/489 [23:24<00:00,  2.87s/it]   


Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:eIssn,...,openaccess,openaccessFlag,searched_orcid,timestamp,pii,article-number,freetoread.value,freetoreadLabel.value,prism:isbn,pubmed-id
0,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85122308989,2-s2.0-85122308989,"The effect of moisture, nutrients and disturba...",Bartušková A.,Functional Ecology,02698463,13652435,...,0,False,0000-0001-9550-4217,2022-09-08 11:58:16.136977,,,,,,
0,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85133719027,2-s2.0-85133719027,Grafting of silver nanospheres and nanoplates ...,Reznickova A.,Vacuum,0042207X,,...,0,False,0000-0001-8517-7785,2022-09-08 11:58:21.208566,S0042207X2200392X,111268,,,,
1,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85124376364,2-s2.0-85124376364,Plasma treatment of PTFE at elevated temperatu...,Reznickova A.,Materials Today Communications,,23524928,...,0,False,0000-0001-8517-7785,2022-09-08 11:58:21.208566,S2352492822001301,103254,,,,
2,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85102934268,2-s2.0-85102934268,Grafting of Metal Nanoparticles with Specific ...,Lacmanová V.,Chemicke Listy,00092770,12137103,...,0,False,0000-0001-8517-7785,2022-09-08 11:58:21.208566,,,,,,
3,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85094640057,2-s2.0-85094640057,Photochemical preparation of silver colloids i...,Kvitek O.,Coatings,,20796412,...,1,True,0000-0001-8517-7785,2022-09-08 11:58:21.208566,,1046,"[{'$': 'all'}, {'$': 'publisherfullgold'}, {'$...","[{'$': 'All Open Access'}, {'$': 'Gold'}, {'$'...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84943534174,2-s2.0-84943534174,Effects of long-term drainage on microbial com...,Urbanová Z.,Soil Biology and Biochemistry,00380717,,...,0,False,0000-0002-0742-3933,2022-09-08 12:21:37.700192,S0038071715003454,,,,,
9,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84914814472,2-s2.0-84914814472,Microbial community composition and in silico ...,Urbanová Z.,FEMS Microbiology Ecology,01686496,15746941,...,1,True,0000-0002-0742-3933,2022-09-08 12:21:37.700192,,,"[{'$': 'all'}, {'$': 'publisherfree2read'}]","[{'$': 'All Open Access'}, {'$': 'Bronze'}]",,25195805
10,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84877786141,2-s2.0-84877786141,Methane Emissions and Methanogenic Archaea on ...,Urbanová Z.,Ecosystems,14329840,14350629,...,0,False,0000-0002-0742-3933,2022-09-08 12:21:37.700192,,,,,,
11,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84868247092,2-s2.0-84868247092,Vegetation and carbon gas dynamics under a cha...,Urbanová Z.,Plant Ecology and Diversity,17550874,17551668,...,0,False,0000-0002-0742-3933,2022-09-08 12:21:37.700192,,,,,,


### Saving output

In [3]:
# Create the target directory first so a long search run is not lost to a
# missing-folder error at write time.
os.makedirs('output', exist_ok=True)

# All publications found via ORCID (the DataFrame index is written as the first column).
df_orcids.to_csv('output/publications_orcids.csv')

# ORCIDs for which the lookup returned no rows.
pd.Series(no_results_orcids).to_csv('output/no_results_orcids.csv',index=False)

In [5]:
# Sanity check: (rows, columns) of the combined ORCID results.
df_orcids.shape

(15561, 32)

## Author IDs

In [12]:
def request_scopus_authorid(author_id, api_key):
    """Search Scopus for publications of one Scopus author ID.

    Returns the result of ``request_scopus_search`` for the query
    ``au-id(<author_id>)`` with two extra columns: ``searched_authorid`` (the
    ID that was queried) and ``timestamp`` (time of the lookup). The frame is
    empty when nothing was found or the request failed.
    """
    df = request_scopus_search(f'au-id({author_id})', api_key)
    if df is None:
        # A failed request yields no frame; substitute an empty one so a
        # single bad lookup does not abort the whole batch.
        df = pd.DataFrame()
    return df.assign(searched_authorid=author_id, timestamp=pd.Timestamp.now())

def search_all_authorids(author_ids, api_key):
    """Look up every Scopus author ID and combine the results.

    Parameters
    ----------
    author_ids : iterable
        Scopus author IDs to search for.
    api_key : str
        Scopus API key.

    Returns
    -------
    (pandas.DataFrame, list)
        The concatenated results of all IDs that returned rows (an empty
        DataFrame if none did), and the list of IDs whose lookup came back
        empty.
    """
    dfs = []
    no_results = []
    for author_id in tqdm(author_ids):
        time.sleep(.5)  # pause between requests -- presumably to stay within API rate limits
        df = request_scopus_authorid(author_id, api_key)
        if df.empty:
            no_results.append(author_id)
        else:
            dfs.append(df)

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly.
    combined = pd.concat(dfs) if dfs else pd.DataFrame()
    return combined, no_results

# Scopus author IDs to look up: first column of a header-less Excel sheet, as a Series named 'scopus_ids'.
author_ids = pd.read_excel('input/list_scopusid.xls',header=None)[0].rename('scopus_ids')
# Query Scopus for every author ID (the recorded run below took roughly 12 minutes for 214 IDs).
df_author_ids, no_results_author_ids = search_all_authorids(author_ids, API_KEY)
df_author_ids

100%|██████████| 214/214 [12:10<00:00,  3.41s/it]


Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:issn,prism:volume,...,timestamp,prism:doi,pii,affiliation,article-number,prism:eIssn,freetoread.value,freetoreadLabel.value,prism:isbn,pubmed-id
0,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:76049088350,2-s2.0-76049088350,Maximilianus wietrowsky SJ - His fate in the l...,Bočková A.,Listy Filologicke,00244457,132,...,2022-09-08 12:32:23.127589,,,,,,,,,
0,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85133719027,2-s2.0-85133719027,Grafting of silver nanospheres and nanoplates ...,Reznickova A.,Vacuum,0042207X,203,...,2022-09-08 12:32:27.933472,10.1016/j.vacuum.2022.111268,S0042207X2200392X,"[{'@_fa': 'true', 'affilname': 'University of ...",111268,,,,,
1,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85124376364,2-s2.0-85124376364,Plasma treatment of PTFE at elevated temperatu...,Reznickova A.,Materials Today Communications,,31,...,2022-09-08 12:32:27.933472,10.1016/j.mtcomm.2022.103254,S2352492822001301,"[{'@_fa': 'true', 'affilname': 'University of ...",103254,23524928,,,,
2,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85102934268,2-s2.0-85102934268,Grafting of Metal Nanoparticles with Specific ...,Lacmanová V.,Chemicke Listy,00092770,115,...,2022-09-08 12:32:27.933472,,,"[{'@_fa': 'true', 'affilname': 'University of ...",,12137103,,,,
3,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85094640057,2-s2.0-85094640057,Photochemical preparation of silver colloids i...,Kvitek O.,Coatings,,10,...,2022-09-08 12:32:27.933472,10.3390/coatings10111046,,"[{'@_fa': 'true', 'affilname': 'University of ...",1046,20796412,"[{'$': 'all'}, {'$': 'publisherfullgold'}, {'$...","[{'$': 'All Open Access'}, {'$': 'Gold'}, {'$'...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84955588914,2-s2.0-84955588914,Victims’ Responses to Stalking: An Examination...,Podaná Z.,Journal of Interpersonal Violence,08862605,31,...,2022-09-08 12:44:32.880163,10.1177/0886260514556764,,"[{'@_fa': 'true', 'affilname': 'Charles Univer...",,15526518,,,,25392391
6,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84874198460,2-s2.0-84874198460,Does Cultural Context Affect the Association B...,Podaná Z.,Journal of Contemporary Criminal Justice,10439862,29,...,2022-09-08 12:44:32.880163,10.1177/1043986212471181,,"[{'@_fa': 'true', 'affilname': 'Filozofická Fa...",,15525406,,,,
7,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84892200599,2-s2.0-84892200599,Czech Republic,Burianek J.,Juvenile Delinquency in Europe and Beyond: Res...,,,...,2022-09-08 12:44:32.880163,10.1007/978-0-387-95982-5_21,,"[{'@_fa': 'true', 'affilname': 'Charles Univer...",,,,,"[{'@_fa': 'true', '$': '9780387959818'}]",
8,true,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:79951687221,2-s2.0-79951687221,Reporting to the police as a response to intim...,Podaná Z.,Sociologicky Casopis,00380288,46,...,2022-09-08 12:44:32.880163,,,"[{'@_fa': 'true', 'affilname': 'Charles Univer...",,,,,,


In [16]:
# Inspect the first result row to see which fields a Scopus entry carries.
df_author_ids.iloc[0]

@_fa                                                                   true
link                      [{'@_fa': 'true', '@ref': 'self', '@href': 'ht...
prism:url                 https://api.elsevier.com/content/abstract/scop...
dc:identifier                                         SCOPUS_ID:76049088350
eid                                                      2-s2.0-76049088350
dc:title                  Maximilianus wietrowsky SJ - His fate in the l...
dc:creator                                                       Bočková A.
prism:publicationName                                     Listy Filologicke
prism:issn                                                         00244457
prism:volume                                                            132
prism:issueIdentifier                                                   1-2
prism:pageRange                                                     136-165
prism:coverDate                                                  2009-12-01
prism:coverD

In [13]:
# Create the target directory first so a long search run is not lost to a
# missing-folder error at write time.
os.makedirs('output', exist_ok=True)

# All publications found via Scopus author ID (the DataFrame index is written as the first column).
df_author_ids.to_csv('output/publications_author_ids.csv')

# Author IDs for which the lookup returned no rows.
pd.Series(no_results_author_ids).to_csv('output/no_results_author_ids.csv',index=False)

## Names

In [None]:
def request_scopus_name(firstname, surname, api_key):
    """Search Scopus for publications by author name.

    Runs the query ``auth(<surname> <firstname>)`` through
    ``request_scopus_search`` and adds the columns ``searched_firstname``,
    ``searched_surname`` (the name that was queried) and ``timestamp`` (time
    of the lookup). The frame is empty when nothing was found or the request
    failed.
    """
    df = request_scopus_search(f'auth({surname} {firstname})', api_key)
    if df is None:
        # A failed request yields no frame; substitute an empty one so a
        # single bad lookup does not abort the whole batch.
        df = pd.DataFrame()
    # Tag rows with the name actually searched; the previous code referenced
    # an `orcid` variable that does not exist in this function.
    return df.assign(
        searched_firstname=firstname,
        searched_surname=surname,
        timestamp=pd.Timestamp.now(),
    )

def search_all_names(names, api_key):
    """Look up every author name in Scopus and combine the results.

    Parameters
    ----------
    names : pandas.DataFrame
        One row per person, with the columns ``firstname`` and ``surname``.
    api_key : str
        Scopus API key.

    Returns
    -------
    (pandas.DataFrame, list)
        The concatenated results of all names that returned rows (an empty
        DataFrame if none did), and a list of ``(firstname, surname)`` tuples
        whose lookup came back empty.
    """
    dfs = []
    no_results = []
    # iterrows() is a generator without a length, so pass the total to tqdm
    # explicitly to get a real progress bar.
    for _, row in tqdm(names.iterrows(), total=len(names)):
        time.sleep(.5)  # pause between requests -- presumably to stay within API rate limits
        df = request_scopus_name(row.firstname, row.surname, api_key)
        if df.empty:
            # Record the name that was searched; the previous code appended an
            # `author_id` variable that does not exist in this function.
            no_results.append((row.firstname, row.surname))
        else:
            dfs.append(df)

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly.
    combined = pd.concat(dfs) if dfs else pd.DataFrame()
    return combined, no_results


In [17]:
# Names to look up: header-less Excel sheet whose first two columns are labelled 'firstname' and 'surname'.
names = pd.read_excel('input/list_name.xls',header=None).rename({0:'firstname',1:'surname'},axis=1)
names

Unnamed: 0,firstname,surname
0,Abdel-Mohsen,Abdel-Lattif
1,Adam,Konecny
2,Adam,Mackerle
3,Adam,Schrofel
4,Agnieszka,Bielach
...,...,...
1175,Thanh,Nam Phan
1176,Vera,Neuzil Bunesova
1177,Veronika,Gvozdikova Javurkova
1178,Viet,Leu Quoc
