## Query

In [4]:
# Scopus advanced-search string, built from adjacent literals for readability:
# affiliation clause, title-keyword clause, publication-year window.
query = (
    '((AF-ID(60008734)) OR (AF-ID(60001997)))'
    ' AND (TITLE(review* OR meta-anal* OR literature OR guideline* OR evidence-synth*))'
    ' AND  PUBYEAR > 2017 AND PUBYEAR < 2023'
)

## Scopus search

In [8]:
from pybliometrics.scopus import ScopusSearch
import pandas as pd

In [6]:
# Run the search defined by `query`; the result object is kept in `s`.
s = ScopusSearch(
    query,
    refresh=30,  # refresh=n will download contents if previous download exceeds n days
    download=True,
    verbose=True,
    subscriber=True,
    view="COMPLETE",  # COMPLETE view for author information
)

Downloading results for query "((AF-ID(60008734)) OR (AF-ID(60001997))) AND (TITLE(review* OR meta-anal* OR literature OR guideline* OR evidence-synth*)) AND  PUBYEAR > 2017 AND PUBYEAR < 2023":


100%|██████████████████████████████████████████| 97/97 [01:41<00:00,  1.05s/it]


In [9]:
# Load the search results held in `s.results` into a DataFrame
# (presumably one row per retrieved document -- TODO confirm).
data = pd.DataFrame(s.results)

In [10]:
# Persist the unfiltered search results (index is written, as no index=False is passed).
data.to_csv('data/project_draagvlak/scopus.csv')

## Filter

- Keep only publications whose first or last author has a VU or VUmc affiliation.

In [11]:
# Affiliation IDs to keep, stored as strings; these are the same two
# AF-ID values that appear in the search query.
affiliations = [
    '60008734',
    '60001997',
]

In [19]:
def get_pos(x: list) -> list:
    """Flag the first and last positions of a list.

    Parameters
    ----------
    x : list
        Any list; only its length is used.

    Returns
    -------
    list
        A new list of ``len(x)`` integers holding 1 at the first and last
        index and 0 everywhere else. An empty input yields an empty list.
    """
    if len(x) == 0:
        # Nothing to flag; indexing p[0] below would raise IndexError.
        return []
    p = [0] * len(x)
    p[0] = p[-1] = 1  # for a single-element list both refer to the same slot
    return p

In [40]:
# Keep publications whose first or last author lists one of `affiliations`.
# Each stage derives a new frame via .assign() instead of setting columns on a
# filtered frame, which can raise SettingWithCopyWarning on pandas versions
# without copy-on-write. `data` itself is never modified.

# no authors -> no first or last
with_authors = data[data['author_afids'].notna()]

# authors to rows: split the ';'-separated author entries into a list, build a
# list of the same length with 1 at the first and last position, then explode
# both lists together so every row is one author with its position flag
authors = (
    with_authors
    .assign(
        author_afids2=lambda d: d['author_afids'].apply(lambda afids: afids.split(';')),
        position=lambda d: d['author_afids2'].apply(get_pos),
    )
    .explode(['author_afids2', 'position'])
)

# authors can have multiple afids (joined by '-'), so split once more after
# dropping authors without any afid, and move every afid to its own row
author_afid_rows = (
    authors[authors['author_afids2'] != '']
    .assign(author_afids2=lambda d: d['author_afids2'].apply(lambda afids: afids.split('-')))
    .explode('author_afids2')
)

# only keep authors from our affiliations
ours = author_afid_rows[author_afid_rows['author_afids2'].isin(affiliations)]

# keep publications with first or last author: per-publication sum() > 0
first_last_count = ours.groupby('eid')['position'].transform('sum')

data_filt = (
    ours[first_last_count > 0]
    .drop_duplicates('eid')  # de-duplicate (author -> publication level)
    .drop(columns=['author_afids2', 'position'])  # helper columns are no longer needed
)
data_filt.to_csv('data/project_draagvlak/data_filt.csv', index=False)