In [3]:
import numpy as np
import pandas as pd
import string
import re
import pickle

import pybliometrics
from pybliometrics.scopus import AuthorRetrieval
import elsapy_utils as ep
import json
import requests

import doctest

import spacy
from spacy.lang.en import English
from nltk.probability import FreqDist
from nltk.corpus import stopwords
from nltk import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tag import pos_tag

from nltk.metrics import BigramAssocMeasures
from nltk.metrics import TrigramAssocMeasures
from nltk.collocations import BigramCollocationFinder
from nltk.collocations import TrigramCollocationFinder

from sklearn.preprocessing import MultiLabelBinarizer

import plotly.express as px

In [57]:
def trough(position: int, length: int, b = 4) -> float:
    """ This implements the custom author weight function.
        The fundamental assumption is that authors at the
        beginning and end of the author list contribute 
        more to a publication than middle authors.
        
        The weight is symmetric about (length - 1) / 2, i.e. positions
        0 and length - 1 receive the same weight, so `position` behaves
        as a 0-based index into the authorship list.
        
        args:
        -----
        position - where the author is located in the authorship list
                   (a NumPy array of positions is also accepted and is
                   evaluated element-wise)
        length   - total number of authors in the authorship list
        b        - parameter for the slope of the 'trough' in the trough function
        
        returns:
        --------
        author weight according to the trough function (a float for a
        scalar position, an array for an array of positions)
        
        :raises ZeroDivisionError: if length is the Python int 0
    """
    
    def sigmoid(x: float) -> float:
        """ The sigmoid/logistic function, evaluated without overflow. """
        # 1/(1 + exp(-x)) == exp(-log(1 + exp(-x))). np.logaddexp computes
        # the log term without ever forming exp(-x) directly, so a strongly
        # negative x (long author lists) no longer triggers an overflow
        # RuntimeWarning; the result simply underflows towards 0.
        return np.exp(-np.logaddexp(0.0, -x))
    
    # Depth of the trough: 0 for a single author, approaching 1 for long lists.
    c = (length-1)/length
    
    # Difference of two shifted sigmoids: ~0 at both ends of the list and ~1
    # in the middle, so the weight dips for middle authors.
    return 1-c*(sigmoid(b*(position - 1)) - sigmoid(b*(position - length + 2)))

def convert_eu_to_float(x) -> float:
    """ Turn a number written with a decimal comma (0,xx) into a float (0.xx).
    
        Numeric input (int or float) is passed straight to float();
        anything else is expected to be a string whose commas are
        replaced by points before parsing.
    
        args:
        -----
        :x - str, int, or float
        
        returns:
        --------
        x as a float
        
        :raises ValueError: if the text is not parseable once commas are replaced
        :raises AttributeError: if x is neither numeric nor has a .replace method
        
        Examples:
        ---------
        >>> convert_eu_to_float('7,89')
        7.89
        >>> convert_eu_to_float(4.5)
        4.5
        >>> convert_eu_to_float('9.99')
        9.99
        >>> convert_eu_to_float(4)
        4.0
    """
    
    # Numbers need no separator handling; text gets its decimal comma swapped
    # for a point so that float() can parse it.
    is_numeric = isinstance(x, (float, int))
    parseable = x if is_numeric else x.replace(',', '.')
    
    return float(parseable)
    
def count_affils(affils: list) -> int:
    """ Count the entries in a list of affiliations.
    
        A float argument is counted as 0 instead of being measured
        (presumably the NaN that stands in for a missing list -
        confirm against the caller). Any other argument is handed
        to len().
    """
    
    return 0 if isinstance(affils, float) else len(affils)

def publication_metrics(profiles):
    """ Collect the publication metrics of every author into one dict.
    
        args:
        -----
        profiles - dict of {auid: AuthorRetrieval object}
        
        returns:
        --------
        dict of the form {auid: [values...]}, where the values are the
        profile attributes listed in `metric_fields` below, in that order
    """
    
    # Attribute names read from each profile; the order here fixes the
    # order of the values in every output list.
    metric_fields = (
        'indexed_name',
        'affiliation_current',
        'affiliation_history',
        'alias',
        'citation_count',
        'cited_by_count',
        'coauthor_count',
        'classificationgroup',
        'document_count',
        'h_index',
        'orcid',
        'publication_range',
        'subject_areas',
    )
    
    metrics = {}
    for auid in profiles:
        record = profiles[auid]
        metrics[auid] = [getattr(record, field) for field in metric_fields]
    
    return metrics

In [59]:
# Run the docstring examples found in the current module (the notebook's
# __main__ namespace, per the recorded output) and print a per-example report.
# NOTE(review): the recorded output lists `__main__.check_str`, which is not
# defined in the cells visible here - confirm it still exists on a fresh kernel.
doctest.testmod(verbose = True)

Trying:
    convert_eu_to_float('7,89')
Expecting:
    7.89
ok
Trying:
    convert_eu_to_float(4.5)
Expecting:
    4.5
ok
Trying:
    convert_eu_to_float('9.99')
Expecting:
    9.99
ok
Trying:
    convert_eu_to_float(4)
Expecting:
    4.0
ok
5 items had no tests:
    __main__
    __main__.check_str
    __main__.count_affils
    __main__.publication_metrics
    __main__.trough
1 items passed all tests:
   4 tests in __main__.convert_eu_to_float
4 tests in 6 items.
4 passed and 0 failed.
Test passed.


TestResults(failed=0, attempted=4)