In [24]:
import pandas as pd
from tqdm import tqdm
import time
import requests

from semanticscholar import SemanticScholar

# Semantic Scholar client used by the per-paper lookups in the cells below.
# `timeout=2` is handed straight to the library (presumably seconds per
# request -- confirm against the installed `semanticscholar` version).
sch = SemanticScholar(timeout=2)

In [2]:
# Load the filtered paper table; later cells look each row up by its 'DOI' column.
papers_path = '../Data/Papers - After Content Filter.csv'
papers_df = pd.read_csv(papers_path)

# Accumulator for influential-reference records, filled by the lookup loop below.
influent_list = list()

In [3]:
# For every paper: ask Semantic Scholar for its record, store the number of
# citations in `papers_df['SSCitations']`, and collect each reference that is
# flagged influential and has a DOI into `influent_list`.
REQUESTS_PER_BATCH = 50   # pause after this many lookups ...
PAUSE_SECONDS = 150       # ... for this long (presumably to respect the API rate limit)

papers_df['SSCitations'] = 0

# Start from an empty list inside this cell so that re-running it does not
# append the same references a second time.
influent_list = []

for request_no, (index, row) in enumerate(
        tqdm(papers_df.iterrows(), total=papers_df.shape[0]), start=1):
    paper = sch.paper(row['DOI'])

    papers_df.loc[index, 'SSCitations'] = len(paper['citations'])

    # One record per influential reference; references without a DOI are
    # dropped because they cannot be looked up again later.
    influent_list.extend(
        {'Orig DOI': row['DOI'], 'Ref DOI': ref['doi'], 'Ref Title': ref['title']}
        for ref in paper['references']
        if ref['isInfluential'] and ref['doi'] is not None
    )

    # Throttle: sleep after every full batch of lookups.
    if request_no % REQUESTS_PER_BATCH == 0:
        print(f'Waiting {PAUSE_SECONDS} seconds...')
        time.sleep(PAUSE_SECONDS)


 34%|███▍      | 49/145 [00:42<01:29,  1.07it/s]

Waiting 150 seconds...


 68%|██████▊   | 99/145 [03:53<00:40,  1.12it/s]  

Waiting 150 seconds...


100%|██████████| 145/145 [06:58<00:00,  2.89s/it]


In [4]:
# Turn the collected reference records into a table and report how many rows
# it has versus how many distinct referenced DOIs appear in it.
influent_df = pd.DataFrame(influent_list)

total_refs = influent_df.shape[0]
# dropna=False keeps the count identical to len(Series.unique()).
unique_refs = influent_df['Ref DOI'].nunique(dropna=False)

print(f"Total Influential References: {total_refs}")
print(f"Unique Influential References: {unique_refs}")

Total Influential References: 242
Unique Influential References: 207


In [5]:
# Show the influential-reference table (citing DOI, referenced DOI, referenced title).
influent_df

Unnamed: 0,Orig DOI,Ref DOI,Ref Title
0,10.1007/978-3-540-24581-0_47,10.1109/4235.887237,Evolutionary ensembles with negative correlati...
1,10.1007/978-3-540-24581-0_47,10.1080/00401706.1995.10484383,"Machine Learning, Neural and Statistical Class..."
2,10.1109/TEVC.2005.844158,10.1016/b978-1-55860-377-6.50046-3,Error-Correcting Output Coding Corrects Bias a...
3,10.1109/TEVC.2005.844158,10.1023/A:1022648800760,The Strength of Weak Learnability
4,10.1109/TEVC.2005.844158,10.1023/A:1007659514849,MultiBoosting: A Technique for Combining Boost...
...,...,...,...
237,10.1007/s10618-021-00781-5,10.1007/s10115-011-0400-x,Early classification on time series
238,10.1002/int.22354,10.1145/3287560.3287589,A comparative study of fairness-enhancing inte...
239,10.1016/j.cor.2021.105676,10.1109/5.726791,Gradient-based learning applied to document re...
240,10.1016/j.cor.2021.105676,10.1007/S10479-021-04033-Z,The stochastic multi-gradient algorithm for mu...


In [7]:
# Persist the paper table, now including the 'SSCitations' column added above.
# NOTE(review): `index` is left at the pandas default, so the row index is
# written as an extra unnamed first column -- confirm downstream readers expect it.
papers_df.to_csv('../Data/Papers - After Content Filter with SSCitations.csv')

In [26]:
# Look up every distinct influential reference on Semantic Scholar and keep
# its title, DOI, topics, venue, year and abstract in `ref_list`.
# The client is rebuilt with a larger timeout value than the one used earlier.
sch = SemanticScholar(timeout=5)

# The citing paper is irrelevant here: drop it and keep the first occurrence
# of each remaining (Ref DOI, Ref Title) row.
df = influent_df.drop('Orig DOI', axis=1).drop_duplicates(keep='first')

ref_list = []

# Counts lookups that returned a response; timed-out lookups leave the loop
# iteration early via `continue` and are therefore not counted.
c = 0
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    try:
        paper = sch.paper(row['Ref DOI'])
    except requests.exceptions.ReadTimeout:
        print(f"ReadTimeout occured at id {index}")
        continue

    try:
        ref_list.append({'Title': paper['title'],
                         'DOI': paper['doi'],
                         'Topics': paper['topics'],
                         'Venue': paper['venue'],
                         'Year': paper['year'],
                         'Abstract': paper['abstract']})
    except Exception as exc:
        # Best-effort: a record that lacks an expected field is skipped, but the
        # reason is reported. Catching `Exception` rather than using a bare
        # `except` lets KeyboardInterrupt / SystemExit stop the loop.
        print(f"Error in {index}: {exc!r}")

    # Throttle: pause after every 50 counted lookups.
    c += 1
    if c % 50 == 0:
        print('Waiting 150 seconds...')
        time.sleep(150)


 24%|██▎       | 49/207 [01:53<03:46,  1.43s/it]

Waiting 150 seconds...


 27%|██▋       | 56/207 [04:45<20:10,  8.01s/it]  

ReadTimeout occured at id 60


 47%|████▋     | 98/207 [06:36<02:36,  1.44s/it]

Error in 111


 48%|████▊     | 100/207 [06:39<02:56,  1.65s/it]

Waiting 150 seconds...


 54%|█████▎    | 111/207 [09:27<04:38,  2.90s/it]  

Error in 129


 70%|██████▉   | 144/207 [10:47<01:19,  1.26s/it]

Error in 165


 72%|███████▏  | 150/207 [10:56<02:00,  2.12s/it]

Waiting 150 seconds...


 97%|█████████▋| 200/207 [15:06<00:06,  1.08it/s]

Waiting 150 seconds...


100%|██████████| 207/207 [17:43<00:00,  5.14s/it]


In [27]:
# Build the table of influential-reference metadata from the records collected
# above (columns: Title, DOI, Topics, Venue, Year, Abstract) and display it.
ref_df = pd.DataFrame(ref_list)
ref_df

Unnamed: 0,Title,DOI,Topics,Venue,Year,Abstract
0,Evolutionary ensembles with negative correlati...,10.1109/4235.887237,"[{'topic': 'Neural Network Simulation', 'topic...",IEEE Trans. Evol. Comput.,2000,Based on negative correlation learning and evo...
1,"Machine Learning, Neural and Statistical Class...",10.1080/00401706.1995.10484383,[],,1995,
2,Error-Correcting Output Coding Corrects Bias a...,10.1016/b978-1-55860-377-6.50046-3,"[{'topic': 'Supervised learning', 'topicId': '...",ICML,1995,
3,The strength of weak learnability,10.1023/A:1022648800760,"[{'topic': 'Learnability', 'topicId': '4569', ...",30th Annual Symposium on Foundations of Comput...,1989,
4,MultiBoosting: A Technique for Combining Boost...,10.1023/A:1007659514849,"[{'topic': 'AdaBoost', 'topicId': '33311', 'ur...",Machine Learning,2000,
...,...,...,...,...,...,...
198,Early classification of time series using mult...,10.1016/J.INS.2019.04.024,"[{'topic': 'Mathematical optimization', 'topic...",Inf. Sci.,2019,
199,Early classification on time series,10.1007/s10115-011-0400-x,"[{'topic': 'Time series', 'topicId': '1293', '...",Knowledge and Information Systems,2012,
200,A comparative study of fairness-enhancing inte...,10.1145/3287560.3287589,"[{'topic': 'Fairness measure', 'topicId': '923...",FAT,2018,Computers are increasingly used to make decisi...
201,The stochastic multi-gradient algorithm for mu...,10.1007/S10479-021-04033-Z,"[{'topic': 'Algorithm', 'topicId': '305', 'url...",ArXiv,2019,


In [28]:
# Persist the influential-reference metadata table.
# NOTE(review): `index` is left at the pandas default, so the row index is
# written as an extra unnamed first column -- confirm downstream readers expect it.
ref_df.to_csv('../Data/Influential papers.csv')