In [1]:
import pandas as pd
import bibtexparser

In [2]:
# Canonical field names onto which each source's export columns are mapped.
info = [
    'Title',
    'Authors',
    'Keywords',
    'Abstract',
    'Citations',
    'Year',
    'Publisher',
    'DOI',
]

In [3]:
# Per-source mapping: canonical field name -> column/field name used in that
# source's export file. Later cells invert each mapping to rename the columns.
info_dict = {
    "IEEE Xplore": {
        "Title": "Document Title",
        "Authors": "Authors",
        "Keywords": "Author Keywords",
        "Abstract": "Abstract",
        "Citations": "Article Citation Count",
        "Year": "Publication Year",
        "Publisher": "Publication Title",
        "DOI": "DOI",
    },
    "Web of Science": {
        "Title": "Article Title",
        "Authors": "Authors",
        "Keywords": "Author Keywords",
        "Abstract": "Abstract",
        "Citations": "Times Cited, All Databases",
        "Year": "Publication Year",
        "Publisher": "Source Title",
        "DOI": "DOI",
    },
    "Scopus": {
        "Title": "Title",
        "Authors": "Authors",
        "Keywords": "Author Keywords",
        "Abstract": "Abstract",
        "Citations": "Cited by",
        "Year": "Year",
        "Publisher": "Source title",
        "DOI": "DOI",
    },
    # This mapping has no 'Citations' entry, unlike the three above.
    "ACM DL": {
        "Title": "title",
        "Authors": "author",
        "Keywords": "keywords",
        "Abstract": "abstract",
        "Year": "year",
        "Publisher": "publisher",
        "DOI": "doi",
    },
}

In [4]:
# Start the combined frame from the IEEE Xplore export: read only the mapped
# columns, rename them to the canonical field names, and drop rows without a DOI.
ieee_columns = info_dict['IEEE Xplore']
df = (
    pd.read_csv('../Search Results/IEEE Xplore.csv', usecols=list(ieee_columns.values()))
    .rename(columns={source: canonical for canonical, source in ieee_columns.items()})
    .dropna(subset=['DOI'])
)

In [5]:
# Append the Web of Science export(s) to the combined frame.
wos_columns = info_dict['Web of Science']
wos_rename = {source: canonical for canonical, source in wos_columns.items()}

wos_frames = []
# NOTE: range(1, 2) yields only i == 1, so a single export file is read here.
for i in range(1, 2):
    web_df = pd.read_excel(
        f"../Search Results/Web of Science - {i}.xls",
        usecols=list(wos_columns.values()),
    ).rename(columns=wos_rename)
    # Keep only records that carry a DOI.
    web_df = web_df.loc[web_df['DOI'].notna()]
    wos_frames.append(web_df)

df = pd.concat([df] + wos_frames)

total_papers = df.shape[0]
unique_dois = df['DOI'].unique()
print(f"Total papers: {total_papers}")
print(f"Total unique papers: {len(unique_dois)}")

Total papers: 1260
Total unique papers: 1092


In [6]:
# Append the Scopus export: mapped columns only, canonical names, DOI required.
scopus_columns = info_dict['Scopus']
sco_df = (
    pd.read_csv("../Search Results/Scopus.csv", usecols=list(scopus_columns.values()))
    .rename(columns={source: canonical for canonical, source in scopus_columns.items()})
    .dropna(subset=['DOI'])
)

df = pd.concat([df, sco_df])

total_papers = df.shape[0]
unique_dois = df['DOI'].unique()
print(f"Total papers: {total_papers}")
print(f"Total unique papers: {len(unique_dois)}")

Total papers: 2261
Total unique papers: 1479


In [7]:
# Append the ACM DL BibTeX exports (files 1 through 5) to the combined frame.
acm_columns = info_dict['ACM DL']
acm_rename = {source: canonical for canonical, source in acm_columns.items()}

acm_frames = []
for i in range(1, 6):
    # Explicit encoding so decoding does not depend on the platform default
    # (assumes the exports are UTF-8 -- TODO confirm against the actual files).
    with open(f"../Search Results/ACM DL - {i}.bib", encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    acm_df = pd.DataFrame(bib_database.entries)
    # reindex instead of acm_df[[...]]: if no entry in this file carries one of
    # the mapped fields (or the file has no entries), the column is created as
    # all-NaN rather than raising KeyError.
    acm_df = acm_df.reindex(columns=list(acm_columns.values()))
    acm_df = acm_df.rename(columns=acm_rename)
    # Keep only records that carry a DOI.
    acm_df = acm_df[acm_df['DOI'].notna()]
    acm_frames.append(acm_df)

# Concatenate once instead of re-copying the growing frame on every iteration.
df = pd.concat([df, *acm_frames])

print(f"Total papers: {df.shape[0]}")
print(f"Total unique papers: {len(df['DOI'].unique())}")

Total papers: 2481
Total unique papers: 1596


In [8]:
# Drop duplicate papers, keeping the first occurrence of each DOI.
# DOIs are case-insensitive identifiers and the sources may export them with
# different casing, so duplicates are detected on a case-folded,
# whitespace-stripped key; the DOI column itself is left as exported.
doi_key = df['DOI'].astype(str).str.strip().str.lower()
# Reassign rather than reset_index(inplace=True) so the cell does not mutate a
# filtered slice in place.
df = df[~doi_key.duplicated()].reset_index(drop=True)
df

Unnamed: 0,Title,Authors,Publisher,Year,Abstract,DOI,Keywords,Citations
0,Pareto-Based Multiobjective Machine Learning: ...,Y. Jin; B. Sendhoff,"IEEE Transactions on Systems, Man, and Cyberne...",2008.0,Machine learning is inherently a multiobjectiv...,10.1109/TSMCC.2008.919172,Ensemble;evolutionary multiobjective optimizat...,217.0
1,Multiobjective Evolutionary Data Mining for Pe...,Y. Nojima; Y. Tanigaki; N. Masuyama; H. Ishibuchi,"2018 IEEE International Conference on Systems,...",2018.0,"In recent years, evolutionary multiobjective o...",10.1109/SMC.2018.00135,Evolutionary multiobjective optimization;data ...,
2,Single-Objective/Multiobjective Cat Swarm Opti...,D. Yan; H. Cao; Y. Yu; Y. Wang; X. Yu,IEEE Transactions on Automation Science and En...,2020.0,This article proposes single-objective/multiob...,10.1109/TASE.2020.2969485,Clustering analysis;data partition;quantum mod...,9.0
3,Deep-Learning-Aided Packet Routing in Aeronaut...,D. Liu; J. Zhang; J. Cui; S. -X. Ng; R. G. Mau...,IEEE Internet of Things Journal,2022.0,Data packet routing in aeronautical <italic>ad...,10.1109/JIOT.2021.3105357,Aeronautical ad hoc network (AANET);deep learn...,
4,Rotation effects of objective functions in par...,Y. Takahashi; Y. Nojima; H. Ishibuchi,2015 10th Asian Control Conference (ASCC),2015.0,Fuzzy genetics-based machine learning (FGBML) ...,10.1109/ASCC.2015.7244890,Fuzzy genetics-based machine learning;parallel...,1.0
...,...,...,...,...,...,...,...,...
1591,Embedding Temporal Convolutional Networks for ...,"Burrello, Alessio and Pagliari, Daniele Jahier...",Association for Computing Machinery,2022,Photoplethysmography (PPG) sensors allow for n...,10.1145/3487910,"wearable devices, medical IoT, heart rate moni...",
1592,A Probabilistic Graphical Model-Based Approach...,"Mishra, Nikita and Zhang, Huazhe and Lafferty,...",Association for Computing Machinery,2015,"In many deployments, computer systems are unde...",10.1145/2786763.2694373,probabilistic graphical models,
1593,Deep Room Recognition Using Inaudible Echos,"Song, Qun and Gu, Chaojie and Tan, Rui",Association for Computing Machinery,2018,Recent years have seen the increasing need of ...,10.1145/3264945,"smartphone, Room recognition, inaudible sound",
1594,Online Feature Elicitation in Interactive Opti...,"Boutilier, Craig and Regan, Kevin and Viappian...",Association for Computing Machinery,2009,Most models of utility elicitation in decision...,10.1145/1553374.1553384,,


In [9]:
# Add an empty 'Remove - Title' column and write the combined table to disk
# without the index.
output_path = '../Data/Papers - Initial.csv'
df = df.assign(**{'Remove - Title': None})
df.to_csv(output_path, index=False)