In [23]:
import os 
import asyncio
import aiohttp
import random
from typing import List, Dict
from rich import print as rprint

from dotenv import load_dotenv

load_dotenv()

search_url = "https://api.semanticscholar.org/graph/v1/paper/search"

In [None]:
import requests
import time
import csv

# Constants
SEMANTIC_SCHOLAR_API_URL = "https://api.semanticscholar.org/graph/v1"
SEARCH_ENDPOINT = f"{SEMANTIC_SCHOLAR_API_URL}/paper/search"
PAPER_FIELDS = "title,abstract,authors,citationCount,influentialCitationCount,referenceCount,url,venue,publicationVenue,year,openAccessPdf,journal"

# Parameters
QUERY = "Systemic Lupus Erythematosus"  # Replace with your query
LIMIT = 100  # Number of papers to retrieve per request
OFFSET = 0  # Starting point for retrieval
MAX_RESULTS = 1000  # Maximum number of papers to retrieve
HIGH_IMPACT_JOURNALS_FILE = "high_impact_journals.csv"
MIN_CITATIONS = 50  # Minimum number of citations to consider a paper impactful
PUBLICATION_TYPES = "JournalArticle,Review,CaseReport,ClinicalTrial,Conference"

# Search papers using Semantic Scholar API
def search_papers(query, limit=100, offset=0):
    params = {
        "query": query,
        "limit": limit,
        "offset": offset,
        "fields": PAPER_FIELDS,
        "minCitationCount": MIN_CITATIONS,
        "publicationTypes": PUBLICATION_TYPES
    }
    response = requests.get(SEARCH_ENDPOINT, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None


results = search_papers(QUERY)



In [30]:
results['data'][0]

{'paperId': '6019cc5eb993f3490ddac510d0350b287098e7da',
 'publicationVenue': {'id': 'd4a5fe3b-9c8d-4b96-a023-0fe8d2a3156b',
  'name': 'Arthritis & Rheumatism',
  'type': 'journal',
  'alternate_names': ['Arthritis  Rheum'],
  'issn': '0004-3591',
  'url': 'http://www3.interscience.wiley.com/cgi-bin/jhome/76509746',
  'alternate_urls': ['https://onlinelibrary.wiley.com/journal/15290131',
   'http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)2326-5205',
   'http://www3.interscience.wiley.com/cgi-bin/jhome/77005015',
   'http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)2151-4658',
   'http://www3.interscience.wiley.com/cgi-bin/jhome/76509746?CRETRY=1&SRETRY=0',
   'http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1529-0131',
   'http://www3.interscience.wiley.com/cgi-bin/jtoc/112142621/']},
 'url': 'https://www.semanticscholar.org/paper/6019cc5eb993f3490ddac510d0350b287098e7da',
 'title': 'The 1982 revised criteria for the classification of systemic lupus erythematosus.',
 'abs

In [12]:
[paper['venue'] for paper in results['data']]

['Italian National Conference on Sensors',
 'Nature Machine Intelligence',
 '2023 IEEE International Conference on Integrated Circuits and Communication Systems (ICICACS)',
 'Progress in Biomedical Engineering',
 'Int. J. Intell. Networks',
 'Deep Learning, Machine Learning and IoT in Biomedical and Health Informatics',
 'INTERANTIONAL JOURNAL OF SCIENTIFIC RESEARCH IN ENGINEERING AND MANAGEMENT',
 'Current Genomics',
 'IEEE International Conference on Healthcare Informatics',
 'Intelligent Systems Reference Library',
 'Encyclopedia',
 'International journal  of research and innovation in applied science',
 'BMJ Innovations',
 '2018 Second International Conference on Electronics, Communication and Aerospace Technology (ICECA)',
 'Nature Network Boston',
 'arXiv.org',
 'Smart Manufacturing - When Artificial Intelligence Meets the Internet of Things',
 'Journal of Healthcare Leadership',
 '2022 1st IEEE International Conference on Industrial Electronics: Developments & Applications (ICID

In [None]:
os.environ['SEMANTIC_SCHOLAR_API_KEY']

'JbDRuaZkji5Sca4LmFHCF5xSXzkM3BoR7pyeeB8b'

In [None]:
response = requests

In [None]:
os.environ

In [29]:
import requests
import xml.etree.ElementTree as ET
import json

def get_journal_metrics(issn):
    """
    Retrieves the SJR and SNIP metrics for a journal given its ISSN using the 
    Elsevier Serial Title API.

    Args:
      issn: The ISSN of the journal.

    Returns:
      A dictionary containing the SJR and SNIP values for the journal, or None 
      if the API request fails.
    """

    # Replace with your actual Elsevier API key
    api_key = os.environ['ELSEVIER_API_KEY']
    
    # Construct the API request URL
    base_url = "https://api.elsevier.com/content/serial/title"
    params = {
        "issn": issn,
        "field": "SJR,SNIP",
        "view": "STANDARD",
        "apiKey": api_key,
    }

    try:
        # Send the API request
        response = requests.get(base_url, params=params)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Parse the JSON response
        data = json.loads(response.content)

        # Extract SJR and SNIP values
        entry = data['serial-metadata-response']['entry'][0]
        sjr = {sjr['@year']: sjr['$'] for sjr in entry['SJRList']['SJR']}
        snip = {snip['@year']: snip['$'] for snip in entry['SNIPList']['SNIP']}

        return {
            "SJR": sjr,
            "SNIP": snip
        }

    except requests.exceptions.RequestException as e:
        print(f"API request failed: {e}")
        return None

# Example usage
issn = "0028-0836"  # Example ISSN
metrics = get_journal_metrics(issn)

if metrics:
    print(f"Metrics for ISSN {issn}:")
    print(f"SJR: {metrics['SJR']}")
    print(f"SNIP: {metrics['SNIP']}")

Metrics for ISSN 0028-0836:
SJR: {'2023': '18.509'}
SNIP: {'2023': '10.01'}


In [34]:
import pandas as pd
journals_df = pd.read_csv("journals/scimagojr 2023.csv", sep=";")
journals_df

Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Total Docs. (2023),Total Docs. (3years),...,Ref. / Doc.,%Female,Overton,SDG,Country,Region,Publisher,Coverage,Categories,Areas
0,1,28773,Ca-A Cancer Journal for Clinicians,journal,"15424863, 00079235",106094,Q1,211,49,124,...,9886,4395,2,35,United States,Northern America,Wiley-Blackwell,1950-2023,Hematology (Q1); Oncology (Q1),Medicine
1,2,19300156903,Foundations and Trends in Machine Learning,journal,"19358245, 19358237",37044,Q1,39,3,13,...,29900,2778,0,0,United States,Northern America,Now Publishers Inc,2008-2023,Artificial Intelligence (Q1); Human-Computer I...,Computer Science
2,3,20315,Nature Reviews Molecular Cell Biology,journal,"14710072, 14710080",35910,Q1,508,123,336,...,9319,2941,1,20,United Kingdom,Western Europe,Nature Publishing Group,2000-2023,Cell Biology (Q1); Molecular Biology (Q1),"Biochemistry, Genetics and Molecular Biology"
3,4,29431,Quarterly Journal of Economics,journal,"00335533, 15314650",30448,Q1,306,47,136,...,7755,2667,35,22,United Kingdom,Western Europe,Oxford University Press,1886-2023,Economics and Econometrics (Q1),"Economics, Econometrics and Finance"
4,5,12464,Nature Reviews Cancer,journal,"1474175X, 14741768",26837,Q1,505,105,304,...,10290,4433,1,59,United Kingdom,Western Europe,Nature Publishing Group,2001-2023,Cancer Research (Q1); Oncology (Q1),"Biochemistry, Genetics and Molecular Biology; ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29160,29161,145515,Waves in Random and Complex Media (discontinued),journal,"17455049, 17455030",,-,61,44,1397,...,4059,2054,0,2,United Kingdom,Western Europe,Taylor and Francis Ltd.,2005-2023,Engineering (miscellaneous); Physics and Astro...,Engineering; Physics and Astronomy
29161,29162,17543,Wireless Communications and Mobile Computing (...,journal,"15308677, 15308669",,-,81,145,4066,...,3190,2940,0,24,United Kingdom,Western Europe,Hindawi Limited,2001-2023,Computer Networks and Communications; Electric...,Computer Science; Engineering
29162,29163,21100874917,World Journal of Clinical Cases (discontinued),journal,23078960,,-,30,106,3584,...,3736,4000,1,44,China,Asiatic Region,Baishideng Publishing Group,2018-2023,Medicine (miscellaneous),Medicine
29163,29164,21100897940,WSQ,journal,"19341520, 07321562",,-,8,34,0,...,1179,7500,0,3,United States,Northern America,Feminist Press at CUNY,2018-2019,Gender Studies,Social Sciences


In [None]:
journals_df = journals_df[['Sourceid', 'Title', 'Type', 'Issn', 'SJR', 'SJR Best Quartile', 'H index']]

In [45]:
journals_df[['Issn1', 'Issn2']] = journals_df['Issn'].str.split(", ", expand=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  journals_df[['Issn1', 'Issn2']] = journals_df['Issn'].str.split(", ", expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  journals_df[['Issn1', 'Issn2']] = journals_df['Issn'].str.split(", ", expand=True)


In [51]:
journals_df['SJR'] = journals_df['SJR'].str.replace(',', '.').astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  journals_df['SJR'] = journals_df['SJR'].str.replace(',', '.').astype(float)


In [52]:
journals_df

Unnamed: 0,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Issn1,Issn2
0,28773,Ca-A Cancer Journal for Clinicians,journal,"15424863, 00079235",106.094,Q1,211,15424863,00079235
1,19300156903,Foundations and Trends in Machine Learning,journal,"19358245, 19358237",37.044,Q1,39,19358245,19358237
2,20315,Nature Reviews Molecular Cell Biology,journal,"14710072, 14710080",35.910,Q1,508,14710072,14710080
3,29431,Quarterly Journal of Economics,journal,"00335533, 15314650",30.448,Q1,306,00335533,15314650
4,12464,Nature Reviews Cancer,journal,"1474175X, 14741768",26.837,Q1,505,1474175X,14741768
...,...,...,...,...,...,...,...,...,...
29160,145515,Waves in Random and Complex Media (discontinued),journal,"17455049, 17455030",,-,61,17455049,17455030
29161,17543,Wireless Communications and Mobile Computing (...,journal,"15308677, 15308669",,-,81,15308677,15308669
29162,21100874917,World Journal of Clinical Cases (discontinued),journal,23078960,,-,30,23078960,
29163,21100897940,WSQ,journal,"19341520, 07321562",,-,8,19341520,07321562


In [62]:
len(results['data'])

100

In [67]:
journals_df.to_csv("journals_df.csv")

In [70]:
journals_df = pd.read_csv("journals_df.csv", index_col=0)
journals_df

Unnamed: 0,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Issn1,Issn2
0,28773,Ca-A Cancer Journal for Clinicians,journal,"15424863, 00079235",106.094,Q1,211,15424863,00079235
1,19300156903,Foundations and Trends in Machine Learning,journal,"19358245, 19358237",37.044,Q1,39,19358245,19358237
2,20315,Nature Reviews Molecular Cell Biology,journal,"14710072, 14710080",35.910,Q1,508,14710072,14710080
3,29431,Quarterly Journal of Economics,journal,"00335533, 15314650",30.448,Q1,306,00335533,15314650
4,12464,Nature Reviews Cancer,journal,"1474175X, 14741768",26.837,Q1,505,1474175X,14741768
...,...,...,...,...,...,...,...,...,...
29160,145515,Waves in Random and Complex Media (discontinued),journal,"17455049, 17455030",,-,61,17455049,17455030
29161,17543,Wireless Communications and Mobile Computing (...,journal,"15308677, 15308669",,-,81,15308677,15308669
29162,21100874917,World Journal of Clinical Cases (discontinued),journal,23078960,,-,30,23078960,
29163,21100897940,WSQ,journal,"19341520, 07321562",,-,8,19341520,07321562


In [76]:
filtered_results = []

for paper in results['data']:
    try:
        journal = paper['publicationVenue']['name']
        issn = paper['publicationVenue']['issn']
        # print(journal)
        issn_reformat = issn.replace("-", "")
        # print(issn_reformat)
        matching_rows = journals_df[(journals_df['Issn1'] == issn_reformat) | 
                            (journals_df['Issn2'] == issn_reformat)]
        
        if matching_rows.shape[0] > 0:
            SJR = matching_rows.iloc[0]['SJR']
            H_index = matching_rows.iloc[0]['H index']
            # display(matching_rows)
            paper['publicationVenue']['SJR'] = SJR
            paper['publicationVenue']['H_Index'] = H_index
            if SJR > 1.0:
                filtered_results.append(paper)
    
    except Exception as e:
        print("ERROR: ", e)

len(filtered_results)

ERROR:  'NoneType' object is not subscriptable
ERROR:  'NoneType' object is not subscriptable


72

84