### Documentation here https://dev.elsevier.com/documentation/ArticleRetrievalAPI.wadl

In [None]:
# https://api.elsevier.com/content/article/doi/{doi}

In [2]:
import pandas as pd
import requests
import os
import xmltodict
import json
from credentials import keys

In [6]:
#read the dataset
df = pd.read_csv("../csv/02_papers_with_abstracts_removed_duplicates_not_filtered_yet.csv")


In [4]:
# This function downloads the full text from the doi number and save it in a folder called "full_texts" as a json object

def downloadFullText(doi, id):
  try:
    # Split the DOI into segments
    doi_segments = doi.split("/")

    # Create the full folder path, including subfolders
    folder_path = os.path.join("./full_texts", *doi_segments)

    # Create the folders if they don't exist
    os.makedirs(folder_path, exist_ok=True)

    # Send the request to the Elsevier API
    response = requests.get(f"https://api.elsevier.com/content/article/doi/{doi}", headers={
      "X-ELS-APIKey": keys["els-apikey"],  #save the keys in a new python file called credentials.py that has a dict variable called  keys  = {"key": "value"}
        "X-ELS-Insttoken": keys["els-inst-token"],
        "Accept" : "application/json"
    })

    # Check the status code of the response
    if response.status_code == 200:
      json_data = response.json()
      file_name = doi.replace("/", "_")
      file_path = os.path.join(folder_path, f"{file_name}.json")  # Use .json extension
      with open(file_path, 'w') as f:
        f.write(json.dumps(json_data, indent=4))  # Indent for readability

    else:
      # If the request was not successful, print an error message
      print(f"Error retrieving article: {response.text}")
  except Exception as e:
    print(id, str(e))

In [6]:
# This function downloads the full text from the doi number and save it in a folder called "full_texts" as a json object

def topicSearch(topicId, id):
  try:
    # Split the DOI into segments
    doi_segments = f"{topicId}/{id}"

    # Create the full folder path, including subfolders
    folder_path = os.path.join("./topic_full_texts", *doi_segments)

    # Create the folders if they don't exist
    os.makedirs(folder_path, exist_ok=True)

    # Send the request to the Elsevier API
    response = requests.get(f"https://api.elsevier.com/analytics/scival/topic/{topicId}", headers={
      "X-ELS-APIKey": keys["els-apikey"],  #save the keys in a new python file called credentials.py that has a dict variable called  keys  = {"key": "value"}
        "X-ELS-Insttoken": keys["els-inst-token"],
        "Accept" : "application/json"
    })

    # Check the status code of the response
    if response.status_code == 200:
      json_data = response.json()
      file_name = doi_segments.replace("/", "_")
      file_path = os.path.join(folder_path, f"{file_name}.json")  # Use .json extension
      with open(file_path, 'w') as f:
        f.write(json.dumps(json_data, indent=4))  # Indent for readability

    else:
      # If the request was not successful, print an error message
      print(f"Error retrieving article: {response.text}")
  except Exception as e:
    print(id, str(e))

In [7]:
for i in range(2):
    print(str(i), end="\r")
    with open("last_rec", 'w') as f:
        f.write(str(i))
    topicSearch("digital-twin", i)

0 [Errno 30] Read-only file system: '/0'
1 [Errno 30] Read-only file system: '/1'


In [7]:
for i in range(2):
    print(str(i)+ "/" + str(len(df)), end="\r")
    with open("last_rec", 'w') as f:
        f.write(str(i))
    doi = df.loc[i]["dc:identifier"].replace("DOI:", "")
    downloadFullText(doi, i)

0/15353

In [8]:
for i in range(1):
    try:
        print(str(i)+ "/" + str(len(df)), end="\r")
        with open("last_rec", 'w') as f:
            f.write(str(i))
        doi = str(df.loc[i]["dc:identifier"]).replace("DOI:", "")
        downloadFullText(doi, i)
    except:
        print(i, )
        pass

0/15353