In [1]:
import requests
import pandas as pd
import re

In [2]:
def extract_doi(doi_str):
    """
    Pull the DOI out of an identifier string.

    The input is expected to look like
      "omid:br/0627021793 doi:10.1016/j.neucom.2006.0..."
    i.e. one or more whitespace-separated "scheme:value" tokens.

    Returns the run of non-whitespace characters that directly follows the
    first "doi:" in the string. When the string has no "doi:" followed by at
    least one non-whitespace character, the input is returned unchanged.
    """
    found = re.search(r'doi:(\S+)', doi_str)
    # No "doi:" token present: hand the caller back exactly what it gave us.
    return doi_str if found is None else found.group(1)

def get_references(doi, timeout=30):
    """
    Retrieve the outgoing references of a DOI from OpenCitations.

    Calls the /references/{id} endpoint and turns every returned record that
    has a non-empty 'cited' field into an edge pointing from `doi` to the
    cited work. The 'cited' value is passed through extract_doi() so that
    only the DOI part is kept.

    Parameters
    ----------
    doi : str
        DOI of the citing work, without the "doi:" prefix (the prefix is
        added when the URL is built).
    timeout : float or tuple, optional
        Forwarded to requests.get as its `timeout` argument. Defaults to 30.

    Returns
    -------
    list of dict
        One {"origin_doi": doi, "target_doi": <cited DOI>} dict per usable
        record. An empty list is returned (after printing a message) when the
        request fails, the status code is not 200, or the body is not JSON.
    """
    url = f"https://opencitations.net/index/api/v2/references/doi:{doi}"

    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the notebook cell indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code: print and return no edges.
        print(f"Request failed when fetching references for {doi}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error {response.status_code} when fetching references for {doi}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response whose body is not valid JSON (requests signals this
        # with a ValueError subclass).
        print(f"Invalid JSON when fetching references for {doi}: {exc}")
        return []

    edges = []
    for record in data:
        # The API field 'cited' might come with extra prefix text.
        cited_raw = record.get("cited")
        if cited_raw:
            edges.append({
                "origin_doi": doi,
                "target_doi": extract_doi(cited_raw)
            })
    return edges

def get_citations(doi, timeout=30):
    """
    Retrieve the incoming citations of a DOI from OpenCitations.

    Calls the /citations/{id} endpoint and turns every returned record that
    has a non-empty 'citing' field into an edge pointing from the citing work
    to `doi`. The 'citing' value is passed through extract_doi() so that only
    the DOI part is kept.

    Parameters
    ----------
    doi : str
        DOI of the cited work, without the "doi:" prefix (the prefix is added
        when the URL is built).
    timeout : float or tuple, optional
        Forwarded to requests.get as its `timeout` argument. Defaults to 30.

    Returns
    -------
    list of dict
        One {"origin_doi": <citing DOI>, "target_doi": doi} dict per usable
        record. An empty list is returned (after printing a message) when the
        request fails, the status code is not 200, or the body is not JSON.
    """
    url = f"https://opencitations.net/index/api/v2/citations/doi:{doi}"

    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the notebook cell indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code: print and return no edges.
        print(f"Request failed when fetching citations for {doi}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error {response.status_code} when fetching citations for {doi}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response whose body is not valid JSON (requests signals this
        # with a ValueError subclass).
        print(f"Invalid JSON when fetching citations for {doi}: {exc}")
        return []

    edges = []
    for record in data:
        # The API field 'citing' might have extra information; clean it.
        citing_raw = record.get("citing")
        if citing_raw:
            edges.append({
                "origin_doi": extract_doi(citing_raw),  # The citing work
                "target_doi": doi                       # Our input DOI is being cited
            })
    return edges


In [3]:
# Example DOI – replace with your actual DOI string.
doi_input = "10.1016/j.neucom.2018.10.063"

# Outgoing edges: works cited by doi_input (doi_input is the origin).
outgoing_edges = get_references(doi_input)
# Incoming edges: works citing doi_input (doi_input is the target).
incoming_edges = get_citations(doi_input)

# Combine both lists.
all_edges = outgoing_edges + incoming_edges

# Build the DataFrame and drop any duplicate rows. The columns are named
# explicitly so the frame still has origin_doi/target_doi when both lookups
# return no edges (an empty list alone would give a frame with no columns).
df_citations = pd.DataFrame(
    all_edges, columns=["origin_doi", "target_doi"]
).drop_duplicates()

In [4]:
# Show the de-duplicated edge table (one row per origin_doi -> target_doi pair).
df_citations

Unnamed: 0,origin_doi,target_doi
0,10.1016/j.neucom.2018.10.063,10.1109/tsmcb.2011.2168604
1,10.1016/j.neucom.2018.10.063,10.1016/j.neunet.2014.10.001
2,10.1016/j.neucom.2018.10.063,10.1109/ijcnn.2017.7966217
3,10.1016/j.neucom.2018.10.063,10.1155/2016/3049632
4,10.1016/j.neucom.2018.10.063,10.1364/josaa.2.001160
5,10.1016/j.neucom.2018.10.063,10.1007/s11045-016-0414-3
6,10.1016/j.neucom.2018.10.063,10.1109/tnnls.2014.2335212
7,10.1016/j.neucom.2018.10.063,10.1109/tip.2015.2475625
8,10.1016/j.neucom.2018.10.063,10.1109/robomech.2017.8261132
9,10.1016/j.neucom.2018.10.063,10.1016/j.neucom.2006.02.013
