## Code to get the most influential references or citations of a paper
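- Documentation of API: https://api.semanticscholar.org/api-docs/#tag/Author-Data/operation/post_graph_get_authors (note: this anchor points to the author batch endpoint; the code below calls the paper `/citations` and `/references` endpoints, which are listed under "Paper Data" in the same API documentation)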

In [2]:
import requests
import time

In [3]:
# Semantic Scholar ID of the paper whose citations are requested in this cell.
paper_id = "b9b220b485d2add79118ffdc2aaa148b67fa53ef"

# Citations endpoint for that paper, asking for the title, authors and
# isInfluential fields of each entry.
url_string = (
    "https://api.semanticscholar.org/graph/v1/paper/"
    + paper_id
    + "/citations?fields=title,authors,isInfluential"
)


def load_json_from_url(url, timeout=30):
    """
    Download a URL with an HTTP GET request and return its body parsed as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            explicitly because ``requests`` does not time out on its own.

    Returns:
        The decoded JSON payload, i.e. whatever ``response.json()`` yields.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error payload back as data.
    response.raise_for_status()
    return response.json()

# Request the citations listing built in url_string (performs a network call)
# and keep the parsed JSON response in `data`.
data = load_json_from_url(url_string)

In [4]:

def get_paper_refs_or_cits(paper_id, fields="title,authors,isInfluential", limit=100, use_references=True):
    """
    Fetch every reference or citation of a paper, following pagination.

    Pages are requested until a response no longer carries a usable ``next``
    offset. If a request fails, the error is printed and whatever was
    collected up to that point is returned.

    Args:
        paper_id: The Semantic Scholar paper ID.
        fields: Comma-separated fields to include in the response.
        limit: Number of items to fetch per request.
        use_references: If True, query the ``references`` endpoint of the
            paper; if False, query its ``citations`` endpoint.

    Returns:
        Tuple of (all_items, influential_items), where the second list holds
        the entries whose ``isInfluential`` value is truthy.
    """
    suffix = "references" if use_references else "citations"
    base_url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/{suffix}"

    all_references = []
    offset = 0

    while True:
        # Let requests build (and encode) the query string; the offset is only
        # sent once we are past the first page.
        params = {"fields": fields, "limit": limit}
        if offset > 0:
            params["offset"] = offset

        try:
            # Explicit timeout: requests would otherwise wait indefinitely.
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()  # Raise exception for HTTP errors
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching references: {e}")
            break

        # A page may carry "data": null; treat that as an empty page.
        all_references.extend(data.get('data') or [])

        # Stop on the last page, and also when the reported offset is missing
        # its integer value or does not move forward (that would loop forever).
        next_offset = data.get('next')
        if not isinstance(next_offset, int) or next_offset <= offset:
            break
        offset = next_offset

        # Add a small delay to avoid hitting rate limits
        time.sleep(0.5)

    # Filter for influential entries
    influential_references = [ref for ref in all_references if ref.get('isInfluential', False)]

    print(f"Total references: {len(all_references)}")
    print(f"Influential references: {len(influential_references)}")
    return all_references, influential_references

# Example usage
# if __name__ == "__main__":
# use_references = False

# paper_id = "b9b220b485d2add79118ffdc2aaa148b67fa53ef"
# all_refs, influential_refs = get_paper_refs_or_cits(
#     paper_id,
#     fields="title,authors,isInfluential,url",
#     use_references=use_references
# )

# print(f"Total references: {len(all_refs)}")
# print(f"Influential references: {len(influential_refs)}")

In [29]:
def print_influential_references(influential_refs, use_references=False, limit=5):
    """
    Prints the first few influential references or citations.

    For each entry the nested paper record is read from ``'citedPaper'``
    (references) or ``'citingPaper'`` (citations) and its title, year,
    author names and URL are printed. Missing or null values are tolerated:
    a missing paper record is treated as empty, a missing year is shown as
    ``N/A`` and missing authors produce an empty author list.

    Args:
        influential_refs: List of influential references or citations.
        use_references: Boolean indicating whether the entries are references
            (True) or citations (False).
        limit: Number of influential references to print.
    """
    paper_key = 'citedPaper' if use_references else 'citingPaper'

    for i, ref in enumerate(influential_refs[:limit]):
        # `or {}` / `or []` also cover keys that are present but null.
        paper = ref.get(paper_key) or {}
        authors = paper.get('authors') or []
        author_names = ', '.join((a.get('name') or '') for a in authors)

        year = paper.get('year')
        if year is None:
            year = 'N/A'

        print(f"\nInfluential Reference {i+1}:")
        print(f"Title: {paper.get('title')} - (year: {year})")
        print(f"Authors: {author_names}")
        print(f"URL: {paper.get('url')}")

In [6]:
# Papers explored in this notebook, keyed by a short label. Keeping them in
# one mapping avoids a chain of assignments where only the last one counts.
CANDIDATE_PAPER_IDS = {
    # ID used in the example request near the top of the notebook
    "example": "b9b220b485d2add79118ffdc2aaa148b67fa53ef",
    # Machiavelli paper
    "machiavelli": "5da2d404d789aeff266b63a760d07fe8bc31ba23",
    # Reward Rational Implicit Choice
    "reward_rational_implicit_choice": "1f52deff193c7c3dfc77c48cbdc653c94f093a92",
}

# Paper analysed by the cells below; change the label to switch papers.
paper_id = CANDIDATE_PAPER_IDS["reward_rational_implicit_choice"]

In [15]:
# Direction of the lookup: True fetches the paper's references,
# False fetches the papers citing it.
use_references = False

all_refs, influential_refs = get_paper_refs_or_cits(
    paper_id,
    use_references=use_references,
    fields=",".join([
        "title",
        "authors",
        "isInfluential",
        "url",
        "contexts",
        "intents",
        "year",
    ]),
)

Total references: 184
Influential references: 15


In [30]:


# Show up to 100 of the influential entries fetched above.
print_influential_references(
    influential_refs,
    limit=100,
    use_references=use_references,
)


Influential Reference 1:
Title: Learning from Preferences and Mixed Demonstrations in General Settings - (year: 2025)
Authors: Jason R Brown, Carl Henrik Ek, Robert D Mullins
URL: https://www.semanticscholar.org/paper/72329bf059a3ae7d5e7d0bc21a2beed31141205f

Influential Reference 2:
Title: On the Partial Identifiability in Reward Learning: Choosing the Best Reward - (year: 2025)
Authors: Filippo Lazzati, Alberto Maria Metelli
URL: https://www.semanticscholar.org/paper/0475eed79b448112d91f2799a4ec7d273223c087

Influential Reference 3:
Title: Learning Human Preferences Over Robot Behavior as Soft Planning Constraints - (year: 2024)
Authors: Austin Narcomey, Nathan Tsoi, Ruta Desai, Marynel Vázquez
URL: https://www.semanticscholar.org/paper/14ba38ecdd58366582754507b5d61465d8d6aff9

Influential Reference 4:
Title: Data Quality in Imitation Learning - (year: 2023)
Authors: Suneel Belkhale, Yuchen Cui, Dorsa Sadigh
URL: https://www.semanticscholar.org/paper/a91b821a95ccd417e0f1315247732dd4

In [17]:
# Minimum number of items an entry's 'contexts' list must have to be kept.
min_contexts = 2

# Entries without a 'contexts' key count as having none.
subset_refs = [
    entry
    for entry in all_refs
    if len(entry.get('contexts', [])) >= min_contexts
]

print(f"Total references with at least {min_contexts} contexts: {len(subset_refs)}")

Total references with at least 2 contexts: 51


In [24]:
# all_refs[0]

In [19]:
# Inspect the first kept entry to see the structure of a record.
# NOTE(review): raises IndexError when subset_refs is empty.
subset_refs[0]

{'isInfluential': True,
 'contexts': ['Jeon et al. (2020) interpret many of types of feedback as part of an overarching formalism, reward-rational (implicit) choice (RRC), providing a mathematical theory for reward learning that combines different types of feedback.',
  'Reward-rational preference orderings over observations, the basis of LEOPARD, are a generalisation of the deterministic reward-rational choice framework (Jeon et al., 2020), but offer several distinct advantages.',
  'First, we develop a general mathematical framework, reward-rational partial orderings (RRPO), extending that of deterministic reward-rational choice (RRC, Jeon et al. (2020)).'],
 'intents': [],
 'citingPaper': {'paperId': '72329bf059a3ae7d5e7d0bc21a2beed31141205f',
  'url': 'https://www.semanticscholar.org/paper/72329bf059a3ae7d5e7d0bc21a2beed31141205f',
  'title': 'Learning from Preferences and Mixed Demonstrations in General Settings',
  'year': 2025,
  'authors': [{'authorId': '2376282484', 'name': 'J

In [22]:
# List title, year and URL of every entry in subset_refs.
# The nested paper record sits under 'citedPaper' for references and under
# 'citingPaper' for citations, so the key follows the flag used for the fetch
# instead of assuming citations.
paper_key = 'citedPaper' if use_references else 'citingPaper'

for i, ref in enumerate(subset_refs):
    # `or {}` keeps the loop going when the nested record is missing or null.
    paper = ref.get(paper_key) or {}
    title = paper.get('title', 'No Title Available')
    url = paper.get('url', 'No URL Available')
    print(f"{i}. Title: {title} - year: {paper.get('year', 'N/A')}")
    print(f"URL: {url}\n")

0. Title: Learning from Preferences and Mixed Demonstrations in General Settings - year: 2025
URL: https://www.semanticscholar.org/paper/72329bf059a3ae7d5e7d0bc21a2beed31141205f

1. Title: Collaborative Rational Speech Act: Pragmatic Reasoning for Multi-Turn Dialog - year: 2025
URL: https://www.semanticscholar.org/paper/bf0257efd52561519fd6d12908de88c94777d872

2. Title: On the Partial Identifiability in Reward Learning: Choosing the Best Reward - year: 2025
URL: https://www.semanticscholar.org/paper/0475eed79b448112d91f2799a4ec7d273223c087

3. Title: Reinforcement Learning From Imperfect Corrective Actions And Proxy Rewards - year: 2024
URL: https://www.semanticscholar.org/paper/123d8f72a57e7f956d383de500d5abf32380fd68

4. Title: Building Machines that Learn and Think with People - year: 2024
URL: https://www.semanticscholar.org/paper/d35f1042defe9a1d69c343ce0237f14d057f48b8

5. Title: Value Internalization: Learning and Generalizing from Social Reward - year: 2024
URL: https://www.se

In [21]:
# Display every kept entry in full.
# NOTE(review): this dumps the whole list and can produce very long output;
# consider showing a slice such as subset_refs[:3] instead.
subset_refs

[{'isInfluential': True,
  'contexts': ['Jeon et al. (2020) interpret many of types of feedback as part of an overarching formalism, reward-rational (implicit) choice (RRC), providing a mathematical theory for reward learning that combines different types of feedback.',
   'Reward-rational preference orderings over observations, the basis of LEOPARD, are a generalisation of the deterministic reward-rational choice framework (Jeon et al., 2020), but offer several distinct advantages.',
   'First, we develop a general mathematical framework, reward-rational partial orderings (RRPO), extending that of deterministic reward-rational choice (RRC, Jeon et al. (2020)).'],
  'intents': [],
  'citingPaper': {'paperId': '72329bf059a3ae7d5e7d0bc21a2beed31141205f',
   'url': 'https://www.semanticscholar.org/paper/72329bf059a3ae7d5e7d0bc21a2beed31141205f',
   'title': 'Learning from Preferences and Mixed Demonstrations in General Settings',
   'year': 2025,
   'authors': [{'authorId': '2376282484', 