In [4]:
import requests
import xml.etree.ElementTree as ET

def count_arxiv_papers(query, timeout=30):
    """
    Return the total number of arXiv results reported for a search query.

    Sends one request to the arXiv export API with ``all:{query}`` as the
    search expression and reads the OpenSearch ``totalResults`` element from
    the XML feed that comes back.

    :param query: The keyword(s) to search for. The value is placed after the
        ``all:`` prefix unchanged (no quoting is added here).
    :param timeout: Seconds to wait for the HTTP request before giving up.
    :return: The integer held by ``totalResults``, or 0 when that element is
        missing or empty.
    :raises requests.HTTPError: If the API answers with an error status.
    :raises xml.etree.ElementTree.ParseError: If the body is not valid XML.
    :raises ValueError: If ``totalResults`` holds non-numeric text.
    """
    base_url = "http://export.arxiv.org/api/query"
    # Let requests build the query string so characters such as '&', '#'
    # or '+' inside the query cannot corrupt the URL.
    params = {
        "search_query": f"all:{query}",
        # Only the feed-level total is read, so a single entry is enough.
        "max_results": 1,
    }

    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()

    # Parse the raw bytes so the XML parser applies the encoding the
    # document itself declares.
    root = ET.fromstring(response.content)

    # totalResults lives in the OpenSearch namespace, not the Atom one.
    total_results_tag = root.find(".//{http://a9.com/-/spec/opensearch/1.1/}totalResults")

    # A present-but-empty element has text None; treat it like a missing one.
    if total_results_tag is None or not (total_results_tag.text or "").strip():
        return 0
    return int(total_results_tag.text)

# Example usage: print the total hit count reported for each keyword.
# NOTE(review): each keyword is sent as a bare `all:` query; multi-word
# keywords may not be matched as an exact phrase — confirm against the arXiv
# API query syntax before relying on these totals.
keywords = ["large-language models", "gradient descent"]
for keyword in keywords:
    count = count_arxiv_papers(keyword)  # one HTTP request per keyword
    print(f"🔍 {keyword}: {count} papers found")



🔍 large-language models: 962875 papers found
🔍 gradient descent: 71693 papers found


In [5]:
#Papers by Year

import requests
import xml.etree.ElementTree as ET
from collections import Counter

def count_arxiv_papers_by_year(query, max_results=2000, timeout=30):
    """
    Count the arXiv entries returned for a search query, grouped by year.

    Issues a single request (``start=0``) and tallies the first four
    characters of each entry's ``published`` value. Only the entries present
    in that one response are counted, so the per-year numbers sum to at most
    ``max_results``; they are not the full totals for the query.

    :param query: The keyword(s) to search for, sent as ``all:{query}``.
    :param max_results: Max number of papers to retrieve in the request.
        NOTE(review): the original note mentions a 2000-per-request limit;
        confirm against the arXiv API documentation.
    :param timeout: Seconds to wait for the HTTP request before giving up.
    :return: Dictionary with year strings ("YYYY") as keys and paper counts
        as values, ordered by ascending year.
    :raises requests.HTTPError: If the API answers with an error status.
    :raises xml.etree.ElementTree.ParseError: If the body is not valid XML.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": f"all:{query}",
        "start": 0,
        "max_results": max_results  # Adjust based on your needs
    }

    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()

    # Parse the raw bytes so the XML parser applies the encoding the
    # document itself declares.
    root = ET.fromstring(response.content)

    atom = "{http://www.w3.org/2005/Atom}"

    # findtext() yields None for an entry that lacks <published>; such
    # entries are skipped instead of raising AttributeError.
    years = (
        (entry.findtext(f"{atom}published") or "").strip()[:4]  # Extract YYYY
        for entry in root.findall(f"{atom}entry")
    )
    year_counts = Counter(year for year in years if year)

    return dict(sorted(year_counts.items()))  # Return sorted results

# Example: Get papers by year for multiple keywords.
# The per-year numbers cover only the entries returned by one request
# (max_results defaults to 2000), not every paper matching the keyword.
keywords = ["LLM", "gradient descent"]
for keyword in keywords:
    print(f"\n🔍 Papers for: {keyword.upper()}")
    year_distribution = count_arxiv_papers_by_year(keyword)
    # Keys arrive already sorted by year, so the listing is chronological.
    for year, count in year_distribution.items():
        print(f"{year}: {count} papers")



🔍 Papers for: LLM
2010: 1 papers
2022: 4 papers
2023: 442 papers
2024: 1418 papers
2025: 135 papers

🔍 Papers for: GRADIENT DESCENT
1998: 1 papers
1999: 1 papers
2000: 1 papers
2002: 2 papers
2004: 1 papers
2005: 1 papers
2006: 2 papers
2007: 1 papers
2009: 2 papers
2010: 4 papers
2011: 8 papers
2012: 13 papers
2013: 20 papers
2014: 29 papers
2015: 49 papers
2016: 69 papers
2017: 108 papers
2018: 177 papers
2019: 224 papers
2020: 252 papers
2021: 221 papers
2022: 256 papers
2023: 237 papers
2024: 284 papers
2025: 37 papers
