In [1]:
import os
import time
import urllib.parse

import requests

# Semantic Scholar API key sent in the 'x-api-key' header of every request.
# Read from the SEM_SCHOLAR_API_KEY environment variable so a real key never has
# to be saved inside the notebook; the placeholder is kept as the fallback.
SEM_SCHOLAR_API_KEY = os.environ.get('SEM_SCHOLAR_API_KEY', 'YOUR-KEY')

In [None]:
def fetch_paper_details(paper_id):
    """Fetch the ``fieldsOfStudy`` record of one paper from the Semantic Scholar Graph API.

    Args:
        paper_id: Identifier interpolated into the ``/graph/v1/paper/{paper_id}`` URL.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None``. Failures (non-200
        status or a transport error such as a timeout) are printed, not raised.
    """
    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}?fields=fieldsOfStudy"
    try:
        # Without a timeout a stalled connection would block the whole crawl forever.
        response = requests.get(url, headers={'x-api-key': SEM_SCHOLAR_API_KEY}, timeout=30)
    except requests.RequestException as exc:
        # Keep the function's "report and return None" failure convention for
        # transport errors too, instead of aborting the caller's long loop.
        print(f"Failed to fetch paper details: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Failed to fetch paper details: {response.status_code}, {response.text}")
    return None

def fetch_papers(fields, min_citations, start_year, end_year):
    """Collect well-cited Computer Science papers for every (field, year) pair.

    For each query string in ``fields`` and each year in
    ``start_year..end_year`` (inclusive) one search request is issued. Every hit
    with at least ``min_citations`` citations is then looked up individually and
    kept only if its ``fieldsOfStudy`` contains ``'Computer Science'``.

    Args:
        fields: Iterable of free-text search queries.
        min_citations: Minimum ``citationCount`` a paper needs to be considered.
        start_year: First publication year to query (inclusive).
        end_year: Last publication year to query (inclusive).

    Returns:
        A list of paper dicts (as returned by the search endpoint), sorted by
        ``citationCount`` in descending order. Each ``paperId`` appears at most
        once, even when several queries return the same paper.

    Notes:
        Only the first page (``limit=100``) of each query is inspected; there is
        no pagination.
        NOTE(review): confirm that this search endpoint honours the ``sort``
        query parameter - if it does not, the 100 hits are not necessarily the
        most cited ones.
    """
    papers = []
    # Ids whose Computer-Science membership has already been decided. Skipping
    # them avoids one detail request per repeated hit and duplicate results.
    seen_ids = set()
    headers = {'x-api-key': SEM_SCHOLAR_API_KEY}

    for field in fields:
        print(field)
        encoded_field = urllib.parse.quote(field)
        time.sleep(5)  # pause between fields to stay gentle on the API
        for year in range(start_year, end_year + 1):
            time.sleep(3)  # pause between search requests
            url = f"https://api.semanticscholar.org/graph/v1/paper/search?query={encoded_field}&limit=100&fields=title,authors,year,citationCount,url&sort=citationCount&year={year}"
            try:
                response = requests.get(url, headers=headers, timeout=30)
            except requests.RequestException as exc:
                # A single failed request should not abort the whole crawl.
                print(f"Failed to fetch papers for {field} in {year}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Failed to fetch papers for {field} in {year}: {response.status_code}, {response.text}")
                continue

            for paper in response.json().get('data') or []:
                paper_id = paper.get('paperId')
                if paper_id is None or paper_id in seen_ids:
                    continue
                # citationCount may be missing or null; treat both as zero.
                if (paper.get('citationCount') or 0) < min_citations:
                    continue

                paper_details = fetch_paper_details(paper_id)
                if paper_details is None:
                    # Lookup failed; leave the id unseen so a later query can retry it.
                    continue
                seen_ids.add(paper_id)

                if 'Computer Science' in (paper_details.get('fieldsOfStudy') or []):
                    papers.append(paper)

    return sorted(papers, key=lambda x: x.get('citationCount') or 0, reverse=True)



# Free-text search queries; each one is sent to the paper search once per year.
fields = [
    "Machine Learning",
    "Deep Learning",
    'Artificial Intelligence',
    'Computer Science',
    'Natural Language Processing',
    'Net',
    'Attention',
    'Normalization',
    'reinforcement learning',
    'rnn',
    'cnn',
    'recurrent',
    'convolutional',
    'machine translation',
    'natural language understanding',
    'computer vision',
    'gpt',
    'image generation',
    'image recognition',
    'generative pre-trained transformer',
    'generative pretrained transformer',
    'generative ai',
    'transformer',
    'chatgpt',
    'large language model',
    'llm',
    'understanding natural language',
    'stochastic parrot',
    'alphago',
    'alphazero',
    'alphafold',
    'chinchilla',  # was misspelled 'chincilla', which would query the wrong term
    'scaling law',
]
def get_papers(fields, min_citations=500, start_year=1935, end_year=2023):
    """Fetch papers for ``fields`` using the notebook's default search window.

    Args:
        fields: Iterable of search queries forwarded to ``fetch_papers``.
        min_citations: Minimum citation count a paper needs (default 500).
        start_year: First publication year to query, inclusive (default 1935).
        end_year: Last publication year to query, inclusive (default 2023).

    Returns:
        The list produced by ``fetch_papers``, which is already sorted by
        ``citationCount`` in descending order, so no second sort is needed.
    """
    return fetch_papers(fields, min_citations, start_year, end_year)

papers = get_papers(fields)

# Collapse repeated hits: key by paperId so the last occurrence of an id wins,
# while dict insertion order keeps the position of its first occurrence.
unique_papers = {entry['paperId']: entry for entry in papers}
papers = list(unique_papers.values())

# Cell output: number of distinct papers collected.
len(papers)

In [None]:
# One summary line per paper: year - first author - title - citations - url.
for paper in papers:
    # Papers with an empty author list would otherwise raise IndexError; use the
    # same "No Author" fallback as convert_to_markdown_table.
    author_name = paper['authors'][0]['name'] if paper['authors'] else "No Author"
    print(f"{paper['year']} - {author_name} - {paper['title']} - {paper['citationCount']} - {paper['url']}")

In [None]:
def convert_to_markdown_table(papers):
    """Render papers as a Markdown table with Year, Author, Title and Citations columns.

    Args:
        papers: Iterable of dicts with the keys ``'title'``, ``'url'``,
            ``'authors'`` (list of dicts with a ``'name'`` key), ``'year'`` and
            ``'citationCount'``.

    Returns:
        The table as a single string: header row, separator row, then one row
        per paper, each terminated by a newline. The title is rendered as a
        link to the paper's URL and only the first author is shown
        (``"No Author"`` when the author list is empty).
    """
    def _cell(value):
        # A raw '|' would end the table cell early and a line break would end
        # the row, so escape the former and flatten the latter.
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    rows = [
        "| Year | Author | Title | Citations |\n",
        "|------|--------|-------|-----------|\n",
    ]

    for paper in papers:
        title_link = f"[{_cell(paper['title'])}]({paper['url']})"
        author_name = _cell(paper['authors'][0]['name']) if paper['authors'] else "No Author"
        rows.append(f"| {paper['year']} | {author_name} | {title_link} | {paper['citationCount']} |\n")

    # Join once instead of growing a string inside the loop.
    return "".join(rows)


# Build the Markdown table for the de-duplicated papers and print its raw
# source text so it can be copied out of the cell output.
markdown_table = convert_to_markdown_table(papers)
print(markdown_table)