In [46]:
import requests

In [6]:
import feedparser
from urllib.parse import quote

In [42]:
def get_arxiv_papers(authors, max_results=100):
    """Query the arXiv API for papers matching any of the given author names.

    Parameters
    ----------
    authors : iterable of str
        Author name spellings. Each one is URL-quoted and the results are
        joined with "|" into a single ``au:`` search term.
    max_results : int, optional
        Value sent as the API's ``max_results`` parameter (default 100).

    Returns
    -------
    list of dict
        One dict per feed entry with the keys 'title', 'published',
        'authors' (list of author names), 'link' and 'doi' (``None`` when the
        entry carries no ``arxiv_doi``).
    """
    import warnings  # local import: only needed for the failure diagnostic

    base_url = "http://export.arxiv.org/api/query?"
    # NOTE(review): "|" is used here as an OR between spellings; the arXiv API
    # manual documents AND / OR / ANDNOT - confirm "|" is honoured as intended.
    authors_quoted = "|".join(quote(author) for author in authors)
    query = f"search_query=au:{authors_quoted}&start=0&max_results={max_results}"
    feed = feedparser.parse(base_url + query)

    entries = feed.get('entries', [])
    if feed.get('bozo') and not entries:
        # Surface fetch/parse failures instead of silently returning nothing.
        warnings.warn(
            f"arXiv query returned no entries: {feed.get('bozo_exception')!r}"
        )

    papers = []
    for entry in entries:
        html_links = [
            link.get('href')
            for link in entry.get('links', [])
            if link.get('type') == 'text/html' and link.get('href')
        ]
        # Previously `[...][0]` raised IndexError when no text/html link was
        # present; fall back to the entry's own link / id instead.
        link = html_links[0] if html_links else (entry.get('link') or entry.get('id'))

        papers.append({
            'title': entry.title,
            'published': entry.published,
            'authors': [author.name for author in entry.get('authors', [])],
            'link': link,
            'doi': entry.get('arxiv_doi', None),
        })

    return papers

In [69]:
def generate_html_code(papers, my_names=["Rutger van Haasteren", "R. van Haasteren"], verbose=False):
    """Render a Jekyll publications page (front matter + HTML) from paper records.

    Parameters
    ----------
    papers : list of dict
        Records with the keys 'title', 'published', 'authors', 'link' and
        'doi' (the shape returned by ``get_arxiv_papers``).
    my_names : sequence of str
        Spellings of the page owner's name. A paper is listed only when one
        of them equals an entry of its 'authors' list. The first spelling is
        the one used in the "inc. ..." suffix. The default list is never
        mutated.
    verbose : bool
        When true, print the title of every paper that is skipped.

    Returns
    -------
    str
        The complete page: front matter, heading, and one
        ``<div class="paper">`` section per listed paper, newest first.
    """
    from html import escape  # local import keeps this cell self-contained

    page_parts = [
        "---\n"
        "layout: default\n"
        "title: Publications\n"
        "---\n"
        "\n"
        "<h1>{{ page.title }}</h1>\n"
        "\n"
    ]

    # Sorted as plain strings, newest first; assumes 'published' is a
    # year-first timestamp such as '2021-01-02T...' so string order is
    # chronological - TODO confirm against the feed.
    for paper in sorted(papers, key=lambda p: p['published'], reverse=True):
        authors = paper['authors']
        if not any(name in authors for name in my_names):
            if verbose:
                print(f"Skipping paper: {paper['title']}")
            continue

        year = paper['published'].split('-')[0]
        author_text = f"{_format_author_line(authors, my_names)} ({year})"

        # Collapse hard line wraps inside titles, then escape. Escaping also
        # guarantees the title cannot terminate the HTML comment below.
        safe_title = escape(" ".join(paper['title'].split()), quote=False)

        links = []
        arxiv_url = paper.get('link')
        if arxiv_url:
            links.append(f'<a href="{escape(arxiv_url, quote=True)}">arXiv Link</a>')
        doi = paper.get('doi')
        if doi:
            links.append(f'<a href="https://doi.org/{escape(doi, quote=True)}">DOI Link</a>')
        link_text = ", ".join(links)

        page_parts.append(
            f'<!-- {safe_title} -->\n'
            '<div class="paper" style="margin-bottom: 2em;">\n'
            f'<span style="font-size: larger; margin-bottom: 0.5em;">{author_text}</span>\n'
            f'<div>"{safe_title}"</div>\n'
            f'<div>{link_text}</div>\n'
            '</div>\n\n\n'
        )

    # Joined once at the end instead of repeated string concatenation.
    return "".join(page_parts)


def _format_author_line(authors, my_names, max_shown=3):
    """Return the HTML author line: first ``max_shown`` names, owner in bold."""
    from html import escape

    shown = authors[:max_shown]
    parts = [
        f"<b>{escape(name, quote=False)}</b>" if name in my_names else escape(name, quote=False)
        for name in shown
    ]
    text = ", ".join(parts)

    if len(authors) > max_shown:
        # The list is truncated, so always say so; previously "et al." was
        # omitted whenever the owner was among the displayed names.
        text += ", <i>et al.</i>"
        if not any(name in my_names for name in shown):
            text += f", <b>inc. {escape(my_names[0], quote=False)}</b>"
    return text

In [48]:
# Fetch up to 200 arXiv records matching either spelling of the author name.
papers = get_arxiv_papers(
    ["Rutger van Haasteren", "R. van Haasteren"],
    max_results=200,
)

In [70]:
# Render the publications page from the fetched paper records.
html_string = generate_html_code(papers, my_names=['Rutger van Haasteren', "R. van Haasteren"])

# Write as UTF-8 explicitly: author names and titles may contain non-ASCII
# characters, and the platform default encoding is not guaranteed to be UTF-8.
with open("./publications.html", 'w', encoding="utf-8") as fp:
    fp.write(html_string)