<a href="https://colab.research.google.com/github/wizard339/article_finder/blob/main/article_finder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import urllib.request
import urllib.parse
import argparse
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# arXiv export API endpoint; the search expression is appended right after '='.
BASE_URL = 'http://export.arxiv.org/api/query?search_query='

# Human-readable search-field names mapped to the short field codes that
# are placed in front of ':' when the query string is built.
PREFIX = {
    'Title': 'ti',
    'Author': 'au',
    'Abstract': 'abs',
    'Comment': 'co',
    'Journal Reference': 'jr',
    'Subject Category': 'cat',
    'Report Number': 'rn',
    'ID': 'id',
    'All': 'all',
}

# Default search phrase. To ask the user interactively instead, swap in:
# input_keywords = input('Please enter the keywords or search phrases separated by commas: ')
input_keywords = 'reinforcement learning'

In [113]:
def make_query(url=BASE_URL, prefix=PREFIX['All'], keywords=input_keywords):
    """Search arXiv for an exact phrase and return the hits as a DataFrame.

    The request URL is printed before it is sent.

    Parameters
    ----------
    url : str
        API endpoint ending in ``search_query=``; the search expression is
        appended to it as-is.
    prefix : str
        Field code placed before ``:`` in the search expression (one of the
        values of ``PREFIX``).
    keywords : str
        Phrase to look for. It is wrapped in double quotes so that it is
        matched as a phrase, then URL-encoded.

    Returns
    -------
    pandas.DataFrame
        One row per ``<entry>`` element of the returned feed, indexed by the
        entry's ``<id>`` text, with the columns ``updated`` (datetime),
        ``title``, ``summary``, ``author`` (names joined with ``", "``) and
        ``link`` (href of the link titled ``pdf``, or ``None`` when the entry
        has no such link). The frame is empty when nothing matches.

    Raises
    ------
    ValueError
        If the response status is not 200.
    """
    # Encode the phrase together with its surrounding double quotes so they
    # travel as %22: a raw '"' is not a legal character inside a URL.
    phrase = urllib.parse.quote_plus(f'"{keywords}"')
    url = f'{url}{prefix}:{phrase}'
    print(url)

    req = urllib.request.Request(url)
    with urllib.request.urlopen(req) as response:
        if response.status != 200:
            raise ValueError('Please, check the correctness of the request')
        # Parse while the connection is still open; the parser reads the stream.
        soup = BeautifulSoup(response, 'xml')

    cols = ['updated', 'title', 'summary', 'author', 'link']
    ids = []
    rows = []
    for tag in soup.find_all('entry'):
        pdf_link = tag.find(title='pdf')
        # get_text() yields plain str values, so the frame holds no references
        # to the parse tree and does not depend on how many children a tag has.
        ids.append(tag.id.get_text())
        rows.append({
            # The first 10 characters of <updated> are the YYYY-MM-DD date.
            'updated': pd.to_datetime(tag.updated.get_text()[:10]),
            'title': tag.title.get_text(),
            'summary': tag.summary.get_text(),
            'author': ', '.join(n.get_text() for n in tag.find_all('name')),
            'link': pdf_link.get('href') if pdf_link is not None else None,
        })

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, index=ids, columns=cols)

In [114]:
# Run the search with make_query's default arguments and keep the resulting DataFrame.
articles = make_query()

http://export.arxiv.org/api/query?search_query=all:"reinforcement+learning"


In [115]:
# Show only the first rows so the cell output stays bounded if the result set grows.
articles.head(10)

Unnamed: 0,updated,title,summary,author,link
http://arxiv.org/abs/2001.09608v1,2020-01-27,Some Insights into Lifelong Reinforcement Lear...,A lifelong reinforcement learning system is ...,Changjian Li,http://arxiv.org/pdf/2001.09608v1
http://arxiv.org/abs/2108.11510v1,2021-08-25,Deep Reinforcement Learning in Computer Vision...,Deep reinforcement learning augments the rei...,"Ngan Le, Vidhiwar Singh Rathour, Kashu Yamazak...",http://arxiv.org/pdf/2108.11510v1
http://arxiv.org/abs/2108.03258v1,2021-08-05,Memory-two strategies forming symmetric mutual...,We investigate symmetric equilibria of mutua...,Masahiko Ueda,http://arxiv.org/pdf/2108.03258v1
http://arxiv.org/abs/2203.12114v1,2022-03-23,An Optical Controlling Environment and Reinfor...,Deep reinforcement learning has the potentia...,"Abulikemu Abuduweili, Changliu Liu",http://arxiv.org/pdf/2203.12114v1
http://arxiv.org/abs/2202.05135v3,2022-04-30,Group-Agent Reinforcement Learning,It can largely benefit the reinforcement lea...,"Kaiyue Wu, Xiao-Jun Zeng",http://arxiv.org/pdf/2202.05135v3
http://arxiv.org/abs/2204.05437v1,2022-04-11,Implementing Online Reinforcement Learning wit...,A Temporal Neural Network (TNN) architecture...,James E. Smith,http://arxiv.org/pdf/2204.05437v1
http://arxiv.org/abs/1709.05067v1,2017-09-15,Deep Reinforcement Learning for Conversational AI,Deep reinforcement learning is revolutionizi...,"Mahipal Jadeja, Neelanshi Varia, Agam Shah",http://arxiv.org/pdf/1709.05067v1
http://arxiv.org/abs/2009.07888v5,2022-05-16,Transfer Learning in Deep Reinforcement Learni...,Reinforcement learning is a learning paradig...,"Zhuangdi Zhu, Kaixiang Lin, Anil K. Jain, Jiay...",http://arxiv.org/pdf/2009.07888v5
http://arxiv.org/abs/2010.14616v1,2020-09-26,Lineage Evolution Reinforcement Learning,We propose a general agent population learni...,"Zeyu Zhang, Guisheng Yin",http://arxiv.org/pdf/2010.14616v1
http://arxiv.org/abs/2011.13577v1,2020-11-27,A survey of benchmarking frameworks for reinfo...,Reinforcement learning has recently experien...,"Belinda Stapelberg, Katherine M. Malan",http://arxiv.org/pdf/2011.13577v1


In [116]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes.
articles.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, http://arxiv.org/abs/2001.09608v1 to http://arxiv.org/abs/2011.13577v1
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   updated  10 non-null     datetime64[ns]
 1   title    10 non-null     object        
 2   summary  10 non-null     object        
 3   author   10 non-null     object        
 4   link     10 non-null     object        
dtypes: datetime64[ns](1), object(4)
memory usage: 480.0+ bytes
