In [1]:
def arxiv(arxiv_id, dirname='../', seq=',', output=True):
    """Download an arXiv paper's PDF and return its basic metadata.

    The abstract page ``https://arxiv.org/abs/<arxiv_id>`` is fetched and
    parsed for the page title, the author links and the ``citation_date``
    meta tag.  The PDF is stored as ``<dirname>/<year>/<page title>.pdf``
    (characters that are unsafe in file names are replaced by ``_``) unless
    a file with that name already exists.

    Parameters
    ----------
    arxiv_id : str
        arXiv identifier; surrounding whitespace is stripped.
    dirname : str, optional
        Base directory under which the per-year sub-directory is created.
    seq : str, optional
        Field separator used for the line appended to ``paper_list.txt``.
    output : bool, optional
        When truthy, append ``id<seq>title<seq>authors`` to
        ``paper_list.txt`` in the current working directory.

    Returns
    -------
    dict
        ``{arxiv_id: {'Year': str, 'title': str, 'url': str,
        'authers': list of str}}``.

    Notes
    -----
    Errors raised by ``urllib`` while fetching the page or the PDF are not
    caught here.  If the page has no ``<title>`` or no ``div.authors``
    element the attribute access on the missing node raises
    ``AttributeError``.  A missing or unparsable citation date falls back
    to the year 1900.
    """
    import datetime as dt
    import os
    import re
    import urllib.request as request
    from bs4 import BeautifulSoup

    arxiv_id = arxiv_id.strip()

    # URL
    url_index = 'https://arxiv.org/'
    url_abs = url_index + 'abs/' + arxiv_id
    url_pdf = url_index + 'pdf/' + arxiv_id + '.pdf'

    # Fetch and parse the abstract page; the context manager closes the
    # HTTP response once BeautifulSoup has consumed it.
    with request.urlopen(url_abs) as rqt:
        print(url_abs)
        soup = BeautifulSoup(rqt, "lxml")

    # Page title with every whitespace run (newlines included) collapsed.
    title = soup.find('head').find('title').get_text()
    title = re.sub(r'\s+', ' ', title)
    # The file name keeps the "[id]" prefix; only the metadata title drops it.
    title_new = title + '.pdf'

    # Strip a leading "[archive/1234567]" or "[1234.56789]" prefix, with an
    # optional version suffix such as "v2".  The space that followed the
    # prefix is kept so titles stay identical to those of earlier runs.
    title = re.sub(r'^\[[a-z-]+/[0-9]+(?:v[0-9]+)?\]', '', title, count=1)
    title = re.sub(r'^\[[0-9]{4}\.[0-9]+(?:v[0-9]+)?\]', '', title, count=1)

    # Author names are the link texts inside <div class="authors">.
    authers = [link.get_text()
               for link in soup.find('div', class_='authors').find_all('a')]

    # Citation date -> year, used as the sub-directory name.
    citation_meta = soup.find('meta', attrs={'name': 'citation_date'})
    citation_date = None if citation_meta is None else citation_meta.get('content')

    def str2time(time):
        """Parse 'YYYY/MM/DD'; fall back to 1900-01-01 when that fails."""
        try:
            return dt.datetime.strptime(time, '%Y/%m/%d')
        except (TypeError, ValueError):
            # TypeError: value is None; ValueError: string has another format.
            return dt.datetime(1900, 1, 1, 0, 0)

    citation_year = str(str2time(citation_date).year)

    dirname = os.path.join(dirname, citation_year)
    os.makedirs(dirname, exist_ok=True)

    # Replace characters that are not allowed in file names on common
    # platforms: \ / : * ? " < > | (the yen sign, U+00A5, is kept from the
    # previous pattern).
    title_new = re.sub(r'[\\/:*?"<>|\u00a5]', '_', title_new)
    filename = os.path.join(dirname, title_new)

    if not os.path.exists(filename):
        # Download to a temporary name first so that an interrupted transfer
        # never leaves a truncated PDF that a later run would skip.
        partial_name = filename + '.part'
        try:
            request.urlretrieve(url_pdf, partial_name)
            os.replace(partial_name, filename)
        finally:
            if os.path.exists(partial_name):
                os.remove(partial_name)
        print('Save: ', filename)
    else:
        print('Allready, the file exists. ')

    contents = {
        arxiv_id: {
            'Year': citation_year,
            'title': title,
            'url': url_pdf,
            'authers': authers
        }
    }

    for key in contents.keys():
        print(key, contents[key])
    print('')

    if output:
        # Explicit UTF-8 so non-ASCII author names or titles cannot fail on
        # a platform whose default encoding is narrower.
        with open('paper_list.txt', 'a', encoding='utf-8') as list_file:
            list_file.write(arxiv_id + seq + title + seq + str(authers) + '\n')

    # One Markdown table row: |year|id|[title](pdf url)|authors|
    text_line = '|'+citation_year+'|'+arxiv_id + '|' + '['+title+']'+'('+url_pdf+')' + '|' + ','.join(authers)+'|' + '\n'
    print(text_line)
    return contents

In [2]:
# Whitespace-separated arXiv identifiers to fetch.
# NOTE(review): '1703.0687' has only four digits after the dot, while every
# other 2015-or-later ID below has five -- confirm it is not a typo.
arxivstr = """
    1709.01066
    1310.4546
    1301.3781
    1512.01237
    1702.02138
    1703.0687
    1312.5258
    1611.07074
    1512.01237
    1512.01237
    1601.07558
    1001.0785
    1701.04579
    0911.3635
    1312.5258
    1612.01928
    1612.03809
    1612.07837
    1702.08431
    1703.08002
    1706.05394
    1409.0473
    1504.00702
    hep-th/9810056
    1511.06410
    1409.4842
    1512.03385
    1610.09001
    1703.01386
"""


import re

# Raw string: '\s' inside a plain literal is an invalid escape sequence.
pattern = re.compile(r'\s+')
# Split on whitespace, discard the empty leading/trailing tokens, and drop
# repeated IDs (first occurrence wins, order kept) so that no paper is
# fetched or logged to paper_list.txt more than once.
arxivlist = list(dict.fromkeys(token for token in pattern.split(arxivstr) if token))
# Trailing expression: the notebook displays the list of metadata dicts.
[arxiv(arxiv_id) for arxiv_id in arxivlist]

https://arxiv.org/abs/1709.01066
Save:  ../2017/[1709.01066] Quantum Decimation in Hilbert Space_ Coarse-Graining without Structure.pdf
1709.01066 {'Year': '2017', 'title': ' Quantum Decimation in Hilbert Space: Coarse-Graining without Structure', 'url': 'https://arxiv.org/pdf/1709.01066.pdf', 'authers': ['Ashmeet Singh', 'Sean M. Carroll']}

|2017|1709.01066|[ Quantum Decimation in Hilbert Space: Coarse-Graining without Structure](https://arxiv.org/pdf/1709.01066.pdf)|Ashmeet Singh,Sean M. Carroll|

https://arxiv.org/abs/1310.4546
Save:  ../2013/[1310.4546] Distributed Representations of Words and Phrases and their Compositionality.pdf
1310.4546 {'Year': '2013', 'title': ' Distributed Representations of Words and Phrases and their Compositionality', 'url': 'https://arxiv.org/pdf/1310.4546.pdf', 'authers': ['Tomas Mikolov', 'Ilya Sutskever', 'Kai Chen', 'Greg Corrado', 'Jeffrey Dean']}

|2013|1310.4546|[ Distributed Representations of Words and Phrases and their Compositionality](https

Save:  ../2016/[1612.07837] SampleRNN_ An Unconditional End-to-End Neural Audio Generation Model.pdf
1612.07837 {'Year': '2016', 'title': ' SampleRNN: An Unconditional End-to-End Neural Audio Generation Model', 'url': 'https://arxiv.org/pdf/1612.07837.pdf', 'authers': ['Soroush Mehri', 'Kundan Kumar', 'Ishaan Gulrajani', 'Rithesh Kumar', 'Shubham Jain', 'Jose Sotelo', 'Aaron Courville', 'Yoshua Bengio']}

|2016|1612.07837|[ SampleRNN: An Unconditional End-to-End Neural Audio Generation Model](https://arxiv.org/pdf/1612.07837.pdf)|Soroush Mehri,Kundan Kumar,Ishaan Gulrajani,Rithesh Kumar,Shubham Jain,Jose Sotelo,Aaron Courville,Yoshua Bengio|

https://arxiv.org/abs/1702.08431
Save:  ../2017/[1702.08431] Boundary-Seeking Generative Adversarial Networks.pdf
1702.08431 {'Year': '2017', 'title': ' Boundary-Seeking Generative Adversarial Networks', 'url': 'https://arxiv.org/pdf/1702.08431.pdf', 'authers': ['R Devon Hjelm', 'Athul Paul Jacob', 'Tong Che', 'Adam Trischler', 'Kyunghyun Cho', 'Y

[{'1709.01066': {'Year': '2017',
   'title': ' Quantum Decimation in Hilbert Space: Coarse-Graining without Structure',
   'url': 'https://arxiv.org/pdf/1709.01066.pdf',
   'authers': ['Ashmeet Singh', 'Sean M. Carroll']}},
 {'1310.4546': {'Year': '2013',
   'title': ' Distributed Representations of Words and Phrases and their Compositionality',
   'url': 'https://arxiv.org/pdf/1310.4546.pdf',
   'authers': ['Tomas Mikolov',
    'Ilya Sutskever',
    'Kai Chen',
    'Greg Corrado',
    'Jeffrey Dean']}},
 {'1301.3781': {'Year': '2013',
   'title': ' Efficient Estimation of Word Representations in Vector Space',
   'url': 'https://arxiv.org/pdf/1301.3781.pdf',
   'authers': ['Tomas Mikolov', 'Kai Chen', 'Greg Corrado', 'Jeffrey Dean']}},
 {'1512.01237': {'Year': '2015',
   'title': ' Quantum mechanics of 4-derivative theories',
   'url': 'https://arxiv.org/pdf/1512.01237.pdf',
   'authers': ['Alberto Salvio', 'Alessandro Strumia']}},
 {'1702.02138': {'Year': '2017',
   'title': ' An Imp