In [154]:
import ast
import glob
import json
import os
import re
import urllib.request as libreq

import pandas as pd
import requests
import tqdm
from bs4 import BeautifulSoup

In [285]:
# Start from an empty frame; the scraping loop further down adds one row per paper to it.
df = pd.DataFrame()

In [286]:
# Conference year to query; it is interpolated into the DBLP search URL below.
year = '2013'

# DBLP publication-search endpoint, restricted to the conf/iclr stream and the
# chosen year, answering in JSON (h=1000 is presumably the max hit count -- confirm
# against the DBLP API docs).
dblp_query = f'https://dblp.uni-trier.de/search/publ/api?q=stream%3Aconf%2Ficlr%3A%20year%3A{year}%3A&h=1000&format=json'

# Parse the response body straight from the open HTTP response.
with libreq.urlopen(dblp_query) as response:
    data_json = json.load(response)


In [287]:
# Preview the title of every hit returned by the search.
hits = data_json['result']['hits']['hit']
[hit['info']['title'] for hit in hits]

['Local Component Analysis',
 'When Does a Mixture of Products Contain a Product of Mixtures?',
 'Linear-Nonlinear-Poisson Neurons Can Do Inference On Deep Boltzmann Machines',
 'Regularized Auto-Encoders Estimate Local Statistics',
 'Visual Objects Classification with Sliding Spatial Pyramid Matching',
 'Cutting Recursive Autoencoder Trees',
 'Clustering Learning for Robotic Vision',
 'Unsupervised Feature Learning for low-level Local Image Descriptors',
 'Matrix Approximation under Local Low-Rank Assumption',
 'The Manifold of Human Emotions',
 'Efficient Learning of Domain-invariant Image Representations',
 'Auto-pooling: Learning to Improve Invariance of Image Features from Image Sequences',
 'Barnes-Hut-SNE',
 'The Diagonalized Newton Algorithm for Nonnegative Matrix Factorization',
 'Feature grouping from spatially constrained multiplicative interaction',
 'Factorized Topic Models',
 'Boltzmann Machines and Denoising Autoencoders for Image Denoising',
 'Pushing Stochastic Gradien

In [288]:
# Split the DBLP hits into paper entries (title + link) and the proceedings entry
# whose title carries the conference name, date and year.
hits = data_json['result']['hits']['hit']
if len(hits) < 2:
    raise ValueError(f'Expected at least two DBLP hits, got {len(hits)}')

# As before, the final hit is discarded and the one before it is treated as the
# proceedings record; everything earlier is a paper.
paper_hits = hits[:-2]
proceedings_title = hits[-2]['info']['title']

links = [hit['info']['ee'] for hit in paper_hits]
titles = [hit['info']['title'] for hit in paper_hits]

# The proceedings title is a comma-separated list of fields. The first field is
# the conference name; the date is the field immediately before the 4-digit year.
# Locating the year (instead of unpacking exactly eight fields) keeps this working
# when the location has more or fewer comma-separated parts.
# Fields are stripped so that `year` does not keep the space that follows each comma.
fields = [field.strip() for field in proceedings_title.split(',')]
year_positions = [i for i, field in enumerate(fields) if len(field) == 4 and field.isdigit()]
if not year_positions or year_positions[-1] < 2:
    raise ValueError(f'Cannot find "<date>, <year>" in proceedings title: {proceedings_title!r}')
year_idx = year_positions[-1]

conference = fields[0]
date = fields[year_idx - 1]
year = fields[year_idx]

print(conference)
print(date)
print(year)

1st International Conference on Learning Representations
 May 2-4
 2013


In [289]:
# Scraper OpenReview
#
# For every paper link, download the page and read the author list and abstract
# from its <meta name="citation_author"> / <meta name="citation_abstract"> tags.
# A page that cannot be fetched, or that lacks those tags, yields an empty author
# list / empty abstract instead of aborting the whole run.

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server blocks the loop forever

records = []
for name, url in tqdm.tqdm(zip(titles, links), total=len(links)):
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as exc:
        # Report the failure next to the progress bar and continue with an empty page.
        tqdm.tqdm.write(f'Could not fetch {url!r}: {exc}')
        page = ''
    soup = BeautifulSoup(page, 'lxml')

    # Tags without a `content` attribute are skipped rather than raising.
    authors = [
        tag['content']
        for tag in soup.find_all('meta', {'name': 'citation_author'})
        if tag.has_attr('content')
    ]

    # soup.find returns None when the tag is absent; fall back to an empty abstract.
    abstract_tag = soup.find('meta', {'name': 'citation_abstract'})
    if abstract_tag is not None and abstract_tag.has_attr('content'):
        abstract = abstract_tag['content'].replace('\n', ' ')
    else:
        abstract = ''

    records.append({
        'title': name,
        'authors': authors,
        'conference': conference,
        'abstract': abstract,
        'date': date,
        'year': year})

# Build the new rows in one go (DataFrame.append was removed in pandas 2.0 and
# copying the frame on every iteration is quadratic), then add them to `df`.
scraped = pd.DataFrame(
    records,
    columns=['title', 'authors', 'conference', 'abstract', 'date', 'year'])
df = scraped if df.empty else pd.concat([df, scraped], ignore_index=True)

df

100%|██████████| 55/55 [01:06<00:00,  1.21s/it]


Unnamed: 0,title,authors,conference,abstract,date,year
0,Local Component Analysis,"[Roux, Nicolas Le, Bach, Francis]",1st International Conference on Learning Repre...,"Kernel density estimation, a.k.a. Parzen wind...",May 2-4,2013
1,When Does a Mixture of Products Contain a Prod...,"[Montufar, Guido F., Morton, Jason]",1st International Conference on Learning Repre...,We derive relations between theoretical prope...,May 2-4,2013
2,Linear-Nonlinear-Poisson Neurons Can Do Infere...,"[Shao, Louis Yuanlong]",1st International Conference on Learning Repre...,One conjecture in both deep learning and clas...,May 2-4,2013
3,Regularized Auto-Encoders Estimate Local Stati...,"[Alain, Guillaume, Bengio, Yoshua]",1st International Conference on Learning Repre...,What do auto-encoders learn about the underly...,May 2-4,2013
4,Visual Objects Classification with Sliding Spa...,"[Lim, Hao Wooi, Tay, Yong Haur]",1st International Conference on Learning Repre...,We present a method for visual object classif...,May 2-4,2013
5,Cutting Recursive Autoencoder Trees,"[Scheible, Christian, Schuetze, Hinrich]",1st International Conference on Learning Repre...,Deep Learning models enjoy considerable succe...,May 2-4,2013
6,Clustering Learning for Robotic Vision,"[Culurciello, Eugenio, Bates, Jordan, Dundar, ...",1st International Conference on Learning Repre...,We present the clustering learning technique ...,May 2-4,2013
7,Unsupervised Feature Learning for low-level Lo...,"[Osendorfer, Christian, Bayer, Justin, Urban, ...",1st International Conference on Learning Repre...,Unsupervised feature learning has shown impre...,May 2-4,2013
8,Matrix Approximation under Local Low-Rank Assu...,"[Lee, Joonseok, Kim, Seungyeon, Lebanon, Guy, ...",1st International Conference on Learning Repre...,Matrix approximation is a common tool in mach...,May 2-4,2013
9,The Manifold of Human Emotions,"[Kim, Seungyeon, Li, Fuxin, Lebanon, Guy, Essa...",1st International Conference on Learning Repre...,Sentiment analysis predicts the presence of p...,May 2-4,2013


In [290]:
# Write this year's scrape to ./iclr/ICLR_<year>.csv, creating the folder if needed.
os.makedirs('./iclr', exist_ok=True)
# `year` may carry surrounding whitespace when it comes from a comma-split title;
# strip it so the file name does not contain an embedded space.
df.to_csv(f'./iclr/ICLR_{str(year).strip()}.csv', index=False)

In [296]:
# Join all per-year CSVs in ./iclr into a single venue-level CSV.

path = r'./iclr'
# Sorted so the row order of the merged file does not depend on directory listing order.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    raise FileNotFoundError(f'No CSV files found in {path!r}')

# Read into a list of fresh frames; the global `df` holding the scraped data is left alone.
yearly_frames = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]
frame = pd.concat(yearly_frames, axis=0, ignore_index=True)

# 'authors' is stored in the CSV as the text form of a Python list. literal_eval
# parses that literal without executing arbitrary code, which eval() would do.
frame['authors'] = frame['authors'].apply(lambda x: '; '.join(ast.literal_eval(x)))
# Keep only the letters of the date and upper-case them (e.g. ' May 2-4' -> 'MAY').
frame['date'] = frame['date'].apply(lambda x: re.sub('[^A-Za-z]+', '', x).upper())

# Rename by column name (not by position) so a different column order in a CSV
# cannot silently mislabel the data, then fix the output column order.
column_names = {
    'title': 'Article Title',
    'authors': 'Author Full Names',
    'conference': 'Conference Title',
    'abstract': 'Abstract',
    'date': 'Publication Date',
    'year': 'Publication Year',
}
frame = frame.rename(columns=column_names)[list(column_names.values())]
frame['Source Title'] = 'INTERNATIONAL CONFERENCE ON LEARNING REPRESENTATIONS'
frame['Publication Type'] = 'C'

# save to csv
os.makedirs('./venues', exist_ok=True)
frame.to_csv('./venues/ICLR.csv', index=False)