In [1]:
import os
import json
import time
import operator
import numpy as np
from tqdm.notebook import tqdm

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## Get submission list

In [None]:
def get_item_url(page_id, driver, url_list):
    """Collect the paper URLs shown on the currently displayed list page.

    Args:
        page_id: Index of the current page; only used in progress messages.
        driver: Selenium WebDriver that is already showing the submission list.
        url_list: List that is extended in place with one URL string per note.

    Raises:
        IndexError: If the page contains no submissions list at all.
    """
    # find all items in current page
    # the first element is all submissions, the second is rejected/withdrawn ones
    item_list_parent = driver.find_elements(By.CSS_SELECTOR, "ul[class='list-unstyled submissions-list']")[0]
    item_list = item_list_parent.find_elements(By.CLASS_NAME, 'note')
    print(f'processing page {page_id} | {len(item_list)} items | total: {len(url_list)} items')
    num_skipped = 0
    for note in tqdm(item_list):
        # the first <a> is the paper title and url
        links = note.find_elements(By.TAG_NAME, 'a')
        # get_attribute returns None when the attribute is absent, so guard
        # before calling .strip() instead of crashing the whole page
        href = links[0].get_attribute('href') if links else None
        if not href:
            num_skipped += 1
            continue
        url_list.append(href.strip())
    if num_skipped:
        print(f'page {page_id}: skipped {num_skipped} items without a link')

In [7]:
# Location of the chromedriver binary. The default matches a Homebrew install;
# set the CHROMEDRIVER_PATH environment variable to use a different location.
s = Service(os.environ.get('CHROMEDRIVER_PATH', '/opt/homebrew/bin/chromedriver'))
driver = webdriver.Chrome(service=s)
# open the conference landing page that hosts the paginated submission list
driver.get('https://openreview.net/group?id=ICLR.cc/2022/Conference')

In [None]:
# Walk through every page of the submission list and gather the paper URLs.
url_list = []
page_id = 0
is_not_end = True
while True:
    if page_id == 0:
        # the first page is already displayed, process it directly
        get_item_url(page_id, driver, url_list)
    else:
        if not is_not_end:
            break
        try:
            # jump to next page
            next_page_btns[0].find_element(By.TAG_NAME, 'a').click()
            time.sleep(2.5)
            # process current page
            get_item_url(page_id, driver, url_list)
        except Exception as err:
            # Only catch ordinary errors: a bare except would also swallow
            # KeyboardInterrupt and make the loop impossible to stop.
            print(f'Failed to jump to page {page_id}')
            print(f'  reason: {err!r}')
    # look up the "next page" arrows again, the DOM is rebuilt after each jump
    next_page_btns = driver.find_elements(By.CSS_SELECTOR, "li[class='  right-arrow']")
    # the original author treats exactly 4 arrows as "more pages available"
    is_not_end = len(next_page_btns) == 4
    page_id += 1

In [None]:
# save url list: the first line is the UTC time of the crawl, then one url per line
os.makedirs('assets', exist_ok=True)  # the output folder may not exist on a fresh checkout
with open('assets/url_list.txt', 'w', encoding='utf-8') as f:
    f.write(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) + '\n')
    f.write('\n'.join(url_list))

## Parse each item

In [3]:
# submission meta data
class AllSubmissions:
    """Collection of parsed submissions kept in memory and mirrored to disk.

    Every call to ``update`` appends one record to ``self.items`` and writes the
    same record as JSON to ``<save_root>/<index>.txt``.
    """

    def __init__(self, save_root: str):
        """Create an empty collection.

        Args:
            save_root: Directory that receives the per-item JSON files.
        """
        self.items = []
        self.save_root = save_root

    def update(self, index: int, url: str, title: str, keywords: list, scores: list, avg_score: float = -1.):
        """Record one submission and write it to ``<save_root>/<index>.txt``.

        Args:
            index: Number used as the file name of the per-item JSON file.
            url: Forum URL of the submission.
            title: Paper title.
            keywords: List of keyword strings.
            scores: List of numeric review scores; may be empty.
            avg_score: Average score. When left at the sentinel ``-1.`` and
                ``scores`` is non-empty, the mean of ``scores`` is used.
        """
        if len(scores) > 0 and avg_score == -1.:
            # convert to a built-in float so the stored record is plain Python data
            avg_score = float(np.mean(scores))
        item = {
            'url': url,
            'title': title,
            'keywords': keywords,
            'scores': scores,
            'avg_score': avg_score
        }
        self.items.append(item)
        # make sure the target folder exists; an empty save_root means the
        # current directory, which os.makedirs cannot be called with
        if self.save_root:
            os.makedirs(self.save_root, exist_ok=True)
        with open(os.path.join(self.save_root, f'{index}.txt'), 'w') as f:
            json.dump(item, f)

    def get_all_values_by_key(self, key: str = ''):
        """Return the value stored under ``key`` for every item, in insertion order.

        Raises:
            KeyError: If ``key`` is not one of the stored record fields.
        """
        return list(map(operator.itemgetter(key), self.items))

    def save(self, path: str):
        """Write all items to ``path``, one JSON document per line."""
        with open(path, 'w') as f:
            for item in self.items:
                f.write(json.dumps(item) + '\n')
        print(f'Saved to {path}')

    def __len__(self):
        """Number of recorded submissions."""
        return len(self.items)

In [4]:
# read url list
with open('assets/url_list.txt', 'r', encoding='utf-8') as f:
    url_list = f.readlines()
# the first line of the file is the timestamp written when the list was saved;
# strip its trailing newline so the summary below prints on a single line
data_time = url_list[0].strip()
# remaining lines are the item urls (still carrying their line endings)
item_list = url_list[1:]
num_items = len(item_list)
print(f'Total {num_items} items | time: {data_time}')

Total 3328 items | time: 2021-11-09 17:11:59



In [10]:
all_submissions = AllSubmissions('assets/data/')
# index of the first item to process; earlier items are skipped by this run
start_index = 1900
# upper bound on polling attempts while waiting for the comments to render
max_tries = 1000
for i in tqdm(range(start_index, num_items)):
    item_url = item_list[i].strip()
    print(item_url)
    driver.get(item_url)
    time.sleep(1)
    # poll until at least one top-level comment has been rendered
    loaded = False
    num_try = 0
    comment_list = []
    while not loaded:
        comment_list = driver.find_elements(By.CSS_SELECTOR, "div[class='note_with_children comment-level-odd']")
        if len(comment_list) > 0:
            loaded = True
        else:
            time.sleep(.5)
            if num_try > max_tries:
                print(f'Failed to load {item_url} with max tries!')
                # give up on this page; without the break the loop never ends
                break
            num_try += 1
    if not loaded:
        # nothing could be parsed for this url, move on to the next one
        continue
    # process comments
    item_scores = []
    for comment in comment_list:
        # first <span> of the first meta row is checked for the signature text
        signature = comment.find_elements(By.CLASS_NAME, 'meta_row')[0].find_elements(By.TAG_NAME, 'span')[0]
        signature_html = signature.get_attribute('innerHTML')
        # keep only comments whose signature mentions both 'ICLR' and 'Reviewer'
        if 'ICLR' not in signature_html or 'Reviewer' not in signature_html:
            continue
        panel = comment.find_elements(By.CSS_SELECTOR, "div[class='note panel']")[0]
        contents = panel.find_elements(By.CLASS_NAME, 'note_contents')
        if len(contents) < 2:
            continue
        # the second to last content row is expected to carry the recommendation
        recommend = contents[-2].find_elements(By.TAG_NAME, 'span')
        if len(recommend) < 2:
            # row without a label/value pair, nothing to read
            continue
        if recommend[0].get_attribute('innerHTML') == 'Recommendation: ':
            # the value is read as "<number>: <text>", keep the numeric part
            _score = float(recommend[1].get_attribute('innerHTML').split(':')[0])
            item_scores.append(_score)
    # process title
    _title = driver.find_elements(By.CLASS_NAME, 'note_content_title')[0].find_elements(By.TAG_NAME, 'a')[0]
    item_title = _title.get_attribute('innerHTML').strip()
    # process keywords
    _keywords = driver.find_elements(By.CLASS_NAME, 'note_contents')[0]
    keyword_spans = _keywords.find_elements(By.TAG_NAME, 'span')
    if len(keyword_spans) >= 2 and 'Keywords:' in keyword_spans[0].get_attribute('innerHTML'):
        item_keywords = keyword_spans[1].get_attribute('innerHTML').strip().split(',')
        item_keywords = [_k.strip() for _k in item_keywords]
    else:
        item_keywords = []
    all_submissions.update(i, item_url, item_title, item_keywords, item_scores)

  0%|          | 0/1428 [00:00<?, ?it/s]

https://openreview.net/forum?id=RNf9AgtRtL
https://openreview.net/forum?id=KwLWsm5idpR
https://openreview.net/forum?id=lsQCDXjOl3k
https://openreview.net/forum?id=fpU10jwpPvw
https://openreview.net/forum?id=CrXLp_yeA-K
https://openreview.net/forum?id=VMuenFh7IpP
https://openreview.net/forum?id=1wVvweK3oIb
https://openreview.net/forum?id=UdxJ2fJx7N0
https://openreview.net/forum?id=VppWsjXgBY6
https://openreview.net/forum?id=UarYhFFxQ2B
https://openreview.net/forum?id=R332S76RjxS
https://openreview.net/forum?id=ZKy2X3dgPA
https://openreview.net/forum?id=827jG3ahxL
https://openreview.net/forum?id=Ng8wWGXXIXh
https://openreview.net/forum?id=9NVd-DMtThY
https://openreview.net/forum?id=kocM6lVTIfJ
https://openreview.net/forum?id=swRxhFpK5ds
https://openreview.net/forum?id=JQ1RLAEn-BO
https://openreview.net/forum?id=EwqEx5ipbOu
https://openreview.net/forum?id=AdEM_SzfSd
https://openreview.net/forum?id=VwSHZgruNEc
https://openreview.net/forum?id=yjsA8Uin-Y
https://openreview.net/forum?id=hfU7K

https://openreview.net/forum?id=JGO8CvG5S9
https://openreview.net/forum?id=sBHVNmCt3t
https://openreview.net/forum?id=9q3g_5gQbbA
https://openreview.net/forum?id=0rjx6jy25R4
https://openreview.net/forum?id=lQI_mZjvBxj
https://openreview.net/forum?id=g4nVdxU9RK
https://openreview.net/forum?id=68n2s9ZJWF8
https://openreview.net/forum?id=e0uknAgETh
https://openreview.net/forum?id=g2LCQwG7Of
https://openreview.net/forum?id=aBAgwom5pTn
https://openreview.net/forum?id=ibrUkC-pbis
https://openreview.net/forum?id=BM7RjuhAK7W
https://openreview.net/forum?id=demdsohU_e
https://openreview.net/forum?id=3PN4iyXBeF
https://openreview.net/forum?id=0EL4vLgYKRW
https://openreview.net/forum?id=tlkMbWBEAFb
https://openreview.net/forum?id=ib8vMnQPQ2
https://openreview.net/forum?id=HObMhrCeAAF
https://openreview.net/forum?id=QEBHPRodWYE
https://openreview.net/forum?id=9wOQOgNe-w
https://openreview.net/forum?id=m8bypnj7Yl5
https://openreview.net/forum?id=5fmBRf5rrC
https://openreview.net/forum?id=41e9o6cQPj

https://openreview.net/forum?id=u6ybkty-bL
https://openreview.net/forum?id=XLxhEjKNbXj
https://openreview.net/forum?id=crq5s3LLESc
https://openreview.net/forum?id=9zcjXdavnX
https://openreview.net/forum?id=2eXhNpHeW6E
https://openreview.net/forum?id=3Skn65dgAr4
https://openreview.net/forum?id=mYaOK2og0tf
https://openreview.net/forum?id=ziRLU3Y2PN_
https://openreview.net/forum?id=zBhwgP7kt4
https://openreview.net/forum?id=MGIg_Q4QtW2
https://openreview.net/forum?id=ETiaOyNwJW
https://openreview.net/forum?id=Ud7G0LtrHVD
https://openreview.net/forum?id=wu5yYUutDGW
https://openreview.net/forum?id=mvq4blDaCkN
https://openreview.net/forum?id=4GBHVfEcmoS
https://openreview.net/forum?id=f9JwVXMJ1Up
https://openreview.net/forum?id=RuC5ilX2m6O
https://openreview.net/forum?id=f2zGmcA0bs7
https://openreview.net/forum?id=zuqcmNVK4c2
https://openreview.net/forum?id=O17RRqiZc5x
https://openreview.net/forum?id=8MN_GH4Ckp4
https://openreview.net/forum?id=xtZXWpXVbiK
https://openreview.net/forum?id=Sb4h

https://openreview.net/forum?id=ZV3PZXrRDQ
https://openreview.net/forum?id=KSSfF5lMIAg
https://openreview.net/forum?id=1RqyBxJU_Wy
https://openreview.net/forum?id=t5s-hd1bqLk
https://openreview.net/forum?id=VPjw9KPWRSK
https://openreview.net/forum?id=bB6YLDJewoK
https://openreview.net/forum?id=fStt6fyzrK
https://openreview.net/forum?id=Bel1Do_eZC
https://openreview.net/forum?id=PHugX0j2xcE
https://openreview.net/forum?id=oapKSVM2bcj
https://openreview.net/forum?id=-xhk0O7iAc0
https://openreview.net/forum?id=R9Ht8RZK3qY
https://openreview.net/forum?id=mfwdY3U_9ea
https://openreview.net/forum?id=PTRo58zPt3P
https://openreview.net/forum?id=lVtq6C5_3QL
https://openreview.net/forum?id=OVShHe8Ce0
https://openreview.net/forum?id=q58E59ZPLp
https://openreview.net/forum?id=HTfUrAxjPkR
https://openreview.net/forum?id=cdZLe5S0ur
https://openreview.net/forum?id=D6nH3719vZy
https://openreview.net/forum?id=OQo6Tuyo0ih
https://openreview.net/forum?id=n0OeTdNRG0Q
https://openreview.net/forum?id=Ix_mh4

https://openreview.net/forum?id=4Ycr8oeCoIh
https://openreview.net/forum?id=pbduKpYzn9j
https://openreview.net/forum?id=DTg98fkyoyn
https://openreview.net/forum?id=QJWVP4CTmW4
https://openreview.net/forum?id=JVWB8QRUOi-
https://openreview.net/forum?id=drRnrGMZ3ze
https://openreview.net/forum?id=QkRV50TZyP
https://openreview.net/forum?id=nbC8iTTXIrk
https://openreview.net/forum?id=UXrVIKDbsb_
https://openreview.net/forum?id=eELR-4Dk4U8
https://openreview.net/forum?id=ZumkmSpY9G4
https://openreview.net/forum?id=Vt1lpp5Vebd
https://openreview.net/forum?id=EIm_pvFJx5k
https://openreview.net/forum?id=sEIl_stzQyB
https://openreview.net/forum?id=9n9c8sf0xm
https://openreview.net/forum?id=gbe1zHyA73
https://openreview.net/forum?id=CNY9h3uyfiO
https://openreview.net/forum?id=oiy9BAuqnDg
https://openreview.net/forum?id=KLh86DknDj7
https://openreview.net/forum?id=eDjxhFbaWX
https://openreview.net/forum?id=uecYQBshVYV
https://openreview.net/forum?id=AUGBfDIV9rL
https://openreview.net/forum?id=MmC5

https://openreview.net/forum?id=Dzpe9C1mpiv
https://openreview.net/forum?id=dAFxBu5OAXh
https://openreview.net/forum?id=7ADMMyZpeY
https://openreview.net/forum?id=327eol9Xgyi
https://openreview.net/forum?id=PVB_t0HCMVC
https://openreview.net/forum?id=AlPBx2zq7Jt
https://openreview.net/forum?id=JTbUTe0B0J1
https://openreview.net/forum?id=1saVY0lW1x
https://openreview.net/forum?id=roxWnqcguNq
https://openreview.net/forum?id=PtSAD3caaA2
https://openreview.net/forum?id=ZUinrZwKnHb
https://openreview.net/forum?id=gJLEXy3ySpu
https://openreview.net/forum?id=qSV5CuSaK_a
https://openreview.net/forum?id=D1hTwPPmMVv
https://openreview.net/forum?id=Harn4_EZBw
https://openreview.net/forum?id=cZAi1yWpiXQ
https://openreview.net/forum?id=olQbo52II9
https://openreview.net/forum?id=s03AQxehtd_
https://openreview.net/forum?id=RQ428ZptQfU
https://openreview.net/forum?id=VQyHD2R3Aq
https://openreview.net/forum?id=wX4Z5X5vpm
https://openreview.net/forum?id=J_PHjw4gvXJ
https://openreview.net/forum?id=SC6JbE

https://openreview.net/forum?id=0kPL3xO4R5
https://openreview.net/forum?id=-ngwPqanCEZ
https://openreview.net/forum?id=ATUh28lnSuW
https://openreview.net/forum?id=D1TYemnoRN
https://openreview.net/forum?id=vr39r4Rjt3z
https://openreview.net/forum?id=Y1O-K5itG09
https://openreview.net/forum?id=UOj0MV__Cr
https://openreview.net/forum?id=oU3aTsmeRQV
https://openreview.net/forum?id=c60vFLXEwED
https://openreview.net/forum?id=1Z5P--ntu8
https://openreview.net/forum?id=2e7Bf6b-v_P
https://openreview.net/forum?id=34k1OWJWtDW
https://openreview.net/forum?id=MTex8qKavoS
https://openreview.net/forum?id=3rULBvOJ8D2
https://openreview.net/forum?id=m4BAEB_Imy
https://openreview.net/forum?id=MsHnJPaBUZE
https://openreview.net/forum?id=voEpzgY8gsT
https://openreview.net/forum?id=RxplU3vmBx
https://openreview.net/forum?id=Ih7LAeOYIb0
https://openreview.net/forum?id=qfaNCudAnji
https://openreview.net/forum?id=V3C8p78sDa
https://openreview.net/forum?id=vEIVxSN8Xhx
https://openreview.net/forum?id=pWBNOgd

https://openreview.net/forum?id=4lLyoISm9M
https://openreview.net/forum?id=XJFGyJEBLuz
https://openreview.net/forum?id=aWA3-vIQDv
https://openreview.net/forum?id=UgBo_nhiHl
https://openreview.net/forum?id=uouGog2bW-F
https://openreview.net/forum?id=fvLLcIYmXb
https://openreview.net/forum?id=IXrQxlxr0iB
https://openreview.net/forum?id=WQIdU90Gsu
https://openreview.net/forum?id=oLYTo-pL0Be
https://openreview.net/forum?id=wxVpa5z4DU1
https://openreview.net/forum?id=AAeMQz0x4nA
https://openreview.net/forum?id=DrpKmCmPMSC
https://openreview.net/forum?id=B9LUI0pZFGc
https://openreview.net/forum?id=dtpgsBPJJW
https://openreview.net/forum?id=9W2KnHqm_xN
https://openreview.net/forum?id=fE-sp8USacG
https://openreview.net/forum?id=tk1eA4lvVRC
https://openreview.net/forum?id=cpstx0xuvRY
https://openreview.net/forum?id=A3HHaEdqAJL
https://openreview.net/forum?id=jE_ipyh20rb
https://openreview.net/forum?id=DzBDB7y8UOy
https://openreview.net/forum?id=R8sQPpGCv0
https://openreview.net/forum?id=085y6YP