In [74]:
import openreview
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import os
from collections import defaultdict


In [32]:
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
)

In [33]:
submissions_emnlp = client.get_all_notes(invitation='EMNLP/2023/Conference/-/Submission', details='directReplies')

Getting V2 Notes: 100%|█████████▉| 2018/2021 [00:04<00:00, 410.89it/s]


In [165]:
def get_review(meta):
    reviews_meta = {
        "Soundness" : [],
        "Excitement" : [],
        "Reproducibility" : [],
        "Recommendation" : None,
        "Recommendation_for_Best_Paper_Award" : None,
        "Decision" : None
    }
    details = meta.details["directReplies"]
    general_reviews = []
    meta_review = None 
    decision_review = None
    for detail in details:
        detail = detail['content']
        if 'decision' in detail:
            decision_review = detail
        elif 'metareview' in detail:
            meta_review = detail
        else:
            general_reviews.append(detail)

    # general_reviews = details[:-2]
    # meta_review = details[-2]['content']
    # decision_review = details[-1]['content']
    for gr in general_reviews:
        soundness = gr['Soundness']['value'][0]
        excitement = gr['Excitement']['value'][0]
        reproducibility = gr['Reproducibility']['value'][0]
        reviews_meta['Soundness'].append(soundness)
        reviews_meta['Excitement'].append(excitement)
        reviews_meta['Reproducibility'].append(reproducibility)
    reviews_meta['Decision'] = decision_review["decision"]["value"]
    reviews_meta['Recommendation'] = meta_review["recommendation"]["value"]
    reviews_meta['Recommendation_for_Best_Paper_Award'] = meta_review["Recommendation_for_Best_Paper_Award"]["value"] if "Recommendation_for_Best_Paper_Award" in meta_review else None
    return reviews_meta

    

In [200]:
all_paper_types = defaultdict(int)
all_track = set()
all_venue = defaultdict(int)
meta_datas = []
for i, submission in enumerate(tqdm(submissions_emnlp)):
    number = submission.number
    content = submission.content
    title = content['title']['value'] #str
    authors = content['authors']['value'] #list[str]
    keywords = content['keywords']['value'] #list[str]
    abstract = content['abstract']['value'] #str
    paper_type = content['Submission_Type']['value'] #str
    track1 = content['Submission_Track']['value']
    track2 = None if 'Submission_Track_2' not in content else content['Submission_Track_2']['value']
    venue = content['venue']['value']
    
    all_paper_types[paper_type] += 1
    all_venue[venue] += 1
    try:
        reviews_meta = get_review(submission)
        avg_s = np.mean([float(i) for i in reviews_meta["Soundness"]])
        avg_e = np.mean([float(i) for i in reviews_meta["Excitement"]])
    except Exception as e:
        print(i)

    meta_datas.append({
        "Submission Number" : number,
        "Title" : title,
        "Authors" : ', '.join(authors),
        "Track" : track1,
        "Submission Type" : paper_type,
        "Venue" : venue,
        "Soundness" : ', '.join(reviews_meta["Soundness"]),
        "Excitement" : ', '.join(reviews_meta["Excitement"]),
        "Reproducibility" : ', '.join(reviews_meta["Reproducibility"]),
        "Mean-SE" : f"{avg_s:.2f}, {avg_e:.2f}",
        "Meta-review" : reviews_meta['Recommendation'],
        "Abstract" : abstract,
        "Keywords" : ', '.join(keywords),
        "Track-2" : track2

    })
    
meta_datas = sorted(meta_datas, key=lambda x:(x["Track"], x["Submission Type"], x["Venue"]))
table_datas = []
for data in meta_datas:
    table_datas.append([
        data["Submission Number"],
        data["Title"], 
        data['Authors'],
        data["Track"],
        data["Submission Type"],
        data["Venue"],
        data["Soundness"], data["Excitement"], data["Reproducibility"], data["Mean-SE"],
        data["Meta-review"], data['Abstract'], data['Keywords'], data["Track-2"]
    ])


  0%|          | 0/2021 [00:00<?, ?it/s]

449
557


In [201]:
df = pd.DataFrame(table_datas, columns=["Submission Number", "Title", "Authors", "Track", "Submission Type", "Venue", "Soundness", "Excitement", "Reproducibility", \
        "Mean-SE", "Meta-review", "Abstract", "Keywords", "Track-2"])
df.to_excel('EMNLP-2023-accept-papers.xlsx', index=False)

In [210]:
main_s = []
main_e = []
findings_s = []
findings_e = []
for data in table_datas:
    mse = data[-5].split(', ')
    soundness = float(mse[0])
    excitement = float(mse[1])
    if data[-9] == 'EMNLP 2023 Findings':
        findings_e.append(excitement)
        findings_s.append(soundness)
    elif data[-9] == 'EMNLP 2023 Main':
        main_s.append(soundness)
        main_e.append(excitement)
print(f"""Statistics of Accepted Papers
Main Conference:
\t Mean Soundness: {np.mean(main_s):.2f}
\t Mean Excitement: {np.mean(main_e):.2f}
\t Std Soundeness: {np.std(main_s):.2f}
\t Std Excitement: {np.std(main_e):.2f}
Findings:
\t Mean Soundness: {np.mean(findings_s):.2f}
\t Mean Excitement: {np.mean(findings_e):.2f}
\t Std Soundeness: {np.std(findings_s):.2f}
\t Std Excitement: {np.std(findings_e):.2f}""")

Statistics of Accepted Papers
Main Conference:
	 Mean Soundness: 3.66
	 Mean Excitement: 3.57
	 Std Soundeness: 0.39
	 Std Excitement: 0.37
Findings:
	 Mean Soundness: 3.23
	 Mean Excitement: 3.16
	 Std Soundeness: 0.35
	 Std Excitement: 0.35
