The code in this notebook is partially adapted from the OpenReview documentation:

https://docs.openreview.net/how-to-guides/data-retrieval-and-modification/how-to-export-all-submission-attachments

In [None]:
!pip install openreview-py

Collecting openreview-py
  Downloading openreview_py-1.46.0-py3-none-any.whl.metadata (4.5 kB)
Collecting pycryptodome (from openreview-py)
  Downloading pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting pylatexenc (from openreview-py)
  Downloading pylatexenc-2.10.tar.gz (162 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.6/162.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tld>=0.12 (from openreview-py)
  Downloading tld-0.13-py2.py3-none-any.whl.metadata (9.4 kB)
Downloading openreview_py-1.46.0-py3-none-any.whl (705 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m705.0/705.0 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tld-0.13-py2.py3-none-any.whl (263 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m263.8/263.8 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading p

In [None]:
import openreview
import os

In [None]:
# This project uses API V2 for EMNLP and NeurIPS, and API V1 for ICLR.
# Choose the client that corresponds to the venue you are interested in.
#
# Venue ids:
#   EMNLP 2023:   EMNLP/2023/Conference
#   NeurIPS 2023: NeurIPS.cc/2023/Conference
#   ICLR 2023:    ICLR.cc/2023/Conference

# Credentials are read from the environment when available, so a real password
# never has to be typed into (and saved with) the notebook. The placeholders
# are only a fallback and must be replaced before a login can succeed.
openreview_username = os.environ.get("OPENREVIEW_USERNAME", "<your username>")
openreview_password = os.environ.get("OPENREVIEW_PASSWORD", "<your password>")

# API V1
client = openreview.Client(
    baseurl='https://api.openreview.net',
    username=openreview_username,
    password=openreview_password
)


# API V2 -- assigned last, so this is the object bound to `client` once the
# whole cell has run.
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
    username=openreview_username,
    password=openreview_password
)



In [None]:
# ICLR 2023 is served by API V1: build a V1 client (no credentials are passed
# explicitly here) and pull every note posted under the blind-submission
# invitation.
iclr_invitation = "ICLR.cc/2023/Conference/-/Blind_Submission"

client = openreview.Client(baseurl='https://api.openreview.net')
notes = client.get_all_notes(invitation=iclr_invitation)


Getting V1 Notes: 100%|█████████▉| 3792/3796 [00:01<00:00, 2743.19it/s]


In [None]:
# Destination folder for the ICLR 2023 PDFs; created if it does not exist yet.
output_dir = "/content/drive/MyDrive/Thesis/ICLR2023_submissions"
os.makedirs(output_dir, exist_ok=True)

# Visit every note. Notes without a "pdf" entry are reported and skipped; a
# failure while fetching or writing one PDF is logged and the loop moves on.
for note in notes:
    print(f"Processing note: {note.id}")

    if "pdf" not in note.content:
        print(f"No PDF field found for note {note.id}")
        continue

    try:
        print(f"Found PDF for note {note.id}: {note.content['pdf']}")
        # Download the attachment bytes, then store them as submission<number>.pdf.
        pdf_bytes = client.get_attachment(note.id, 'pdf')
        file_path = os.path.join(output_dir, f'submission{note.number}.pdf')
        with open(file_path, 'wb') as pdf_file:
            pdf_file.write(pdf_bytes)
        print(f"Saved PDF for note {note.id} to {file_path}")
    except Exception as err:
        print(f"Error downloading/saving PDF for note {note.id}: {err}")

print(f"Files saved in {output_dir}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Found PDF for note RIJM-pJF_3K: /pdf/accaf0a0c437aa4aa099c6d5ada1e2c443734628.pdf
Saved PDF for note RIJM-pJF_3K to /content/drive/MyDrive/Thesis/ICLR2023_submissions/submission4849.pdf
Processing note: RHsOd1Aineq
Found PDF for note RHsOd1Aineq: /pdf/f5538f3bef892c0f11e9f2227337fd59997e6cf5.pdf
Saved PDF for note RHsOd1Aineq to /content/drive/MyDrive/Thesis/ICLR2023_submissions/submission3828.pdf
Processing note: RDy3IbvjMqT
Found PDF for note RDy3IbvjMqT: /pdf/9411d2b932a2b7a46202e67d58a5f8067fd33c77.pdf
Saved PDF for note RDy3IbvjMqT to /content/drive/MyDrive/Thesis/ICLR2023_submissions/submission5573.pdf
Processing note: R98ZfMt-jE
Found PDF for note R98ZfMt-jE: /pdf/751b7f72b933e8842e1162601b80445c8fa2b7c7.pdf
Saved PDF for note R98ZfMt-jE to /content/drive/MyDrive/Thesis/ICLR2023_submissions/submission3131.pdf
Processing note: R4oodnmxb9m
Found PDF for note R4oodnmxb9m: /pdf/4f75209d9dc5458db7ebbf858cb7ca2b4fcf2a40.

In [None]:
# For collecting EMNLP and NeurIPS submission PDFs, use API V2 and run the following.

# The ICLR cell rebinds `client` to an API V1 client, so when the notebook is
# run top to bottom `client` is no longer a V2 client at this point. Switch to
# a V2 client unless one is already active (an existing V2 login is kept).
if not isinstance(client, openreview.api.OpenReviewClient):
    client = openreview.api.OpenReviewClient(baseurl='https://api2.openreview.net')

notes = client.get_all_notes(invitation="NeurIPS.cc/2023/Conference/-/Blind_Submission")

# NOTE(review): API V2 venues normally name this invitation ".../-/Submission".
# Fall back to that name when the query above matches nothing -- confirm the
# correct invitation id for this venue.
if not notes:
    notes = client.get_all_notes(invitation="NeurIPS.cc/2023/Conference/-/Submission")


In [None]:
# Destination folder for the NeurIPS 2023 PDFs; created if it does not exist yet.
output_dir = "/content/drive/MyDrive/Thesis/NeurIPS2023_submissions"

os.makedirs(output_dir, exist_ok=True)

# Ids of notes whose PDF could not be fetched or written, reported at the end.
failed_note_ids = []

for note in notes:
    # The PDF reference is read from content["pdf"]["value"]; notes where it is
    # missing or empty have nothing to download.
    if not note.content.get("pdf", {}).get('value'):
        continue
    try:
        pdf_content = client.get_attachment(note.id, 'pdf')
        file_path = os.path.join(output_dir, f'submission{note.number}.pdf')
        with open(file_path, 'wb') as op:
            op.write(pdf_content)
    except Exception as e:
        # Same best-effort policy as the ICLR download loop: one failed note
        # must not abort the remaining downloads.
        failed_note_ids.append(note.id)
        print(f"Error downloading/saving PDF for note {note.id}: {e}")

print(f"Files saved in {output_dir}")
if failed_note_ids:
    print(f"{len(failed_note_ids)} download(s) failed: {failed_note_ids}")
