# A simple web demo of [Rebiber](https://github.com/yuchenlin/rebiber).

We often cite papers using their arXiv versions without noting that they have already been PUBLISHED at a conference. These unofficial bib entries might violate the rules for submissions or camera-ready versions at some conferences. We introduce Rebiber, a simple Python tool that fixes them automatically. It is based on the official conference information from DBLP or the ACL Anthology (for NLP conferences)! You can check the list of [supported conferences](https://github.com/yuchenlin/rebiber#supported-conferences).

# Step 1: Install Rebiber.

In [None]:
%%capture
# Install (or upgrade to) the released rebiber package with pip.
# The runtime may need to be restarted afterwards so the new version is picked up.
# The %%capture cell magic above keeps pip's output out of the notebook.
!pip install rebiber -U
# Alternative (disabled): install the development version directly from GitHub.
#!pip install -e git+https://github.com/yuchenlin/rebiber.git#egg=rebiber -U

# Step 2: Import Bib.

## by Uploading

In [None]:
# Run this cell, then click "upload" to choose the bib file(s) to process.
import rebiber
from google.colab import files

uploaded = files.upload()
for fn, content in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(content)))

# Keep the uploaded file names; the processing cell iterates over this list.
ls_bibfiles = list(uploaded)

# Report the file name and the number of bib records rebiber loads from it.
for fn in ls_bibfiles:
  bib_entries = rebiber.load_bib_file(fn)
  print("filename: {}; # of entries: {}".format(fn, len(bib_entries)))

## by Pasting

In [105]:
# Paste one entry, or several consecutive entries, between the """    """ below.
# `re` is imported here so this cell runs on its own; nothing earlier imports it.
import re

ls_bib = []
s = """
@Misc{zaheer2021big,
  author        = {Manzil Zaheer and Guru Guruganesh and Avinava Dubey and Joshua Ainslie and Chris Alberti and Santiago Ontanon and Philip Pham and Anirudh Ravula and Qifan Wang and Li Yang and Amr Ahmed},
  title         = {Big Bird: Transformers for Longer Sequences},
  year          = {2021},
  archiveprefix = {arXiv},
  eprint        = {2007.14062},
  file          = {:Ref/2007.14062.pdf:PDF},
  primaryclass  = {cs.LG},
}
"""

ls_bib.append(s)

# Prepare inputs. The input and output files exist only in this Colab session.
fp_input = "input_pasted.bib"

# Splitting on "newline followed by whitespace" and re-joining with a single
# newline removes each line's leading indentation and collapses blank lines
# inside an entry. Entries are separated by a blank line in the written file.
# The `with` block guarantees the file is flushed and closed even if a write fails.
with open(fp_input, "w") as input_bib:
  for bib in ls_bib:
    input_bib.write('\n'.join(re.split(r'\n\s+', bib)) + "\n\n")

# The processing cell reads its inputs from this list.
ls_bibfiles = [fp_input]

# Step 3: PROCESS and PRINT

In [106]:
#%%capture
# process input bib to output bib.

from IPython.utils import io

ls_output = []
for fn in ls_bibfiles:
  fn_output = '{}_rebibed.bib'.format(fn.split('.bib')[0])
  ls_output.append(fn_output)
  with io.capture_output() as captured:
    !rebiber -i {fn} -o {fn_output}

# Print bib entries
for fn in ls_output:
  f = open(fn, 'r')
  rebibed_bib = f.read()
  f.close()
  print(rebibed_bib)

@misc{zaheer2021big,
 archiveprefix = {arXiv},
 author = {Manzil Zaheer and Guru Guruganesh and Avinava Dubey and Joshua Ainslie and Chris Alberti and Santiago Ontanon and Philip Pham and Anirudh Ravula and Qifan Wang and Li Yang and Amr Ahmed},
 eprint = {2007.14062},
 file = {:Ref/2007.14062.pdf:PDF},
 primaryclass = {cs.LG},
 title = {Big Bird: Transformers for Longer Sequences},
 year = {2021}
}




# Step 4: DOWNLOAD

In [107]:
# Run this cell to download processed outputs.
# `files` is imported here as well so the cell also works when the bib was
# pasted and the upload cell (which imports it) was never run.
from google.colab import files

for fn in ls_output:
  files.download(fn)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>