In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM
from transformers import LogitsProcessor
from typing import Iterable
import envs
import pandas as pd
import string
from leaderboard import SummaryGenerator, EvaluationModel, run_eval
from tqdm import tqdm

MODEL_NAME = "fava-uw/fava-model"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
                                             device_map="auto",
                                             torch_dtype="auto")

2024-05-28 17:17:00,474 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
  return self.fget.__get__(instance, owner)()
Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.95s/it]


In [8]:
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter

def post_process(edited_text):
    soup = BeautifulSoup(edited_text, "html.parser")
    for tag in soup.findAll(["delete", "subjective", "unverifiable", "invented", "contradictory"]):
        tag.decompose()
    return soup.get_text().strip()

In [4]:
def gen_func(source, summary):
    input_text = "Read the following references:\n{evidence}\nPlease identify all the errors in the following text using the information in the references provided and suggest edits if necessary:\n[Text] {output}\n[Edited] "
    input_text = input_text.format(evidence=source, output=summary)
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
    out = model.generate(input_ids,
                         do_sample=False,
                         max_new_tokens=512,
                         pad_token_id=tokenizer.eos_token_id)
    text = tokenizer.decode(out[0][len(input_ids[0]):], skip_special_tokens=True)
    return text

In [12]:
# Success case
# source = "Banff National Park is Canada's oldest national park, established in 1885 as Rocky Mountains Park. Located in Alberta's Rocky Mountains, 110–180 kilometres (68–112 mi) west of Calgary, Banff encompasses 6,641 square kilometres (2,564 sq mi) of mountainous terrain."
# summary = "Canada's oldest national park, Banff, was established in 1886. It recently won a Nature's Choice 2023 award for its beautiful mountainous terrain. It's the best national park ever."

# Failure case of unverifiable
source = "Fist of Legend . It is a remake of the 1972 Fist of Fury , which starred Bruce Lee as the lead character ."
summary = "Fist of Legend is a remake of the 1972 film Fist of Fury. The original film starred Bruce Lee as the lead character. The remake is a reinterpretation of the original story and characters. The remake was released in 1994 and starred Jet Li as the lead character. The film was directed by Corey Yuen and produced by Golden Harvest. The film was a commercial success and received positive reviews for its action sequences and choreography."
text = gen_func(source, summary)
print(text)


Edited: Fist of Legend is a remake of the 1972 film Fist of Fury. The original film starred Bruce Lee as the lead character. The remake is a reinterpretation of the original story and characters. The remake was released in 1994 and starred Jet Li as the lead character. The film was directed by Corey Yuen and produced by Golden Harvest. The film was a commercial success and received positive reviews for its action sequences and choreography.


In [6]:
post_process(text)

"\nEdited: Canada's oldest national park, Banff, was established in 1885.  "

In [7]:
import util
df = pd.read_csv("generated.csv")
source_summary_pairs = util.create_pairs(df)
edited = []
for doc, summary in tqdm(source_summary_pairs, desc="Fava Editing"):
    edited_summary = ""
    if util.is_summary_valid(summary):
        edited_summary = gen_func(doc, summary)
        print(edited_summary)
        input()
        edited_summary = post_process(edited_summary)
    edited.append(edited_summary)
df = df.rename({"summary": "original_summary"})
df["summary"] = edited
df.to_csv("fava.csv", index=False)



1. Paul Merson has restarted his row with Andros Townsend after the Tottenham midfielder was brought on with only seven minutes remaining in his team's 0-0 draw with Burnley on Sunday.
2. Merson initially angered Townsend for writing in his Sky Sports column that 'if Andros Townsend can get in (the England team) then it opens it up to anybody.'
3. Merson had another dig at Townsend after his appearance for Tottenham against Burnley.
4. Andros Townsend scores England's equaliser in their 1-1 friendly draw with Italy in Turin on Tuesday night.
5. Merson was proven wrong when Townsend hit a stunning equaliser for England against Italy and he duly admitted his mistake.
6. Townsend hit back at Merson on Twitter after scoring for England against Italy.
7. Merson criticised Townsend's call-up to the England squad last week.
8. Any bad feeling between the pair seemed to have passed but Merson was unable to resist having another dig at Townsend after Tottenham drew at Turf Moor.
1. Paul Merson 

Fava Editing:   0%|          | 1/1006 [01:09<19:20:31, 69.29s/it]


KeyboardInterrupt: 

In [None]:
run_eval("fava.csv")

  return self.fget.__get__(instance, owner)()
2024-05-28 17:10:25,074 - INFO - Use pytorch device: cuda
Batches: 100%|██████████| 1/1 [00:01<00:00,  1.48s/it]:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 21.45it/s]:01<24:50,  1.48s/it]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.66it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 23.24it/s]:01<07:05,  2.36it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 21.43it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.49it/s]:01<03:53,  4.28it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 21.25it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.21it/s]:01<02:37,  6.35it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.49it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.88it/s]:01<01:57,  8.46it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 25.19it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 24.60it/s]0:02<01:34, 10.57it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 24.82it/s]
Batches: 100%|██████████| 1/1 [00:00<00:0

Average Length 113.0865671641791
Answer Rate 0.9990059642147118
Consistent Rate 93.03482587064677
