In [20]:
import os
# Expose only GPU 0 to this process. The variable is set before torch is
# imported so that it is already in place when CUDA gets initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM
from transformers import LogitsProcessor
from sentence_transformers import CrossEncoder
from typing import Iterable
import envs
import pandas as pd
import string
import numpy
from functools import partial
from leaderboard import SummaryGenerator, EvaluationModel, run_eval

# Checkpoint identifier passed to both AutoTokenizer and AutoModelForCausalLM
# in the loading cell.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
# Alternative checkpoint, currently disabled (presumably a local fine-tune —
# TODO confirm where "mistral_dpo_5k" lives before switching).
# MODEL_NAME = "mistral_dpo_5k"

In [5]:
# Load the tokenizer and the causal-LM weights for MODEL_NAME.
# device_map="auto" leaves weight placement to the library, and
# torch_dtype="auto" leaves the dtype choice to it as well (presumably the
# dtype stored with the checkpoint — TODO confirm).
# NOTE(review): attn_implementation="flash_attention_2" presumably needs the
# flash-attn package and a compatible GPU — confirm for the target machine.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
                                             device_map="auto",
                                             torch_dtype="auto",
                                             attn_implementation="flash_attention_2")

2024-06-17 14:23:02,331 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|███████████████████████████████████████████████████| 2/2 [00:07<00:00,  3.71s/it]


In [6]:
# Replace the tokenizer's chat template with the Jinja template stored next to
# the notebook. The context manager closes the file handle deterministically
# (the bare open(...).read() left that to the garbage collector), and the
# explicit encoding keeps the template text independent of the platform
# default.
with open("mistral_template.jinja", "r", encoding="utf-8") as template_file:
    tokenizer.chat_template = template_file.read()

In [7]:
# NOTE(review): CrossEncoder is already imported in the notebook's first cell;
# this repeated import is redundant.
from sentence_transformers import CrossEncoder
# Cross-encoder handed to gen_func, where it scores [source, summary] pairs so
# the best sampled candidate can be selected.
scorer = CrossEncoder("vectara/hallucination_evaluation_model")

  return self.fget.__get__(instance, owner)()
2024-06-17 14:23:18,541 - INFO - Use pytorch device: cuda


In [16]:
def gen_func(source, scorer, debug=False, num_candidates=10,
             max_new_tokens=512, temperature=0.7, top_p=0.95):
    """Summarise ``source`` with best-of-N sampling.

    Builds a chat prompt from ``envs.SYSTEM_PROMPT`` and ``envs.USER_PROMPT``,
    samples ``num_candidates`` completions from the notebook-level ``model``,
    scores every candidate against ``source`` with ``scorer`` and returns the
    candidate with the highest score.

    Args:
        source: Passage to summarise; substituted into ``envs.USER_PROMPT``
            as ``passage``.
        scorer: Object exposing ``predict(pairs, show_progress_bar=...)`` that
            returns one score per ``[source, summary]`` pair. The largest
            score is treated as the best.
        debug: When true, print the decoded prompt and the candidate scores.
        num_candidates: Number of completions to sample (the "N" in
            best-of-N). Must be at least 1.
        max_new_tokens: Upper bound on generated tokens per candidate.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The decoded text of the highest-scoring candidate.

    Raises:
        ValueError: If ``num_candidates`` is smaller than 1.
    """
    if num_candidates < 1:
        raise ValueError("num_candidates must be at least 1")

    messages = [{"role": "system", "content": envs.SYSTEM_PROMPT},
                {"role": "user", "content": envs.USER_PROMPT.format(passage=source)}]
    input_ids = tokenizer.apply_chat_template(messages,
                                              add_generation_prompt=True,
                                              return_tensors="pt")
    # Follow the model's own placement instead of hard-coding "cuda": the
    # model is loaded with device_map="auto", so its device is decided at
    # load time.
    input_ids = input_ids.to(model.device)
    prompt_len = input_ids.shape[-1]

    out = model.generate(input_ids,
                         # Single unpadded prompt: every position is a real
                         # token. Passing the mask explicitly avoids relying
                         # on inference from pad_token_id, which is set to
                         # the EOS id below.
                         attention_mask=torch.ones_like(input_ids),
                         do_sample=True,
                         temperature=temperature,
                         top_p=top_p,
                         max_new_tokens=max_new_tokens,
                         num_return_sequences=num_candidates,
                         pad_token_id=tokenizer.eos_token_id)
    # Drop the prompt tokens so that only the generated continuation is decoded.
    texts = tokenizer.batch_decode(out[:, prompt_len:], skip_special_tokens=True)

    # Best of N: keep the candidate the scorer rates highest.
    scores = scorer.predict([[source, summ] for summ in texts], show_progress_bar=False)
    if debug:
        print(tokenizer.decode(input_ids[0]))
        print(scores)
    best_idx = int(numpy.argmax(scores))
    return texts[best_idx]

In [17]:
# Smoke-test gen_func on a single passage. With debug=True the decoded prompt
# and the candidate scores are printed before the selected summary is returned
# as the cell's value.
gen_func("The first vaccine for Ebola was approved by the FDA in 2019 in the US, five years after the initial outbreak in 2014. To produce the vaccine, scientists had to sequence the DNA of Ebola, then identify possible vaccines, and finally show successful clinical trials. Scientists say a vaccine for COVID-19 is unlikely to be ready this year, although clinical trials have already started.", 
         scorer=scorer,
         debug=True)

<s> [INST] You are a chat bot answering questions using data. You must stick to the answers provided solely by the text in the passage provided. 

You are asked the question 'Provide a concise summary of the following passage, covering the core pieces of information described': 
Passage:
The first vaccine for Ebola was approved by the FDA in 2019 in the US, five years after the initial outbreak in 2014. To produce the vaccine, scientists had to sequence the DNA of Ebola, then identify possible vaccines, and finally show successful clinical trials. Scientists say a vaccine for COVID-19 is unlikely to be ready this year, although clinical trials have already started. [/INST]
[0.9990177  0.99417984 0.9993338  0.9990607  0.99931836 0.99933136
 0.9990609  0.9993253  0.99923015 0.99903166]


'The first Ebola vaccine was approved by the FDA in the US in 2019, five years after the initial outbreak in 2014. Scientists had to sequence the DNA of Ebola, identify possible vaccines, and show successful clinical trials to produce the vaccine. A vaccine for COVID-19 is unlikely to be ready this year, although clinical trials have already started.'

In [18]:
# Helper imported from the project's leaderboard module; its
# generate_summaries() method is called in the following cell with the dataset
# and the generation function.
summ = SummaryGenerator()

In [None]:
# Run best-of-N generation over the leaderboard dataset and persist the result.
leaderboard_inputs = pd.read_csv("leaderboard_dataset.csv")
# Bind the scorer up front; generate_summaries is expected to supply the
# remaining argument(s) of gen_func itself.
best_of_n_generator = partial(gen_func, scorer=scorer)
df = summ.generate_summaries(leaderboard_inputs, best_of_n_generator)
df.to_csv("generated.csv", index=False)

 39%|███████████████████████████▊                                           | 394/1006 [43:08<3:18:45, 19.49s/it]