# Generate Preference Data

In [1]:
import os
# Expose only GPU 0 to this process. Keep this ahead of the torch / vllm imports:
# the variable only takes effect if it is set before CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM
from transformers import LogitsProcessor
from typing import Iterable
import envs
import pandas as pd
import string
from sentence_transformers import CrossEncoder
from leaderboard import SummaryGenerator, EvaluationModel
from tqdm import tqdm
import vllm 
import util
# Hugging Face model id of the generator; the same id is used to load the tokenizer.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

2024-05-29 23:41:01,336	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# Start the vLLM engine for the generator model with a 16,384-token context limit.
model = vllm.LLM(
    model=MODEL_NAME,
    max_model_len=16384,
)

INFO 05-29 11:34:44 llm_engine.py:72] Initializing an LLM engine with config: model='mistralai/Mistral-7B-Instruct-v0.1', tokenizer='mistralai/Mistral-7B-Instruct-v0.1', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, seed=0)
INFO 05-29 11:34:48 weight_utils.py:164] Using model weights format ['*.safetensors']
INFO 05-29 11:34:57 llm_engine.py:322] # GPU blocks: 2738, # CPU blocks: 2048
INFO 05-29 11:35:00 model_runner.py:632] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 05-29 11:35:00 model_runner.py:636] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, cons

In [2]:
from datasets import load_dataset

# Training splits of the three corpora that supply the source passages.
vitc = load_dataset(
    "tals/vitaminc",
    split="train",
)
xsum = load_dataset(
    "EdinburghNLP/xsum",
    split="train",
)
cnndm = load_dataset(
    "abisee/cnn_dailymail",
    "1.0.0",
    split="train",
)

In [3]:
# Shuffle VitaminC with a fixed seed, then keep shard 0 of 30.
shuffled_vitc = vitc.shuffle(seed=42)
# `contiguous=False` is spelled out because the default of this argument differs between
# `datasets` releases; pinning it keeps the selected rows (and therefore the order of
# `docs`) identical regardless of the installed version.
vitc_sampled = shuffled_vitc.shard(num_shards=30, index=0, contiguous=False)

In [4]:
# Keep only CNN/DailyMail articles of at most 500 whitespace-separated words.
cnndm_filtered = cnndm.filter(lambda example: len(example["article"].split()) <= 500)
# Keep shard 0 of 10. `contiguous=False` is spelled out because the default of this
# argument differs between `datasets` releases; pinning it keeps the selected rows
# (and therefore the order of `docs`) identical regardless of the installed version.
cnndm_sampled = cnndm_filtered.shard(num_shards=10, index=0, contiguous=False)

In [5]:
# Keep only XSum documents of at most 500 whitespace-separated words.
xsum_filtered = xsum.filter(lambda example: len(example["document"].split()) <= 500)
# Keep shard 0 of 10. `contiguous=False` is spelled out because the default of this
# argument differs between `datasets` releases; pinning it keeps the selected rows
# (and therefore the order of `docs`) identical regardless of the installed version.
xsum_sampled = xsum_filtered.shard(num_shards=10, index=0, contiguous=False)

In [6]:
# Source passages in a fixed order: CNN/DailyMail articles, then VitaminC evidence,
# then XSum documents.
docs = [
    *cnndm_sampled["article"],
    *vitc_sampled["evidence"],
    *xsum_sampled["document"],
]
print(len(docs))

37195


In [8]:
# Tokenizer belonging to the generator model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Replace the tokenizer's chat template with the local Jinja file. The context manager
# makes sure the file handle is closed once the template text has been read.
with open("mistral_template.jinja", "r") as template_file:
    tokenizer.chat_template = template_file.read()
def construct_prompt(source):
    """Render the chat prompt for one source passage.

    Args:
        source: Passage text substituted into ``envs.USER_PROMPT`` through its
            ``passage`` placeholder.

    Returns:
        The result of ``tokenizer.apply_chat_template`` (called with
        ``tokenize=False``) for a two-turn conversation: the system prompt
        followed by the formatted user prompt.
    """
    system_turn = {"role": "system", "content": envs.SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": envs.USER_PROMPT.format(passage=source)}
    return tokenizer.apply_chat_template([system_turn, user_turn], tokenize=False)

In [9]:
# Render one prompt per document (tqdm reports progress), then preview the first
# prompt together with the total count.
prompts = list(map(construct_prompt, tqdm(docs)))
print(prompts[0], len(prompts))

100%|██████████| 37195/37195 [00:05<00:00, 7049.40it/s]

<s>[INST] You are a chat bot answering questions using data. You must stick to the answers provided solely by the text in the passage provided. 

You are asked the question 'Provide a concise summary of the following passage, covering the core pieces of information described': LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think I'll be particularly extravagant. "The things I like 




In [10]:
# Number of completions sampled for every prompt.
n = 6
# Sampling configuration: n completions per prompt, temperature 1.0, at most 512 new tokens each.
sampling_params = vllm.SamplingParams(n=n, temperature=1.0, max_tokens=512)
print(prompts[0])


<s>[INST] You are a chat bot answering questions using data. You must stick to the answers provided solely by the text in the passage provided. 

You are asked the question 'Provide a concise summary of the following passage, covering the core pieces of information described': LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don't think I'll be particularly extravagant. "The things I like 

In [11]:
# Smoke test: generate for the first prompt only and print the raw result
# before launching the full run over every prompt.
print(model.generate(prompts[0], sampling_params))

Processed prompts: 100%|██████████| 1/1 [00:05<00:00,  5.55s/it]

[RequestOutput(request_id=0, prompt='<s>[INST] You are a chat bot answering questions using data. You must stick to the answers provided solely by the text in the passage provided. \n\nYou are asked the question \'Provide a concise summary of the following passage, covering the core pieces of information described\': LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be 




In [12]:
# Generate candidate summaries for every prompt in one batched call.
outputs = model.generate(prompts, sampling_params)

# One record per prompt. Completions are read directly from `output.outputs` instead of
# being indexed with the notebook-level `n`, so the records stay correct even if `n` has
# been rebound since `sampling_params` was created (no IndexError, no dropped samples).
lines = [
    {
        "prompt": prompt,
        "generated_text": [{"text": completion.text} for completion in output.outputs],
    }
    for prompt, output in zip(prompts, outputs)
]

# Persist the raw generations so the scoring step can start from this file.
util.dump2jsonl(lines, "tmp.jsonl")

Processed prompts: 100%|██████████| 37195/37195 [7:02:04<00:00,  1.47it/s]  


In [None]:
# Read the generated summaries back from the intermediate file.
lines = util.load_jsonl("tmp.jsonl")
# NOTE: this rebinds `model`; from here on the name refers to the CrossEncoder scorer,
# not to the vLLM engine created earlier in the notebook.
model = CrossEncoder(
    "vectara/hallucination_evaluation_model",
    device="cuda",
)

In [8]:
# Score every generated summary against its source document and attach the score to the
# summary record in place.
# NOTE: `docs[i]` and `lines[i]` must refer to the same passage, i.e. `docs` has to be
# built exactly as it was when tmp.jsonl was written.
for i, line in enumerate(tqdm(lines)):
    doc = docs[i]
    summaries = line["generated_text"]
    scores = model.predict([[doc, _sum["text"]] for _sum in summaries], show_progress_bar=False)
    for item, score in zip(summaries, scores):
        # Store a built-in float: `predict` presumably returns NumPy scalars (TODO confirm),
        # which the json module cannot serialise if `lines` is ever written back to disk.
        item["score"] = float(score)

100%|██████████| 37195/37195 [44:15<00:00, 14.00it/s]


In [13]:
# Build (prompt, chosen, rejected) triples: within each prompt, every summary scoring
# above 0.8 is paired with every summary scoring below 0.5.
dpo_pairs = []
for line in tqdm(lines):
    prompt = line["prompt"]
    # Summaries that fail util.is_summary_valid take no part in any pair.
    candidates = [cand for cand in line["generated_text"] if util.is_summary_valid(cand["text"])]
    chosen_samples = [cand["text"] for cand in candidates if cand["score"] > 0.8]
    rejected_samples = [cand["text"] for cand in candidates if cand["score"] < 0.5]

    # Cross product, chosen-major: the same ordering as a nested chosen/rejected loop.
    dpo_pairs.extend(
        {"prompt": prompt, "chosen": chosen, "rejected": rejected}
        for chosen in chosen_samples
        for rejected in rejected_samples
    )

print(len(dpo_pairs))
util.dump2jsonl(dpo_pairs, "dpo_pairs.jsonl")

100%|██████████| 37195/37195 [00:03<00:00, 10634.78it/s]


69519
