#### Let's first load the Llama 2 base model and deploy it

In [1]:
import json
import re
from pprint import pprint
from src import process_text, model

import pandas as pd
import torch
from datasets import Dataset, load_dataset
from huggingface_hub import notebook_login
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer
import logging

# Surface INFO-level log records in the cell output.
logging.basicConfig(level=logging.INFO)

# Use the first CUDA device when one is visible to torch, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Checkpoint handed to the model loader further down.
# Alternative checkpoint kept for reference: "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_NAME = "meta-llama/Llama-2-7b-hf"

#### Download and extract data

In [2]:
# Load the "TweetSumm" configuration of Salesforce/dialogstudio.
# `datasets` warns that `trust_remote_code=True` will become mandatory for this
# dataset, so opt in explicitly instead of relying on the deprecated implicit
# behaviour.
# NOTE(review): this flag lets `datasets` execute code shipped with the dataset
# repository - review that code (or pin a revision) before running in a
# sensitive environment.
dataset = load_dataset(
    "Salesforce/dialogstudio",
    "TweetSumm",
    trust_remote_code=True,
)

# Instruction text for the summarisation task. Written as a plain literal; the
# value is identical to the previous indented triple-quoted string after
# `.strip()`.
# NOTE(review): not referenced in the cells visible here - presumably mirrors
# the instruction embedded by `process_text`; confirm it is still needed.
DEFAULT_SYSTEM_PROMPT = (
    "Below is a conversation between a human and an AI agent. "
    "Write a summary of the conversation."
)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [3]:
# Run the project's preprocessing over the train and validation splits, storing
# the result back under the same key. The test split is left as loaded.
# NOTE(review): the splits are overwritten in place, so re-running this cell
# feeds already-processed data back into `process_dataset` - confirm that is
# safe, or re-run the loading cell first.
for split_name in ("train", "validation"):
    dataset[split_name] = process_text.process_dataset(dataset[split_name])



INFO:root:Processing dataset...
INFO:root:Processing dataset...


#### Evaluate model

In [4]:
# Build the base model and its tokenizer for MODEL_NAME via the project helper,
# then unpack the returned pair into the names used by the cells below.
model_and_tokenizer = model.create_model_and_tokenizer(llm_model_name=MODEL_NAME)
llm2, tokenizer = model_and_tokenizer

INFO:root:Creating model meta-llama/Llama-2-7b-hf..


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Assemble a small evaluation table from the first five test dialogues: the
# reference summary, the conversation text, and the prompt built from it.
examples = []
for data_point in dataset["test"].select(range(5)):
    # The reference summaries are stored as JSON inside "original dialog info".
    dialog_info = json.loads(data_point["original dialog info"])
    summaries = dialog_info["summaries"]["abstractive_summaries"]
    # Take the first abstractive summary and join its pieces with spaces.
    summary = " ".join(summaries[0])
    conversation = process_text.create_conversation_text(data_point)
    record = {
        "summary": summary,
        "conversation": conversation,
        "prompt": process_text.generate_deployment_prompt(conversation),
    }
    examples.append(record)

test_df = pd.DataFrame(examples)
test_df

Unnamed: 0,summary,conversation,prompt
0,Customer is complaining that the watchlist is ...,user: My watchlist is not updating with new ep...,### Instruction: Below is a conversation betwe...
1,Customer is asking about the ACC to link to th...,"user: hi , my Acc was linked to an old number....",### Instruction: Below is a conversation betwe...
2,Customer is complaining about the new updates ...,user: the new update ios11 sucks. I can’t even...,### Instruction: Below is a conversation betwe...
3,Customer is complaining about parcel service ...,user: FUCK YOU AND YOUR SHITTY PARCEL SERVICE ...,### Instruction: Below is a conversation betwe...
4,The customer says that he is stuck at Staines ...,user: Stuck at Staines waiting for a Reading t...,### Instruction: Below is a conversation betwe...


In [6]:
def summarize(model, text: str) -> str:
    """Generate a continuation of ``text`` and return only the new tokens as text.

    Relies on the notebook-level ``tokenizer`` and ``DEVICE`` globals.

    Args:
        model: Object exposing ``generate``; called with the tokenized prompt.
            Inside this function the name shadows the ``model`` module
            imported from ``src``.
        text: Prompt to tokenize and feed to the model.

    Returns:
        The decoded generated tokens with the prompt tokens sliced off and
        special tokens skipped.
    """
    inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
    # Number of prompt tokens; used below to drop the echoed prompt.
    prompt_length = inputs["input_ids"].shape[1]
    with torch.inference_mode():
        # Request greedy decoding explicitly. The previous `temperature=0.0001`
        # only has an effect in sampling mode: it was either ignored (with a
        # warning) or, if the checkpoint's generation config enables sampling,
        # produced near-greedy but still stochastic output.
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


# Try the base model on the first evaluation example.
example = test_df.iloc[0]
summary = summarize(model=llm2, text=example["prompt"])

In [7]:
# Pretty-print the generated text so the long string is wrapped across lines.
pprint(summary)

('\n'
 '    user: My watchlist is not updating with new episodes (past couple days). '
 'Any idea why?\n'
 "    agent: Apologies for the trouble, Norlene! We're looking into this. In "
 'the meantime, try navigating to the season / episode manually.\n'
 '    user: Tried logging out/back in, that didn’t help\n'
 '    agent: Sorry! 😔 We assure you that our team is working hard to '
 'investigate, and we hope to have a fix ready soon!\n'
 '    user: Thank you! Some shows updated overnight, but others did not...\n'
 '    agent: We definitely understand, Norlene. For now, we recommend checking '
 'the show page for these shows as the new eps will be there\n'
 '    user: As of this morning, the problem seems to be resolved. Watchlist '
 'updated overnight with all new episodes. Thank you for your attention to '
 'this matter! I love Hulu 💚\n'
 "    agent: Awesome! That's what we love to hear. If you happen to need "
 "anything else, we'll be here to support! 💚\n"
 '\n'
 '    ### Output:\n'
 