**Install packages**

In [None]:
!pip install -U git+https://github.com/huggingface/peft.git
!pip install bitsandbytes==0.35.0
!pip install transformers==4.31
!pip install -q datasets
!pip install -qqq trl==0.7.1

In [None]:
import torch
import time
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

from datasets import Dataset, load_dataset
from datasets import load_dataset, load_metric
from transformers import pipeline, set_seed
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import warnings
warnings.filterwarnings("ignore")

**Dataset = CNN Daily Mail**

In [None]:
# Load the CNN/DailyMail dataset, configuration "3.0.0", by name.
huggingface_dataset_name = "cnn_dailymail"
dataset = load_dataset(path=huggingface_dataset_name, name="3.0.0")

In [None]:
# Peek at the second training record: show the first 500 characters of its article.
sample = dataset["train"][1]
article_preview = sample["article"][:500]
print(article_preview)

**Prompt**

```
    Summarize the following conversation.
    
    ### Input:
    (CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third
    gold in Moscow as he anchored Jamaica to
    victory in the men's 4x100m relay. The fastest man in the world charged clear of United
    States rival Justin Gatlin as the Jamaican
    quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and Bolt won in 37.36
    seconds. The U.S finished second in 37.56 seconds
    with Canada taking the bronze after Britain were disqualified for a faulty handover.
    The 26-year-old Bolt has n......
    

    ### Summary:

    Usain Bolt wins third gold of world championship .
    Anchors Jamaica to 4x100m relay victory .
    Eighth gold at the championships for Bolt .
    Jamaica double up in women's 4x100m relay .
```

**Formatting the data**

In [None]:
def format_instruction(dialogue: str, summary: str):
    """Render one training example in the instruction/input/summary template.

    Args:
        dialogue: Source text to summarize; surrounding whitespace is removed.
        summary: Reference summary placed under the "### Summary:" header.

    Returns:
        The assembled prompt with leading/trailing whitespace stripped.
    """
    template_lines = [
        "### Instruction:",
        "Summarize the following conversation.",
        "",
        "### Input:",
        dialogue.strip(),
        "",
        "### Summary:",
        summary,
        "",  # trailing newline of the template, removed again by the final strip()
    ]
    return "\n".join(template_lines).strip()

def generate_instruction_dataset(data_point):
    """Build the mapped record for one dataset row.

    Keeps the row's "article" and "highlights" values and adds a "text" field
    holding the prompt produced by ``format_instruction`` from those two values.
    """
    article = data_point["article"]
    highlights = data_point["highlights"]
    return dict(
        article=article,
        highlights=highlights,
        text=format_instruction(article, highlights),
    )

def process_dataset(data: Dataset):
    """Shuffle a split (seed 42), add the formatted "text" column, drop "id".

    Note: the input must still contain an ``id`` column, so applying this
    function twice to the same split will fail at ``remove_columns``.
    """
    shuffled = data.shuffle(seed=42)
    with_text = shuffled.map(generate_instruction_dataset)
    return with_text.remove_columns(['id'])

**Shuffling the data and selecting some**



In [None]:
# Add the formatted "text" column to every split, each from its OWN raw split.
# NOTE: the test subset must come from the raw "test" split; building it from
# "validation" would make test_data identical to validation_data (same source,
# same shuffle seed) and leave no genuinely held-out examples.
dataset["train"] = process_dataset(dataset["train"])
dataset["test"] = process_dataset(dataset["test"])
dataset["validation"] = process_dataset(dataset["validation"])

# 1000 rows
train_data = dataset['train'].shuffle(seed=42).select(range(1000))

# 100 rows
test_data = dataset['test'].shuffle(seed=42).select(range(100))
validation_data = dataset['validation'].shuffle(seed=42).select(range(100))

**Set up a language model with LLAMA-2**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Checkpoint used for both the model weights and the tokenizer.
model_id = "NousResearch/Llama-2-7b-hf"

# 4-bit NF4 quantization with double quantization; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized causal LM; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Reuse EOS as the padding token and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

**Baseline summary with the base model (before fine-tuning)**

In [None]:
index = 2

dialogue = test_data['article'][index]
summary = test_data['highlights'][index]

# Build the prompt with the same template used for the training "text" column
# (empty summary), then add the newline that precedes the summary in that text,
# so the baseline and the fine-tuned model are prompted in the trained format.
prompt = format_instruction(dialogue, "") + "\n"

# Put the inputs on the same device as the model and pass the attention mask
# explicitly instead of handing generate() a bare CPU input_ids tensor.
inputs = tokenizer(prompt, return_tensors='pt').to(next(model.parameters()).device)
generated = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode only the newly generated tokens so the prompt is not echoed back
# under the "MODEL GENERATION" label.
prompt_length = inputs["input_ids"].shape[1]
output = tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)

dash_line = '-' * 99  # same 99-dash separator as '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'REFERENCE SUMMARY:\n{summary}')
print(dash_line)
print(f'MODEL GENERATION:\n{output}')

# Fine Tuning

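**Prepare the quantized model for parameter-efficient fine-tuning**

> Note: this section was originally labelled "knowledge distillation" (training a smaller student model from a teacher model). The cell below does not distil anything: it enables gradient checkpointing and prepares the 4-bit model for k-bit training.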



In [None]:
from peft import prepare_model_for_kbit_training

def print_trainable_parameters(model):
    """
    Print how many parameters of ``model`` are trainable, the total parameter
    count, and the trainable share as a percentage.

    Counts come from ``numel()`` of each entry in ``model.named_parameters()``;
    a parameter is trainable when its ``requires_grad`` flag is set.
    """
    counts = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(size for size, _ in counts)
    trainable_params = sum(size for size, needs_grad in counts if needs_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

# Turn on gradient checkpointing on the quantized base model.
model.gradient_checkpointing_enable()
# Let PEFT prepare the k-bit (here 4-bit) model for training; `model` is rebound
# to the returned object, so re-running this cell re-wraps the prepared model.
model = prepare_model_for_kbit_training(model)
# Print the module tree for inspection (long output).
print(model)

**Configure LoRA adapters on the attention projections**

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA settings: rank-16 adapters (alpha 64, dropout 0.1, no bias terms) on the
# q/k/v/o projection modules, for a causal-LM task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the prepared model with the adapters and report the trainable share.
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

**Configure various aspects of the fine-tuning process**

In [None]:
from transformers import TrainingArguments

# Directory that receives checkpoints and logs.
OUTPUT_DIR = "llama2-docsum-adapter"

training_arguments = TrainingArguments(
    # --- output / logging ---
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    logging_steps=1,
    save_strategy="epoch",
    save_safetensors=True,
    # --- batching: 4 per device x 4 accumulation steps ---
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    max_grad_norm=0.3,
    num_train_epochs=4,
    fp16=True,
    # --- evaluation ---
    evaluation_strategy="steps",
    eval_steps=0.2,  # float < 1: presumably a fraction of total training steps; confirm against the pinned transformers docs
    # --- reproducibility ---
    seed=42,
)

# Disable the generation cache on the model config while training.
model.config.use_cache = False

**Train a model using sequence-level fine-tuning**

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning on the "text" column, with sequences capped at 1024 tokens.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=lora_config,
    train_dataset=train_data,
    eval_dataset=validation_data,
    dataset_text_field="text",
    max_seq_length=1024,
)

# Run the training loop.
trainer.train()

**Save the fine-tuned model and tokenizer**

In [None]:
import os

# Destination for the fine-tuned adapter weights and the tokenizer files.
peft_model_path = "./peft-dialogue-summary"

# Create the directory in one call; exist_ok avoids the check-then-create race
# and makes the cell safe to re-run.
os.makedirs(peft_model_path, exist_ok=True)

trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

**Prepare the model for inference by using cache**

In [None]:
from transformers import TextStreamer
# Re-enable the generation cache that was switched off for training.
model.config.use_cache = True
# Switch the model to evaluation mode; as the cell's last expression this also
# displays the model repr.
model.eval()

**Hugging Face token**

In [None]:
import os
from getpass import getpass

# SECURITY: never hard-code an access token in a notebook. Any token that was
# ever committed here must be treated as leaked and revoked in the Hugging Face
# account settings.
# Use the TOKEN environment variable when it is already set; otherwise prompt
# for it without echoing the value into the notebook output.
if not os.environ.get("TOKEN"):
    os.environ["TOKEN"] = getpass("Hugging Face access token: ")

**Load the trained model and tokenizer**

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Directory written by the save cell (adapter weights + tokenizer files).
peft_model_dir = "peft-dialogue-summary"

# Load the base model plus adapter with the same 4-bit quantization config used
# for training and let accelerate place it on the available device(s). Without
# these arguments the model is loaded with default settings (full precision on
# CPU), which is memory-hungry and does not match inputs sent to the GPU.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_dir,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_dir)

**Generate a summary using a trained PEFT model and tokenizer**

In [None]:
index = 51

# NOTE(review): this example comes from train_data, i.e. data the adapter was
# fitted on; use test_data to judge quality on unseen articles.
dialogue = train_data['article'][index][:10000]
summary = train_data['highlights'][index]

# Use the same template as the training "text" column (empty summary) plus the
# newline that precedes the summary there, so inference matches training.
prompt = format_instruction(dialogue, "") + "\n"

# Send the inputs to whichever device the model actually lives on rather than
# assuming CUDA, and pass the attention mask explicitly.
inputs = tokenizer(prompt, return_tensors='pt', truncation=True).to(
    next(trained_model.parameters()).device
)
input_ids = inputs["input_ids"]
outputs = trained_model.generate(
    input_ids=input_ids,
    attention_mask=inputs["attention_mask"],
    max_new_tokens=200,
    pad_token_id=tokenizer.eos_token_id,
)

# Drop the prompt by token count: slicing the decoded string by len(prompt)
# breaks whenever decoding does not reproduce the prompt character-for-character
# (e.g. after truncation or whitespace normalisation).
output = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)

dash_line = '-' * 99  # same 99-dash separator as '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'REFERENCE SUMMARY:\n{summary}')
print(dash_line)
print(f'TRAINED MODEL GENERATED TEXT :\n{output}')