In [1]:
!pip install --upgrade pip
!pip install --disable-pip-version-check
!pip install torch==1.13.1
!pip install torchdata==0.5.1 --quiet

!pip install transformers==4.27.2
!pip install datasets==2.11.0 --quiet
!pip install evaluate==0.4.0
!pip install rouge_score==0.1.2
!pip install loralib==0.1.1
!pip install peft==0.3.0 --quiet

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
[31mERROR: You must give at least one requirement to install (see "pip help install")[0m[31m
[0mLooking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [2]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
# Loading dataset
dataset = load_dataset("knkarthick/dialogsum")

Found cached dataset csv (/home/ec2-user/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Load pre_trained model and tokenizer
model_name = "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype = torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2.0 Perform Zero Shot Inferencing

In [5]:
index = 10

dialogue = dataset["test"][index]["dialogue"]
summary = dataset["test"][index]["summary"]

# generate prompt

prompt = """
Summarize the following conversation.

{}

Summary:
""".format(dialogue)

inputs = tokenizer(prompt, return_tensors = "pt")
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        max_new_tokens = 200,
    )[0],
    skip_special_tokens = True
)

In [6]:
print("BASELINE SUMMARY: {}".format(summary))
print()
print("GENERATED SUMMARY ZERO SHOT: {}".format(output))

BASELINE SUMMARY: #Person1# attends Brian's birthday party. Brian thinks #Person1# looks great and charming.

GENERATED SUMMARY ZERO SHOT: #Person1#: Happy birthday, Brian. #Person2#: I'm so happy you're having a good time. #Person1#: Thank you, I'm sure you look great today. #Person2#: Thank you, I'm sure you look great. #Person1#: Thank you, I'm sure you look great today. #Person2#: Thank you, I'm sure you look great today. #Person1#: Thank you, I'm sure you look great today. #Person2#: Thank you, I'm sure you look great today. #Person1#: Thank you, I'm sure you look great today.


# 3.0 Perform Full Fine-Tuning

`Full Fine-Tuning` is the process of training a pre-trained language model on a task. When performing full fine-tuning, all the parameters of the pre-trained language model are updated during the training process.

* Loading the pre-trained model
* Task-specific dataset preparation - Prepare a labelled dataset specific to task. Dataset should be annotated with appropriate labels.
* Model architecture modification
* Fine-tuning process
* Evaluation

In [7]:
# see how many parameters there are in our LLM
all_model_params = 0
trainable_model_params = 0

for _, param in model.named_parameters():
    all_model_params += param.numel()
    if param.requires_grad:
        trainable_model_params += param.numel()
        
print("Total Number of Parameters: {}".format(all_model_params))
print("Trainable Parameters: {}".format(trainable_model_params))

Total Number of Parameters: 247577856
Trainable Parameters: 247577856


## 3.1 Data Preparation

Need to convert the dialogue-summary (prompt-response) pairs into explicit instructions for the LLM. Then preprocess the prompt-response dataset into tokens and pull out their `input_ids`

In [8]:
def tokenize_function(example):
    """
    This function converts the dialogue_summar into explicit instructions for the LLM. It then tokenizes the 
    """
    start_prompt = "Summarize the following conversation.\n\n"
    end_prompt = "\n\nSummary: "
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example["input_ids"] = tokenizer(prompt, padding = "max_length", truncation = True, return_tensors = "pt").input_ids
    example["labels"] = tokenizer(example["summary"], padding = "max_length", truncation = True, return_tensors = "pt").input_ids
    
    return example

In [9]:
tokenized_dataset = dataset.map(tokenize_function, batched = True)
tokenized_dataset

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-7342a9edc524a537.arrow


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-5168131c4aa8a6cf.arrow


DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 12460
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 1500
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 500
    })
})

In [10]:
tokenized_dataset = tokenized_dataset.remove_columns(["id", "topic", "dialogue", "summary"])

In [11]:
sample_tokenized_dataset = tokenized_dataset.filter(lambda example, index: index % 100 == 0, with_indices = True)
sample_tokenized_dataset

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-7c7c8c8ac345425f.arrow


Filter:   0%|          | 0/1500 [00:00<?, ? examples/s]

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-c8fac5d84cd35861/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-96c77433143689a7.arrow


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 125
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 15
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 5
    })
})

## 3.2 Fine Tuning Model with Preprocessed Data

Using built-in Hugging Face `Trainer` class. Only fine-tuning on a subset of the data to save time and resources. The performance may not be as great as we think since we're only fine-tuning on the subset of data.

In [12]:
output_dir = f'./dialogue-summary-training-{str(int(time.time()))}'


# initializing training arguments
training_args = TrainingArguments(
    output_dir = output_dir,
    learning_rate = 1e-5,
    num_train_epochs = 1,
    weight_decay = 0.01,
    logging_steps = 1,
    max_steps = 1
)

# trainer
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = sample_tokenized_dataset["train"],
    eval_dataset = sample_tokenized_dataset["validation"]
)



In [13]:
# run training job
trainer.train()



Step,Training Loss
1,49.25


TrainOutput(global_step=1, training_loss=49.25, metrics={'train_runtime': 143.4321, 'train_samples_per_second': 0.056, 'train_steps_per_second': 0.007, 'total_flos': 5478058819584.0, 'train_loss': 49.25, 'epoch': 0.06})

In [14]:
trainer.save_model(output_dir)

In [15]:
# create instance of the AutoMmodelForSeq2SeqLM class for the instruct model
instruct_model = AutoModelForSeq2SeqLM.from_pretrained("dialogue-summary-training-1689008306", torch_dtype = torch.bfloat16)

# 4.0 Model Evaluation After Full Fine Tuning

## 4.1 Qualitative Evaluation

In [16]:
index = 200

dialogue = dataset["test"][index]["dialogue"]
summary = dataset["test"][index]["summary"]

# generate prompt

prompt = """
Summarize the following conversation.

{}

Summary:
""".format(dialogue)

inputs = tokenizer(prompt, return_tensors = "pt").input_ids

original_model_output = model.generate(input_ids =inputs, 
                                       generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
original_model_output_decode = tokenizer.decode(original_model_output[0], skip_special_tokens = True)

instruct_model_output = instruct_model.generate(input_ids = inputs,
                                               generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
instruct_model_output_decode = tokenizer.decode(instruct_model_output[0], skip_special_tokens = True)

In [17]:
# Print the results
print("BASELINE SUMMARY: {}".format(summary))
print()
print("GENERATED SUMMARY ZERO SHOT: {}".format(original_model_output_decode))
print()
print("GENERATED SUMMARY FINE-TUNED: {}".format(instruct_model_output_decode))

BASELINE SUMMARY: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

GENERATED SUMMARY ZERO SHOT: #Person1#: You'd like to upgrade your computer. #Person2: You'd like to upgrade your computer.

GENERATED SUMMARY FINE-TUNED: #Person1#: I'm thinking of upgrading my computer.


## 4.2 Quantitative Evaluation (ROUGE)

`ROUGE` is a metric used to quantify the validity of summarization produced. It compares the baseline summary to the generated summary.

In [18]:
# load rouge evaluation
rouge = evaluate.load("rouge")

In [19]:
# generate the outputs for the sample of the test dataset 
dialogues = dataset["test"][0:10]["dialogue"]
baseline_summaries = dataset["test"][0:10]["summary"]

original_model_summaries = []
instruct_model_summaries = []

for _, dialogue in enumerate(dialogues):
    prompt = """
    Summarize the following conversation.
    
    {}
    
    Summary
    """.format(dialogue)
    
    inputs = tokenizer(prompt, return_tensors = "pt").input_ids

    original_model_output = model.generate(input_ids =inputs, 
                                       generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
    original_model_output_decode = tokenizer.decode(original_model_output[0], skip_special_tokens = True)
    original_model_summaries.append(original_model_output_decode)

    instruct_model_output = instruct_model.generate(input_ids = inputs,
                                               generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
    instruct_model_output_decode = tokenizer.decode(instruct_model_output[0], skip_special_tokens = True)
    instruct_model_summaries.append(instruct_model_output_decode)
    
zipped_summaries = list(zip(baseline_summaries, original_model_summaries, instruct_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ["Baseline", "Original-Model", "Fine-Tuned"])

In [20]:
df

Unnamed: 0,Baseline,Original-Model,Fine-Tuned
0,Ms. Dawson helps #Person1# to write a memo to ...,The memo is being distributed to all employees...,The memo will go out to all employees by this ...
1,In order to prevent employees from wasting tim...,This memo should go out as an intra-office mem...,The memo will go out to all employees by this ...
2,Ms. Dawson takes a dictation for #Person1# abo...,Employees who use the Instant Messaging system...,The memo will go out to all employees by this ...
3,#Person2# arrives late because of traffic jam....,#Person1: You're finally here!,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,#Person1#: You've finally found your way home.,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,The person is driving to work.,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorce.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,#Person1: They are getting divorced. #Person2:...,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,Brian's birthday is coming up.,"#Person1#: Happy birthday, Brian. #Person2#: I..."


In [21]:
# Evaluate the models using ROUGE metric

original_model_results = rouge.compute(
    predictions = original_model_summaries,
    references = baseline_summaries[0:len(original_model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)

instruct_model_results = rouge.compute(
    predictions = instruct_model_summaries,
    references = baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)

In [22]:
print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)

ORIGINAL MODEL:
{'rouge1': 0.2517194351542178, 'rouge2': 0.07682569526047786, 'rougeL': 0.20633697770997542, 'rougeLsum': 0.21046936832978025}
INSTRUCT MODEL:
{'rouge1': 0.2488908619621208, 'rouge2': 0.1104891774891775, 'rougeL': 0.2158497376018475, 'rougeLsum': 0.2190598097174996}


There is an improvement in the ROUGE score despite only fine-tuning the model a subset of the data.

# 5.0 Perform Parameter Efficient Fine Tuning (PEFT

`Parameter Efficient Fine-Tuning` is another fine tuning method that is much more efficient than full fine tuning. Though the final results may not be as high as Full fine tuning, the method is efficient and generates comparable results.

After fine tuning for a specific task, the result is that the original LLM remains unchanged and a newly-trained `LoRA Adapter` emerges. This LoRA Adapter is much smaller than the original LLM.

During inference time the LoRA Adapter is combined with the original LLM to serve the inference request.

## 5.1 Setup PEFT Model For Fine-Tuning

Set up the PEFT model for fine-tuning with a new layer/parameter adapter. Using PEFT, we freeze the underlying LLM and only train the adapter.

In [23]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r = 32,
    lora_alpha = 32,
    target_modules = ["q", "v"],
    lora_dropout = 0.05,
    bias = "none",
    task_type = TaskType.SEQ_2_SEQ_LM
)

Add LoRA Adapter layers to the original LLM to be trained

In [24]:
peft_model = get_peft_model(model, lora_config)

In [25]:
# see how many parameters there are in our LLM
all_model_params = 0
trainable_model_params = 0

for _, param in peft_model.named_parameters():
    all_model_params += param.numel()
    if param.requires_grad:
        trainable_model_params += param.numel()
        
print("Total Number of Parameters: {}".format(all_model_params))
print("Trainable Parameters: {}".format(trainable_model_params))

Total Number of Parameters: 251116800
Trainable Parameters: 3538944


## 5.2 Train PEFT Adapter

In [26]:
output_dir = f'./peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir = output_dir,
    auto_find_batch_size = True,
    learning_rate = 1e-3,
    num_train_epochs = 1,
    logging_steps = 1,
    max_steps = 1
)

peft_trainer = Trainer(
    model = peft_model,
    args = peft_training_args,
    train_dataset = sample_tokenized_dataset["train"]
)

In [27]:
peft_trainer.train()

peft_model_path = "./peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)



Step,Training Loss
1,51.0


('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

Prepare an instance of this model by adding an adapter to the original LLM model. Setting `is_trainable = False`.

In [28]:
from peft import PeftModel, PeftConfig

# original LLM model
peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base", torch_dtype = torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

# adding layer to original model
peft_model = PeftModel.from_pretrained(peft_model_base,
                                      './peft-dialogue-summary-checkpoint-local',
                                      torch_dtype = torch.bfloat16,
                                      is_trainable = False)

In [29]:
# see how many parameters there are in our LLM
all_model_params = 0
trainable_model_params = 0

for _, param in peft_model.named_parameters():
    all_model_params += param.numel()
    if param.requires_grad:
        trainable_model_params += param.numel()
        
print("Total Number of Parameters: {}".format(all_model_params))
print("Trainable Parameters: {}".format(trainable_model_params))

Total Number of Parameters: 251116800
Trainable Parameters: 0


# 6.0 Model Evaluation After PEFT-Tuning

## 6.1 Qualitative Evaluation

In [30]:
index = 200

dialogue = dataset["test"][index]["dialogue"]
summary = dataset["test"][index]["summary"]

# generate prompt

prompt = """
Summarize the following conversation.

{}

Summary:
""".format(dialogue)

inputs = tokenizer(prompt, return_tensors = "pt").input_ids

original_model_output = model.generate(input_ids =inputs, 
                                       generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
original_model_output_decode = tokenizer.decode(original_model_output[0], skip_special_tokens = True)

peft_model_output = peft_model.generate(input_ids = inputs,
                                               generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
peft_model_output_decode = tokenizer.decode(peft_model_output[0], skip_special_tokens = True)

In [31]:
# Print the results
print("BASELINE SUMMARY: {}".format(summary))
print()
print("GENERATED SUMMARY ZERO SHOT: {}".format(original_model_output_decode))
print()
print("GENERATED SUMMARY FINE-TUNED: {}".format(instruct_model_output_decode))
print()
print("GENERATED SUMMARY PEFT-TUNED: {}".format(peft_model_output_decode))

BASELINE SUMMARY: #Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.

GENERATED SUMMARY ZERO SHOT: #Pork1: Have you considered upgrading your system? #Person1: Yes, but I'm not sure what exactly I would need. #Person1: I'd like to make up my own flyers and banners for advertising. #Person1: I'd like to make up my own flyers and banners for advertising. #Person2: That would be a definite bonus. #Person1: You can do that. #Person1: I'd like to do that.

GENERATED SUMMARY FINE-TUNED: #Person1#: Happy birthday, Brian. #Person2#: I'm so happy you're having a good time. #Person1#: Thank you, I'm sure you look great today. #Person2#: Thank you, I'm sure you look great. #Person1#: Thank you, I'm sure you look great today. #Person2#: Thank you, I'm sure you look great today. #Person1#: Thank you, I'm sure you look great today.

GENERATED SUMMARY PEFT-TUNED: #Person1#: I'm thinking of upgrading my computer.


## 6.2 Quantitative Evaluation (ROUGE)

`ROUGE` is a metric used to quantify the validity of summarization produced. It compares the baseline summary to the generated summary.

In [32]:
# generate the outputs for the sample of the test dataset 
dialogues = dataset["test"][0:10]["dialogue"]
baseline_summaries = dataset["test"][0:10]["summary"]

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for _, dialogue in enumerate(dialogues):
    prompt = """
    Summarize the following conversation.
    
    {}
    
    Summary
    """.format(dialogue)
    
    inputs = tokenizer(prompt, return_tensors = "pt").input_ids

    original_model_output = model.generate(input_ids =inputs, 
                                       generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
    original_model_output_decode = tokenizer.decode(original_model_output[0], skip_special_tokens = True)
    original_model_summaries.append(original_model_output_decode)

    instruct_model_output = instruct_model.generate(input_ids = inputs,
                                               generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
    instruct_model_output_decode = tokenizer.decode(instruct_model_output[0], skip_special_tokens = True)
    instruct_model_summaries.append(instruct_model_output_decode)
    
    peft_model_output = peft_model.generate(input_ids = inputs,
                                               generation_config = GenerationConfig(max_new_tokens = 200, num_beams = 1))
    peft_model_output_decode = tokenizer.decode(peft_model_output[0], skip_special_tokens = True)
    peft_model_summaries.append(peft_model_output_decode)
    
    
zipped_summaries = list(zip(baseline_summaries, original_model_summaries, instruct_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ["Baseline", "Original-Model", "Fine-Tuned", "PEFT-Tuned"])

In [33]:
df

Unnamed: 0,Baseline,Original-Model,Fine-Tuned,PEFT-Tuned
0,Ms. Dawson helps #Person1# to write a memo to ...,Employees are required to use instant messaging.,The memo will go out to all employees by this ...,The memo is to be distributed to all employees...
1,In order to prevent employees from wasting tim...,This memo is for all employees.,The memo will go out to all employees by this ...,The memo is to be distributed to all employees...
2,Ms. Dawson takes a dictation for #Person1# abo...,Employees who use Instant Message will receive...,The memo will go out to all employees by this ...,The memo is to be distributed to all employees...
3,#Person2# arrives late because of traffic jam....,The person in the first sentence is talking ab...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,Person1: I'm so glad you got stuck in traffic ...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,The car is causing a lot of traffic in the city.,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,#Person1: Masha and Hero are getting divorced....,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting married.,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,#Person1: Thank you for coming to the party. #...,"#Person1#: Happy birthday, Brian. #Person2#: I...","#Person1#: Happy birthday, Brian. #Person2#: I..."


In [34]:
# Evaluate the models using ROUGE metric

original_model_results = rouge.compute(
    predictions = original_model_summaries,
    references = baseline_summaries[0:len(original_model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)

instruct_model_results = rouge.compute(
    predictions = instruct_model_summaries,
    references = baseline_summaries[0:len(instruct_model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)

peft_model_results = rouge.compute(
    predictions = peft_model_summaries,
    references = baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator = True,
    use_stemmer = True
)

In [35]:
print('ORIGINAL MODEL:')
print(original_model_results)
print('INSTRUCT MODEL:')
print(instruct_model_results)
print("PEFT MODEL: ")
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.2032152477543929, 'rouge2': 0.07742309097552065, 'rougeL': 0.18894173166390682, 'rougeLsum': 0.1894183320685648}
INSTRUCT MODEL:
{'rouge1': 0.2488908619621208, 'rouge2': 0.1104891774891775, 'rougeL': 0.2158497376018475, 'rougeLsum': 0.2190598097174996}
PEFT MODEL: 
{'rouge1': 0.2517460317460318, 'rouge2': 0.10979007465963989, 'rougeL': 0.2128638229725186, 'rougeLsum': 0.21601610748349878}
