## Guide on fine-tuning a Phi-3-mini model for Python code generation utilizing QLoRA via Hugging Face Hub

## Loading the libraries

In [1]:
# 'pip' is a package installer for Python. It is used to install and manage software packages/libraries written in Python.

# '!pip list' is a command that lists all installed Python packages.

# '|' is a pipe. It takes the output of the command on its left and uses it as the input to the command on its right.

# 'grep' is a command-line utility for searching plain-text data sets for lines that match a regular expression. In this case, it's used to filter the output of 'pip list'.

# 'transformers.' is the regular expression that 'grep' is searching for. It will match any line that contains 'transformers.'.

# So, '!pip list | grep transformers.' is a command that lists all installed Python packages and filters the list for packages that contain 'transformers.' in their name.
!pip list | grep transformers.

transformers              4.42.3


## Importing the libraries

In [2]:
# This code block imports several modules and functions that are needed for the script.

# 'randrange' is a function from the 'random' module that generates a random number within the specified range.

# 'torch' is a library for scientific computing that provides a wide range of functionalities for dealing with tensors.

# 'load_dataset' is a function from the 'datasets' library that loads a dataset from the Hugging Face datasets hub.

# 'LoraConfig', 'prepare_model_for_kbit_training', and 'PeftModel' are classes and functions from the 'peft' library that are used for configuring and preparing the model for training.

# 'AutoModelForCausalLM', 'AutoTokenizer', 'BitsAndBytesConfig', 'TrainingArguments', 'set_seed', and 'pipeline' are classes and functions from the 'transformers' library that are used for creating and training the model.

# 'SFTTrainer' is a class from the 'trl' library that is used for training the model.
from random import randrange

import torch
from datasets import load_dataset

from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)
from trl import SFTTrainer

## Setting Global Parameters

In [3]:
# 'model_id' and 'model_name' are the identifiers for the pre-trained model from Hugging Face hub that you want to fine-tune.
model_id = "microsoft/Phi-3-mini-4k-instruct"
# model_name = "microsoft/Phi-3-mini-4k-instruct"
model_name = "./Phi-3-mini-4k-instruct"

# 'dataset_name' is the identifier for the dataset that you want to use for fine-tuning.
# dataset_name = "Intel/orca_dpo_pairs"
dataset_name = "iamtarun/python_code_instructions_18k_alpaca"

# 'dataset_split' is the split of the dataset that you want to use for fine-tuning. In this case, it is set to 'train', which means that the training split of the dataset will be used.
dataset_split= "train"

# 'new_model' is the name that you want to give to the fine-tuned model.
new_model = "phi-3-mini-QLoRA-alpaca"

# 'hf_model_repo' is the identifier for the Hugging Face repository where you want to save the fine-tuned model.
hf_model_repo="wolfeidau/"+new_model

# Load Model on GPU 

# 'device_map' is a dictionary that maps devices to model parts. In this case, it is set to {"": 0}, which means that the entire model will be loaded on GPU 0.
device_map = {"": 0}

# Bits and Bytes configuration for the model

# 'use_4bit' is a boolean that controls whether 4-bit precision should be used for loading the base model.
use_4bit = True

# 'bnb_4bit_compute_dtype' is the data type that should be used for computations with the 4-bit base model. In this case, it is set to 'bfloat16'.
bnb_4bit_compute_dtype = "bfloat16"

# 'bnb_4bit_quant_type' is the type of quantization that should be used for the 4-bit base model. In this case, it is set to 'nf4'.
bnb_4bit_quant_type = "nf4"

# 'use_double_quant' is a boolean that controls whether nested quantization should be used for the 4-bit base model.
use_double_quant = True

# LoRA configuration for the model

# 'lora_r' is the dimension of the LoRA attention.
lora_r = 16

# 'lora_alpha' is the alpha parameter for LoRA scaling.
lora_alpha = 16

# 'lora_dropout' is the dropout probability for LoRA layers.
lora_dropout = 0.05

# 'target_modules' is a list of the modules that should be targeted by LoRA.
target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"]

# 'set_seed(1234)' sets the random seed for reproducibility.
set_seed(1234)



## Connect to Huggingface Hub

IMPORTANT: The upcoming section's execution will vary based on your code execution environment and the configuration of your API Keys.

Interactive login to Hugging Face Hub is possible.

Alternatively, you can supply a .env file that contains the Hugging Face token.

In [4]:
# 'huggingface_hub' is a library that provides tools for working with the Hugging Face Model Hub.

# 'login' is a function from the 'huggingface_hub' library that logs you into the Hugging Face Model Hub.

# 'dotenv' is a library that allows you to load environment variables from a .env file.

# 'load_dotenv' is a function from the 'dotenv' library that loads environment variables from a .env file.

# 'os' is a standard Python library that provides functions for interacting with the operating system.

# 'os.getenv' is a function from the 'os' library that gets the value of an environment variable.

# 'from huggingface_hub import login' imports the 'login' function from the 'huggingface_hub' library.

# 'from dotenv import load_dotenv' imports the 'load_dotenv' function from the 'dotenv' library.

# 'import os' imports the 'os' library.

# 'load_dotenv()' calls the 'load_dotenv' function, which loads environment variables from a .env file.

# 'login(token=os.getenv("HF_HUB_TOKEN"))' calls the 'login' function with the value of the 'HF_HUB_TOKEN' environment variable as the 'token' parameter. This logs you into the Hugging Face Model Hub using the token stored in the 'HF_HUB_TOKEN' environment variable.
from huggingface_hub import login
from dotenv import load_dotenv
import os

load_dotenv()
login(token=os.getenv("HF_HUB_TOKEN"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load the dataset with the instruction set

In [5]:
# 'load_dataset' is a function from the 'datasets' library that loads a dataset from the Hugging Face Dataset Hub.

# 'dataset_name' is the identifier for the dataset that you want to load.

# 'split' is the split of the dataset that you want to load. In this case, it is set to the value of 'dataset_split'.

# 'dataset = load_dataset(dataset_name, split=dataset_split)' loads the specified split of the specified dataset from the Hugging Face Dataset Hub and assigns it to the variable 'dataset'.

# 'len(dataset)' returns the size of the dataset.

# 'print(f"dataset size: {len(dataset)}")' prints the size of the dataset.

# 'randrange' is a function from the 'random' library that returns a random integer from the specified range.

# 'len(dataset)' is the range from which 'randrange' should choose a random integer. In this case, it is set to the size of the dataset, which means that 'randrange' will choose a random index from the dataset.

# 'dataset[randrange(len(dataset))]' returns a random example from the dataset.

# 'print(dataset[randrange(len(dataset))])' prints a random example from the dataset.
dataset = load_dataset(dataset_name, split=dataset_split)
print(f"dataset size: {len(dataset)}")
print(dataset[randrange(len(dataset))])

dataset size: 18612
{'instruction': 'Compare two strings to check if they are anagrams or not in Python.', 'input': '“silent”, “listen”', 'output': 'def is_anagram(w1, w2):\n    # Check if lengths are equal\n    if len(w1) == len(w2):\n        # Sort the strings\n        s1 = sorted(w1)\n        s2 = sorted(w2)\n        # Check if sorted strings are equal\n        if s1 == s2:\n            return True\n    return False\n\n# Example\nw1 = "silent"\nw2 = "listen"\n\nprint(is_anagram(w1, w2)) #Output: True', 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCompare two strings to check if they are anagrams or not in Python.\n\n### Input:\n“silent”, “listen”\n\n### Output:\ndef is_anagram(w1, w2):\n    # Check if lengths are equal\n    if len(w1) == len(w2):\n        # Sort the strings\n        s1 = sorted(w1)\n        s2 = sorted(w2)\n        # Check if sorted strings are equal\n        if s1 == s2:\n 

In [6]:
# 'dataset' is a variable that contains the dataset loaded from the Hugging Face Dataset Hub.

# 'dataset' when used alone like this in a Jupyter notebook cell, it will display the structure of the dataset. This includes information such as the number of examples in the dataset, the names and types of the fields in the dataset, and the shapes of the fields.

# This line of code is used to check the structure of the dataset to ensure that it is in the expected format before proceeding with further data processing or model training.
dataset

Dataset({
    features: ['instruction', 'input', 'output', 'prompt'],
    num_rows: 18612
})

In [7]:
# 'randrange' is a function from the 'random' library that returns a random integer from the specified range.

# 'len(dataset)' is the range from which 'randrange' should choose a random integer. In this case, it is set to the size of the dataset, which means that 'randrange' will choose a random index from the dataset.

# 'dataset[randrange(len(dataset))]' returns a random example from the dataset.

# 'print(dataset[randrange(len(dataset))])' prints a random example from the dataset. This is useful for getting a sense of what the data in the dataset looks like.
print(dataset[randrange(len(dataset))])

{'instruction': 'Create a simple Python program to sum the values of two variables.', 'input': '<input>', 'output': 'x = 5\ny = 4\n\ntotal = x + y\n\nprint(total) # Output: 9', 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a simple Python program to sum the values of two variables.\n\n### Input:\n<input>\n\n### Output:\nx = 5\ny = 4\n\ntotal = x + y\n\nprint(total) # Output: 9'}


## Load the tokenizer to prepare the dataset

In [8]:
# 'tokenizer_id' is the identifier for the tokenizer that you want to load. In this case, it is set to the value of 'model_id', which means that the tokenizer associated with the pre-trained model will be loaded.

# 'AutoTokenizer' is a class from the 'transformers' library that provides a generic tokenizer class from which all other tokenizer classes inherit.

# 'from_pretrained' is a method of the 'AutoTokenizer' class that loads a tokenizer from the Hugging Face Model Hub.

# 'tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)' loads the tokenizer associated with 'tokenizer_id' from the Hugging Face Model Hub and assigns it to the variable 'tokenizer'.

# 'tokenizer.padding_side' is a property of the 'tokenizer' object that determines on which side of the input sequences padding should be added. It can be set to either 'left' or 'right'.

# 'tokenizer.padding_side = 'right'' sets 'tokenizer.padding_side' to 'right', which means that padding will be added to the right side of the input sequences. This is done to prevent warnings that can occur when 'tokenizer.padding_side' is set to 'left'.
tokenizer_id = model_id
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
tokenizer.padding_side = 'right' # to prevent warnings

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Function to generate the suitable format for our model.

In [9]:
# 'create_message_column' is a function that takes a row from a dataset and returns a dictionary with a single key-value pair. The key is 'messages' and the value is a list of dictionaries, each representing a message.

# 'row' is the input to the 'create_message_column' function. It is expected to be a dictionary with keys 'instruction', 'input', and 'output'.

# 'messages' is a list that will contain the messages.

# 'user' is a dictionary that represents a user message. The 'content' key contains the instruction and input from the row, and the 'role' key is set to 'user'.

# 'messages.append(user)' adds the user message to the 'messages' list.

# 'assistant' is a dictionary that represents an assistant message. The 'content' key contains the output from the row, and the 'role' key is set to 'assistant'.

# 'messages.append(assistant)' adds the assistant message to the 'messages' list.

# 'return {"messages": messages}' returns a dictionary with a single key-value pair. The key is 'messages' and the value is the 'messages' list.

# 'format_dataset_chatml' is a function that takes a row from a dataset and returns a dictionary with a single key-value pair. The key is 'text' and the value is the result of applying the chat template to the messages in the row.

# 'row' is the input to the 'format_dataset_chatml' function. It is expected to be a dictionary with a key 'messages'.

# 'return {"text": tokenizer.apply_chat_template(row["messages"], add_generation_prompt=False, tokenize=False)}' returns a dictionary with a single key-value pair. The key is 'text' and the value is the result of applying the chat template to the messages in the row. The 'add_generation_prompt' parameter is set to False, which means that no generation prompt will be added to the end of the text. The 'tokenize' parameter is set to False, which means that the text will not be tokenized.
def create_message_column(row):
    messages = []
    user = {
        "content": f"{row['instruction']}\n Input: {row['input']}",
        "role": "user"
    }
    messages.append(user)
    assistant = {
        "content": f"{row['output']}",
        "role": "assistant"
    }
    messages.append(assistant)
    return {"messages": messages}

def format_dataset_chatml(row):
    return {"text": tokenizer.apply_chat_template(row["messages"], add_generation_prompt=False, tokenize=False)}

Implement the ChatML format on our dataset.

In [10]:
# 'dataset' is a variable that contains the dataset loaded from the Hugging Face Dataset Hub.

# 'map' is a method of the 'Dataset' class that applies a function to each example in the dataset.

# 'create_message_column' is a function that takes a row from a dataset and returns a dictionary with a single key-value pair. The key is 'messages' and the value is a list of dictionaries, each representing a message.

# 'dataset_chatml = dataset.map(create_message_column)' applies the 'create_message_column' function to each example in 'dataset' and assigns the result to 'dataset_chatml'. This transforms the dataset into a format where each example is a list of messages.

# 'format_dataset_chatml' is a function that takes a row from a dataset and returns a dictionary with a single key-value pair. The key is 'text' and the value is the result of applying the chat template to the messages in the row.

# 'dataset_chatml = dataset_chatml.map(format_dataset_chatml)' applies the 'format_dataset_chatml' function to each example in 'dataset_chatml'. This transforms the dataset into a format where each example is a single string of text that represents a conversation.
dataset_chatml = dataset.map(create_message_column)
dataset_chatml = dataset_chatml.map(format_dataset_chatml)

In [11]:
# 'dataset_chatml' is a variable that contains the dataset that has been transformed into a format where each example is a single string of text that represents a conversation.

# 'dataset_chatml[0]' returns the first example in 'dataset_chatml'.

# This line of code is used to check the first example in the transformed dataset to ensure that the transformation was performed correctly.
dataset_chatml[0]

{'instruction': 'Create a function to calculate the sum of a sequence of integers.',
 'input': '[1, 2, 3, 4, 5]',
 'output': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\n### Input:\n[1, 2, 3, 4, 5]\n\n### Output:\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'messages': [{'content': 'Create a function to calculate the sum of a sequence of integers.\n Input: [1, 2, 3, 4, 5]',
   'role': 'user'},
  {'content': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
   'role': 'assistant'}],
 'text': '<|user|>\nCreate a function to calculate the sum of a sequence of integers.\n Input: [1, 2, 3, 4, 5]<

In [12]:
# 'dataset_chatml' is a variable that contains the dataset that has been transformed into a format where each example is a single string of text that represents a conversation.

# 'train_test_split' is a method of the 'Dataset' class that splits the dataset into a training set and a test set.

# 'test_size=0.05' is a parameter of the 'train_test_split' method that specifies the proportion of the dataset to include in the test set. In this case, it is set to 0.05, which means that 5% of the dataset will be included in the test set.

# 'seed=1234' is a parameter of the 'train_test_split' method that specifies the seed for the random number generator. This is used to ensure that the split is reproducible.

# 'dataset_chatml = dataset_chatml.train_test_split(test_size=0.05, seed=1234)' splits 'dataset_chatml' into a training set and a test set and assigns the result to 'dataset_chatml'. The result is a dictionary with two key-value pairs. The keys are 'train' and 'test', and the values are the training set and the test set, respectively.

# 'dataset_chatml' when used alone like this in a Jupyter notebook cell, it will display the structure of the training set and the test set. This includes information such as the number of examples in each set, the names and types of the fields in the sets, and the shapes of the fields.

# This line of code is used to check the structure of the training set and the test set to ensure that the split was performed correctly.
dataset_chatml = dataset_chatml.train_test_split(test_size=0.05, seed=1234)
dataset_chatml

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'prompt', 'messages', 'text'],
        num_rows: 17681
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'prompt', 'messages', 'text'],
        num_rows: 931
    })
})

## Instruction fine-tune a Phi-3-mini model using QLORA and trl

Initially, we attempt to recognize our GPU.

In [13]:
# 'torch' is a library for scientific computing that provides a wide range of functionalities for dealing with tensors, which are multi-dimensional arrays.

# 'torch.cuda.is_bf16_supported()' is a function that checks if BF16 is supported on the current GPU. BF16 is a data type that uses 16 bits, like float16, but allocates more bits to the exponent, which can result in higher precision.

# 'compute_dtype' is a variable that will hold the data type to be used for computations.

# 'attn_implementation' is a variable that will hold the type of attention implementation to be used.

# 'if torch.cuda.is_bf16_supported():' checks if BF16 is supported on the current GPU. If it is, the following block of code is executed.

# 'compute_dtype = torch.bfloat16' sets 'compute_dtype' to 'torch.bfloat16', which is the BF16 data type in PyTorch.

# 'attn_implementation = 'flash_attention_2'' sets 'attn_implementation' to 'flash_attention_2', which is a type of attention implementation.

# 'else:' specifies that the following block of code should be executed if BF16 is not supported on the current GPU.

# 'compute_dtype = torch.float16' sets 'compute_dtype' to 'torch.float16', which is the float16 data type in PyTorch.

# 'attn_implementation = 'sdpa'' sets 'attn_implementation' to 'sdpa', which is a type of attention implementation.

# 'print(attn_implementation)' prints the value of 'attn_implementation', which is the type of attention implementation to be used.

# 'print(compute_dtype)' prints the value of 'compute_dtype', which is the data type to be used for computations.
if torch.cuda.is_bf16_supported():
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'

print(attn_implementation)
print(compute_dtype)

flash_attention_2
torch.bfloat16


## Load the tokenizer and model to finetune

In [14]:
# 'AutoTokenizer' is a class from the Hugging Face Transformers library that provides a tokenizer for a given pre-trained model.

# 'from_pretrained' is a method of the 'AutoTokenizer' class that loads a tokenizer from a pre-trained model.

# 'model_name' is a variable that contains the name of the pre-trained model.

# 'trust_remote_code=True' is a parameter that allows the execution of remote code when loading the tokenizer.

# 'add_eos_token=True' is a parameter that adds an end-of-sentence token to the tokenizer.

# 'use_fast=True' is a parameter that uses the fast version of the tokenizer, if available.

# 'tokenizer.pad_token = tokenizer.unk_token' sets the padding token of the tokenizer to be the same as the unknown token.

# 'tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)' sets the ID of the padding token to be the same as the ID of the padding token.

# 'tokenizer.padding_side = 'left'' sets the side where padding will be added to be the left side.

# 'BitsAndBytesConfig' is a class that provides a configuration for quantization.

# 'bnb_config' is a variable that holds the configuration for quantization.

# 'AutoModelForCausalLM' is a class from the Hugging Face Transformers library that provides a model for causal language modeling.

# 'from_pretrained' is a method of the 'AutoModelForCausalLM' class that loads a model from a pre-trained model.

# 'torch_dtype=compute_dtype' is a parameter that sets the data type of the model to be the same as 'compute_dtype'.

# 'quantization_config=bnb_config' is a parameter that sets the configuration for quantization to be 'bnb_config'.

# 'device_map=device_map' is a parameter that sets the device map of the model to be 'device_map'.

# 'attn_implementation=attn_implementation' is a parameter that sets the type of attention implementation to be 'attn_implementation'.

# 'prepare_model_for_kbit_training' is a function that prepares a model for k-bit training.

# 'model = prepare_model_for_kbit_training(model)' prepares 'model' for k-bit training and assigns the result back to 'model'.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, add_eos_token=True, use_fast=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'

bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type=bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=use_double_quant,
)

model = AutoModelForCausalLM.from_pretrained(
          model_name, torch_dtype=compute_dtype, trust_remote_code=True, quantization_config=bnb_config, device_map=device_map,
          attn_implementation=attn_implementation
)

model = prepare_model_for_kbit_training(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Set up the QLoRA parameters.

In [15]:
# 'AutoTokenizer' is a class from the Hugging Face Transformers library that provides a tokenizer for a given pre-trained model.

# 'from_pretrained' is a method of the 'AutoTokenizer' class that loads a tokenizer from a pre-trained model.

# 'model_name' is a variable that contains the name of the pre-trained model.

# 'trust_remote_code=True' is a parameter that allows the execution of remote code when loading the tokenizer.

# 'add_eos_token=True' is a parameter that adds an end-of-sentence token to the tokenizer.

# 'use_fast=True' is a parameter that uses the fast version of the tokenizer, if available.

# 'tokenizer.pad_token = tokenizer.unk_token' sets the padding token of the tokenizer to be the same as the unknown token.

# 'tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)' sets the ID of the padding token to be the same as the ID of the padding token.

# 'tokenizer.padding_side = 'left'' sets the side where padding will be added to be the left side.

# 'BitsAndBytesConfig' is a class that provides a configuration for quantization.

# 'bnb_config' is a variable that holds the configuration for quantization.

# 'AutoModelForCausalLM' is a class from the Hugging Face Transformers library that provides a model for causal language modeling.

# 'from_pretrained' is a method of the 'AutoModelForCausalLM' class that loads a model from a pre-trained model.

# 'torch_dtype=compute_dtype' is a parameter that sets the data type of the model to be the same as 'compute_dtype'.

# 'quantization_config=bnb_config' is a parameter that sets the configuration for quantization to be 'bnb_config'.

# 'device_map=device_map' is a parameter that sets the device map of the model to be 'device_map'.

# 'attn_implementation=attn_implementation' is a parameter that sets the type of attention implementation to be 'attn_implementation'.

# 'prepare_model_for_kbit_training' is a function that prepares a model for k-bit training.

# 'model = prepare_model_for_kbit_training(model)' prepares 'model' for k-bit training and assigns the result back to 'model'.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, add_eos_token=True, use_fast=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'

bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type=bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=use_double_quant,
)

model = AutoModelForCausalLM.from_pretrained(
          model_name, torch_dtype=compute_dtype, trust_remote_code=True, quantization_config=bnb_config, device_map=device_map,
          attn_implementation=attn_implementation
)

model = prepare_model_for_kbit_training(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The SFTTrainer offers a built-in integration with peft, simplifying the process of effectively fine-tuning LLMs. All we need to do is establish our LoRAConfig and supply it to the trainer. However, before initiating our training, it's necessary to specify the hyperparameters (TrainingArguments) we plan to utilize.

We now possess all the necessary components to construct our SFTTrainer and commence the model training.

In [18]:
# 'SFTTrainer' is a class that provides a trainer for fine-tuning a model.

# 'trainer' is a variable that holds the trainer.

# 'model=model' is a parameter that sets the model to be trained to be 'model'.

# 'train_dataset=dataset_chatml['train']' is a parameter that sets the training dataset to be 'dataset_chatml['train']'.

# 'eval_dataset=dataset_chatml['test']' is a parameter that sets the evaluation dataset to be 'dataset_chatml['test']'.

# 'peft_config=peft_config' is a parameter that sets the configuration for the Lora layer to be 'peft_config'.

# 'dataset_text_field="text"' is a parameter that sets the field in the dataset that contains the text to be 'text'.

# 'max_seq_length=512' is a parameter that sets the maximum sequence length for the model to be 512.

# 'tokenizer=tokenizer' is a parameter that sets the tokenizer to be 'tokenizer'.

# 'args=args' is a parameter that sets the training arguments to be 'args'.

peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

args = TrainingArguments(
        output_dir="./phi-3-mini-QLoRA",
        evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=100,
        learning_rate=1e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=100,
        num_train_epochs=3,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        seed=42,
)

# This line of code is used to create a trainer for fine-tuning the model with the specified parameters.
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_chatml['train'],
        eval_dataset=dataset_chatml['test'],
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=512,
        tokenizer=tokenizer,
        args=args,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/931 [00:00<?, ? examples/s]

Using auto half precision backend


Initiate the model training process by invoking the train() method on our Trainer instance.

In [19]:
# 'trainer.train()' is a method that starts the training of the model. It uses the training dataset, model, and training arguments that were specified when the trainer was created.

# 'trainer.save_model()' is a method that saves the trained model to the local file system. The model will be saved in the output directory that was specified in the training arguments.

# This block of code is used to train the model and then save the trained model to the local file system.
# train
trainer.train()

# save model in local
trainer.save_model()

Currently training with a batch size of: 8
***** Running training *****
  Num examples = 17,681
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 4
  Total optimization steps = 2,760
  Number of trainable parameters = 8,912,896
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
# 'hf_adapter_repo' is a variable that holds the repository name for the Hugging Face model adapter.

# 'username/model-adapter-name' is the repository name, where 'username' is the username of the repository owner and 'model-adapter-name' is the name of the model adapter.

# This line of code is used to set the repository name for the Hugging Face model adapter.
hf_adapter_repo=hf_model_repo

In [None]:
# 'trainer.push_to_hub(hf_adapter_repo)' is a method that pushes the trained model adapter to the Hugging Face Model Hub. 

# 'hf_adapter_repo' is the repository name for the model adapter on the Hugging Face Model Hub.

# This line of code is used to save the trained model adapter to the Hugging Face Model Hub.
# Save the adapter
trainer.push_to_hub(hf_adapter_repo)

## Combine the model and the adapters, then save it.

Note: When operating on a T4 instance, memory cleanup is necessary.

In [40]:
# 'del model' and 'del trainer' are lines of code that delete the 'model' and 'trainer' objects. This frees up the memory that was used by these objects.

# 'import gc' is a line of code that imports the 'gc' module, which provides an interface to the garbage collector.

# 'gc.collect()' is a function that triggers a full garbage collection. It frees up memory by collecting all the objects that are no longer in use.

# This block of code is used to empty the VRAM (Video Random Access Memory) by deleting the 'model' and 'trainer' objects and then triggering a full garbage collection.
# Empty VRAM
del model
del trainer
import gc
gc.collect()
gc.collect()

0

In [41]:
# 'torch.cuda.empty_cache()' is a function from the PyTorch library that releases all unoccupied cached memory currently held by the caching allocator so that those can be used in other GPU application and visible in nvidia-smi.

# It's a PyTorch specific function to manage GPU memory and it doesn't affect the GPU memory usage by PyTorch tensors.

# This line of code is used to empty the cache memory that's used by PyTorch on the GPU.
torch.cuda.empty_cache() # PyTorch thing

In [42]:
# 'torch.cuda.empty_cache()' is a function from the PyTorch library that releases all unoccupied cached memory currently held by the caching allocator so that those can be used in other GPU application and visible in nvidia-smi.

# It's a PyTorch specific function to manage GPU memory and it doesn't affect the GPU memory usage by PyTorch tensors.

# This line of code is used to empty the cache memory that's used by PyTorch on the GPU.
torch.cuda.empty_cache() # PyTorch thing

Reload the previously trained and saved model, merge it, and then proceed to save the entire model.

In [None]:
# 'hf_adapter_repo' is a variable that holds the repository name for the Hugging Face model adapter.

# 'edumunozsala/phi-3-mini-QLoRA' is the repository name, where 'edumunozsala' is the username of the repository owner and 'phi-3-mini-QLoRA' is the name of the model adapter.

# 'model_name, hf_adapter_repo, compute_dtype' is a line of code that returns the values of the 'model_name', 'hf_adapter_repo', and 'compute_dtype' variables.

# This block of code is used to set the repository name for the Hugging Face model adapter and then return the values of the 'model_name', 'hf_adapter_repo', and 'compute_dtype' variables.
hf_adapter_repo = "edumunozsala/phi-3-mini-QLoRA"

model_name, hf_adapter_repo, compute_dtype

In [None]:
# 'peft_model_id' and 'tr_model_id' are variables that hold the identifiers for the PEFT model and the transformer model, respectively.

# 'AutoModelForCausalLM.from_pretrained(tr_model_id, trust_remote_code=True, torch_dtype=compute_dtype)' is a function that loads a pre-trained transformer model for causal language modeling. 'tr_model_id' is the identifier for the pre-trained model, 'trust_remote_code=True' allows the execution of code from the model file, and 'torch_dtype=compute_dtype' sets the data type for the PyTorch tensors.

# 'PeftModel.from_pretrained(model, peft_model_id)' is a function that loads a pre-trained PEFT model. 'model' is the transformer model and 'peft_model_id' is the identifier for the pre-trained PEFT model.

# 'model.merge_and_unload()' is a method that merges the PEFT model with the transformer model and then unloads the PEFT model.

# This block of code is used to load a pre-trained transformer model and a pre-trained PEFT model, merge the two models, and then unload the PEFT model.
peft_model_id = hf_adapter_repo
tr_model_id = model_name

model = AutoModelForCausalLM.from_pretrained(tr_model_id, trust_remote_code=True, torch_dtype=compute_dtype)
model = PeftModel.from_pretrained(model, peft_model_id)
model = model.merge_and_unload()

In [None]:
# 'tokenizer' is a variable that holds the tokenizer.

# 'AutoTokenizer.from_pretrained(peft_model_id)' is a function from the Hugging Face Transformers library that loads a pre-trained tokenizer. 'peft_model_id' is the identifier for the pre-trained tokenizer.

# This line of code is used to load a pre-trained tokenizer.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

In [None]:
# 'hf_model_repo' is a variable that holds the repository name for the Hugging Face model.

# This line of code is used to reference the repository name for the Hugging Face model.
hf_model_repo

In [None]:
# 'merged_model_id' is a variable that holds the identifier for the merged model.

# 'hf_model_repo' is the repository name for the Hugging Face model.

# 'model.push_to_hub(merged_model_id)' is a method that pushes the merged model to the Hugging Face Model Hub. 'merged_model_id' is the identifier for the merged model.

# 'tokenizer.push_to_hub(merged_model_id)' is a method that pushes the tokenizer to the Hugging Face Model Hub. 'merged_model_id' is the identifier for the tokenizer.

# This block of code is used to save the merged model and the tokenizer to the Hugging Face Model Hub.
# SAve the model merged to the Hub
merged_model_id = hf_model_repo
model.push_to_hub(merged_model_id)
tokenizer.push_to_hub(merged_model_id)

## Model Inference and evaluation

In the end, we download the model that we created from the hub and conduct tests to ensure its proper functioning!

In [None]:
# 'hf_model_repo' is a variable that holds the repository name for the Hugging Face model.

# This line of code is used to reference the repository name for the Hugging Face model.
hf_model_repo

In [None]:
# 'hf_model_repo' is a variable that holds the repository name for the Hugging Face model.

# 'username/modelname' is the repository name, where 'username' is the username of the repository owner and 'modelname' is the name of the model.

# This line of code is used to set the repository name for the Hugging Face model.
hf_model_repo='username/modelname'

Retrieve the model and tokenizer from the Hub.

In [None]:
# 'device_map' is a variable that holds the mapping of the devices that are used for computation.

# 'compute_dtype' is a variable that holds the data type that is used for computation.

# This line of code is used to return the values of the 'device_map' and 'compute_dtype' variables.
device_map, compute_dtype

In [None]:
# This block of code is used to import the necessary libraries, set the seed for reproducibility, and load a pre-trained tokenizer and model.

# 'import torch' is a line of code that imports the PyTorch library, which is a popular open-source machine learning library.

# 'from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed' is a line of code that imports the 'AutoTokenizer', 'AutoModelForCausalLM', and 'set_seed' functions from the Hugging Face Transformers library.

# 'set_seed(1234)' is a line of code that sets the seed for the random number generator to '1234'. This is done to ensure that the results are reproducible.

# 'tokenizer = AutoTokenizer.from_pretrained(hf_model_repo,trust_remote_code=True)' is a line of code that loads a pre-trained tokenizer from the Hugging Face Model Hub. 'hf_model_repo' is the repository name for the model and 'trust_remote_code=True' allows the execution of code from the model file.

# 'model = AutoModelForCausalLM.from_pretrained(hf_model_repo, trust_remote_code=True, torch_dtype=compute_dtype, device_map=device_map)' is a line of code that loads a pre-trained model for causal language modeling from the Hugging Face Model Hub. 'hf_model_repo' is the repository name for the model, 'trust_remote_code=True' allows the execution of code from the model file, 'torch_dtype=compute_dtype' sets the data type for the PyTorch tensors, and 'device_map=device_map' sets the device mapping.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

set_seed(1234)  # For reproducibility

tokenizer = AutoTokenizer.from_pretrained(hf_model_repo,trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(hf_model_repo, trust_remote_code=True, torch_dtype=compute_dtype, device_map=device_map) # compute "auto" dev_map "cuda"

We arrange the dataset in the same manner as before.

In [None]:
# This block of code is used to prepare the dataset for training.

# 'dataset_chatml = dataset.map(create_message_column)' is a line of code that applies the 'create_message_column' function to each element in the 'dataset'. The result is stored in 'dataset_chatml'.

# 'dataset_chatml = dataset_chatml.map(format_dataset_chatml)' is a line of code that applies the 'format_dataset_chatml' function to each element in 'dataset_chatml'.

# 'dataset_chatml = dataset_chatml.train_test_split(test_size=0.05)' is a line of code that splits 'dataset_chatml' into a training set and a test set. The test set size is 5% of the total dataset.

# 'dataset_chatml' is a line of code that returns the 'dataset_chatml' variable.

# This block of code is used to create a message column in the dataset, format the dataset, and then split the dataset into a training set and a test set.
## prepare the dataset
dataset_chatml = dataset.map(create_message_column)
dataset_chatml = dataset_chatml.map(format_dataset_chatml)
dataset_chatml = dataset_chatml.train_test_split(test_size=0.05)
dataset_chatml

Establish a text generation pipeline to execute the inference.

In [None]:
# 'pipe' is a variable that holds the pipeline for text generation.

# 'pipeline("text-generation", model=model, tokenizer=tokenizer)' is a function from the Hugging Face Transformers library that creates a pipeline for text generation. 'model' is the pre-trained model and 'tokenizer' is the pre-trained tokenizer.

# This line of code is used to create a pipeline for text generation using the pre-trained model and tokenizer.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# 'pipe' is an instance of the 'pipeline' class from the 'transformers' library. It is used to generate responses to prompts.

# 'pipe.tokenizer' is the tokenizer of the 'pipe' pipeline. It is used to convert text into a format that the model can understand.

# 'apply_chat_template' is a function of the tokenizer that formats a chat message in a way that the model can understand.

# '[{"role": "user", "content": dataset_chatml['test'][0]['messages'][0]['content']}]' is a list of dictionaries that represents a chat message. Each dictionary contains a 'role' field that indicates the role of the sender (in this case, 'user') and a 'content' field that contains the text of the message.

# 'tokenize=False' is a parameter that controls whether the 'apply_chat_template' function should tokenize the input. In this case, it is set to 'False', which means that the function will not tokenize the input.

# 'add_generation_prompt=True' is a parameter that controls whether the 'apply_chat_template' function should add a generation prompt to the input. In this case, it is set to 'True', which means that the function will add a generation prompt.

# This line of code tests the 'apply_chat_template' function by applying it to the first message in the test set of the 'dataset_chatml' dataset.
pipe.tokenizer.apply_chat_template([{"role": "user", "content": dataset_chatml['test'][0]['messages'][0]['content']}], tokenize=False, add_generation_prompt=True)

In [None]:
# 'test_inference' is a function that generates a response to a given prompt.

# 'prompt' is the input parameter, which is expected to be a string that contains the user's message.

# 'prompt = pipe.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)' applies a chat template to the 'prompt' and assigns the result back to 'prompt'. The 'apply_chat_template' function is part of the tokenizer of the 'pipe' pipeline. It formats the 'prompt' in a way that the model can understand.

# 'outputs = pipe(prompt, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95, max_time= 180)' generates a response to the 'prompt' using the 'pipe' pipeline. The parameters control the generation process. For example, 'max_new_tokens=256' limits the length of the generated response to 256 tokens, and 'temperature=0.3' controls the randomness of the generation process (lower values make the output more deterministic).

# 'return outputs[0]['generated_text'][len(prompt):].strip()' returns the generated response. The '[len(prompt):]' part removes the original 'prompt' from the beginning of the 'generated_text', and the 'strip()' part removes leading and trailing whitespace.
def test_inference(prompt):
    prompt = pipe.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95, max_time= 180)
    return outputs[0]['generated_text'][len(prompt):].strip()

In [None]:
# '%%time' is a magic command in Jupyter Notebook that measures the execution time of the cell it is placed in.

# 'test_inference' is a function that generates a response to a given input.

# 'dataset_chatml' is a dictionary-like object that contains the dataset.

# 'dataset_chatml['test']' retrieves the test set from the 'dataset_chatml' dataset.

# 'dataset_chatml['test'][0]' retrieves the first sample from the test set.

# 'dataset_chatml['test'][0]['messages'][0]['content']' retrieves the 'content' field from the first message in the 'messages' field of the first sample in the test set.

# 'test_inference(dataset_chatml['test'][0]['messages'][0]['content'])' generates a response to the 'content' field of the first message in the 'messages' field of the first sample in the test set.
%%time

test_inference(dataset_chatml['test'][0]['messages'][0]['content'])

## Evaluate the performance

In [None]:
# 'datasets' is a library that provides access to a large collection of datasets and evaluation metrics for Natural Language Processing (NLP).

# 'load_metric' is a function from the 'datasets' library that loads an evaluation metric.

# 'from datasets import load_metric' imports the 'load_metric' function from the 'datasets' library. This allows you to use 'load_metric' directly instead of having to write 'datasets.load_metric'.
from datasets import load_metric

We'll employ the ROGUE metric for performance evaluation. While it may not be the optimal metric, it's straightforward and convenient to measure.

In [None]:
# 'load_metric' is a function from the 'datasets' library that loads an evaluation metric.

# 'rouge_metric = load_metric("rouge", trust_remote_code=True)' loads the ROUGE metric and assigns it to the variable 'rouge_metric'.

# The 'trust_remote_code' parameter is set to 'True', which means that the function will trust and execute remote code. This is necessary because the ROUGE metric is not included in the 'datasets' library by default and needs to be downloaded from the Hugging Face Model Hub.
rouge_metric = load_metric("rouge", trust_remote_code=True)

Establish a function for inference and evaluation of a sample.

In [None]:
# 'calculate_rogue' is a function that calculates the ROUGE score for a given row of data.

# 'row' is the input parameter, which is expected to be a dictionary-like object that contains the fields 'messages' and 'output'.

# 'response = test_inference(row['messages'][0]['content'])' generates a response by running the 'test_inference' function on the 'content' field of the first message in the 'messages' field of 'row'.

# 'result = rouge_metric.compute(predictions=[response], references=[row['output']], use_stemmer=True)' computes the ROUGE score by comparing the generated 'response' with the 'output' field of 'row'. The 'use_stemmer' parameter is set to 'True', which means that the ROUGE metric will use a stemmer to reduce words to their root form before comparison.

# 'result = {key: value.mid.fmeasure * 100 for key, value in result.items()}' transforms the 'result' dictionary by multiplying each value by 100. This is done to convert the ROUGE scores from a range of 0-1 to a range of 0-100.

# 'result['response']=response' adds the generated 'response' to the 'result' dictionary.

# 'return result' returns the 'result' dictionary, which now contains the ROUGE scores and the generated response.
def calculate_rogue(row):
    response = test_inference(row['messages'][0]['content'])
    result = rouge_metric.compute(predictions=[response], references=[row['output']], use_stemmer=True)
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}
    result['response']=response
    return result

In [None]:
%%time
metricas = dataset_chatml['test'].select(range(0,500)).map(calculate_rogue, batched=False)

In [None]:
# 'numpy' is a library in Python that provides support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays.

# 'import numpy as np' imports the 'numpy' library and gives it the alias 'np'. This means that you can use 'np' instead of 'numpy' to refer to the library in the rest of your code.
import numpy as np

At this point, we can compute the metric for the sample.

In [None]:
# 'np.mean' is a function from the 'numpy' library that calculates the arithmetic mean of an array.

# 'metricas' is a dictionary that contains the evaluation metrics of the model.

# 'metricas['rouge1']', 'metricas['rouge2']', 'metricas['rougeL']', and 'metricas['rougeLsum']' are lists of ROUGE scores for each evaluated sample.

# ROUGE (Recall-Oriented Understudy for Gisting Evaluation) is a set of metrics used to evaluate automatic summarization and machine translation. It compares the overlap of n-grams between the system's output and the reference summaries.

# 'print("Rouge 1 Mean: ",np.mean(metricas['rouge1']))' prints the mean ROUGE-1 score.

# 'print("Rouge 2 Mean: ",np.mean(metricas['rouge2']))' prints the mean ROUGE-2 score.

# 'print("Rouge L Mean: ",np.mean(metricas['rougeL']))' prints the mean ROUGE-L score.

# 'print("Rouge Lsum Mean: ",np.mean(metricas['rougeLsum']))' prints the mean ROUGE-Lsum score.
print("Rouge 1 Mean: ",np.mean(metricas['rouge1']))
print("Rouge 2 Mean: ",np.mean(metricas['rouge2']))
print("Rouge L Mean: ",np.mean(metricas['rougeL']))
print("Rouge Lsum Mean: ",np.mean(metricas['rougeLsum']))

## Inference in batches

In [None]:
# 'dataset_chatml' is a dictionary-like object that contains the dataset.

# 'dataset_chatml['test']' retrieves the test set from the 'dataset_chatml' dataset.

# 'dataset_chatml['test'][0]' retrieves the first sample from the test set.

# 'dataset_chatml['test'][0]['output']' retrieves the 'output' field from the first sample in the test set. This 'output' field typically contains the expected response or output for the given sample.
dataset_chatml['test'][0]['output']

In [None]:
# 'num_samples' is a variable that specifies the number of samples to be used for the evaluation.

# In this case, 'num_samples=500' means that 500 samples will be used for the evaluation.
num_samples=500

In [None]:
%%time
# '%%time' is a magic command in Jupyter Notebook that measures the execution time of the cell it is placed in.

# 'prompts' is a list of prompts that are generated by applying a chat template to the first message of each sample in the test set of the 'dataset_chatml' dataset.

# 'pipe' is an instance of the 'pipeline' class from the 'transformers' library. It is used to generate responses to the prompts.

# 'outputs' is a list of responses that are generated by the 'pipe' pipeline.

# 'preds' is a list of predictions that are extracted from the 'outputs'. Each prediction is the text that follows the "<|assistant|>\n" tag in the generated response.

# 'references' is a list of reference outputs that are extracted from the test set of the 'dataset_chatml' dataset.

# 'rouge_metric.add_batch(predictions=preds, references=references)' adds a batch of predictions and references to the 'rouge_metric' for evaluation.
prompts = [pipe.tokenizer.apply_chat_template([{"role": "user", "content": dataset_chatml['test'][i]['messages'][0]['content']}], tokenize=False, add_generation_prompt=True)
                                              for i in range(num_samples)]
outputs = pipe(prompts, batch_size=4, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95,
                   max_time= 180)
preds = [outputs[i][0]['generated_text'].split("<|assistant|>\n")[1].strip() for i in range(len(outputs))]
references= [dataset_chatml['test'][i]['output'] for i in range(len(outputs))]
rouge_metric.add_batch(predictions=preds, references=references)

At this point, we can compute the metric for the sample.

In [None]:
# 'rouge_metric' is an instance of the Rouge metric class from the 'datasets' library.

# 'compute' is a method of the Rouge metric class that calculates the Rouge scores.

# 'use_stemmer=True' is an argument that indicates whether to use stemming when calculating the Rouge scores. Stemming is the process of reducing inflected (or sometimes derived) words to their word stem, base or root form.

# So, 'result = rouge_metric.compute(use_stemmer=True)' calculates the Rouge scores with stemming and stores the result in the 'result' variable.
result = rouge_metric.compute(use_stemmer=True)

In [None]:
# 'np.mean' is a function from the 'numpy' library that calculates the arithmetic mean of an array.

# 'result' is a dictionary that contains the evaluation metrics of the model.

# 'result['rouge1']', 'result['rouge2']', 'result['rougeL']', and 'result['rougeLsum']' are lists of ROUGE scores for each evaluated sample.

# ROUGE (Recall-Oriented Understudy for Gisting Evaluation) is a set of metrics used to evaluate automatic summarization and machine translation. It compares the overlap of n-grams between the system's output and the reference summaries.

# 'print("Rouge 1 Mean: ",np.mean(result['rouge1']))' prints the mean ROUGE-1 score.

# 'print("Rouge 2 Mean: ",np.mean(result['rouge2']))' prints the mean ROUGE-2 score.

# 'print("Rouge L Mean: ",np.mean(result['rougeL']))' prints the mean ROUGE-L score.

# 'print("Rouge Lsum Mean: ",np.mean(result['rougeLsum']))' prints the mean ROUGE-Lsum score.
print("Rouge 1 Mean: ",np.mean(result['rouge1']))
print("Rouge 2 Mean: ",np.mean(result['rouge2']))
print("Rouge L Mean: ",np.mean(result['rougeL']))
print("Rouge Lsum Mean: ",np.mean(result['rougeLsum']))

In [None]:
# 'result' is a dictionary that contains the evaluation metrics of the model.

# 'result['rouge1']' retrieves the value of the ROUGE-1 score from the 'result' dictionary.

# ROUGE-1 is a metric for evaluating automatic summarization of texts and machine translation. It compares the overlap of unigrams (single words) between the system's output and the reference summaries.
result['rouge1']