## Install Dependencies

In [1]:
!pip3 install --upgrade pip
# !pip3 install gcsfs==2024.6.1
!pip3 install transformers==4.48.1 trl==0.14.0 datasets==3.2.0 tokenizers==0.21.0 accelerate==1.3.0 bitsandbytes==0.45.1 sentencepiece==0.2.0 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tqdm==4.66.6

Collecting pip
  Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.0
Collecting transformers==4.48.1
  Downloading transformers-4.48.1-py3-none-any.whl.metadata (44 kB)
Collecting trl==0.14.0
  Downloading trl-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting datasets==3.2.0
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting accelerate==1.3.0
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes==0.45.1
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting torch==2.6.0
  Downloading torch-2.6.0-cp311-cp311-

## Log in to Hugging Face

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the access token into the notebook itself: take it from the
# HF_TOKEN environment variable when set, otherwise prompt for it without
# echoing the value into the cell output.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

## Experiment 1 - Fine-tuning a Llama 3.2 3B Instruct model on a customer support chat dataset.

### Load and set up dataset

In [1]:
from datasets import load_dataset

In [2]:
# Download the customer-support dataset and keep its single 'train' split.
dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")
dataset = dataset['train']
# Hold out 20% for evaluation. The fixed seed makes the train/test partition
# identical across kernel restarts so runs are comparable.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### Model setup

In [3]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from torch import bfloat16

def load_llama_model(model_id):
  """Load a causal language model and its tokenizer.

  Args:
    model_id: Model identifier passed unchanged to both `from_pretrained` calls.

  Returns:
    A `(pipe, model, tokenizer)` tuple. `pipe` is always None; the slot is kept
    so existing three-way unpacking at the call site keeps working.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  # The attribute is `padding_side`. The earlier `padding_size` spelling only
  # created an unused attribute, so the requested left padding never applied.
  # NOTE(review): SFT training is often run with right padding - confirm that
  # 'left' is what you want now that the setting actually takes effect.
  tokenizer.padding_side = 'left'
  tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as pad token (decoder-only models like Llama 3)

  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      torch_dtype=bfloat16,
      device_map="auto",
  )
  # No text-generation pipeline is built here; callers still receive a placeholder.
  pipe = None
  return pipe, model, tokenizer

In [4]:
# Checkpoint to fine-tune; change this one constant to try a different model.
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
pipe, model, tokenizer = load_llama_model(MODEL_ID)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Pre-process the dataset

In [5]:
def convert_to_chat_template(example, tokenizer):
  """Attach a chat-formatted training string to a single dataset row.

  Builds a three-turn conversation (fixed system prompt, the row's
  'instruction' as the user turn, the row's 'response' as the assistant turn)
  and renders it with the tokenizer's chat template, without tokenizing.

  Args:
    example: Mapping with 'instruction' and 'response' entries; modified in place.
    tokenizer: Object exposing `apply_chat_template(messages, tokenize=...)`.

  Returns:
    The same `example` object, with the rendered string stored under 'labels'.
  """
  system_turn = {"role": "system", "content": "You are an advanced customer service assistant."}
  user_turn = {"role": "user", "content": f"{example['instruction']}"}
  assistant_turn = {"role": "assistant", "content": f"{example['response']}"}

  # The rendered text is stored under 'labels' rather than the default 'text'
  # key, so SFTConfig must be pointed at it via `dataset_text_field`.
  rendered = tokenizer.apply_chat_template(
      [system_turn, user_turn, assistant_turn],
      tokenize=False,
  )
  example['labels'] = rendered
  return example

In [6]:
# Add the chat-formatted string to every row; the tokenizer is handed to the
# mapping function as a keyword argument instead of through a lambda closure.
dataset = dataset.map(convert_to_chat_template, fn_kwargs={"tokenizer": tokenizer})

Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/5375 [00:00<?, ? examples/s]

In [7]:
# Drop the raw source columns now that the chat-formatted text has been added.
columns_to_drop = ['flags', 'instruction', 'category', 'intent', 'response']
final_dataset = dataset.remove_columns(columns_to_drop)

### Supervised fine tuning

In [8]:
from trl import SFTTrainer, SFTConfig

# Hyperparameters for the supervised fine-tuning run.
training_args = SFTConfig(
    output_dir="./results",
    eval_strategy="steps", # To evaluate during training
    eval_steps=1000,
    logging_steps=1000,
    save_steps=1000,
    per_device_train_batch_size=5, # Adjust based on your hardware
    per_device_eval_batch_size=5,
    num_train_epochs=2, # How many times to loop through the dataset
    fp16=False, # Must be False for MacBooks
    report_to="none", # Here we can use something like tensorboard to see the training metrics
    log_level="info",
    learning_rate=1e-5, # Would avoid larger values here
    max_grad_norm=2, # Clipping the gradients is always a good idea
    max_seq_length=512,
    packing=False,
    dataset_text_field='labels' # Column that holds the chat-formatted training text
)

trainer = SFTTrainer(
    model=model,
    train_dataset=final_dataset["train"],
    eval_dataset=final_dataset["test"],
    # `tokenizer=` is deprecated in this TRL release and triggers a warning;
    # `processing_class=` is the supported keyword for passing the tokenizer.
    processing_class=tokenizer,
    args=training_args,
)

  trainer = SFTTrainer(


Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/5375 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop using the SFTConfig settings the trainer was built with.
trainer.train()

In [None]:
# Write the fine-tuned weights and the tokenizer files to the same directory.
SAVE_PATH = "drive/MyDrive/HF_MODELS/llama3-sft-v1"
# Fixed the `tainer` typo, which raised NameError before anything was saved.
trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)