## Install Dependencies

In [1]:
!pip3 install --upgrade pip
!pip3 install transformers==4.48.1 trl==0.14.0 datasets==3.2.0 tokenizers==0.21.0 accelerate==1.3.0 bitsandbytes==0.45.1 sentencepiece==0.2.0 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tqdm==4.66.6

Collecting pip
  Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.0
Collecting transformers==4.48.1
  Downloading transformers-4.48.1-py3-none-any.whl.metadata (44 kB)
Collecting trl==0.14.0
  Downloading trl-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting datasets==3.2.0
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting bitsandbytes==0.45.1
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting torch==2.6.0
  Downloading torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.21.0
  Downloading torchvisio

## Log in to Hugging Face

In [2]:
import os
from huggingface_hub import login

# Never paste the access token into the notebook. Read it from the HF_TOKEN
# environment variable; when it is unset, token=None makes `login` prompt for it.
login(token=os.environ.get("HF_TOKEN"))

## Global utility functions

In [53]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from torch import bfloat16
from collections import defaultdict

def load_llama_model(model_id="meta-llama/Llama-3.2-3B-Instruct"):
  """
  Load a causal LM in bfloat16 together with its tokenizer and a text-generation pipeline.

  Args:
    model_id (str): Hugging Face Hub id (or local path) of the model to load.

  Returns:
    tuple(pipe, model, tokenizer)
    pipe is a "text-generation" pipeline built on top of model and tokenizer.
    model is the AutoModelForCausalLM instance.
    tokenizer is the AutoTokenizer instance, configured for left padding.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  # The attribute is `padding_side`; the previous `padding_size` only created an
  # unused attribute, so left padding was never actually enabled.
  tokenizer.padding_side = 'left'
  tokenizer.pad_token = tokenizer.eos_token # For decode only models like LLama3

  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=bfloat16,
                                               device_map="auto")
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                  torch_dtype=bfloat16, device_map="auto", temperature=0.8)
  return pipe, model, tokenizer

def run_self_consistency(pipe_ref, question, answer_extract_fn, num_attempts=5, max_ans_len=512):
  """
  Sample several answers to one question and tally the extracted answer values.

  The pipeline is called once with num_return_sequences=num_attempts; every
  returned sequence is stored and its extracted value is counted.

  Args:
    pipe_ref: A HF text-generation pipeline object ref (any callable with the same
      call signature and output format works).
    question (str | list[dict]): Either a plain prompt string, or a chat given as a
      list of {"role": ..., "content": ...} messages.
    answer_extract_fn: A function reference that takes in a string and can extract the answer.
    num_attempts (int): Number of answers to sample from the pipeline.
    max_ans_len (int): The max number of new tokens each answer can contain.

  Returns:
    tuple(list, dict)
    list is the list of answers. For a chat input each answer is the content of the
    last message; for a plain string input it is the generated text as returned
    by the pipeline.
    dict is the histogram of answer values (a defaultdict(int)).

  Example:
    pipe = .. A HF pipeline object
    question = [{"role": "user", "content": "What is 2 + 2?"}]
    def extract_fn(ans):
      return ans[-2:].strip() # extracts last two chars

     >>> run_self_consistency(pipe, question, extract_fn, num_attempts=3)
    (
      ["2 and 2 equals 4", "Adding 2 and 2 gives 4", "2 + 2 equals 4."],
      {'4': 2, '4.': 1}
    )
    Note the `incorrect` answer '4.'. The extract function must be tuned to the answer given by the pipeline/model
  """
  answers = []
  answer_val_freq = defaultdict(int)
  outputs = pipe_ref(question, num_return_sequences=num_attempts, max_new_tokens=max_ans_len)
  for output in outputs:
    generated = output["generated_text"]
    if isinstance(generated, str):
      # Plain-string prompt: the generated text is already a string.
      answer = generated
    else:
      # Chat prompt: the generated text is the message list; the reply is the last message.
      answer = generated[-1]["content"]
    answers.append(answer)
    answer_val_freq[answer_extract_fn(answer)] += 1
  return answers, answer_val_freq



In [4]:
from datasets import load_dataset

def load_gsm8k_dataset():
  """
  Load the "main" configuration of the openai/gsm8k dataset.

  Returns:
    tuple(train_dataset, test_dataset): the "train" and "test" splits, in that order.
  """
  dataset_id = "openai/gsm8k"
  train_split, test_split = (
    load_dataset(dataset_id, "main", split=split_name)
    for split_name in ("train", "test")
  )
  return train_split, test_split

def load_bittext_cust_dataset(test_size=0.2, seed=42):
  """
  Load the Bitext customer-support dataset and split its 'train' split into train/test parts.

  Args:
    test_size (float | int): Size of the held-out test part, passed to
      `Dataset.train_test_split`. Defaults to 0.2.
    seed (int | None): Seed for the shuffle performed before splitting, so the same
      rows land in the test part on every run. Pass None for an unseeded split.

  Returns:
    tuple(train_dataset, test_dataset)
  """
  dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")
  # Only the 'train' split is used; the test part is carved out of it below.
  splits = dataset['train'].train_test_split(test_size=test_size, seed=seed)
  return splits['train'], splits['test']

## Experiment 1 - Fine tuning on a customer support chat dataset.

### Load Bitext customer support dataset

In [None]:
# Load the Bitext customer-support dataset and split it into train and test parts.
train_dataset, test_dataset = load_bittext_cust_dataset()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### Model setup

In [None]:
# Load the text-generation pipeline, model and tokenizer for the default model id.
pipe, model, tokenizer = load_llama_model()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Pre-process the dataset

In [None]:
def convert_to_chat_template(example, tokenizer):
  """
  Render one instruction/response pair as a chat-template string.

  Args:
    example: A dataset row providing the 'instruction' and 'response' keys.
    tokenizer: An object exposing `apply_chat_template(chat, tokenize=False)`.

  Returns:
    The same `example`, with the rendered (untokenized) chat stored under 'labels'.
  """
  turns = (
      ("system", "You are an advanced customer service assistant."),
      ("user", f"{example['instruction']}"),
      ("assistant", f"{example['response']}"),
  )
  messages = [{"role": role, "content": content} for role, content in turns]
  # The text is stored under 'labels' instead of the default 'text' key, so the
  # trainer has to be told about it via the dataset_text_field arg in SFTConfig.
  rendered = tokenizer.apply_chat_template(messages, tokenize=False)
  example['labels'] = rendered
  return example

In [None]:
# Render every row of both splits as a chat-template string (adds the 'labels' column).
final_train_dataset, final_test_dataset = (
    split.map(lambda sample: convert_to_chat_template(sample, tokenizer))
    for split in (train_dataset, test_dataset)
)

Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/5375 [00:00<?, ? examples/s]

In [None]:
# Drop the listed source columns from both splits.
final_train_dataset, final_test_dataset = (
    split.remove_columns(['flags', 'instruction', 'category', 'intent', 'response'])
    for split in (final_train_dataset, final_test_dataset)
)

### Supervised fine tuning

In [None]:
from trl import SFTTrainer, SFTConfig

# Training configuration: evaluate, log and checkpoint every 1000 steps.
training_args = SFTConfig(
    output_dir="./results",
    eval_strategy="steps", # To evaluate during training
    eval_steps=1000,
    logging_steps=1000,
    save_steps=1000,
    per_device_train_batch_size=5, # Adjust based on your hardware
    per_device_eval_batch_size=5,
    num_train_epochs=2, # How many times to loop through the dataset
    fp16=False, # Must be False for MacBooks
    report_to="none", # Here we can use something like tensorboard to see the training metrics
    log_level="info",
    learning_rate=1e-5, # Would avoid larger values here
    max_grad_norm=2, # Clipping the gradients is always a good idea
    max_seq_length=512,
    packing=False,
    dataset_text_field='labels' # Column that holds the chat-template text
)

trainer = SFTTrainer(
    model=model,
    train_dataset=final_train_dataset,
    eval_dataset=final_test_dataset,
    processing_class=tokenizer, # `tokenizer=` is deprecated in TRL 0.14; `processing_class` replaces it
    args=training_args,
)

  trainer = SFTTrainer(


Map:   0%|          | 0/21497 [00:00<?, ? examples/s]

Map:   0%|          | 0/5375 [00:00<?, ? examples/s]

In [None]:
# Run supervised fine-tuning with the settings in `training_args`.
trainer.train()

In [None]:
# Save the fine-tuned model and its tokenizer to the same directory.
SAVE_PATH = "drive/MyDrive/HF_MODELS/llama3-sft-v1"
trainer.save_model(SAVE_PATH) # was `tainer`, which raised NameError
tokenizer.save_pretrained(SAVE_PATH)

## Experiment 2 - Fine-tuning on multiple chains of thought

### Model setup

In [5]:
# Load the text-generation pipeline, model and tokenizer for this experiment.
pipe, model, tokenizer = load_llama_model()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Device set to use cuda:0


### Load and process dataset

In [30]:
# Load the GSM8K train and test splits.
train_dataset, test_dataset = load_gsm8k_dataset()

In [51]:
import math


def extract_result(answer):
  """
  Extract the integer result that follows the last '####' marker in an answer.

  Thousands separators and unit symbols (',', '$', '%', 'g') are removed, and a
  trailing period is dropped. Integral decimals such as '72.0' are accepted and
  normalised to '72'.

  Args:
    answer (str): The sentence containing the answer.

  Returns:
    str: the stringified integer value of the answer, or 'N/A' when the text after
    the marker (or the whole string, if there is no marker) is not an integer.

  Example:
     >>> extract_result("Sansa earns $5 x 3 = $<<5*3=15>>15 every day #### 15$")
     '15'
     >>> extract_result("The value of 2 + 2 is 4. #### 4.")
     '4'
     >>> extract_result("Half of 5 is 2.5 #### 2.5")
     'N/A'
  """
  value = answer.split('####')[-1].strip()
  for irrv_char in [',', '$', '%', 'g']: # Handles cases like 1,000$, 50%, 20g etc
    value = value.replace(irrv_char, '')
  # Models often end the answer with a full stop ("#### 72."); drop it.
  value = value.strip().rstrip('.')
  try:
    return str(int(value))
  except ValueError:
    pass
  # Accept decimals with no fractional part, e.g. "72.0".
  try:
    as_float = float(value)
  except ValueError:
    return 'N/A'
  if as_float.is_integer(): # False for nan/inf and for real fractions
    return str(int(as_float))
  return 'N/A'

def build_chat_and_extract_answer_val(dataset_entry):
  """
  Wrap the entry's question in a few-shot chat prompt and extract its reference answer value.

  The chat consists of a system prompt, five worked question/answer pairs whose
  answers end in '#### <integer>', and a final user turn holding this entry's question.
  The entry is modified in place: the "chat" and "answer_val" keys are added.

  Args:
    dataset_entry: A row in a dataset; must provide the "question" and "answer" keys.

  Returns:
    dataset_entry: The same row, with the "chat" and "answer_val" keys added.

  Example:
    entry = {
       "question": "What is 2 + 2?",
       "answer": "The value of 2 + 2 is 4. #### 4"
     }
     >>> build_chat_and_extract_answer_val(entry)
    {
       "question": "What is 2 + 2?",
       "answer": "The value of 2 + 2 is 4. #### 4",
       "chat": [{...system prompt....}, {..user prompt..}, {..assistant prompt..}...{user prompt with question}],
       "answer_val": "4"
     }
  """
  question = dataset_entry["question"]
  # Reference answer value, as a string ('N/A' when extract_result cannot parse it).
  answer_val = extract_result(dataset_entry["answer"])
  # Few-shot prompt: system instruction, worked examples, then the actual question.
  chat = [
    {"role": "system", "content": "You are a chatbot who solves word problems! When asked to do so think step by step and at the end, you MUST write the answer as an integer after '####'."},

    {"role": "user", "content": "Q: Sansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?"},
    {"role": "assistant", "content": "A: Sansa earns $5 x 3 = $<<5*3=15>>15 every day by selling three 8-inch portraits. The price of the 16-inch portrait is $5 x 2 = $<<5*2=10>>10 each. So, she earns $10 x 5 = $<<10*5=50>>50 every day by selling five 16-inch portraits. Her total earnings is $50 + $15 = $<<50+15=65>>65 every day. Therefore, the total amount she earns after 3 days is $65 x 3 = $<<65*3=195>>195. #### 195"},

    {"role": "user", "content": "Q: There are some lions in Londolozi at first. Lion cubs are born at the rate of 5 per month and lions die at the rate of 1 per month. If there are 148 lions in Londolozi after 1 year, how many lions were there in Londolozi at first?"},
    {"role": "assistant", "content": "A: There are 5-1=<<5-1=4>>4 more lions each month. There will be 4*12=<<4*12=48>>48 more lions after 1 year. There were 148-48=<<148-48=100>>100 lions in Londolozi at first. #### 100"},

    {"role": "user", "content": "Q: Randy just turned 12 and started playing the piano. His friend Sheila told him about the 10,000-hour rule which says, after 10,000 hours of practice, you become an expert or master in your field. If Randy wants to become a piano expert before he is 20, how many hours a day will he need to practice if he practices every day, Monday – Friday, and takes two weeks off for vacation each year?"},
    {"role": "assistant", "content": "A: Randy has 20 – 12 = <<20-12=8>>8 years until he is 20. He must practice 10,000 hours / 8 years = <<10000/8=1250>>1,250 hours a year to become an expert. There are 52 weeks in a year – 2 weeks of vacation Randy plans to take = <<52-2=50>>50 weeks of practice for Randy. Randy will practice Monday – Friday, which is 5 days a week, so 50 weeks x 5 days = <<50*5=250>>250 days of practice each year. Randy will need to practice 1250 hours / 250 days = <<1250/250=5>>5 hours each day. #### 5"},

    {"role": "user", "content": "Q: A publishing house decides to create a contest for new writers and will award a total of $800 in prizes to the 18 novels with the most votes. First place will get $200, second place will get $150, third place $120 and the rest of the awards will be paid with the same amount of money. How much money will each writer earn from fourth place onwards?"},
    {"role": "assistant", "content": "A: The first three cash prizes sum to $200 + $150 + $120= $<<200+150+120=470>>470 The remaining amount of cash prizes to be distributed is $800 - $470= $<<800-470=330>>330 The number of novels left to be awarded is 18 – 3= <<18-3=15>>15 novels So each writer from 4th to 18th will earn $330 / 15= $<<330/15=22>>22 #### 22"},

    {"role": "user", "content": "Q: Maisie and Donna dropped off flyers for a neighborhood clean-up day at houses around their neighborhood. Maisie walked down the shorter left street and dropped off 33 flyers. Donna took the long right street on her bicycle and dropped off five more than twice as many flyers as Maisie. How many flyers did Donna drop off?"},
    {"role": "assistant", "content": "A: Twice as many flyers as Maisie is 33 * 2 = <<33*2=66>>66 flyers. Donna dropped off five more than that, so she dropped off 66 + 5 = <<66+5=71>>71 flyers. #### 71"},

    {"role": "user", "content": f"Q: {question}"}
  ]
  dataset_entry["chat"] = chat
  dataset_entry["answer_val"] = answer_val
  return dataset_entry


In [31]:
# Add the 'chat' and 'answer_val' columns, then drop the 'question' column.
train_dataset = train_dataset.map(build_chat_and_extract_answer_val).remove_columns(['question'])
test_dataset = test_dataset.map(build_chat_and_extract_answer_val).remove_columns(['question'])

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

### Generate data

In [47]:
# Sanity check: sample answers (default num_attempts=5) for the first training
# question and tally the values extracted from them.
run_self_consistency(pipe, train_dataset[0]["chat"], extract_result)

(['A: Natalia sold 48 clips in April. She sold half as many clips in May, so she sold 48 / 2 = 24 clips in May. The total number of clips Natalia sold is 48 + 24 = 72. #### 72',
  'A: Natalia sold 48 clips in April. She sold half as many clips in May, which is 48 / 2 = 24 clips. The total number of clips Natalia sold in April and May is 48 + 24 = 72 clips. #### 72',
  'A: Natalia sold 48 clips in April. She sold half as many clips in May, so she sold 48 / 2 = 24 clips in May. In total, Natalia sold 48 + 24 = 72 clips. #### 72',
  'A: Natalia sold 48 clips in April. She sold half as many clips in May, so she sold 48 / 2 = 24 clips in May. The total number of clips Natalia sold in April and May is 48 + 24 = 72 clips. #### 72',
  'A: In April, Natalia sold 48 clips. In May, she sold half as many clips as in April, so she sold 48 / 2 = 24 clips. The total number of clips Natalia sold is 48 + 24 = 72. #### 72'],
 defaultdict(int, {'72': 5}))