In [1]:
# %%capture
# This cell will take time
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install optuna

Found existing installation: unsloth 2024.11.7
Uninstalling unsloth-2024.11.7:
  Successfully uninstalled unsloth-2024.11.7
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-ups5x6md/unsloth_b71bc4dd7e7745eeba3b2bc15d4ec163
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-ups5x6md/unsloth_b71bc4dd7e7745eeba3b2bc15d4ec163
  Resolved https://github.com/unslothai/unsloth.git to commit f26d4e739ed507de7a9088da53d10fd02f58d160
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2024.11.7-py3-none-a

In [2]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = True

# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [3]:
from google.colab import drive
drive.mount('/content/drive')

from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    quantization_config = bnb_config,
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
# Load model and wrap with LoRA

model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.11.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
# download and load competition dataset

from datasets import load_dataset
from datasets import Dataset
dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp")

In [6]:
# split


train_test_split = 0.80
train_dataset_size = int(len(dataset['train']) * train_test_split)
test_dataset_size = len(dataset['train']) - train_dataset_size

train_dataset, test_dataset = Dataset.from_dict(dataset['train'][:train_dataset_size]), Dataset.from_dict(dataset['train'][train_dataset_size:])


In [7]:
prompt = """You are a mathematician, and your task is to finds if the answer to the given math question is correct or not. Solve the question and response 'True' if answer is correct, 'False' if answer is incorrect.

### Question:
{}

### Answer:
{}

### Solution:
{}

### Output:
{}
"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    question = examples["question"]
    ans       = examples["answer"]
    expln     = examples["solution"]
    output    = examples["is_correct"]
    texts = []
    for instruction, input, expln, output in zip(question, ans, expln, output):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        if output == True:
          expln += "\nThe Answer matches the Solution, so the output is True."
        else:
          expln += "\nThe Answer does not match the Solution, so the output is False"
        text = prompt.format(instruction,input,expln,output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

In [8]:
train_dataset = train_dataset.map(formatting_prompts_func, batched = True,)
test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/800000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200000 [00:00<?, ? examples/s]

In [9]:
print(train_dataset['text'][0])

You are a mathematician, and your task is to finds if the answer to the given math question is correct or not. Solve the question and response 'True' if answer is correct, 'False' if answer is incorrect.

### Question:
What is the radius of the circle inscribed in triangle $ABC$ if $AB = 22, AC=12,$ and $BC=14$? Express your answer in simplest radical form.

### Answer:
3.16227766016838

### Solution:
The circle is inscribed in a triangle, and we know the sides of the triangle.
To use the inradius formula, we need to know the area of the triangle.
We can use Heron's formula to calculate the area.
<llm-code>
import math
from sympy import *

AB, AC, BC = 22, 12, 14

# Calculate the semiperimeter and area using Heron's formula
s = (AB + AC + BC) / 2
K = sqrt(s * (s - AB) * (s - AC) * (s - BC))

print(K)
</llm-code>
<llm-code-output>
75.8946638440411
</llm-code-output>
Let's now use the formula for the radius of the inscribed circle.
<llm-code>
r = K / s
print(r)
</llm-code>
<llm-code-outp

In [10]:
train_tmp = Dataset.from_dict(train_dataset[:30000])
test_tmp = Dataset.from_dict(test_dataset[:3000])

In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

training_args = TrainingArguments(
    per_device_train_batch_size = 4,  # Slightly larger batch
    gradient_accumulation_steps = 4,  # Effective batch size = 16
    warmup_steps = 20,  # Stabilize early training
    num_train_epochs = 1,  # Multiple passes over data
    max_steps = 1000,  # Let epochs determine step count
    learning_rate = 5e-5,  # Lower LR for stability
    fp16 = True,
    bf16 = is_bfloat16_supported(),
    logging_steps = 10,  # Reduced logging frequency
    optim = "adamw_8bit",
    weight_decay = 0.01,  # Lower decay for small data
    lr_scheduler_type = "cosine",  # Smooth decay
    seed = 3407,
    output_dir = "outputs",
    report_to = "none",
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_tmp,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args
)

Map (num_proc=4):   0%|          | 0/30000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [12]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 30,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 1,000
 "-____-"     Number of trainable parameters = 83,886,080


Step,Training Loss
10,1.4794
20,1.2306
30,0.8227
40,0.7515
50,0.7096
60,0.6975
70,0.6861
80,0.6835
90,0.6627
100,0.6652


Step,Training Loss
10,1.4794
20,1.2306
30,0.8227
40,0.7515
50,0.7096
60,0.6975
70,0.6861
80,0.6835
90,0.6627
100,0.6652


In [13]:
# save to google drive

model.save_pretrained("drive/MyDrive/Colab Notebooks/lora_model_6") # replace with your disire path
tokenizer.save_pretrained("drive/MyDrive/Colab Notebooks/lora_model_6")

('drive/MyDrive/Colab Notebooks/lora_model_6/tokenizer_config.json',
 'drive/MyDrive/Colab Notebooks/lora_model_6/special_tokens_map.json',
 'drive/MyDrive/Colab Notebooks/lora_model_6/tokenizer.json')

Inference

In [14]:
from transformers import TextGenerationPipeline

class MyPipeline(TextGenerationPipeline):
    def postprocess(pipeline ,dict):
      # dict_keys(['generated_sequence', 'input_ids', 'prompt_text'])
      tmp = dict['generated_sequence']
      holder = len(dict['input_ids'][0])
      tmp2 = tokenizer.batch_decode([tmp[0][0][holder:]], skip_special_tokens=True)
      return tmp2[0]

FastLanguageModel.for_inference(model)
pipe = MyPipeline(
    task="text-generation",
    model = model,
    tokenizer = tokenizer,
    batch_size = 4,
    eos_token_id = model.config.eos_token_id,
    max_new_tokens = 64
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

In [15]:


EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    question = examples["question"]
    ans       = examples["answer"]
    texts = []
    for instruction, input in zip(question, ans):
        text = prompt.format(instruction, input, "", "")
        texts.append(text)
    return { "text" : texts, }

test_format = test_dataset.map(formatting_prompts_func, batched = True)

Map:   0%|          | 0/200000 [00:00<?, ? examples/s]

In [16]:
test_input = test_format['text']
test_key = test_dataset['is_correct']

In [17]:
res = []

for i in range(10):
  # sperate into 10 runs
  l = 100*i
  r = 100*(i+1)
  print(l, r)
  res += pipe(test_input[l:r])

0 100
100 200
200 300
300 400
400 500
500 600
600 700
700 800
800 900
900 1000


In [None]:
res_tmp = []

for i,v in enumerate(res):
  tf = v.strip().split('\n')[-1]
  if tf == 'True':
    res_tmp.append(True)
  else:
    res_tmp.append(False)

In [19]:
correct = 0
incorrect = 0
for i in range(len(res_tmp)):
  if test_key[i] == res_tmp[i]:
    correct += 1
  elif test_key[i] != res_tmp[i]:
    incorrect += 1
  else:
    print(i)

print(correct/len(res_tmp))
print(res_tmp)
print(test_key[:len(res_tmp)])

0.611
[False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False,