In [14]:
!pip install torch datasets transformers peft bitsandbytes
!pip install --upgrade datasets fsspec huggingface_hub

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [15]:
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

In [16]:
# Base checkpoint that is quantized to 4 bit and fine-tuned below.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Native bfloat16 needs an NVIDIA GPU with compute capability 8.0+ (Ampere or
# newer). On older cards (e.g. a T4) use float16 for the de-quantized matmuls
# instead of relying on bfloat16 emulation.
compute_dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
    else torch.float16
)

# 4-bit NF4 quantization settings for the frozen base weights.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = compute_dtype,
)

# `trust_remote_code` is intentionally not enabled: it lets code shipped in the
# Hub repository run locally, and this checkpoint loads through the Llama
# classes that are built into transformers.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = 'auto',
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# The tokenization step pads every example to a fixed length, which requires a
# pad token; fall back to EOS if the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [17]:
# LoRA adapter settings: rank-8 updates (alpha 16, dropout 0.05) injected into
# the attention query and value projections only; bias terms stay untouched.
lora_config = LoraConfig(
    task_type = TaskType.CAUSAL_LM,
    r = 8,
    lora_alpha = 16,
    target_modules = ['q_proj', 'v_proj'],
    lora_dropout = 0.05,
    bias = 'none',
)

# Recommended preparation step for training on top of a k-bit quantized model
# (see the PEFT quantization guide): freezes the base weights and upcasts the
# remaining half-precision parameters for numerical stability.
# Gradient checkpointing is left off so the speed/memory profile of the
# training run stays as it was; switch it on if GPU memory is tight.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing = False)

# Wrap the prepared model so that only the LoRA weights are trainable.
# NOTE: this rebinds `model`; re-running the cell without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)

In [18]:
# GSM8K, 'main' configuration: keep only the first 200 rows of the train split.
data = load_dataset(
    'openai/gsm8k',
    'main',
    split = 'train[:200]',
)

In [19]:
def tokenize(batch, max_length = 256):
  """Turn a batch of GSM8K rows into fixed-length causal-LM training features.

  Each question/answer pair is rendered into the instruction/response prompt
  template, terminated with the tokenizer's EOS token, and tokenized with
  truncation and padding to `max_length`.

  Args:
    batch: mapping with parallel 'question' and 'answer' sequences, as passed
      by `Dataset.map(..., batched=True)`.
    max_length: number of tokens every example is truncated/padded to.

  Returns:
    The tokenizer output with an added 'labels' entry: a copy of 'input_ids'
    in which padding positions are set to -100 so the loss ignores them.
  """
  # Explicit end-of-sequence marker so the model is trained to stop after the
  # answer. (Needed once padding is masked out below: the padding is no longer
  # a training signal.) Examples cut by truncation lose the marker.
  eos = tokenizer.eos_token or ''

  texts = [
      f"### Instruction:\n{instruction}\n### Response:\n{out}{eos}"
      for instruction, out in zip(batch['question'], batch['answer'])
  ]

  tokens = tokenizer(
      texts,
      padding = 'max_length',
      truncation = True,
      return_tensors = 'pt',
      max_length = max_length
  )

  # Mask by attention mask rather than by token id: the pad token may be the
  # same id as EOS, and the real EOS must stay in the labels.
  labels = tokens['input_ids'].clone()
  labels[tokens['attention_mask'] == 0] = -100
  tokens['labels'] = labels

  return tokens

In [20]:
# Run `tokenize` over the dataset in batches and drop the original columns so
# that only the tokenizer outputs remain.
tokenize_data = data.map(
    tokenize,
    batched = True,
    remove_columns = data.column_names,
)

In [22]:
# Mixed-precision mode follows the hardware: bf16 where the GPU supports it
# natively (compute capability 8.0+), fp16 otherwise. Enabling exactly one of
# the two keeps autocast in line with a bfloat16 4-bit compute dtype on
# Ampere-or-newer cards while still running on older GPUs.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_args = TrainingArguments(
    output_dir = './tinyllama-math-lora-tutorial',
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 4,     # effective batch of 16 per device
    learning_rate = 2e-4,
    lr_scheduler_type = 'cosine',
    num_train_epochs = 50,
    warmup_ratio = 0.05,
    bf16 = use_bf16,
    fp16 = not use_bf16,
    # With 200 examples and an effective batch of 16 there are only about a
    # dozen optimizer steps per epoch; the default logging interval (500
    # steps) would leave the loss table empty for most of the run.
    logging_steps = 10,
    save_strategy = 'epoch',             # one checkpoint per epoch
    report_to = 'none',                  # no external experiment trackers
    remove_unused_columns = False,
    label_names = ['labels']
)

In [24]:
# Collect the Trainer inputs first, then build the Trainer from them.
trainer_kwargs = dict(
    model = model,                  # LoRA-wrapped model
    args = training_args,
    train_dataset = tokenize_data,  # tokenized GSM8K subset
    processing_class = tokenizer,
)

trainer = Trainer(**trainer_kwargs)

In [25]:
# Start fine-tuning; the returned training summary is shown as the cell output.
train_output = trainer.train()
train_output

Step,Training Loss


KeyboardInterrupt: 

In [None]:
# Write the model and the tokenizer files side by side in one directory.
finetuned_dir = './tinyllama-math-lora-finetuned'

model.save_pretrained(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)