In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/10623Project

Mounted at /content/drive
/content/drive/MyDrive/10623Project


In [2]:
# !pip install bitsandbytes datasets
!pip install -U bitsandbytes
!pip install datasets
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
import os
import json

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [3]:
# Step 1: Load and process dataset (assuming JSONL format)
def load_jsonl_dataset(file_path):
    with open(file_path, "r") as f:
        lines = [json.loads(line) for line in f]
    return lines

def format_for_instruction(example):
    instruction = "Solve the following math problem step by step."
    question = example["prompt"].replace("Q:", "").replace("A:", "").strip()
    return {
        "prompt": f"### Instruction:\n{instruction}\n\n### Input:\n{question}\n\n### Response:",
        "completion": example["completion"]
    }


# Load raw and format
data_path = "gsm8k_reasoning_train.jsonl"
raw_data = load_jsonl_dataset(data_path)
formatted_data = [format_for_instruction(ex) for ex in raw_data]
print(formatted_data[0])

{'prompt': '### Instruction:\nSolve the following math problem step by step.\n\n### Input:\nNatalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\n\n### Response:', 'completion': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


In [6]:

# Convert to HF dataset
# !pip install datasets
from datasets import Dataset
train_dataset = Dataset.from_list(formatted_data)

# Tokenization
model_name = "google/gemma-7b"
hf_token = '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'
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

def tokenize(example):
    full_input = example["prompt"] + example["completion"]
    return tokenizer(
        full_input,
        truncation=True,
        max_length=384,
        padding="max_length"
    )

tokenized_dataset = train_dataset.map(tokenize, batched=False)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [7]:

# Step 2: Load 4-bit quantized model with BitsAndBytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    token=hf_token
)

# Prepare for QLoRA
model = prepare_model_for_kbit_training(model)

# Step 3: Apply LoRA (QLoRA = LoRA on quantized model)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Check for Gemma-specific names if needed
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:

from transformers import TrainingArguments
# Step 4: Training setup
training_args = TrainingArguments(
    output_dir="./qlora_gemma_gsm8k_reasoning",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-5,
    fp16=True,
    logging_steps=10,
    save_steps=200,
    save_total_limit=2,
    # evaluation_strategy="no",
    eval_strategy='no',
    report_to="none"
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

# Step 5: Train
trainer.train()

# Save PEFT adapter
model.save_pretrained("./qlora_gemma_gsm8k/adapter")

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,1.7183
20,1.4324
30,1.3063
40,1.1669
50,1.0294
60,0.9391
70,0.8873
80,0.8663
90,0.8528
100,0.8471


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.

Cannot access gated repo for url https://huggingface.co/google/gemma-7b/resolve/main/config.json.
Access to model google/gemma-7b is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in google/gemma-7b.
  return fn(*args, **kwargs)

Cannot access gated repo for url https://huggingface.co/google/gemma-7b/resolve/main/config.json.
Access to model google/gemma-7b is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for th