In [None]:
pip install transformers datasets accelerate bitsandbytes peft

In [4]:
from huggingface_hub import login
login(token="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")


In [1]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# ─── 1. CONFIGURATION ────────────────────────────────────────────────────────────
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
DATA_PATH = "./reasoning_complexity_dataset.jsonl"
OUTPUT_DIR = "./mistral7b_reasoning_clf_optimized"

EPOCHS = 2
LR = 2e-4
BATCH = 1
ACCUM = 16
MAX_LEN = 512
MAX_OUT = 128
DEVICE_MAP = "auto"

# ─── 2. LOAD DATA ────────────────────────────────────────────────────────────────
raw_ds = load_dataset("json", data_files={"train": DATA_PATH}, split="train")

# ─── 3. LOAD TOKENIZER ───────────────────────────────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# ─── 4. LOAD MODEL WITH 4-BIT QUANTIZATION ───────────────────────────────────────
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map=DEVICE_MAP
)
model = prepare_model_for_kbit_training(model)
model.gradient_checkpointing_enable()  # Enable gradient checkpointing

# ─── 5. APPLY LoRA ADAPTERS ──────────────────────────────────────────────────────
lora_cfg = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_cfg)

# ─── 6. PREPROCESS FUNCTION ─────────────────────────────────────────────────────
def preprocess(examples):
    queries = examples["query"]
    labels = examples["label"]
    reasoning = examples["reasoning"]
    prompts = [f"Classify the reasoning level of this query.\nQuery: {q}\nAnswer: " for q in queries]
    outputs = [f"{r} Label: {l}" for r, l in zip(reasoning, labels)]

    tok_in = tokenizer(prompts, max_length=MAX_LEN, truncation=True, padding=False)
    tok_out = tokenizer(outputs, max_length=MAX_OUT, truncation=True, padding=False)

    input_ids, attention_mask, labels_list = [], [], []
    for inp_ids, inp_mask, out_ids in zip(
        tok_in["input_ids"], tok_in["attention_mask"], tok_out["input_ids"]
    ):
        full_ids = inp_ids + out_ids
        full_mask = inp_mask + [1] * len(out_ids)
        pad_len = MAX_LEN - len(full_ids)

        full_ids += [tokenizer.pad_token_id] * pad_len
        full_mask += [0] * pad_len

        label_ids = [-100] * len(inp_ids) + out_ids + [-100] * pad_len

        input_ids.append(full_ids)
        attention_mask.append(full_mask)
        labels_list.append(label_ids)

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels_list,
    }

tokenized_ds = raw_ds.map(
    preprocess,
    batched=True,
    remove_columns=raw_ds.column_names
)

# ─── 7. TRAINING SETUP ───────────────────────────────────────────────────────────
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=ACCUM,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    bf16=True,  # Use bf16 for better performance and stability on RTX 40 series
    logging_steps=10,
    save_steps=200,
    save_total_limit=3,
    optim="paged_adamw_8bit",
    ddp_find_unused_parameters=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds,
    tokenizer=tokenizer,
)

# ─── 8. START TRAINING ───────────────────────────────────────────────────────────
trainer.train()

# ─── 9. SAVE MODEL ──────────────────────────────────────────────────────────────
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("✅ Fine-tuning complete! Model saved to", OUTPUT_DIR)


  from .autonotebook import tqdm as notebook_tqdm





Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 3/3 [00:06<00:00,  2.23s/it]
  trainer = Trainer(
wandb: Currently logged in as: yashsolanki6775 (yashsolanki6775-georgian-college) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
10,52.6154
20,11.3606
30,6.181
40,1.9499
50,0.8412
60,0.4922
70,0.1647
80,0.0642
90,0.0173
100,0.0035


KeyboardInterrupt: 