# Library installation

In [1]:
# !pip install -q bitsandbytes accelerate transformers peft datasets

# # After this cell, restart runtime:
# # Runtime > Restart session

# Import

In [1]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
    DataCollatorForLanguageModeling, TrainingArguments, Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from huggingface_hub import snapshot_download

# Download (or reuse from cache_dir) a local snapshot of the model repository
# and keep the returned snapshot path for the loading cells below.
# NOTE: `resume_download` is deprecated in huggingface_hub -- interrupted
# downloads are always resumed when possible -- so the flag is no longer passed.
local_model = snapshot_download(
    repo_id="Qwen/Qwen3-8B",
    cache_dir="model/Qwen3-8B/huggingface_cache",
    force_download=False,  # reuse files that are already cached
)



In [None]:
# Allow TF32 matmul kernels on CUDA backends.
torch.backends.cuda.matmul.allow_tf32 = True

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Path of the local snapshot downloaded above (Hub id: "Qwen/Qwen3-8B").
MODEL_NAME = local_model
# Fixed token length per example; reduced for T4 safety.
MAX_LEN = 256

# Tokenizer

In [None]:
# Load the fast tokenizer that ships with the model snapshot.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
)

# Padding to a fixed length needs a pad token; reuse EOS when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Dataset

In [None]:
# Alternative source: load train/test splits from local JSONL files.
# data = load_dataset("json", data_files={
#     "train": "/content/train.jsonl",
#     "test": "/content/test.jsonl"
# })

# # If you only have one file:
# # data = load_dataset("json", data_files={"all": "/content/aura_130_messages.jsonl"})
# # data = data["all"].train_test_split(test_size=0.1, seed=42)

# Load the dataset by its Hub repo id. The code below reads a "messages"
# column from it, where every message exposes "role" and "content" keys.
data = load_dataset("Yodhasu04/prethesis_dataset")

def merge_messages(messages):
    """Flatten a list of chat messages into a single string.

    Each message becomes ``"<ROLE>: <content>"`` with the role upper-cased,
    and the resulting entries are joined with newlines.

    Args:
        messages: iterable of mappings with ``"role"`` and ``"content"`` keys.

    Returns:
        The joined transcript; an empty string when there are no messages.
    """
    rendered = []
    for msg in messages:
        speaker = msg['role'].upper()
        rendered.append(f"{speaker}: {msg['content']}")
    return "\n".join(rendered)

def tokenize_function(examples):
    """Tokenize a batch of conversations into fixed-length encodings.

    Args:
        examples: batch mapping whose ``"messages"`` entry holds one message
            list per example.

    Returns:
        The tokenizer output for the flattened conversations, truncated and
        padded to ``MAX_LEN`` tokens.
    """
    flattened = list(map(merge_messages, examples["messages"]))
    # Truncate on the left side when a conversation exceeds MAX_LEN.
    tokenizer.truncation_side = "left"
    encoded = tokenizer(
        flattened,
        max_length=MAX_LEN,
        truncation=True,
        padding="max_length",
    )
    return encoded

# Run tokenize_function over the dataset in batched mode and drop the raw
# "messages" column from the result.
tokenized = data.map(
    tokenize_function,
    batched=True,
    remove_columns=["messages"],
)

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

# Load Qwen in 4-bit and prepare it for LoRA

In [None]:
# 4-bit NF4 quantization with double quantization; matmuls compute in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # fp16 for T4
    bnb_4bit_use_double_quant=True
)

# Load the quantized base model and let accelerate place it on the
# available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=False,
)

# Disable the generation KV cache for training with gradient checkpointing.
model.config.use_cache = False

# prepare_model_for_kbit_training enables gradient checkpointing itself
# (use_gradient_checkpointing defaults to True). Calling
# model.gradient_checkpointing_enable(...) beforehand is therefore overridden
# by the helper's own call, which would drop use_reentrant=False. Pass the
# checkpointing kwargs through the helper so the setting takes effect.
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
)



Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]