In [1]:
%pip install -q transformers datasets accelerate peft huggingface_hub

Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import LoraConfig, get_peft_model
import torch

In [2]:
# Load the fine-tuning dataset by name via `datasets.load_dataset`.
# Later cells read its "train" split and its "input" / "output" columns.
dataset = load_dataset("Kaeyze/computer-science-synthetic-dataset")

In [3]:
# Tokenizer for the same "Qwen/Qwen2.5-0.5B" checkpoint that is loaded as the model below.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")

In [None]:
def preprocess_function(examples, max_length=64):
    """Tokenize a batch of input/output pairs for causal-LM fine-tuning.

    Each example's ``input`` and ``output`` strings are joined with a single
    space, tokenized with the notebook-level ``tokenizer``, and truncated or
    padded to exactly ``max_length`` tokens.

    Args:
        examples: Batch mapping with parallel ``"input"`` and ``"output"``
            sequences of strings (the shape ``Dataset.map(batched=True)``
            passes in).
        max_length: Fixed sequence length to pad/truncate to. Defaults to 64.

    Returns:
        The tokenizer output with an added ``"labels"`` entry. Labels equal
        ``input_ids`` on real tokens and ``-100`` on padding positions.
    """
    combined_texts = [
        inp + " " + out
        for inp, out in zip(examples["input"], examples["output"])
    ]
    tokenized = tokenizer(
        combined_texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    # Padding must not contribute to the loss. -100 is the value the
    # cross-entropy loss in this notebook ignores (ignore_index=-100), so
    # every position whose attention mask is 0 gets that label instead of the
    # pad token id.
    tokenized["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

In [5]:
# Tokenize every split in batches across four worker processes. The original
# columns are dropped so only the fields returned by `preprocess_function`
# remain in the result.
tokenized_dataset = dataset.map(
    preprocess_function,
    remove_columns=dataset["train"].column_names,
    batched=True,
    num_proc=4,
)

In [None]:
# Hold out 20% of the tokenized "train" split for evaluation (fixed seed so
# the split is reproducible).
train_test_split = tokenized_dataset["train"].train_test_split(test_size=0.2, seed=42)

# Work on small subsets (at most 5000 train / 1000 eval rows). The sizes are
# clamped to the split length so that a split smaller than the cap does not
# raise an out-of-range index error in `select`.
train_dataset = train_test_split["train"].select(
    range(min(5000, len(train_test_split["train"])))
)
eval_dataset = train_test_split["test"].select(
    range(min(1000, len(train_test_split["test"])))
)

In [None]:
# Load the Qwen2.5-0.5B causal-LM checkpoint with float32 weights, placed on the CPU.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",
    torch_dtype=torch.float32,
    device_map="cpu",
)


In [None]:
# LoRA adapter settings: rank-1 updates (scaled by lora_alpha=4) on the
# attention query and value projections, with 5% dropout on the adapter path.
# `task_type` is set explicitly so that `get_peft_model` builds the
# causal-LM-specific PEFT wrapper instead of the generic `PeftModel`.
lora_config = LoraConfig(
    r=1,
    lora_alpha=4,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)


In [None]:
# Wrap the base model with the LoRA adapters described by `lora_config`.
model = get_peft_model(model, lora_config)

# Leave gradients enabled only for parameters whose name contains "lora";
# every other parameter is frozen.
for param_name, weight in model.named_parameters():
    weight.requires_grad_("lora" in param_name)

# Turn off the generation cache for training, then switch to train mode.
# `model.train()` stays last so the cell displays the model structure.
model.config.use_cache = False
model.train()

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


PeftModel(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 896)
        (layers): ModuleList(
          (0-23): 24 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=896, out_features=896, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=896, out_features=1, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=1, out_features=896, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_features=896, out_fe

In [None]:
def compute_metrics(eval_pred):
    """Compute the mean next-token cross-entropy for an evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is indexed as
            ``(batch, sequence, vocab)`` and ``labels`` as
            ``(batch, sequence)``; NumPy arrays and tensors are both accepted.
            If ``logits`` is a tuple, its first element is used.

    Returns:
        dict: ``{"eval_loss": <float>}``, the loss averaged over every label
        position that is not ``-100``.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = torch.as_tensor(logits).float()
    labels = torch.as_tensor(labels).long()

    # Causal LM: the prediction at position t is scored against the token at
    # position t + 1. The shift is therefore along the *sequence* axis (drop
    # the last prediction and the first label); the vocab axis stays last so
    # it can serve as the class dimension.
    shift_logits = logits[:, :-1, :]
    shift_labels = labels[:, 1:]

    loss = torch.nn.functional.cross_entropy(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1),
        ignore_index=-100,
    )
    return {"eval_loss": loss.item()}

In [None]:
training_args = TrainingArguments(
    # Where checkpoints are written; one checkpoint per epoch, only the latest kept.
    output_dir="./qwen_lora_finetuned",
    save_strategy="epoch",
    save_total_limit=1,
    # Optimisation: 6 sequences per step x 4 accumulation steps = 24 per update.
    num_train_epochs=3,
    learning_rate=3e-5,
    per_device_train_batch_size=6,
    gradient_accumulation_steps=4,
    # Run on CPU without half-precision.
    use_cpu=True,
    fp16=False,
    # Data loading.
    dataloader_num_workers=2,
    remove_unused_columns=True,
    # Log every 100 steps; no external reporting integration.
    logging_strategy="steps",
    logging_steps=100,
    report_to="none",
    # No evaluation during training; it is triggered manually afterwards.
    eval_strategy="no",
    eval_accumulation_steps=6,
)

In [25]:
# Batch collator that pads every example to a fixed length of 64 tokens,
# the same length used when tokenizing.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    padding="max_length",
    max_length=64,
)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the run configuration,
# the train/eval subsets, the padding collator and the metric function.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

In [26]:
# Run fine-tuning with the Trainer built above. As the cell's last expression,
# the call's return value is displayed as the cell output.
trainer.train()

Step,Training Loss


KeyboardInterrupt: 

In [None]:
# Evaluate on the `eval_dataset` given to the Trainer and print the returned metrics.
eval_results = trainer.evaluate()
print("Final Evaluation Results:", eval_results)