In [None]:
!pip install -U transformers
!pip install -U bitsandbytes

In [None]:
!pip install datasets

In [None]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-1.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-1.5B-Instruct", load_in_4bit=True, device_map="auto")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [None]:
from datasets import Dataset
import torch

# Load the dataset from CSV
dataset = Dataset.from_csv("/content/Query_response_with_specific_questions.csv")
column_names = ['Unnamed: 0', 'article', 'abstract', 'section_names', 'query', 'response']
# Add special tokens if necessary
tokenizer.add_special_tokens({'pad_token': '[PAD]'})


# Define preprocess function
def preprocess_function(examples):
    inputs = [
        f"### Context:\n{context}\n\n### Query:\n{query}\n\n"
        for context, query in zip(examples["article"], examples["query"])
    ]
    tokenized = tokenizer(inputs,  truncation=True, padding="max_length", max_length=512)
    responses  = [f"### Response: \n{response}" for response in examples["response"]]
    tokenized_outputs = tokenizer(responses, truncation=True, padding = "max_length", max_length=512)
    input_ids = tokenized["input_ids"]
    labels = tokenized_outputs["input_ids"]

    # Mask all non-response tokens in labels
    # for i, input_ids_seq in enumerate(input_ids):
    #     response_start = inputs[i].find("### Response:\n") + len("### Response:\n")
    #     response_token_start = len(tokenizer.encode(inputs[i][:response_start], add_special_tokens=False))

    #     labels[i][:response_token_start] = [-100]  # Mask everything before the response
    # Mask padding tokens in labels
    for i in range(len(labels)):
        labels[i] = [
            token if token != tokenizer.pad_token_id else -100
            for token in labels[i]
        ]

    tokenized["labels"] = labels
    return tokenized

tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=column_names)

train_test_split = tokenized_datasets.train_test_split(test_size=0.2)

train_dataset = train_test_split["train"]
test_dataset = train_test_split["test"]

In [None]:
inputs = {
    key: torch.tensor(train_dataset[0][key]).unsqueeze(0).to("cuda")  # Add batch dimension
    for key in ["input_ids", "attention_mask", "labels"]
}
print(f"Labels shape: {inputs['labels'].shape}")
print(f"Labels dtype: {inputs['labels'].dtype}")

Labels shape: torch.Size([1, 512])
Labels dtype: torch.int64


In [None]:
# Define the QLoRA configuration
lora_config = LoraConfig(
    r=8,  # Reduced rank for smaller models
    lora_alpha=16,  # Adjusted scaling
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Verify these with your model
    lora_dropout=0.05,  # Lower dropout for smaller models
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply QLoRA to the model
model = get_peft_model(model, lora_config)
model.train()

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    fp16=True,
    load_best_model_at_end=True# Use mixed precision if supported
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    peft_config=lora_config,
    max_seq_length=1024,
    packing=True,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

# Train the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./mistral-7b-qlora-finetuned")
tokenizer.save_pretrained("./mistral-7b-qlora-finetuned")

  trainer = Trainer(

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33myugdave578[0m ([33myugdave578-city-of-austin[0m). Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss
1,6.6629,6.629179
2,6.0263,6.099248
3,5.9328,5.954483


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


('./mistral-7b-qlora-finetuned/tokenizer_config.json',
 './mistral-7b-qlora-finetuned/special_tokens_map.json',
 './mistral-7b-qlora-finetuned/vocab.json',
 './mistral-7b-qlora-finetuned/merges.txt',
 './mistral-7b-qlora-finetuned/added_tokens.json',
 './mistral-7b-qlora-finetuned/tokenizer.json')

In [None]:
import torch

torch.cuda.empty_cache()
del model
del trainer

import gc
gc.collect



<function gc.collect(generation=2)>

In [None]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

Collecting unsloth
  Using cached unsloth-2024.11.11-py3-none-any.whl.metadata (58 kB)
Using cached unsloth-2024.11.11-py3-none-any.whl (167 kB)
Installing collected packages: unsloth
Successfully installed unsloth-2024.11.11
Found existing installation: unsloth 2024.11.11
Uninstalling unsloth-2024.11.11:
  Successfully uninstalled unsloth-2024.11.11
Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-casitdjm
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-casitdjm
  Resolved https://github.com/unslothai/unsloth.git to commit 8558bc92b06f9128499484ef737fa71b966ffc23
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hd

In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)