In [None]:
!pip install -U unsloth

In [None]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Token has not been saved to git credential helper.


In [1]:
from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.2: Fast Mistral patching. Transformers:4.46.2.
   \\   /|    GPU: NVIDIA L4. Max memory: 21.951 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [2]:
from datasets import Dataset
import torch

# Load the dataset from CSV
dataset = Dataset.from_csv("/content/Query_response_with_responses.csv")
column_names = ['Unnamed: 0', 'article', 'abstract', 'section_names', 'query', 'response']
# Add special tokens if necessary
tokenizer.add_special_tokens({'pad_token': '[PAD]'})


# Define preprocess function
def preprocess_function(examples):
    inputs = [
        f"### Context:\n{context}\n\n### Query:\n{query}\n\n"
        for context, query in zip(examples["article"], examples["query"])
    ]
    tokenized = tokenizer(inputs,  truncation=True, padding="max_length", max_length=512)
    responses  = [f"### Response: \n{response}" for response in examples["response"]]
    tokenized_outputs = tokenizer(responses, truncation=True, padding = "max_length", max_length=512)
    input_ids = tokenized["input_ids"]
    labels = tokenized_outputs["input_ids"]

    # Mask all non-response tokens in labels
    # for i, input_ids_seq in enumerate(input_ids):
    #     response_start = inputs[i].find("### Response:\n") + len("### Response:\n")
    #     response_token_start = len(tokenizer.encode(inputs[i][:response_start], add_special_tokens=False))

    #     labels[i][:response_token_start] = [-100]  # Mask everything before the response
    # Mask padding tokens in labels
    for i in range(len(labels)):
        labels[i] = [
            token if token != tokenizer.pad_token_id else -100
            for token in labels[i]
        ]

    tokenized["labels"] = labels
    return tokenized

tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=column_names)

train_test_split = tokenized_datasets.train_test_split(test_size=0.8)

train_dataset = train_test_split["train"]
test_dataset = train_test_split["test"]

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.12.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [4]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    max_seq_length = 2048,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        gradient_accumulation_steps =1,
        warmup_steps = 5,
        num_train_epochs=5, # Set num_train_epochs = 1 for full training runs
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [5]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 40 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 1
\        /    Total batch size = 8 | Total steps = 25
 "-____-"     Number of trainable parameters = 20,971,520


Step,Training Loss
1,2.2397
2,2.0519
3,2.1937
4,2.1371
5,1.9713
6,1.9038
7,1.9351
8,1.9137
9,1.8516
10,1.5598


In [6]:
model.save_pretrained("./mistral-7b-qlora-finetuned")
tokenizer.save_pretrained("./mistral-7b-qlora-finetuned")

('./mistral-7b-qlora-finetuned/tokenizer_config.json',
 './mistral-7b-qlora-finetuned/special_tokens_map.json',
 './mistral-7b-qlora-finetuned/tokenizer.model',
 './mistral-7b-qlora-finetuned/added_tokens.json',
 './mistral-7b-qlora-finetuned/tokenizer.json')

In [7]:
# Install Required Libraries

import locale
locale.getpreferredencoding = lambda: "UTF-8"
!apt-get install git-lfs

# Login to Hugging Face
from huggingface_hub import notebook_login

notebook_login()

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
# Create and Clone a Hugging Face Repository
from huggingface_hub import create_repo, Repository

# Replace with your Hugging Face username and desired repository name
repo_name = "mistral-7b-finetuned-query-response"
username = "yugdave"

create_repo(repo_name, token=True)

# Clone the repository
repo = Repository(local_dir="/content/mistral-7b-finetuned-query-response", clone_from=f"{username}/{repo_name}")

Cloning https://huggingface.co/yugdave/mistral-7b-finetuned-query-response into local empty directory.


In [10]:
!cp -r /content/mistral-7b-qlora-finetuned/* /content/mistral-7b-finetuned-query-response/

In [12]:
!git config --global user.email "yugdave578@utexas.edu"
!git config --global user.name "yugdave"
repo.push_to_hub(commit_message="Add my fine-tuned model")

Upload file adapter_model.safetensors:   0%|          | 1.00/80.1M [00:00<?, ?B/s]

Upload file tokenizer.json:   0%|          | 1.00/3.50M [00:00<?, ?B/s]

Upload file tokenizer.model:   0%|          | 1.00/574k [00:00<?, ?B/s]

To https://huggingface.co/yugdave/mistral-7b-finetuned-query-response
   1470cf6..68096f8  main -> main

   1470cf6..68096f8  main -> main



'https://huggingface.co/yugdave/mistral-7b-finetuned-query-response/commit/68096f859548e0ff7b1bb38d729b789738d7e6de'