In [1]:
# !pip install pandas datasets torch==2.3.0
# !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# !pip install xformers
# !pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
# !pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"
# !pip install protobuf==3.20

In [2]:
import ast, time, tqdm, pandas as pd
from datasets import Dataset

import torch

from trl import SFTTrainer
from transformers import TrainingArguments

from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

2025-04-14 20:34:08.588914: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744680848.617381   50757 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744680848.626912   50757 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744680848.651950   50757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744680848.651984   50757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744680848.651988   50757 computation_placer.cc:177] computation placer alr

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [3]:
# CSV splits used below: the first is read for fine-tuning, the second for the
# dev-set evaluation. Both paths are relative to the working directory.
train_file, validation_file = './train_set.csv', './dev_set.csv'

In [4]:
# Run configuration: toggle 4-bit loading, pick the base checkpoint, and set the
# maximum sequence length shared by loading and training.
train_in_4_bit = True
model_name = "unsloth/Llama-3.2-3B-Instruct"

max_seq_length = 4096

# One load call covers both modes: the 4-bit path leaves dtype unset (None),
# while the non-quantized path explicitly requests float16 weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=bool(train_in_4_bit),
    dtype=None if train_in_4_bit else torch.float16,
)

# Wrap the base model with LoRA adapters (rank 16, alpha 16, no dropout,
# rank-stabilised scaling) on the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "up_proj",
        "down_proj",
        "o_proj",
        "gate_proj",
    ],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state=5000,
)

==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.45.0.
   \\   /|    GPU: NVIDIA GeForce RTX 2080 Ti. Max memory: 10.747 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir unsloth git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/trl.git`
Unsloth 2024.10.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:
# Switch the tokenizer to the "chatml" chat template. The mapping tells the
# template which keys/values the conversation turns use ("from"/"value" with
# "human"/"gpt" speakers) in place of role/content with user/assistant.
tokenizer = get_chat_template(
    tokenizer,
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt",
    },
    chat_template="chatml",
)

def apply_template(examples):
    """Render each conversation in a batch to a single training string.

    Args:
        examples: Batched mapping with a "conversations" entry holding one
            conversation (a list of turns) per example.

    Returns:
        A dict with a "text" list containing, for every conversation, the
        chat-template rendering produced by the module-level ``tokenizer``
        (untokenized, without a trailing generation prompt).
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}


# Build the training set in one pass: read the CSV, parse the stringified
# "conversations" column back into Python objects, convert to a Hugging Face
# Dataset, then add the rendered "text" column via apply_template.
dataset = Dataset.from_pandas(
    pd.read_csv(train_file).assign(
        conversations=lambda frame: frame['conversations'].apply(ast.literal_eval)
    )
).map(apply_template, batched=True)

In [6]:
# Sanity check: show the rendered text of the first training example only
# (prints nothing when the dataset is empty).
first_example = next(iter(dataset), None)
if first_example is not None:
    print(first_example['text'])

<|im_start|>user
Based on the SQL db schema given below, you have to answer the question that follows it. Your answer should be a valid, correct SQL query. You are provided with a HINT to generate the SQL query.

SCHEMA: Tables in the database student_loan: bool, person, disabled, enlist, filed_for_bankrupcy, longest_absense_from_school, male, no_payment_due, unemployed, enrolled
-------------------------
CREATE query for table: bool

CREATE TABLE bool ( "name" TEXT default '' not null primary key )
-------------------------
CREATE query for table: person

CREATE TABLE person ( "name" TEXT default '' not null primary key )
-------------------------
CREATE query for table: disabled

CREATE TABLE disabled ( "name" TEXT default '' not null primary key, foreign key ("name") references person ("name") on update cascade on delete cascade )
-------------------------
CREATE query for table: enlist

CREATE TABLE enlist ( "name" TEXT not null, organ TEXT not null, foreign key ("name") references

In [7]:
# Mixed-precision flags: bf16 is only considered for the 4-bit run, and only
# when the hardware reports support for it; every other case trains in fp16.
arg_bf16 = bool(train_in_4_bit and is_bfloat16_supported())
arg_fp16 = not arg_bf16

# Optimisation hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="output",
    seed=0,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    weight_decay=0.01,
    optim="adamw_8bit",
    fp16=arg_fp16,
    bf16=arg_bf16,
    logging_steps=1,
)

# Supervised fine-tuning on the rendered "text" column, with sequence packing
# enabled and the same max_seq_length the model was loaded with.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=training_args,
)
trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,434 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 1
\        /    Total batch size = 2 | Total steps = 3,651
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
1,0.8879
2,0.8052
3,0.6325
4,0.8896
5,0.8136
6,0.9386
7,0.8027
8,0.9902
9,0.7438
10,0.6722



Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bnb-4bit.

Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in unsloth/llama-3.2-3b-instruct-bn

TrainOutput(global_step=3651, training_loss=0.06656997408997829, metrics={'train_runtime': 19343.1558, 'train_samples_per_second': 0.377, 'train_steps_per_second': 0.189, 'total_flos': 5.809038702516634e+17, 'train_loss': 0.06656997408997829, 'epoch': 3.0})

In [None]:
# Load the dev split, parse the stringified conversations, and add an empty
# column that the loop below fills with one model output per row.
df = pd.read_csv(validation_file)
df['conversations'] = df['conversations'].apply(ast.literal_eval)
df['prediction'] = ''

# Switch the model to inference mode. The call is used for its in-place effect
# and its return value is deliberately NOT rebound to `model`.
# NOTE(review): some Unsloth releases appear to return None from
# for_inference, which would leave `model` unusable for generate() and for the
# later export - confirm for the installed version.
FastLanguageModel.for_inference(model)

for idx, row in tqdm.tqdm(df.iterrows(), desc='Evaluating Dev Set', total=df.shape[0]):
    # Only the first turn of the conversation is used as the prompt.
    prompt = [row['conversations'][0]]
    inputs = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    response = model.generate(input_ids=inputs, max_new_tokens=512, use_cache=True)
    # NOTE(review): the whole returned sequence is decoded, so the stored
    # prediction presumably contains the prompt followed by the answer -
    # confirm that downstream scoring expects this.
    y_pred = tokenizer.decode(response[0], skip_special_tokens=True)

    # Write through a single label-based accessor. The previous chained form
    # (df['prediction'][idx] = ...) may assign into a temporary copy, raising
    # SettingWithCopyWarning and silently dropping the value under pandas
    # Copy-on-Write.
    df.at[idx, 'prediction'] = y_pred

df.to_csv('dev_set_finetuned.csv', index=False)

In [None]:
# Export the fine-tuned model together with its tokenizer through the model's
# GGUF saver, targeting the "model" path with the "f16" quantization method.
model.save_pretrained_gguf(
    "model",
    tokenizer,
    quantization_method="f16",
)