In [None]:
# !pip install pandas datasets torch==2.3.0
# !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# !pip install xformers
# !pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
# !pip uninstall transformers -y && pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git"
# !pip install protobuf==3.20

In [None]:
import ast, time, tqdm, pandas as pd
from datasets import Dataset

import torch

from trl import SFTTrainer
from transformers import TrainingArguments

from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

In [None]:
# Input CSVs. Later cells read a `conversations` column from both files.
train_file, validation_file = './train_set.csv', './dev_set.csv'

In [None]:
# --- Base model configuration ---
train_in_4_bit = True
model_name = "unsloth/Llama-3.2-3B-Instruct"

max_seq_length = 4096

# One load call for both modes: 4-bit loading passes dtype=None, while the
# non-quantized path requests float16 weights explicitly.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=bool(train_in_4_bit),
    dtype=None if train_in_4_bit else torch.float16,
)

# Attach LoRA adapters (rank 16, alpha 16, rank-stabilised scaling, no dropout)
# to the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=[
        "q_proj", "k_proj", "v_proj",
        "up_proj", "down_proj",
        "o_proj", "gate_proj",
    ],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state=5000,
)

In [None]:
# Install the "chatml" chat template on the tokenizer. The mapping tells the
# template which keys/speaker labels the conversation dicts use
# ("from"/"value" fields, "human"/"gpt" speakers).
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt",
    },
)

def apply_template(examples):
    """Render every conversation in a batch into one chat-formatted string.

    Args:
        examples: Batch mapping whose "conversations" entry is an iterable of
            conversations; each one is handed unchanged to the module-level
            ``tokenizer.apply_chat_template``.

    Returns:
        A dict with a single "text" key holding the rendered strings, in the
        same order as the input conversations.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # tokenize=False -> get text back; no generation prompt is appended
        # because the conversation already contains the assistant turn(s).
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}


# Load the training CSV, parse the stringified `conversations` column back
# into Python objects, and add a rendered "text" column via apply_template.
dataset = Dataset.from_pandas(
    pd.read_csv(train_file).assign(
        conversations=lambda frame: frame['conversations'].apply(ast.literal_eval)
    )
).map(apply_template, batched=True)

In [None]:
# Sanity check: show the rendered text of the first training example (if any).
first_sample = next(iter(dataset), None)
if first_sample is not None:
    print(first_sample['text'])

In [None]:
# Mixed-precision flags: in 4-bit mode use bf16 when it is supported and fp16
# otherwise; in the non-quantized mode always use fp16.
arg_bf16 = is_bfloat16_supported() if train_in_4_bit else False
arg_fp16 = not arg_bf16

training_args = TrainingArguments(
    output_dir="output",
    seed=0,
    # optimisation schedule
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    # precision
    fp16=arg_fp16,
    bf16=arg_bf16,
    # log every step
    logging_steps=1,
)

# Supervised fine-tuning on the rendered "text" column, with sequence packing.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=training_args,
)
trainer.train()

In [None]:
# Generate a prediction for every dev-set row and write the results to CSV.
df = pd.read_csv(validation_file)
# The `conversations` column is stored as a stringified Python literal.
df['conversations'] = df['conversations'].apply(ast.literal_eval)
df['prediction'] = ''

# Switch the model to Unsloth's inference mode. The call is made for its
# effect on `model` rather than rebinding the name, so the loop below does not
# depend on what the call returns.
# NOTE(review): some Unsloth releases appear to return None here - confirm.
FastLanguageModel.for_inference(model)
for idx, row in tqdm.tqdm(df.iterrows(), desc='Evaluating Dev Set', total=df.shape[0]):
    # Only the first turn of the conversation is used as the prompt.
    prompt = [row['conversations'][0]]
    inputs = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    response = model.generate(input_ids=inputs, max_new_tokens=512, use_cache=True)
    # NOTE(review): the whole of response[0] is decoded; if it also contains
    # the prompt tokens, the stored text will include the prompt - confirm
    # this is what downstream scoring expects.
    y_pred = tokenizer.decode(response[0], skip_special_tokens=True)

    # Single-step, label-based write. The chained form
    # `df['prediction'][idx] = ...` assigns through an intermediate Series,
    # which pandas warns about and which does not update `df` under
    # Copy-on-Write.
    df.at[idx, 'prediction'] = y_pred

df.to_csv('dev_set_finetuned.csv', index=False)

In [None]:
# Export the fine-tuned model in GGUF format using the "f16" quantization
# method. The first argument ("model") is presumably the output location -
# TODO confirm against the Unsloth docs.
model.save_pretrained_gguf(
    "model",
    tokenizer,
    quantization_method="f16",
)