In [None]:
%%capture
!pip install --upgrade pip
!pip install --upgrade torch
!pip install transformers trl accelerate peft datasets bitsandbytes huggingface_hub
!pip install "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub. The last cell of this notebook pushes the
# merged model to the Hub.
notebook_login()

In [None]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [None]:
# Load the base checkpoint and its tokenizer without 4-bit quantisation.
# The dtype follows the same hardware check the notebook imports for training
# (is_bfloat16_supported): bfloat16 where available, float16 otherwise, so the
# loaded weights never assume bf16 on a GPU that lacks it.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2-2b-it",
    max_seq_length = 4096,
    dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16,
    load_in_4bit = False,
)

In [None]:
# Wrap the loaded model with LoRA adapters (rank 64, alpha 256, no dropout,
# no bias terms) on the listed projection modules. The result is bound to
# `peft_model`; `model` keeps referring to the object returned by
# from_pretrained.
peft_model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=64,
    lora_alpha=256,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing=True,
    random_state=3407,
)

In [None]:
def _render_turns(user_lines, model_lines):
    """Render one user turn followed by one model turn as a single string.

    Every item is placed on its own line, with no leading or trailing padding.
    Building the text by joining lines (instead of an indented triple-quoted
    f-string) keeps the source-code indentation out of the training text and
    leaves multi-line values untouched.

    Args:
        user_lines: Items that make up the body of the user turn, in order.
        model_lines: Items that make up the body of the model turn, in order.

    Returns:
        The rendered conversation, terminated by a single newline.
    """
    lines = [
        "<start_of_turn>user",
        *user_lines,
        "<end_of_turn>",
        "<start_of_turn>model",
        *model_lines,
        "<end_of_turn>",
    ]
    # str() keeps the old f-string tolerance for non-string values.
    return "\n".join(str(line) for line in lines) + "\n"


def tool_call_format(system, tools, user, response):
    """Format a tool-calling example.

    The user turn holds the system prompt, the tool list wrapped in
    <tools>...</tools>, and the user query. The model turn holds the response
    wrapped in <tool_call>...</tool_call>.

    Args:
        system: System instructions placed at the top of the user turn.
        tools: Tool descriptions placed between the <tools> tags.
        user: The user's query.
        response: The target tool call placed between the <tool_call> tags.

    Returns:
        The formatted training text.
    """
    return _render_turns(
        [system, "<tools>", tools, "</tools>", user],
        ["<tool_call>", response, "</tool_call>"],
    )


def rag_format(system, context, user, response):
    """Format a retrieval-augmented example.

    The user turn holds the system prompt, the context wrapped in
    <context>...</context>, and the user query. The model turn holds the
    response.

    Args:
        system: System instructions placed at the top of the user turn.
        context: Reference text placed between the <context> tags.
        user: The user's query.
        response: The target answer.

    Returns:
        The formatted training text.
    """
    return _render_turns(
        [system, "<context>", context, "</context>", user],
        [response],
    )


def general_format(system, user, response):
    """Format a plain question/answer example.

    The user turn holds the system prompt followed by the user query. The
    model turn holds the response.

    Args:
        system: System instructions placed at the top of the user turn.
        user: The user's query.
        response: The target answer.

    Returns:
        The formatted training text.
    """
    return _render_turns([system, user], [response])

In [None]:
# End-of-sequence token of the loaded tokenizer; appended to every training example.
EOS_TOKEN = tokenizer.eos_token

# System prompts, one per example type (tool calling, context-grounded answering,
# general Q&A). They are inserted at the top of the user turn by the *_format helpers.
# Fixed wording: the tool-calling prompt previously read "that has to a set of tools".
tool_call_system = "You are a helpful AI assistant that has access to a set of tools listed between the <tools> xml tags that you may use to help the user. Only use them if the user query requires them. For each tool call return a json object with the name of the tool and its arguments surrounded by <tool_call> xml tags"
rag_system = "You are a helpful AI assistant and you should answer the user's query based on the provided context. Try to answer briefly and clearly. Make sure to derive your answer from the context as much as possible."
general_system = "You are a helpful AI assistant and you answer the user's questions accurately and clearly. For more complex questions you may write down your reasoning steps to avoid mistakes."

In [None]:
def formatting_prompts_func(examples):
    """Turn one dataset record into its training text.

    The template is chosen from the record's contents: a truthy "tools" field
    selects the tool-calling template, otherwise a truthy "context" field
    selects the context template, otherwise the general template is used.
    EOS_TOKEN is appended to the rendered text.

    Args:
        examples: Mapping with the keys "user", "tools", "context" and
            "response".

    Returns:
        A dict with a single key, "text", holding the formatted example.
    """
    query = examples["user"]
    tool_spec = examples["tools"]
    passage = examples["context"]
    answer = examples["response"]

    if tool_spec:
        rendered = tool_call_format(tool_call_system, tool_spec, query, answer)
    elif passage:
        rendered = rag_format(rag_system, passage, query, answer)
    else:
        rendered = general_format(general_system, query, answer)

    return {"text": rendered + EOS_TOKEN}

In [None]:
# Load the "train" split, add the formatted "text" column to every record, then
# hold out 10% of the records as a test split (fixed seed for a repeatable split).
dataset = (
    load_dataset("trishonc/agent-buff", split="train")
    .map(formatting_prompts_func)
    .train_test_split(test_size=0.1, seed=42)
)

In [None]:
# Training configuration: one epoch, effective batch of 4 x 4 accumulated steps
# per device, cosine schedule with 25 warm-up steps, evaluation and checkpoint
# every 85 steps.
args = TrainingArguments(
  output_dir = "main",
  num_train_epochs = 1,
  per_device_train_batch_size = 4,
  gradient_accumulation_steps = 4,
  weight_decay = 1e-3,
  warmup_steps = 25,
  logging_steps = 10,
  logging_dir="logs",
  save_strategy = "steps",
  # NOTE(review): newer transformers releases name this argument `eval_strategy`;
  # switch if the installed version rejects `evaluation_strategy`.
  evaluation_strategy= "steps",
  eval_steps = 85,
  save_steps = 85,
  learning_rate = 1e-4,
  # is_bfloat16_supported is a function and must be called. Passing the bare
  # function made `not ...` always False and gave `bf16` a non-bool object, so
  # the fp16 fallback could never be selected.
  fp16 = not is_bfloat16_supported(),
  bf16 = is_bfloat16_supported(),
  lr_scheduler_type = 'cosine',
  seed = 3407, 
)

# Supervised fine-tuning on the "text" column produced by formatting_prompts_func,
# training on the "train" split and evaluating on the held-out "test" split.
trainer = SFTTrainer(
  model=peft_model,
  max_seq_length = 4096,
  dataset_text_field = "text",
  tokenizer=tokenizer,
  packing=False,
  args=args,
  train_dataset=dataset['train'],
  eval_dataset=dataset['test'],
)

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
from transformers import TextStreamer

# Quick sanity check of the fine-tuned model on a single word problem.
question = "Lee had $10 and his friend had $8. They went to a restaurant where they ordered chicken wings for $6 and a chicken salad for some amount. They also got 2 sodas for $1.00 each. The tax came to $3. They received $3 in change in total. How much did the chicken salad cost?"

# general_format() renders a *complete* example, including the model's reply and
# the closing <end_of_turn>. Feeding that to generate() would ask the model to
# continue after an already finished (empty) answer. Instead, keep everything up
# to and including the model-turn header so the model writes the reply itself.
model_turn_header = "<start_of_turn>model"
rendered = general_format(general_system, question, "")
prompt = rendered[: rendered.rindex(model_turn_header)] + model_turn_header + "\n"

# Use the LoRA-wrapped model that was trained, consistent with the save/push cells.
FastLanguageModel.for_inference(peft_model)
inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

# Stream tokens to the cell output as they are generated.
text_streamer = TextStreamer(tokenizer)
_ = peft_model.generate(**inputs, streamer = text_streamer, max_new_tokens = 500)

In [None]:
# Write the PEFT model and the tokenizer to the local "lora_model" directory.
peft_model.save_pretrained("lora_model") 
tokenizer.save_pretrained("lora_model")

In [None]:
# Local alternative (disabled): model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit")
# Upload to the Hub repository "trishonc/gemma-2-2b-it-buffed" using the
# "merged_16bit" save method. Requires the Hub login performed earlier in the notebook.
peft_model.push_to_hub_merged("trishonc/gemma-2-2b-it-buffed", tokenizer, save_method = "merged_16bit")