In [None]:
%%capture
!pip install --upgrade pip
!pip install transformers torch trl accelerate peft datasets bitsandbytes huggingface_hub

In [None]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login for this notebook session.
# NOTE(review): presumably needed both to download the Gemma checkpoint and to
# upload the merged model in the last cell - confirm.
notebook_login()

In [1]:
import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, LoraConfig

In [None]:
# Base checkpoint to fine-tune: loaded in bfloat16 onto the first CUDA device,
# with the "eager" attention implementation selected explicitly.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",
    device_map="cuda:0",
    attn_implementation="eager",
    torch_dtype=torch.bfloat16,
)

# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")

In [5]:
# LoRA adapter settings: rank 64 with alpha 256, no dropout, no bias training.
# Adapters are attached to every module whose name appears in target_modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=256,
    lora_dropout=0,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

In [6]:
# Wrap the base model with the LoRA adapters described by lora_config; this
# wrapped model is what gets trained, used for generation and saved below.
peft_model = get_peft_model(model, lora_config)

In [7]:
def tool_call_format(system, tools, user, response):
    """Render one tool-calling training example in Gemma's turn markup.

    The user turn carries the system instruction, the tool list wrapped in
    ``<tools>`` tags and the user's query; the model turn carries the target
    tool call wrapped in ``<tool_call>`` tags.

    Args:
        system: Instruction text placed at the top of the user turn.
        tools: Tool descriptions inserted between the ``<tools>`` tags.
        user: The user's query.
        response: Target tool call inserted between the ``<tool_call>`` tags.

    Returns:
        The example as one string, one element per line, ending in a newline.
    """
    # Assemble from explicit lines instead of a code-indented triple-quoted
    # literal: that literal baked 16 spaces of source indentation into every
    # line of the training text (plus a whitespace tail), which does not match
    # the unindented prompt used at inference time.
    # f-strings keep the original str() conversion of non-string values.
    lines = [
        "<start_of_turn>user",
        f"{system}",
        "<tools>",
        f"{tools}",
        "</tools>",
        f"{user}",
        "<end_of_turn>",
        "<start_of_turn>model",
        "<tool_call>",
        f"{response}",
        "</tool_call>",
        "<end_of_turn>",
    ]
    return "\n".join(lines) + "\n"


def rag_format(system, context, user, response):
    """Render one retrieval-augmented training example in Gemma's turn markup.

    The user turn carries the system instruction, the retrieved context
    wrapped in ``<context>`` tags and the user's query; the model turn carries
    the target answer.

    Args:
        system: Instruction text placed at the top of the user turn.
        context: Reference text inserted between the ``<context>`` tags.
        user: The user's query.
        response: Target answer placed in the model turn.

    Returns:
        The example as one string, one element per line, ending in a newline.
    """
    # Assemble from explicit lines instead of a code-indented triple-quoted
    # literal: that literal baked 16 spaces of source indentation into every
    # line of the training text (plus a whitespace tail), which does not match
    # the unindented prompt used at inference time.
    # f-strings keep the original str() conversion of non-string values.
    lines = [
        "<start_of_turn>user",
        f"{system}",
        "<context>",
        f"{context}",
        "</context>",
        f"{user}",
        "<end_of_turn>",
        "<start_of_turn>model",
        f"{response}",
        "<end_of_turn>",
    ]
    return "\n".join(lines) + "\n"


def general_format(system, user, response):
    """Render one plain question/answer training example in Gemma's turn markup.

    The user turn carries the system instruction followed by the user's
    message; the model turn carries the target answer.

    Args:
        system: Instruction text placed at the top of the user turn.
        user: The user's message.
        response: Target answer placed in the model turn.

    Returns:
        The example as one string, one element per line, ending in a newline.
    """
    # Assemble from explicit lines instead of a code-indented triple-quoted
    # literal: that literal baked 16 spaces of source indentation into every
    # line of the training text (plus a whitespace tail), which does not match
    # the unindented prompt used at inference time.
    # f-strings keep the original str() conversion of non-string values.
    lines = [
        "<start_of_turn>user",
        f"{system}",
        f"{user}",
        "<end_of_turn>",
        "<start_of_turn>model",
        f"{response}",
        "<end_of_turn>",
    ]
    return "\n".join(lines) + "\n"

In [8]:
# End-of-sequence token string taken from the tokenizer; it is appended to
# every formatted training example in formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token

# System instructions, one per example type (tool calling, context-grounded
# answering, general Q&A). They are embedded verbatim in the training text, so
# the same wording should be used when prompting the fine-tuned model.
# Fixed a typo in the tool-calling instruction: "has to a set of tools" ->
# "has access to a set of tools".
tool_call_system = "You are a helpful AI assistant that has access to a set of tools listed between the <tools> xml tags that you may use to help the user. Only use them if the user query requires them. For each tool call return a json object with the name of the tool and its arguments surrounded by <tool_call> xml tags"
rag_system = "You are a helpful AI assistant and you should answer the user's query based on the provided context. Try to answer briefly and clearly. Make sure to derive your answer from the context as much as possible."
general_system = "You are a helpful AI assistant and you answer the user's questions accurately and clearly. For more complex questions you may write down your reasoning steps to avoid mistakes."

In [9]:
def formatting_prompts_func(examples):
    """Turn one dataset record into the training text for the trainer.

    The template is chosen from the record's content: a truthy ``tools`` field
    selects the tool-calling layout, otherwise a truthy ``context`` field
    selects the context-grounded layout, otherwise the general layout is used.
    ``EOS_TOKEN`` is appended to the rendered text in every case.

    Args:
        examples: Mapping with the keys ``"user"``, ``"tools"``, ``"context"``
            and ``"response"``.

    Returns:
        A dict with the single key ``"text"`` holding the rendered example.
    """
    question = examples["user"]
    tool_specs = examples["tools"]
    passage = examples["context"]
    answer = examples["response"]

    if tool_specs:
        rendered = tool_call_format(tool_call_system, tool_specs, question, answer)
    elif passage:
        rendered = rag_format(rag_system, passage, question, answer)
    else:
        rendered = general_format(general_system, question, answer)

    return {"text": rendered + EOS_TOKEN}

In [None]:
# Load the training split, add the rendered "text" column to every record,
# then hold out 10% of the records for evaluation (fixed seed for a
# repeatable split).
dataset = (
    load_dataset("trishonc/agent-buff", split="train")
    .map(formatting_prompts_func)
    .train_test_split(test_size=0.1, seed=42)
)

In [None]:
# Training hyper-parameters for the single-epoch LoRA run.
args = TrainingArguments(
    # Output locations and reproducibility.
    output_dir="main",
    logging_dir="logs",
    seed=3407,
    # Precision and optimisation schedule.
    bf16=True,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    weight_decay=1e-3,
    # Log every 10 steps; evaluate and checkpoint every 300 steps.
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=300,
    save_strategy="steps",
    save_steps=300,
)

# Supervised fine-tuning on the pre-rendered "text" column, without packing.
# NOTE(review): max_seq_length / dataset_text_field / packing / tokenizer are
# passed straight to SFTTrainer here; confirm the installed TRL release still
# accepts them in this position.
trainer = SFTTrainer(
    model=peft_model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=4096,
    packing=False,
)

In [None]:
# Run the fine-tuning loop; whatever train() returns is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
from transformers import TextStreamer

# Inference-time prompt: a complete user turn followed by the opening of the
# model turn, so generation continues as the model's reply.
gemma_prompt = """<start_of_turn>user
{}
<end_of_turn>
<start_of_turn>model
"""

prompt = "Fifteen sheets of colored tape, each 25 centimeters (cm) long, were joined together with an overlap of 0.5 centimeters (cm). How many meters (m) is the total length of the continuous color tape?"

# The system instruction and the question go on separate lines, as in the
# training layout produced by general_format. Plain concatenation fused the
# two into one run-on line ("...to avoid mistakes.Fifteen sheets...").
# The template has a single placeholder, so only one argument is passed.
inputs = tokenizer(
    [gemma_prompt.format(general_system + "\n" + prompt)],
    return_tensors="pt",
).to("cuda")

# Stream the generated tokens to the cell output as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = peft_model.generate(**inputs, streamer=text_streamer, max_new_tokens=500)

In [None]:
# Save the PEFT-wrapped model and the tokenizer side by side in "lora_model".
peft_model.save_pretrained("lora_model") 
tokenizer.save_pretrained("lora_model")

In [None]:
# Merge the trained LoRA weights into the base model's layers and drop the
# adapter wrappers, giving a standalone model that can be saved without PEFT.
merged_model = peft_model.merge_and_unload()

In [None]:
# Save the merged model and the tokenizer to ./merged_model, the folder that
# is uploaded in the next cell.
merged_model.save_pretrained("./merged_model")
tokenizer.save_pretrained("./merged_model")

In [None]:
from huggingface_hub import HfApi

# Destination model repository on the Hugging Face Hub.
repo_name = "trishonc/gemma-2-2b-it-buffed"
api = HfApi()

# Upload the contents of ./merged_model to that repository.
api.upload_folder(
    repo_id=repo_name,
    repo_type="model",
    folder_path="./merged_model",
)