In [2]:
 %%capture
 !pip install transformers trl accelerate torch peft datasets bitsandbytes

In [3]:
import torch

In [None]:
%%capture
!pip install "unsloth[cu121-ampere-torch220] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26"

In [None]:
!pip install huggingface_hub -q -U
from huggingface_hub import notebook_login

notebook_login()

In [4]:
from unsloth import FastLanguageModel

# Load the Llama-3 8B Instruct base model and its tokenizer through Unsloth,
# in bfloat16 without 4-bit quantization, with a 3000-token context budget.
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Meta-Llama-3-8B-Instruct",
    max_seq_length = 3000,
    dtype = torch.bfloat16,
    load_in_4bit = False,
)

# Padding must NOT reuse the EOS token: the default language-modeling collator
# replaces every label equal to pad_token_id with -100 (ignored by the loss), so
# with pad == eos the end-of-sequence token appended to each training example is
# never learned and the fine-tuned model does not know when to stop.
# Keep a distinct pad token if the loader already provided one; otherwise use
# one of the reserved special tokens that already exist in the Llama-3
# vocabulary (no embedding resize needed).
PAD_TOKEN = "<|reserved_special_token_0|>"
if tokenizer.pad_token is None or tokenizer.pad_token == tokenizer.eos_token:
    tokenizer.pad_token = PAD_TOKEN

# Right padding keeps real tokens at the start of each row during training.
tokenizer.padding_side = "right"

In [None]:
# LoRA adapter settings, collected in one place and unpacked into the call below.
lora_settings = dict(
    r = 64,
    # Attention projections followed by the MLP projections.
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 256,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
)

# Wrap the base model with the LoRA adapters described above.
peft_model = FastLanguageModel.get_peft_model(base_model, **lora_settings)

In [8]:
from datasets import load_dataset

# Llama-3 instruct chat layout with three positional slots:
# system message, user instruction, assistant response.
#
# Differences from a naive triple-quoted template, all deliberate:
#   * No leading newline: nothing may precede the start of the conversation.
#   * No literal <|begin_of_text|>: the tokenizer is called with its default
#     special-token handling (both by the trainer and by the inference cell),
#     which already prepends BOS; writing it here too would duplicate it.
#   * No trailing newline after the response slot: the same template is reused
#     for inference with an empty response, so the prompt must end exactly
#     where a training response begins (right after the header's blank line).
prompt_template = (
    "<|start_header_id|>system<|end_header_id|>\n\n"
    "{}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    "{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    "{}"
)

# Appended to every training example so the model sees where a reply ends.
EOS_TOKEN = tokenizer.eos_token


def preprocess_dataset(example):
    """Render one dataset row as a single training string.

    Args:
        example: mapping with the keys 'system_message', 'instruction' and
            'response' (a KeyError is raised if any of them is missing).

    Returns:
        dict with one key, 'text': the filled-in prompt template followed by
        the tokenizer's EOS token.
    """
    prompt = prompt_template.format(
        example['system_message'],
        example['instruction'],
        example['response'],
    ) + EOS_TOKEN

    return {'text': prompt}

In [9]:
# Read the CSV (the loader exposes it as a single 'train' split) ...
raw_dataset = load_dataset('csv', data_files='./dataset.csv')['train']
# ... add the rendered 'text' column to every row ...
formatted_dataset = raw_dataset.map(preprocess_dataset)
# ... and hold out 10% of the rows for evaluation (fixed seed for a repeatable split).
dataset = formatted_dataset.train_test_split(test_size=0.1, seed=42)

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Hyper-parameters for a single-epoch supervised fine-tuning run.
args = TrainingArguments(
    output_dir = "main",
    num_train_epochs = 1,
    #max_steps = 200,
    per_device_train_batch_size = 4,
    weight_decay = 1e-3,
    # Warm up over the first 1% of training steps. This is a *ratio*, so it
    # belongs in warmup_ratio; warmup_steps expects an absolute number of
    # steps, and a value of 0.01 there yields effectively no warmup at all.
    warmup_ratio = 0.01,
    logging_steps = 10,
    logging_dir = "logs",
    # Evaluate and checkpoint on the same step cadence.
    save_strategy = "steps",
    evaluation_strategy = "steps",
    eval_steps = 1505,
    save_steps = 1505,
    learning_rate = 1e-4,
    bf16 = True,
    lr_scheduler_type = 'cosine',
    seed = 3407,
)

# Supervised fine-tuning on the pre-rendered 'text' column, one example per
# sequence (no packing), truncated to the same 3000-token limit used when the
# model was loaded.
trainer = SFTTrainer(
    model = peft_model,
    max_seq_length = 3000,
    dataset_text_field = "text",
    tokenizer = tokenizer,
    packing = False,
    args = args,
    train_dataset = dataset['train'],
    eval_dataset = dataset['test'],
)

In [None]:
# Run the fine-tuning loop with the trainer built in the previous cell and keep
# whatever train() returns in trainer_stats for later inspection.
trainer_stats = trainer.train()

In [None]:
from transformers import TextStreamer

# Smoke test: fill the training template with an empty assistant slot and let
# the fine-tuned model stream a sample reply.
msg = "You are a song writing assistant. You always write good lyrics based on the context provided."
# Named `instruction` (not `input`) so the built-in input() is not shadowed for
# the rest of the kernel session.
instruction = "Create a rap track focused on the success, money, struggle"
output = ""  # left blank: the model writes the assistant turn

# Tokenize a batch containing the single prompt and move it to the GPU.
inputs = tokenizer(
    [prompt_template.format(msg, instruction, output)],
    return_tensors = "pt",
).to("cuda")

# Streams the decoded text while generation is still running.
text_streamer = TextStreamer(tokenizer)
outputs = peft_model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    # Sampling settings: temperature / top-k / top-p only apply because
    # do_sample=True.
    temperature=0.75,
    do_sample=True,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.2,
)


In [None]:
# Save the LoRA adapter and the tokenizer into the SAME directory so the folder
# can be reloaded on its own. Previously the tokenizer was written to a
# separate "lora" directory, leaving "lora_model" without tokenizer files.
ADAPTER_DIR = "lora_model"
peft_model.save_pretrained(ADAPTER_DIR)
tokenizer.save_pretrained(ADAPTER_DIR)

In [None]:
# Upload the model together with the tokenizer to the Hub repo
# "trishonc/llama3-8b-rap".
# NOTE(review): save_method="merged_16bit" presumably merges the LoRA weights
# into the base model and stores them in 16-bit — confirm against Unsloth docs.
peft_model.push_to_hub_merged("trishonc/llama3-8b-rap", tokenizer, save_method = "merged_16bit")