In [1]:
!pip install -q datasets
!pip install -q transformers==4.30
!pip install -q peft
!pip install -q trl
!pip install -q accelerate -U
!pip install -q bitsandbytes

Collecting transformers==4.30
  Using cached transformers-4.30.0-py3-none-any.whl (7.2 MB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.30)
  Using cached tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.2
    Uninstalling tokenizers-0.15.2:
      Successfully uninstalled tokenizers-0.15.2
  Attempting uninstall: transformers
    Found existing installation: transformers 4.39.3
    Uninstalling transformers-4.39.3:
      Successfully uninstalled transformers-4.39.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
trl 0.8.1 requires transformers>=4.31.0, but you have transformers 4.30.0 which is incompatible.[0m[31m
[0mSuccessfully installed tokenizers-0.13.3 

In [58]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [59]:
from datasets import load_dataset
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig

import torch

In [77]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset",  split="train")

In [78]:
ds = dataset.train_test_split(test_size=0.3)
ds

DatasetDict({
    train: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 18810
    })
    test: Dataset({
        features: ['flags', 'instruction', 'category', 'intent', 'response'],
        num_rows: 8062
    })
})

In [79]:
training_args = TrainingArguments(
    output_dir="/content/gdrive/MyDrive/llm/fine-tuned-fb-opt-125",
    dataloader_drop_last=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=20,
    logging_steps=5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    gradient_accumulation_steps=1,
    gradient_checkpointing=False,
    fp16=False,
    weight_decay=0.05,
    ddp_find_unused_parameters=False,
    run_name="fb-opt-125",
)

In [81]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM
import torch

nf4_config = BitsAndBytesConfig(
   load_in_8bit=True,
   bnb_4bit_quant_type="fp4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", quantization_config=nf4_config)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [82]:
from torch import nn

for param in model.parameters():
  param.requires_grad = False
  if param.ndim == 1:
    param.data = param.data.to(torch.float16)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()
class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float16)
model.lm_head = CastOutputToFloat(model.lm_head)

In [83]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [85]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
model.to(device)

trainable params: 589,824 || all params: 125,829,120 || trainable%: 0.46875


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): OPTForCausalLM(
          (model): OPTModel(
            (decoder): OPTDecoder(
              (embed_tokens): Embedding(50272, 768, padding_idx=1)
              (embed_positions): OPTLearnedPositionalEmbedding(2050, 768)
              (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (layers): ModuleList(
                (0-11): 12 x OPTDecoderLayer(
                  (self_attn): OPTAttention(
                    (k_proj): Linear8bitLt(in_features=768, out_features=768, bias=True)
                    (v_proj): lora.Linear8bitLt(
                      (base_layer): Linear8bitLt(in_features=768, out_features=768, bias=True)
                      (lora_dropout): ModuleDict(
                        (default): Dropout(p=0.05, inplace=False)
                      )
                      (lora_A): ModuleDict(
               

In [86]:
from trl import SFTTrainer
from trl.trainer import ConstantLengthDataset

def formatting_func(example):
    # print ("formatting_func : ", example)
    text = f"### Question: {example['instruction']}\n  ### Category: {example['category']}\n ### Answer: {example['response']}"
    return text

train_dataset = ConstantLengthDataset(
    tokenizer,
    ds['train'],
    formatting_func=formatting_func,
    seq_length=1024
)

eval_dataset = ConstantLengthDataset(
    tokenizer,
    ds['test'],
    formatting_func=formatting_func,
    seq_length=1024
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    max_seq_length=1024,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    packing=True,
)

print("Training...")
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Training...


Epoch,Training Loss,Validation Loss
0,1.8006,1.709961
1,1.601,1.482422
2,1.4492,1.363281
3,1.373,1.291016
4,1.3068,1.241211
5,1.316,1.216797
6,1.3248,1.210938
7,1.251,1.177734
8,1.2443,1.150391
9,1.1934,1.130859


TrainOutput(global_step=4800, training_loss=1.2973309326171876, metrics={'train_runtime': 4976.2802, 'train_samples_per_second': 75.599, 'train_steps_per_second': 6.302, 'total_flos': 3.022538422616064e+16, 'train_loss': 1.2973309326171876, 'epoch': 19.15})

In [87]:
model = AutoModelForCausalLM.from_pretrained(
  "facebook/opt-125m", return_dict=True, torch_dtype=torch.bfloat16
)

# Load the LoRA adaptors
from peft import PeftModel

# Load the Lora model
model = PeftModel.from_pretrained(model, "/content/gdrive/MyDrive/llm/fine-tuned-fb-opt-125/checkpoint-4800/")
model.eval()
model = model.merge_and_unload()

# Save for future use
model.save_pretrained("/content/gdrive/MyDrive/llm/fine-tuned-fb-opt-125/merged")
model = model.to(device)

In [106]:
inputs = tokenizer("I am trying to cancel a purchase order no. 1234.", return_tensors="pt").to(device)
generation_output = model.generate(**inputs,
                                    return_dict_in_generate=True,
                                    output_scores=True,
                                    max_length=100,
                                    num_beams=1,
                                    do_sample=True,
                                    repetition_penalty=1.5,
                                    length_penalty=2.)

In [107]:
generation_output
print( tokenizer.decode(generation_output['sequences'][0]) )

</s>I am trying to cancel a purchase order no. 1234. Is there an option for me? That was the last time I went and it has been getting cancelled too :(
Just go do this!  You can get them at any WalMart, Target OR your local grocery store - but that is on top of their clearance specials!! :) Just look under my items & search "Walmart offers discounts," which you would find if anyone actually looked up what they are giving away/affiliate
