In [None]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [None]:
import gc

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

In [None]:
# Read the local JSONL file as a single "train" split.
data = load_dataset(
    "json",
    split="train",
    data_files="output.completion.jsonl",
)
# Show the first record as a quick sanity check of the loaded fields.
print(data[0])

In [None]:
# Quantization settings: 4-bit NF4 weights, float16 compute, double quantization off.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype='float16',
)

# Load OPT-1.3B with the quantization settings above; the device map assigns
# the whole model (the "" key) to device 0.
model = AutoModelForCausalLM.from_pretrained(
    'facebook/opt-1.3b',
    device_map={"": 0},
    quantization_config=bnb,
)

# NOTE(review): `pretraining_tp` looks like a Llama-specific config field and is
# presumably ignored by OPT; confirm before relying on it.
model.config.pretraining_tp = 1
# Disable the generation KV cache on the model config for training.
model.config.use_cache = False

In [None]:
# Tokenizer for the base checkpoint. `trust_remote_code` is not passed: OPT is a
# built-in architecture, so there is no repository code that needs to be trusted.
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-1.3b', use_fast=True)

# Only fall back to EOS-as-padding when the tokenizer has no pad token of its own.
# Overriding an existing pad token with EOS makes padding indistinguishable from
# end-of-sequence, and language-modeling collators that mask pad positions in the
# labels would then also drop EOS from the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess(line):
    """Collapse one record's prompt/completion pair into a single 'text' field.

    The record is modified in place: 'text' is added, then the 'prompt' and
    'completion' keys are removed. The same (mutated) mapping is returned.

    Raises:
        KeyError: if 'prompt' or 'completion' is missing from the record.
    """
    prompt, completion = line['prompt'], line['completion']
    line['text'] = f"Prompt: {prompt} --- Completion: {completion}"
    for consumed_key in ('prompt', 'completion'):
        del line[consumed_key]
    return line

# Run `preprocess` over every record, rebinding `data` to the mapped dataset,
# then print the dataset summary.
data = data.map(function=preprocess)
print(data)

In [None]:
# LoRA adapter settings for causal-LM fine-tuning, applied to the modules named
# q_proj / k_proj / v_proj.
# NOTE(review): lora_alpha=0.5 with r=16 presumably yields a very small LoRA
# scaling (alpha / r); confirm this is intended.
loraconfig = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=['k_proj', 'v_proj', 'q_proj'],
    r=16,
    lora_alpha=0.5,
    lora_dropout=0.1,
    bias='none',
)

In [None]:
# Wrap the quantized base model with the LoRA configuration defined above.
loramodel = get_peft_model(model=model, peft_config=loraconfig)

In [None]:
# Training hyper-parameters, grouped by concern. All values are passed by keyword.
train_args = TrainingArguments(
    # Output and logging
    output_dir='.',
    report_to='none',
    logging_steps=10,
    save_steps=0,
    # Run length and batching
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer and schedule
    # NOTE(review): learning_rate=0.03 is unusually large for AdamW fine-tuning;
    # confirm it is intended.
    optim='adamw_torch',
    learning_rate=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type='cosine',
    warmup_ratio=0.3,
    # Mixed precision (both disabled)
    fp16=False,
    bf16=False,
)

In [None]:
# Assemble the supervised fine-tuning trainer from the LoRA-wrapped model,
# the preprocessed dataset, the tokenizer and the training arguments.
trainer = SFTTrainer(
    args=train_args,
    model=loramodel,
    tokenizer=tokenizer,
    train_dataset=data,
)

In [None]:
# Run the fine-tuning loop configured on `trainer`.
trainer.train()

In [None]:
# Write the trained model held by the trainer to the ./new_model directory.
trainer.model.save_pretrained('new_model')

# Freshly loaded base checkpoint, kept as the reference model.
baseline = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b')

# Load a second, separate copy of the base checkpoint and attach the saved
# 'new_model' weights to it, so `baseline` is not the object being wrapped.
# Alternative left disabled by the author: wrap `baseline` itself, call
# merge_and_unload() on the result, and save the merged model to '.'.
finetuned_model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b'),
    'new_model',
)

In [None]:
# Print the configuration of the reference (baseline) model for inspection.
print(baseline.config)

In [None]:
from transformers import pipeline
# Rebind `tokenizer` to a freshly loaded tokenizer for the base checkpoint, used by the inference cell below.
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-1.3b')

In [None]:
# Query the fine-tuned model with a sample question.
text = 'Where is Atlanta?'

# Use the same "Prompt: ... --- Completion: ..." template the training text was
# built with, stopping right after "Completion:" so the model fills in the answer.
prompt = f"Prompt: {text} --- Completion:"
tokenized = tokenizer(prompt, return_tensors='pt', padding=True)

# Pass the encoding by keyword: this supplies the attention mask along with the
# input ids and also works with PEFT wrappers whose generate() only accepts
# keyword arguments. max_new_tokens bounds the answer length explicitly instead
# of relying on the short default total length.
output = finetuned_model.generate(**tokenized, max_new_tokens=50)

generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

In [None]:
# Release the GPU memory held by the training objects.
# `loramodel` wraps `model`, so it must be deleted too; otherwise the quantized
# model stays referenced and nothing is actually freed.
del model, loramodel, trainer
# Collect unreachable objects (including reference cycles) before asking the
# CUDA caching allocator to hand its unused blocks back.
gc.collect()
torch.cuda.empty_cache()