# Phi3 Fine Tuning
- LoRA 방식 파인튜닝

In [None]:
!pip install -qqq --upgrade bitsandbytes transformers peft accelerate datasets trl flash_attn wandb

In [None]:
!pip install huggingface_hub

!pip install python-dotenv

In [None]:
!pip install absl-py nltk rouge_score

In [None]:
!pip list | grep transformers.

In [None]:
from random import randrange

import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, TaskType, PeftModel

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)

from trl import SFTTrainer

In [None]:
# Global settings shared by the cells below.

# Base checkpoint on the Hugging Face Hub; both names hold the same id.
model_id = "microsoft/Phi-3-mini-4k-instruct"
model_name = model_id

# Dataset repository and the split that is loaded from it.
dataset_name = "oz1115/korea_summary_thesis"
dataset_split = "train"

# Name of the fine-tuned model and the Hub repository derived from it.
new_model = "phi3_fine_tuning"
hf_model_repo = f"wonik-hi/{new_model}"

# Passed as `device_map` when models are loaded.
device_map = {"": 0}

# LoRA hyperparameters and the module names the adapter is attached to.
lora_r = 16
lora_alpha = 16
lora_dropout = 0.05
target_modules = [
    'k_proj',
    'q_proj',
    'v_proj',
    'o_proj',
    "gate_proj",
    "down_proj",
    "up_proj",
]

set_seed(1234)

In [None]:
# Connect to the Hugging Face Hub
from huggingface_hub import notebook_login
notebook_login()

In [None]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

# Read variables from a local .env file into the process environment.
load_dotenv()

# Log in with the token stored in HF_HUB_TOKEN; os.getenv returns None when
# the variable is not set.
login(token=os.getenv("HF_HUB_TOKEN"))

In [None]:
# Load the configured dataset split, then show its size and one random row.

dataset = load_dataset(dataset_name, split=dataset_split)

num_rows = len(dataset)
print(f"dataset size: {num_rows}")
print(dataset[randrange(num_rows)])

In [None]:
# Debug-only subset: keep at most the first 5,000 rows so a trial run finishes
# quickly. Remove this cell for a real training run.
# min() keeps the index range inside the dataset, so a dataset with fewer than
# 5,000 rows is used whole instead of failing on out-of-range indices.
dataset = dataset.select(range(min(5000, len(dataset))))

In [None]:
dataset

In [None]:
print(dataset[randrange(len(dataset))])

In [None]:
# Load the tokenizer to prepare the dataset

tokenizer_id = model_id
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
tokenizer.padding_side = "right"

In [None]:
# Chat 
def create_message_column(row):
    """Build a two-turn chat (user, then assistant) from one dataset row.

    Reads row['instruction'], row['input'] and row['output'].

    Returns:
        A dict with a single "messages" key holding the user turn followed
        by the assistant turn, each as {"content": ..., "role": ...}.
    """
    user_turn = {
        "content": f"{row['instruction']}\n Input: {row['input']}",
        "role": "user",
    }
    assistant_turn = {
        "content": f"{row['output']}",
        "role": "assistant",
    }
    return {"messages": [user_turn, assistant_turn]}

def format_dataset_chatml(row):
    """Render a row's chat turns into a single training string.

    Args:
        row: mapping with a "messages" list, as produced by
            create_message_column.

    Returns:
        A dict with a single "text" key holding the chat rendered through the
        tokenizer's chat template, untokenized and without a generation prompt.
    """
    # The column written by create_message_column is "messages" (plural);
    # reading "message" raised KeyError on every row.
    return {"text": tokenizer.apply_chat_template(row["messages"], add_generation_prompt=False, tokenize=False)}

In [None]:
dataset_chatml = dataset.map(create_message_column)
dataset_chatml = dataset_chatml.map(format_dataset_chatml)
dataset_chatml[0]

In [None]:
# Split the dataset into train and test sets (5% held out for testing)
dataset_chatml = dataset_chatml.train_test_split(test_size=0.05, seed=1234)
dataset_chatml

### Using LoRA and trl

In [None]:
if torch.cuda.is_bf16_supported():
    compute_dtype = torch.bfloat16
    attn_implementation = 'flash_attention_2'
else:
    compute_dtype = torch.float16
    attn_implementation = 'sdpa'

print(attn_implementation)

In [None]:
# NOTE(review): model_name is reassigned to the same value it already had and
# is not read anywhere in this cell.
model_name = "microsoft/Phi-3-mini-4k-instruct"

# Reload the tokenizer for training, this time with add_eos_token=True.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)

# Reuse the unknown token as the padding token and keep pad_token_id in sync.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# NOTE(review): padding is set to 'left' here, while the tokenizer loaded
# earlier for dataset formatting used 'right' — confirm 'left' is intended
# for training.
tokenizer.padding_side = 'left'

# Load the base model with the dtype and attention implementation chosen in
# the previous cell, placed according to device_map.
model = AutoModelForCausalLM.from_pretrained(
          model_id, torch_dtype=compute_dtype, trust_remote_code=True, device_map=device_map,
          attn_implementation=attn_implementation
)

In [None]:
# Training hyperparameters (TrainingArguments) followed by the LoRA adapter
# configuration (LoraConfig).
# NOTE(review): seed=42 below differs from the set_seed(1234) calls elsewhere
# in this notebook — confirm which seed is intended.
# NOTE(review): with the 5,000-row debug subset (95% train split), batch size 8
# and 8 gradient-accumulation steps give roughly 74 optimizer steps per epoch
# on one GPU, so eval_steps=100 / logging_steps=100 would presumably never
# fire in a 1-epoch run — verify.
# NOTE(review): `evaluation_strategy` is, as far as I know, deprecated in
# favour of `eval_strategy` in newer transformers releases — confirm against
# the installed version.
args = TrainingArguments(
        output_dir="./phi-3-mini-LoRA",
        evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=8,
        per_device_eval_batch_size=8,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=100,
        learning_rate=1e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on bf16 support.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=100,
        num_train_epochs=1,
        #num_train_epochs=3,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        report_to="wandb",
        seed=42,
)

# LoRA adapter settings, taken from the global parameters cell.
peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        task_type=TaskType.CAUSAL_LM,
        target_modules=target_modules,
)

In [None]:
# 모니터링을 위해 wandb 이용
import wandb

wandb.login()

In [None]:
# Start a Weights & Biases run for this training job.
# NOTE(review): the project and run names mention "python-code" and "3e",
# while this notebook loads a Korean summary dataset and sets
# num_train_epochs=1 — confirm the names are intended.
project_name = "Phi3-mini-ft-python-code"

wandb.init(project=project_name, name = "phi-3-mini-ft-py-3e")

In [None]:
# Build the supervised fine-tuning trainer: it trains `model` with the LoRA
# `peft_config` on the "text" column of the train split and evaluates on the
# test split, using the TrainingArguments in `args`.
# NOTE(review): dataset_text_field / max_seq_length / tokenizer are passed
# straight to SFTTrainer; whether the installed trl release still accepts them
# here depends on its version — confirm.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_chatml['train'],
    eval_dataset=dataset_chatml['test'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=args
)

In [None]:
trainer.train()
trainer.save_model()

### HuggingFace 저장

In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Trainer.push_to_hub() takes a commit message as its first positional
# argument, not a repository id, so the original call used the repo name as a
# commit message. Push the trained adapter and the tokenizer explicitly to the
# intended repository instead.
trainer.model.push_to_hub("wonik-hi/phi3_adapter")
tokenizer.push_to_hub("wonik-hi/phi3_adapter")

In [None]:
# Merge: drop the training-time model and trainer references before the
# adapter is reloaded for merging.

del model
del trainer
import gc # use the garbage collector to manage memory

gc.collect()
gc.collect()

In [None]:
torch.cuda.empty_cache()

In [None]:
gc.collect()

In [None]:
from peft import AutoPeftModelForCausalLM

# Reload the model together with the trained adapter from the trainer's
# output directory.
# The result is bound to `peft_model` rather than `new_model`, so the
# model-name string from the global parameters cell is not overwritten by a
# model object.
peft_model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    low_cpu_mem_usage=True,
    return_dict=True,
    # Same dtype that was selected for training; bf16 was hard-coded before,
    # which disagreed with the fp16 fallback chosen on GPUs without bf16.
    torch_dtype=compute_dtype,
    trust_remote_code=True,
    device_map=device_map,
)

# Merge the adapter into the base model and drop the PEFT wrapper.
merged_model = peft_model.merge_and_unload()

# Save the merged weights (safetensors) and the tokenizer side by side.
merged_model.save_pretrained("merged_model", trust_remote_code=True, safe_serialization=True)

tokenizer.save_pretrained("merged_model")

In [None]:
merged_model.push_to_hub(hf_model_repo)

tokenizer.push_to_hub(hf_model_repo)

In [None]:
hf_model_repo

In [None]:
hf_model_repo = 'wonik-hi/phi3_fine_tuning' if not hf_model_repo else hf_model_repo

In [None]:
# Reload the tokenizer and model from the Hugging Face Hub repository named by
# hf_model_repo.
# NOTE(review): the original comment described this step as fine-tuning, but
# the cells that follow only run text generation with the loaded model.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

set_seed(1234)  # For reproducibility

tokenizer = AutoTokenizer.from_pretrained(hf_model_repo,trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(hf_model_repo, trust_remote_code=True, torch_dtype="auto", device_map="cuda")


In [None]:
dataset_chatml = dataset.map(create_message_column)

dataset_chatml = dataset_chatml.map(format_dataset_chatml)

dataset_chatml = dataset_chatml.train_test_split(test_size=0.05, seed=1234)

dataset_chatml

In [None]:
dataset_chatml['test'][0]

In [None]:
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
pipe.tokenizer.apply_chat_template([{"role": "user", "content": dataset_chatml['test'][0]['messages'][0]['content']}], tokenize=False, add_generation_prompt=True)

In [None]:
def test_inference(prompt):
    """Generate the model's reply to a single user prompt.

    The prompt is wrapped as a one-turn user chat and rendered with the
    pipeline tokenizer's chat template (generation prompt appended). Up to
    256 new tokens are sampled, and the generated text with its first
    len(rendered prompt) characters removed is returned, stripped.
    """
    chat = [{"role": "user", "content": prompt}]
    rendered = pipe.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    generation_kwargs = dict(
        max_new_tokens=256,
        do_sample=True,
        num_beams=1,
        temperature=0.3,
        top_k=50,
        top_p=0.95,
        max_time=180,
    )
    outputs = pipe(rendered, **generation_kwargs)
    full_text = outputs[0]['generated_text']
    # Slicing by the prompt length assumes the output begins with the prompt.
    return full_text[len(rendered):].strip()

In [None]:
test_inference(dataset_chatml['test'][0]['messages'][0]['content'])

## 성능 검증

In [None]:
# Load the ROUGE metric used to score generated summaries.
# NOTE(review): `datasets.load_metric` was deprecated in favour of the separate
# `evaluate` package and is, as far as I know, absent from recent `datasets`
# releases — confirm the installed version still provides it.
from datasets import load_metric
rouge_metric = load_metric("rouge", trust_remote_code=True)

In [None]:
def calculate_rogue(row):
    """Score one row: generate a reply and compare it with the reference.

    Runs test_inference on the first message's content, computes ROUGE against
    row['output'], and returns each score's mid f-measure multiplied by 100,
    plus the generated text under the 'response' key.
    """
    response = test_inference(row['messages'][0]['content'])
    scores = rouge_metric.compute(
        predictions=[response],
        references=[row['output']],
        use_stemmer=True,
    )
    return {
        **{name: score.mid.fmeasure * 100 for name, score in scores.items()},
        'response': response,
    }

In [None]:
%%time
#metricas = dataset_chatml['test'].select(range(0,500)).map(calculate_rogue, batched=False)
metricas = dataset_chatml['test'].select(range(0,30)).map(calculate_rogue, batched=False)


In [None]:
import numpy as np

In [None]:
# Mean of each ROUGE column over the evaluated rows, formatted for display.
# Note that `rouge3` holds the ROUGE-L mean.
rouge1 = f"Rouge 1 Mean: {np.mean(metricas['rouge1'])!s}"
rouge2 = f"Rouge 2 Mean: {np.mean(metricas['rouge2'])!s}"
rouge3 = f"Rouge L Mean: {np.mean(metricas['rougeL'])!s}"
Lsum = f"Rouge Lsum Mean: {np.mean(metricas['rougeLsum'])!s}"

In [None]:
rouge1

In [None]:
rouge2

In [None]:
rouge3

In [None]:
Lsum