### Phi-3-mini-4k-instruct 모델 불러오기

In [6]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Smoke test: load Phi-3-mini-4k-instruct and generate one chat completion.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    trust_remote_code=False,
)

messages = [
    {"role": "user", "content": "My dog vomitted since yesterday with no reason"},
]

# Render the conversation with the model's chat template, ending with the
# assistant generation prompt, and move the tensors to the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=416)

# generate() returns prompt + completion, so drop the prompt tokens before
# decoding. skip_special_tokens=True keeps markers such as <|end|> out of the
# printed answer.
prompt_len = inputs["input_ids"].shape[-1]
print(tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True))


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 27.78it/s]


I'm sorry to hear that your dog has been vomiting. Vomiting can be caused by a variety of factors, including dietary indiscretion (eating something they shouldn't have), infections, parasites, or more serious conditions like pancreatitis or gastrointestinal obstruction. It's important to monitor your dog closely. If the vomiting persists, is accompanied by other symptoms like diarrhea, lethargy, or loss of appetite, or if your dog seems to be in distress, it's crucial to seek veterinary care immediately. In the meantime, ensure your dog has access to fresh water and try to keep them calm and comfortable.<|end|>


In [None]:
%pip install -U "transformers>=4.45" accelerate safetensors

In [3]:
# pip install -U transformers datasets peft accelerate bitsandbytes trl
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTConfig, SFTTrainer

BASE = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(BASE)

# True  -> QLoRA: load the base weights 4-bit quantized.
# False -> plain LoRA: load the base weights unquantized.
USE_QLORA = True

# Quantization options go through `quantization_config`; passing
# `load_in_4bit` / `bnb_4bit_*` straight to from_pretrained() is deprecated.
quantization_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    if USE_QLORA
    else None
)

load_kwargs = dict(
    device_map="auto",
    torch_dtype="auto",
    quantization_config=quantization_config,
)
model = AutoModelForCausalLM.from_pretrained(BASE, **load_kwargs)

# The k-bit training preparation is only meant for a quantized base model.
if USE_QLORA:
    model = prepare_model_for_kbit_training(model)

# LoRA adapter configuration.
# Phi-3 fuses the attention projections into `qkv_proj` and the MLP gate/up
# projections into `gate_up_proj`. Names such as q_proj / k_proj / v_proj /
# gate_proj / up_proj match no module in this model, so listing them would
# leave adapters on o_proj and down_proj only.
lora = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora)

# Example dataset record layout:
# {"messages":[{"role":"user","content":"..."},{"role":"assistant","content":"..."}]}
data_files = {"train": "train.jsonl", "eval": "eval.jsonl"}
ds = load_dataset("json", data_files=data_files)

def format_example(ex):
    """Render one chat record as a single supervised-training string.

    Applies the tokenizer's chat template to ``ex["messages"]`` without
    tokenizing and without appending a generation prompt.

    Returns:
        dict: ``{"text": <rendered conversation>}``.
    """
    rendered = tokenizer.apply_chat_template(
        ex["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Replace the original columns with the single rendered "text" column.
original_columns = ds["train"].column_names
ds = ds.map(format_example, remove_columns=original_columns)

# Training configuration.
# SFTConfig extends TrainingArguments with the SFT-specific options (text
# field, packing, maximum sequence length). Recent TRL releases, including the
# 0.23.1 that `%pip install trl` resolves to in this notebook, no longer accept
# those options or `tokenizer=` on the SFTTrainer constructor.
args = SFTConfig(
    output_dir="phi3_lora_out",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,   # 2 x 8 = 16 sequences per optimizer step per device
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=20,
    save_steps=200,
    eval_strategy="steps",
    fp16=True,                       # bf16 is also an option on Ampere or newer GPUs
    report_to="none",
    dataset_text_field="text",       # column produced by format_example
    packing=True,                    # pack several samples into one sequence for efficiency
    max_length=2048,                 # replaces the removed `max_seq_length`
)

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=ds["train"],
    eval_dataset=ds["eval"],
    processing_class=tokenizer,      # replaces the removed `tokenizer=` argument
)

# Run fine-tuning, then write the model and the tokenizer to the same folder.
trainer.train()

adapter_dir = "phi3_lora_out/adapter"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)


ModuleNotFoundError: No module named 'trl'

In [4]:
# Install TRL: the training cell imports SFTTrainer from `trl` and failed with
# ModuleNotFoundError because the package was missing.
%pip install trl

Collecting trl
  Using cached trl-0.23.1-py3-none-any.whl.metadata (11 kB)
Collecting transformers>=4.56.1 (from trl)
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate>=1.4.0->trl)
  Using cached huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers>=4.56.1->trl)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Using cached trl-0.23.1-py3-none-any.whl (564 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
Using cached huggingface_hub-0.35.3-py3-none-any.whl (564 kB)
Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl (2.7 MB)
Installing collected packages: huggingface_hub, tokenizers, transformers, trl

  Attempting uninstall: huggingface_hub

    Found existing installation: huggingface-hub 0.29.3

    Uninstalling huggingface-hub-0.29.3:

      Successfully uninstalled huggingface-hub-0.29.3

   ----------------

  You can safely remove it manually.
