# Fine-Tuning Model Notebook
This notebook walks through fine-tuning a causal language model with LoRA (Low-Rank Adaptation) adapters.

## LoRA Configuration
Define the configuration parameters for the LoRA adapter.

In [None]:
from peft import LoraConfig, TaskType

# LoRA adapter settings, later passed to get_peft_model.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,   # the model is trained as a causal language model
    target_modules="all-linear",    # which layers receive trainable adapters
    r=8,                            # rank of the low-rank update matrices
    lora_alpha=16,                  # scaling factor applied to the LoRA update
    lora_dropout=0.01,              # dropout probability on the LoRA layers
)

## Tokenizer and Model Initialization
Load the tokenizer and model, and apply the LoRA modifications.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model

# Identifier of the base checkpoint handed to both from_pretrained calls.
model_id = "microsoft/Phi-4-mini-instruct"

# Padding to a fixed length needs a pad token; reuse EOS when none is defined.
tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the base causal LM and wrap it with the LoRA configuration in one step.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(model_id),
    lora_config,
)

## Tokenization and Data Preparation
Define a function to tokenize each message and prepare the dataset.

In [None]:
from datasets import load_dataset

def tokenize_message(json_messages):
    """Tokenize one chat example into fixed-length causal-LM training features.

    Args:
        json_messages: A dataset row whose ``"messages"`` entry holds the chat
            turns in the format accepted by ``tokenizer.apply_chat_template``.

    Returns:
        The tokenizer output, padded/truncated to 512 tokens, with an added
        ``"labels"`` list: a copy of ``input_ids`` in which every padding
        position is replaced by -100 so that it is ignored by the loss.
    """
    messages = json_messages["messages"]
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    tokenized = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_attention_mask=True,  # the mask is needed below to locate padding
    )
    # Mask padding via the attention mask rather than by token id: the pad
    # token may share its id with EOS, and genuine EOS tokens in the text must
    # still contribute to the loss. Without this, every padded position would
    # be trained on as a target.
    tokenized["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(
            tokenized["input_ids"], tokenized["attention_mask"]
        )
    ]
    return tokenized

# Path to the JSON Lines file holding the chat-formatted training examples.
chatml_path = "../data/yugioh_rulebook_chatml.jsonl"

# Load the file as the "train" split and tokenize it one row at a time.
train_dataset = load_dataset(
    "json",
    data_files=chatml_path,
    split="train",
).map(tokenize_message, batched=False)

## Trainer Initialization and Training
Initialize the Trainer with training arguments and datasets, then start the training process.

In [None]:
from transformers import TrainingArguments, Trainer

# Training hyperparameters; everything not listed keeps the library default.
training_args = TrainingArguments(
    learning_rate=3e-4,
    num_train_epochs=3,
    output_dir="./results",
)

# Tie the LoRA-wrapped model, the tokenized dataset and the arguments together.
trainer = Trainer(args=training_args, model=model, train_dataset=train_dataset)

# Run the fine-tuning loop.
trainer.train()

## Model Saving
After training, save the fine-tuned LoRA adapter to the adapter path.

In [None]:
# Directory that receives the output of save_pretrained.
adapter_path = "../adapters/yugioh_phi_4"

# Persist the trained model to that directory.
model.save_pretrained(save_directory=adapter_path)