# Fine-Tuning Model Notebook
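This notebook fine-tunes a causal language model with LoRA (Low-Rank Adaptation): it configures the LoRA adapter, tokenizes a chat-formatted dataset, trains with the Hugging Face `Trainer`, and saves the resulting adapter.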

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset

## Configuration and Setup
Define the LoRA configuration. The model ID, dataset path, and adapter output path are defined in the sections where they are first used.

In [None]:
# LoRA adapter hyperparameters for causal language modeling.
lora_config = LoraConfig(
    r=8,                           # rank of the low-rank update
    lora_alpha=16,                 # scaling factor for the update
    lora_dropout=0.01,             # dropout probability on the LoRA layers
    bias="none",                   # bias setting passed through to PEFT
    task_type=TaskType.CAUSAL_LM,  # task the adapter is built for
)

## Tokenizer and Model Initialization
Load the tokenizer and model, and apply the LoRA modifications.

In [None]:
model_id = "meta-llama/llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Padding needs a pad token; reuse EOS when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the base causal LM and wrap it with the LoRA configuration in one step.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(model_id),
    lora_config,
)

## Tokenization
Define a function that renders each example's `messages` list with the model's chat template and tokenizes the resulting text, truncating or padding it to a fixed length.

In [None]:
def tokenize_message(json_messages, max_length=512):
    """Render one chat example with the chat template and tokenize it for training.

    Args:
        json_messages: A dataset example with a "messages" entry in the format
            accepted by ``tokenizer.apply_chat_template``.
        max_length: Fixed sequence length; longer inputs are truncated and
            shorter ones are padded to this length.

    Returns:
        The tokenizer output with an added "labels" entry. Labels equal
        ``input_ids`` on real tokens and -100 on padding positions.
    """
    messages = json_messages["messages"]
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    # The rendered template text already carries the template's special tokens,
    # so do not let the tokenizer add them a second time (e.g. a duplicate BOS).
    tokenized = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        add_special_tokens=False,
    )
    # Mask padding with -100 so the loss ignores it. The attention mask is used
    # rather than the pad token id because the pad token may be the EOS token,
    # and genuine EOS tokens must still be learned.
    tokenized["labels"] = [
        token_id if mask == 1 else -100
        for token_id, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

## Data Preparation
Load the ChatML JSONL dataset, tokenize it, and hold out 10% of the examples as a test split.

In [None]:
chatml_path = "./yugioh_rulebook_dataset_chatml.jsonl"
dataset = load_dataset("json", data_files=chatml_path)

# Tokenize one example at a time and drop the raw columns afterwards, so only
# the fields produced by tokenize_message remain in the dataset.
tokenized_datasets = dataset.map(
    tokenize_message,
    batched=False,
    remove_columns=dataset["train"].column_names,
)

# A fixed seed keeps the train/test split identical across re-runs.
train_test_datasets = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)

## Trainer Initialization and Training
Initialize the Trainer with training arguments and datasets, then start the training process.

In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    learning_rate=3e-4,
    # Without an evaluation strategy the eval_dataset below is never used.
    # NOTE: older transformers releases name this argument `evaluation_strategy`.
    eval_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test_datasets["train"],
    eval_dataset=train_test_datasets["test"],
)

trainer.train()

## Model Saving
After training, save the fine-tuned LoRA adapter to the adapter path.

In [None]:
adapter_path = "./adapters/llama_3_2"
model.save_pretrained(adapter_path)
# Save the tokenizer next to the adapter so that its configuration, including
# any pad-token fallback applied at load time, can be reloaded from one place.
tokenizer.save_pretrained(adapter_path)