# Fine tuning LLaMA 3.1

Mounting Google Drive

In [None]:
import os
from google.colab import drive

DRIVE_PATH = '/content/drive/MyDrive/ML_Project/'

drive.mount('/content/drive')
os.makedirs(DRIVE_PATH, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%pip install -U bitsandbytes
%pip install datasets



In [None]:
from huggingface_hub import login

login(token="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")

### Loading Tokenizer & Model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, device_map="auto")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

### Preparing dataset

In [None]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="chess_humanized_prompts.jsonl")
dataset = dataset["train"].train_test_split(test_size=0.1)

tokenizer.pad_token = tokenizer.eos_token

def format(example):
    return {
        "text": f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{example['prompt']}<|eot_id|>\n"
                f"<|start_header_id|>assistant<|end_header_id|>\n{example['response']}<|eot_id|>"
    }

formatted_dataset = dataset.map(format)

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=2048)

tokenized_dataset = formatted_dataset.map(tokenize, batched=True)

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

### Applying LoRA with peft

In [None]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


### Setting up trainer

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="./llama3-chess-finetune",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=25,
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


### Training model

In [None]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mvrajabha[0m ([33mvrajabha-university-at-buffalo[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
25,1.5949
50,1.0756
75,0.9565
100,0.9633
125,0.9173
150,0.8604
175,0.9066
200,0.8703
225,0.8465


TrainOutput(global_step=225, training_loss=0.9990212080213758, metrics={'train_runtime': 615.4181, 'train_samples_per_second': 1.462, 'train_steps_per_second': 0.366, 'total_flos': 8.30738396086272e+16, 'train_loss': 0.9990212080213758, 'epoch': 1.0})

### Saving model

In [None]:
model.save_pretrained("llama3-chess-finetuned")
tokenizer.save_pretrained("llama3-chess-finetuned")

('llama3-chess-finetuned/tokenizer_config.json',
 'llama3-chess-finetuned/special_tokens_map.json',
 'llama3-chess-finetuned/tokenizer.json')

## Saving merged model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import os

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    device_map="auto"
)

# Load LoRA adapter weights into base model
merged_model = PeftModel.from_pretrained(base_model, "llama3-chess-finetuned")

# Merge LoRA weights into the base model
merged_model = merged_model.merge_and_unload()

# Tokenizer should match the base model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")

# Define output path
output_dir = os.path.join(DRIVE_PATH, "merged-llama3-finetuned")

# Save the merged model and tokenizer
print(f"Saving merged model to {output_dir}...")
merged_model.save_pretrained(output_dir, safe_serialization=True)
tokenizer.save_pretrained(output_dir)

print("✅ Model and tokenizer saved successfully.")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Saving merged model to /content/drive/MyDrive/ML_Project/merged-llama3-finetuned...




Saving checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

✅ Model and tokenizer saved successfully.
