# Fine-Tuning Llama-3.2-1B with LoRA and FHE using `LoraTrainer`

This notebook demonstrates how to fine-tune a Llama-3.2-1B model using LoRA (Low-Rank Adaptation) with Fully Homomorphic Encryption (FHE). We leverage the `LoraTrainer` API from the `concrete.ml.torch.lora` library to simplify the process.


In [1]:
import random
import shutil
from pathlib import Path

import numpy as np
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from utils_lora import generate_and_print

# Import LoraTrainer from the provided library
from concrete.ml.torch.lora import LoraTrainer

In [2]:
# Fix one seed for every random number generator the notebook relies on
SEED = 0
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [3]:
# Load the tokenizer and the base causal language model
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the tokenizer has a pad token: fall back to the EOS token when none is defined
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Read the pad id back from the tokenizer so that the model and the tokenizer always
# agree on it, including when the tokenizer already shipped with its own pad token
model.config.pad_token_id = tokenizer.pad_token_id

# Freeze the original model's weights; only the LoRA adapters added later are trained
for param in model.parameters():
    param.requires_grad = False

In [4]:
# Show what the base model generates for the prompt before any fine-tuning
PROMPT = "from concrete.ml.sklearn import LogisticRegression\n\nmodel = LogisticRegression("
print("Initial generation with base model:")
# generate_and_print prints the generated text itself and returns None, so it is
# called directly: wrapping it in print() would add a stray "None" line to the output
generate_and_print(PROMPT, model, tokenizer, seed=SEED)

Initial generation with base model:
from concrete.ml.sklearn import LogisticRegression

model = LogisticRegression( eta=0.1, max_iter=1000, random_state=42)
None


In [5]:
# Wrap the frozen base model with LoRA adapters
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # attach adapters to the model's linear layers
    r=8,  # rank of the low-rank update matrices
    lora_alpha=32,  # scaling factor of the LoRA update
    lora_dropout=0.01,
    bias="none",  # bias terms are left untrained
)
peft_model = get_peft_model(model, peft_config)

In [6]:
# Load the fine-tuning dataset (JSON lines, one "text" field per example) and tokenize it
dataset = load_dataset("json", data_files="data_finetune/dataset.jsonl", split="train")


def tokenize_function(examples):
    """Tokenize a batch of examples.

    Args:
        examples: Batch of dataset rows; the raw strings are read from its "text" column.

    Returns:
        The tokenizer output for the batch, padded to the longest sequence in the
        batch and truncated by the tokenizer when needed.
    """
    return tokenizer(examples["text"], padding="longest", truncation=True)


# batch_size=None makes `map` hand the whole dataset to the tokenizer as a single batch.
# With the default batch size, padding="longest" would pad each batch independently and
# examples from different batches could end up with different lengths, which breaks the
# equal-length check performed later when the calibration input set is built.
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=None)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [7]:
# Training hyperparameters
EPOCHS = 10
PER_DEVICE_TRAIN_BATCH_SIZE = 4
training_args = TrainingArguments(
    # Run configuration
    output_dir="./checkpoints",
    save_total_limit=1,
    use_cpu=True,
    prediction_loss_only=True,
    # Reproducibility
    seed=SEED,
    data_seed=SEED,
    # Batching and number of passes over the data
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,
    gradient_accumulation_steps=1,
    # Optimization
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_steps=10,
)

In [8]:
# HuggingFace's Trainer is used here to build the dataloader, the optimizer and the
# learning-rate scheduler
hf_trainer = Trainer(
    args=training_args,
    model=peft_model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset,
)
train_dataloader = hf_trainer.get_train_dataloader()

# Total number of steps: one per batch (no gradient accumulation), for every epoch
hf_trainer.create_optimizer_and_scheduler(num_training_steps=EPOCHS * len(train_dataloader))
optimizer, lr_scheduler = hf_trainer.optimizer, hf_trainer.lr_scheduler


# Define a causal LM loss function
def causal_lm_loss(logits, labels, ignore_index=-100):
    """Next-token cross-entropy loss for a causal language model.

    The logits at position t are scored against the label at position t + 1, so the
    last position of `logits` and the first position of `labels` are dropped.

    Args:
        logits: Model outputs whose last two dimensions are (sequence, vocabulary).
        labels: Target token ids whose last dimension is the sequence.
        ignore_index: Label value excluded from the loss.

    Returns:
        The mean cross-entropy over all target positions that are not ignored.
    """
    vocab_dim = logits.size(-1)
    # Flatten every leading dimension so that each row is one prediction
    predictions = logits[..., :-1, :].contiguous().view(-1, vocab_dim)
    targets = labels[..., 1:].contiguous().view(-1)
    return torch.nn.functional.cross_entropy(
        predictions, targets, reduction="mean", ignore_index=ignore_index
    )


# Prepare input data for calibration: every tokenized example must have the same
# length, which becomes the sequence length of the calibration tensors
lengths = [len(item["input_ids"]) for item in tokenized_dataset]
if not lengths:
    # Fail with a clear message instead of an IndexError on lengths[0] below
    raise ValueError("The tokenized dataset is empty; cannot build a calibration input set.")
if len(set(lengths)) != 1:
    raise ValueError("All examples must have the same length for calibration.")
BLOCK_SIZE = lengths[0]

# Random token ids and labels are used as the calibration batch, together with an
# attention mask that marks every position as visible
input_tensor = torch.randint(
    0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long
)
label_tensor = torch.randint(
    0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long
)
attention_mask = torch.ones((PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long)
inputset = (input_tensor, label_tensor, attention_mask)

# Build the LoraTrainer from the PEFT model, the loss, and the optimizer and scheduler
# created by the HuggingFace Trainer; the training arguments are passed as a plain dict
training_args_dict = vars(training_args)
lora_trainer = LoraTrainer(
    model=peft_model,
    loss_fn=causal_lm_loss,
    optimizer=optimizer,
    lr_scheduler=lr_scheduler,
    training_args=training_args_dict,
    # NOTE(review): presumably the number of initial layers excluded from
    # backpropagation; confirm against the LoraTrainer documentation
    n_layers_to_skip_for_backprop=3,
)

LoRA layers detected in the model.


In [9]:
# Compile the model with FHE, using the random calibration batch prepared above.
# NOTE(review): n_bits=16 is presumably the quantization bit-width; confirm against
# the LoraTrainer.compile documentation
lora_trainer.compile(inputset, n_bits=16)

In [10]:
# Train the model using LoraTrainer for EPOCHS passes over the training dataloader.
# NOTE(review): fhe="disable" presumably runs the computation in the clear, without
# encryption; confirm the available modes in the Concrete ML documentation
print("Starting training using LoraTrainer...")
lora_trainer.train(train_dataloader, num_epochs=EPOCHS, fhe="disable")

Starting training using LoraTrainer...


Training: 100%|██████████| 10/10 [22:19<00:00, 133.98s/epoch, Epoch=10, Avg Loss=0.0795, FHE Mode=disable]

Training completed. Final Avg Loss: 0.0795, FHE Mode: disable





In [11]:
# Compare generation before and after fine-tuning by switching the LoRA adapters
# off (original behaviour) and back on (fine-tuned behaviour).
# generate_and_print prints the generated text itself and returns None, so it is
# called directly: wrapping it in print() would add a stray "None" line to the output
peft_model.disable_adapter_layers()
print("Original model generation:")
generate_and_print(PROMPT, peft_model, tokenizer, seed=SEED)

peft_model.enable_adapter_layers()
print("Fine-tuned model generation:")
generate_and_print(PROMPT, peft_model, tokenizer, seed=SEED)

Original model generation:
from concrete.ml.sklearn import LogisticRegression

model = LogisticRegression( eta=0.1, max_iter=1000, random_state=42)
None
Fine-tuned model generation:
from concrete.ml.sklearn import LogisticRegression

model = LogisticRegression( n_bits=7, max_iter=50)
None


In [None]:
# Persist the fine-tuned model, starting from a clean target directory
save_path = Path("deployment/llama_lora_finetuned")
if save_path.is_dir():
    # Only a non-empty directory left over from a previous run is removed
    if any(save_path.iterdir()):
        shutil.rmtree(save_path)
lora_trainer.save_and_clear_private_info(save_path)

print("Model saved to:", save_path)