In [3]:
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model

# Load the MNLI task of the GLUE benchmark from the HuggingFace datasets library
dataset = load_dataset('glue', 'mnli')

# Keep the training split and the matched validation split.
# The test split is not used in this notebook (left commented for reference).
train_dataset = dataset['train']
validation_dataset = dataset['validation_matched']
# test_dataset = dataset['test_matched']

# Load the FLAN-T5 tokenizer and model
model_name = 'google/flan-t5-base'
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)


# LoRA configuration
lora_config = LoraConfig(
    # Declare the task so PEFT wraps the model with its seq2seq-specific class
    # (the string form is equivalent to peft.TaskType.SEQ_2_SEQ_LM).
    task_type="SEQ_2_SEQ_LM",
    r=8,  # Low-rank dimension
    lora_alpha=32,  # Scaling factor applied to the low-rank update
    target_modules=["q", "v"],  # Target query and value matrices in attention layers
    lora_dropout=0.1,
    bias="none"  # Bias terms are left frozen
)

# Wrap the T5 model with LoRA (note: `model` itself is modified in place by PEFT)
lora_model = get_peft_model(model, lora_config)

# Report how many parameters will actually receive gradients
lora_model.print_trainable_parameters()

def preprocess_function(examples, max_input_length=512, max_target_length=8):
    """Turn a batch of MNLI rows into text-to-text features for FLAN-T5.

    T5 is a sequence-to-sequence model: its ``labels`` must be token-id
    sequences for the decoder, not integer class ids. Each class id is
    therefore mapped to its label word and tokenized, and padding positions
    in the target are replaced by -100 so the loss ignores them.

    Args:
        examples: Batch dict (as passed by ``Dataset.map(..., batched=True)``)
            with ``premise``, ``hypothesis`` and ``label`` columns.
        max_input_length: Length every input is truncated/padded to. Every
            example is padded to this length, so lowering it reduces memory
            use per batch.
        max_target_length: Length every tokenized label word is
            truncated/padded to.

    Returns:
        The tokenizer output for the inputs (``input_ids``,
        ``attention_mask``) with an added ``labels`` entry holding one list
        of target token ids per example.

    Raises:
        ValueError: If a label is not one of the three MNLI class ids
            (e.g. the -1 placeholder used for unlabeled rows).
    """
    # GLUE/MNLI class ids -> the word the decoder should generate
    label_words = {0: "entailment", 1: "neutral", 2: "contradiction"}

    # Concatenate premise and hypothesis to form the input prompt
    inputs = [
        "premise: " + premise + " hypothesis: " + hypothesis
        for premise, hypothesis in zip(examples['premise'], examples['hypothesis'])
    ]

    # Fixed-length padding keeps every row the same size, which the default
    # DataLoader collation used later in this notebook relies on.
    model_inputs = tokenizer(
        inputs, max_length=max_input_length, truncation=True, padding='max_length'
    )

    try:
        targets = [label_words[int(label)] for label in examples["label"]]
    except KeyError as err:
        raise ValueError(
            f"Unexpected MNLI label {err.args[0]!r}; expected one of {sorted(label_words)}"
        ) from err

    target_encodings = tokenizer(
        targets, max_length=max_target_length, truncation=True, padding='max_length'
    )

    # -100 is the index ignored by the cross-entropy loss inside T5
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token_id if token_id != pad_id else -100 for token_id in target_ids]
        for target_ids in target_encodings["input_ids"]
    ]

    return model_inputs

# Raw columns that are no longer needed once the prompts have been tokenized
raw_columns = ['premise', 'hypothesis', 'idx']
# Columns exposed to PyTorch as tensors
tensor_columns = ['input_ids', 'attention_mask', 'labels']

# Tokenize each split in batches, then drop the raw columns
train_dataset = (
    train_dataset
    .map(preprocess_function, batched=True)
    .remove_columns(raw_columns)
)
validation_dataset = (
    validation_dataset
    .map(preprocess_function, batched=True)
    .remove_columns(raw_columns)
)

# Make both splits return torch tensors for the model-facing columns only
for split in (train_dataset, validation_dataset):
    split.set_format(type='torch', columns=tensor_columns)



trainable params: 884,736 || all params: 248,462,592 || trainable%: 0.3561


In [8]:
import torch
from torch.utils.data import DataLoader
from transformers import AdamW, get_scheduler
from tqdm.auto import tqdm

# Set device to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move the model to the selected device
lora_model.to(device)

# Smoke-test sized subsets: only the first 10 rows of each split are used here.
# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
# error; batch size and the padded sequence length are the main memory knobs
# visible in this notebook — confirm which one applies on the target GPU.
train_dataset_subset = train_dataset.select(range(10))  # Select the first 10 rows for training
validation_dataset_subset = validation_dataset.select(range(10))  # Select the first 10 rows for validation

# Create PyTorch DataLoader for train and validation datasets
train_dataloader = DataLoader(train_dataset_subset, shuffle=True, batch_size=4)
validation_dataloader = DataLoader(validation_dataset_subset, batch_size=4)

# Hand only the parameters that require gradients to the optimizer.
# torch.optim.AdamW replaces the deprecated transformers.AdamW; its default
# epsilon differs slightly from the transformers implementation.
trainable_parameters = [p for p in lora_model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_parameters, lr=2e-5, weight_decay=0.01)

# Set the number of training epochs
num_epochs = 3

# Number of training steps for lr_scheduler (one step per batch per epoch)
num_training_steps = num_epochs * len(train_dataloader)

# Linear decay of the learning rate to zero, without warmup
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)


progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_epochs):
    # Re-enter training mode every epoch: the validation pass below switches
    # the model to eval mode, which would otherwise disable dropout for all
    # epochs after the first.
    lora_model.train()

    for batch in train_dataloader:
        # Move batch to the correct device
        batch = {k: v.to(device) for k, v in batch.items()}

        outputs = lora_model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["labels"]
        )

        loss = outputs.loss

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        progress_bar.update(1)

    # Validation step at the end of each epoch
    lora_model.eval()
    total_eval_loss = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in validation_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}

            outputs = lora_model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                labels=batch["labels"]
            )

            total_eval_loss += outputs.loss.item()

    # Mean of the per-batch losses
    avg_eval_loss = total_eval_loss / len(validation_dataloader)
    print(f"Epoch {epoch+1}: Validation Loss: {avg_eval_loss}")

progress_bar.close()

# Save the fine-tuned adapter and the tokenizer side by side
lora_model.save_pretrained("./results_lora")
tokenizer.save_pretrained("./results_lora")



  0%|          | 0/9 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 12.00 GiB of which 0 bytes is free. Of the allocated memory 26.30 GiB is allocated by PyTorch, and 195.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)