In [1]:
# Установите необходимые библиотеки
!pip install transformers datasets peft accelerate evaluate tqdm pytest flash_attn tiktoken einops



In [2]:
from transformers import AutoModelForSeq2SeqLM, TrainingArguments
from transformers import Trainer, AutoTokenizer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from evaluate import load 
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Checkpoints compared in every experiment below, in the order small, base, large.
model_names = [f"google/flan-t5-{size}" for size in ("small", "base", "large")]

In [None]:
# 1. Data Loading and Preparation
def preprocess_data(example):
    """Turn one QA record into a text-to-text training pair.

    Args:
        example: Mapping with 'question', 'context' and 'answers' entries,
            where example['answers']['text'] is a sequence of answer strings.

    Returns:
        dict with 'input_text' (the question and context joined into a single
        prompt) and 'target_text' (the first answer, or "" when there is none).
    """
    prompt = "question: {} context: {}".format(example['question'], example['context'])
    answer_texts = example['answers']['text']
    if answer_texts:
        answer = answer_texts[0]
    else:
        answer = ""
    return {'input_text': prompt, 'target_text': answer}

# Load dataset
dataset = load_dataset("squad")

# Keep the first tenth of each split, then replace the original columns with
# the 'input_text' / 'target_text' pair produced by preprocess_data.
train_data = (
    dataset['train']
    .select(range(len(dataset['train']) // 10))
    .map(preprocess_data, remove_columns=dataset['train'].column_names)
)
validation_data = (
    dataset['validation']
    .select(range(len(dataset['validation']) // 10))
    .map(preprocess_data, remove_columns=dataset['validation'].column_names)
)

In [None]:
# Tokenize data
def tokenize_data(batch, tokenizer, max_input_length=512, max_target_length=512):
    """Tokenize prompts and answers and attach loss-ready labels.

    Args:
        batch: Mapping with "input_text" and "target_text" entries (lists of
            strings when called through ``map(..., batched=True)``).
        tokenizer: Callable tokenizer that returns a mapping containing
            "input_ids" and exposes ``pad_token_id``.
        max_input_length: Length the prompts are padded/truncated to.
        max_target_length: Length the answers are padded/truncated to.

    Returns:
        The tokenized prompts with an extra "labels" entry holding the answer
        token ids, where every padding position is replaced by -100.
    """
    inputs = tokenizer(
        batch["input_text"], padding="max_length", truncation=True, max_length=max_input_length
    )
    targets = tokenizer(
        batch["target_text"], padding="max_length", truncation=True, max_length=max_target_length
    )

    pad_id = tokenizer.pad_token_id

    def _mask_padding(sequence):
        # -100 is the label value the loss skips; leaving pad ids in place
        # would make the model train on (and be scored against) padding.
        return [-100 if token_id == pad_id else token_id for token_id in sequence]

    label_ids = targets["input_ids"]
    if label_ids and isinstance(label_ids[0], int):
        # A single, un-batched example: one flat list of ids.
        inputs["labels"] = _mask_padding(label_ids)
    else:
        inputs["labels"] = [_mask_padding(sequence) for sequence in label_ids]
    return inputs

# The tokenizer of the first checkpoint in model_names is used for both splits.
tokenizer = AutoTokenizer.from_pretrained(model_names[0])

train_data = train_data.map(
    lambda batch: tokenize_data(batch, tokenizer),
    batched=True,
)
validation_data = validation_data.map(
    lambda batch: tokenize_data(batch, tokenizer),
    batched=True,
)

In [None]:
# Drop the raw text columns and switch each split to the "torch" output format.
train_data = train_data.remove_columns(column_names=["input_text", "target_text"])
train_data.set_format(type="torch")

validation_data = validation_data.remove_columns(column_names=["input_text", "target_text"])
validation_data.set_format(type="torch")

In [None]:
# Helper function for training and evaluation
def compute_metrics(pred):
    """Score one evaluation pass with the SQuAD metric.

    Uses the notebook-level ``tokenizer`` (for decoding) and ``metric``
    (the loaded SQuAD scorer).

    Args:
        pred: Object with ``predictions`` and ``label_ids`` array attributes,
            as passed by ``Trainer`` to its ``compute_metrics`` callback.
            ``predictions`` may be token ids of shape (batch, seq), logits of
            shape (batch, seq, vocab), or a tuple whose first element is one
            of those.

    Returns:
        dict with the "f1" and "exact_match" scores reported by the metric.
    """
    predictions = pred.predictions
    # A model that returns several outputs yields a tuple; the first entry
    # carries the token scores.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    # Raw logits still have the vocabulary axis: pick the best token per position.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)
    # NOTE(review): with the plain Trainer these ids come from a forward pass on
    # the labels rather than from generate(); confirm that is the intended setup.

    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0

    def _decode(token_ids):
        # Negative ids (-100 marks ignored positions) cannot be decoded, so they
        # are swapped for padding on a copy; the caller's array is left untouched.
        token_ids = token_ids.copy()
        token_ids[token_ids < 0] = pad_id
        return tokenizer.batch_decode(token_ids, skip_special_tokens=True)

    decoded_preds = _decode(predictions)
    decoded_refs = _decode(pred.label_ids)

    # The SQuAD metric pairs predictions and references by "id" and expects a
    # dict per example on both sides.
    formatted_preds = [
        {"id": str(i), "prediction_text": text}
        for i, text in enumerate(decoded_preds)
    ]
    formatted_refs = [
        {"id": str(i), "answers": {"text": [ref], "answer_start": []}}
        for i, ref in enumerate(decoded_refs)
    ]
    results = metric.compute(predictions=formatted_preds, references=formatted_refs)
    return {"f1": results["f1"], "exact_match": results["exact_match"]}

def train_and_evaluate(model, training_args, train_data, eval_data, tokenizer, description):
    """Train ``model`` with a ``Trainer`` and return its final evaluation metrics.

    Args:
        model: Model to train.
        training_args: ``TrainingArguments`` controlling the run.
        train_data: Dataset used for training.
        eval_data: Dataset used for evaluation.
        tokenizer: Tokenizer handed to the ``Trainer``.
        description: Label used when printing the metrics.

    Returns:
        The metrics dict returned by ``trainer.evaluate()``.
    """
    def _logits_to_token_ids(logits, labels):
        # Without this hook the Trainer keeps the full-vocabulary logits of every
        # evaluation batch until the pass ends; reducing them to token ids here
        # keeps memory bounded and hands compute_metrics decodable ids.
        if isinstance(logits, tuple):
            logits = logits[0]
        return logits.argmax(dim=-1)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=eval_data,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        preprocess_logits_for_metrics=_logits_to_token_ids,
    )
    trainer.train()
    metrics = trainer.evaluate()
    print(f"Metrics for {description}: {metrics}")
    return metrics

def evaluate_model(model, eval_data, batch_size, description):
    """Evaluate ``model`` on ``eval_data`` without training it.

    Uses the notebook-level ``tokenizer`` and ``compute_metrics``.

    Args:
        model: Model to evaluate.
        eval_data: Dataset to evaluate on.
        batch_size: Per-device evaluation batch size.
        description: Label used in the printed messages.

    Returns:
        The metrics dict from ``trainer.evaluate()``, or ``{"error": "CUDA OOM"}``
        / ``{"error": "OutOfResources"}`` when evaluation fails with the
        corresponding RuntimeError.

    Raises:
        RuntimeError: Any other runtime error raised during evaluation.
    """
    def _logits_to_token_ids(logits, labels):
        # Without this hook the Trainer keeps the full-vocabulary logits of every
        # evaluation batch until the pass ends; reducing them to token ids here
        # keeps memory bounded and hands compute_metrics decodable ids.
        if isinstance(logits, tuple):
            logits = logits[0]
        return logits.argmax(dim=-1)

    trainer = Trainer(
        model=model,
        eval_dataset=eval_data,
        args=TrainingArguments(
            output_dir="./results_baseline_eval",
            per_device_eval_batch_size=batch_size,
            eval_strategy="no",
            fp16=False,  # Disable mixed precision to reduce incompatibility issues
            eval_accumulation_steps=4,  # Evaluate in smaller chunks
            logging_dir="./logs",
        ),
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        preprocess_logits_for_metrics=_logits_to_token_ids,
    )
    try:
        metrics = trainer.evaluate()
        print(f"Metrics for {description}: {metrics}")
        return metrics
    except RuntimeError as e:
        message = str(e)
        if "CUDA out of memory" in message:
            print(f"CUDA OOM during evaluation {description}. Skipping.")
            return {"error": "CUDA OOM"}
        if "OutOfResources" in message:
            print(f"Shared memory limit exceeded during evaluation {description}. Skipping.")
            return {"error": "OutOfResources"}
        # Anything else is unexpected: re-raise with the original traceback.
        raise

In [None]:
# SQuAD scorer used by compute_metrics.
metric = load("squad")

# One dict per experiment; the cells below append to this list.
results = []

In [None]:
# Part 0: Evaluate models without any fine-tuning
for model_name in model_names:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
    try:
        metrics = evaluate_model(model, validation_data, batch_size=1, description=f"Baseline ({model_name})")
        results.append({"model_name": model_name, "method": '-', "rank": "-", "metrics": metrics})
    except RuntimeError as e:
        # evaluate_model already turns out-of-memory errors raised by
        # trainer.evaluate() into an error dict, so this branch only sees the
        # ones raised earlier (while the Trainer is being built).
        if "CUDA out of memory" in str(e) or "OutOfResources" in str(e):
            print(f"Error during evaluation {model_name}: {e}. Skipping.")
            # Same "method"/"rank" keys as every other row so the results table stays uniform.
            results.append({"model_name": model_name, "method": '-', "rank": "-", "error": str(e)})
        else:
            raise

In [None]:
%%time
# Part 1: Fine-tuning
# Every checkpoint is trained with all parameters unfrozen, evaluated, and its
# final metrics are appended to `results`.
for model_name in model_names:
    model_finetune = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
    training_args_finetune = TrainingArguments(
        output_dir=f"./results_finetune_{model_name.split('/')[-1]}",
        per_device_train_batch_size=2,  # Reduced batch size
        gradient_accumulation_steps=4,  # Simulate larger batch size
        num_train_epochs=3,
        eval_strategy="steps",
        # Without an explicit value, eval_steps falls back to logging_steps (10),
        # which would run a full validation pass every 10 optimizer steps.
        eval_steps=1000,
        save_steps=1000,
        logging_steps=10,
        learning_rate=2e-4,
        # NOTE(review): T5-family checkpoints are reported to overflow under fp16;
        # consider bf16 if the hardware supports it - confirm on the target GPU.
        fp16=True,
        gradient_checkpointing=True,
    )
    metrics = train_and_evaluate(
        model_finetune, training_args_finetune, train_data, validation_data, tokenizer, f"Full Fine-tuning ({model_name})"
    )
    results.append({"model_name": model_name, "method": 'Full-parameter', "rank": "-", "metrics": metrics})

In [None]:
%%time
# Part 2: LoRA with different ranks
# Every checkpoint is trained once per rank; each run starts from a freshly
# loaded model and its final metrics are appended to `results`.
t5_target_modules = ["q", "k", "v", "o"]  # Simplified for T5 models

for model_name in model_names:
    for r in [4, 8, 16]:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)

        # LoRA configuration
        lora_config = LoraConfig(
            r=r,  # Low-rank adaptation rank
            lora_alpha=32,
            target_modules=t5_target_modules,
            lora_dropout=0.1,
            bias="none",  # Corrected for T5
            # Tells PEFT this is an encoder-decoder LM so it uses its seq2seq
            # wrapper rather than the generic one.
            task_type="SEQ_2_SEQ_LM",
        )

        # Apply LoRA adapters
        model = get_peft_model(model, lora_config)

        # Ensure LoRA layers are trainable
        for name, param in model.named_parameters():
            if "lora" in name:
                param.requires_grad = True

        # Training Arguments
        training_args = TrainingArguments(
            # The rank is part of the path so runs of the same model with
            # different ranks do not share (and overwrite) one checkpoint folder.
            output_dir=f"./results_lora_{model_name.split('/')[-1]}_r{r}",
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            num_train_epochs=3,
            eval_strategy="epoch",  # same argument name as the other cells
            save_strategy="epoch",
            learning_rate=5e-4,
            fp16=True,
            logging_steps=10,
            save_total_limit=1,
            gradient_checkpointing=False,  # Disabled to avoid conflicts
        )

        # Train and evaluate
        print(f"Starting LoRA Fine-tuning for {model_name} (r={r})...")
        metrics = train_and_evaluate(
            model,
            training_args,
            train_data,
            validation_data,
            tokenizer,
            f"LoRA Fine-tuning ({model_name}, r={r})"
        )
        results.append({"model_name": model_name, "method": 'LoRA', "rank": r, "metrics": metrics})

In [None]:
# Collect every experiment row into one table, show it, and write it to CSV
# (without the index column).
results_df = pd.DataFrame(data=results)
print(results_df)
results_df.to_csv(path_or_buf="results_comparison.csv", index=False)