In [None]:
# Force-upgrade the key libraries to ensure we have the latest stable versions
!pip install -q -U "transformers[torch]" "peft" "bitsandbytes" "datasets" "accelerate" "wandb"

In [2]:
import os
from google.colab import userdata
import wandb

# --- Set Secrets & Login ---
# (Make sure to set HF_TOKEN and WANDB_API_KEY in the Colab Secrets tab)
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')
os.environ["WANDB_API_KEY"] = userdata.get('WANDB_API_KEY')
wandb.login()

# --- Imports ---
import torch
import re
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import get_peft_model, LoraConfig, IA3Config, TaskType
from datasets import load_dataset

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Currently logged in as: [33mujwaljibhkate06[0m ([33miu-aml-hw4-team[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
def load_model_and_tokenizer(model_name, token):
    """Loads the 8-bit model and tokenizer."""
    print(f"--- Loading Model: {model_name} ---")

    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        token=token,
        trust_remote_code=True
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=token,
        trust_remote_code=True
    )
    print("--- Model Loaded Successfully ---")
    return model, tokenizer

def load_and_prep_data(dataset_name, tokenizer, train_size, eval_size, max_len):
    """Loads and tokenizes the dataset subsets."""

    def preprocess_dolly(examples):
        prompts = []
        for instruction, context, response in zip(examples['instruction'], examples['context'], examples['response']):
            text = f"Instruction:\n{instruction}\n"
            if context:
                text += f"Context:\n{context}\n"
            text += f"Response:\n{response}"
            prompts.append(text + tokenizer.eos_token)

        tokenized = tokenizer(
            prompts,
            truncation=True,
            padding="max_length",
            max_length=max_len
        )
        tokenized["labels"] = tokenized["input_ids"]
        return tokenized

    print(f"--- Loading & Preprocessing {train_size} train / {eval_size} eval samples ---")
    # Load 2000 for train
    train_data = load_dataset(dataset_name, split=f"train[:{train_size}]")
    # Load 500 for eval, starting *after* the train set
    eval_data = load_dataset(dataset_name, split=f"train[{train_size}:{train_size + eval_size}]")

    train_dataset = train_data.map(preprocess_dolly, batched=True, remove_columns=train_data.column_names)
    eval_dataset = eval_data.map(preprocess_dolly, batched=True, remove_columns=eval_data.column_names)

    print("--- Data Ready ---")
    return train_dataset, eval_dataset

In [4]:
def configure_lora(model, config_dict):
    """Applies the LoRA adapter to the model."""
    print("--- Applying LoRA Adapter ---")

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=config_dict["lora_r"],
        lora_alpha=config_dict["lora_alpha"],
        lora_dropout=config_dict["lora_dropout"],
        target_modules=config_dict["lora_targets"],
        bias="none",
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    return model

def configure_ia3(model, config_dict):
    """Applies the IA3 adapter to the model."""
    print("--- Applying IA3 Adapter ---")

    target_modules = config_dict["lora_targets"]

    # This regex is crucial. 'dense' matches Pythia's FFN layers.
    ff_pattern = r".*(mlp|fc|wi|w0|w1|w2|wo|down_proj|out_lin|dense).*"
    feedforward_modules = [m for m in target_modules if re.match(ff_pattern, m)]

    if not feedforward_modules:
         print("Warning: No feedforward modules found for IA3. Check target_modules and regex.")

    peft_config = IA3Config(
        task_type=TaskType.CAUSAL_LM,
        target_modules=target_modules,
        feedforward_modules=feedforward_modules,
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    return model

In [5]:
def run_experiment(model, tokenizer, train_dataset, eval_dataset, run_name, config_dict):
    """Initializes and runs the Trainer."""

    print(f"--- Configuring Trainer for run: {run_name} ---")

    # --- 1. Define Training Arguments ---
    # We are re-enabling evaluation to get eval_loss
    training_args = TrainingArguments(
        output_dir=f"{config_dict['output_dir']}/{run_name}",
        num_train_epochs=config_dict["epochs"],
        per_device_train_batch_size=config_dict["batch_size"],
        gradient_accumulation_steps=config_dict["grad_accum"],
        learning_rate=config_dict["lr"],

        # --- The Stability Stack ---
        fp16=True,
        max_grad_norm=0.3,
        optim="paged_adamw_8bit",

        logging_dir=f"./logs/{run_name}",
        logging_strategy="steps",
        logging_steps=5, # Log training loss

        # --- EVALUATION RE-ENABLED ---
        # This is the test. If this fails, the CUDA error is real.
        eval_strategy="epoch",
        save_strategy="epoch",
        # -----------------------------

        report_to="wandb",
        run_name=run_name, # Pass run name to W&B
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )

    # --- 2. Define Data Collator ---
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # --- 3. Initialize Trainer ---
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    print(f"--- STARTING TRAINING: {run_name} ---")
    trainer.train()
    print(f"--- TRAINING FINISHED: {run_name} ---")

    # --- 4. Evaluate ---
    print(f"--- STARTING EVALUATION: {run_name} ---")
    eval_results = trainer.evaluate()
    print(f"Evaluation Results ({run_name}):", eval_results)

    return trainer, eval_results

In [6]:
import time

# --- 1. Base Configuration ---
HF_TOKEN = os.getenv("HF_TOKEN")
BASE_CONFIG = {
    "model_name": "EleutherAI/pythia-2.8b",
    "dataset_name": "databricks/databricks-dolly-15k",
    "train_subset_size": 2000, # Using 2k train samples
    "eval_subset_size": 500,   # Using 500 eval samples
    "max_seq_len": 512,
    "lora_dropout": 0.1,
    "lora_targets": ["query_key_value", "dense", "dense_4h_to_h"],
    "epochs": 1,
    "batch_size": 2,
    "grad_accum": 8,
    "lr": 2e-5,
    "output_dir": "./results_pythia",
    "wandb_project": "lora-vs-ia3-pythia-final" # Final W&B project
}


In [7]:
# --- 2. Load Data ONCE ---
# (We can re-use the same data for all runs)
model_for_tokenizer, tokenizer = load_model_and_tokenizer(BASE_CONFIG["model_name"], HF_TOKEN)
del model_for_tokenizer # Free up memory, we only needed the tokenizer

train_ds, eval_ds = load_and_prep_data(
    BASE_CONFIG["dataset_name"],
    tokenizer,
    BASE_CONFIG["train_subset_size"],
    BASE_CONFIG["eval_subset_size"],
    BASE_CONFIG["max_seq_len"]
)

--- Loading Model: EleutherAI/pythia-2.8b ---


tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.68G [00:00<?, ?B/s]

--- Model Loaded Successfully ---
--- Loading & Preprocessing 2000 train / 500 eval samples ---


README.md: 0.00B [00:00, ?B/s]

databricks-dolly-15k.jsonl:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15011 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

--- Data Ready ---


In [None]:
# --- 3. Run IA3 ---
print(f"\n{'='*20} STARTING IA3 {'='*20}\n")

run_config = BASE_CONFIG.copy()
run_name = "pythia_ia3"

# Load a fresh model
model, tokenizer = load_model_and_tokenizer(run_config["model_name"], HF_TOKEN)
model = configure_ia3(model, run_config)

# Run
trainer, results = run_experiment(model, tokenizer, train_ds, eval_ds, run_name, run_config)

# Clean up
del model
del trainer
torch.cuda.empty_cache()



--- Loading Model: EleutherAI/pythia-2.8b ---
--- Model Loaded Successfully ---
--- Applying IA3 Adapter ---
trainable params: 655,360 || all params: 2,775,864,320 || trainable%: 0.0236
--- Configuring Trainer for run: pythia_ia3 ---


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


--- STARTING TRAINING: pythia_ia3 ---




Epoch,Training Loss,Validation Loss
1,2.2332,2.240971


--- TRAINING FINISHED: pythia_ia3 ---
--- STARTING EVALUATION: pythia_ia3 ---




Evaluation Results (pythia_ia3): {'eval_loss': 2.240971088409424, 'eval_runtime': 104.7265, 'eval_samples_per_second': 4.774, 'eval_steps_per_second': 0.602, 'epoch': 1.0}


In [None]:
# --- 4. Run LoRA Sweep ---
lora_ranks = [8] # Representative ranks
for r_val in lora_ranks:
    print(f"\n{'='*20} STARTING LoRA r={r_val} {'='*20}\n")

    # Update config for this run
    run_config = BASE_CONFIG.copy()
    run_config.update({
        "lora_r": r_val,
        "lora_alpha": r_val * 2
    })
    run_name = f"pythia_lora_r{r_val}"

    # Load a fresh model
    model, tokenizer = load_model_and_tokenizer(run_config["model_name"], HF_TOKEN)
    model = configure_lora(model, run_config)

    # Run
    trainer, results = run_experiment(model, tokenizer, train_ds, eval_ds, run_name, run_config)

    # Clean up memory
    del model
    del trainer
    torch.cuda.empty_cache()
    time.sleep(10) # Pause to let W&B sync



print(f"\n{'='*20} ALL EXPERIMENTS FINISHED {'='*20}\n")



--- Loading Model: EleutherAI/pythia-2.8b ---


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


--- Model Loaded Successfully ---
--- Applying LoRA Adapter ---
trainable params: 7,208,960 || all params: 2,782,417,920 || trainable%: 0.2591
--- Configuring Trainer for run: pythia_lora_r8 ---
--- STARTING TRAINING: pythia_lora_r8 ---




Epoch,Training Loss,Validation Loss
1,2.0214,2.033304


--- TRAINING FINISHED: pythia_lora_r8 ---
--- STARTING EVALUATION: pythia_lora_r8 ---




Evaluation Results (pythia_lora_r8): {'eval_loss': 2.033304452896118, 'eval_runtime': 95.6462, 'eval_samples_per_second': 5.228, 'eval_steps_per_second': 0.659, 'epoch': 1.0}




In [8]:
# --- 4. Run LoRA Sweep ---
lora_ranks = [32] # Representative ranks
for r_val in lora_ranks:
    print(f"\n{'='*20} STARTING LoRA r={r_val} {'='*20}\n")

    # Update config for this run
    run_config = BASE_CONFIG.copy()
    run_config.update({
        "lora_r": r_val,
        "lora_alpha": r_val * 2
    })
    run_name = f"pythia_lora_r{r_val}"

    # Load a fresh model
    model, tokenizer = load_model_and_tokenizer(run_config["model_name"], HF_TOKEN)
    model = configure_lora(model, run_config)

    # Run
    trainer, results = run_experiment(model, tokenizer, train_ds, eval_ds, run_name, run_config)

    # Clean up memory
    del model
    del trainer
    torch.cuda.empty_cache()
    time.sleep(10) # Pause to let W&B sync



print(f"\n{'='*20} ALL EXPERIMENTS FINISHED {'='*20}\n")



--- Loading Model: EleutherAI/pythia-2.8b ---
--- Model Loaded Successfully ---
--- Applying LoRA Adapter ---


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


trainable params: 28,835,840 || all params: 2,804,044,800 || trainable%: 1.0284
--- Configuring Trainer for run: pythia_lora_r32 ---
--- STARTING TRAINING: pythia_lora_r32 ---




Epoch,Training Loss,Validation Loss
1,1.9901,2.003991


--- TRAINING FINISHED: pythia_lora_r32 ---
--- STARTING EVALUATION: pythia_lora_r32 ---




Evaluation Results (pythia_lora_r32): {'eval_loss': 2.003990888595581, 'eval_runtime': 94.7731, 'eval_samples_per_second': 5.276, 'eval_steps_per_second': 0.665, 'epoch': 1.0}




In [9]:
# --- 4. Run LoRA Sweep ---
lora_ranks = [128] # Representative ranks
for r_val in lora_ranks:
    print(f"\n{'='*20} STARTING LoRA r={r_val} {'='*20}\n")

    # Update config for this run
    run_config = BASE_CONFIG.copy()
    run_config.update({
        "lora_r": r_val,
        "lora_alpha": r_val * 2
    })
    run_name = f"pythia_lora_r{r_val}"

    # Load a fresh model
    model, tokenizer = load_model_and_tokenizer(run_config["model_name"], HF_TOKEN)
    model = configure_lora(model, run_config)

    # Run
    trainer, results = run_experiment(model, tokenizer, train_ds, eval_ds, run_name, run_config)

    # Clean up memory
    del model
    del trainer
    torch.cuda.empty_cache()
    time.sleep(10) # Pause to let W&B sync



print(f"\n{'='*20} ALL EXPERIMENTS FINISHED {'='*20}\n")



--- Loading Model: EleutherAI/pythia-2.8b ---
--- Model Loaded Successfully ---
--- Applying LoRA Adapter ---


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


trainable params: 115,343,360 || all params: 2,890,552,320 || trainable%: 3.9904
--- Configuring Trainer for run: pythia_lora_r128 ---
--- STARTING TRAINING: pythia_lora_r128 ---




Epoch,Training Loss,Validation Loss
1,1.967,1.983843


--- TRAINING FINISHED: pythia_lora_r128 ---
--- STARTING EVALUATION: pythia_lora_r128 ---




Evaluation Results (pythia_lora_r128): {'eval_loss': 1.9838427305221558, 'eval_runtime': 96.0734, 'eval_samples_per_second': 5.204, 'eval_steps_per_second': 0.656, 'epoch': 1.0}


