In [1]:
# Install required packages (run only if not already installed)
%pip install -q -U transformers datasets accelerate peft bitsandbytes trl pyarrow==19.0.0
%pip install -U pip setuptools wheel
%pip install bitsandbytes
%pip install hf_xet

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting hf_xet
  Downloading hf_xet-1.1.10-cp37-abi3-win_amd64.whl.metadata (4.7 kB)
Downloading hf_xet-1.1.10-cp37-abi3-win_amd64.whl (2.8 MB)
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.8 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.8 MB 1.3 MB/s eta 0:00:02
   ----------- ---------------------------- 0.8/2.8 MB 1.2 MB/s eta 0:00:02
   ----------- ---------------------------- 0.8/2.8 MB 1.2 MB/s eta 0:00:02
   -------------- ------------------------- 1.0/2.8 MB 1.1 MB/s eta 0:00:02
   ------------------ --------------------- 1.3/2.8 MB 1.2 MB/s eta 0:00:02
   ---------------------- ----------------- 1.6/2.8 MB 1.1 MB/s eta 0:00:02
   -------------------------- ------------- 1.8/2.8 MB 1.1 MB

In [2]:
! pip install -q -U transformers datasets accelerate peft bitsandbytes trl pyarrow==19.0.0

In [None]:
# Install/upgrade ipywidgets via the %pip magic, which targets the running kernel's environment.
%pip install -U ipywidgets



In [4]:
# NOTE(review): the recorded output of this cell ends with
# "Jupyter command `jupyter-nbextension` not found.", i.e. this command did not run in the
# environment where the notebook was last executed.
# TODO: confirm whether enabling the widgets extension is still required on this Jupyter install.
!jupyter nbextension enable --py widgetsnbextension

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: kernel kernelspec migrate run troubleshoot

Jupyter command `jupyter-nbextension` not found.


In [5]:
# robust_llava_loader.py
# Loads a LLaVA-style model even when AutoModelForCausalLM doesn't recognize LlavaConfig.
# Requires: transformers, torch, datasets, peft, trl, etc.
# Install dependencies first: pip install -U "transformers>=4.31.0" bitsandbytes peft trl datasets safetensors
# (bitsandbytes is only needed if you want 4-bit quantization support).

import importlib
import sys
import traceback
import random
import torch
import transformers
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig  # if bitsandbytes present; import may fail if not installed
from datasets import load_dataset

# CONFIG
# Module-level defaults for the loader cell. The functions defined below take their
# settings as parameters (model_name, use_4bit, device_map, low_cpu_mem).
# Hugging Face Hub id of the checkpoint to load.
BASE_MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
USE_4BIT = False  # set True only if bitsandbytes is installed & you want 4-bit quant
# Value intended for the `device_map` argument of from_pretrained.
DEVICE_MAP = "auto"
# Value intended for the `low_cpu_mem_usage` argument of from_pretrained.
LOW_CPU_MEM = True

def print_versions():
    """Print the installed versions of torch, transformers and (if importable) bitsandbytes.

    bitsandbytes is optional: any failure while importing it or reading its version is
    reported as "NOT INSTALLED" instead of raising.
    """
    print("torch:", torch.__version__)
    print("transformers:", transformers.__version__)
    try:
        bnb_module = importlib.import_module("bitsandbytes")
        bnb_version = bnb_module.__version__
        print("bitsandbytes:", bnb_version)
    except Exception:
        print("bitsandbytes: NOT INSTALLED")

def try_load_llava_class_and_model(model_name, quant_config=None, device_map="auto", low_cpu_mem=True):
    """Locate a LLaVA model class by name and load `model_name` with it.

    Candidate module paths are imported one by one; in each importable module the
    candidate class names are looked up in order. For the first class found, loading
    is attempted with this strategy sequence:

      1. Direct load via `_attempt_from_pretrained` (quantized if `quant_config` is given).
      2. If that fails with a message hinting at CPU/disk dispatch and a `quant_config`
         was given, retry with a copy of the config that has
         `llm_int8_enable_fp32_cpu_offload=True` and `device_map="auto"`.
      3. If a `quant_config` was given and loading still fails, retry without
         quantization (float16 on CUDA, float32 otherwise).

    Args:
        model_name: Checkpoint name/path passed to `from_pretrained`.
        quant_config: Optional quantization config object; None means unquantized.
        device_map: `device_map` forwarded to `from_pretrained`.
        low_cpu_mem: Forwarded as `low_cpu_mem_usage`.

    Returns:
        The loaded model returned by the first successful `from_pretrained` call.

    Raises:
        RuntimeError: If no candidate class could be imported and loaded. The message
            contains the text of the last exception seen.
    """
    import copy  # local import: keeps this block self-contained without a new file-level import

    candidate_module_paths = [
        "transformers.models.llava.modeling_llava",
        "transformers.models.llava.modeling_llava_for_causal_lm",
        "llava.modeling_llava",
        "modeling_llava",
    ]
    candidate_class_names = [
        "LlavaForCausalLM",
        "LlavaModelForCausalLM",
        "LlavaForConditionalGeneration",
        "LlavaModel",
        "LlavaForVision2Seq",
    ]

    last_exc = None
    for mod_path in candidate_module_paths:
        try:
            module = importlib.import_module(mod_path)
        except Exception as e:
            last_exc = e
            continue

        for cls_name in candidate_class_names:
            ModelClass = getattr(module, cls_name, None)
            if ModelClass is None:
                continue

            try:
                print(f"Trying to load {cls_name} with device_map={device_map} (quant_config={'yes' if quant_config else 'no'})...")
                return _attempt_from_pretrained(ModelClass, model_name, quant_config, device_map, low_cpu_mem, extra_kwargs={})
            except Exception as e:
                last_exc = e
                tb = traceback.format_exc()
                print(f"Initial attempt with {cls_name} failed: {e}\n{tb}")

                if quant_config is None:
                    # The first attempt already was the unquantized load; repeating the
                    # identical call as a "fallback" would only fail the same way again.
                    continue

                # If the message suggests offload, try the offload route.
                msg = str(e).lower()
                if "offload" in msg or "dispatched on the cpu" in msg or "some modules are dispatched" in msg:
                    try:
                        print("Retrying with llm_int8_enable_fp32_cpu_offload=True and device_map='auto'...")
                        # The offload switch is a field of the quantization config, not a
                        # from_pretrained keyword. Set it on a copy so the caller's config
                        # object is left untouched.
                        offload_config = copy.copy(quant_config)
                        offload_config.llm_int8_enable_fp32_cpu_offload = True
                        return ModelClass.from_pretrained(
                            model_name,
                            quantization_config=offload_config,
                            device_map="auto",
                            trust_remote_code=True,
                            low_cpu_mem_usage=low_cpu_mem,
                        )
                    except Exception as e2:
                        last_exc = e2
                        tb2 = traceback.format_exc()
                        print(f"Retry with offload failed: {e2}\n{tb2}")

                # Final fallback: try without quantization (float16)
                try:
                    print("Retrying without quantization (float16) as a fallback...")
                    return ModelClass.from_pretrained(
                        model_name,
                        device_map=device_map,
                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                        trust_remote_code=True,
                        low_cpu_mem_usage=low_cpu_mem,
                    )
                except Exception as e3:
                    last_exc = e3
                    tb3 = traceback.format_exc()
                    print(f"Fallback without quantization also failed: {e3}\n{tb3}")
                    continue

    raise RuntimeError("Tried candidate Llava classes but all failed. Last exception:\n" + (str(last_exc) if last_exc is not None else "None"))

def _attempt_from_pretrained(ModelClass, model_name, quant_config, device_map, low_cpu_mem, extra_kwargs):
    """Helper to call from_pretrained with given kwargs and bubble exceptions."""
    try:
        if quant_config is not None:
            return ModelClass.from_pretrained(
                model_name,
                quantization_config=quant_config,
                device_map=device_map,
                trust_remote_code=True,
                low_cpu_mem_usage=low_cpu_mem,
                **extra_kwargs,
            )
        else:
            return ModelClass.from_pretrained(
                model_name,
                device_map=device_map,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                trust_remote_code=True,
                low_cpu_mem_usage=low_cpu_mem,
                **extra_kwargs,
            )
    except Exception as e:
        raise

def load_model_and_tokenizer(model_name, use_4bit=True, device_map="auto", low_cpu_mem=True):
    """Load a model and its tokenizer for fine-tuning.

    The tokenizer gets a pad token (falls back to the EOS token) and right-side padding.
    If the checkpoint's config class name contains "llava", loading is delegated to
    `try_load_llava_class_and_model`; otherwise `AutoModelForCausalLM` is used, first
    quantized (when requested and possible) and then unquantized as a fallback.
    Afterwards gradient checkpointing is enabled on a best-effort basis and
    `model.config.use_cache` is set to False.

    Args:
        model_name: Checkpoint name/path passed to the `from_pretrained` calls.
        use_4bit: If True, try to build a 4-bit NF4 `BitsAndBytesConfig`.
        device_map: `device_map` forwarded to `from_pretrained`.
        low_cpu_mem: Forwarded as `low_cpu_mem_usage`.

    Returns:
        Tuple `(model, tokenizer)`.
    """
    import copy  # local import: keeps this block self-contained without a new file-level import

    # Load config + tokenizer
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    quant_config = None
    if use_4bit:
        try:
            quant_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
            )
        except Exception as e:
            print("Could not create BitsAndBytesConfig:", e)
            quant_config = None

    def _load_unquantized():
        # Shared unquantized AutoModelForCausalLM load (float16 on CUDA, float32 otherwise).
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=device_map,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=low_cpu_mem,
        )

    # If LlavaConfig detected: try repo model classes (with fallback paths)
    cfg_name = config.__class__.__name__.lower()
    if "llava" in cfg_name:
        model = try_load_llava_class_and_model(model_name, quant_config=quant_config, device_map=device_map, low_cpu_mem=low_cpu_mem)
    elif quant_config is not None:
        # generic fallback to AutoModelForCausalLM
        # CPU offload is a field of the quantization config, not a from_pretrained
        # keyword. Passing it as a keyword made this quantized load fail and silently
        # fall through to the unquantized path. Set it on a copy instead.
        offload_config = copy.copy(quant_config)
        offload_config.llm_int8_enable_fp32_cpu_offload = True
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                quantization_config=offload_config,
                device_map=device_map,
                trust_remote_code=True,
                low_cpu_mem_usage=low_cpu_mem,
            )
        except Exception as e:
            print("AutoModelForCausalLM with quant failed, retrying without quant:", e)
            model = _load_unquantized()
    else:
        model = _load_unquantized()

    try:
        model.gradient_checkpointing_enable()
    except Exception as e:
        # Best effort: keep going, but report the failure instead of hiding it.
        print("Could not enable gradient checkpointing:", e)
    model.config.use_cache = False
    return model, tokenizer

# Now, when you run main(), it will automatically retry with llm_int8_enable_fp32_cpu_offload if needed.

In [6]:
import os
import sys

# Detect Google Colab by probing for its package; anywhere else the import fails.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if not IN_COLAB:
    print("Not running in Colab. Using local environment.")
    # Optionally: get API key from environment variable
    # api_key = os.getenv("YOUR_API_KEY_ENV_VAR")
else:
    # On Colab, mount Google Drive at /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')
    print("Running in Colab. You may use Colab resources.")
    # Optionally: get API key from user input or environment
    # api_key = input("Enter your API key: ")

Not running in Colab. Using local environment.


In [None]:
"""
finetune_after_script_b.py

Uses robust_llava_loader.load_model_and_tokenizer() (script B output) to load the model,
auto-detects good LoRA target_modules, and runs PEFT (LoRA) fine-tuning with trl.SFTTrainer.

Usage: python finetune_after_script_b.py
"""

import os
import re
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from peft import LoraConfig, get_peft_model, PeftModel

# try to import helper for preparing k-bit training (peft versions vary)
try:
    from peft import prepare_model_for_kbit_training
except Exception:
    try:
        from peft.utils import prepare_model_for_kbit_training
    except Exception:
        prepare_model_for_kbit_training = None

# Add the src directory to the Python path so we can import robust_llava_loader.
# `sys` is imported here because this cell/script does not import it itself; relying on an
# earlier notebook cell having imported it breaks on a standalone run.
import sys

_SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), 'src'))
if _SRC_DIR not in sys.path:  # avoid stacking duplicate entries when the cell is re-run
    sys.path.append(_SRC_DIR)
from robust_llava_loader import load_model_and_tokenizer

# --------------------- USER CONFIG ---------------------
# Checkpoint passed to load_model_and_tokenizer() in main().
BASE_MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
DATASET_PATH = "meld_with_rationales.jsonl"   # jsonl containing utterance, sentiment, rationale
# Output directory for TrainingArguments and for the saved adapters.
OUTPUT_DIR = "./llava-peft-adapters-auto"
# When True, main() uses 4-bit loading if `import bitsandbytes` succeeds.
USE_4BIT_IF_AVAILABLE = True
# Passed to SFTTrainer as max_seq_length.
MAX_SEQ_LENGTH = 512
# Passed to TrainingArguments as per_device_train_batch_size.
PER_DEVICE_BATCH_SIZE = 4
# Passed to TrainingArguments as num_train_epochs.
NUM_EPOCHS = 1
# Passed to TrainingArguments as learning_rate.
LEARNING_RATE = 2e-4
# Passed to TrainingArguments as gradient_accumulation_steps.
GRADIENT_ACCUMULATION_STEPS = 1
# Passed to TrainingArguments as save_steps.
SAVE_STEPS = 200
# Passed to TrainingArguments as logging_steps.
LOGGING_STEPS = 20
# -------------------------------------------------------

# helper: create the training prompt
def build_prompt(example):
    """Render one training example as an instruction-style prompt string.

    Reads the 'utterance', 'sentiment' and 'rationale' keys via `.get`, substituting
    an empty string for any that are missing, and returns the assembled prompt.
    """
    utterance = example.get('utterance', '')
    sentiment = example.get('sentiment', '')
    rationale = example.get('rationale', '')
    segments = [
        "You are a sentiment analysis expert. Analyze the following utterance and provide ",
        "the sentiment along with a step-by-step rationale for your decision.\n\n",
        "### Utterance:\n",
        f"{utterance}\n\n",
        "### Analysis:\n",
        f"Sentiment: {sentiment}\n",
        f"Rationale: {rationale}",
    ]
    return "".join(segments)

# helper: scan model.named_modules() and choose candidate target module name substrings
def auto_detect_target_module_names(model, prefer_text=True):
    """Return a sorted list of projection-layer names to use as LoraConfig.target_modules.

    Walks `model.named_modules()` and collects the final path component of every module
    whose own name (not merely some ancestor's name) is a known projection layer name.
    Matching on the final component only keeps container modules such as `...mlp` and
    their non-projection children such as `...mlp.act_fn` out of the result.

    Args:
        model: Any object exposing `named_modules()` that yields `(name, module)` pairs.
        prefer_text: If True, modules whose dotted path contains "vision" are ignored
            during detection.

    Returns:
        Sorted list of distinct projection names. If none are found, a default list of
        common attention/MLP projection names is returned.

    NOTE(review): the result is a list of bare layer names, so an entry such as
    "q_proj" may presumably also match identically named layers under a vision tower
    when PEFT applies it. Confirm against PEFT's matching rules if vision layers must
    be excluded from LoRA.
    """
    projection_names = {
        "q_proj", "k_proj", "v_proj", "o_proj", "out_proj",
        "gate_proj", "up_proj", "down_proj", "fc1", "fc2",
    }

    detected = set()
    for module_path, _module in model.named_modules():
        # skip top-level empty name
        if not module_path:
            continue
        # skip vision modules if preferring text modules
        if prefer_text and "vision" in module_path:
            continue
        leaf_name = module_path.rsplit(".", 1)[-1]
        if leaf_name in projection_names:
            detected.add(leaf_name)

    # fallback if nothing was detected: default common names
    if not detected:
        detected = {"q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"}
    # keep consistent ordering and return as list
    return sorted(detected)

def format_and_map(example, tokenizer):
    """Turn one dataset row into a `{"text": ...}` record.

    The text is the prompt from `build_prompt(example)` followed by the tokenizer's EOS
    token; a missing or empty EOS token contributes nothing.
    """
    prompt = build_prompt(example)
    terminator = tokenizer.eos_token
    if not terminator:
        terminator = ""
    return {"text": prompt + terminator}

def main():
    """Run the LoRA fine-tuning pipeline end to end.

    Steps: decide on 4-bit loading, load model and tokenizer, detect LoRA target
    modules, optionally prepare the model for k-bit training, wrap it with LoRA, load
    and format the JSONL dataset, do a one-step dry-run, run the full training and save
    the adapters to OUTPUT_DIR.

    Raises:
        Exception: Any error from the dry-run or the full training is re-raised.
    """
    # decide 4-bit usage
    use_4bit = False
    if USE_4BIT_IF_AVAILABLE:
        try:
            import bitsandbytes  # noqa: F401
            use_4bit = True
        except Exception:
            print("bitsandbytes not installed/found — running without 4-bit.")

    # 1) Load model + tokenizer (robust loader)
    print("Loading model + tokenizer (robust loader)...")
    model, tokenizer = load_model_and_tokenizer(model_name=BASE_MODEL_NAME, use_4bit=use_4bit)
    print("Loaded model and tokenizer. Model dtype hint:", getattr(model, "dtype", None))
    model.config.use_cache = False
    try:
        model.gradient_checkpointing_enable()
    except Exception as exc:
        # Best effort: training can proceed without it, but report why it was skipped.
        print("Could not enable gradient checkpointing:", exc)

    # 2) Auto-detect target_modules for LoRA (based on model module names)
    print("Auto-detecting candidate LoRA target module name tokens from model.named_modules()...")
    detected = auto_detect_target_module_names(model, prefer_text=True)
    print("Detected target-module name tokens (candidates):", detected)
    target_modules = detected

    # 3) Prepare model for k-bit training (if using 4-bit & helper present)
    if use_4bit:
        if prepare_model_for_kbit_training is not None:
            print("Preparing model for k-bit training (peft.prepare_model_for_kbit_training)...")
            model = prepare_model_for_kbit_training(model)
        else:
            print("prepare_model_for_kbit_training not available in this peft version — continuing.")

    # 4) Create LoraConfig and wrap model
    lora_cfg = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=target_modules,
    )
    print("Applying LoRA with LoraConfig:", lora_cfg)
    model = get_peft_model(model, lora_cfg)
    # Print parameter names only; printing the (name, tensor) pairs dumps whole tensors.
    first_param_names = [name for name, _ in model.named_parameters()][:5]
    print("PEFT/LoRA applied. Peft model keys:", first_param_names)

    # 5) Load and format dataset
    print("Loading dataset from", DATASET_PATH)
    ds = load_dataset("json", data_files=DATASET_PATH, split="train")
    print("Dataset size:", len(ds))
    # map to `text` field expected by SFTTrainer
    ds = ds.map(lambda ex: format_and_map(ex, tokenizer), remove_columns=ds.column_names)

    # 6) TrainingArguments + trainer
    # total_memory is reported in bytes, so 12 GiB is 12 * 1024 ** 3
    # (the previous 1024 ** 2 was 12 MiB and therefore always true).
    min_fp16_gpu_bytes = 12 * 1024 ** 3
    cuda_available = torch.cuda.is_available()
    has_large_gpu = cuda_available and torch.cuda.get_device_properties(0).total_memory >= min_fp16_gpu_bytes
    # fp16 mixed precision is only requested when a CUDA device is present.
    use_fp16 = cuda_available and (use_4bit or has_large_gpu)

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=NUM_EPOCHS,
        per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        learning_rate=LEARNING_RATE,
        fp16=use_fp16,
        save_steps=SAVE_STEPS,
        logging_steps=LOGGING_STEPS,
        save_total_limit=3,
        report_to="none",
    )

    # import here to avoid top-level dependency until needed
    from trl import SFTTrainer

    # The model is already wrapped by get_peft_model above, so peft_config is not passed
    # again; the trainer should not apply the same LoRA config a second time.
    # NOTE(review): dataset_text_field / max_seq_length / tokenizer are accepted as
    # SFTTrainer keywords only by some trl releases — TODO confirm against the installed one.
    trainer = SFTTrainer(
        model=model,
        train_dataset=ds,
        dataset_text_field="text",
        max_seq_length=MAX_SEQ_LENGTH,
        tokenizer=tokenizer,
        args=training_args,
        packing=False,
    )

    # 7) Dry-run: single step to validate forward/backward
    # train() does not take a max_steps keyword; the step limit lives on the training
    # arguments, so cap it temporarily and restore it afterwards.
    print("Running a 1-step dry-run to validate training loop...")
    configured_max_steps = trainer.args.max_steps
    trainer.args.max_steps = 1
    try:
        trainer.train()
        print("Dry-run succeeded.")
    except Exception as e:
        print("Dry-run failed — inspect traceback. Error:", e)
        raise
    finally:
        trainer.args.max_steps = configured_max_steps
    # Drop the dry-run optimizer/scheduler so the full run builds fresh ones for its own
    # schedule. The single dry-run weight update itself is kept.
    trainer.optimizer = None
    trainer.lr_scheduler = None

    # 8) Full training
    print("Starting full training...")
    trainer.train()
    print("Training finished.")

    # 9) Save PEFT adapters
    print("Saving adapters to:", OUTPUT_DIR)
    trainer.save_model(OUTPUT_DIR)
    print("Saved. You can load later with PeftModel.from_pretrained(base_model, OUTPUT_DIR)")

# Run the fine-tuning pipeline only when executed as the main module (script run or notebook cell),
# not when this code is imported.
if __name__ == "__main__":
    main()


Loading model + tokenizer (robust loader)...
Trying to load LlavaForConditionalGeneration with device_map=auto (quant_config=yes)...


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

In [None]:
# (This cell was a duplicate of robust_llava_loader.py logic and is now removed for clarity)

In [None]:
# (Empty cell placeholder removed for clarity)