# Beam Search Debugging for Unsloth

This notebook helps debug the beam search `_reorder_cache` issue. It is meant to be run in Google Colab on a GPU runtime.

In [None]:
# Clone the fork with our fixes.
# The clone is skipped when a checkout already exists, so re-running this cell
# does not fail with "destination path 'unsloth' already exists".
![ -d unsloth/.git ] || git clone https://github.com/amrothemich/unsloth.git
# Switch the clone to the fix-reorder-cache branch (git -C runs the command inside unsloth/)
!git -C unsloth checkout fix-reorder-cache

In [None]:
# Install unsloth from our local fork (editable, so the checked-out branch is what gets imported).
# %pip is used instead of !pip so the packages are installed into the environment of the running kernel.
%pip install -e unsloth/
# Remaining libraries installed alongside the fork
%pip install transformers datasets accelerate bitsandbytes

In [None]:
# Minimal beam search reproduction
from unsloth import FastLanguageModel
import torch

# A small 4-bit checkpoint is enough for this test
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-bnb-4bit",
    max_seq_length=512,
    load_in_4bit=True,
)

# Wrap it as a PEFT model (r=16, lora_alpha=16) on the q/k/v/o projection modules
lora_targets = ["q_proj", "k_proj", "v_proj", "o_proj"]
model = FastLanguageModel.get_peft_model(
    model, r=16, target_modules=lora_targets, lora_alpha=16
)

# Run generation with two beams and ask for both sequences back
inputs = tokenizer("Hello, how are", return_tensors="pt").to("cuda")
beam_kwargs = dict(max_new_tokens=20, num_beams=2, num_return_sequences=2)
with torch.no_grad():
    outputs = model.generate(**inputs, **beam_kwargs)

# Only reached when generate() did not raise
print("Beam search successful!")
for i, output in enumerate(outputs):
    decoded = tokenizer.decode(output, skip_special_tokens=True)
    print(f"Sequence {i}: {decoded}")

In [None]:
# Diagnostics for when the beam search cell fails:
# show which class and module the model resolves to, and whether it exposes _reorder_cache
model_cls = model.__class__
print(f"Model class: {model_cls}")
print(f"Model module: {model_cls.__module__}")
print(f"Has _reorder_cache: {hasattr(model, '_reorder_cache')}")

# Repeat the check on the object held in model.base_model, when that attribute exists
if hasattr(model, 'base_model'):
    base = model.base_model
    print(f"Base model class: {base.__class__}")
    print(f"Base model has _reorder_cache: {hasattr(base, '_reorder_cache')}")