# FunctionGemma CLI Tool Recommender

Fine-tune FunctionGemma (270M) to recommend CLI tools based on:
- User queries
- Operating system (macOS, Linux, Ubuntu, Windows, etc.)
- Shell type (bash, zsh, fish, etc.)
- User preferences

**Based on official Unsloth FunctionGemma notebook with:**
- `tokenizer.apply_chat_template()` for proper formatting
- `train_on_responses_only()` for better training
- Higher LoRA rank (r=128) for quality
- HF-style tool schemas

**Requirements:** Free Colab T4 GPU

## 1. Install Dependencies

In [None]:
%%capture
# Install Unsloth and the libraries it needs. The %%capture cell magic on the
# first line captures this cell's output instead of displaying it.
import os, re
# An environment variable whose name contains "COLAB_" is taken as the sign
# that this is running on Colab.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: plain install, letting pip resolve dependencies.
    !pip install unsloth
else:
    # On Colab: read the installed torch "major.minor" version and pick the
    # xformers release pinned for it (2.9 -> 0.0.33.post1, 2.8 -> 0.0.32.post2,
    # anything else -> 0.0.29.post3).
    import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
    # --no-deps installs only the named packages, without pulling in their
    # dependencies; presumably to leave Colab's preinstalled torch untouched.
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# These two pins run in both branches (they are outside the if/else).
!pip install transformers==4.57.3
!pip install --no-deps trl==0.22.2

## 2. Load Model and Tokenizer

In [None]:
from unsloth import FastLanguageModel
import torch

# Sequence-length limit handed to the loader below.
max_seq_length = 4096

# All loader options in one place: the 4-bit and 8-bit flags are off, the
# 16-bit flag is on, and full fine-tuning is disabled (LoRA adapters are added
# in a later cell).
load_options = {
    "model_name": "unsloth/functiongemma-270m-it",
    "max_seq_length": max_seq_length,
    "load_in_4bit": False,
    "load_in_8bit": False,
    "load_in_16bit": True,
    "full_finetuning": False,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

# Quick confirmation of what was loaded and how large it is.
print(f"Model loaded: {model.config._name_or_path}")
print(f"Parameters: {model.num_parameters():,}")

## 3. Add LoRA Adapters (Higher Rank for Quality)

In [None]:
# Projection layers that receive LoRA adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Adapter hyper-parameters. alpha is kept at twice the rank; the higher rank
# is chosen for quality.
lora_settings = {
    "r": 128,
    "lora_alpha": 256,
    "lora_dropout": 0,
    "target_modules": lora_targets,
    "bias": "none",
    # Unsloth's gradient-checkpointing mode (original note: 30% less VRAM).
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": False,
    "loftq_config": None,
}
model = FastLanguageModel.get_peft_model(model, **lora_settings)

print("LoRA adapters applied (r=128, alpha=256)")

## 4. Define Tool Schema (HF-style)

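Tools are described with Hugging Face-style function schemas (a JSON Schema for the parameters) and passed to the chat template through the `tools` argument. This is the proper way to define tools for FunctionGemma.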

In [None]:
# CLI tool recommendation function schema (HF-style), assembled from named
# sub-schemas so each nesting level can be read on its own.

# One entry of "primary_tools": every field is required.
_PRIMARY_TOOL_ITEM = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "category": {"type": "string"},
        "confidence": {"type": "number"},
        "reason": {"type": "string"},
    },
    "required": ["name", "category", "confidence", "reason"],
}

# One entry of "alternative_tools": "improvements" is the only optional field.
_ALTERNATIVE_TOOL_ITEM = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "install_cmd": {"type": "string"},
        "reason": {"type": "string"},
        "improvements": {"type": "array", "items": {"type": "string"}},
    },
    "required": ["name", "install_cmd", "reason"],
}

# Arguments accepted by recommend_tools; "alternative_tools" is optional.
_RECOMMEND_TOOLS_PARAMETERS = {
    "type": "object",
    "properties": {
        "primary_tools": {
            "type": "array",
            "description": "Tools most likely installed by default",
            "items": _PRIMARY_TOOL_ITEM,
        },
        "alternative_tools": {
            "type": "array",
            "description": "Modern alternatives that may need installation",
            "items": _ALTERNATIVE_TOOL_ITEM,
        },
        "task_category": {"type": "string"},
    },
    "required": ["primary_tools", "task_category"],
}

CLI_TOOL_SCHEMA = {
    "type": "function",
    "function": {
        "name": "recommend_tools",
        "description": "Recommend CLI tools for a task based on OS, shell, and preferences.",
        "parameters": _RECOMMEND_TOOLS_PARAMETERS,
    },
}

# The list form is what gets passed as `tools=` to the chat template.
tools = [CLI_TOOL_SCHEMA]
print("Tool schema defined")

## 5. Prepare Training Data

Using `tokenizer.apply_chat_template()` with tools parameter - the proper way to format FunctionGemma prompts.

In [None]:
import json

# Training examples with thinking + tool calls.
#
# Every example has the same shape: a fixed system prompt, a user turn carrying
# OS / shell / query, and an assistant turn with a <think> note plus a single
# recommend_tools call. Only the varying parts are listed in the table below;
# _make_example expands each row into the full chat structure.


def _make_example(os_type, shell, query, thinking, primary, alternative, task_category):
    """Expand one spec row into a chat-format training example.

    Args:
        os_type: Value placed after "OS: " in the user turn.
        shell: Value placed after "Shell: " in the user turn.
        query: The user's request, placed after "Query: ".
        thinking: Reasoning text wrapped in <think>...</think>.
        primary: Dict for the single "primary_tools" entry.
        alternative: Dict for the single "alternative_tools" entry.
        task_category: Value of the call's "task_category" argument.

    Returns:
        A dict with one "messages" list (system, user, assistant).
    """
    call_arguments = {
        "primary_tools": [primary],
        "alternative_tools": [alternative],
        "task_category": task_category,
    }
    tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "recommend_tools", "arguments": call_arguments},
    }
    return {
        "messages": [
            {"role": "system", "content": "You are a CLI tool recommendation assistant."},
            {"role": "user", "content": f"OS: {os_type}, Shell: {shell}\nQuery: {query}"},
            {
                "role": "assistant",
                "content": f"<think>{thinking}</think>",
                "tool_calls": [tool_call],
            },
        ]
    }


# (os, shell, query, thinking, primary tool, alternative tool, task category)
_EXAMPLE_SPECS = [
    (
        "darwin", "zsh", "find all python files",
        "User wants to find Python files. On macOS: find is installed by default (BSD). fd is a modern alternative.",
        {"name": "find", "category": "file_management", "confidence": 1.0, "reason": "Standard POSIX tool"},
        {"name": "fd", "install_cmd": "brew install fd", "reason": "Modern find alternative", "improvements": ["faster", "simpler"]},
        "file_management",
    ),
    (
        "ubuntu", "bash", "search for error in log files",
        "User wants text search. On Ubuntu: grep is installed by default. ripgrep is faster but needs install.",
        {"name": "grep", "category": "search", "confidence": 1.0, "reason": "Standard GNU grep"},
        {"name": "rg", "install_cmd": "apt install ripgrep", "reason": "Fastest grep", "improvements": ["faster", "gitignore"]},
        "search",
    ),
    (
        "darwin", "zsh", "parse JSON API response",
        "User needs JSON parsing. curl is on macOS by default. jq is the standard for JSON but needs install.",
        {"name": "curl", "category": "network", "confidence": 1.0, "reason": "Built-in HTTP client"},
        {"name": "jq", "install_cmd": "brew install jq", "reason": "JSON processor", "improvements": ["query syntax", "filtering"]},
        "json_processing",
    ),
    (
        "linux", "bash", "check disk usage",
        "User wants disk usage info. du is standard POSIX. ncdu is interactive alternative.",
        {"name": "du", "category": "disk", "confidence": 1.0, "reason": "Standard POSIX tool"},
        {"name": "ncdu", "install_cmd": "apt install ncdu", "reason": "Interactive disk analyzer", "improvements": ["interactive", "visual"]},
        "disk",
    ),
    (
        "darwin", "fish", "monitor processes",
        "Process monitoring on macOS. top is always available. htop has better UI but needs brew.",
        {"name": "top", "category": "process", "confidence": 1.0, "reason": "Built-in process monitor"},
        {"name": "htop", "install_cmd": "brew install htop", "reason": "Interactive viewer", "improvements": ["colors", "mouse", "tree"]},
        "process",
    ),
]

training_examples = [_make_example(*spec) for spec in _EXAMPLE_SPECS]

# Format using apply_chat_template with tools
def format_example(example):
    """Render one training example as chat-template text without a leading BOS.

    Args:
        example: Dict with a "messages" list (system / user / assistant turns).

    Returns:
        The rendered conversation as a string. No generation prompt is
        appended, and a leading "<bos>" is removed when the template emitted
        one (presumably because tokenization during training adds its own
        BOS -- confirm against the trainer's tokenizer settings).
    """
    bos_marker = "<bos>"
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tools=tools,
        add_generation_prompt=False,
        tokenize=False,
    )
    if not rendered.startswith(bos_marker):
        return rendered
    # Drop exactly the marker's length from the front.
    return rendered[len(bos_marker):]

# Render every training example to text once, in order.
formatted_data = list(map(format_example, training_examples))

print(f"Prepared {len(formatted_data)} training examples")
print("\nSample formatted example:")
print("=" * 50)
# Show only the first 800 characters of the first example as a sanity check.
sample_preview = formatted_data[0][:800]
print(sample_preview + "...")

## 6. Create Dataset

In [None]:
from datasets import Dataset

# Single-column dataset: one rendered conversation per row under "text",
# the field name the trainer config reads.
text_column = {"text": formatted_data}
dataset = Dataset.from_dict(text_column)
print(f"Dataset size: {len(dataset)}")

## 7. Configure Trainer with train_on_responses_only

This masks the user turns so that the loss is computed only on the model's (assistant) responses, which significantly improves quality.

In [None]:
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import train_on_responses_only

# Build the supervised fine-tuning trainer over the "text" column of `dataset`.
# No evaluation set is used.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    eval_dataset=None,
    args=SFTConfig(
        dataset_text_field="text",
        # 4 sequences per step x 2 accumulation steps = 8 sequences per
        # optimizer update.
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        warmup_steps=10,
        # NOTE(review): max_steps is an int and its documented "disabled" value
        # is -1, not None; for epoch-based training set max_steps=-1 together
        # with num_train_epochs -- confirm against the installed transformers.
        # NOTE(review): the dataset built above has only 5 rows, so 500 steps
        # repeats it many times -- confirm this is intended.
        max_steps=500,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.001,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

# IMPORTANT: Only train on assistant responses
# The two markers are the turn headers that precede user and model text in the
# rendered chat; the helper uses them to tell instruction spans from response
# spans.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<start_of_turn>user\n",
    response_part="<start_of_turn>model\n",
)

print("Trainer configured with train_on_responses_only")

## 8. Show Memory Stats

In [None]:
def _as_gb(num_bytes):
    """Convert a byte count to GB (1024-based), rounded to three decimals."""
    return round(num_bytes / 1024 / 1024 / 1024, 3)


# Baseline taken before training so the later peak-memory report can be read
# against it: reserved memory so far, and total capacity of GPU 0.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = _as_gb(torch.cuda.max_memory_reserved())
max_memory = _as_gb(gpu_stats.total_memory)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

## 9. Train

In [None]:
print("Starting training...")
trainer_stats = trainer.train()
print("Training complete!")

# Report wall-clock training time and the peak reserved GPU memory, the latter
# also as a share of the card's total memory (max_memory from the stats cell).
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
train_seconds = trainer_stats.metrics['train_runtime']
print(f"\nTraining time: {train_seconds:.1f}s")
used_percent = round(used_memory/max_memory*100, 1)
print(f"Peak memory: {used_memory} GB ({used_percent}%)")

## 10. Test Inference

In [None]:
# Enable inference mode
FastLanguageModel.for_inference(model)

def get_recommendation(query, os_type="darwin", shell="zsh"):
    """Generate a CLI tool recommendation for one query.

    The prompt is built the same way as the training text: system prompt,
    then a user turn of the form "OS: ..., Shell: ...\\nQuery: ...", rendered
    through the chat template with the tool schema attached.

    Args:
        query: The user's task description.
        os_type: Operating system name placed in the user turn.
        shell: Shell name placed in the user turn.

    Returns:
        The text the model generated for its turn, cut at the first
        "<end_of_turn>" if one was produced. Special tokens are kept so the
        tool-call markup stays visible. Sampling is enabled, so repeated calls
        may return different text.
    """
    messages = [
        {"role": "system", "content": "You are a CLI tool recommendation assistant."},
        {"role": "user", "content": f"OS: {os_type}, Shell: {shell}\nQuery: {query}"},
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        add_generation_prompt=True,
        tokenize=False,
    )
    # The training text has its template-rendered "<bos>" stripped before
    # tokenization (see format_example). Do the same here so the tokenizer call
    # below does not produce a second BOS that the model never saw in training.
    bos_marker = "<bos>"
    if prompt.startswith(bos_marker):
        prompt = prompt[len(bos_marker):]

    # Follow the model's device instead of assuming a fixed one.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=1.0,
        top_k=64,
        top_p=0.95,
        do_sample=True,
    )

    # Decode only the newly generated tokens. Splitting the full decode on the
    # model-turn marker would return the *last* turn if the model ran on and
    # opened another one.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    model_response = tokenizer.decode(generated_ids, skip_special_tokens=False)

    # Keep everything before the first end-of-turn marker (or all of it when
    # generation stopped without one).
    return model_response.split("<end_of_turn>")[0]

# Smoke-test prompts as (query, OS, shell); none of these queries appears
# verbatim in the training examples.
test_queries = [
    ("find all javascript files", "darwin", "zsh"),
    ("compress a folder", "ubuntu", "bash"),
    ("view running processes", "linux", "fish"),
]

separator = "=" * 50
for query, os_type, shell in test_queries:
    # Header block, then the raw model output for this prompt.
    print(f"\n{separator}")
    print(f"Query: {query} (OS: {os_type}, Shell: {shell})")
    print(separator)
    result = get_recommendation(query, os_type=os_type, shell=shell)
    print(result)

## 11. Save Model

In [None]:
# Save LoRA adapters, then the tokenizer, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("cli-tool-recommender-lora")
print("Model saved to cli-tool-recommender-lora/")

# Optional: Save merged 16-bit model
# model.save_pretrained_merged("cli-tool-recommender-merged", tokenizer, save_method="merged_16bit")

# Optional: Save as GGUF for llama.cpp (only Q8_0, BF16, F16 supported)
# model.save_pretrained_gguf("cli-tool-recommender-gguf", tokenizer, quantization_method="Q8_0")

## 12. Download Model

In [None]:
# Bundle the saved adapter directory into one archive (shell command run
# through IPython's "!" syntax).
!zip -r cli-tool-recommender-lora.zip cli-tool-recommender-lora/

# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime -- this cell is expected to fail elsewhere; confirm before reuse.
from google.colab import files
files.download('cli-tool-recommender-lora.zip')