# CLI Generator

Fetches SDK documentation from a URL and uses an LLM (HuggingFace or Anthropic) to generate a `click`-based CLI tool. Every run is tracked in MLflow.

**Parameters** (papermill-compatible):
| Parameter | Description | Example |
|---|---|---|
| `model_provider` | `"hf"` or `"anthropic"` | `"anthropic"` |
| `model_name` | Model identifier | `"claude-opus-4-6"` |
| `doc_url` | URL of the SDK docs to turn into a CLI | `"https://..."` |

Output: `../repositories/<domain>/<model>/cli.py`  
Tracking: MLflow at `MLFLOW_TRACKING_URI` (default `http://127.0.0.1:5000`)

In [None]:
# Parameters – override these when running with papermill.
# model_provider: selects the LLM backend; later cells accept only "hf" or "anthropic".
model_provider = "anthropic"   # "hf" or "anthropic"
# model_name: model identifier handed to the selected provider's client.
model_name     = "claude-opus-4-6"  # HF e.g. "mistralai/Mistral-7B-Instruct-v0.3"
# doc_url: page whose text is fetched and embedded in the generation prompt;
# its network location also names the output folder.
doc_url        = "https://docs.anthropic.com/en/api/getting-started"

In [None]:
%pip install -q python-dotenv requests beautifulsoup4 anthropic huggingface_hub click mlflow

In [None]:
import os
import re
import time
from pathlib import Path
from urllib.parse import urlparse

import mlflow
from dotenv import load_dotenv

# Load environment variables from ../.env (relative to the working directory);
# later cells read their API tokens and the MLflow URI from os.environ.
load_dotenv(Path("../.env"))

# Validate with an explicit exception rather than `assert`: assertions are
# stripped when Python runs with -O, which would let a bad provider through
# and fail much later with a confusing NameError.
if model_provider not in ("hf", "anthropic"):
    raise ValueError(
        f"model_provider must be 'hf' or 'anthropic', got {model_provider!r}"
    )

# Echo the effective parameters so they are visible in the executed notebook.
print(f"Provider : {model_provider}")
print(f"Model    : {model_name}")
print(f"Doc URL  : {doc_url}")

In [None]:
# Point MLflow at the tracking server (overridable through the environment)
# and record every execution under the "cli-generator" experiment.
MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://127.0.0.1:5000")
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("cli-generator")

# Derive the same folder names used for the output path so we can tag the run.
parsed_url   = urlparse(doc_url)
repo_name    = parsed_url.netloc.replace(".", "-")
model_folder = model_name.replace("/", "--")

# A run left open by an earlier, interrupted execution in this kernel would
# make start_run() raise; close it as FAILED so this cell can be re-run.
if mlflow.active_run() is not None:
    mlflow.end_run(status="FAILED")

# The run stays open across the following cells and is ended by the cell that
# saves the generated CLI.
active_run = mlflow.start_run()
mlflow.set_tags({
    "repo": repo_name,
    "model_folder": model_folder,
})
mlflow.log_params({
    "model_provider": model_provider,
    "model_name": model_name,
    "doc_url": doc_url,
})

print(f"MLflow tracking URI : {MLFLOW_TRACKING_URI}")
print(f"Run ID              : {active_run.info.run_id}")

In [None]:
import requests
from bs4 import BeautifulSoup

# Download the documentation page; a non-2xx status aborts the notebook.
print(f"Fetching documentation from {doc_url} ...")
response = requests.get(
    doc_url, timeout=30, headers={"User-Agent": "Mozilla/5.0"}
)
response.raise_for_status()

# When the server declares no charset, requests assumes ISO-8859-1 for text/*
# bodies, which garbles UTF-8 pages. In that case use the encoding detected
# from the body itself before reading response.text.
if "charset" not in response.headers.get("Content-Type", "").lower():
    response.encoding = response.apparent_encoding

# Drop elements that carry no documentation text before extracting it.
soup = BeautifulSoup(response.text, "html.parser")
for tag in soup(["script", "style", "nav", "footer", "header"]):
    tag.decompose()

doc_text = soup.get_text(separator="\n", strip=True)

# Cap the amount of documentation embedded in the prompt and tell the model
# explicitly when the text was cut.
MAX_DOC_CHARS = 15_000
if len(doc_text) > MAX_DOC_CHARS:
    doc_text = doc_text[:MAX_DOC_CHARS] + "\n\n[... documentation truncated ...]"

# The metric reflects the text actually sent (including the truncation marker).
mlflow.log_metric("doc_chars", len(doc_text))
print(f"Fetched {len(doc_text):,} characters of documentation.")

In [None]:
# Instantiate the client for the configured provider. Each SDK is imported
# inside its own branch, so only the selected provider's package is loaded.
if model_provider == "hf":
    from huggingface_hub import InferenceClient

    # The token may be supplied under either name; a non-empty HF_TOKEN wins.
    hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
    if hf_token:
        client = InferenceClient(model_name, token=hf_token)
    else:
        raise EnvironmentError("HF_TOKEN not found in environment / .env")
    print(f"HuggingFace InferenceClient ready: {model_name}")

elif model_provider == "anthropic":
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if api_key:
        client = anthropic.Anthropic(api_key=api_key)
    else:
        raise EnvironmentError("ANTHROPIC_API_KEY not found in environment / .env")
    print(f"Anthropic client ready: {model_name}")

In [None]:
# Build the single user message: the fetched documentation followed by the
# requirements for the generated CLI.
prompt = f"""You are an expert Python developer. Your task is to create a complete, production-ready CLI tool based on the SDK documentation below.

## SDK Documentation

{doc_text}

## Instructions

Generate a Python CLI tool (`cli.py`) that:
1. Uses the `click` library for argument/option parsing.
2. Exposes the main features and endpoints of the SDK as CLI commands and subcommands.
3. Reads credentials/tokens from environment variables (never hardcoded).
4. Includes `--help` text for every command and option.
5. Handles errors gracefully with informative messages.
6. Is self-contained and runnable with `python cli.py <command> --help`.

Return ONLY the Python code. No explanation, no markdown fences — raw Python starting with `#!/usr/bin/env python3`."""

mlflow.log_metric("prompt_chars", len(prompt))
print(f"Sending prompt ({len(prompt):,} chars) to model ...")

# Output budget shared by both providers; a reply that exhausts it is cut off
# mid-file, so the stop reason is checked below.
MAX_OUTPUT_TOKENS = 4096

t0 = time.perf_counter()

if model_provider == "hf":
    result = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=MAX_OUTPUT_TOKENS,
    )
    latency_s = time.perf_counter() - t0
    raw_response = result.choices[0].message.content
    stop_reason = result.choices[0].finish_reason
    truncated = stop_reason == "length"

elif model_provider == "anthropic":
    message = client.messages.create(
        model=model_name,
        max_tokens=MAX_OUTPUT_TOKENS,
        messages=[{"role": "user", "content": prompt}],
    )
    latency_s = time.perf_counter() - t0
    raw_response = message.content[0].text
    stop_reason = message.stop_reason
    truncated = stop_reason == "max_tokens"

mlflow.log_metrics({
    "response_chars": len(raw_response),
    "latency_s": round(latency_s, 3),
})
# Record why generation stopped so truncated runs can be filtered in MLflow.
mlflow.set_tag("stop_reason", str(stop_reason))
print(f"Received {len(raw_response):,} characters in {latency_s:.1f}s.")
if truncated:
    print(
        f"WARNING: the reply hit the {MAX_OUTPUT_TOKENS}-token limit; "
        "the generated code is probably incomplete."
    )

In [None]:
# One fenced block: opening fence, optional language tag, body, then either
# the closing fence or the end of the text (a reply cut off by the token
# limit never closes its fence).
_FENCE_RE = re.compile(
    r"```[ \t]*([\w+-]*)[ \t]*\r?\n(.*?)(?:```|\Z)", re.DOTALL
)
_PYTHON_TAGS = {"python", "python3", "py"}


def extract_python_code(text: str) -> str:
    """Return the Python source contained in a model reply.

    The reply is expected to be raw code, but models often wrap it in a
    Markdown fence. Fenced blocks are scanned in order:

    * the first block tagged ``python`` / ``python3`` / ``py`` (any case) wins;
    * otherwise the first block without a language tag is used;
    * otherwise (no fence, or only blocks in other languages) the whole reply
      is returned.

    An opening fence without a closing one is treated as running to the end
    of the text, so a truncated reply does not keep its fence line.

    Args:
        text: Raw text returned by the model.

    Returns:
        The selected code with surrounding whitespace stripped.
    """
    untagged = None
    for match in _FENCE_RE.finditer(text):
        tag = match.group(1).lower()
        body = match.group(2)
        if tag in _PYTHON_TAGS:
            return body.strip()
        # Remember the first untagged block as a fallback, but keep looking
        # for an explicitly Python-tagged one.
        if not tag and untagged is None:
            untagged = body
    if untagged is not None:
        return untagged.strip()
    return text.strip()

# Reduce the model reply to plain Python source.
cli_code = extract_python_code(raw_response)

# Show a banner, the first 500 characters of the code, and an ellipsis line.
print("--- Preview (first 500 chars) ---", cli_code[:500], "...", sep="\n")

In [None]:
# Write the generated CLI to ../repositories/<repo_name>/<model_folder>/cli.py,
# creating the folders on first use.
output_dir = Path("..") / "repositories" / repo_name / model_folder
output_dir.mkdir(parents=True, exist_ok=True)

cli_file = output_dir / "cli.py"
cli_file.write_text(cli_code, encoding="utf-8")

mlflow.log_metric("cli_chars", len(cli_code))
mlflow.log_artifact(str(cli_file.resolve()), artifact_path="generated")

# Capture the run before closing it: the MLflow UI addresses experiments and
# runs by their IDs, not by the experiment name, so the link needs both.
finished_run = mlflow.active_run()
mlflow.end_run()

print(f"CLI saved to : {cli_file.resolve()}")
print(f"Run with     : python {cli_file} --help")
print(
    f"MLflow run   : {MLFLOW_TRACKING_URI.rstrip('/')}/#/experiments/"
    f"{finished_run.info.experiment_id}/runs/{finished_run.info.run_id}"
)