Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions apps/vapi-moss/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# VAPI + Moss: Custom Tool Webhook Server

A webhook server that connects [VAPI](https://vapi.ai/) voice agents to [Moss](https://www.moss.dev/) semantic search via a Custom Tool. The LLM decides when to search and refines the query before sending it, resulting in better retrieval quality.

## Architecture

```
User speaks → VAPI STT → LLM refines query → tool-calls request → This server → Moss query (sub-10ms) → Results returned → LLM synthesizes answer → TTS
```

## Prerequisites

- [uv](https://docs.astral.sh/uv/getting-started/installation/)
- [ngrok](https://ngrok.com/) (for exposing localhost to VAPI)
- API keys:
- [Moss](https://portal.usemoss.dev) — semantic retrieval
- [VAPI](https://vapi.ai/) — voice agent platform

## Quick Start

1. **Configure environment:**

```bash
cp env.example .env
# Edit .env and fill in your Moss credentials
```

2. **Start the server:**

```bash
uv run uvicorn server:app --port 3001
```

3. **Expose with ngrok** (separate terminal):

```bash
ngrok http 3001
```

4. **Create a VAPI assistant with the Moss tool:**

```bash
curl -X POST https://api.vapi.ai/assistant \
-H "Authorization: Bearer $VAPI_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"name": "Moss Support Agent",
"model": {
"provider": "openai",
"model": "gpt-4o",
"messages": [
{
"role": "system",
"content": "You are a helpful customer support agent. Use the search_knowledge tool to look up answers before responding."
}
],
"tools": [
{
"type": "function",
"function": {
"name": "search_knowledge",
"description": "Search the knowledge base for relevant information. Refine the user question into a clear search query.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query to find relevant knowledge base articles"
}
},
"required": ["query"]
}
},
"server": {
"url": "https://YOUR_NGROK_URL/tool/search"
}
}
]
}
}'
```

5. **Test it** — call the assistant via VAPI dashboard or API.

## Configuration

| Variable | Default | Description |
|----------|---------|-------------|
| `MOSS_PROJECT_ID` | — | Moss project ID |
| `MOSS_PROJECT_KEY` | — | Moss project key |
| `MOSS_INDEX_NAME` | — | Moss index to query |
| `VAPI_WEBHOOK_SECRET` | — | Webhook secret for signature verification (leave empty to disable) |
8 changes: 8 additions & 0 deletions apps/vapi-moss/env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Moss — portal.usemoss.dev
MOSS_PROJECT_ID=your_moss_project_id
MOSS_PROJECT_KEY=your_moss_project_key
MOSS_INDEX_NAME=product-knowledge

# VAPI Webhook Secret — set when creating the Custom Tool via API
# Leave empty to disable signature verification (development only)
VAPI_WEBHOOK_SECRET=your_webhook_secret
26 changes: 26 additions & 0 deletions apps/vapi-moss/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[project]
name = "vapi-moss-demo"
version = "0.1.0"
description = "VAPI Custom Tool webhook server with Moss semantic retrieval"
requires-python = ">=3.10"
dependencies = [
"vapi-moss>=0.0.1",
"fastapi>=0.100.0",
"uvicorn>=0.20.0",
"python-dotenv>=1.0.0",
"loguru>=0.7.0",
]

[dependency-groups]
dev = [
"ruff>=0.1.0",
]

[tool.uv.sources]
vapi-moss = { path = "../../packages/vapi-moss" }

[tool.ruff]
line-length = 100

[tool.ruff.lint]
select = ["I"]
127 changes: 127 additions & 0 deletions apps/vapi-moss/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""VAPI Custom Tool webhook server powered by Moss semantic search.

Preloads a Moss index at startup for sub-10ms retrieval. When the LLM
decides to search, VAPI sends a tool-calls request with the LLM-refined
query; this server queries Moss and returns results.

Run::

uv run uvicorn server:app --port 3001
"""

import json
import logging
import os
from contextlib import asynccontextmanager

from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from vapi_moss import MossVapiSearch, verify_vapi_signature

load_dotenv(override=True)  # .env values take precedence over inherited shell vars

# Stdlib logging; format roughly mirrors uvicorn's default output.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s %(name)s: %(message)s",
)
logger = logging.getLogger("vapi_moss_server")

# --- Configuration ---

# Moss credentials and index name (see env.example). These are None when
# unset; presumably MossVapiSearch/load_index will fail at startup in that
# case — confirm against the vapi_moss package.
MOSS_PROJECT_ID = os.getenv("MOSS_PROJECT_ID")
MOSS_PROJECT_KEY = os.getenv("MOSS_PROJECT_KEY")
INDEX_NAME = os.getenv("MOSS_INDEX_NAME")
# Empty or missing secret disables webhook signature verification
# (development only — see README).
WEBHOOK_SECRET = os.getenv("VAPI_WEBHOOK_SECRET", "").strip()

# Module-level search client; its index is preloaded once at startup so
# request handling stays fast.
moss_search = MossVapiSearch(
    project_id=MOSS_PROJECT_ID,
    project_key=MOSS_PROJECT_KEY,
    index_name=INDEX_NAME,
)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Preload the Moss index at startup. Fail closed if it can't load.

    Any exception from ``load_index`` propagates and aborts server
    startup, rather than serving requests against a missing index.
    """
    await moss_search.load_index()
    logger.info("Moss index '%s' loaded — server ready", INDEX_NAME)
    yield  # no teardown needed; Moss client holds no resources we close here


# Wiring the lifespan handler ensures the index is loaded before the
# first request is served.
app = FastAPI(lifespan=lifespan)


def _parse_tool_arguments(tool_call: dict) -> dict:
    """Extract the arguments dict from a single VAPI tool call.

    Tool calls may carry arguments either OpenAI-style under
    ``function.arguments`` (possibly as a JSON-encoded string) or as a
    top-level ``parameters`` object. Returns ``{}`` when neither is
    present or parseable.
    """
    function = tool_call.get("function", {})
    params = function.get("arguments", {}) or tool_call.get("parameters", {})
    if isinstance(params, str):
        try:
            params = json.loads(params)
        except (json.JSONDecodeError, ValueError):
            params = {}
    return params if isinstance(params, dict) else {}


async def _search_result_text(query: str) -> str:
    """Query Moss and render the documents as a numbered text list.

    Returns "No results found." for an empty hit list and
    "Search unavailable." (after logging) on any backend/formatting
    failure, so one failing call never breaks the whole batch.
    """
    try:
        search_result = await moss_search.search(query)
        logger.info(
            "Query: %r — %d docs in %sms",
            query,
            len(search_result.documents),
            search_result.time_taken_ms,
        )
        # Format results as text for the LLM.
        lines = [
            f"{i}. {doc['content']}"
            for i, doc in enumerate(search_result.documents, 1)
        ]
        return "\n".join(lines) if lines else "No results found."
    except Exception:
        logger.exception("Moss search failed for query: %r", query)
        return "Search unavailable."


@app.post("/tool/search")
async def tool_search(request: Request):
    """Handle VAPI Custom Tool requests.

    VAPI sends (arguments may also appear as ``function.arguments``,
    see ``_parse_tool_arguments``):
        {"message": {"type": "tool-calls", "toolCallList": [
            {"id": "...", "name": "search_knowledge", "parameters": {"query": "..."}}
        ]}}

    We return:
        {"results": [{"toolCallId": "...", "result": "..."}]}

    Responds 401 on a missing/invalid signature (only when
    VAPI_WEBHOOK_SECRET is set) and 400 on malformed JSON or a
    non-tool-calls message.
    """
    raw_body = await request.body()

    # Verify signature over the raw bytes if a secret is configured;
    # fail closed on a missing or invalid signature.
    if WEBHOOK_SECRET:
        signature = request.headers.get("x-vapi-signature")
        if not signature or not verify_vapi_signature(raw_body, signature, WEBHOOK_SECRET):
            return JSONResponse({"results": []}, status_code=401)

    try:
        body = json.loads(raw_body)
    except (json.JSONDecodeError, ValueError):
        return JSONResponse({"results": []}, status_code=400)

    # Guard against valid JSON that is not an object (e.g. a list),
    # which previously raised AttributeError -> 500.
    message = body.get("message", {}) if isinstance(body, dict) else {}

    if message.get("type") != "tool-calls":
        return JSONResponse({"results": []}, status_code=400)

    # Process each tool call independently; per-call failures are
    # reported in that call's result, never as an HTTP error.
    results = []
    for tool_call in message.get("toolCallList", []):
        call_id = tool_call.get("id", "")
        params = _parse_tool_arguments(tool_call)
        # str() guards against non-string "query" values (e.g. numbers),
        # which previously raised AttributeError -> 500 for the batch.
        query = str(params.get("query") or "").strip()

        if not query:
            results.append({"toolCallId": call_id, "result": "No query provided."})
            continue

        result_text = await _search_result_text(query)
        results.append({"toolCallId": call_id, "result": result_text})

    return {"results": results}
Loading
Loading