workglow-dev · sroussey · May 10, 2026 · May 10, 2026 · May 10, 2026 · May 10, 2026
diff --git a/packages/ai/src/task/ChunkRetrievalTask.ts b/packages/ai/src/task/ChunkRetrievalTask.ts
@@ -48,7 +48,9 @@ const inputSchema = {
       enum: ["similarity", "hybrid"],
       title: "Retrieval Method",
       description:
-        "Retrieval strategy: 'similarity' (vector only) or 'hybrid' (vector + full-text).",
+        "Retrieval strategy: 'similarity' (vector only, scores are cosine similarity in [0,1]) " +
+        "or 'hybrid' (vector + full-text fused via Reciprocal Rank Fusion; scores are RRF " +
+        "fusion scores, NOT comparable to cosine similarity).",
       default: "similarity",
     },
     topK: {
@@ -66,7 +68,10 @@ const inputSchema = {
     scoreThreshold: {
       type: "number",
       title: "Score Threshold",
-      description: "Minimum similarity score threshold (0-1)",
+      description:
+        "Minimum cosine similarity score threshold (0-1). Applies only to method='similarity'; " +
+        "ignored for method='hybrid' because RRF fusion scores are not comparable to cosine " +
+        "similarity. Use topK to size hybrid results instead.",
       minimum: 0,
       maximum: 1,
       default: 0,
@@ -129,7 +134,19 @@ const outputSchema = {
       type: "array",
       items: { type: "number" },
       title: "Scores",
-      description: "Similarity scores for each result",
+      description:
+        "Per-result scores. For method='similarity', these are cosine similarity scores in " +
+        "[0,1]. For method='hybrid', these are Reciprocal Rank Fusion scores — small positive " +
+        "numbers (typically <0.05) that rank results but do not correspond to a similarity.",
+    },
+    scoreType: {
+      type: "string",
+      enum: ["cosine", "bm25", "rrf"],
+      title: "Score Type",
+      description:
+        "Discriminator naming the scorer used for `scores`: 'cosine' for similarity search " +
+        "and for hybrid fallback when the text query is empty/whitespace; 'rrf' for hybrid " +
+        "fusion. ('bm25' is reserved for direct text search and is not produced by this task.)",
     },
     vectors: {
       type: "array",
@@ -157,7 +174,7 @@ const outputSchema = {
       description: "The query used for retrieval (pass-through)",
     },
   },
-  required: ["chunks", "chunk_ids", "metadata", "scores", "count", "query"],
+  required: ["chunks", "chunk_ids", "metadata", "scores", "scoreType", "count", "query"],
   additionalProperties: false,
 } as const satisfies DataPortSchema;
 
@@ -216,7 +233,9 @@ export class ChunkRetrievalTask extends Task<
     }
     if (method === "hybrid" && !kb.supportsHybridSearch()) {
       throw new Error(
-        "The provided knowledge base does not support hybrid search. Use method: 'similarity' or a backend with hybrid support (e.g., Postgres with pgvector)."
+        "Hybrid retrieval requires a text index installed on the knowledge base. " +
+          "Install one via `kb.installTextIndex(new BM25Index())` or pass " +
+          "`textIndex` to `createKnowledgeBase`. Otherwise use method: 'similarity'."
       );
     }
 
@@ -250,7 +269,6 @@ export class ChunkRetrievalTask extends Task<
             textQuery: queryText!,
             topK,
             filter,
-            scoreThreshold,
             vectorWeight,
           })
         : await kb.similaritySearch(searchVector, {
@@ -264,11 +282,19 @@ export class ChunkRetrievalTask extends Task<
       return meta.text || JSON.stringify(meta);
     });
 
+    // Results from a single KB search all share one scoreType, so the first
+    // entry speaks for the batch. hybridSearch's empty-textQuery fallback can
+    // report "cosine" instead of "rrf" — the signal callers need to see. With
+    // no results, or an untagged result, infer the type from `method`.
+    const inferredScoreType = method === "hybrid" ? "rrf" : "cosine";
+    const scoreType = results[0]?.scoreType ?? inferredScoreType;
+
     const output: ChunkRetrievalTaskOutput = {
       chunks,
       chunk_ids: results.map((r) => r.chunk_id),
       metadata: results.map((r) => r.metadata),
       scores: results.map((r) => r.score),
+      scoreType,
       count: results.length,
       query,
     };

diff --git a/packages/indexeddb/src/storage/IndexedDbVectorStorage.ts b/packages/indexeddb/src/storage/IndexedDbVectorStorage.ts
@@ -18,7 +18,6 @@ import { getMetadataProperty, getVectorProperty } from "@workglow/storage";
 import type {
   ClientProvidedKeysOption,
   AnyVectorStorage,
-  HybridSearchOptions,
   IVectorStorage,
   VectorSearchOptions,
 } from "@workglow/storage";
@@ -40,25 +39,6 @@ function matchesFilter<Metadata>(metadata: Metadata, filter: Partial<Metadata>):
   return true;
 }
 
-/**
- * Simple full-text search scoring (keyword matching)
- */
-function textRelevance(text: string, query: string): number {
-  const textLower = text.toLowerCase();
-  const queryLower = query.toLowerCase();
-  const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 0);
-  if (queryWords.length === 0) {
-    return 0;
-  }
-  let matches = 0;
-  for (const word of queryWords) {
-    if (textLower.includes(word)) {
-      matches++;
-    }
-  }
-  return matches / queryWords.length;
-}
-
 /**
  * IndexedDB vector storage implementation.
  * Extends IndexedDbTabularStorage for storage.
@@ -168,55 +148,4 @@ export class IndexedDbVectorStorage<
 
     return topResults;
   }
-
-  async hybridSearch(query: TypedArray, options: HybridSearchOptions<Record<string, unknown>>) {
-    const { topK = 10, filter, scoreThreshold = 0, textQuery, vectorWeight = 0.7 } = options;
-
-    if (!textQuery || textQuery.trim().length === 0) {
-      // Fall back to regular vector search if no text query
-      return this.similaritySearch(query, { topK, filter, scoreThreshold });
-    }
-
-    const results: Array<Entity & { score: number }> = [];
-    const allEntities = (await this.getAll()) || [];
-
-    for (const entity of allEntities) {
-      // IndexedDB stores TypedArrays natively via structured clone (no deserialization needed)
-      const vector = entity[this.vectorPropertyName] as TypedArray;
-      const metadata = this.metadataPropertyName
-        ? (entity[this.metadataPropertyName] as Metadata)
-        : ({} as Metadata);
-
-      // Apply filter if provided
-      if (filter && !matchesFilter(metadata, filter)) {
-        continue;
-      }
-
-      // Calculate vector similarity
-      const vectorScore = cosineSimilarity(query, vector);
-
-      // Calculate text relevance (simple keyword matching)
-      const metadataText = Object.values(metadata).join(" ").toLowerCase();
-      const textScore = textRelevance(metadataText, textQuery);
-
-      // Combine scores
-      const combinedScore = vectorWeight * vectorScore + (1 - vectorWeight) * textScore;
-
-      // Apply threshold
-      if (combinedScore < scoreThreshold) {
-        continue;
-      }
-
-      results.push({
-        ...entity,
-        score: combinedScore,
-      } as Entity & { score: number });
-    }
-
-    // Sort by combined score descending and take top K
-    results.sort((a, b) => b.score - a.score);
-    const topResults = results.slice(0, topK);
-
-    return topResults;
-  }
 }
diff --git a/packages/knowledge-base/src/chunk/ChunkVectorStorageSchema.ts b/packages/knowledge-base/src/chunk/ChunkVectorStorageSchema.ts
@@ -60,7 +60,26 @@ export type ChunkVectorStorage = IVectorStorage<
   ChunkVectorPrimaryKey
 >;
 
+/**
+ * Discriminator for the scoring function used to produce a
+ * {@link ChunkSearchResult.score}. Callers (typically UI) use this to render
+ * the score appropriately, since the three scorers live on different scales:
+ *
+ * - `"cosine"`: cosine similarity in `[-1, 1]`, typically `[0, 1]` for text
+ *   embeddings. Absolute — higher means more similar.
+ * - `"bm25"`: BM25(F) score in `[0, ∞)`. Absolute but corpus-dependent — not
+ *   comparable across knowledge bases.
+ * - `"rrf"`: Reciprocal Rank Fusion score, bounded above by
+ *   `2 / (rrfK + 1)` (~`0.033` with the default `rrfK=60`). Rank-based, not
+ *   absolute — the magnitude is not a similarity, only an ordering signal.
+ *   Not comparable across queries.
+ */
+export type ScoreType = "cosine" | "bm25" | "rrf";
+
 /**
  * Search result with score
  */
-export type ChunkSearchResult = ChunkVectorEntity & { score: number };
+export type ChunkSearchResult = ChunkVectorEntity & {
+  score: number;
+  scoreType?: ScoreType;
+};