vectorize-io
diff --git a/‎hindsight-api/hindsight_api/api/http.py‎
Lines changed: 17 additions & 3 deletions b/‎hindsight-api/hindsight_api/api/http.py‎
Lines changed: 17 additions & 3 deletions
diff --git a/‎hindsight-api/hindsight_api/engine/interface.py‎
Lines changed: 6 additions & 1 deletion b/‎hindsight-api/hindsight_api/engine/interface.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎hindsight-api/hindsight_api/engine/memory_engine.py‎
Lines changed: 14 additions & 3 deletions b/‎hindsight-api/hindsight_api/engine/memory_engine.py‎
Lines changed: 14 additions & 3 deletions
diff --git a/‎hindsight-api/tests/test_list_documents.py‎
Lines changed: 174 additions & 0 deletions b/‎hindsight-api/tests/test_list_documents.py‎
Lines changed: 174 additions & 0 deletions
diff --git a/‎hindsight-cli/src/api.rs‎
Lines changed: 2 additions & 0 deletions b/‎hindsight-cli/src/api.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎hindsight-clients/go/api/openapi.yaml‎
Lines changed: 25 additions & 1 deletion b/‎hindsight-clients/go/api/openapi.yaml‎
Lines changed: 25 additions & 1 deletion
@@ -3064,7 +3064,13 @@ async def api_delete_directive(
     )
     async def api_list_documents(
         bank_id: str,
-        q: str | None = None,
+        q: str | None = Query(
+            None, description="Case-insensitive substring filter on document ID (e.g. 'report' matches 'report-2024')"
+        ),
+        tags: list[str] | None = Query(None, description="Filter documents by tags"),
+        tags_match: str = Query(
+            "any_strict", description="How to match tags: 'any', 'all', 'any_strict', 'all_strict'"
+        ),
         limit: int = 100,
         offset: int = 0,
         request_context: RequestContext = Depends(get_request_context),
@@ -3074,13 +3080,21 @@ async def api_list_documents(
 
         Args:
             bank_id: Memory Bank ID (from path)
-            q: Search query (searches document ID and metadata)
+            q: Case-insensitive substring filter on document ID
+            tags: Filter documents by tags
+            tags_match: How to match tags (any, all, any_strict, all_strict)
             limit: Maximum number of results (default: 100)
             offset: Offset for pagination (default: 0)
         """
         try:
             data = await app.state.memory.list_documents(
-                bank_id=bank_id, search_query=q, limit=limit, offset=offset, request_context=request_context
+                bank_id=bank_id,
+                search_query=q,
+                tags=tags,
+                tags_match=tags_match,
+                limit=limit,
+                offset=offset,
+                request_context=request_context,
             )
             return data
         except OperationValidationError as e:
 
@@ -12,6 +12,7 @@
 if TYPE_CHECKING:
     from hindsight_api.engine.memory_engine import Budget
     from hindsight_api.engine.response_models import RecallResult, ReflectResult
+    from hindsight_api.engine.search.tags import TagsMatch
     from hindsight_api.models import RequestContext
 
 
@@ -337,6 +338,8 @@ async def list_documents(
         bank_id: str,
         *,
         search_query: str | None = None,
+        tags: list[str] | None = None,
+        tags_match: "TagsMatch" = "any_strict",
         limit: int = 100,
         offset: int = 0,
         request_context: "RequestContext",
@@ -346,7 +349,9 @@ async def list_documents(
 
         Args:
             bank_id: The memory bank ID.
-            search_query: Search query.
+            search_query: Case-insensitive substring filter on document ID.
+            tags: Filter by tags.
+            tags_match: How to match tags (any, all, any_strict, all_strict).
             limit: Maximum results.
             offset: Pagination offset.
             request_context: Request context for authentication.
 
@@ -184,7 +184,7 @@ def validate_sql_schema(sql: str) -> None:
 from .retain.types import RetainContentDict
 from .search import think_utils
 from .search.reranking import CrossEncoderReranker
-from .search.tags import TagsMatch
+from .search.tags import TagsMatch, build_tags_where_clause
 from .task_backend import BrokerTaskBackend, SyncTaskBackend, TaskBackend
 
 
@@ -4142,6 +4142,8 @@ async def list_documents(
         bank_id: str,
         *,
         search_query: str | None = None,
+        tags: list[str] | None = None,
+        tags_match: "TagsMatch" = "any_strict",
         limit: int = 100,
         offset: int = 0,
         request_context: "RequestContext",
@@ -4152,6 +4154,8 @@ async def list_documents(
         Args:
             bank_id: bank ID (required)
             search_query: Search in document ID
+            tags: Filter by tags
+            tags_match: How to match tags (any, all, any_strict, all_strict)
             limit: Maximum number of results
             offset: Offset for pagination
             request_context: Request context for authentication.
@@ -4182,7 +4186,16 @@ async def list_documents(
                 query_conditions.append(f"id ILIKE ${param_count}")
                 query_params.append(f"%{search_query}%")
 
+            tags_clause, tags_params, next_param = build_tags_where_clause(
+                tags, param_offset=param_count + 1, match=tags_match
+            )
+            query_params.extend(tags_params)
+            param_count = next_param - 1  # next_param is next available; convert to last used
+
             where_clause = "WHERE " + " AND ".join(query_conditions) if query_conditions else ""
+            if tags_clause:
+                # tags_clause starts with "AND", append after WHERE conditions
+                where_clause = where_clause + " " + tags_clause if where_clause else "WHERE " + tags_clause[4:].lstrip()
 
             # Get total count
             count_query = f"""
@@ -6038,8 +6051,6 @@ async def list_directives(
 
         async with acquire_with_retry(pool) as conn:
             # Build filters
-            from .search.tags import build_tags_where_clause
-
             filters = ["bank_id = $1"]
             params: list[Any] = [bank_id]
             param_idx = 2
 
@@ -0,0 +1,174 @@
+"""
+Tests for list_documents pagination and tags filtering.
+"""
+from datetime import datetime, timezone
+
+import pytest
+
+
+async def _retain_doc(memory, bank_id, document_id, tags, request_context):
+    """Retain one document under `document_id`; falsy `tags` are sent as None. Uses gibberish content
+    to avoid LLM fact extraction (documents are persisted even with zero facts)."""
+    await memory.retain_batch_async(
+        bank_id=bank_id,
+        contents=[{"content": f"xyzabc123 !@# $$$ {document_id}"}],
+        document_id=document_id,
+        document_tags=tags or None,
+        request_context=request_context,
+    )
+
+
+@pytest.mark.asyncio
+async def test_list_documents_offset_pagination(memory, request_context):
+    """offset skips leading items of the baseline listing while total stays the full count."""
+    bank_id = f"test_list_docs_offset_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        for i in range(4):
+            await _retain_doc(memory, bank_id, f"doc-{i:02d}", [], request_context)
+
+        # Baseline page: later pages are compared to this order (presumably created_at DESC; not asserted)
+        all_docs = await memory.list_documents(
+            bank_id=bank_id, limit=10, offset=0, request_context=request_context
+        )
+        assert all_docs["total"] == 4
+        assert len(all_docs["items"]) == 4
+        all_ids = [d["id"] for d in all_docs["items"]]
+
+        # offset=2 should skip the first two baseline items and return the remaining two, in order
+        page2 = await memory.list_documents(
+            bank_id=bank_id, limit=10, offset=2, request_context=request_context
+        )
+        assert page2["total"] == 4  # total is the full count, unaffected by offset
+        assert len(page2["items"]) == 2
+        assert [d["id"] for d in page2["items"]] == all_ids[2:]
+
+        # offset beyond total returns an empty items list but still the correct total
+        beyond = await memory.list_documents(
+            bank_id=bank_id, limit=10, offset=10, request_context=request_context
+        )
+        assert beyond["total"] == 4
+        assert beyond["items"] == []
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
+
+
+@pytest.mark.asyncio
+async def test_list_documents_tags_filter_any_strict(memory, request_context):
+    """any_strict returns only docs carrying a requested tag; untagged docs are excluded from items and total."""
+    bank_id = f"test_list_docs_tags_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        await _retain_doc(memory, bank_id, "doc-alpha", ["team-a"], request_context)
+        await _retain_doc(memory, bank_id, "doc-beta", ["team-b"], request_context)
+        await _retain_doc(memory, bank_id, "doc-both", ["team-a", "team-b"], request_context)
+        await _retain_doc(memory, bank_id, "doc-untagged", [], request_context)
+
+        # any_strict: only docs with at least one of the given tags; doc-beta and doc-untagged are excluded
+        result = await memory.list_documents(
+            bank_id=bank_id,
+            tags=["team-a"],
+            tags_match="any_strict",
+            request_context=request_context,
+        )
+        ids = {d["id"] for d in result["items"]}
+        assert ids == {"doc-alpha", "doc-both"}
+        assert result["total"] == 2
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
+
+
+@pytest.mark.asyncio
+async def test_list_documents_tags_filter_any_includes_untagged(memory, request_context):
+    """'any' mode returns matching tagged docs plus untagged docs, and total counts only those."""
+    bank_id = f"test_list_docs_tags_any_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        await _retain_doc(memory, bank_id, "doc-tagged", ["team-a"], request_context)
+        await _retain_doc(memory, bank_id, "doc-other", ["team-b"], request_context)
+        await _retain_doc(memory, bank_id, "doc-untagged", [], request_context)
+
+        result = await memory.list_documents(
+            bank_id=bank_id,
+            tags=["team-a"],
+            tags_match="any",
+            request_context=request_context,
+        )
+        ids = {d["id"] for d in result["items"]}
+        # "any" includes untagged + matching tagged; the non-matching doc-other is excluded
+        assert ids == {"doc-tagged", "doc-untagged"}
+        # total must honour the tags filter too, mirroring the any_strict test's total check
+        assert result["total"] == 2
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
+
+
+@pytest.mark.asyncio
+async def test_list_documents_tags_filter_all_strict(memory, request_context):
+    """all_strict returns only docs carrying every requested tag; partial matches and untagged docs are excluded."""
+    bank_id = f"test_list_docs_tags_all_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        await _retain_doc(memory, bank_id, "doc-a-only", ["team-a"], request_context)
+        await _retain_doc(memory, bank_id, "doc-a-and-b", ["team-a", "team-b"], request_context)
+        await _retain_doc(memory, bank_id, "doc-untagged", [], request_context)
+
+        result = await memory.list_documents(
+            bank_id=bank_id,
+            tags=["team-a", "team-b"],
+            tags_match="all_strict",
+            request_context=request_context,
+        )
+        ids = {d["id"] for d in result["items"]}
+        assert ids == {"doc-a-and-b"}
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
+
+
+@pytest.mark.asyncio
+async def test_list_documents_no_tags_filter_returns_all(memory, request_context):
+    """With tags=None (tags_match left at its default), tagged and untagged documents are all returned."""
+    bank_id = f"test_list_docs_no_tags_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        await _retain_doc(memory, bank_id, "doc-tagged", ["team-a"], request_context)
+        await _retain_doc(memory, bank_id, "doc-untagged", [], request_context)
+
+        result = await memory.list_documents(
+            bank_id=bank_id,
+            tags=None,
+            request_context=request_context,
+        )
+        ids = {d["id"] for d in result["items"]}
+        assert ids == {"doc-tagged", "doc-untagged"}
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
+
+
+@pytest.mark.asyncio
+async def test_list_documents_tags_and_search_query_combined(memory, request_context):
+    """tags and search_query are ANDed: a doc matching only the tag or only the ID substring is excluded."""
+    bank_id = f"test_list_docs_tags_q_{datetime.now(timezone.utc).timestamp()}"
+
+    try:
+        await _retain_doc(memory, bank_id, "report-2024", ["team-a"], request_context)
+        await _retain_doc(memory, bank_id, "report-2025", ["team-b"], request_context)
+        await _retain_doc(memory, bank_id, "summary-2024", ["team-a"], request_context)
+
+        result = await memory.list_documents(
+            bank_id=bank_id,
+            search_query="report",
+            tags=["team-a"],
+            tags_match="any_strict",
+            request_context=request_context,
+        )
+        ids = {d["id"] for d in result["items"]}
+        assert ids == {"report-2024"}
+
+    finally:
+        await memory.delete_bank(bank_id, request_context=request_context)
@@ -300,6 +300,8 @@ impl ApiClient {
                 offset.map(|o| o as i64),
                 q,
                 None,
+                None,
+                None,
             ).await?;
             Ok(response.into_inner())
         })
 
@@ -1173,14 +1173,38 @@ paths:
           title: Bank Id
           type: string
         style: simple
-      - explode: true
+      - description: Case-insensitive substring filter on document ID (e.g. 'report'
+          matches 'report-2024')
+        explode: true
         in: query
         name: q
         required: false
         schema:
           nullable: true
           type: string
         style: form
+      - description: Filter documents by tags
+        explode: true
+        in: query
+        name: tags
+        required: false
+        schema:
+          items:
+            type: string
+          nullable: true
+          type: array
+        style: form
+      - description: "How to match tags: 'any', 'all', 'any_strict', 'all_strict'"
+        explode: true
+        in: query
+        name: tags_match
+        required: false
+        schema:
+          default: any_strict
+          description: "How to match tags: 'any', 'all', 'any_strict', 'all_strict'"
+          title: Tags Match
+          type: string
+        style: form
       - explode: true
         in: query
         name: limit