Skip to content

Commit 7942f18

Browse files
authored
fix(performance): improve recall and retain performance on large banks (#469)
1 parent 5aff8e0 commit 7942f18

23 files changed

+2036
-830
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""Add partial indexes on memory_units temporal date fields for fast temporal retrieval
2+
3+
Revision ID: b3c4d5e6f7g8
4+
Revises: c1a2b3d4e5f6
5+
Create Date: 2026-03-02
6+
7+
The temporal retrieval entry-point query filters memory_units by occurred_start,
8+
occurred_end, and mentioned_at using OR conditions. Without dedicated indexes the
9+
planner falls back to a sequential scan of all bank rows after applying the
10+
(bank_id, fact_type) index, then re-checks each date field.
11+
12+
These three partial indexes give the planner bitmap-index scan options for the
13+
three most common date predicates, dramatically reducing the row set before any
14+
embedding computation is required.
15+
16+
All indexes are created CONCURRENTLY so the migration does not block writes on
17+
memory_units during production deployments. CONCURRENTLY requires running outside
18+
a transaction block; see migrations.py for how this is handled safely.
19+
"""
20+
21+
from collections.abc import Sequence
22+
23+
from alembic import context, op
24+
25+
revision: str = "b3c4d5e6f7g8"
26+
down_revision: str | Sequence[str] | None = "c1a2b3d4e5f6"
27+
branch_labels: str | Sequence[str] | None = None
28+
depends_on: str | Sequence[str] | None = None
29+
30+
31+
def _get_schema_prefix() -> str:
    """Return a quoted '"schema".' prefix, or '' when no target_schema is configured."""
    target = context.config.get_main_option("target_schema")
    if not target:
        return ""
    return f'"{target}".'
34+
35+
36+
def upgrade() -> None:
    """Create one partial (bank_id, fact_type, <date>) index per temporal column.

    Each index covers the corresponding "<date> BETWEEN $4 AND $5" predicate of
    the temporal retrieval query; the WHERE clause keeps NULL rows out of the
    index.
    """
    prefix = _get_schema_prefix()
    for column in ("occurred_start", "occurred_end", "mentioned_at"):
        # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so
        # end Alembic's open transaction first and let each statement execute
        # in its own implicit autocommit transaction.
        op.execute("COMMIT")
        op.execute(
            f"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memory_units_bank_{column} "
            f"ON {prefix}memory_units(bank_id, fact_type, {column}) "
            f"WHERE {column} IS NOT NULL"
        )
59+
60+
61+
def downgrade() -> None:
    """Drop the three temporal partial indexes in reverse creation order."""
    prefix = _get_schema_prefix()
    for column in ("mentioned_at", "occurred_end", "occurred_start"):
        # DROP INDEX CONCURRENTLY also requires running outside a transaction.
        op.execute("COMMIT")
        op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {prefix}idx_memory_units_bank_{column}")
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Enable pg_trgm extension and add GIN trigram index on entities.canonical_name
2+
3+
Revision ID: c1a2b3d4e5f6
4+
Revises: b4c5d6e7f8a9
5+
Create Date: 2026-03-02
6+
7+
Index is created CONCURRENTLY so the migration does not block writes on entities
8+
during production deployments. CONCURRENTLY requires running outside a transaction
9+
block; see migrations.py for how this is handled safely.
10+
"""
11+
12+
from collections.abc import Sequence
13+
14+
from alembic import context, op
15+
16+
revision: str = "c1a2b3d4e5f6"
17+
down_revision: str | Sequence[str] | None = "b4c5d6e7f8a9"
18+
branch_labels: str | Sequence[str] | None = None
19+
depends_on: str | Sequence[str] | None = None
20+
21+
22+
def _get_schema_prefix() -> str:
    """Return a quoted '"schema".' prefix, or '' when no target_schema is configured."""
    target = context.config.get_main_option("target_schema")
    if not target:
        return ""
    return f'"{target}".'
25+
26+
27+
def upgrade() -> None:
    """Enable pg_trgm and build the trigram GIN index on entities.canonical_name."""
    # pg_trgm ships with every standard PostgreSQL installation as a contrib
    # module; it supplies the trigram operators backing GIN similarity lookups
    # used for entity name matching.
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    prefix = _get_schema_prefix()
    # The GIN index turns % / similarity() queries on canonical_name into
    # sub-millisecond index lookups instead of full-table scans per bank.
    trgm_index_sql = (
        f"CREATE INDEX CONCURRENTLY IF NOT EXISTS entities_canonical_name_trgm_idx "
        f"ON {prefix}entities USING GIN (canonical_name gin_trgm_ops)"
    )
    # CONCURRENTLY cannot run inside a transaction block; end Alembic's open
    # transaction so the statement executes in implicit autocommit mode.
    op.execute("COMMIT")
    op.execute(trgm_index_sql)
40+
41+
42+
def downgrade() -> None:
    """Drop the trigram index; the pg_trgm extension is deliberately retained."""
    prefix = _get_schema_prefix()
    op.execute("COMMIT")
    op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {prefix}entities_canonical_name_trgm_idx")
    # pg_trgm itself is not dropped because other indexes may still depend on it.
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""Add covering and composite indexes to speed up link expansion graph retrieval.
2+
3+
Two indexes target the two bottlenecks identified by EXPLAIN ANALYZE on a 17M-row
4+
memory_links table:
5+
6+
1. idx_memory_links_to_type_weight (to_unit_id, link_type, weight DESC)
7+
The semantic incoming direction — finding facts that consider seeds as their
8+
nearest neighbour — currently hits an expensive BitmapAnd of two separate
9+
bitmap scans (to_unit_id bitmap ∩ link_type bitmap). A composite index
10+
on (to_unit_id, link_type) turns this into a single index scan and reduces
11+
latency from ~36 ms to < 5 ms per query.
12+
13+
2. idx_memory_links_entity_covering (from_unit_id) INCLUDE (to_unit_id, entity_id)
14+
WHERE link_type = 'entity'
15+
The entity co-occurrence expansion uses COUNT(DISTINCT ml.entity_id) and
16+
joins on ml.to_unit_id. Without a covering index the planner must read
17+
~2 500 heap pages to fetch entity_id and to_unit_id after the bitmap index
18+
scan, adding ~230 ms of random I/O. INCLUDE adds those two columns to the
19+
index leaf pages so the entire query can be served from the index (index-only
20+
scan), eliminating the heap reads entirely.
21+
Partial index (WHERE link_type = 'entity') keeps index size ~40 % smaller.
22+
23+
Both indexes are created with CONCURRENTLY so the migration does not block
24+
concurrent reads or writes on memory_links. CONCURRENTLY requires running
25+
outside a transaction block, so the migration emits an explicit COMMIT before
26+
each statement and uses IF NOT EXISTS for idempotency.
27+
28+
Revision ID: d2e3f4a5b6c7
29+
Revises: b3c4d5e6f7g8
30+
Create Date: 2026-03-02
31+
"""
32+
33+
from collections.abc import Sequence
34+
35+
from alembic import context, op
36+
37+
revision: str = "d2e3f4a5b6c7"
38+
down_revision: str | Sequence[str] | None = "b3c4d5e6f7g8"
39+
branch_labels: str | Sequence[str] | None = None
40+
depends_on: str | Sequence[str] | None = None
41+
42+
43+
def _get_schema_prefix() -> str:
    """Return a quoted '"schema".' prefix, or '' when no target_schema is configured."""
    target = context.config.get_main_option("target_schema")
    if not target:
        return ""
    return f'"{target}".'
46+
47+
48+
def upgrade() -> None:
    """Create both link-expansion indexes, each in its own autocommit statement."""
    prefix = _get_schema_prefix()

    statements = (
        # Semantic *incoming* direction in link_expansion_retrieval.py: replaces
        # the BitmapAnd of idx_memory_links_to_unit ∩ idx_memory_links_link_type
        # with a single composite index scan.
        f"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memory_links_to_type_weight "
        f"ON {prefix}memory_links(to_unit_id, link_type, weight DESC)",
        # Covering index for entity co-occurrence expansion: entity_id and
        # to_unit_id are read from the index leaf pages instead of the heap,
        # eliminating ~2 500 random heap-page reads per expansion query.
        f"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memory_links_entity_covering "
        f"ON {prefix}memory_links(from_unit_id) "
        f"INCLUDE (to_unit_id, entity_id) "
        f"WHERE link_type = 'entity'",
    )
    for create_sql in statements:
        # CREATE INDEX CONCURRENTLY cannot run inside a transaction block.
        # Commit the current Alembic transaction, then let the statement run
        # in its own implicit autocommit transaction. IF NOT EXISTS makes a
        # retried migration idempotent.
        op.execute("COMMIT")
        op.execute(create_sql)
76+
77+
78+
def downgrade() -> None:
    """Drop both link-expansion indexes in reverse creation order."""
    prefix = _get_schema_prefix()
    for index_name in ("idx_memory_links_entity_covering", "idx_memory_links_to_type_weight"):
        # DROP INDEX CONCURRENTLY also requires running outside a transaction.
        op.execute("COMMIT")
        op.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {prefix}{index_name}")

hindsight-api/hindsight_api/api/http.py

Lines changed: 26 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -2385,148 +2385,32 @@ async def api_stats(
23852385
):
23862386
"""Get statistics about memory nodes and links for a memory bank."""
23872387
try:
2388-
# Authenticate and set tenant schema
2389-
await app.state.memory._authenticate_tenant(request_context)
2390-
if app.state.memory._operation_validator:
2391-
from hindsight_api.extensions import BankReadContext
2392-
2393-
ctx = BankReadContext(bank_id=bank_id, operation="get_bank_stats", request_context=request_context)
2394-
await app.state.memory._validate_operation(
2395-
app.state.memory._operation_validator.validate_bank_read(ctx)
2396-
)
2397-
pool = await app.state.memory._get_pool()
2398-
async with acquire_with_retry(pool) as conn:
2399-
# Get node counts by fact_type
2400-
node_stats = await conn.fetch(
2401-
f"""
2402-
SELECT fact_type, COUNT(*) as count
2403-
FROM {fq_table("memory_units")}
2404-
WHERE bank_id = $1
2405-
GROUP BY fact_type
2406-
""",
2407-
bank_id,
2408-
)
2409-
2410-
# Get link counts by link_type
2411-
link_stats = await conn.fetch(
2412-
f"""
2413-
SELECT ml.link_type, COUNT(*) as count
2414-
FROM {fq_table("memory_links")} ml
2415-
JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
2416-
WHERE mu.bank_id = $1
2417-
GROUP BY ml.link_type
2418-
""",
2419-
bank_id,
2420-
)
2421-
2422-
# Get link counts by fact_type (from nodes)
2423-
link_fact_type_stats = await conn.fetch(
2424-
f"""
2425-
SELECT mu.fact_type, COUNT(*) as count
2426-
FROM {fq_table("memory_links")} ml
2427-
JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
2428-
WHERE mu.bank_id = $1
2429-
GROUP BY mu.fact_type
2430-
""",
2431-
bank_id,
2432-
)
2433-
2434-
# Get link counts by fact_type AND link_type
2435-
link_breakdown_stats = await conn.fetch(
2436-
f"""
2437-
SELECT mu.fact_type, ml.link_type, COUNT(*) as count
2438-
FROM {fq_table("memory_links")} ml
2439-
JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
2440-
WHERE mu.bank_id = $1
2441-
GROUP BY mu.fact_type, ml.link_type
2442-
""",
2443-
bank_id,
2444-
)
2445-
2446-
# Get pending and failed operations counts
2447-
ops_stats = await conn.fetch(
2448-
f"""
2449-
SELECT status, COUNT(*) as count
2450-
FROM {fq_table("async_operations")}
2451-
WHERE bank_id = $1
2452-
GROUP BY status
2453-
""",
2454-
bank_id,
2455-
)
2456-
ops_by_status = {row["status"]: row["count"] for row in ops_stats}
2457-
pending_operations = ops_by_status.get("pending", 0)
2458-
failed_operations = ops_by_status.get("failed", 0)
2459-
2460-
# Get document count
2461-
doc_count_result = await conn.fetchrow(
2462-
f"""
2463-
SELECT COUNT(*) as count
2464-
FROM {fq_table("documents")}
2465-
WHERE bank_id = $1
2466-
""",
2467-
bank_id,
2468-
)
2469-
total_documents = doc_count_result["count"] if doc_count_result else 0
2470-
2471-
# Get consolidation stats from memory-level tracking
2472-
consolidation_stats = await conn.fetchrow(
2473-
f"""
2474-
SELECT
2475-
MAX(consolidated_at) as last_consolidated_at,
2476-
COUNT(*) FILTER (WHERE consolidated_at IS NULL AND fact_type IN ('experience', 'world')) as pending
2477-
FROM {fq_table("memory_units")}
2478-
WHERE bank_id = $1
2479-
""",
2480-
bank_id,
2481-
)
2482-
last_consolidated_at = consolidation_stats["last_consolidated_at"] if consolidation_stats else None
2483-
pending_consolidation = consolidation_stats["pending"] if consolidation_stats else 0
2484-
2485-
# Count total observations (consolidated knowledge)
2486-
observation_count_result = await conn.fetchrow(
2487-
f"""
2488-
SELECT COUNT(*) as count
2489-
FROM {fq_table("memory_units")}
2490-
WHERE bank_id = $1 AND fact_type = 'observation'
2491-
""",
2492-
bank_id,
2493-
)
2494-
total_observations = observation_count_result["count"] if observation_count_result else 0
2495-
2496-
# Format results
2497-
nodes_by_type = {row["fact_type"]: row["count"] for row in node_stats}
2498-
links_by_type = {row["link_type"]: row["count"] for row in link_stats}
2499-
links_by_fact_type = {row["fact_type"]: row["count"] for row in link_fact_type_stats}
2500-
2501-
# Build detailed breakdown: {fact_type: {link_type: count}}
2502-
links_breakdown = {}
2503-
for row in link_breakdown_stats:
2504-
fact_type = row["fact_type"]
2505-
link_type = row["link_type"]
2506-
count = row["count"]
2507-
if fact_type not in links_breakdown:
2508-
links_breakdown[fact_type] = {}
2509-
links_breakdown[fact_type][link_type] = count
2510-
2511-
total_nodes = sum(nodes_by_type.values())
2512-
total_links = sum(links_by_type.values())
2513-
2514-
return BankStatsResponse(
2515-
bank_id=bank_id,
2516-
total_nodes=total_nodes,
2517-
total_links=total_links,
2518-
total_documents=total_documents,
2519-
nodes_by_fact_type=nodes_by_type,
2520-
links_by_link_type=links_by_type,
2521-
links_by_fact_type=links_by_fact_type,
2522-
links_breakdown=links_breakdown,
2523-
pending_operations=pending_operations,
2524-
failed_operations=failed_operations,
2525-
last_consolidated_at=(last_consolidated_at.isoformat() if last_consolidated_at else None),
2526-
pending_consolidation=pending_consolidation,
2527-
total_observations=total_observations,
2528-
)
2529-
2388+
stats = await app.state.memory.get_bank_stats(bank_id, request_context=request_context)
2389+
nodes_by_type = stats["node_counts"]
2390+
links_by_type = stats["link_counts"]
2391+
links_by_fact_type = stats["link_counts_by_fact_type"]
2392+
links_breakdown: dict[str, dict[str, int]] = {}
2393+
for row in stats["link_breakdown"]:
2394+
ft = row["fact_type"]
2395+
if ft not in links_breakdown:
2396+
links_breakdown[ft] = {}
2397+
links_breakdown[ft][row["link_type"]] = row["count"]
2398+
ops = stats["operations"]
2399+
return BankStatsResponse(
2400+
bank_id=bank_id,
2401+
total_nodes=sum(nodes_by_type.values()),
2402+
total_links=sum(links_by_type.values()),
2403+
total_documents=stats["total_documents"],
2404+
nodes_by_fact_type=nodes_by_type,
2405+
links_by_link_type=links_by_type,
2406+
links_by_fact_type=links_by_fact_type,
2407+
links_breakdown=links_breakdown,
2408+
pending_operations=ops.get("pending", 0),
2409+
failed_operations=ops.get("failed", 0),
2410+
last_consolidated_at=stats["last_consolidated_at"],
2411+
pending_consolidation=stats["pending_consolidation"],
2412+
total_observations=stats["total_observations"],
2413+
)
25302414
except OperationValidationError as e:
25312415
raise HTTPException(status_code=e.status_code, detail=e.reason)
25322416
except (AuthenticationError, HTTPException):

0 commit comments

Comments
 (0)