vectorize-io
diff --git a/hindsight-api/hindsight_api/api/http.py b/hindsight-api/hindsight_api/api/http.py
Lines changed: 2 additions & 1 deletion
diff --git a/hindsight-api/hindsight_api/config.py b/hindsight-api/hindsight_api/config.py
Lines changed: 10 additions & 4 deletions
diff --git a/hindsight-api/hindsight_api/main.py b/hindsight-api/hindsight_api/main.py
Lines changed: 2 additions & 1 deletion
diff --git a/hindsight-api/hindsight_api/worker/main.py b/hindsight-api/hindsight_api/worker/main.py
Lines changed: 4 additions & 8 deletions
@@ -1404,9 +1404,10 @@ async def lifespan(app: FastAPI):
                 worker_id=worker_id,
                 executor=memory.execute_task,
                 poll_interval_ms=config.worker_poll_interval_ms,
-                batch_size=config.worker_batch_size,
                 max_retries=config.worker_max_retries,
                 tenant_extension=getattr(memory, "_tenant_extension", None),
+                max_slots=config.worker_max_slots,
+                consolidation_max_slots=config.worker_consolidation_max_slots,
             )
             poller_task = asyncio.create_task(poller.run())
             logging.info(f"Worker poller started (worker_id={worker_id})")
 
@@ -143,8 +143,9 @@
 ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
 ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
 ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
-ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
 ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
+ENV_WORKER_MAX_SLOTS = "HINDSIGHT_API_WORKER_MAX_SLOTS"
+ENV_WORKER_CONSOLIDATION_MAX_SLOTS = "HINDSIGHT_API_WORKER_CONSOLIDATION_MAX_SLOTS"
 
 # Reflect agent settings
 ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
@@ -229,8 +230,9 @@
 DEFAULT_WORKER_ID = None  # Will use hostname if not specified
 DEFAULT_WORKER_POLL_INTERVAL_MS = 500  # Poll database every 500ms
 DEFAULT_WORKER_MAX_RETRIES = 3  # Max retries before marking task failed
-DEFAULT_WORKER_BATCH_SIZE = 10  # Tasks to claim per poll cycle
 DEFAULT_WORKER_HTTP_PORT = 8889  # HTTP port for worker metrics/health
+DEFAULT_WORKER_MAX_SLOTS = 10  # Total concurrent tasks per worker
+DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS = 2  # Max concurrent consolidation tasks per worker
 
 # Reflect agent settings
 DEFAULT_REFLECT_MAX_ITERATIONS = 10  # Max tool call iterations before forcing response
@@ -419,8 +421,9 @@ class HindsightConfig:
     worker_id: str | None
     worker_poll_interval_ms: int
     worker_max_retries: int
-    worker_batch_size: int
     worker_http_port: int
+    worker_max_slots: int
+    worker_consolidation_max_slots: int
 
     # Reflect agent settings
     reflect_max_iterations: int
@@ -582,8 +585,11 @@ def from_env(cls) -> "HindsightConfig":
             worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
             worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
             worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
-            worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
             worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
+            worker_max_slots=int(os.getenv(ENV_WORKER_MAX_SLOTS, str(DEFAULT_WORKER_MAX_SLOTS))),
+            worker_consolidation_max_slots=int(
+                os.getenv(ENV_WORKER_CONSOLIDATION_MAX_SLOTS, str(DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS))
+            ),
             # Reflect agent settings
             reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
         )
 
@@ -253,8 +253,9 @@ def release_lock():
             worker_id=config.worker_id,
             worker_poll_interval_ms=config.worker_poll_interval_ms,
             worker_max_retries=config.worker_max_retries,
-            worker_batch_size=config.worker_batch_size,
             worker_http_port=config.worker_http_port,
+            worker_max_slots=config.worker_max_slots,
+            worker_consolidation_max_slots=config.worker_consolidation_max_slots,
             reflect_max_iterations=config.reflect_max_iterations,
             mental_model_refresh_concurrency=config.mental_model_refresh_concurrency,
         )
 
@@ -124,12 +124,6 @@ def main():
         default=config.worker_poll_interval_ms,
         help=f"Poll interval in milliseconds (default: {config.worker_poll_interval_ms}, env: HINDSIGHT_API_WORKER_POLL_INTERVAL_MS)",
     )
-    parser.add_argument(
-        "--batch-size",
-        type=int,
-        default=config.worker_batch_size,
-        help=f"Tasks to claim per poll (default: {config.worker_batch_size}, env: HINDSIGHT_API_WORKER_BATCH_SIZE)",
-    )
     parser.add_argument(
         "--max-retries",
         type=int,
@@ -168,8 +162,9 @@ def main():
 
     print(f"Starting Hindsight Worker: {args.worker_id}")
     print(f"  Poll interval: {args.poll_interval}ms")
-    print(f"  Batch size: {args.batch_size}")
     print(f"  Max retries: {args.max_retries}")
+    print(f"  Max slots: {config.worker_max_slots}")
+    print(f"  Consolidation max slots: {config.worker_consolidation_max_slots}")
     print(f"  HTTP server: {args.http_host}:{args.http_port}")
     print()
 
@@ -213,9 +208,10 @@ async def run():
             worker_id=args.worker_id,
             executor=memory.execute_task,
             poll_interval_ms=args.poll_interval,
-            batch_size=args.batch_size,
             max_retries=args.max_retries,
             tenant_extension=tenant_extension,
+            max_slots=config.worker_max_slots,
+            consolidation_max_slots=config.worker_consolidation_max_slots,
         )
 
         # Create the HTTP app for metrics/health