Skip to content

Commit 0430588

Browse files
authored
fix: hindsight-embed profiles are not loaded correctly (#316)
* fix: hindsight-embed profiles are not loaded correctly * fix: hindsight-embed profiles are not loaded correctly
1 parent 2af0e08 commit 0430588

File tree

7 files changed

+498
-54
lines changed

7 files changed

+498
-54
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""
2+
Tests for configuration validation.
3+
4+
Verifies that config validation catches invalid parameter combinations.
5+
"""
6+
7+
import os
8+
9+
import pytest
10+
11+
12+
@pytest.fixture(autouse=True)
def setup_test_env():
    """Snapshot config-related env vars, run the test, then restore them.

    Also clears the cached config before and after each test so every test
    reads a fresh configuration from the environment.
    """
    from hindsight_api.config import clear_config_cache

    # Environment variables the tests in this module may mutate.
    tracked = (
        "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS",
        "HINDSIGHT_API_RETAIN_CHUNK_SIZE",
        "HINDSIGHT_API_LLM_PROVIDER",
        "HINDSIGHT_API_LLM_MODEL",
    )

    # Snapshot current values; None marks a variable that was not set.
    saved = {name: os.environ.get(name) for name in tracked}

    clear_config_cache()

    yield

    # Put the environment back exactly as we found it.
    for name, value in saved.items():
        if value is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = value

    clear_config_cache()
42+
43+
44+
def test_retain_max_completion_tokens_must_be_greater_than_chunk_size():
    """RETAIN_MAX_COMPLETION_TOKENS <= RETAIN_CHUNK_SIZE must be rejected."""
    from hindsight_api.config import HindsightConfig

    # Invalid combination: completion budget smaller than the chunk size.
    os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "1000"
    os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "2000"
    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"

    # Loading the config must fail with an actionable error.
    with pytest.raises(ValueError) as exc_info:
        HindsightConfig.from_env()

    message = str(exc_info.value)

    # The error should name both settings, show their values, and offer fixes.
    expected_fragments = (
        "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS",
        "1000",
        "HINDSIGHT_API_RETAIN_CHUNK_SIZE",
        "2000",
        "must be greater than",
        "You have two options to fix this:",
        "Increase HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS",
        "Use a model that supports",
    )
    for fragment in expected_fragments:
        assert fragment in message
68+
69+
70+
def test_retain_max_completion_tokens_equal_to_chunk_size_fails():
    """Equal values are also invalid: the limit must be strictly greater."""
    from hindsight_api.config import HindsightConfig

    # Boundary case: max_completion_tokens == chunk_size.
    os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "3000"
    os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "3000"
    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"

    # Equality must still be rejected as invalid.
    with pytest.raises(ValueError) as exc_info:
        HindsightConfig.from_env()

    assert "must be greater than" in str(exc_info.value)
85+
86+
87+
def test_valid_retain_config_succeeds():
    """A strictly larger completion-token budget passes validation."""
    from hindsight_api.config import HindsightConfig

    # Valid combination: max_completion_tokens > chunk_size.
    os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "64000"
    os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "3000"
    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"

    # Must not raise, and the parsed values must round-trip.
    cfg = HindsightConfig.from_env()

    assert cfg.retain_max_completion_tokens == 64000
    assert cfg.retain_chunk_size == 3000
100+
101+
102+
# Note: The BadRequestError wrapping is implemented in fact_extraction.py
103+
# but requires a complex integration test setup. The functionality is
104+
# straightforward: when a BadRequestError containing keywords like
105+
# "max_tokens", "max_completion_tokens", or "maximum context" is caught,
106+
# it's wrapped in a ValueError with helpful guidance.
107+
#
108+
# The config validation tests above ensure users get early feedback
109+
# about invalid configurations before runtime errors occur.

hindsight-api/tests/test_mental_models.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -738,12 +738,20 @@ async def test_refresh_with_tags_only_accesses_same_tagged_models(
738738
"Refreshed model should access memories/models with matching tags (user:alice)"
739739

740740
# MUST NOT include Bob's content (security violation)
741-
assert "bob" not in refreshed_content and "python" not in refreshed_content and "tea" not in refreshed_content, \
742-
f"SECURITY VIOLATION: Refreshed model accessed memories/models with different tags (user:bob). Content: {refreshed_content}"
741+
# Use word boundary matching to avoid false positives (e.g., "team" contains "tea")
742+
import re
743+
def contains_word(text: str, word: str) -> bool:
744+
"""Check if text contains word as a whole word (not substring)."""
745+
return bool(re.search(rf'\b{re.escape(word)}\b', text, re.IGNORECASE))
746+
747+
assert not contains_word(refreshed_content, "bob") and \
748+
not contains_word(refreshed_content, "python") and \
749+
not contains_word(refreshed_content, "tea"), \
750+
f"SECURITY VIOLATION: Refreshed model accessed memories/models with different tags (user:bob). Content: {refreshed['content']}"
743751

744752
# MUST NOT include untagged content (security violation)
745753
assert "100 employees" not in refreshed_content and "growing fast" not in refreshed_content, \
746-
f"SECURITY VIOLATION: Refreshed model accessed untagged memories/models. Content: {refreshed_content}"
754+
f"SECURITY VIOLATION: Refreshed model accessed untagged memories/models. Content: {refreshed['content']}"
747755

748756
# Cleanup
749757
await memory.delete_bank(bank_id, request_context=request_context)

hindsight-embed/hindsight_embed/cli.py

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -86,21 +86,34 @@ def setup_logging(verbose: bool = False):
8686

8787

8888
def load_config_file():
89-
"""Load configuration from file if it exists."""
90-
# Check both config file locations
91-
config_files = [CONFIG_FILE, CONFIG_FILE_ALT]
92-
for config_path in config_files:
93-
if config_path.exists():
94-
with open(config_path) as f:
95-
for line in f:
96-
line = line.strip()
97-
if line and not line.startswith("#") and "=" in line:
98-
# Handle 'export VAR=value' format
99-
if line.startswith("export "):
100-
line = line[7:]
101-
key, value = line.split("=", 1)
102-
if key not in os.environ: # Don't override env vars
103-
os.environ[key] = value
89+
"""Load configuration from the active profile's file if it exists.
90+
91+
IMPORTANT: Only loads from the active profile, never from default if a specific profile is set.
92+
Uses dynamic path resolution to support testing with temporary HOME directories.
93+
"""
94+
from .profile_manager import ProfileManager, resolve_active_profile
95+
96+
# Resolve which profile to use (respects --profile flag, env vars, active_profile file)
97+
active_profile = resolve_active_profile()
98+
99+
# Get the config file path for this profile
100+
# Use ProfileManager which resolves paths dynamically
101+
pm = ProfileManager()
102+
paths = pm.resolve_profile_paths(active_profile)
103+
config_path = paths.config
104+
105+
# Load ONLY this profile's config, never fall back to default
106+
if config_path.exists():
107+
with open(config_path) as f:
108+
for line in f:
109+
line = line.strip()
110+
if line and not line.startswith("#") and "=" in line:
111+
# Handle 'export VAR=value' format
112+
if line.startswith("export "):
113+
line = line[7:]
114+
key, value = line.split("=", 1)
115+
if key not in os.environ: # Don't override env vars
116+
os.environ[key] = value
104117

105118

106119
def get_config():
@@ -1156,6 +1169,10 @@ def main():
11561169
if global_profile == "default":
11571170
global_profile = None
11581171

1172+
# Set the CLI profile override so it's available to resolve_active_profile()
1173+
# This must happen BEFORE any config loading (load_config_file, get_config, etc.)
1174+
set_cli_profile_override(global_profile)
1175+
11591176
# Check for built-in commands first
11601177
# Find the first non-flag argument (the actual command)
11611178
command = None

hindsight-embed/hindsight_embed/daemon_embed_manager.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ def _start_daemon(self, config: dict, profile: str) -> bool:
108108
daemon_log = paths.log
109109
port = paths.port
110110

111+
# Load profile's .env file and merge with provided config
112+
# This fixes issue #305 where profile env vars were ignored
113+
profile_config = self._profile_manager.load_profile_config(profile)
114+
# Merge: profile config first, then override with explicitly provided config
115+
merged_config = {**profile_config, **config}
116+
config = merged_config
117+
111118
# Build environment with LLM config
112119
# Support both formats: simple keys ("llm_api_key") and env var format ("HINDSIGHT_API_LLM_API_KEY")
113120
env = os.environ.copy()

0 commit comments

Comments
 (0)