fix(embed): daemon process XPC connection crash on macos (#215)

nicoloboschi · web-flow · commit e5fc6eedb661 · 2026-01-28T14:52:31.000+01:00
* fix(embed): daemon process XPC connection crash on macos

* other fix
diff --git a/hindsight-api/hindsight_api/engine/cross_encoder.py b/hindsight-api/hindsight_api/engine/cross_encoder.py
@@ -163,11 +163,101 @@ async def initialize(self) -> None:
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")
 
+    def _is_xpc_error(self, error: Exception) -> bool:
+        """
+        Check if an error is an XPC connection error (macOS daemon issue).
+
+        On macOS, long-running daemons can lose XPC connections to system services
+        when the process is idle for extended periods.
+        """
+        error_str = str(error).lower()
+        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
+
+    def _reinitialize_model_sync(self) -> None:
+        """
+        Clear and reinitialize the cross-encoder model synchronously.
+
+        This is used to recover from XPC errors on macOS where the
+        PyTorch/MPS backend loses its connection to system services.
+        """
+        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
+
+        # Clear existing model
+        self._model = None
+
+        # Force garbage collection to free resources
+        import gc
+
+        import torch
+
+        gc.collect()
+
+        # If using CUDA/MPS, clear the cache
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            try:
+                torch.mps.empty_cache()
+            except AttributeError:
+                pass  # Method might not exist in all PyTorch versions
+
+        # Reinitialize the model
+        try:
+            from sentence_transformers import CrossEncoder
+        except ImportError:
+            raise ImportError(
+                "sentence-transformers is required for LocalSTCrossEncoder. "
+                "Install it with: pip install sentence-transformers"
+            )
+
+        # Determine device based on hardware availability
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
+        self._model = CrossEncoder(
+            self.model_name,
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
+        )
+
+        logger.info("Reranker: local provider reinitialized successfully")
+
+    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """
+        Predict with automatic recovery from XPC errors.
+
+        This runs synchronously in the thread pool.
+        """
+        max_retries = 1
+        for attempt in range(max_retries + 1):
+            try:
+                scores = self._model.predict(pairs, show_progress_bar=False)
+                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
+            except Exception as e:
+                # Check if this is an XPC error (macOS daemon issue)
+                if self._is_xpc_error(e) and attempt < max_retries:
+                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
+                    try:
+                        self._reinitialize_model_sync()
+                        logger.info("Reranker reinitialized successfully, retrying prediction")
+                        continue
+                    except Exception as reinit_error:
+                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
+                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
+                else:
+                    # Not an XPC error or out of retries
+                    raise
+
     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.
 
         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
+        Automatically recovers from XPC errors on macOS by reinitializing the model.
 
         Args:
             pairs: List of (query, document) tuples to score
@@ -180,11 +270,11 @@ async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
 
         # Use dedicated executor - limited workers naturally limits concurrency
         loop = asyncio.get_event_loop()
-        scores = await loop.run_in_executor(
+        return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-            lambda: self._model.predict(pairs, show_progress_bar=False),
+            self._predict_with_recovery,
+            pairs,
         )
-        return scores.tolist() if hasattr(scores, "tolist") else list(scores)
 
 
 class RemoteTEICrossEncoder(CrossEncoderModel):
diff --git a/hindsight-api/hindsight_api/engine/embeddings.py b/hindsight-api/hindsight_api/engine/embeddings.py
@@ -151,10 +151,75 @@ async def initialize(self) -> None:
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
 
+    def _is_xpc_error(self, error: Exception) -> bool:
+        """
+        Check if an error is an XPC connection error (macOS daemon issue).
+
+        On macOS, long-running daemons can lose XPC connections to system services
+        when the process is idle for extended periods.
+        """
+        error_str = str(error).lower()
+        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
+
+    def _reinitialize_model_sync(self) -> None:
+        """
+        Clear and reinitialize the embedding model synchronously.
+
+        This is used to recover from XPC errors on macOS where the
+        PyTorch/MPS backend loses its connection to system services.
+        """
+        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
+
+        # Clear existing model
+        self._model = None
+
+        # Force garbage collection to free resources
+        import gc
+
+        import torch
+
+        gc.collect()
+
+        # If using CUDA/MPS, clear the cache
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            try:
+                torch.mps.empty_cache()
+            except AttributeError:
+                pass  # Method might not exist in all PyTorch versions
+
+        # Reinitialize the model (inline version of initialize() but synchronous)
+        try:
+            from sentence_transformers import SentenceTransformer
+        except ImportError:
+            raise ImportError(
+                "sentence-transformers is required for LocalSTEmbeddings. "
+                "Install it with: pip install sentence-transformers"
+            )
+
+        # Determine device based on hardware availability
+        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
+
+        if has_gpu:
+            device = None  # Let sentence-transformers auto-detect GPU/MPS
+        else:
+            device = "cpu"
+
+        self._model = SentenceTransformer(
+            self.model_name,
+            device=device,
+            model_kwargs={"low_cpu_mem_usage": False},
+        )
+
+        logger.info("Embeddings: local provider reinitialized successfully")
+
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.
 
+        Automatically recovers from XPC errors on macOS by reinitializing the model.
+
         Args:
             texts: List of text strings to encode
 
@@ -163,8 +228,27 @@ def encode(self, texts: list[str]) -> list[list[float]]:
         """
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
-        embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-        return [emb.tolist() for emb in embeddings]
+
+        # Try encoding with automatic recovery from XPC errors
+        max_retries = 1
+        for attempt in range(max_retries + 1):
+            try:
+                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+                return [emb.tolist() for emb in embeddings]
+            except Exception as e:
+                # Check if this is an XPC error (macOS daemon issue)
+                if self._is_xpc_error(e) and attempt < max_retries:
+                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
+                    try:
+                        self._reinitialize_model_sync()
+                        logger.info("Model reinitialized successfully, retrying embedding generation")
+                        continue
+                    except Exception as reinit_error:
+                        logger.error(f"Failed to reinitialize model: {reinit_error}")
+                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
+                else:
+                    # Not an XPC error or out of retries
+                    raise
 
 
 class RemoteTEIEmbeddings(Embeddings):
diff --git a/hindsight-api/tests/test_cross_encoder_xpc_recovery.py b/hindsight-api/tests/test_cross_encoder_xpc_recovery.py
@@ -0,0 +1,148 @@
+"""
+Tests for XPC error recovery in LocalSTCrossEncoder.
+
+This tests the automatic reinitialization of the cross-encoder model when
+XPC connection errors occur on macOS (common in long-running daemon processes).
+"""
+
+import asyncio
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hindsight_api.engine.cross_encoder import LocalSTCrossEncoder
+
+
+class TestCrossEncoderXPCErrorRecovery:
+    """Tests for XPC error detection and recovery in LocalSTCrossEncoder."""
+
+    @pytest.fixture
+    def cross_encoder(self):
+        """Create a LocalSTCrossEncoder instance."""
+        return LocalSTCrossEncoder(model_name="cross-encoder/ms-marco-TinyBERT-L-2-v2")
+
+    def test_is_xpc_error_detection(self, cross_encoder):
+        """Test that XPC errors are correctly detected."""
+        # Test various XPC error message formats
+        xpc_error = Exception("Compiler encountered XPC_ERROR_CONNECTION_INVALID (is the OS shutting down?)")
+        assert cross_encoder._is_xpc_error(xpc_error)
+
+        xpc_error2 = Exception("XPC error occurred")
+        assert cross_encoder._is_xpc_error(xpc_error2)
+
+        # Test that non-XPC errors are not detected
+        normal_error = Exception("Some other error")
+        assert not cross_encoder._is_xpc_error(normal_error)
+
+    @pytest.mark.asyncio
+    async def test_predict_with_xpc_recovery(self, cross_encoder):
+        """Test that predict() recovers from XPC errors by reinitializing."""
+        # Initialize the cross-encoder
+        await cross_encoder.initialize()
+
+        # Track calls to reinitialize
+        reinit_called = False
+        original_reinit = cross_encoder._reinitialize_model_sync
+
+        def track_reinit():
+            nonlocal reinit_called
+            reinit_called = True
+            original_reinit()
+
+        # Track predict attempts
+        predict_attempts = []
+        original_predict = cross_encoder._model.predict
+
+        def mock_predict(*args, **kwargs):
+            predict_attempts.append(1)
+            # Only fail on first attempt
+            if len(predict_attempts) == 1:
+                raise RuntimeError("Compiler encountered XPC_ERROR_CONNECTION_INVALID (is the OS shutting down?)")
+            else:
+                # After reinit: succeed
+                return original_predict(*args, **kwargs)
+
+        # Mock the initial predict to fail, reinit happens, then new model succeeds
+        with patch.object(cross_encoder, "_reinitialize_model_sync", side_effect=track_reinit):
+            with patch.object(cross_encoder._model, "predict", side_effect=mock_predict):
+                # This should trigger XPC error on first attempt, then recover and succeed
+                result = await cross_encoder.predict([("query", "document")])
+
+                # Verify we got a result
+                assert result is not None
+                assert len(result) == 1
+                assert isinstance(result[0], float)
+                assert reinit_called  # Should have reinitialized
+                assert len(predict_attempts) >= 1  # At least one attempt was made
+
+    @pytest.mark.asyncio
+    async def test_predict_fails_on_non_xpc_error(self, cross_encoder):
+        """Test that predict() does not retry for non-XPC errors."""
+        # Initialize the cross-encoder
+        await cross_encoder.initialize()
+
+        # Create a mock that raises a non-XPC error
+        def mock_predict(*args, **kwargs):
+            raise RuntimeError("Some other error")
+
+        # Patch the model's predict method
+        with patch.object(cross_encoder._model, "predict", side_effect=mock_predict):
+            # This should fail without retry
+            with pytest.raises(RuntimeError) as exc_info:
+                await cross_encoder.predict([("query", "document")])
+
+            assert "Some other error" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_reinitialize_clears_model(self, cross_encoder):
+        """Test that _reinitialize_model_sync properly clears and reinits the model."""
+        # Initialize the cross-encoder
+        await cross_encoder.initialize()
+
+        original_model = cross_encoder._model
+        assert original_model is not None
+
+        # Reinitialize
+        cross_encoder._reinitialize_model_sync()
+
+        # Model should be reinitialized (new instance)
+        assert cross_encoder._model is not None
+        assert cross_encoder._model is not original_model
+
+        # Should still work
+        result = await cross_encoder.predict([("test query", "test document")])
+        assert len(result) == 1
+        assert isinstance(result[0], float)
+
+    @pytest.mark.asyncio
+    async def test_xpc_recovery_exhausts_retries(self, cross_encoder):
+        """Test that XPC recovery gives up after max retries."""
+        # Initialize the cross-encoder
+        await cross_encoder.initialize()
+
+        # Track reinit calls
+        reinit_count = 0
+        original_reinit = cross_encoder._reinitialize_model_sync
+
+        def track_and_fail_reinit():
+            nonlocal reinit_count
+            reinit_count += 1
+            # Call original reinit, but the new model will also be mocked to fail
+            original_reinit()
+            # After reinit, patch the new model too
+            cross_encoder._model.predict = MagicMock(
+                side_effect=RuntimeError("Compiler encountered XPC_ERROR_CONNECTION_INVALID")
+            )
+
+        # Mock that always raises XPC error
+        cross_encoder._model.predict = MagicMock(
+            side_effect=RuntimeError("Compiler encountered XPC_ERROR_CONNECTION_INVALID")
+        )
+
+        with patch.object(cross_encoder, "_reinitialize_model_sync", side_effect=track_and_fail_reinit):
+            # Should try once, reinitialize, try again, and fail
+            with pytest.raises(Exception) as exc_info:
+                await cross_encoder.predict([("query", "document")])
+
+            assert "XPC_ERROR_CONNECTION_INVALID" in str(exc_info.value) or "Failed to recover" in str(exc_info.value)
+            assert reinit_count == 1  # Should have tried to reinitialize once
diff --git a/hindsight-api/tests/test_embedding_xpc_recovery.py b/hindsight-api/tests/test_embedding_xpc_recovery.py