From 3880eb5f4d307707f4fdfd3d8c94304449f69e7e Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Tue, 19 May 2026 10:11:48 +0300
Subject: [PATCH 1/7] fix: add missed tool calls to history sessions

---
 src/acp/mod.rs | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/acp/mod.rs b/src/acp/mod.rs
index 4c58657..c2e8ed1 100644
--- a/src/acp/mod.rs
+++ b/src/acp/mod.rs
@@ -231,20 +231,45 @@ impl AgentState {
         );
 
         for msg in &conversation.messages {
-            let text = msg.content.clone().unwrap_or_default();
-            if text.is_empty() {
-                continue;
-            }
-            let update = match msg.role {
+            match msg.role {
                 Role::User => {
-                    SessionUpdate::UserMessageChunk(ContentChunk::new(ContentBlock::from(text)))
+                    let text = msg.content.clone().unwrap_or_default();
+                    if !text.is_empty() {
+                        on_update(SessionUpdate::UserMessageChunk(ContentChunk::new(
+                            ContentBlock::from(text),
+                        )));
+                    }
                 }
                 Role::Assistant => {
-                    SessionUpdate::AgentMessageChunk(ContentChunk::new(ContentBlock::from(text)))
+                    let text = msg.content.clone().unwrap_or_default();
+                    if !text.is_empty() {
+                        on_update(SessionUpdate::AgentMessageChunk(ContentChunk::new(
+                            ContentBlock::from(text),
+                        )));
+                    }
+                    if let Some(tool_calls) = &msg.tool_calls {
+                        for tc in tool_calls {
+                            let raw_input = serde_json::from_str(&tc.function.arguments).ok();
+                            on_update(SessionUpdate::ToolCall(
+                                AcpToolCall::new(tc.id.clone(), &tc.function.name)
+                                    .status(ToolCallStatus::Completed)
+                                    .raw_input(raw_input),
+                            ));
+                        }
+                    }
+                }
+                Role::Tool => {
+                    if let (Some(id), Some(content)) = (&msg.tool_call_id, &msg.content) {
+                        on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new(
+                            id.clone(),
+                            ToolCallUpdateFields::new()
+                                .status(ToolCallStatus::Completed)
+                                .raw_output(serde_json::Value::String(content.clone())),
+                        )));
+                    }
                 }
-                _ => continue,
-            };
-            on_update(update);
+                _ => {}
+            }
         }
 
         Ok(())

From 3a76a46f38dce7907a9029e2a699ba73ef71348c Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 10:00:37 +0300
Subject: [PATCH 2/7] fix: log tool call argument parse errors

---
 src/acp/mod.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/acp/mod.rs b/src/acp/mod.rs
index c2e8ed1..bef6fa3 100644
--- a/src/acp/mod.rs
+++ b/src/acp/mod.rs
@@ -249,7 +249,17 @@ impl AgentState {
                     }
                     if let Some(tool_calls) = &msg.tool_calls {
                         for tc in tool_calls {
-                            let raw_input = serde_json::from_str(&tc.function.arguments).ok();
+                            let raw_input = match serde_json::from_str(&tc.function.arguments) {
+                                Ok(v) => Some(v),
+                                Err(e) => {
+                                    tracing::warn!(
+                                        tool_call_id = %tc.id,
+                                        tool_name = %tc.function.name,
+                                        "failed to parse tool call arguments: {e}"
+                                    );
+                                    None
+                                }
+                            };
                             on_update(SessionUpdate::ToolCall(
                                 AcpToolCall::new(tc.id.clone(), &tc.function.name)
                                     .status(ToolCallStatus::Completed)

From 87ad5c51226fee5e7eaa7416f52ec26253d89d81 Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 16:15:56 +0300
Subject: [PATCH 3/7] feat: persist tool error state in history and surface via
 ACP status

Add is_error: bool to Message so tool failures are stored explicitly in
conversation history rather than being lost as plain text. Propagate the
flag through StreamEvent::ToolResult and agent.rs so both the live path
and history replay emit ToolCallStatus::Failed instead of always Completed.
Forward is_error to Anthropic's tool_result block so the LLM gets an
accurate signal on failure.
---
 src/acp/mod.rs            | 18 +++++++++++++++---
 src/core/agent.rs         |  9 ++++++---
 src/core/llm/anthropic.rs | 15 +++++++++++++--
 src/core/llm/gemini.rs    |  8 ++++++--
 src/core/llm/retry.rs     |  1 +
 src/core/models.rs        | 28 +++++++++++++++++++++++++---
 src/rag/prompt.rs         |  1 +
 7 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/src/acp/mod.rs b/src/acp/mod.rs
index bef6fa3..458810e 100644
--- a/src/acp/mod.rs
+++ b/src/acp/mod.rs
@@ -148,12 +148,19 @@ impl AgentState {
                             .raw_input(raw_input),
                     ));
                 }
-                StreamEvent::ToolResult { result, .. } => {
+                StreamEvent::ToolResult {
+                    result, is_error, ..
+                } => {
                     if let Some(id) = last_tool_call_id.take() {
+                        let status = if is_error {
+                            ToolCallStatus::Failed
+                        } else {
+                            ToolCallStatus::Completed
+                        };
                         on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new(
                             id,
                             ToolCallUpdateFields::new()
-                                .status(ToolCallStatus::Completed)
+                                .status(status)
                                 .raw_output(serde_json::Value::String(result)),
                         )));
                     }
@@ -270,10 +277,15 @@ impl AgentState {
                 }
                 Role::Tool => {
                     if let (Some(id), Some(content)) = (&msg.tool_call_id, &msg.content) {
+                        let status = if msg.is_error {
+                            ToolCallStatus::Failed
+                        } else {
+                            ToolCallStatus::Completed
+                        };
                         on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new(
                             id.clone(),
                             ToolCallUpdateFields::new()
-                                .status(ToolCallStatus::Completed)
+                                .status(status)
                                 .raw_output(serde_json::Value::String(content.clone())),
                         )));
                     }
diff --git a/src/core/agent.rs b/src/core/agent.rs
index 35ff1cb..886c3c8 100644
--- a/src/core/agent.rs
+++ b/src/core/agent.rs
@@ -75,15 +75,16 @@ where
                     });
                 }
 
-                let result = match tool_executor.execute(tool_name, arguments).await {
-                    Ok(r) => r,
-                    Err(e) => format!("Error: {e}"),
+                let (result, is_error) = match tool_executor.execute(tool_name, arguments).await {
+                    Ok(r) => (r, false),
+                    Err(e) => (format!("Error: {e}"), true),
                 };
 
                 if let Some(cb) = callback.as_mut() {
                     cb(StreamEvent::ToolResult {
                         tool_name: tool_name.clone(),
                         result: result.clone(),
+                        is_error,
                     });
                 }
 
@@ -97,6 +98,7 @@ where
                     tool_call.id.clone(),
                     tool_name.clone(),
                     result,
+                    is_error,
                 ));
             }
 
@@ -245,6 +247,7 @@ mod tests {
                 }]),
                 tool_call_id: None,
                 tool_name: None,
+                is_error: false,
             },
             finish_reason: Some("tool_calls".into()),
         }
diff --git a/src/core/llm/anthropic.rs b/src/core/llm/anthropic.rs
index 6795241..66cfb51 100644
--- a/src/core/llm/anthropic.rs
+++ b/src/core/llm/anthropic.rs
@@ -11,6 +11,10 @@ use super::LlmClient;
 const ANTHROPIC_VERSION: &str = "2023-06-01";
 const DEFAULT_MAX_TOKENS: u32 = 4096;
 
+fn is_false(b: &bool) -> bool {
+    !b
+}
+
 #[derive(Clone)]
 pub struct AnthropicClient {
     client: ReqwestClient,
@@ -72,6 +76,8 @@ enum AnthropicContentBlock {
     ToolResult {
         tool_use_id: String,
         content: String,
+        #[serde(skip_serializing_if = "is_false")]
+        is_error: bool,
     },
 }
 
@@ -115,6 +121,7 @@ fn convert_messages(messages: &[Message]) -> Result<Vec<AnthropicMessage>> {
                 let block = AnthropicContentBlock::ToolResult {
                     tool_use_id: msg.tool_call_id.clone().unwrap_or_default(),
                     content: msg.content.clone().unwrap_or_default(),
+                    is_error: msg.is_error,
                 };
                 // Merge into the last user message if it exists, otherwise create new
                 if let Some(last) = result.last_mut() {
@@ -231,6 +238,7 @@ fn convert_response(resp: AnthropicResponse) -> Choice {
             },
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         },
         finish_reason,
     }
@@ -313,6 +321,7 @@ mod tests {
             tool_calls: None,
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 0);
@@ -333,6 +342,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -355,6 +365,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         assert!(convert_messages(&messages).is_err());
     }
@@ -362,8 +373,8 @@ mod tests {
     #[test]
     fn convert_messages_merges_consecutive_tool_results() {
         let messages = vec![
-            Message::tool_result("call_1".into(), "read_file".into(), "content1".into()),
-            Message::tool_result("call_2".into(), "write_file".into(), "content2".into()),
+            Message::tool_result("call_1".into(), "read_file".into(), "content1".into(), false),
+            Message::tool_result("call_2".into(), "write_file".into(), "content2".into(), false),
         ];
         let result = convert_messages(&messages).unwrap();
         // Both tool results should merge into a single user message
diff --git a/src/core/llm/gemini.rs b/src/core/llm/gemini.rs
index 54bd240..7f1089e 100644
--- a/src/core/llm/gemini.rs
+++ b/src/core/llm/gemini.rs
@@ -274,6 +274,7 @@ fn convert_response(resp: GeminiResponse) -> Result<Choice> {
             },
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         },
         finish_reason,
     })
@@ -380,6 +381,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -406,6 +408,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         assert!(convert_messages(&messages).is_err());
     }
@@ -416,6 +419,7 @@ mod tests {
             "call_1".into(),
             "read_file".into(),
             "file content".into(),
+            false,
         )];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -427,8 +431,8 @@ mod tests {
     #[test]
     fn convert_messages_merges_tool_results_into_user() {
         let messages = vec![
-            Message::tool_result("call_1".into(), "read_file".into(), "a".into()),
-            Message::tool_result("call_2".into(), "write_file".into(), "b".into()),
+            Message::tool_result("call_1".into(), "read_file".into(), "a".into(), false),
+            Message::tool_result("call_2".into(), "write_file".into(), "b".into(), false),
         ];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
diff --git a/src/core/llm/retry.rs b/src/core/llm/retry.rs
index 045539d..00722dc 100644
--- a/src/core/llm/retry.rs
+++ b/src/core/llm/retry.rs
@@ -61,6 +61,7 @@ mod tests {
                 tool_calls: None,
                 tool_call_id: None,
                 tool_name: None,
+                is_error: false,
             },
             finish_reason: Some("stop".into()),
         }
diff --git a/src/core/models.rs b/src/core/models.rs
index f907ba1..ea03a6c 100644
--- a/src/core/models.rs
+++ b/src/core/models.rs
@@ -2,6 +2,10 @@ use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use uuid::Uuid;
 
+fn is_false(b: &bool) -> bool {
+    !b
+}
+
 /// Chat role for a conversation message.
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 #[serde(rename_all = "lowercase")]
@@ -35,6 +39,10 @@ pub struct Message {
     pub tool_call_id: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_name: Option<String>,
+    /// Whether the tool returned an error. Stored in history so replayed sessions
+    /// can surface the correct status without heuristics.
+    #[serde(default, skip_serializing_if = "is_false")]
+    pub is_error: bool,
 }
 
 impl Message {
@@ -45,6 +53,7 @@ impl Message {
             tool_calls: None,
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }
     }
 
@@ -55,16 +64,23 @@ impl Message {
             tool_calls: None,
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }
     }
 
-    pub fn tool_result(tool_call_id: String, tool_name: String, content: String) -> Self {
+    pub fn tool_result(
+        tool_call_id: String,
+        tool_name: String,
+        content: String,
+        is_error: bool,
+    ) -> Self {
         Self {
             role: Role::Tool,
             content: Some(content),
             tool_calls: None,
             tool_call_id: Some(tool_call_id),
             tool_name: Some(tool_name),
+            is_error,
         }
     }
 }
@@ -155,7 +171,11 @@ pub enum StreamEvent {
     },
     /// A tool has finished executing.
     #[serde(rename = "tool_result")]
-    ToolResult { tool_name: String, result: String },
+    ToolResult {
+        tool_name: String,
+        result: String,
+        is_error: bool,
+    },
     /// A chunk of text from the LLM.
     #[serde(rename = "llm_response")]
     LlmResponse { content: String },
@@ -360,11 +380,13 @@ mod tests {
 
     #[test]
     fn message_tool_result_sets_correct_fields() {
-        let msg = Message::tool_result("call_1".into(), "read_file".into(), "content".into());
+        let msg =
+            Message::tool_result("call_1".into(), "read_file".into(), "content".into(), false);
         assert_eq!(msg.role, Role::Tool);
         assert_eq!(msg.content.as_deref(), Some("content"));
         assert_eq!(msg.tool_call_id.as_deref(), Some("call_1"));
         assert_eq!(msg.tool_name.as_deref(), Some("read_file"));
+        assert!(!msg.is_error);
         assert!(msg.tool_calls.is_none());
     }
 
diff --git a/src/rag/prompt.rs b/src/rag/prompt.rs
index 51d9387..072ea4e 100644
--- a/src/rag/prompt.rs
+++ b/src/rag/prompt.rs
@@ -43,6 +43,7 @@ impl PromptBuilder {
                 tool_calls: None,
                 tool_call_id: None,
                 tool_name: None,
+                is_error: false,
             });
         }
 

From 99c9dc2aa5292d0ddc68d90b11b049434cd23b83 Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 16:16:00 +0300
Subject: [PATCH 4/7] docs: document is_error field and correct tool call
 history replay

---
 docs/api.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/docs/api.md b/docs/api.md
index 8172d9b..890ae4b 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -341,6 +341,14 @@ Returns the full conversation for a session, including all messages.
       "tool_calls": null,
       "tool_call_id": "call_abc123",
       "tool_name": "read_file"
+    },
+    {
+      "role": "tool",
+      "content": "Error: permission denied: /etc/shadow",
+      "tool_calls": null,
+      "tool_call_id": "call_def456",
+      "tool_name": "read_file",
+      "is_error": true
     }
   ]
 }
@@ -355,6 +363,14 @@ Returns the full conversation for a session, including all messages.
 | `"tool"` | Tool execution result fed back to the LLM |
 | `"system"` | System prompt injected by the agent (skills, context) |
 
+**`role: "tool"` fields:**
+
+| Field | Type | Description |
+|---|---|---|
+| `tool_call_id` | `string` | ID linking this result to the assistant's tool call request |
+| `tool_name` | `string` | Name of the tool that was invoked |
+| `is_error` | `boolean` | `true` if the tool returned an error. Omitted from JSON when `false` (i.e. absence means success). Also forwarded to Anthropic as `is_error` in the tool result block so the LLM receives an accurate failure signal. |
+
 **Error `400`** — if `:id` is not a valid UUID:
 
 ```json
@@ -736,7 +752,7 @@ The flow is:
 }
 ```
 
-> Tool call messages from the original session are **not** replayed — only user and assistant text. Use `GET /api/sessions/:id` if you need the raw tool call history.
+> Tool calls from the original session **are** replayed: assistant tool-call requests arrive as `tool_call` notifications (`status: "in_progress"`), and tool results arrive as `tool_call_update` notifications with `status: "completed"` or `status: "failed"` — the correct status is preserved in the stored history via the `is_error` field on the message.
 
 **Response (after all history has been replayed):**
 
@@ -1354,6 +1370,7 @@ interface Message {
   tool_calls?: ToolCall[] | null;
   tool_call_id?: string | null; // present on role:"tool" messages
   tool_name?: string | null;    // present on role:"tool" messages
+  is_error?: boolean;           // true when the tool returned an error; omitted when false
 }
 
 interface ToolCall {

From 46b992d687a8df2ad10ad5d8edfa864ade237f37 Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 16:19:15 +0300
Subject: [PATCH 5/7] style: cargo fmt

---
 src/core/llm/anthropic.rs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/core/llm/anthropic.rs b/src/core/llm/anthropic.rs
index 66cfb51..3a75053 100644
--- a/src/core/llm/anthropic.rs
+++ b/src/core/llm/anthropic.rs
@@ -373,8 +373,18 @@ mod tests {
     #[test]
     fn convert_messages_merges_consecutive_tool_results() {
         let messages = vec![
-            Message::tool_result("call_1".into(), "read_file".into(), "content1".into(), false),
-            Message::tool_result("call_2".into(), "write_file".into(), "content2".into(), false),
+            Message::tool_result(
+                "call_1".into(),
+                "read_file".into(),
+                "content1".into(),
+                false,
+            ),
+            Message::tool_result(
+                "call_2".into(),
+                "write_file".into(),
+                "content2".into(),
+                false,
+            ),
         ];
         let result = convert_messages(&messages).unwrap();
         // Both tool results should merge into a single user message

From 7dd6413f2030532ac981de214dd3f31b642128d3 Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 16:34:45 +0300
Subject: [PATCH 6/7] fix: emit InProgress status for replayed tool calls in
 acp_load_session

---
 src/acp/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/acp/mod.rs b/src/acp/mod.rs
index 458810e..45193e9 100644
--- a/src/acp/mod.rs
+++ b/src/acp/mod.rs
@@ -269,7 +269,7 @@ impl AgentState {
                             };
                             on_update(SessionUpdate::ToolCall(
                                 AcpToolCall::new(tc.id.clone(), &tc.function.name)
-                                    .status(ToolCallStatus::Completed)
+                                    .status(ToolCallStatus::InProgress)
                                     .raw_input(raw_input),
                             ));
                         }

From 4588b77eabc9ee5a6f2257d824e32da2eaed50a2 Mon Sep 17 00:00:00 2001
From: themartto <themartto@gmail.com>
Date: Thu, 21 May 2026 16:38:03 +0300
Subject: [PATCH 7/7] chore: update README.md

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8d99c0b..dc5f586 100644
--- a/README.md
+++ b/README.md
@@ -21,10 +21,10 @@ Openheim runs an iterative agent loop — it calls your LLM, executes tools on i
 
 Openheim is built in Rust from the ground up:
 
-- **Low memory** — runs in a fraction of the RAM a Python agent would need
-- **Fast startup** — no interpreter warmup
-- **True concurrency** — async Tokio runtime, multiple agents without threading headaches
-- **Safe by default** — Rust's ownership model means fewer footguns in long-running agent processes
+- **Low memory**
+- **Fast startup**
+- **True concurrency**
+- **Memory-safe by default**
 
 ---