From 3880eb5f4d307707f4fdfd3d8c94304449f69e7e Mon Sep 17 00:00:00 2001 From: themartto Date: Tue, 19 May 2026 10:11:48 +0300 Subject: [PATCH 1/7] fix: add missed tool calls to history sessions --- src/acp/mod.rs | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/acp/mod.rs b/src/acp/mod.rs index 4c58657..c2e8ed1 100644 --- a/src/acp/mod.rs +++ b/src/acp/mod.rs @@ -231,20 +231,45 @@ impl AgentState { ); for msg in &conversation.messages { - let text = msg.content.clone().unwrap_or_default(); - if text.is_empty() { - continue; - } - let update = match msg.role { + match msg.role { Role::User => { - SessionUpdate::UserMessageChunk(ContentChunk::new(ContentBlock::from(text))) + let text = msg.content.clone().unwrap_or_default(); + if !text.is_empty() { + on_update(SessionUpdate::UserMessageChunk(ContentChunk::new( + ContentBlock::from(text), + ))); + } } Role::Assistant => { - SessionUpdate::AgentMessageChunk(ContentChunk::new(ContentBlock::from(text))) + let text = msg.content.clone().unwrap_or_default(); + if !text.is_empty() { + on_update(SessionUpdate::AgentMessageChunk(ContentChunk::new( + ContentBlock::from(text), + ))); + } + if let Some(tool_calls) = &msg.tool_calls { + for tc in tool_calls { + let raw_input = serde_json::from_str(&tc.function.arguments).ok(); + on_update(SessionUpdate::ToolCall( + AcpToolCall::new(tc.id.clone(), &tc.function.name) + .status(ToolCallStatus::Completed) + .raw_input(raw_input), + )); + } + } + } + Role::Tool => { + if let (Some(id), Some(content)) = (&msg.tool_call_id, &msg.content) { + on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new( + id.clone(), + ToolCallUpdateFields::new() + .status(ToolCallStatus::Completed) + .raw_output(serde_json::Value::String(content.clone())), + ))); + } } - _ => continue, - }; - on_update(update); + _ => {} + } } Ok(()) From 3a76a46f38dce7907a9029e2a699ba73ef71348c Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 
2026 10:00:37 +0300 Subject: [PATCH 2/7] feat: fix tool call errors logging --- src/acp/mod.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/acp/mod.rs b/src/acp/mod.rs index c2e8ed1..bef6fa3 100644 --- a/src/acp/mod.rs +++ b/src/acp/mod.rs @@ -249,7 +249,17 @@ impl AgentState { } if let Some(tool_calls) = &msg.tool_calls { for tc in tool_calls { - let raw_input = serde_json::from_str(&tc.function.arguments).ok(); + let raw_input = match serde_json::from_str(&tc.function.arguments) { + Ok(v) => Some(v), + Err(e) => { + tracing::warn!( + tool_call_id = %tc.id, + tool_name = %tc.function.name, + "failed to parse tool call arguments: {e}" + ); + None + } + }; on_update(SessionUpdate::ToolCall( AcpToolCall::new(tc.id.clone(), &tc.function.name) .status(ToolCallStatus::Completed) From 87ad5c51226fee5e7eaa7416f52ec26253d89d81 Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 2026 16:15:56 +0300 Subject: [PATCH 3/7] feat: persist tool error state in history and surface via ACP status Add is_error: bool to Message so tool failures are stored explicitly in conversation history rather than being lost as plain text. Propagate the flag through StreamEvent::ToolResult and agent.rs so both the live path and history replay emit ToolCallStatus::Failed instead of always Completed. Forward is_error to Anthropic's tool_result block so the LLM gets accurate signal on failure. --- src/acp/mod.rs | 18 +++++++++++++++--- src/core/agent.rs | 9 ++++++--- src/core/llm/anthropic.rs | 15 +++++++++++++-- src/core/llm/gemini.rs | 8 ++++++-- src/core/llm/retry.rs | 1 + src/core/models.rs | 28 +++++++++++++++++++++++++--- src/rag/prompt.rs | 1 + 7 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/acp/mod.rs b/src/acp/mod.rs index bef6fa3..458810e 100644 --- a/src/acp/mod.rs +++ b/src/acp/mod.rs @@ -148,12 +148,19 @@ impl AgentState { .raw_input(raw_input), )); } - StreamEvent::ToolResult { result, .. 
} => { + StreamEvent::ToolResult { + result, is_error, .. + } => { if let Some(id) = last_tool_call_id.take() { + let status = if is_error { + ToolCallStatus::Failed + } else { + ToolCallStatus::Completed + }; on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new( id, ToolCallUpdateFields::new() - .status(ToolCallStatus::Completed) + .status(status) .raw_output(serde_json::Value::String(result)), ))); } @@ -270,10 +277,15 @@ impl AgentState { } Role::Tool => { if let (Some(id), Some(content)) = (&msg.tool_call_id, &msg.content) { + let status = if msg.is_error { + ToolCallStatus::Failed + } else { + ToolCallStatus::Completed + }; on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new( id.clone(), ToolCallUpdateFields::new() - .status(ToolCallStatus::Completed) + .status(status) .raw_output(serde_json::Value::String(content.clone())), ))); } diff --git a/src/core/agent.rs b/src/core/agent.rs index 35ff1cb..886c3c8 100644 --- a/src/core/agent.rs +++ b/src/core/agent.rs @@ -75,15 +75,16 @@ where }); } - let result = match tool_executor.execute(tool_name, arguments).await { - Ok(r) => r, - Err(e) => format!("Error: {e}"), + let (result, is_error) = match tool_executor.execute(tool_name, arguments).await { + Ok(r) => (r, false), + Err(e) => (format!("Error: {e}"), true), }; if let Some(cb) = callback.as_mut() { cb(StreamEvent::ToolResult { tool_name: tool_name.clone(), result: result.clone(), + is_error, }); } @@ -97,6 +98,7 @@ where tool_call.id.clone(), tool_name.clone(), result, + is_error, )); } @@ -245,6 +247,7 @@ mod tests { }]), tool_call_id: None, tool_name: None, + is_error: false, }, finish_reason: Some("tool_calls".into()), } diff --git a/src/core/llm/anthropic.rs b/src/core/llm/anthropic.rs index 6795241..66cfb51 100644 --- a/src/core/llm/anthropic.rs +++ b/src/core/llm/anthropic.rs @@ -11,6 +11,10 @@ use super::LlmClient; const ANTHROPIC_VERSION: &str = "2023-06-01"; const DEFAULT_MAX_TOKENS: u32 = 4096; +fn is_false(b: &bool) -> bool { + !b +} + 
#[derive(Clone)] pub struct AnthropicClient { client: ReqwestClient, @@ -72,6 +76,8 @@ enum AnthropicContentBlock { ToolResult { tool_use_id: String, content: String, + #[serde(skip_serializing_if = "is_false")] + is_error: bool, }, } @@ -115,6 +121,7 @@ fn convert_messages(messages: &[Message]) -> Result> { let block = AnthropicContentBlock::ToolResult { tool_use_id: msg.tool_call_id.clone().unwrap_or_default(), content: msg.content.clone().unwrap_or_default(), + is_error: msg.is_error, }; // Merge into the last user message if it exists, otherwise create new if let Some(last) = result.last_mut() { @@ -231,6 +238,7 @@ fn convert_response(resp: AnthropicResponse) -> Choice { }, tool_call_id: None, tool_name: None, + is_error: false, }, finish_reason, } @@ -313,6 +321,7 @@ mod tests { tool_calls: None, tool_call_id: None, tool_name: None, + is_error: false, }]; let result = convert_messages(&messages).unwrap(); assert_eq!(result.len(), 0); @@ -333,6 +342,7 @@ mod tests { }]), tool_call_id: None, tool_name: None, + is_error: false, }]; let result = convert_messages(&messages).unwrap(); assert_eq!(result.len(), 1); @@ -355,6 +365,7 @@ mod tests { }]), tool_call_id: None, tool_name: None, + is_error: false, }]; assert!(convert_messages(&messages).is_err()); } @@ -362,8 +373,8 @@ mod tests { #[test] fn convert_messages_merges_consecutive_tool_results() { let messages = vec![ - Message::tool_result("call_1".into(), "read_file".into(), "content1".into()), - Message::tool_result("call_2".into(), "write_file".into(), "content2".into()), + Message::tool_result("call_1".into(), "read_file".into(), "content1".into(), false), + Message::tool_result("call_2".into(), "write_file".into(), "content2".into(), false), ]; let result = convert_messages(&messages).unwrap(); // Both tool results should merge into a single user message diff --git a/src/core/llm/gemini.rs b/src/core/llm/gemini.rs index 54bd240..7f1089e 100644 --- a/src/core/llm/gemini.rs +++ b/src/core/llm/gemini.rs @@ 
-274,6 +274,7 @@ fn convert_response(resp: GeminiResponse) -> Result { }, tool_call_id: None, tool_name: None, + is_error: false, }, finish_reason, }) @@ -380,6 +381,7 @@ mod tests { }]), tool_call_id: None, tool_name: None, + is_error: false, }]; let result = convert_messages(&messages).unwrap(); assert_eq!(result.len(), 1); @@ -406,6 +408,7 @@ mod tests { }]), tool_call_id: None, tool_name: None, + is_error: false, }]; assert!(convert_messages(&messages).is_err()); } @@ -416,6 +419,7 @@ mod tests { "call_1".into(), "read_file".into(), "file content".into(), + false, )]; let result = convert_messages(&messages).unwrap(); assert_eq!(result.len(), 1); @@ -427,8 +431,8 @@ mod tests { #[test] fn convert_messages_merges_tool_results_into_user() { let messages = vec![ - Message::tool_result("call_1".into(), "read_file".into(), "a".into()), - Message::tool_result("call_2".into(), "write_file".into(), "b".into()), + Message::tool_result("call_1".into(), "read_file".into(), "a".into(), false), + Message::tool_result("call_2".into(), "write_file".into(), "b".into(), false), ]; let result = convert_messages(&messages).unwrap(); assert_eq!(result.len(), 1); diff --git a/src/core/llm/retry.rs b/src/core/llm/retry.rs index 045539d..00722dc 100644 --- a/src/core/llm/retry.rs +++ b/src/core/llm/retry.rs @@ -61,6 +61,7 @@ mod tests { tool_calls: None, tool_call_id: None, tool_name: None, + is_error: false, }, finish_reason: Some("stop".into()), } diff --git a/src/core/models.rs b/src/core/models.rs index f907ba1..ea03a6c 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -2,6 +2,10 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use uuid::Uuid; +fn is_false(b: &bool) -> bool { + !b +} + /// Chat role for a conversation message. 
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] #[serde(rename_all = "lowercase")] @@ -35,6 +39,10 @@ pub struct Message { pub tool_call_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tool_name: Option, + /// Whether the tool returned an error. Stored in history so replayed sessions + /// can surface the correct status without heuristics. + #[serde(default, skip_serializing_if = "is_false")] + pub is_error: bool, } impl Message { @@ -45,6 +53,7 @@ impl Message { tool_calls: None, tool_call_id: None, tool_name: None, + is_error: false, } } @@ -55,16 +64,23 @@ impl Message { tool_calls: None, tool_call_id: None, tool_name: None, + is_error: false, } } - pub fn tool_result(tool_call_id: String, tool_name: String, content: String) -> Self { + pub fn tool_result( + tool_call_id: String, + tool_name: String, + content: String, + is_error: bool, + ) -> Self { Self { role: Role::Tool, content: Some(content), tool_calls: None, tool_call_id: Some(tool_call_id), tool_name: Some(tool_name), + is_error, } } } @@ -155,7 +171,11 @@ pub enum StreamEvent { }, /// A tool has finished executing. #[serde(rename = "tool_result")] - ToolResult { tool_name: String, result: String }, + ToolResult { + tool_name: String, + result: String, + is_error: bool, + }, /// A chunk of text from the LLM. 
#[serde(rename = "llm_response")] LlmResponse { content: String }, @@ -360,11 +380,13 @@ mod tests { #[test] fn message_tool_result_sets_correct_fields() { - let msg = Message::tool_result("call_1".into(), "read_file".into(), "content".into()); + let msg = + Message::tool_result("call_1".into(), "read_file".into(), "content".into(), false); assert_eq!(msg.role, Role::Tool); assert_eq!(msg.content.as_deref(), Some("content")); assert_eq!(msg.tool_call_id.as_deref(), Some("call_1")); assert_eq!(msg.tool_name.as_deref(), Some("read_file")); + assert!(!msg.is_error); assert!(msg.tool_calls.is_none()); } diff --git a/src/rag/prompt.rs b/src/rag/prompt.rs index 51d9387..072ea4e 100644 --- a/src/rag/prompt.rs +++ b/src/rag/prompt.rs @@ -43,6 +43,7 @@ impl PromptBuilder { tool_calls: None, tool_call_id: None, tool_name: None, + is_error: false, }); } From 99c9dc2aa5292d0ddc68d90b11b049434cd23b83 Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 2026 16:16:00 +0300 Subject: [PATCH 4/7] docs: document is_error field and correct tool call history replay --- docs/api.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/api.md b/docs/api.md index 8172d9b..890ae4b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -341,6 +341,14 @@ Returns the full conversation for a session, including all messages. "tool_calls": null, "tool_call_id": "call_abc123", "tool_name": "read_file" + }, + { + "role": "tool", + "content": "Error: permission denied: /etc/shadow", + "tool_calls": null, + "tool_call_id": "call_def456", + "tool_name": "read_file", + "is_error": true } ] } @@ -355,6 +363,14 @@ Returns the full conversation for a session, including all messages. 
| `"tool"` | Tool execution result fed back to the LLM | | `"system"` | System prompt injected by the agent (skills, context) | +**`role: "tool"` fields:** + +| Field | Type | Description | +|---|---|---| +| `tool_call_id` | `string` | ID linking this result to the assistant's tool call request | +| `tool_name` | `string` | Name of the tool that was invoked | +| `is_error` | `boolean` | `true` if the tool returned an error. Omitted from JSON when `false` (i.e. absence means success). Also forwarded to Anthropic as `is_error` in the tool result block so the LLM receives an accurate signal. | + **Error `400`** — if `:id` is not a valid UUID: ```json @@ -736,7 +752,7 @@ The flow is: } ``` -> Tool call messages from the original session are **not** replayed — only user and assistant text. Use `GET /api/sessions/:id` if you need the raw tool call history. +> Tool calls from the original session **are** replayed: assistant tool-call requests arrive as `tool_call` notifications (`status: "in_progress"`), and tool results arrive as `tool_call_update` notifications with `status: "completed"` or `status: "failed"` — the correct status is preserved in the stored history via the `is_error` field on the message. Messages stored before `is_error` was introduced lack the field and therefore replay as `completed`. 
**Response (after all history has been replayed):** @@ -1354,6 +1370,7 @@ interface Message { tool_calls?: ToolCall[] | null; tool_call_id?: string | null; // present on role:"tool" messages tool_name?: string | null; // present on role:"tool" messages + is_error?: boolean; // true when the tool returned an error; omitted when false } interface ToolCall { From 46b992d687a8df2ad10ad5d8edfa864ade237f37 Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 2026 16:19:15 +0300 Subject: [PATCH 5/7] style: cargo fmt --- src/core/llm/anthropic.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core/llm/anthropic.rs b/src/core/llm/anthropic.rs index 66cfb51..3a75053 100644 --- a/src/core/llm/anthropic.rs +++ b/src/core/llm/anthropic.rs @@ -373,8 +373,18 @@ mod tests { #[test] fn convert_messages_merges_consecutive_tool_results() { let messages = vec![ - Message::tool_result("call_1".into(), "read_file".into(), "content1".into(), false), - Message::tool_result("call_2".into(), "write_file".into(), "content2".into(), false), + Message::tool_result( + "call_1".into(), + "read_file".into(), + "content1".into(), + false, + ), + Message::tool_result( + "call_2".into(), + "write_file".into(), + "content2".into(), + false, + ), ]; let result = convert_messages(&messages).unwrap(); // Both tool results should merge into a single user message From 7dd6413f2030532ac981de214dd3f31b642128d3 Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 2026 16:34:45 +0300 Subject: [PATCH 6/7] fix: emit InProgress status for replayed tool calls in acp_load_session --- src/acp/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acp/mod.rs b/src/acp/mod.rs index 458810e..45193e9 100644 --- a/src/acp/mod.rs +++ b/src/acp/mod.rs @@ -269,7 +269,7 @@ impl AgentState { }; on_update(SessionUpdate::ToolCall( AcpToolCall::new(tc.id.clone(), &tc.function.name) - .status(ToolCallStatus::Completed) + .status(ToolCallStatus::InProgress) 
.raw_input(raw_input), )); } From 4588b77eabc9ee5a6f2257d824e32da2eaed50a2 Mon Sep 17 00:00:00 2001 From: themartto Date: Thu, 21 May 2026 16:38:03 +0300 Subject: [PATCH 7/7] chore: update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8d99c0b..dc5f586 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,10 @@ Openheim runs an iterative agent loop — it calls your LLM, executes tools on i Openheim is built in Rust from the ground up: -- **Low memory** — runs in a fraction of the RAM a Python agent would need -- **Fast startup** — no interpreter warmup -- **True concurrency** — async Tokio runtime, multiple agents without threading headaches -- **Safe by default** — Rust's ownership model means fewer footguns in long-running agent processes +- **Low memory** +- **Fast startup** +- **True concurrency** +- **Memory-safe by default** ---