weirdstuff-dev · themartto · May 21, 2026 · May 19, 2026 · May 21, 2026 · May 21, 2026
diff --git a/README.md b/README.md
@@ -21,10 +21,10 @@ Openheim runs an iterative agent loop — it calls your LLM, executes tools on i
 
 Openheim is built in Rust from the ground up:
 
-- **Low memory** — runs in a fraction of the RAM a Python agent would need
-- **Fast startup** — no interpreter warmup
-- **True concurrency** — async Tokio runtime, multiple agents without threading headaches
-- **Safe by default** — Rust's ownership model means fewer footguns in long-running agent processes
+- **Low memory**
+- **Fast startup**
+- **True concurrency**
+- **Memory-safe by default**
 
 ---
 

diff --git a/docs/api.md b/docs/api.md
@@ -341,6 +341,14 @@ Returns the full conversation for a session, including all messages.
       "tool_calls": null,
       "tool_call_id": "call_abc123",
       "tool_name": "read_file"
+    },
+    {
+      "role": "tool",
+      "content": "Error: permission denied: /etc/shadow",
+      "tool_calls": null,
+      "tool_call_id": "call_def456",
+      "tool_name": "read_file",
+      "is_error": true
     }
   ]
 }
@@ -355,6 +363,14 @@ Returns the full conversation for a session, including all messages.
 | `"tool"` | Tool execution result fed back to the LLM |
 | `"system"` | System prompt injected by the agent (skills, context) |
 
+**`role: "tool"` fields:**
+
+| Field | Type | Description |
+|---|---|---|
+| `tool_call_id` | `string` | ID linking this result to the assistant's tool call request |
+| `tool_name` | `string` | Name of the tool that was invoked |
+| `is_error` | `boolean` | `true` if the tool returned an error. Omitted from the JSON when `false`, so absence means success. Also forwarded to Anthropic as `is_error` on the tool result block so that the LLM receives an accurate failure signal. |
+
 **Error `400`** — if `:id` is not a valid UUID:
 
 ```json
@@ -736,7 +752,7 @@ The flow is:
 }
 ```
 
-> Tool call messages from the original session are **not** replayed — only user and assistant text. Use `GET /api/sessions/:id` if you need the raw tool call history.
+> Tool calls from the original session **are** replayed. Assistant tool-call requests arrive as `tool_call` notifications (`status: "in_progress"`), and tool results arrive as `tool_call_update` notifications with `status: "completed"` or `status: "failed"`. The result status is derived from the `is_error` field stored on each tool message, so failed calls replay as failed.
 
 **Response (after all history has been replayed):**
 
@@ -1354,6 +1370,7 @@ interface Message {
   tool_calls?: ToolCall[] | null;
   tool_call_id?: string | null; // present on role:"tool" messages
   tool_name?: string | null;    // present on role:"tool" messages
+  is_error?: boolean;           // true when the tool returned an error; omitted when false
 }
 
 interface ToolCall {

diff --git a/src/acp/mod.rs b/src/acp/mod.rs
@@ -148,12 +148,19 @@ impl AgentState {
                             .raw_input(raw_input),
                     ));
                 }
-                StreamEvent::ToolResult { result, .. } => {
+                StreamEvent::ToolResult {
+                    result, is_error, ..
+                } => {
                     if let Some(id) = last_tool_call_id.take() {
+                        let status = if is_error {
+                            ToolCallStatus::Failed
+                        } else {
+                            ToolCallStatus::Completed
+                        };
                         on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new(
                             id,
                             ToolCallUpdateFields::new()
-                                .status(ToolCallStatus::Completed)
+                                .status(status)
                                 .raw_output(serde_json::Value::String(result)),
                         )));
                     }
@@ -231,20 +238,60 @@ impl AgentState {
         );
 
         for msg in &conversation.messages {
-            let text = msg.content.clone().unwrap_or_default();
-            if text.is_empty() {
-                continue;
-            }
-            let update = match msg.role {
+            match msg.role {
                 Role::User => {
-                    SessionUpdate::UserMessageChunk(ContentChunk::new(ContentBlock::from(text)))
+                    let text = msg.content.clone().unwrap_or_default();
+                    if !text.is_empty() {
+                        on_update(SessionUpdate::UserMessageChunk(ContentChunk::new(
+                            ContentBlock::from(text),
+                        )));
+                    }
                 }
                 Role::Assistant => {
-                    SessionUpdate::AgentMessageChunk(ContentChunk::new(ContentBlock::from(text)))
+                    let text = msg.content.clone().unwrap_or_default();
+                    if !text.is_empty() {
+                        on_update(SessionUpdate::AgentMessageChunk(ContentChunk::new(
+                            ContentBlock::from(text),
+                        )));
+                    }
+                    if let Some(tool_calls) = &msg.tool_calls {
+                        for tc in tool_calls {
+                            let raw_input = match serde_json::from_str(&tc.function.arguments) {
+                                Ok(v) => Some(v),
+                                Err(e) => {
+                                    tracing::warn!(
+                                        tool_call_id = %tc.id,
+                                        tool_name = %tc.function.name,
+                                        "failed to parse tool call arguments: {e}"
+                                    );
+                                    None
+                                }
+                            };
+                            on_update(SessionUpdate::ToolCall(
+                                AcpToolCall::new(tc.id.clone(), &tc.function.name)
+                                    .status(ToolCallStatus::InProgress)
+                                    .raw_input(raw_input),
+                            ));
+                        }
+                    }
                 }
-                _ => continue,
-            };
-            on_update(update);
+                Role::Tool => {
+                    if let (Some(id), Some(content)) = (&msg.tool_call_id, &msg.content) {
+                        let status = if msg.is_error {
+                            ToolCallStatus::Failed
+                        } else {
+                            ToolCallStatus::Completed
+                        };
+                        on_update(SessionUpdate::ToolCallUpdate(ToolCallUpdate::new(
+                            id.clone(),
+                            ToolCallUpdateFields::new()
+                                .status(status)
+                                .raw_output(serde_json::Value::String(content.clone())),
+                        )));
+                    }
+                }
+                _ => {}
+            }
         }
 
         Ok(())

diff --git a/src/core/agent.rs b/src/core/agent.rs
@@ -75,15 +75,16 @@ where
                     });
                 }
 
-                let result = match tool_executor.execute(tool_name, arguments).await {
-                    Ok(r) => r,
-                    Err(e) => format!("Error: {e}"),
+                let (result, is_error) = match tool_executor.execute(tool_name, arguments).await {
+                    Ok(r) => (r, false),
+                    Err(e) => (format!("Error: {e}"), true),
                 };
 
                 if let Some(cb) = callback.as_mut() {
                     cb(StreamEvent::ToolResult {
                         tool_name: tool_name.clone(),
                         result: result.clone(),
+                        is_error,
                     });
                 }
 
@@ -97,6 +98,7 @@ where
                     tool_call.id.clone(),
                     tool_name.clone(),
                     result,
+                    is_error,
                 ));
             }
 
@@ -245,6 +247,7 @@ mod tests {
                 }]),
                 tool_call_id: None,
                 tool_name: None,
+                is_error: false,
             },
             finish_reason: Some("tool_calls".into()),
         }

diff --git a/src/core/llm/anthropic.rs b/src/core/llm/anthropic.rs
@@ -11,6 +11,10 @@ use super::LlmClient;
 const ANTHROPIC_VERSION: &str = "2023-06-01";
 const DEFAULT_MAX_TOKENS: u32 = 4096;
 
+fn is_false(b: &bool) -> bool { // predicate for `#[serde(skip_serializing_if = "is_false")]`: true when the flag is unset
+    !b // serde passes the field by reference, hence the `&bool` parameter
+}
+
 #[derive(Clone)]
 pub struct AnthropicClient {
     client: ReqwestClient,
@@ -72,6 +76,8 @@ enum AnthropicContentBlock {
     ToolResult {
         tool_use_id: String,
         content: String,
+        #[serde(skip_serializing_if = "is_false")]
+        is_error: bool,
     },
 }
 
@@ -115,6 +121,7 @@ fn convert_messages(messages: &[Message]) -> Result<Vec<AnthropicMessage>> {
                 let block = AnthropicContentBlock::ToolResult {
                     tool_use_id: msg.tool_call_id.clone().unwrap_or_default(),
                     content: msg.content.clone().unwrap_or_default(),
+                    is_error: msg.is_error,
                 };
                 // Merge into the last user message if it exists, otherwise create new
                 if let Some(last) = result.last_mut() {
@@ -231,6 +238,7 @@ fn convert_response(resp: AnthropicResponse) -> Choice {
             },
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         },
         finish_reason,
     }
@@ -313,6 +321,7 @@ mod tests {
             tool_calls: None,
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 0);
@@ -333,6 +342,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -355,15 +365,26 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         assert!(convert_messages(&messages).is_err());
     }
 
     #[test]
     fn convert_messages_merges_consecutive_tool_results() {
         let messages = vec![
-            Message::tool_result("call_1".into(), "read_file".into(), "content1".into()),
-            Message::tool_result("call_2".into(), "write_file".into(), "content2".into()),
+            Message::tool_result(
+                "call_1".into(),
+                "read_file".into(),
+                "content1".into(),
+                false,
+            ),
+            Message::tool_result(
+                "call_2".into(),
+                "write_file".into(),
+                "content2".into(),
+                false,
+            ),
         ];
         let result = convert_messages(&messages).unwrap();
         // Both tool results should merge into a single user message

diff --git a/src/core/llm/gemini.rs b/src/core/llm/gemini.rs
@@ -274,6 +274,7 @@ fn convert_response(resp: GeminiResponse) -> Result<Choice> {
             },
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         },
         finish_reason,
     })
@@ -380,6 +381,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -406,6 +408,7 @@ mod tests {
             }]),
             tool_call_id: None,
             tool_name: None,
+            is_error: false,
         }];
         assert!(convert_messages(&messages).is_err());
     }
@@ -416,6 +419,7 @@ mod tests {
             "call_1".into(),
             "read_file".into(),
             "file content".into(),
+            false,
         )];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);
@@ -427,8 +431,8 @@ mod tests {
     #[test]
     fn convert_messages_merges_tool_results_into_user() {
         let messages = vec![
-            Message::tool_result("call_1".into(), "read_file".into(), "a".into()),
-            Message::tool_result("call_2".into(), "write_file".into(), "b".into()),
+            Message::tool_result("call_1".into(), "read_file".into(), "a".into(), false),
+            Message::tool_result("call_2".into(), "write_file".into(), "b".into(), false),
         ];
         let result = convert_messages(&messages).unwrap();
         assert_eq!(result.len(), 1);

diff --git a/src/core/llm/retry.rs b/src/core/llm/retry.rs
@@ -61,6 +61,7 @@ mod tests {
                 tool_calls: None,
                 tool_call_id: None,
                 tool_name: None,
+                is_error: false,
             },
             finish_reason: Some("stop".into()),
         }