Skip to content

Commit aad6caa

Browse files
Ethan0456 and ekzhu authored
Add self-debugging loop to CodeExecutionAgent (#6306)
## Why are these changes needed?

This PR introduces a baseline self-debugging loop to the `CodeExecutionAgent`. The loop automatically retries code generation and execution up to a configurable number of attempts (n) until the execution succeeds or the retry limit is reached. This enables the agent to recover from transient failures (e.g., syntax errors, runtime errors) by using its own reasoning to iteratively improve generated code—laying the foundation for more robust autonomous behavior.

## Related issue number

Closes #6207

## Checks

- [x] I've included any doc changes needed for <https://microsoft.github.io/autogen/>. See <https://github.com/microsoft/autogen/blob/main/CONTRIBUTING.md> to build and test documentation locally.
- [x] I've added tests (if relevant) corresponding to the changes introduced in this PR.
- [x] I've made sure all auto checks have passed.

---------

Signed-off-by: Abhijeetsingh Meena <abhijeet040403@gmail.com>
Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
1 parent b3f3731 commit aad6caa

File tree

3 files changed

+276
-97
lines changed

3 files changed

+276
-97
lines changed

python/packages/autogen-agentchat/src/autogen_agentchat/agents/_code_executor_agent.py

Lines changed: 125 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,7 @@
99
)
1010

1111
from autogen_core import CancellationToken, Component, ComponentModel
12-
from autogen_core.code_executor import CodeBlock, CodeExecutor
13-
from autogen_core.memory import Memory
12+
from autogen_core.code_executor import CodeBlock, CodeExecutor, CodeResult
1413
from autogen_core.model_context import (
1514
ChatCompletionContext,
1615
UnboundedChatCompletionContext,
@@ -34,7 +33,6 @@
3433
CodeExecutionEvent,
3534
CodeGenerationEvent,
3635
HandoffMessage,
37-
MemoryQueryEvent,
3836
ModelClientStreamingChunkEvent,
3937
TextMessage,
4038
ThoughtEvent,
@@ -58,6 +56,11 @@ class CodeExecutorAgentConfig(BaseModel):
5856
model_context: ComponentModel | None = None
5957

6058

59+
class RetryDecision(BaseModel):
60+
reason: str
61+
retry: bool
62+
63+
6164
class CodeExecutorAgent(BaseChatAgent, Component[CodeExecutorAgentConfig]):
6265
"""(Experimental) An agent that generates and executes code snippets based on user instructions.
6366
@@ -91,6 +94,8 @@ class CodeExecutorAgent(BaseChatAgent, Component[CodeExecutorAgentConfig]):
9194
(:py:class:`~autogen_ext.code_executors.docker.DockerCommandLineCodeExecutor` recommended. See example below)
9295
model_client (ChatCompletionClient, optional): The model client to use for inference and generating code.
9396
If not provided, the agent will only execute code blocks found in input messages.
97+
Currently, the model must support structured output mode, which is required for
98+
the automatic retry mechanism to work.
9499
model_client_stream (bool, optional): If `True`, the model client will be used in streaming mode.
95100
:meth:`on_messages_stream` and :meth:`BaseChatAgent.run_stream` methods will
96101
also yield :class:`~autogen_agentchat.messages.ModelClientStreamingChunkEvent`
@@ -103,6 +108,8 @@ class CodeExecutorAgent(BaseChatAgent, Component[CodeExecutorAgentConfig]):
103108
This is useful when the agent is part of a group chat and you want to limit the code execution to messages from specific agents.
104109
If not provided, all messages will be checked for code blocks.
105110
This is only used if `model_client` is not provided.
111+
max_retries_on_error (int, optional): The maximum number of retries on error. If the code execution fails, the agent will retry up to this number of times.
112+
If the code execution fails after this number of retries, the agent will yield a reflection result.
106113
107114
108115
.. note::
@@ -334,6 +341,7 @@ def __init__(
334341
model_client: ChatCompletionClient | None = None,
335342
model_context: ChatCompletionContext | None = None,
336343
model_client_stream: bool = False,
344+
max_retries_on_error: int = 0,
337345
description: str | None = None,
338346
system_message: str | None = DEFAULT_SYSTEM_MESSAGE,
339347
sources: Sequence[str] | None = None,
@@ -348,6 +356,7 @@ def __init__(
348356
self._code_executor = code_executor
349357
self._sources = sources
350358
self._model_client_stream = model_client_stream
359+
self._max_retries_on_error = max_retries_on_error
351360

352361
self._model_client = None
353362
if model_client is not None:
@@ -364,6 +373,12 @@ def __init__(
364373
else:
365374
self._system_messages = [SystemMessage(content=system_message)]
366375

376+
if self._max_retries_on_error > 0:
377+
if not self._model_client or not self._model_client.model_info:
378+
raise ValueError("model_client.model_info must be provided when max_retries_on_error > 0")
379+
if not self._model_client.model_info["structured_output"]:
380+
raise ValueError("Specified model_client doesn't support structured output mode.")
381+
367382
@property
368383
def produced_message_types(self) -> Sequence[type[BaseChatMessage]]:
369384
"""The types of messages that the code executor agent produces."""
@@ -395,8 +410,9 @@ async def on_messages_stream(
395410
system_messages = self._system_messages
396411
model_client = self._model_client
397412
model_client_stream = self._model_client_stream
413+
max_retries_on_error = self._max_retries_on_error
398414

399-
execution_result: CodeExecutionEvent | None = None
415+
execution_result: CodeResult | None = None
400416
if model_client is None: # default behaviour for backward compatibility
401417
# execute generated code if present
402418
code_blocks: List[CodeBlock] = await self.extract_code_blocks_from_messages(messages)
@@ -409,93 +425,130 @@ async def on_messages_stream(
409425
)
410426
return
411427
execution_result = await self.execute_code_block(code_blocks, cancellation_token)
412-
yield Response(chat_message=TextMessage(content=execution_result.to_text(), source=execution_result.source))
428+
yield Response(chat_message=TextMessage(content=execution_result.output, source=self.name))
413429
return
414430

415-
# STEP 1: Add new user/handoff messages to the model context
416-
await self._add_messages_to_context(
417-
model_context=model_context,
418-
messages=messages,
419-
)
420-
421-
# STEP 2: Update model context with any relevant memory
422431
inner_messages: List[BaseAgentEvent | BaseChatMessage] = []
423-
for event_msg in await self._update_model_context_with_memory(
424-
memory=None,
425-
model_context=model_context,
426-
agent_name=agent_name,
427-
):
428-
inner_messages.append(event_msg)
429-
yield event_msg
430432

431-
# STEP 3: Run the first inference
432-
model_result = None
433-
async for inference_output in self._call_llm(
434-
model_client=model_client,
435-
model_client_stream=model_client_stream,
436-
system_messages=system_messages,
437-
model_context=model_context,
438-
agent_name=agent_name,
439-
cancellation_token=cancellation_token,
440-
):
441-
if isinstance(inference_output, CreateResult):
442-
model_result = inference_output
443-
else:
444-
# Streaming chunk event
445-
yield inference_output
433+
for nth_try in range(max_retries_on_error + 1): # Do one default generation, execution and inference loop
434+
# Step 1: Add new user/handoff messages to the model context
435+
await self._add_messages_to_context(
436+
model_context=model_context,
437+
messages=messages,
438+
)
446439

447-
assert model_result is not None, "No model result was produced."
440+
# Step 2: Run inference with the model context
441+
model_result = None
442+
async for inference_output in self._call_llm(
443+
model_client=model_client,
444+
model_client_stream=model_client_stream,
445+
system_messages=system_messages,
446+
model_context=model_context,
447+
agent_name=agent_name,
448+
cancellation_token=cancellation_token,
449+
):
450+
if isinstance(inference_output, CreateResult):
451+
model_result = inference_output
452+
else:
453+
# Streaming chunk event
454+
yield inference_output
448455

449-
# --- NEW: If the model produced a hidden "thought," yield it as an event ---
450-
if model_result.thought:
451-
thought_event = ThoughtEvent(content=model_result.thought, source=agent_name)
452-
yield thought_event
453-
inner_messages.append(thought_event)
456+
assert model_result is not None, "No model result was produced."
454457

455-
# Add the assistant message to the model context (including thought if present)
456-
await model_context.add_message(
457-
AssistantMessage(
458+
# Step 3: [NEW] If the model produced a hidden "thought," yield it as an event
459+
if model_result.thought:
460+
thought_event = ThoughtEvent(content=model_result.thought, source=agent_name)
461+
yield thought_event
462+
inner_messages.append(thought_event)
463+
464+
# Step 4: Add the assistant message to the model context (including thought if present)
465+
await model_context.add_message(
466+
AssistantMessage(
467+
content=model_result.content,
468+
source=agent_name,
469+
thought=getattr(model_result, "thought", None),
470+
)
471+
)
472+
473+
# Step 5: Extract the code blocks from inferred text
474+
assert isinstance(model_result.content, str), "Expected inferred model_result.content to be of type str."
475+
code_blocks = self._extract_markdown_code_blocks(str(model_result.content))
476+
477+
# Step 6: Exit the loop if no code blocks found
478+
if not code_blocks:
479+
yield Response(
480+
chat_message=TextMessage(
481+
content=str(model_result.content),
482+
source=agent_name,
483+
)
484+
)
485+
return
486+
487+
# Step 7: Yield a CodeGenerationEvent
488+
inferred_text_message: CodeGenerationEvent = CodeGenerationEvent(
489+
retry_attempt=nth_try,
458490
content=model_result.content,
491+
code_blocks=code_blocks,
459492
source=agent_name,
460-
thought=getattr(model_result, "thought", None),
461493
)
462-
)
463494

464-
code_blocks = self._extract_markdown_code_blocks(str(model_result.content))
495+
yield inferred_text_message
465496

466-
if not code_blocks:
467-
yield Response(
468-
chat_message=TextMessage(
469-
content=str(model_result.content),
497+
# Step 8: Execute the extracted code blocks
498+
execution_result = await self.execute_code_block(inferred_text_message.code_blocks, cancellation_token)
499+
500+
# Step 9: Update model context with the code execution result
501+
await model_context.add_message(
502+
UserMessage(
503+
content=execution_result.output,
470504
source=agent_name,
471505
)
472506
)
473-
return
474507

475-
# NOTE: error: Argument of type "str | List[FunctionCall]" cannot be assigned to parameter "content" of type "str" in function "__init__".
476-
# For now we can assume that there are no FunctionCalls in the response because we are not providing tools to the CodeExecutorAgent.
477-
# So, for now we cast model_result.content to string
478-
inferred_text_message: CodeGenerationEvent = CodeGenerationEvent(
479-
content=str(model_result.content),
480-
code_blocks=code_blocks,
481-
source=agent_name,
482-
)
508+
# Step 10: Yield a CodeExecutionEvent
509+
yield CodeExecutionEvent(retry_attempt=nth_try, result=execution_result, source=self.name)
483510

484-
yield inferred_text_message
511+
# If execution was successful or last retry, then exit
512+
if execution_result.exit_code == 0 or nth_try == max_retries_on_error:
513+
break
485514

486-
execution_result = await self.execute_code_block(inferred_text_message.code_blocks, cancellation_token)
515+
# Step 11: If exit code is non-zero and retries are available then
516+
# make an inference asking if we should retry or not
517+
chat_context = await model_context.get_messages()
487518

488-
# Add the code execution result to the model context
489-
await model_context.add_message(
490-
UserMessage(
491-
content=execution_result.result.output,
492-
source=agent_name,
519+
retry_prompt = (
520+
f"The most recent code execution resulted in an error:\n{execution_result.output}\n\n"
521+
"Should we attempt to resolve it? Please respond with:\n"
522+
"- A boolean value for 'retry' indicating whether it should be retried.\n"
523+
"- A detailed explanation in 'reason' that identifies the issue, justifies your decision to retry or not, and outlines how you would resolve the error if a retry is attempted."
493524
)
494-
)
495525

496-
yield execution_result
526+
chat_context = chat_context + [
527+
UserMessage(
528+
content=retry_prompt,
529+
source=agent_name,
530+
)
531+
]
532+
533+
response = await model_client.create(messages=chat_context, json_output=RetryDecision)
534+
535+
assert isinstance(
536+
response.content, str
537+
), "Expected structured response for retry decision to be of type str."
538+
should_retry_generation = RetryDecision.model_validate_json(str(response.content))
539+
540+
# Exit if no-retry is needed
541+
if not should_retry_generation.retry:
542+
break
497543

498-
# always reflect on the execution result
544+
yield CodeGenerationEvent(
545+
retry_attempt=nth_try,
546+
content=f"Attempt number: {nth_try + 1}\nProposed correction: {should_retry_generation.reason}",
547+
code_blocks=[],
548+
source=agent_name,
549+
)
550+
551+
# Always reflect on the execution result
499552
async for reflection_response in CodeExecutorAgent._reflect_on_code_block_results_flow(
500553
system_messages=system_messages,
501554
model_client=model_client,
@@ -504,7 +557,7 @@ async def on_messages_stream(
504557
agent_name=agent_name,
505558
inner_messages=inner_messages,
506559
):
507-
yield reflection_response # last reflection_response is of type Response so it will finish the routine
560+
yield reflection_response # Last reflection_response is of type Response so it will finish the routine
508561

509562
async def extract_code_blocks_from_messages(self, messages: Sequence[BaseChatMessage]) -> List[CodeBlock]:
510563
# Extract code blocks from the messages.
@@ -518,7 +571,7 @@ async def extract_code_blocks_from_messages(self, messages: Sequence[BaseChatMes
518571

519572
async def execute_code_block(
520573
self, code_blocks: List[CodeBlock], cancellation_token: CancellationToken
521-
) -> CodeExecutionEvent:
574+
) -> CodeResult:
522575
# Execute the code blocks.
523576
result = await self._code_executor.execute_code_blocks(code_blocks, cancellation_token=cancellation_token)
524577

@@ -529,7 +582,7 @@ async def execute_code_block(
529582
# Error
530583
result.output = f"The script ran, then exited with an error (POSIX exit code: {result.exit_code})\nIts output was:\n{result.output}"
531584

532-
return CodeExecutionEvent(result=result, source=self.name)
585+
return result
533586

534587
async def on_reset(self, cancellation_token: CancellationToken) -> None:
535588
"""Its a no-op as the code executor agent has no mutable state."""
@@ -618,27 +671,6 @@ async def _call_llm(
618671
model_result = await model_client.create(llm_messages, tools=[], cancellation_token=cancellation_token)
619672
yield model_result
620673

621-
@staticmethod
622-
async def _update_model_context_with_memory(
623-
memory: Optional[Sequence[Memory]],
624-
model_context: ChatCompletionContext,
625-
agent_name: str,
626-
) -> List[MemoryQueryEvent]:
627-
"""
628-
If memory modules are present, update the model context and return the events produced.
629-
"""
630-
events: List[MemoryQueryEvent] = []
631-
if memory:
632-
for mem in memory:
633-
update_context_result = await mem.update_context(model_context)
634-
if update_context_result and len(update_context_result.memories.results) > 0:
635-
memory_query_event_msg = MemoryQueryEvent(
636-
content=update_context_result.memories.results,
637-
source=agent_name,
638-
)
639-
events.append(memory_query_event_msg)
640-
return events
641-
642674
@staticmethod
643675
async def _add_messages_to_context(
644676
model_context: ChatCompletionContext,

python/packages/autogen-agentchat/src/autogen_agentchat/messages.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,22 +433,33 @@ def to_text(self) -> str:
433433

434434

435435
class CodeGenerationEvent(BaseAgentEvent):
436-
"""An event signaling code generation for execution."""
436+
"""An event signaling code generation event."""
437+
438+
retry_attempt: int
439+
"Retry number, 0 means first generation"
437440

438441
content: str
439442
"The complete content as string."
440443

441-
type: Literal["CodeGenerationEvent"] = "CodeGenerationEvent"
442-
443444
code_blocks: List[CodeBlock]
445+
"List of code blocks present in content"
446+
447+
type: Literal["CodeGenerationEvent"] = "CodeGenerationEvent"
444448

445449
def to_text(self) -> str:
446450
return self.content
447451

448452

449453
class CodeExecutionEvent(BaseAgentEvent):
450-
type: Literal["CodeExecutionEvent"] = "CodeExecutionEvent"
454+
"""An event signaling code execution event."""
455+
456+
retry_attempt: int
457+
"Retry number, 0 means first execution"
458+
451459
result: CodeResult
460+
"Code Execution Result"
461+
462+
type: Literal["CodeExecutionEvent"] = "CodeExecutionEvent"
452463

453464
def to_text(self) -> str:
454465
return self.result.output

0 commit comments

Comments (0)