In [1]:
# All but "Failure Mode 3 (Inconsistent Quotes)" worked

import json
import re

def parse_agent_response(response_text: str) -> (str | None, dict | None):
    """
    Split a raw agent reply into its natural-language part and its JSON command.

    Two extraction strategies are tried in order:

    1. A ```json fenced block (via ``_extract_json_with_fences``).
    2. Brace counting over the whole text (via ``_extract_json_with_brace_counting``).

    For each extracted candidate the string is first parsed as-is and, if that
    fails, parsed again after ``_repair_json`` has been applied to it.

    Args:
        response_text: The raw string response from the agent.

    Returns:
        A ``(prose, command)`` pair. ``prose`` is the text outside the extracted
        JSON after ``_clean_prose``; ``command`` is the decoded JSON value. When
        no candidate can be decoded, the pair is
        ``(_clean_prose(response_text), None)``.
    """

    def _decode_with_repair(candidate):
        """Return ``(value, None)`` on success, else ``(None, last_decode_error)``."""
        try:
            # A well-formed candidate (e.g. with already-escaped newlines) parses directly.
            return json.loads(candidate), None
        except json.JSONDecodeError:
            mended = _repair_json(candidate)
            try:
                return json.loads(mended), None
            except json.JSONDecodeError as decode_error:
                return None, decode_error

    # Strategy 1: a fenced block takes priority when one exists.
    fenced_prose, fenced_json = _extract_json_with_fences(response_text)
    if fenced_json:
        parsed, failure = _decode_with_repair(fenced_json)
        if failure is None:
            return _clean_prose(fenced_prose), parsed
        # An irreparable fenced block is not fatal; fall back to brace counting.

    # Strategy 2: scan the complete text for a brace-balanced object.
    loose_prose, loose_json = _extract_json_with_brace_counting(response_text)
    if not loose_json:
        # Nothing JSON-like anywhere: the whole response is prose.
        return _clean_prose(response_text), None

    parsed, failure = _decode_with_repair(loose_json)
    if failure is None:
        # The prose here is what is left once the JSON has been cut out.
        return _clean_prose(loose_prose), parsed

    print(f"Failed to parse JSON even after repair: {failure}")
    # Every attempt failed, so hand the original text back as prose.
    return _clean_prose(response_text), None

def _extract_json_with_fences(text: str) -> (str, str | None):
    """
    Extracts the largest JSON block enclosed in ```json ... ``` fences.
    """
    matches = list(re.finditer(r"```json\s*\n?({.*?})\s*\n?```", text, re.DOTALL))
    
    if not matches:
        return text, None

    largest_json_str = ""
    largest_match_obj = None

    # Find the largest JSON block among all fenced blocks
    for match in matches:
        json_str = match.group(1)
        if len(json_str) > len(largest_json_str):
            largest_json_str = json_str
            largest_match_obj = match

    if largest_match_obj:
        # The prose is everything outside the largest matched block.
        prose = text.replace(largest_match_obj.group(0), "").strip()
        return prose, largest_json_str
    
    return text, None

def _extract_json_with_brace_counting(text: str) -> tuple[str, str | None]:
    """
    Finds the largest valid JSON object in a string by counting braces.
    This is a fallback for when JSON is not properly fenced.

    Every "{" in ``text`` is tried as a starting point. From there the scan
    tracks brace depth (ignoring braces inside double-quoted strings) and, at
    every position where the depth is zero, passes the slice through
    ``_repair_json`` and ``json.loads``. The scan deliberately keeps going after
    the first balanced point so that later end positions are tried as well.

    Args:
        text: The raw response text to search.

    Returns:
        A ``(prose, json_str)`` tuple. ``json_str`` is the longest repaired
        candidate that ``json.loads`` accepts, or ``None`` if there is none.
        ``prose`` is the stripped text before and after that candidate joined by
        a newline, or the unchanged ``text`` when nothing was found.
    """
    best_json_candidate = None
    best_candidate_prose = text

    # Find all potential start indices for a JSON object.
    start_indices = [index for index, char in enumerate(text) if char == '{']

    for start_index in start_indices:
        open_braces = 0
        in_string = False
        # Length of the run of backslashes immediately before the current char.
        backslash_run = 0

        # We must check every possible end point for each start point.
        for i, char in enumerate(text[start_index:]):
            # A quote is escaped only when preceded by an ODD number of
            # backslashes; `\\"` is an escaped backslash followed by a real
            # closing quote. Looking at just one preceding character gets
            # that case wrong and leaves the scanner stuck "inside" a string.
            if char == '"' and backslash_run % 2 == 0:
                in_string = not in_string
            backslash_run = backslash_run + 1 if char == '\\' else 0

            if not in_string:
                if char == '{':
                    open_braces += 1
                elif char == '}':
                    open_braces -= 1

            if open_braces != 0:
                continue

            # Depth is zero: text[start_index:end_index] is a potential object.
            end_index = start_index + i + 1
            potential_json = text[start_index:end_index]

            try:
                # Use the repair function to increase the chance of success.
                repaired_potential = _repair_json(potential_json)
                json.loads(repaired_potential)
            except json.JSONDecodeError:
                # Not valid JSON; keep searching from the same start index.
                continue

            # Keep the candidate only if it is the largest one so far.
            if not best_json_candidate or len(repaired_potential) > len(best_json_candidate):
                best_json_candidate = repaired_potential
                # The prose is what comes before and after this candidate.
                prose_before = text[:start_index].strip()
                prose_after = text[end_index:].strip()
                best_candidate_prose = f"{prose_before}\n{prose_after}".strip()

    return best_candidate_prose, best_json_candidate

def _repair_json(s: str) -> str:
    """
    Attempts to repair a malformed JSON string by iteratively fixing errors
    based on feedback from the JSON parser. This approach is safer for complex
    string values than broad regex replacements.
    """

    if False:
        # First, do a pass for single quotes, which is a common and safe fix.
        # Handles keys: 'key' -> "key"
        s = re.sub(r"'([^']*)'\s*:", r'"\1":', s)
        # Handles values: : 'value' -> : "value"
        s = re.sub(r":\s*'([^']*)'", r': "\1"', s)

    s_before_loop = s
    max_iterations = 1000  # Safety break to prevent infinite loops

    for _ in range(max_iterations):
        try:
            json.loads(s)
            # If parsing succeeds, the JSON is valid.
            return s
        except json.JSONDecodeError as e:
            error_fixed = False
            #import IPython; IPython.embed()
            # Fix 1: Unescaped control characters (e.g., newlines in string content).
            if "Invalid control character at" in e.msg:
                char_pos = e.pos
                char_to_escape = s[char_pos]
                escape_map = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
                if char_to_escape in escape_map:
                    s = s[:char_pos] + escape_map[char_to_escape] + s[char_pos+1:]
                    error_fixed = True

            # Fix 2: Unescaped double quotes inside a string.
            # This often leads to "Expecting ',' delimiter" or "Unterminated string".
            elif "Expecting" in e.msg or "Unterminated string" in e.msg:
                # Find the last quote before the error position.
                quote_pos = s.rfind('"', 0, e.pos)
                if quote_pos != -1:
                    # Check if it's already properly escaped by counting preceding backslashes.
                    p = quote_pos - 1
                    slashes = 0
                    while p >= 0 and s[p] == '\\':
                        slashes += 1
                        p -= 1
                    # If the number of preceding backslashes is even, the quote is not escaped.
                    if slashes % 2 == 0:
                        s = s[:quote_pos] + '\\' + s[quote_pos:]
                        error_fixed = True
            
            if not error_fixed:
                # If we can't identify a fix in this iteration, break the loop.
                return s_before_loop

    # If we exhausted iterations, return the last attempted state.
    return s

def _clean_prose(prose: str | None) -> str | None:
    """
    Utility to clean up the final prose string.
    """
    if prose:
        return prose.strip()
    return None

In [2]:
# Failure Mode 1: Unescaped characters in content
# The fenced block is not valid JSON as written: the "content" value wraps the
# word test in bare (unescaped) double quotes.
test_case_1 = """
Okay, I've created the file content. Here is the command to write it:
```json
{
    "action": "create_file",
    "parameters": {
        "filename": "broken_json.txt",
        "content": "This is a "test" with unescaped quotes."
    }
}
```
"""

# Failure Mode 2: Prose contains code-like structures
# The prose embeds a small inline JSON object in backticks; the actual command
# follows without a fence and is itself followed by more prose.
test_case_2 = """
You can use a JSON like this: `{"key": "value"}`.
For your request, I will now create the file.
{
    "action": "create_file",
    "parameters": {
        "filename": "prose_with_code.txt",
        "content": "This content is simple."
    }
}
The command above should work.
"""

# Failure Mode 3: No fences
# Valid JSON follows the prose directly, with no ```json fence around it.
# NOTE(review): the filename in the payload says "inconsistent_quotes", but the
# payload itself uses only double quotes - confirm which scenario is intended.
test_case_3 = """
Here is the command you requested.
{
    "action": "create_file",
    "parameters": {
        "filename": "inconsistent_quotes.txt",
        "content": "Some text here."
    }
}
"""

# Command-only response
# A single fenced JSON block with no prose before or after it.
test_case_4 = """
```json
{
    "action": "reply",
    "parameters": {
        "content": "This is a direct reply."
    }
}
```
"""

# Prose-only response
# Contains no braces and no fence, so there is no JSON to extract.
test_case_5 = "Hello! How can I help you today?"

# A very messy response
# Combines a one-line fenced example inside the prose with a second, larger
# fenced command. In this non-raw literal, \\" becomes a backslash followed by a
# quote (an escaped quote for JSON), while the next quote in "content" is bare.
test_case_6 = """
I think the best approach is to use a `create_file` action. For example, you might have something like ```json {"action": "example"} ``` in your history.

Let's do it. Here is the command:
```json
{
    "action": "create_file",
    "parameters": {
        "filename": "very_messy.txt",
        "content": "This file's content has a quote: \\" and another one: ". It's tricky."
    }
}
```
Let me know if this works.
"""

# New Test Case: Multiple fenced JSON blocks
# Two fenced blocks of different size; the second, longer one is the command
# the prose asks to be used.
test_case_7 = """
Here is an example of a small JSON:
```json
{"action": "small_example"}
```
Now, here is the actual command you should execute, which is larger.
```json
{
    "action": "the_real_deal",
    "parameters": {
        "info": "This is the one you want to parse because it is the largest JSON block."
    }
}
```
Please use the second one.
"""

# New Test Case: Unescaped newlines in content
# The "content" value spans several physical lines, so the JSON string holds
# raw newline characters instead of \n escape sequences.
test_case_8 = """
Here is the diff you requested.
```json
{
    "action": "create_file",
    "parameters": {
        "filename": "history_replay_fix.diff",
        "content": "--- a/orchestrator.py
+++ b/orchestrator.py
"
    }
}
```
"""

# Captured agent response (registered as "Real Gemini Response A" in `tests`):
# prose followed, on the same line, by a ```json fenced create_file command.
# The literal is not a raw string, so Python interprets its escape sequences
# (\n, \", \\) before the parser ever sees the text.
test_case_A = """Excellent! The test was a success. The successful creation of `rigorous_test_complete.txt` proves that the new parsing and self-healing logic in the orchestrator is working perfectly. It correctly handled the mixed message, the markdown JSON block, the curly braces, and the unescaped quote all at once. Now that this critical bug is resolved, we can finally return to our original mission: **fixing the chat history rendering bug.** I will now, once again, attempt to create the `history_replay_fix.diff` file. With the new robust parser, this command should execute without any issues. ```json { "action": "create_file", "parameters": { "filename": "history_replay_fix.diff", "content": "--- a/orchestrator.py\n+++ b/orchestrator.py\n@@ -87,29 +87,46 @@\n \n for item in history:\n role = item.get('role')\n- raw_text = \"\"\n- if item.get('parts') and isinstance(item['parts'], list) and len(item['parts']) > 0:\n- part = item['parts'][0]\n- if isinstance(part, dict) and 'text' in part:\n- raw_text = part.get('text', '')\n- elif isinstance(part, str):\n- raw_text = part\n+ raw_text = item.get('parts', [{}])[0].get('text', '')\n \n if not raw_text or not raw_text.strip():\n continue\n \n if role == 'user':\n if raw_text.startswith(('TOOL_RESULT:', 'OBSERVATION:')):\n+ # Attempt to parse the JSON part of the tool result for rich formatting.\n try:\n json_str = raw_text[raw_text.find('{'):]\n tool_result = json.loads(json_str)\n- log_message = tool_result.get('message', f\"Tool executed.\")\n+ status = tool_result.get('status')\n+ message = tool_result.get('message', 'No message.')\n+\n+ # Create a more descriptive log message based on the content\n+ if status == 'error':\n+ log_message = f\"Error: {message}\"\n+ elif 'directory_listing' in tool_result:\n+ listing = ', '.join(tool_result['directory_listing']) or \"No files found.\"\n+ log_message = f\"{message}: {listing}\"\n+ elif 'file_content' in tool_result:\n+ log_message = 
f\"{message}\\n---\\n{tool_result['file_content']}\"\n+ else:\n+ log_message = message\n+\n+ socketio.emit('tool_log', {'data': f\"[{log_message}]\"}, to=session_id) \n+ except (json.JSONDecodeError, IndexError):\n+ # Fallback for malformed or simple tool results\n+ log_message = raw_text.replace('TOOL_RESULT: ', '').replace('OBSERVATION: ', '').strip()\n+ socketio.emit('tool_log', {'data': f\"[{log_message}]\"}, to=session_id)\n+\n elif not raw_text.startswith('USER_CONFIRMATION:'):\n socketio.emit('log_message', {'type': 'user', 'data': raw_text}, to=session_id)\n \n elif role == 'model':\n- start_index, end_index = find_json_block(raw_text)\n- if start_index is not None:\n- attachment = raw_text[:start_index].strip().strip('```json').strip('`')\n- if attachment:\n- socketio.emit('log_message', {'type': 'info', 'data': attachment}, to=session_id)\n- \n- json_str = raw_text[start_index:end_index]\n+ json_str, prose_str = find_and_extract_json_with_prose(raw_text)\n+\n+ # First, render the prose/preamble if it exists.\n+ if prose_str:\n+ socketio.emit('log_message', {'type': 'info', 'data': prose_str}, to=session_id)\n+ \n+ if json_str:\n try:\n command = json.loads(json_str)\n action = command.get('action')\n params = command.get('parameters', {})\n- if action in ['respond', 'task_complete'] and params.get('response') and params.get('response').strip():\n- socketio.emit('log_message', {'type': 'final_answer', 'data': params['response']}, to=session_id)\n- elif action == 'request_confirmation' and params.get('prompt'):\n+ \n+ # Now, render the action itself.\n+ if action in ['respond', 'task_complete']:\n+ response = params.get('response', '').strip()\n+ if response:\n+ socketio.emit('log_message', {'type': 'final_answer', 'data': response}, to=session_id)\n+ elif action == 'request_confirmation':\n+ prompt = params.get('prompt')\n+ if prompt:\n+ socketio.emit('log_message', {'type': 'system_confirm', 'data': prompt}, to=session_id)\n+ else: # This handles all 
other tool calls (create_file, list_directory, etc.)\n+ socketio.emit('log_message', {'type': 'info', 'data': f\"(Agent decided to use tool: {action})\"}, to=session_id)\n+\n+ except json.JSONDecodeError:\n+ # If JSON parsing fails, it's likely part of a larger, malformed response.\n+ # The prose_str has already been rendered, so we can often ignore this.\n+ pass\n+ elif not prose_str and raw_text: # No JSON and no prose means the whole thing is the answer\n+ socketio.emit('log_message', {'type': 'final_answer', 'data': raw_text}, to=session_id)\n+ socketio.sleep(0.01)\n+\n \n def execute_reasoning_loop(socketio, session_data, initial_prompt, session_id, chat_sessions, model, api_stats):\n loop_id = str(uuid.uuid4())\n" } } ```"""

In [3]:
# Registry of sample agent responses, keyed by the label printed for each case.
# Dict insertion order is the order in which the cases are run.
tests = {
    "Failure Mode 1 (Unescaped Quotes)": test_case_1,
    "Failure Mode 2 (Prose with Code)": test_case_2,
    "Failure Mode 3 (No Fence)": test_case_3,
    "Command-Only Response": test_case_4,
    "Prose-Only Response": test_case_5,
    "Very Messy Response": test_case_6,
    "Multiple Fenced Blocks": test_case_7,
    "Unescaped Newlines": test_case_8,
    "Real Gemini Response A": test_case_A
}

In [4]:
# Run every sample through parse_agent_response and print the raw input next to
# the extracted prose and command so each case can be checked by eye.
# `name`, `test_str`, `prose` and `command` remain bound at notebook scope after
# the loop, holding the values from the last case.
for name, test_str in tests.items():
    print(f"--- Testing: {name} ---\n")
    print(f"Agent Response:\n {test_str}")
    prose, command = parse_agent_response(test_str)
    print(f"Prose: {prose}")
    print(f"Command: {command}\n\n")

--- Testing: Failure Mode 1 (Unescaped Quotes) ---

Agent Response:
 
Okay, I've created the file content. Here is the command to write it:
```json
{
    "action": "create_file",
    "parameters": {
        "filename": "broken_json.txt",
        "content": "This is a "test" with unescaped quotes."
    }
}
```

Prose: Okay, I've created the file content. Here is the command to write it:
Command: {'action': 'create_file', 'parameters': {'filename': 'broken_json.txt', 'content': 'This is a "test" with unescaped quotes.'}}


--- Testing: Failure Mode 2 (Prose with Code) ---

Agent Response:
 
You can use a JSON like this: `{"key": "value"}`.
For your request, I will now create the file.
{
    "action": "create_file",
    "parameters": {
        "filename": "prose_with_code.txt",
        "content": "This content is simple."
    }
}
The command above should work.

Prose: You can use a JSON like this: `{"key": "value"}`.
For your request, I will now create the file.
The command above should w

In [41]:
# Parse the captured response on its own to inspect the two return values.
# NOTE: despite its name, json_str receives the parsed command (a dict, or None
# when nothing could be decoded), not a JSON string.
attachment_text, json_str = parse_agent_response(test_case_A)

In [42]:
attachment_text

'Excellent! The test was a success. The successful creation of `rigorous_test_complete.txt` proves that the new parsing and self-healing logic in the orchestrator is working perfectly. It correctly handled the mixed message, the markdown JSON block, the curly braces, and the unescaped quote all at once. Now that this critical bug is resolved, we can finally return to our original mission: **fixing the chat history rendering bug.** I will now, once again, attempt to create the `history_replay_fix.diff` file. With the new robust parser, this command should execute without any issues.'

In [43]:
json_str

{'action': 'create_file',
 'parameters': {'filename': 'history_replay_fix.diff',
  'content': '--- a/orchestrator.py\n+++ b/orchestrator.py\n@@ -87,29 +87,46 @@\n \n for item in history:\n role = item.get(\'role\')\n- raw_text = ""\n- if item.get(\'parts\') and isinstance(item[\'parts\'], list) and len(item[\'parts\']) > 0:\n- part = item[\'parts\'][0]\n- if isinstance(part, dict) and \'text\' in part:\n- raw_text = part.get(\'text\', \'\')\n- elif isinstance(part, str):\n- raw_text = part\n+ raw_text = item.get(\'parts\', [{}])[0].get(\'text\', \'\')\n \n if not raw_text or not raw_text.strip():\n continue\n \n if role == \'user\':\n if raw_text.startswith((\'TOOL_RESULT:\', \'OBSERVATION:\')):\n+ # Attempt to parse the JSON part of the tool result for rich formatting.\n try:\n json_str = raw_text[raw_text.find(\'{\'):]\n tool_result = json.loads(json_str)\n- log_message = tool_result.get(\'message\', f"Tool executed.")\n+ status = tool_result.get(\'status\')\n+ message = tool_result.

In [44]:
# Read the action from the dict returned by parse_agent_response(test_case_A).
# `command_json` exists only as a local inside parse_agent_response; none of the
# cells shown binds it at notebook scope, so using it here fails on a fresh kernel.
action = json_str.get("action")

In [45]:
action

'create_file'