Skip to content

Commit 1858799

Browse files
Parse backtick-enclosed json (#6607)
## Why are these changes needed? Some models enclose JSON in Markdown code blocks. ## Related issue number Resolves #6599, #6547. ## Checks - [ ] I've included any doc changes needed for <https://microsoft.github.io/autogen/>. See <https://github.com/microsoft/autogen/blob/main/CONTRIBUTING.md> to build and test documentation locally. - [ ] I've added tests (if relevant) corresponding to the changes introduced in this PR. - [ ] I've made sure all auto checks have passed. --------- Co-authored-by: Victor Dibia <victordibia@microsoft.com>
1 parent d1d664b commit 1858799

File tree

5 files changed

+119
-3
lines changed

5 files changed

+119
-3
lines changed

python/packages/autogen-agentchat/src/autogen_agentchat/teams/_group_chat/_magentic_one/_magentic_one_orchestrator.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
LLMMessage,
1212
UserMessage,
1313
)
14+
from autogen_core.utils import extract_json_from_str
1415

1516
from .... import TRACE_LOGGER_NAME
1617
from ....base import Response, TerminationCondition
@@ -325,7 +326,12 @@ async def _orchestrate_step(self, cancellation_token: CancellationToken) -> None
325326
ledger_str = response.content
326327
try:
327328
assert isinstance(ledger_str, str)
328-
progress_ledger = json.loads(ledger_str)
329+
output_json = extract_json_from_str(ledger_str)
330+
if len(output_json) != 1:
331+
raise ValueError(
332+
f"Progress ledger should contain a single JSON object, but found: {len(progress_ledger)}"
333+
)
334+
progress_ledger = output_json[0]
329335

330336
# If the team consists of a single agent, deterministically set the next speaker
331337
if len(self._participant_names) == 1:
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from ._json_to_pydantic import schema_to_pydantic_model
2+
from ._load_json import extract_json_from_str
23

3-
__all__ = ["schema_to_pydantic_model"]
4+
__all__ = ["schema_to_pydantic_model", "extract_json_from_str"]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import json
2+
import re
3+
from typing import Any, Dict, List
4+
5+
6+
# Matches a Markdown fenced code block: an optional language tag after the
# opening backticks, a line break (LF or CRLF), then the body up to the next
# closing fence. Compiled once at import time instead of on every call.
_CODE_BLOCK_PATTERN = re.compile(r"```(?:\s*([\w\+\-]+))?\r?\n([\s\S]*?)```")


def extract_json_from_str(content: str) -> List[Dict[str, Any]]:
    """Extract JSON objects from a string, unwrapping Markdown code fences.

    If ``content`` contains one or more triple-backtick fenced blocks, the body
    of each block is parsed as JSON and the results are returned in order of
    appearance. If no fenced block is found, the whole string is parsed as a
    single JSON document.

    Args:
        content: Raw text, e.g. a model response, that is either plain JSON or
            JSON wrapped in fenced code blocks.

    Returns:
        A list with one parsed value per fenced block, or a single-element list
        when ``content`` has no fenced block.

    Raises:
        ValueError: If a fenced block declares a language other than ``json``
            (compared case-insensitively).
        json.JSONDecodeError: If the text to parse is not valid JSON. This is
            a subclass of ``ValueError``.
    """
    blocks = _CODE_BLOCK_PATTERN.findall(content)

    # No fences at all: assume the entire content is a JSON document.
    if not blocks:
        return [json.loads(content)]

    parsed: List[Dict[str, Any]] = []
    for language, body in blocks:
        # An untagged fence yields an empty language string and is accepted.
        if language and language.lower() != "json":
            raise ValueError(f"Expected JSON object, but found language: {language}")
        parsed.append(json.loads(body))
    return parsed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import pytest
2+
from autogen_core.utils import extract_json_from_str
3+
4+
5+
def test_extract_json_from_str() -> None:
    """Plain (unfenced) JSON parses into a one-element list; truncated JSON raises."""
    well_formed = """
{
"name": "John",
"age": 30,
"city": "New York"
}
"""
    expected = [{"name": "John", "age": 30, "city": "New York"}]
    assert extract_json_from_str(well_formed) == expected

    # Same document with the closing brace missing.
    truncated = """
{
"name": "John",
"age": 30,
"city": "New York"
"""
    with pytest.raises(ValueError):
        extract_json_from_str(truncated)
25+
26+
27+
def test_extract_json_from_str_codeblock() -> None:
    """Fenced JSON is extracted with or without a language tag; a non-JSON tag raises."""
    alice = [{"name": "Alice", "age": 28, "city": "Seattle"}]

    # Fence explicitly tagged as json.
    tagged_block = """
```json
{
"name": "Alice",
"age": 28,
"city": "Seattle"
}
```
"""
    assert extract_json_from_str(tagged_block) == alice

    # Fence with no language tag.
    untagged_block = """
```
{
"name": "Alice",
"age": 28,
"city": "Seattle"
}
```
"""
    assert extract_json_from_str(untagged_block) == alice

    # Two consecutive fences come back as two entries, in order.
    two_blocks = """
```json
{
"name": "John",
"age": 30,
"city": "New York"
}
```
```json
{
"name": "Jane",
"age": 25,
"city": "Los Angeles"
}
```
"""
    expected_pair = [
        {"name": "John", "age": 30, "city": "New York"},
        {"name": "Jane", "age": 25, "city": "Los Angeles"},
    ]
    assert extract_json_from_str(two_blocks) == expected_pair

    # A fence tagged with any language other than json is rejected.
    wrong_language_block = """
```notjson
{
"name": "Jane",
"age": 25,
"city": "Los Angeles"
}
```
"""
    with pytest.raises(ValueError):
        extract_json_from_str(wrong_language_block)

python/packages/autogen-ext/src/autogen_ext/models/anthropic/_anthropic_client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
validate_model_info,
6262
)
6363
from autogen_core.tools import Tool, ToolSchema
64+
from autogen_core.utils import extract_json_from_str
6465
from pydantic import BaseModel, SecretStr
6566
from typing_extensions import Self, Unpack
6667

@@ -220,7 +221,10 @@ def assistant_message_to_anthropic(message: AssistantMessage) -> MessageParam:
220221
args = __empty_content_to_whitespace(args)
221222
if isinstance(args, str):
222223
try:
223-
args_dict = json.loads(args)
224+
json_objs = extract_json_from_str(args)
225+
if len(json_objs) != 1:
226+
raise ValueError(f"Expected a single JSON object, but found {len(json_objs)}")
227+
args_dict = json_objs[0]
224228
except json.JSONDecodeError:
225229
args_dict = {"text": args}
226230
else:

0 commit comments

Comments
 (0)