update: openai response api (#6622)

bassmang · victordibia · web-flow · commit f101469e2985 · 2025-06-16T10:30:57.000-07:00
Co-authored-by: Victor Dibia <victordibia@microsoft.com>
diff --git a/python/packages/autogen-ext/src/autogen_ext/agents/openai/_openai_agent.py b/python/packages/autogen-ext/src/autogen_ext/agents/openai/_openai_agent.py
@@ -505,7 +505,7 @@ def _build_api_parameters(self: "OpenAIAgent", messages: List[Dict[str, Any]]) -
         if self._tools:
             api_params["tools"] = self._tools
         if self._json_mode:
-            api_params["response_format"] = {"type": "json_object"}
+            api_params["text"] = {"type": "json_object"}
         api_params["store"] = self._store
         api_params["truncation"] = self._truncation
         if self._last_response_id:
diff --git a/python/packages/autogen-ext/tests/test_openai_agent.py b/python/packages/autogen-ext/tests/test_openai_agent.py
@@ -4,8 +4,8 @@
 
 import pytest
 from autogen_agentchat.base import Response
-from autogen_agentchat.messages import BaseChatMessage, TextMessage
-from autogen_core import CancellationToken, FunctionCall
+from autogen_agentchat.messages import BaseChatMessage, MultiModalMessage, TextMessage
+from autogen_core import CancellationToken, FunctionCall, Image
 from autogen_core.models import UserMessage
 from autogen_core.tools import Tool, ToolSchema
 from autogen_ext.agents.openai import OpenAIAgent
@@ -446,7 +446,7 @@ async def test_build_api_params(agent: OpenAIAgent) -> None:
     agent._json_mode = True  # type: ignore
     params = agent._build_api_parameters([{"role": "user", "content": "hi"}])  # type: ignore
     assert "text.format" not in params
-    assert params.get("response_format") == {"type": "json_object"}
+    assert params.get("text") == {"type": "json_object"}
 
 
 @pytest.mark.asyncio
@@ -568,3 +568,29 @@ async def test_from_config(agent: OpenAIAgent) -> None:
         assert loaded_agent._max_output_tokens == 1000  # type: ignore
         assert loaded_agent._store is True  # type: ignore
         assert loaded_agent._truncation == "auto"  # type: ignore
+
+
+@pytest.mark.asyncio
+async def test_multimodal_message_response(agent: OpenAIAgent, cancellation_token: CancellationToken) -> None:
+    # Smoke test: text + image input yields a TextMessage reply; the outgoing request is not inspected.
+    img = Image.from_base64(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC"
+    )
+    multimodal_message = MultiModalMessage(content=["Can you describe the content of this image?", img], source="user")
+
+    # Stub client.responses.create with a canned reply that exposes only output_text and id
+    async def mock_responses_create(**kwargs: Any) -> Any:
+        class MockResponse:
+            def __init__(self) -> None:
+                self.output_text = "I see a cat in the image."
+                self.id = "resp-image-001"
+
+        return MockResponse()
+
+    agent._client.responses.create = AsyncMock(side_effect=mock_responses_create)  # type: ignore
+
+    response = await agent.on_messages([multimodal_message], cancellation_token)
+
+    assert response.chat_message is not None
+    assert isinstance(response.chat_message, TextMessage)
+    assert "cat" in response.chat_message.content.lower()