Python: fixes for new MCP types and small openapi fix (#12486)

eavanvalkenburg · web-flow · commit 8d1b3fd55155 · 2025-06-16T21:32:42.000Z
### Motivation and Context  Fixes to include AudioContent for MCP. Small typing improvement in OpenAPI. ### Description  ### Contribution Checklist  - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
diff --git a/python/semantic_kernel/connectors/mcp.py b/python/semantic_kernel/connectors/mcp.py
@@ -25,6 +25,7 @@
 
 from semantic_kernel import Kernel
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.contents.audio_content import AudioContent
 from semantic_kernel.contents.binary_content import BinaryContent
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
@@ -81,20 +82,22 @@ def _mcp_prompt_message_to_kernel_content(
 @experimental
 def _mcp_call_tool_result_to_kernel_contents(
     mcp_type: types.CallToolResult,
-) -> list[TextContent | ImageContent | BinaryContent]:
+) -> list[TextContent | ImageContent | BinaryContent | AudioContent]:
     """Convert a MCP container type to a Semantic Kernel type."""
     return [_mcp_content_types_to_kernel_content(item) for item in mcp_type.content]
 
 
 @experimental
 def _mcp_content_types_to_kernel_content(
-    mcp_type: types.ImageContent | types.TextContent | types.EmbeddedResource,
-) -> TextContent | ImageContent | BinaryContent:
+    mcp_type: types.ImageContent | types.TextContent | types.AudioContent | types.EmbeddedResource,
+) -> TextContent | ImageContent | BinaryContent | AudioContent:
     """Convert a MCP type to a Semantic Kernel type."""
     if isinstance(mcp_type, types.TextContent):
         return TextContent(text=mcp_type.text, inner_content=mcp_type)
     if isinstance(mcp_type, types.ImageContent):
         return ImageContent(data=mcp_type.data, mime_type=mcp_type.mimeType, inner_content=mcp_type)
+    if isinstance(mcp_type, types.AudioContent):
+        return AudioContent(data=mcp_type.data, mime_type=mcp_type.mimeType, inner_content=mcp_type)
     # subtypes of EmbeddedResource
     if isinstance(mcp_type.resource, types.TextResourceContents):
         return TextContent(
@@ -111,13 +114,15 @@ def _mcp_content_types_to_kernel_content(
 
 @experimental
 def _kernel_content_to_mcp_content_types(
-    content: TextContent | ImageContent | BinaryContent | ChatMessageContent,
-) -> Sequence[types.TextContent | types.ImageContent | types.EmbeddedResource]:
+    content: TextContent | ImageContent | BinaryContent | AudioContent | ChatMessageContent,
+) -> Sequence[types.TextContent | types.ImageContent | types.AudioContent | types.EmbeddedResource]:
     """Convert a kernel content type to a MCP type."""
     if isinstance(content, TextContent):
         return [types.TextContent(type="text", text=content.text)]
     if isinstance(content, ImageContent):
         return [types.ImageContent(type="image", data=content.data_string, mimeType=content.mime_type)]
+    if isinstance(content, AudioContent):
+        return [types.AudioContent(type="audio", data=content.data_string, mimeType=content.mime_type)]
     if isinstance(content, BinaryContent):
         return [
             types.EmbeddedResource(
@@ -128,9 +133,9 @@ def _kernel_content_to_mcp_content_types(
             )
         ]
     if isinstance(content, ChatMessageContent):
-        messages: list[types.TextContent | types.ImageContent | types.EmbeddedResource] = []
+        messages: list[types.TextContent | types.ImageContent | types.AudioContent | types.EmbeddedResource] = []
         for item in content.items:
-            if isinstance(item, (TextContent, ImageContent, BinaryContent)):
+            if isinstance(item, (TextContent, ImageContent, BinaryContent, AudioContent)):
                 messages.extend(_kernel_content_to_mcp_content_types(item))
             else:
                 logger.debug("Unsupported content type: %s", type(item))
@@ -891,24 +896,30 @@ async def _list_tools() -> list[types.Tool]:
             return tools
 
         @server.call_tool()
-        async def _call_tool(*args: Any) -> Sequence[types.TextContent | types.ImageContent | types.EmbeddedResource]:
+        async def _call_tool(
+            *args: Any,
+        ) -> Sequence[types.TextContent | types.ImageContent | types.AudioContent | types.EmbeddedResource]:
             """Call a tool in the kernel."""
             await _log(level="debug", data=f"Calling tool with args: {args}")
             function_name, arguments = args[0], args[1]
             result = await _call_kernel_function(function_name, arguments)
             if result:
                 value = result.value
-                messages: list[types.TextContent | types.ImageContent | types.EmbeddedResource] = []
+                messages: list[
+                    types.TextContent | types.ImageContent | types.AudioContent | types.EmbeddedResource
+                ] = []
                 if isinstance(value, list):
                     for item in value:
-                        if isinstance(value, (TextContent, ImageContent, BinaryContent, ChatMessageContent)):
+                        if isinstance(
+                            value, (TextContent, ImageContent, BinaryContent, AudioContent, ChatMessageContent)
+                        ):
                             messages.extend(_kernel_content_to_mcp_content_types(item))
                         else:
                             messages.append(
                                 types.TextContent(type="text", text=str(item)),
                             )
                 else:
-                    if isinstance(value, (TextContent, ImageContent, BinaryContent, ChatMessageContent)):
+                    if isinstance(value, (TextContent, ImageContent, BinaryContent, AudioContent, ChatMessageContent)):
                         messages.extend(_kernel_content_to_mcp_content_types(value))
                     else:
                         messages.append(
diff --git a/python/semantic_kernel/connectors/openapi_plugin/openapi_parser.py b/python/semantic_kernel/connectors/openapi_plugin/openapi_parser.py
@@ -7,18 +7,12 @@
 
 from prance import ResolvingParser
 
-from semantic_kernel.connectors.openapi_plugin.models.rest_api_expected_response import (
-    RestApiExpectedResponse,
-)
+from semantic_kernel.connectors.openapi_plugin.models.rest_api_expected_response import RestApiExpectedResponse
 from semantic_kernel.connectors.openapi_plugin.models.rest_api_operation import RestApiOperation
 from semantic_kernel.connectors.openapi_plugin.models.rest_api_parameter import RestApiParameter
-from semantic_kernel.connectors.openapi_plugin.models.rest_api_parameter_location import (
-    RestApiParameterLocation,
-)
+from semantic_kernel.connectors.openapi_plugin.models.rest_api_parameter_location import RestApiParameterLocation
 from semantic_kernel.connectors.openapi_plugin.models.rest_api_payload import RestApiPayload
-from semantic_kernel.connectors.openapi_plugin.models.rest_api_payload_property import (
-    RestApiPayloadProperty,
-)
+from semantic_kernel.connectors.openapi_plugin.models.rest_api_payload_property import RestApiPayloadProperty
 from semantic_kernel.connectors.openapi_plugin.models.rest_api_security_requirement import RestApiSecurityRequirement
 from semantic_kernel.connectors.openapi_plugin.models.rest_api_security_scheme import RestApiSecurityScheme
 from semantic_kernel.exceptions.function_exceptions import PluginInitializationError
@@ -66,7 +60,7 @@ def _parse_parameters(self, parameters: list[dict[str, Any]]):
                 # The schema and content fields are mutually exclusive.
                 raise PluginInitializationError(f"Parameter {name} cannot have a 'content' field. Expected: schema.")
             location = RestApiParameterLocation(param["in"])
-            description: str = param.get("description", None)
+            description: str | None = param.get("description", None)
             is_required: bool = param.get("required", False)
             default_value = param.get("default", None)
             schema: dict[str, Any] | None = param.get("schema", None)
diff --git a/python/uv.lock b/python/uv.lock