In [1]:
import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.messages import TextMessage, MultiModalMessage
from autogen_core import Image as AGImage
from PIL import Image
from io import BytesIO
import requests
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("GROQ_API_KEY")
if api_key is None:
    raise ValueError("GROQ_API_KEY not found in environment variables")

model_client = OpenAIChatCompletionClient(
    api_key=api_key,
    base_url="https://api.groq.com/openai/v1",
    model="meta-llama/llama-4-maverick-17b-128e-instruct",
    model_info={
        "family": "llama",
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
) 

  validate_model_info(self._model_info)


In [2]:
agent = AssistantAgent(
    name="text_agent",
    model_client=model_client,
    system_message="You are a helpful assistant, so answer the questions accurately.",
)

In [3]:
async def test_text_messages():
    text_msg = TextMessage(content="What are the capitals of Canada and Mexico?", source="user")
    result = await agent.run(task=text_msg)
    print(result)
    print(result.messages[-1].content)

await test_text_messages()

messages=[TextMessage(id='d1c6cd52-3173-4b27-b2fd-4ae76f46bbcd', source='user', models_usage=None, metadata={}, created_at=datetime.datetime(2025, 12, 19, 7, 57, 51, 937259, tzinfo=datetime.timezone.utc), content='What are the capitals of Canada and Mexico?', type='TextMessage'), TextMessage(id='4c012d55-8d56-4c65-bcfe-b200c4fd7736', source='text_agent', models_usage=RequestUsage(prompt_tokens=37, completion_tokens=18), metadata={}, created_at=datetime.datetime(2025, 12, 19, 7, 58, 3, 958071, tzinfo=datetime.timezone.utc), content='The capitals are:\n\n1. Canada: Ottawa\n2. Mexico: Mexico City', type='TextMessage')] stop_reason=None
The capitals are:

1. Canada: Ottawa
2. Mexico: Mexico City


In [4]:
async def test_multimodal():
    response = requests.get('https://picsum.photos/id/237/200/300')
    pil_image = Image.open(BytesIO(response.content))
    ag_image = AGImage(pil_image)
    multimodal_msg = MultiModalMessage(
        content=['What is in this image?', ag_image],
        source='user'
    )

    result = await agent.run(task=multimodal_msg)
    print(result.messages[-1].content)

await test_multimodal()

I'm a text-based AI and do not have the capability to visually access or analyze images. I can only respond to text-based input. If you'd like to describe the image, I'd be happy to try and help with any related questions or topics.


### Note: These structured output format are compatible with OpenAI models and not with Groq models.

In [14]:
from pydantic import BaseModel

class PlanetInfo(BaseModel):
    name: str
    color: str
    distance_miles: int

In [17]:
structured_model_client = OpenAIChatCompletionClient(
    api_key=api_key,
    base_url="https://api.groq.com/openai/v1",
    model="meta-llama/llama-4-maverick-17b-128e-instruct",
    model_info={
        "family": "llama",
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    response_format=PlanetInfo,
) 

In [18]:
unstructured_model_client = OpenAIChatCompletionClient(
    api_key=api_key,
    base_url="https://api.groq.com/openai/v1",
    model="meta-llama/llama-4-maverick-17b-128e-instruct",
    model_info={
        "family": "llama",
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    # response_format=PlanetInfo,
) 

In [21]:
structured_agent = AssistantAgent(
    name="planet_agent",
    model_client=structured_model_client,
    system_message="You are a helpful assistant that provides information about planets in the structure JSON" \
    "{name: str" \
    "age: int" \
    "}"
)

In [22]:
async def test_structured_output():
    task = TextMessage(content="Please provide information about Mars.", source='user')
    result = await agent.run(task=task)
    structured_response = result.messages[-1].content
    print(structured_response)

await test_structured_output()

KeyError: 'structured_output'

In [12]:
unstructured_agent = AssistantAgent(
    name="planet_agent",
    model_client=unstructured_model_client,
    system_message="You are a helpful assistant that provides information about planets in the structure JSON"
)

In [13]:
async def test_unstructured_output():
    task = TextMessage(content="Please provide information about Mars.", source='user')
    result = await unstructured_agent.run(task=task)
    unstructured_response = result.messages[-1].content
    print(unstructured_response)

await test_unstructured_output()

```json
{
  "name": "Mars",
  "overview": "Mars is the fourth planet from the Sun and the second-smallest planet in the Solar System.",
  "details": {
    "diameter": 6792,
    "surfaceGravity": 3.711,
    "orbitalPeriod": 687,
    "rotationPeriod": 24.6,
    "atmosphere": {
      "mainComponents": ["carbon dioxide", "nitrogen", "argon"],
      "pressure": 6.1
    },
    "temperature": {
      "average": -67,
      "min": -125,
      "max": 20
    },
    "moons": [
      {
        "name": "Phobos",
        "diameter": 27
      },
      {
        "name": "Deimos",
        "diameter": 12
      }
    ]
  },
  "exploration": {
    "notableMissions": [
      {
        "name": "NASA's Curiosity Rover",
        "launchYear": 2011,
        "landingYear": 2012
      },
      {
        "name": "NASA's Perseverance Rover",
        "launchYear": 2020,
        "landingYear": 2021
      }
    ]
  }
}
```
