Description
Describe the bug
I am having issues continuing a conversation with a Multimodal agent after a function call has been used.
My goal is to have a first Multimodal agent generate a report based on an image. I then ask an Assistant agent to use a callable function to save the report in a JSON file. Finally, I would like a "fact checker" Multimodal agent to verify that the information in the image is consistent with the information in the report.
Unfortunately, I am getting the following error message:
BadRequestError: Error code: 400 - {'error': {'message': "Missing parameter 'tool_call_id': messages with role 'tool' must have a 'tool_call_id'.", 'type': 'invalid_request_error', 'param': 'messages.[4].tool_call_id', 'code': None}}
This problem only arises if the next agent is a Multimodal agent.
Steps to reproduce
Here is the code used to replicate the problem:
The version of pyautogen is 0.2.26
import sys
path_project = #PATH TO PROJECT FOLDER
sys.path.append(path_project)
import json
import autogen
from autogen import AssistantAgent, UserProxyAgent
from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
from annotated_types import Annotated
# --- Run configuration (the placeholder comments must be replaced with real values) ---
openaikey = #YOUR API KEY
# Cache seed shared by the agents so repeated runs reuse cached LLM replies.
seed = 46
# Request timeout (seconds) for the text-only agents; multimodal agents add 500 below.
timeout = 120
# Temperature presets: deterministic, moderate, and high-creativity sampling.
no_temp = 0.0
temp = 0.5
high_temp = 1.2
# Output path for the JSON file written by register_report().
path_for_json = path_project + #NAME OF JSON
def is_terminal_message(content):
    """Return True when a chat message signals the end of the conversation.

    Handles both plain-text messages, where ``content["content"]`` is a
    string, and multimodal messages, where it is a list of content parts
    (text parts look like ``{"type": "text", "text": ...}``).  The original
    membership test ``'TERMINATE' in content["content"]`` silently never
    matched list-valued multimodal content.

    :param content: the message dict produced by an agent.
    :return: True if any text in the message contains "TERMINATE".
    """
    body = content.get("content", None)
    if body is None:
        return False
    if isinstance(body, str):
        return "TERMINATE" in body
    if isinstance(body, list):
        # Multimodal content: scan every text part for the sentinel word.
        return any(
            "TERMINATE" in part.get("text", "")
            for part in body
            if isinstance(part, dict)
        )
    return False
# Two config lists on the same API key: "v4" drives the multimodal agents,
# "gpt4" drives the text-only agents and the group-chat manager.
v4_config_list = [{
    "model": "gpt-4-turbo",
    "api_key": openaikey
}]
gpt4_config_list = [{
    "model": "gpt-4-turbo-2024-04-09",
    "api_key": openaikey
}]

# Stand-in for the human user.  It also executes the registered tool call
# (see register_for_execution below) and ends the chat on TERMINATE.
user_proxy = UserProxyAgent(
    name="User",
    system_message="User. Once the question has been verified by the fact_checker, you must say TERMINATE to end the conversation.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    llm_config={
        "cache_seed": seed,
        "temperature": temp,
        "config_list": gpt4_config_list,
        "timeout": timeout,
    },
    is_termination_msg=is_terminal_message,
    code_execution_config=False,
)

# Multimodal agent that inspects the image and writes the report.
assistant = MultimodalConversableAgent(
    name="Assistant",
    system_message="Describor. You describe the image accurately and generate a report on the image.",
    human_input_mode="NEVER",
    llm_config={
        "cache_seed": seed,
        "temperature": high_temp,
        "config_list": v4_config_list,
        "timeout": timeout + 500,
        "max_tokens": 1200
    },
)

# Text-only agent expected to emit the register_report tool call and then
# say APPROVED, which the speaker selector uses to advance the flow.
coder = AssistantAgent(
    name="Coder",
    system_message="Coder. Save the report thanks to the given function. YOU MUST USE THE FUNCTION PROVIDED. Once the question has been saved, say APPROVED.",
    llm_config={
        "cache_seed": seed,
        "temperature": temp,
        "config_list": gpt4_config_list,
        "timeout": timeout,
    },
    is_termination_msg=is_terminal_message,
    human_input_mode="NEVER",
    code_execution_config=False,
)

# Second multimodal agent.  The reported 400 error occurs when the chat
# history — which by now contains a role="tool" message from the tool
# round-trip — is replayed to this agent.
fact_checker = MultimodalConversableAgent(
    name="Fact_Checker",
    system_message="Fact_Checker. You must check the report for accuracy and correct any errors you find.",
    human_input_mode="NEVER",
    llm_config={
        "cache_seed": seed,
        "temperature": high_temp,
        "config_list": v4_config_list,
        "timeout": timeout + 500,
        "max_tokens": 1200
    },
)
def register_report(
    title: Annotated[str, "The title of the report"],
    report: Annotated[str, "The report generated"],
    tldr: Annotated[str, "A quick summary of the report"],
) -> str:
    """Persist the generated report to ``path_for_json`` and echo it back.

    :param title: the title of the report.
    :param report: the full report text.
    :param tldr: a short summary of the report.
    :return: a human-readable confirmation including the saved file path,
        so the calling agent can verify what was stored and where.
    """
    quest = {
        "title": title,
        "report": report,
        "tldr": tldr
    }
    # Explicit UTF-8 + ensure_ascii=False keep non-ASCII report text readable
    # in the saved file; indent makes it easy to inspect by hand.
    with open(path_for_json, "w", encoding="utf-8") as f:
        json.dump(quest, f, ensure_ascii=False, indent=2)
    return f"""
Title: {title}
Report: {report}
TLDR: {tldr}
file name: {path_for_json}
"""
# Expose register_report as a tool: the coder's LLM may *request* the call,
# while the user proxy is the agent that actually *executes* it.
coder.register_for_llm(name="register_report", description="Save the report with the title, report and a tldr.")(register_report)
user_proxy.register_for_execution(name="register_report")(register_report)
def custom_speaker_selection_func(last_speaker: autogen.Agent, groupchat: autogen.GroupChat):
    """Deterministic turn order for the group chat.

    Flow: assistant -> coder -> (user proxy executes the tool call) ->
    coder -> fact checker -> user proxy, with a random fallback for any
    speaker not covered below.
    """
    history = groupchat.messages

    # Opening turn: hand the conversation to the describing assistant.
    if len(history) <= 1:
        return assistant

    if last_speaker is user_proxy:
        # If the coder spoke just before the proxy (tool-execution
        # round-trip), give the floor back to the coder; otherwise restart
        # with the assistant.
        return coder if history[-2]['name'] == coder.name else assistant

    if last_speaker is assistant:
        return coder

    if last_speaker is coder:
        # "APPROVED" means the report was saved; move on to fact checking.
        return fact_checker if "APPROVED" in history[-1]['content'] else user_proxy

    if last_speaker is fact_checker:
        return user_proxy

    return "random"
# Group-chat wiring: all four agents share one chat, the custom selector
# above fixes the turn order, capped at 30 rounds.
groupchat = autogen.GroupChat(
    agents=[user_proxy, assistant, coder, fact_checker], messages=[], max_round=30, speaker_selection_method=custom_speaker_selection_func,
)
# NOTE(review): every other llm_config in this script uses "cache_seed";
# "seed" here is a different key — confirm this inconsistency is intended.
manager_llm_config = {
    "config_list": gpt4_config_list,
    "seed": seed,
    "timeout": timeout,
}
groupchat_manager = autogen.GroupChatManager(
    groupchat=groupchat,
    name="GroupChatManager",
    llm_config=manager_llm_config,
)
# Kick off the chat.  The <img ...> tag is how MultimodalConversableAgent
# receives the image; the placeholder must be replaced with a real path/URL.
user_proxy.initiate_chat(
    groupchat_manager,
    clear_history=True,
    message=f"""Make a report based in the image provided: <img {#YOUR IMAGE}"""
)
The error should be the one stated above.
Model Used
gpt-4-turbo-2024-04-09
gpt-4-turbo
Expected Behavior
I would like for the fact_checker agent to be able to check the validity of the report.
Screenshots and logs
No response
Additional Information
pyautogen: 0.2.26
python: 3.11.7