# Name: innovator

## Description: A waldiez flow that provides innovative ideas based on recent arxiv papers.

## Tags: arxiv

### 🧩 generated with ❤️ by Waldiez.

### Requirements

In [None]:
import sys  # pyright: ignore

# # !{sys.executable} -m pip install -q ag2[openai]==0.9.9 arxiv

### Imports

In [None]:
# pyright: reportUnusedImport=false,reportMissingTypeStubs=false
import csv
import importlib
import json
import os
import sqlite3
import sys
from dataclasses import asdict
from pprint import pprint
from types import ModuleType
from typing import (
    Annotated,
    Any,
    Callable,
    Coroutine,
    Dict,
    List,
    Optional,
    Set,
    Tuple,
    Union,
)

import autogen  # type: ignore
from autogen import (
    Agent,
    Cache,
    ChatResult,
    ConversableAgent,
    GroupChat,
    UserProxyAgent,
    register_function,
    runtime_logging,
)
from autogen.agentchat import GroupChatManager, run_group_chat
from autogen.agentchat.group import ContextVariables
from autogen.agentchat.group.patterns import AutoPattern
from autogen.coding import LocalCommandLineCodeExecutor
from autogen.events import BaseEvent
from autogen.io.run_response import AsyncRunResponseProtocol, RunResponseProtocol
import arxiv
import numpy as np
from dotenv import load_dotenv
from typing_extensions import Annotated

# Common environment variable setup for Waldiez flows
load_dotenv(override=True)
os.environ.update(
    {
        "AUTOGEN_USE_DOCKER": "0",
        "ANONYMIZED_TELEMETRY": "False",
        # let's try to avoid:
        # "module 'numpy' has no attribute '_no_nep50_warning'"
        # ref: https://github.com/numpy/numpy/blob/v2.2.2/doc/source/release/2.2.0-notes.rst#nep-50-promotion-state-option-removed
        "NEP50_DEPRECATION_WARNING": "0",
        "NEP50_DISABLE_WARNING": "1",
        "NPY_PROMOTION_STATE": "weak",
    }
)
# Install a no-op replacement only when numpy does not provide the private
# "_no_nep50_warning" context manager itself (see the numpy 2.2.0 release
# notes on the removed NEP 50 promotion state option).
if not hasattr(np, "_no_nep50_warning"):

    import contextlib
    from typing import Generator

    @contextlib.contextmanager
    def _np_no_nep50_warning() -> Generator[None, None, None]:
        """Do nothing; stand-in for the missing numpy context manager.

        Yields
        ------
        None
            Nothing.
        """
        yield

    # The attribute name must match the one that is looked up on numpy
    # ("nep50", not "pep50"), otherwise the AttributeError is not avoided.
    setattr(np, "_no_nep50_warning", _np_no_nep50_warning)  # noqa

### Start logging.

In [None]:
def start_logging() -> None:
    """Start the runtime logging, using the "flow.db" sqlite database."""
    sqlite_config = {"dbname": "flow.db"}
    runtime_logging.start(logger_type="sqlite", config=sqlite_config)


start_logging()

### Load model API keys

In [None]:
# NOTE:
# This section assumes that a file named:
# "innovator_api_keys.py"
# exists in the same directory as this file.
# This file contains the API keys for the models used in this flow.
# It should be .gitignored and not shared publicly.
# If this file is not present, you can either create it manually
# or change the way API keys are loaded in the flow.


def load_api_key_module(flow_name: str) -> ModuleType:
    """Import (or re-import) the module holding the flow's api keys.

    The module is expected to be named "<flow_name>_api_keys". If it was
    already imported, it is reloaded so that edits to it are picked up.

    Parameters
    ----------
    flow_name : str
        The flow name.

    Returns
    -------
    ModuleType
        The api keys loading module.
    """
    module_name = f"{flow_name}_api_keys"
    try:
        already_loaded = sys.modules[module_name]
    except KeyError:
        # First use: a regular import is enough.
        return importlib.import_module(module_name)
    return importlib.reload(already_loaded)


__MODELS_MODULE__ = load_api_key_module("innovator")


def get_innovator_model_api_key(model_name: str) -> str:
    """Get the model api key.

    The lookup is delegated to the function with the same name in the
    loaded api keys module.

    Parameters
    ----------
    model_name : str
        The model name.

    Returns
    -------
    str
        The model api key.
    """
    lookup = __MODELS_MODULE__.get_innovator_model_api_key
    return lookup(model_name)

### Tools

In [None]:
def get_papers(search_query: str, max_results: int) -> Annotated[list[str], "list of papers"]:
    """Return the summaries of the most recently submitted papers on a topic.

    The title, authors, publication date, summary and PDF URL of every
    paper found are also printed.

    Args:
        search_query (str): Name of topic of interest (e.g., 'agents')
        max_results (int): number of papers returned.

    Returns:
        list[str]: The summary of each paper, most recently submitted first.
    """
    # Kept as a function-level import; presumably so the function stays
    # self-contained when it is handed over as a tool.
    import arxiv

    # Search for recent papers on the topic
    search = arxiv.Search(
        query=search_query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )
    summaries: list[str] = []
    # `Search.results()` is deprecated in arxiv 2.x; a client is the
    # supported way to run a search.
    client = arxiv.Client()
    # Print the details of each paper and collect its summary
    for paper in client.results(search):
        print(f"Title: {paper.title}")
        print(f"Authors: {', '.join(author.name for author in paper.authors)}")
        print(f"Published: {paper.published}")
        print(f"Summary: {paper.summary}")
        print(f"PDF URL: {paper.pdf_url}")
        summaries.append(paper.summary)
    return summaries

### Models

In [None]:
# Settings of the "gpt-4.1" model; the api key is looked up by model name.
gpt_4_1_llm_config: dict[str, Any] = dict(
    model="gpt-4.1",
    api_type="openai",
    api_key=get_innovator_model_api_key("gpt_4_1"),
)

### Agents

In [None]:
# pyright: reportUnnecessaryIsInstance=false


def _gpt_4_1_agent_llm_config() -> "autogen.LLMConfig":
    """Build a new LLM configuration (cache_seed=None) using the gpt-4.1 model settings."""
    return autogen.LLMConfig(config_list=[gpt_4_1_llm_config], cache_seed=None)


# Command line code executor used by the "executor" agent.
executor_executor = LocalCommandLineCodeExecutor(
    work_dir="coding",
    timeout=30,
    functions=[get_papers],
)

# Agent with code execution enabled (through the executor above).
executor = ConversableAgent(
    name="executor",
    description="A new Assistant agent that executes code.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=None,
    default_auto_reply="",
    code_execution_config={"executor": executor_executor},
    is_termination_msg=None,  # pyright: ignore
    functions=[get_papers],
    update_agent_state_before_reply=[],
    llm_config=_gpt_4_1_agent_llm_config(),
)

# Agent with the `get_papers` tool and no code execution.
researcher = ConversableAgent(
    name="researcher",
    description="A researcher agent, capable of retrieving data using tools.",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=None,
    default_auto_reply="",
    code_execution_config=False,
    is_termination_msg=None,  # pyright: ignore
    functions=[get_papers],
    update_agent_state_before_reply=[],
    llm_config=_gpt_4_1_agent_llm_config(),
)

# The human side of the chat: no LLM, always asks for input.
user = UserProxyAgent(
    name="user",
    description="A new User agent",
    human_input_mode="ALWAYS",
    max_consecutive_auto_reply=None,
    default_auto_reply="",
    code_execution_config=False,
    is_termination_msg=None,  # pyright: ignore
    llm_config=False,  # pyright: ignore
)

# Group chat pattern: the researcher starts, the manager uses the same model.
manager_pattern = AutoPattern(
    initial_agent=researcher,
    agents=[researcher, executor],
    user_agent=user,
    group_manager_args={
        "llm_config": _gpt_4_1_agent_llm_config(),
        "name": "manager",
    },
)


def get_sqlite_out(dbname: str, table: str, csv_file: str) -> None:
    """Convert a sqlite table to csv and json files.

    Parameters
    ----------
    dbname : str
        The sqlite database name.
    table : str
        The table name.
    csv_file : str
        The csv file name.
    """
    conn = sqlite3.connect(dbname)
    query = f"SELECT * FROM {table}"  # nosec
    try:
        cursor = conn.execute(query)
    except sqlite3.OperationalError:
        conn.close()
        return
    rows = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]
    data = [dict(zip(column_names, row, strict=True)) for row in rows]
    conn.close()
    with open(csv_file, "w", newline="", encoding="utf-8") as file:
        csv_writer = csv.DictWriter(file, fieldnames=column_names)
        csv_writer.writeheader()
        csv_writer.writerows(data)
    json_file = csv_file.replace(".csv", ".json")
    with open(json_file, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4, ensure_ascii=False)


def stop_logging() -> None:
    """Stop logging and export the logged tables.

    Each of the known tables of the "flow.db" database is passed to
    `get_sqlite_out`, with "logs/<table>.csv" as the destination.
    """
    runtime_logging.stop()
    # exist_ok avoids the check-then-create race of testing for the
    # folder first.
    os.makedirs("logs", exist_ok=True)
    for table in (
        "chat_completions",
        "agents",
        "oai_wrappers",
        "oai_clients",
        "version",
        "events",
        "function_calls",
    ):
        dest = os.path.join("logs", f"{table}.csv")
        get_sqlite_out("flow.db", table, dest)

### Start chatting

In [None]:
def _forward_events(
    result: "RunResponseProtocol",
    on_event: "Callable[[BaseEvent], bool]",
) -> None:
    """Pass the events of a run to the handler, up to the run completion event."""
    for event in result.events:
        try:
            should_continue = on_event(event)
        except BaseException as e:
            print(f"Error in event handler: {e}")
            raise SystemExit("Error in event handler: " + str(e)) from e
        # The completion event ends the run, whatever the handler returned.
        if event.type == "run_completion":
            return
        if not should_continue:
            raise SystemExit("Event handler stopped processing")


def _run_result_to_dict(index: int, result: "RunResponseProtocol") -> dict[str, Any]:
    """Gather the details of a finished run in a dictionary."""
    return {
        "index": index,
        "messages": result.messages,
        "summary": result.summary,
        "cost": (
            result.cost.model_dump(mode="json", fallback=str)
            if result.cost
            else None
        ),
        "context_variables": (
            result.context_variables.model_dump(mode="json", fallback=str)
            if result.context_variables
            else None
        ),
        "last_speaker": result.last_speaker,
        "uuid": str(result.uuid),
    }


def main(
    on_event: Optional[Callable[[BaseEvent], bool]] = None,
) -> list[dict[str, Any]]:
    """Start chatting.

    Parameters
    ----------
    on_event : Optional[Callable[[BaseEvent], bool]]
        Optional handler called with each event of the chat. A falsy
        return value stops the processing. If not given, the events are
        handled by the run's own `process()`.

    Returns
    -------
    list[dict[str, Any]]
        The result of the chat session.

    Raises
    ------
    SystemExit
        If the event handler raises (this includes a user interrupt
        raised inside it) or asks to stop the processing.
    """
    results: list[RunResponseProtocol] | RunResponseProtocol = run_group_chat(
        pattern=manager_pattern,
        messages="Please retrieve 2 recent papers on agentic AI from arxiv. After retrieving them write down a solid idea for research.",
        max_rounds=20,
    )
    if not isinstance(results, list):
        results = [results]  # pylint: disable=redefined-variable-type
    result_dicts: list[dict[str, Any]] = []
    for index, result in enumerate(results):
        # The events must be consumed (by the handler or by process())
        # before the details of the run are read.
        if on_event:
            _forward_events(result, on_event)
        else:
            result.process()
        result_dicts.append(_run_result_to_dict(index, result))

    stop_logging()
    return result_dicts

In [None]:
# Run the flow without a custom event handler (each run is handled by its own `process()`).
main()