In [None]:
import dotenv
dotenv.load_dotenv(override=True)

import os
import json
import uuid
import numpy as np
from pydantic import BaseModel
from llama_index.core.schema import TextNode

from lassie.core.model_loader import ModelLoader
from lassie.core.indices import IndexLoader 
from lassie.core.retriever import RetrieverBuilder

from agents import (
    Agent, 
    Runner, 
    trace, 
    function_tool,
    TResponseInputItem
)
from agents.extensions.visualization import draw_graph
from agents.extensions.models.litellm_model import LitellmModel
from agents.mcp import MCPServerStdio

import asyncio
import nest_asyncio
nest_asyncio.apply()

# Tool settings

In [None]:
# Embedding model setup shared by the index loader and the retriever builder.
rag_models = ModelLoader(llm_source="openailike", embed_model_source="openailike")

# The endpoint and key are read from the environment; the key falls back to a
# dummy value when OPENAI_API_KEY is not set.
embed_model, embed_model_tokenizer = rag_models.load_embedding_model(
    base_url=os.getenv("OPENAI_BASE_URL"),
    api_key=os.getenv("OPENAI_API_KEY", "sk-fake-key"),
    model_name="nomic-embed-text:latest",
    max_length=512,
    prefix_tokens=("search_query: ", "search_document: "),
    tokenizer_name="nomic-ai/nomic-embed-text-v2-moe",
)
# Attach the loaded model and tokenizer to the loader before anything consumes it.
rag_models._embed_model = embed_model
rag_models._embed_model_tokenizer = embed_model_tokenizer

# Both helpers are built on top of the same loaded models.
index_loader = IndexLoader(loaded_models=rag_models, database_type="opensearch")
retriever_builder = RetrieverBuilder(loaded_models=rag_models, retriever_type="vector_index")

In [None]:
# Retriever (LassieRAG)
from typing import List
from llama_index.core.schema import NodeWithScore

@function_tool
def retrieve_in_vector_database(index_name: str, query: str) -> List[dict]:
    """
    Retrieve information relevant to the user query.

    Args:
        index_name: the index name of vector database
        query: the query from user used to retrieve the relevant information in the vector database

    Returns:
        List[dict]: The retrieved passages, each a dict with the relevance "score" and the passage "text"
    """
    # TODO: move the per-index descriptions into the `index_name` argument documentation above.
    # NOTE(review): host and credentials are hard-coded for a local OpenSearch
    # instance; consider moving them to configuration before sharing this notebook.
    index = index_loader.load(
        host = "http://localhost:9200",
        http_auth = ("admin", "admin"),
        index_name = index_name,
        search_pipeline_name = "hybrid_search",
    )
    retriever = retriever_builder.build(index = index, similarity_top_k = 5, vector_store_query_mode = "hybrid")
    # The retriever is async while this tool is sync. When an event loop is already
    # running (as in a notebook), this call relies on nest_asyncio.apply() from the
    # imports cell.
    retrieved_nodes = asyncio.run(retriever.aretrieve(query))
    # Return plain dicts (not NodeWithScore objects) so the result is easy to serialize.
    return [
        {
            "score": node.score,
            "text": node.node.text,
        }
        for node in retrieved_nodes
    ]

# retrieved_nodes = retrieve_in_vector_database(index_name = "fps_rules", query="端午節加班可以有多少加班費？")

# Agent settings

In [3]:
# structured output
from pydantic import BaseModel

# One cited passage inside the structured answer (see RAGFormat.reference).
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into its JSON schema, and that schema reaches the agent
# through `output_type`, so a docstring would change what the model is shown.
class Reference(BaseModel):
    # Score reported for the passage; presumably the "score" value produced by
    # the retrieval tool — confirm against the tool output.
    score: float
    # Text of the passage being cited.
    text: str

# Structured output schema for the agent (passed as `output_type` when the agent
# is built). Documented with comments rather than a docstring on purpose:
# pydantic copies a model's docstring into the JSON schema sent to the model.
class RAGFormat(BaseModel):
    # Passages the answer is based on.
    reference: List[Reference]
    # The answer text shown to the user.
    final_answer: str


In [None]:
# Helpers for managing the multi-turn conversation history
def get_visible_messages(messages, max_turns = 3):
    """Return the most recent part of a conversation.

    A turn is counted as two messages, so at most ``2 * max_turns`` trailing
    messages are returned.

    Args:
        messages: The conversation so far, oldest message first.
        max_turns: Maximum number of turns to keep. Zero or a negative value
            keeps nothing.

    Returns:
        ``messages`` itself when it is shorter than the window, otherwise a new
        list holding the last ``2 * max_turns`` messages.
    """
    window = 2 * max_turns
    # Guard the degenerate window explicitly: ``messages[-0:]`` would return the
    # whole history instead of an empty one.
    if window <= 0:
        return []
    if len(messages) < window:
        return messages
    return messages[-window:]


def append_new_turn(conversation_log: list[dict], messages: list[dict], result_input_list: list[dict]):
    """Merge the outcome of one agent run back into the stored conversation.

    The last ``len(messages)`` entries of ``conversation_log`` (the part that was
    sent to the agent) are replaced by ``result_input_list`` (the same messages
    plus whatever the run produced).

    Args:
        conversation_log: The full stored conversation, oldest message first.
        messages: The trailing slice of ``conversation_log`` that was sent.
        result_input_list: The sent messages followed by the newly generated items.

    Returns:
        A new list; ``conversation_log`` is not modified.
    """
    # Compute the cut point explicitly: with an empty ``messages`` the slice
    # ``[:-0]`` would be ``[:0]`` and silently drop the whole log.
    keep = max(len(conversation_log) - len(messages), 0)
    return conversation_log[:keep] + result_input_list

async def multi_turn_conversation(start_agent, full_history):
    """Run an interactive chat loop with ``start_agent`` until the user types "exit".

    A new conversation id is generated and its message history is stored in
    ``full_history[conversation_id]``, which is updated after every turn.

    Args:
        start_agent: The agent that handles each user message.
        full_history: Dict mapping conversation ids to message lists; mutated in place.

    Returns:
        The result object of the last completed agent run, or ``None`` if the
        user exits before sending any message.
    """
    conversation_id = uuid.uuid4().hex[:16]
    full_history[conversation_id] = []
    # Stays None when the user exits straight away.
    result = None

    while True:
        # Read the input before opening a trace so that "exit" does not record
        # an empty trace.
        user_input = input("You: ")
        print(f"You: {user_input}")

        if user_input == "exit":
            print("Goodbye!")
            break

        with trace("RAG service", group_id=conversation_id):
            history = full_history[conversation_id]
            history.append({"role": "user", "content": user_input})

            # The whole history is sent to the agent. To cap the context,
            # get_visible_messages(history) could be used here instead; the merge
            # below stays correct because it replaces exactly what was sent.
            # NOTE(review): trimming by message count may separate a tool call
            # from its output — verify before enabling.
            run_input = history
            result = await Runner.run(starting_agent = start_agent, input = run_input)
            print(f"Agent: {result.final_output}")

            # result.to_input_list() contains the sent messages plus the new items,
            # so swap it in for the part of the history that was sent.
            full_history[conversation_id] = append_new_turn(history, run_input, result.to_input_list())

    return result

In [None]:
# Agent
# Conversation histories keyed by conversation id; filled in by multi_turn_conversation.
full_history = {}

async def main(full_history):
    """Start the RAG MCP server, build the seeker agent and run the chat loop.

    Args:
        full_history: Dict that receives the message history of the conversation.

    Returns:
        Whatever ``multi_turn_conversation`` returns for the conversation.
    """
    # Pass the script path as a single argument. Splitting a command string on
    # spaces breaks as soon as the working directory contains a space.
    rag_server_script = os.path.join(os.getcwd(), "1_rag_server.py")
    rag_mcp_server = MCPServerStdio(
        name = "RAG MCP Server",
        params={
            "command": "python",
            "args": [rag_server_script],
            # NOTE(review): placeholder endpoint/key literals — consider reading
            # them from the environment like the embedding model setup does.
            "env": {
                "OPENAI_BASE_URL": "https://your.model.endpoint",
                "OPENAI_API_KEY": "sk-test-key"
                }
        },
        client_session_timeout_seconds = 60,
    )
    async with rag_mcp_server as rag:
        # Show which tools the server exposes before the conversation starts.
        tools  = await rag.list_tools()
        print(tools)
        seeker_agent = Agent(
            name = "seeker_agent",
            # Each rule ends with a newline so the numbered rules stay on separate lines.
            instructions = (
                "You are a helpful and intelligent assistant. The user you are helping speaks Traditional Chinese and comes from Taiwan, so in most cases, you should respond in Traditional Chinese. \n"
                "Behavior Rules: \n"
                "1. Direct Answering: If the question is clear and within your knowledge, answer directly.\n"
                "2. Retrieval-Based Answering: If the question requires specialized or external knowledge, retrieve relevant documents from the specific vector database. When retrieving from the database, the user's original intent should be preserved as much as possible, and the clarity of the question's meaning should be maintained.\n"
                "3. Clarification: If the question is vague or unclear, ask clarifying questions to understand the user’s intent before responding."
            ),
            # NOTE(review): placeholder endpoint/key literals — move to configuration.
            model = LitellmModel(
                model = "openai/gemma3:27b-it-qat", 
                base_url = "https://your.model.endpoint", 
                api_key = "sk-test-key",
            ),
            mcp_servers = [rag],
            output_type = RAGFormat,
        )
        # Run the conversation while the MCP server connection is still open.
        result = await multi_turn_conversation(seeker_agent, full_history)
    return result

In [None]:
# Start the interactive conversation. Calling asyncio.run() from a notebook cell
# relies on the nest_asyncio.apply() call in the imports cell, because the
# notebook kernel presumably already has an event loop running.
asyncio.run(main(full_history))

[Tool(name='retrieve_fps_rules_db', description="\n    Retrieve information relevant to the user's query from a database containing documents about company work rules. \n\n    Args: \n        query: the query from user used to retrieve the relevant information in the vector database\n    \n    Returns:\n        List[dict]: The documents relevant to user's query\n    ", inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}}, 'required': ['query'], 'title': 'retrieve_fps_rules_dbArguments', 'type': 'object'}, annotations=None), Tool(name='retrieve_company_introduction_db', description="\n    Retrieve information relevant to the user's query from a database containing documents about company notes and introductions. \n\n    Args: \n        query: the query from user used to retrieve the relevant information in the vector database\n    \n    Returns:\n        List[dict]: The documents relevant to user's query\n    ", inputSchema={'properties': {'query': {'title': 'Query

[92m14:22:54 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 今天33度，天氣晴！
Before trimming:  [{'role': 'user', 'content': '今天33度，天氣晴！'}]
Input conversation: [{'role': 'user', 'content': '今天33度，天氣晴！'}]
2025-06-11 14:22:54 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-11 14:22:55 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m14:22:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:22:55 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:22:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:22:55 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:22:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:22:55 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:22:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-11 14:22:55 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: 哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？
[{'role': 'user', 'content': '今天33度，天氣晴！'}, {'id': '__fake_id__', 'content': [{'annotations': [], 'text': '哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？', 'type': 'output_text'}], 'role': 'assistant', 'status': 'completed', 'type': 'message'}]


[92m14:23:10 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 我想要出去玩，臨時請假有沒有什麼規範？
Before trimming:  [{'role': 'user', 'content': '今天33度，天氣晴！'}, {'id': '__fake_id__', 'content': [{'annotations': [], 'text': '哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？', 'type': 'output_text'}], 'role': 'assistant', 'status': 'completed', 'type': 'message'}, {'role': 'user', 'content': '我想要出去玩，臨時請假有沒有什麼規範？'}]
Input conversation: [{'role': 'user', 'content': '今天33度，天氣晴！'}, {'id': '__fake_id__', 'content': [{'annotations': [], 'text': '哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？', 'type': 'output_text'}], 'role': 'assistant', 'status': 'completed', 'type': 'message'}, {'role': 'user', 'content': '我想要出去玩，臨時請假有沒有什麼規範？'}]
2025-06-11 14:23:10 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-11 14:23:12 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m14:23:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:12 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:12 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:12 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-11 14:23:12 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat


[92m14:23:12 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


2025-06-11 14:23:12 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-11 14:23:24 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m14:23:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:24 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:24 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-11 14:23:24 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m14:23:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-11 14:23:24 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: 根據公司規定，臨時請假有以下規範：

*   公司同仁如有下列事由，得申請休假。
    *   年假已結餘四天以上。
    *   職務身故或因故無法履行職責者。

*   一、 85aa8cc7請假以月為單位，請假者以月為單位退回日數，以30計算天數。
*   二、 85aa8cc7請假時，應事先填寫請假單，經主管核定後方可請假。
*   一、 4e0085aa4e004f114e007a2e530556e0，如因婚假、產假、喪假、公假、育嬰假、陪產假、病假、留職停薪等，應備妥相關證明文件。
*   二、 85aa8cc7病假應經由醫療單位開立診斷證明書。
*   三、 4e0085aa56db50474e898f7bc44f7f75224e0065e0，應事先填寫請假單，經主管核可後方可請假，並於事後補辦相關手續。

請您依照上述規定辦理，如有任何疑問，請隨時向主管洽詢。
[{'role': 'user', 'content': '今天33度，天氣晴！'}, {'id': '__fake_id__', 'content': [{'annotations': [], 'text': '哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？', 'type': 'output_text'}], 'role': 'assistant', 'status': 'completed', 'type': 'message'}, {'role': 'user', 'content': '我想要出去玩，臨時請假有沒有什麼規範？'}, {'arguments': '{"query":"臨時請假 規範"}', 'call_id': 'fYg8eVM2mtCvgvja4EXMD5IDYq9ydO6k', 'name': 'retrieve_fps_rules_db', 'type': 'function_call', 'id': '__fake_id__'}, {'call_id': 'fYg8eVM2mtCvgvja4EXMD5IDYq9ydO6k', 'outpu

[92m14:23:50 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 規範好奇怪，不請假了
Before trimming:  [{'role': 'user', 'content': '今天33度，天氣晴！'}, {'id': '__fake_id__', 'content': [{'annotations': [], 'text': '哇！今天天氣真好！33度加上晴朗的天空，真是太棒了！有什麼特別的計畫嗎？', 'type': 'output_text'}], 'role': 'assistant', 'status': 'completed', 'type': 'message'}, {'role': 'user', 'content': '我想要出去玩，臨時請假有沒有什麼規範？'}, {'arguments': '{"query":"臨時請假 規範"}', 'call_id': 'fYg8eVM2mtCvgvja4EXMD5IDYq9ydO6k', 'name': 'retrieve_fps_rules_db', 'type': 'function_call', 'id': '__fake_id__'}, {'call_id': 'fYg8eVM2mtCvgvja4EXMD5IDYq9ydO6k', 'output': '[{"type": "text", "text": "{\\n  \\"score\\": 0.7,\\n  \\"text\\": \\"\\u516c\\u53f8\\u540c\\u4ec1\\u6709\\u4e0b\\u5217\\u60c5\\u5f62\\uff0c\\u5f97\\u5f37\\u5236\\u5176\\u9000\\u4f11\\u3002\\\\n\\u4e00\\u3001\\u5e74\\u9f61\\u5df2\\u9054\\u516d\\u5341\\u4e94\\u6b72\\u8005\\u3002\\\\n\\u4e8c\\u3001\\u8eab\\u5fc3\\u969c\\u7919\\u4e0d\\u582a\\u52dd\\u4efb\\u5de5\\u4f5c\\u8005\\u3002\\"\\n}", "annotations": null}, {"type": "text", "text": "{\\n  \\"score\\"

APIError: litellm.APIError: APIError: OpenAIException - Conversation roles must alternate user/assistant/user/assistant/... at row 19, column 27:
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
                          ^
    {%- endif -%}
 at row 19, column 9:
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
        ^
    {%- endif -%}
 at row 18, column 69:
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
                                                                    ^
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
 at row 18, column 5:
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
    ^
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
 at row 17, column 37:
{%- endif -%}
{%- for message in loop_messages -%}
                                    ^
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
 at row 17, column 1:
{%- endif -%}
{%- for message in loop_messages -%}
^
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
 at row 1, column 1:
{{ bos_token }}
^
{%- if messages[0]['role'] == 'system' -%}
