In [None]:
import dotenv
dotenv.load_dotenv(override=True)

import os
import uuid
from pydantic import BaseModel

from lassie.core.model_loader import ModelLoader
from lassie.core.indices import IndexLoader 
from lassie.core.retriever import RetrieverBuilder

from agents import (
    Agent, 
    Runner, 
    trace, 
    function_tool,
    TResponseInputItem
)
#from agents.extensions.visualization import draw_graph
from agents.extensions.models.litellm_model import LitellmModel
from agents.mcp import MCPServerStdio

import asyncio
import nest_asyncio
nest_asyncio.apply()

# Tool settings

In [2]:
## Load the embedding model and build the shared index loader / retriever builder.
# Both the LLM and embedding sources are set to "openailike"; the endpoint and key come from
# OPENAI_BASE_URL / OPENAI_API_KEY (presumably an OpenAI-compatible server -- confirm against lassie docs).
rag_models = ModelLoader(llm_source = "openailike", embed_model_source = "openailike")
# load_embedding_model() returns a (model, tokenizer) pair, which is stored directly on the
# loader's private attributes so IndexLoader / RetrieverBuilder below can pick it up via `loaded_models`.
rag_models._embed_model, rag_models._embed_model_tokenizer = rag_models.load_embedding_model(
    base_url = os.getenv("OPENAI_BASE_URL"),
    # Falls back to a placeholder key when OPENAI_API_KEY is not set.
    api_key = os.getenv("OPENAI_API_KEY", "sk-fake-key"),
    model_name = "nomic-embed-text:latest",
    max_length = 512,
    # (query prefix, document prefix): per the loader's log output these are used as the
    # query and text instruction, respectively.
    prefix_tokens = ("search_query: ", "search_document: "),
    tokenizer_name = "nomic-ai/nomic-embed-text-v2-moe"
)
# Module-level helpers reused by the retrieval tool defined further down.
index_loader = IndexLoader(loaded_models = rag_models, database_type = "opensearch")
retriever_builder = RetrieverBuilder(loaded_models = rag_models, retriever_type = "vector_index")

2025-06-18 17:33:36 - lassie.core.model_loader.openai_like_loader - search_query:  and search_document:  will be used as the query and text instruction, respectively.
2025-06-18 17:33:36 - lassie.core.model_loader.openai_like_loader - loading embedding model nomic-embed-text:latest...
2025-06-18 17:33:36 - lassie.core.model_loader.openai_like_loader - loading the tokenizer of embedding model...
2025-06-18 17:33:37 - lassie.core.retriever.builder - The vector_index retriever will be initialized


In [3]:
# Retriever (LassieRAG)
from typing import List
from llama_index.core.schema import NodeWithScore

@function_tool
def retrieve_in_vector_database(index_name: str, query: str) -> List[dict]:
    """
    Retrieve information relevant to the user query from the vector database.

    Args:
        index_name: the index name of vector database
        query: the query from user used to retrieve the relevant information in the vector database

    Returns:
        List[dict]: One entry per retrieved passage, each holding the retriever's "score" and the passage "text".
    """
    # TODO: move the per-index descriptions into the `index_name` argument documentation above.
    # NOTE(review): host and credentials are hard-coded; consider reading them from the
    # environment like the other endpoints in this notebook.
    index = index_loader.load(
        host = "http://localhost:9200",
        http_auth = ("admin", "admin"),
        index_name = index_name,
        search_pipeline_name = "hybrid_search",
    )
    retriever = retriever_builder.build(index = index, similarity_top_k = 5, vector_store_query_mode = "hybrid")
    # asyncio.run() inside a tool only works here because nest_asyncio.apply() was called
    # in the import cell, allowing a nested event loop.
    retrieved_nodes = asyncio.run(retriever.aretrieve(query))
    # Return plain dicts (not NodeWithScore objects) so the tool result contains only score and text.
    return [
        {
            "score": node.score,
            "text": node.node.text,
        }
        for node in retrieved_nodes
    ]

# retrieved_nodes = retrieve_in_vector_database(index_name = "fps_rules", query="端午節加班可以有多少加班費？")

# Agent settings

In [4]:
# Initialize the LiteLLM-backed chat model used by the agent.
# Endpoint and key are read from AGENT_BASE_URL / AGENT_API_KEY.
model = LitellmModel(
    model = "openai/gemma3:27b-it-qat",  # LiteLLM expects "<provider>/<model name>"; the "openai/" prefix selects the OpenAI provider interface
    base_url = os.getenv("AGENT_BASE_URL"), 
    api_key = os.getenv("AGENT_API_KEY"),
)

In [10]:
# Structured output, part 1: a single retrieved passage cited by the answer.
# The fields mirror the "score" / "text" keys produced by the retrieval tool.
# (Documented with # comments on purpose: a class docstring on a pydantic model is
# copied into its JSON schema, which would change what is sent to the model.)
class Reference(BaseModel):
    score: float  # score reported by the retriever for this passage
    text: str  # passage text

# Structured output, part 2: the schema the agent must answer in (passed as `output_type`).
# (Documented with # comments on purpose: a class docstring on a pydantic model is
# copied into its JSON schema, which would change what is sent to the model.)
class RAGFormat(BaseModel):
    reference: List[Reference]  # passages backing the answer; may be empty
    final_answer: str  # the reply shown to the user

In [11]:
# Helpers for managing multi-turn conversation history
def get_visible_messages(message_history, max_turns = 3):
    """
    Return the tail of the history covering at most the last `max_turns` user turns.

    A turn starts at every item whose "role" is "user"; everything that follows it
    (assistant messages, tool calls, tool outputs) belongs to that turn.

    Args:
        message_history: list of conversation items (dict-like). Items without a
            "role" key, such as function-call records, are never turn starts.
        max_turns: maximum number of most recent user turns to keep.

    Returns:
        `message_history` itself (same object) when it holds fewer than `max_turns`
        user turns; otherwise a new list starting at the first message of the
        `max_turns`-th most recent user turn. An empty list when `max_turns <= 0`.
    """
    if max_turns <= 0:
        # Previously this raised KeyError; "no turns" is the natural answer.
        return []

    turn_starts = []
    for idx, message in enumerate(message_history):
        try:
            is_user_message = message["role"] == "user"
        except (LookupError, TypeError):
            # No "role" key, or the item is not subscriptable by key: not a turn start.
            # Only these expected errors are skipped so real problems still surface.
            continue
        if is_user_message:
            turn_starts.append(idx)

    if max_turns > len(turn_starts):
        return message_history
    return message_history[turn_starts[len(turn_starts) - max_turns]:]


def append_new_turn(conversation_log: list[dict], messages: list[dict], result_input_list: list[dict]):
    """
    Replace the trailing `messages` of the log with the run's full input list.

    Args:
        conversation_log: the complete stored history, ending with `messages`.
        messages: the visible slice of the history that was sent to the agent.
        result_input_list: the items returned by the run (the sent messages plus
            everything generated during the turn).

    Returns:
        A new list: the part of `conversation_log` that precedes `messages`,
        followed by `result_input_list`. The inputs are not modified.
    """
    # Compute the cut point explicitly: `log[:-len(messages)]` turns into `log[:0]`
    # when `messages` is empty and would silently drop the whole log.
    keep = max(len(conversation_log) - len(messages), 0)
    return conversation_log[:keep] + result_input_list

async def multi_turn_conversation(start_agent, full_history):
    """
    Run an interactive chat loop with `start_agent` until the user types "exit".

    A new 16-character conversation id is generated and used both as the key in
    `full_history` and as the trace `group_id`. Each turn sends only the most
    recent turns (see `get_visible_messages`) to the agent, then writes the
    run's resulting items back into the stored history.

    Args:
        start_agent: the agent that handles every turn.
        full_history: dict mapping conversation id -> list of conversation items;
            mutated in place.

    Returns:
        The result of the last completed run, or None if the user exits before
        asking anything.
    """
    convo: List[TResponseInputItem] = []
    conversation_id = uuid.uuid4().hex[:16]
    full_history[conversation_id] = []
    # Defined up front so exiting on the very first prompt does not raise UnboundLocalError.
    result = None

    while True:
        with trace("RAG service", group_id=conversation_id):
            user_input = input("You: ")
            print(f"You: {user_input}")

            if user_input == "exit":
                print("Agent: Goodbye!")
                return result

            full_history[conversation_id].append({"role": "user", "content": user_input})
            # Only the most recent turns are sent to the agent.
            convo = get_visible_messages(full_history[conversation_id])
            result = await Runner.run(starting_agent = start_agent, input = convo)
            print(f"Agent: {result.final_output}")

            # Swap the sent slice for the run's full item list (inputs + generated items).
            full_history[conversation_id] = append_new_turn(full_history[conversation_id], convo, result.to_input_list())
            

In [12]:
# Agent
# Conversation store shared across runs: conversation id -> list of conversation items.
# multi_turn_conversation() adds a new key for every conversation it starts.
full_history = {}

async def main(full_history):
    """
    Start the RAG MCP server, build the seeker agent and run the chat loop.

    The MCP server script `1_rag_server.py` is expected in the current working
    directory and is launched over stdio with the `python` found on PATH.

    Args:
        full_history: dict mapping conversation id -> list of conversation items;
            passed through to `multi_turn_conversation`, which mutates it.

    Returns:
        Whatever `multi_turn_conversation` returns for the finished conversation.
    """
    # Pass the script path as a single argument instead of splitting a command string
    # on spaces, so a working directory containing spaces is handled correctly.
    rag_server_script = os.path.join(os.getcwd(), "1_rag_server.py")

    # Forward only the variables that are actually set: a subprocess environment
    # must map strings to strings, so None values are left out.
    forwarded_env = {}
    for env_name in ("OPENAI_BASE_URL", "OPENAI_API_KEY"):
        env_value = os.getenv(env_name)
        if env_value is not None:
            forwarded_env[env_name] = env_value

    rag_mcp_server = MCPServerStdio(
        name = "RAG MCP Server",
        params={
            "command": "python",
            "args": [rag_server_script],
            "env": forwarded_env,
        },
        client_session_timeout_seconds = 60,
    )
    async with rag_mcp_server as rag:
        # tools  = await rag.list_tools()
        # print(tools)
        seeker_agent = Agent(
            name = "seeker_agent",
            # Every rule ends with "\n" so the numbered rules stay on separate lines in the prompt.
            instructions = (
                "You are a helpful and intelligent assistant. The user you are helping speaks Traditional Chinese and comes from Taiwan, so in most cases, you should respond in Traditional Chinese. \n"
                "Behavior Rules: \n"
                "1. Direct Answering: If the question is clear and within your knowledge, answer directly.\n"
                "2. Retrieval-Based Answering: If the question requires specialized or external knowledge, retrieve relevant documents from the specific vector database. When retrieving from the database, the user's original intent should be preserved as much as possible, and the clarity of the question's meaning should be maintained.\n"
                "3. Clarification: If the question is vague or unclear, ask clarifying questions to understand the user’s intent before responding."
            ),
            model = model,
            #tools=[retrieve_in_vector_database],
            mcp_servers = [rag],
            # Force answers into the RAGFormat schema (references + final answer).
            output_type = RAGFormat,
        )
        # draw_graph(seeker_agent)
        # The conversation must run inside the `async with` so the MCP server stays alive.
        result = await multi_turn_conversation(seeker_agent, full_history)
    return result

In [39]:
# Start the interactive chat loop; type "exit" at the prompt to stop.
# Calling asyncio.run() from a notebook cell relies on the nest_asyncio.apply()
# patch applied in the import cell.
result = asyncio.run(main(full_history))

[92m16:33:00 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 今天天氣真好～臺北現在33度
2025-06-18 16:33:00 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:33:02 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:33:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:02 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:02 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:02 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:33:02 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: final_answer='是啊，今天臺北天氣確實很好！33度，要注意防曬和補充水分喔！' reference=[]


[92m16:33:07 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 公司的地址跟電話是多少？
2025-06-18 16:33:07 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:33:09 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:33:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:09 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:09 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:09 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:33:09 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat


[92m16:33:09 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


2025-06-18 16:33:09 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:33:16 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:33:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:16 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:16 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:16 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:33:16 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: final_answer='根據查詢結果，公司的地址是104439台北市中正區忠孝東路551巷9弄5號，電話是(02)2586-7890。' reference=[]


[92m16:33:25 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 我想要請假出去玩，有沒有什麼限制？
2025-06-18 16:33:25 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:33:32 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:33:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:32 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:32 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:32 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:33:32 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat


[92m16:33:32 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


2025-06-18 16:33:32 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:33:46 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:33:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:46 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:46 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:33:46 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:33:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:33:46 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: final_answer='根據查詢到的規定，請假有一些限制，包含:\n\n*   **請假時數限制**: 每日請假時數不得超過 8 小時，每月請假總時數不得超過 30 小時。\n*   **請假類型**: 有病假、事假、特假等多種。\n*   **請假流程**: 需要事先向主管申請，並按照規定填寫請假單。\n*   **請假天數**: 請假天數過多可能需要提供證明文件。\n*   **特殊情況**: 某些特殊情況下的請假可能需要額外的審批。\n\n建議您詳細閱讀公司內部規章制度，或向人資部門諮詢更詳細的請假規定。' reference=[]


[92m16:33:49 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


You: 我想要請特休假三天
2025-06-18 16:33:49 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:34:00 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:34:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:00 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:00 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:00 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:34:00 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat


[92m16:34:01 - LiteLLM:INFO[0m: utils.py:2929 - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai


2025-06-18 16:34:01 - LiteLLM - 
LiteLLM completion() model= gemma3:27b-it-qat; provider = openai
2025-06-18 16:34:19 - httpx - HTTP Request: POST https://ml.gss.com.tw/gemma3/chat/completions "HTTP/1.1 200 OK"


[92m16:34:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:19 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:19 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/gemma3:27b-it-qat


2025-06-18 16:34:19 - LiteLLM - selected model name for cost calculation: openai/gemma3:27b-it-qat


[92m16:34:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemma3:27b-it-qat


2025-06-18 16:34:19 - LiteLLM - selected model name for cost calculation: gemma3:27b-it-qat
Agent: final_answer='根據查詢結果，特休假申請流程大致如下：\n\n1.  **特休假天數**: 滿一年資有 5 天特休假。\n2.  **申請資格**: 自行請假不能超過 7 天。\n3.  **申請方式**: 需事先向主管申請。\n\n由於資訊比較簡略，建議您直接向人資部門洽詢更詳細的申請流程。' reference=[]
You: exit
Agent: Goodbye!


In [42]:
# Show the most recent conversation. Conversation ids are random per run, so a
# hard-coded id raises KeyError after the kernel is restarted; dicts keep insertion
# order, so the last key is the latest conversation.
full_history[next(reversed(full_history))] if full_history else []

[{'role': 'user', 'content': '今天天氣真好～臺北現在33度'},
 {'id': '__fake_id__',
  'content': [{'annotations': [],
    'text': '{\n  "final_answer": "是啊，今天臺北天氣確實很好！33度，要注意防曬和補充水分喔！",\n  "reference": []\n}',
    'type': 'output_text'}],
  'role': 'assistant',
  'status': 'completed',
  'type': 'message'},
 {'role': 'user', 'content': '公司的地址跟電話是多少？'},
 {'arguments': '{"query":"公司地址和電話"}',
  'call_id': '8hCAex7heJoqP0eKkxKSbe4mcFtx88kV',
  'name': 'retrieve_company_introduction_db',
  'type': 'function_call',
  'id': '__fake_id__'},
 {'call_id': '8hCAex7heJoqP0eKkxKSbe4mcFtx88kV',
  'output': '[{"type": "text", "text": "{\\n  \\"score\\": 0.7310234,\\n  \\"text\\": \\"\\u4e09\\u3001\\u7d93\\u71df\\u7406\\u5ff5\\\\n\\\\n(\\u4e00) \\u6838\\u5fc3\\u610f\\u8b58\\\\n\\\\n\\u8ffd\\u6c42\\u5ba2\\u6236\\u3001\\u516c\\u53f8\\u53ca\\u96d9\\u65b9\\u54e1\\u5de5\\u7684\\u5171\\u540c\\u6210\\u9577\\uff0c\\u4e26\\u56de\\u994b\\u8207\\u5949\\u737b\\u793e\\u5340\\uff1b\\u63d0\\u4f9b\\u8cc7\\u8a0a\\u79d1\\u6280\\u53