In [None]:
# Load the dotenv IPython extension, then point it at the test environment file.
# NOTE(review): the path below is absolute and machine-specific — adjust it before running elsewhere.
%load_ext dotenv
%dotenv /Users/saisakulchernbumroong/Documents/vsprojects/DBT_redbox/redbox/.env.test

#### We are investigating whether an LLM can identify the correct tool (e.g. search or summarise) given a user's question

We explore prompts that 1) use only the user's question to determine the route, and 2) use the user's question together with basic document metadata (document name, description and keywords) to determine the route. The rationale is that document metadata gives the LLM more information to help it determine the route.

Conclusion: Based on the experiment results, we achieved 80% using the prompt with metadata. In addition, the LLM takes slightly less time to make its decision with this prompt. Therefore, we will be implementing a new node with LLM_decide_tool_1 going forward.

### Prompts

In [None]:
# Routing prompt, variant 0: asks the model to pick between the search and
# summarise tools using the user's question only (no document metadata).
# Template placeholders: {question} (used twice) and {format_instructions}.
LLM_decide_tool_0 = ("""Given analysis request, determine whether to use search or summarise tools.

Context:
- Search tool: Used to find and analyze specific relevant sections in a document
- Summarise tool: Used to create an overview of the entire document's content

Please analyze the following request:
{question}

Follow these steps to determine the appropriate tool:

1. Identify the key requirements in the request:
   - Is it asking for specific information or general overview?
   - Are there specific topics/keywords mentioned?
   - Is the scope focused or broad?

2. Evaluate request characteristics:
   - Does it need comprehensive coverage or targeted information?
   - Are there specific questions to answer?
   - Is context from the entire document needed?

3. Recommend either search or summarise based on:
   - If focused/specific information is needed → Recommend search
   - If general overview/main points needed → Recommend summarise

- Recommended Tool: [Search/Summarise]
- Reason for the recommendation
Provide your recommendation in this format:
\n{format_instructions}\n
                   
Analysis request:
{question}
""")


In [None]:
# Routing prompt, variant 1: asks the model to pick between the search and
# summarise tools using the user's question plus basic document metadata.
# Template placeholders: {question} (used twice), {format_instructions} and {metadata}.
# The wording fixes typos ("metadata", "Prioritise") and drops stray trailing
# whitespace; re-run the experiment cell after any wording change, since the
# model sees this text verbatim.
LLM_decide_tool_1 = ("""Given analysis request and document metadata, determine whether to use search or summarise tools.

Context:
- Search tool: Used to find and analyze specific relevant sections in a document
- Summarise tool: Used to create an overview of the entire document's content

Please analyze the following request:
{question}

Follow these steps to determine the appropriate tool:

1. Identify the key requirements in the request:
   - Is it asking for specific information or general overview?
   - Are there specific topics/keywords mentioned?
   - Is the scope focused or broad?

2. Evaluate request characteristics:
   - Does it need comprehensive coverage or targeted information?
   - Are there specific questions to answer?
   - Is context from the entire document needed?

3. Recommend either search or summarise based on:
   - If focused/specific information is needed → Recommend search
   - If general overview/main points needed → Recommend summarise
   - Prioritise search tool if both tools can be used to produce good answer

- Recommended Tool: [Search/Summarise]

Provide your recommendation in this format:
\n{format_instructions}\n

Analysis request:
{question}

Document metadata: {metadata}
""")

### Creating necessary functions

In [None]:
import os
import time
from enum import Enum
from uuid import uuid4

import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import chain
from pydantic import BaseModel

from redbox.chains.components import get_basic_metadata_retriever
from redbox.chains.components import get_chat_llm
from redbox.chains.parser import ClaudeParser
from redbox.models.chain import RedboxState, RedboxQuery, AISettings
from redbox.models.settings import ChatLLMBackend
from redbox.models.settings import get_settings

In [None]:
def get_state(user_uuid, prompts, documents, ai_setting):
    """Wrap a conversation and a document selection in a ``RedboxState``.

    Args:
        user_uuid: Identifier passed through as the request's ``user_uuid``.
        prompts: Sequence of prompts; the last entry becomes the question and
            everything before it is passed as ``chat_history``.
        documents: Passed as both ``s3_keys`` and ``permitted_s3_keys``.
        ai_setting: Passed through as the request's ``ai_settings``.

    Returns:
        A ``RedboxState`` whose ``request`` is the assembled ``RedboxQuery``.
    """
    latest_prompt = prompts[-1]
    earlier_prompts = prompts[:-1]

    request = RedboxQuery(
        question=f"{latest_prompt}",
        chat_history=earlier_prompts,
        s3_keys=documents,
        permitted_s3_keys=documents,
        user_uuid=user_uuid,
        ai_settings=ai_setting,
    )
    return RedboxState(request=request)

def basic_chat_chain(system_prompt, _additional_variables: "dict | None" = None, parser=None):
    """Build a runnable that formats a prompt, calls the chat LLM and parses the reply.

    Args:
        system_prompt: Prompt template text. It is formatted with ``question``
            (taken from the state) plus any ``_additional_variables``.
        _additional_variables: Extra template variables merged over the base
            context. ``None`` (the default) means no extra variables.
        parser: Output parser. When given, its format instructions are bound to
            the ``format_instructions`` template variable. When omitted, a
            plain ``ClaudeParser()`` is used and no format instructions are bound.

    Returns:
        A runnable taking a ``RedboxState`` and returning the parser's output.
    """
    # A None default avoids sharing one mutable dict between every call site.
    # A caller-supplied dict is used as-is (not copied), as before.
    extra_variables = _additional_variables if _additional_variables is not None else {}

    @chain
    def _basic_chat_chain(state: RedboxState):
        llm = get_chat_llm(state.request.ai_settings.chat_backend)
        context = {"question": state.request.question} | extra_variables

        # Choose the parser per invocation instead of reassigning the enclosing
        # `parser`: mutating the closure made a parser-less chain take the
        # format-instructions branch from its second invocation onwards.
        if parser is not None:
            output_parser = parser
            # NOTE(review): a bare string in the message list is presumably
            # coerced by LangChain into a human message despite the
            # `system_prompt` name — confirm this is intended.
            prompt = ChatPromptTemplate(
                [system_prompt],
                partial_variables={"format_instructions": parser.get_format_instructions()},
            )
        else:
            output_parser = ClaudeParser()
            prompt = ChatPromptTemplate([system_prompt])

        # Named `pipeline` so the local does not shadow the imported `chain` decorator.
        pipeline = prompt | llm | output_parser
        return pipeline.invoke(context)

    return _basic_chat_chain

def lm_choose_route(system_prompt: str, parser: ClaudeParser):
    """Build a runnable that routes a request using the question and document metadata.

    For each invocation the runnable retrieves basic document metadata for the
    state, then runs ``basic_chat_chain`` with that metadata supplied as the
    ``metadata`` template variable.

    Args:
        system_prompt: Prompt template text; expected to contain a
            ``{metadata}`` placeholder alongside those used by ``basic_chat_chain``.
        parser: Output parser forwarded to ``basic_chat_chain``.

    Returns:
        A runnable taking a ``RedboxState`` and returning the parser's output.
    """

    @chain
    def _choose_route(state: RedboxState):
        # Metadata stays local to this invocation. It used to travel between
        # two chained steps through a shared `nonlocal`, so overlapping runs
        # (concurrent invokes, or `batch`, which runs a sequence step by step
        # across inputs) could pair one request's question with another
        # request's metadata.
        retriever = get_basic_metadata_retriever(get_settings())
        metadata = retriever.invoke(state)

        decision_chain = basic_chat_chain(
            system_prompt=system_prompt,
            parser=parser,
            _additional_variables={'metadata': metadata},
        )
        return decision_chain.invoke(state)

    return _choose_route
    

### Defining class to capture LLM response

In [None]:
# Names of the tools the LLM is allowed to choose between.
members = ["Search", "Summarise"]

# Options map for the supervisor output parser: each tool name maps to itself,
# so every enum member's value equals its name.
tools_options = dict(zip(members, members))

# Build the enum from the options map via Enum's functional API.
ToolEnum = Enum('ToolEnum', tools_options)

In [None]:
# Structured LLM answer carrying only the routing decision.
# `next` is the chosen tool; it defaults to ToolEnum.Search when omitted.
# NOTE(review): documented with `#` comments rather than a docstring because a
# pydantic model docstring may end up in the schema used for the parser's
# format instructions — confirm before adding one.
class AgentDecision(BaseModel):
    next: ToolEnum = ToolEnum.Search

# Structured LLM answer carrying the routing decision plus a justification.
# `next` is the chosen tool (defaults to ToolEnum.Search); `reason` is a
# required free-text string.
# NOTE(review): documented with `#` comments rather than a docstring because a
# pydantic model docstring may end up in the schema used for the parser's
# format instructions — confirm before adding one.
class AgentDecisionWithReason(BaseModel):
    next: ToolEnum = ToolEnum.Search
    reason: str

## Testing approach

### Set up Redbox state

In [None]:
# Sample state used by the manual test cells below: one targeted question
# about a single document, with Claude 3 Sonnet on Bedrock as the chat backend.
x = get_state(
    user_uuid=uuid4(),
    prompts=[
        'What did Serena Williams say about fairness in relation to her experience with postnatal complications?'
    ],
    documents=['test@dbt.gov.uk/1 The power chapter.pdf'],
    ai_setting=AISettings(
        chat_backend=ChatLLMBackend(
            name="anthropic.claude-3-sonnet-20240229-v1:0",
            provider="bedrock",
        )
    ),
)

### Test prompt 0 without metadata

In [None]:
# Time prompt 0 (question only) with both response schemas.
# Durations use time.perf_counter(): unlike wall-clock time.time(), it is not
# affected by system clock updates, so the reported latency stays reliable.
prompt = LLM_decide_tool_0
agent_parser = ClaudeParser(pydantic_object=AgentDecisionWithReason)
# return results with reason
test_chain_reason = basic_chat_chain(system_prompt=prompt, parser=agent_parser)
start = time.perf_counter()
test_chain_reason.invoke(x)
print(f'time used: {time.perf_counter() - start}')

# return results without reason
agent_parser = ClaudeParser(pydantic_object=AgentDecision)
test_chain_no_reason = basic_chat_chain(system_prompt=prompt, parser=agent_parser)
start = time.perf_counter()
test_chain_no_reason.invoke(x)
print(f'time used: {time.perf_counter() - start}')

### Test prompt 1 with metadata

In [None]:
# Time prompt 1 (question + document metadata) with both response schemas.
# Durations use time.perf_counter(): unlike wall-clock time.time(), it is not
# affected by system clock updates, so the reported latency stays reliable.
prompt = LLM_decide_tool_1
agent_parser = ClaudeParser(pydantic_object=AgentDecisionWithReason)
# return results with reason
test_chain_reason = lm_choose_route(system_prompt=prompt, parser=agent_parser)
start = time.perf_counter()
test_chain_reason.invoke(x)
print(f'time used: {time.perf_counter() - start}')

# return results without reason
agent_parser = ClaudeParser(pydantic_object=AgentDecision)
test_chain_no_reason = lm_choose_route(system_prompt=prompt, parser=agent_parser)
start = time.perf_counter()
test_chain_no_reason.invoke(x)
print(f'time used: {time.perf_counter() - start}')

## Experiments

In [None]:
### Experiments
# Labelled routing dataset. The default is the original author's local path;
# set the ROUTE_CONSENSUS_CSV environment variable to point at your own copy
# instead of editing this cell. (`os` and `pd` come from the imports cell.)
CONSENSUS_CSV_PATH = os.environ.get(
    "ROUTE_CONSENSUS_CSV",
    "/Users/saisakulchernbumroong/Documents/RB exp/intent_exp/route_results_consensus.csv",
)
df = pd.read_csv(CONSENSUS_CSV_PATH)

# Fail fast, before any LLM call is made: the experiment loop reads
# `df.Prompt` and the save cell reads `df.consensus`.
missing_columns = {"Prompt", "consensus"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{CONSENSUS_CSV_PATH} is missing required column(s): {sorted(missing_columns)}")

In [None]:
# making change on the chain and correct parser you want to test here
agent_parser = ClaudeParser(pydantic_object=AgentDecisionWithReason)
test_chain = lm_choose_route(system_prompt=LLM_decide_tool_1, parser=agent_parser)

# Loop-invariant inputs, built once instead of once per question.
experiment_documents = ['test@dbt.gov.uk/1 The power chapter.pdf']
experiment_ai_settings = AISettings(chat_backend=ChatLLMBackend(name="anthropic.claude-3-sonnet-20240229-v1:0", provider="bedrock"))

next_move = []
move_reason = []
# `question` / `state` are deliberately distinct from the globals `prompt` and
# `x`, so running this cell no longer overwrites the prompt template and the
# sample state that the manual test cells rely on.
for question in df.Prompt:
    state = get_state(user_uuid=uuid4(), prompts=[question], documents=experiment_documents, ai_setting=experiment_ai_settings)
    res = test_chain.invoke(state)
    next_move.append(res.next.value)
    # A response model without a `reason` field yields None here, so nothing
    # needs commenting out when switching parsers and both lists keep one
    # entry per question.
    move_reason.append(getattr(res, "reason", None))


In [None]:
# Persist the experiment output: each question, its consensus label, the tool
# the LLM selected and the LLM's stated reason, one row per question.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
save_path = "/Users/saisakulchernbumroong/Documents/RB exp/intent_exp/intent_prompt3_basic_metadata.csv"
pd.DataFrame(
    {
        'Prompt': df.Prompt,
        'Consensus': df.consensus,
        'LLM_tool_select': next_move,
        'LLM_reason': move_reason,
    }
).to_csv(save_path)