In [1]:
from dotenv import load_dotenv

# Displaying final output format
from IPython.display import display, Markdown, Latex

# Langchain Dependencies
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.tools import tool
from langchain_core.pydantic_v1 import BaseModel, Field
from langgraph.graph import END, StateGraph
# For State Graph 
from typing_extensions import TypedDict
import pandas as pd

from dotenv import load_dotenv
import os
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough

from utils import CSVAgentGPTInstance

In [2]:
load_dotenv()

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
llm_json = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, model_kwargs={"response_format": {"type": "json_object"}})

graph = Neo4jGraph()

In [3]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
class Entities(BaseModel):
    """Identifying information about entities."""

    names: List[str] = Field(
        ...,
        description="All the person, object, location, or event entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting person, object, location, or event entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

entity_chain = prompt | llm.with_structured_output(Entities)

def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~2 changed characters) to each word, then combines
    them using the AND operator. Useful for mapping entities from user questions
    to database values, and allows for some misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """
            CALL db.index.fulltext.queryNodes('entity', $query, {limit: 2})
            YIELD node, score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

def retriever(question: str):
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""Structured data:
        {structured_data}
        Unstructured data:
        {"#Document ". join(unstructured_data)}
    """
    return final_data

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be as elaborate as possible.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

def invoke_chain(question: str, chat_history):
    graph.query(
        "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")
    if chat_history:
        return chain.invoke(
            {
                "question": question,
                "chat_history": chat_history
            }
        )
    else:
        return chain.invoke(
            {
                "question": question,
            }
        )
    

# Router
router_prompt = PromptTemplate(
    template="""
    You are an expert at routing a user question to either the generation stage or general query stage. 
    Use the general query for questions that does not require additional contextual information.
    Otherwise, you can skip and go straight to the generation phase to respond.
    You do not need to be stringent with the keywords in the question related to these topics.
    Give a binary choice 'general' or 'generate' based on the question. 
    Return the JSON with a single key 'choice' with no premable or explanation. 
    
    Question to route: {question} 
    
    """,
    input_variables=["question"],
)

# Chain
question_router = router_prompt | llm | JsonOutputParser()

# Test Run
question = "What's up?"
print(question_router.invoke({"question": question}))

# General chain
general_prompt = PromptTemplate(
    template="""
    You are an expert at answering general questions. 
    Use the general query for questions that does not require additional contextual information.
    You do not need to be stringent with the keywords in the question related to these topics.
    Answer the following question in a natural language. 
    
    Question: {question} 
    
    """,
    input_variables=["question"],
)

general_chain = general_prompt | llm | StrOutputParser()

  
############################################################# Graph State #############################################################
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        search_query: revised question for web search
        context: web_search result
    """
    question : str
    generation : str
    search_query : str
    context : str

# Node - Generate

def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    
    print("Step: Generating Final Response")
    question = state["question"]

    # Answer Generation
    generation = invoke_chain(question, None)
    return {"generation": generation}

def general(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    
    print("Step: Generating Final Response")
    question = state["question"]

    # Answer Generation
    generation = general_chain.invoke({"question": question})
    return {"generation": generation}

# Conditional Edge, Routing

def route_question(state):
    """
    route question to web search or generation.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("Step: Routing Query")
    question = state['question']
    output = question_router.invoke({"question": question})
    if output['choice'] == "general":
        print("Step: Routing Query to General")
        return "general"
    elif output['choice'] == 'generate':
        print("Step: Routing Query to Generation")
        return "generate"
    
def build_workflow():
    """
    Build the workflow for the graph
    """
    # Build the nodes
    workflow = StateGraph(GraphState)
    workflow.add_node("general", general)
    workflow.add_node("generate", generate)

    # Build the edges
    workflow.set_conditional_entry_point(
        route_question,
        {
            "general": "general",
            "generate": "generate",
        },
    )
    workflow.add_edge("general", END)
    workflow.add_edge("generate", END)

    # Compile the workflow
    local_agent = workflow.compile()

    return local_agent

def run_agent(query, local_agent):
    output = local_agent.invoke({"question": query})
    print("=======")
    display(Markdown(output["generation"]))



{'choice': 'general'}


In [4]:
# Test it out!
agent = build_workflow()
run_agent("How many sales did TikTok make yesterday?", agent)

Step: Routing Query
Step: Routing Query to Generation
Step: Generating Final Response


I'm sorry, but without any specific information or data provided in the context, I am unable to answer the question about how many sales TikTok made yesterday. TikTok is a popular social media platform, but without structured data or specific details about their sales figures, it is impossible to provide an accurate answer. It would be necessary to have access to TikTok's financial reports or sales data to determine the number of sales they made on a specific day.

# DB Router

In [26]:
load_dotenv()

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
llm_json = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, model_kwargs={"response_format": {"type": "json_object"}})

added actions list

In [4]:
database_list = [
  {
    "database_name": "Sales_database123",
    "database_description": "A database that contains information about supermarket sales.",
    'columns': "Invoice ID, Product ID, Shop ID, Customer ID, Date, Time, Payment, Unit Price, Quantity, Total Price",
    "database_path": "../csv_db/supermarket_sales_db.csv",
  },
  {
    "database_name": "Outlet_database",
    "database_description": "A database that contains information about physical outlets.",
    "columns": "Shop ID, Country, City, State, Shop Name, Shop Type, Shop Size, Address",
    "database_path": "../csv_db/shop_outlet_db.csv",
  },
  {
    "database_name": "Customer_database",
    "database_description": "A database that contains information about customers.",
    "columns": "Customer ID, Customer Name, Customer Email, Customer Phone, Customer Address, Gender, Age",
    "database_path": "../csv_db/customer_db.csv",
  },
  {
    "database_name": "Product_database",
    "database_description": "A database that contains information about products.",
    "columns": "Product ID, Product Name, Product Price, Product Category",
    "database_path": "../csv_db/product_db.csv",
  },
]

actions_list = [
  {
    "action_type": "query_database",
    "action_name": "most_product_sales_for_shop",
    "action_description": "Search for which product had the most sales from a specific shop.",
    "input": ["sales", "shop name",],
    "output": ["product name", "product ID",],
  },
  {
    "action_type": "query_database",
    "action_name": "most_customer_sales_for_shop",
    "action_description": "Search for which customer had the most sales from a specific shop.",
    "input": ["sales", "shop name",],
    "output": ["customer name", "customer ID",],
  },
  {
    "action_type": "query_database",
    "action_name": "full_description_of_product",
    "action_description": "Give the full description of a product.",
    "input": ["product name",],
    "output": ["product ID", "product name", "product price", "product category",],
  },
  {
    "action_type": "api_call",
    "action_name": "create_jira_issue",
    "action_description": "Creates a Jira Issue with the given details.",
    "input": ["issue title", "issue description", "issue type", "issue priority",],
    "output": [],
  },
]


added chain to identify which action to select

few shot dont work for actions for some reason ?

In [70]:
###################### DB Routing ######################

example_db_prompt = ChatPromptTemplate.from_messages(
    [
      (
        "system",
        """
        Database name: {database_name}
        Database description: {database_description}
        Database columns: {columns}
        """,
      ),
    ]
)

db_few_shot_prompt = FewShotChatMessagePromptTemplate(
  example_prompt=example_db_prompt,
  examples=database_list,
)

db_router_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """
      You are a expert at routing a user query to one of the database provided below. 
      Your choice should be one of the database's name. 
      You can return NA if the query is not related to any of the databases.
      Return the JSON with a single key 'choice' with no premable or explanation.

      Choices:
      """
    ),
    db_few_shot_prompt,
    HumanMessagePromptTemplate.from_template(
      "Query: {query}"
    )
  ]
)

multi_db_router_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """
      You are a expert at routing a user query to the database provided below. 
      You can select more than one database if the query requires information from multiple databases.
      Your choices should be the database's name. 
      You can return NA if the query is not related to any of the databases.
      You should return a list of database names.
      Return the JSON with a single key 'choice' with no premable or explanation.

      Choices:
      """
    ),
    db_few_shot_prompt,
    HumanMessagePromptTemplate.from_template(
      "Query: {query}"
    )
  ]
)

###################### Choose Which Action ######################

choose_action_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """
      You are an intelligent assistant. 
      Your task is to match a user's query to a list of actions based on the inputs required for each action. 
      Each action has a specific set of inputs that it requires. 
      You should identify which actions can be taken based on the inputs mentioned in the user's query and return those actions in JSON format.
      Return NA if and only if the query is not related to any of the actions.
      You should return a list of action names.
      Return the JSON with a single key 'name' with no premable or explanation.

      Choices:
      """
    ),
    SystemMessagePromptTemplate.from_template(
            "\n".join(
                [
                    f"""
                    Action type: {action['action_type']}
                    Action name: {action['action_name']}
                    Action description: {action['action_description']}
                    Action input: {action['input']}
                    Action output: {action['output']}
                    """
                    for action in actions_list
                ]
            )
        ),
    HumanMessagePromptTemplate.from_template(
      "Query: {query}"
    )
  ]
)

###################### Generate Prompt from Action Prompt ######################

# Few Shot Chat Message Prompt Template
generate_prompt_from_action_prompt_examples = [
  {
    "query": "Summarize the sales for Shop A.",
    "action_description": "Search for which product had the most sales from a specific shop.",
    "action_input": ["sales", "shop name"],
    "action_output": ["product name", "product ID"],
    "answer": "First, give me the name of the product that had the most sales from Shop A. Then, use that product name to find the product ID."
  },
  {
    "query": "Summarize the sales for Shop A.",
    "action_description": "Search for which customer had the most sales from a specific shop.",
    "action_input": ["sales", "shop name"],
    "action_output": ["customer name", "customer ID"],
    "answer": "First, give me the name of the customer that bought the most items from Shop A. Then, use that customer name to find the customer ID."
  },
  {
    "query": "Summarize the sales for Country A.",
    "action_description": "Search for the city which had the highest quantity of items sold in a specific country.",
    "action_input": ["sales", "country name"],
    "action_output": ["city name", "quantity"],
    "answer": "First, give me the city name which had the highest quantity of items sold in Country A. Then, give me the total quantity of items sold for that city."
  },
  {
    "query": "Give report on Customer A.",
    "action_description": "Give the full details of a customer.",
    "action_input": ["customer name"],
    "action_output": ["customer ID", "customer name", "customer email", "customer phone", "customer address", "gender", "age"],
    "answer": "Give me the customer ID, customer name, customer email, customer phone, customer address, gender, age of customer A."
  }
]

generate_prompt_from_action_prompt_example_prompt = ChatPromptTemplate.from_messages(
    [
      (
        "system",
        """
        Query: {query}
        Action description: {action_description}
        Action input: {action_input}
        Action output: {action_output}
        Answer: {answer}
        """,
      ),
    ]
)

generate_prompt_from_action_prompt_fewshotprompt = FewShotChatMessagePromptTemplate(
    examples=generate_prompt_from_action_prompt_examples,
    example_prompt=generate_prompt_from_action_prompt_example_prompt,
)

# Prompt to generate systematic and precise output
generate_prompt_from_action_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """      
      You are an intelligent assistant. Your task is to create systematic and precise prompts to be fed to another llm,
      based on a user's query and a list of actions.
      Each action has a specific set of inputs and outputs. You should first identify the main action from the query.
      Then, generate instructions that use intermediate results precisely.
      Generate the step-by-step instructions using the action's inputs and outputs,
      ensuring intermediate results are clearly stated and used in subsequent steps.
      """
    ),
    SystemMessagePromptTemplate.from_template(
      """
      For the action:
      Action description: {action_description}
      Action input: {action_input}
      Action output: {action_output}

      Based on the query: "{query}", generate the output based on the action and the query.
      
      I will provide some examples to help you understand the task.
      
      Examples:

      """
    ),
    generate_prompt_from_action_prompt_fewshotprompt,
    SystemMessagePromptTemplate.from_template(
      """
      Do not include any explanation or preamble. Only return the actual answer.
      
      Return the answer as a string in natural language, in prose, with no bullet points, point form, or list format.
      """
    ),
    HumanMessagePromptTemplate.from_template(
      "Query: {query}",
    )
  ]
)

###################### Generate Final Output From Actions and Query ######################
example_db_output = [
  "The product that had the most sales from Singapore Outlet 1 is 'Eau de Toilette' with the product ID 'ES41'",
  "The customer that bought the most items from Singapore Outlet 1 is 'John Doe' with the customer ID 'JD123'",
]
final_output_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """
      You are an intelligent assistant. 
      Your task is to answer the user's query using the context provided below. 
      If you don't know the answer, just say that you don't know.
      Use natural language and elaborate meaningfully and provide suggestions for improvement when appropriate.

      Context:
      """
      +"\n".join(
          [
            f"""
            {cont}
            """
            for cont in example_db_output
          ]
      )
    ),
    HumanMessagePromptTemplate.from_template(
      "Query: {query}"
    )
  ]
)

###################### Generate Prompt for Extract Input for API Type Action ######################
example_action = {
    "action_type": "api_call",
    "action_name": "create_jira_issue",
    "action_description": "Creates a Jira Issue with the given details.",
    "input": ["issue title", "issue description", "issue type", "issue priority",],
    "output": [],
  }
api_extract_input_prompt = ChatPromptTemplate.from_messages(
  [
    SystemMessagePromptTemplate.from_template(
      """
      You are an expert at extracting information from a user query for input parameter of an API call. 
      Your task is to extract each input parameter for the API call from the user query.
      The action, its description and the input parameters are provided below.
      Return the JSON with the input as the keys with no premable or explanation.
      Do not alter the input parameter names in any way when used as the key.
      You should leave the value empty if the input parameter is not present in the query.

      Action:

      Action name: {action_name}
      Action description: {action_desc}
      Action input: {action_input}
      """
    ),
    HumanMessagePromptTemplate.from_template(
      "Query: {query}"
    )
  ]
)

###################### Generate General Response ######################
general_response_prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(
        """
        You are an intelligent agent, with the name Waffles Copilot.
        Your role is to give general answers to questions that are not related to CRM related tasks or actions.

        You should follow these specific guidelines to answering:

        1. **Non-CRM Related Queries:**
            - If a user asks a question that is unrelated to CRM processes (e.g., "What is the weather today?" or "What is the Nvidia share price today?"), 
            respond with a general-purpose message to redirect the user back to CRM-related tasks.

        2. **General Purpose or Personal Queries:**
            - If a user asks about your purpose or general well-being (e.g., "Hi, what is your purpose?" or "How are you doing today?"), respond with your purpose and role. 
            
        3. **Hateful Queries:**
          - If the user provides any hateful message or expresses inappropriate emotion (e.g., "You suck" or "You are useless'), respond with a message that you acknowledge their feelings but you cannot answer such questions or that the question is inappropriate,
          and redirect the user back to CRM-related tasks.
        
        4. **Inappropriate Queries:**
          - If the user provides any inappropriate message (for example, racially insensitive, sexually explicit, violent, politically sensitive, etc), 
          (e.g., "I hate all people from this country" or "I want to kill someone" or "I hate this political party"),
          respond with a message that you cannot answer such questions and redirect the user back to CRM-related tasks.
          
        5. **Non-English Queries:**
          - If the user asks a question in a language other than English, respond with a message that you can only understand and respond to questions in English.

        Remember, your main role is to assist with CRM processes and enhance the user's customer management experience.
        """
        ),
        HumanMessagePromptTemplate.from_template(
        "Query: {query}"
        )
    ]
)


In [29]:
db_router_chain = db_router_prompt | llm | JsonOutputParser()
multi_db_router_chain = multi_db_router_prompt | llm | JsonOutputParser()
action_router_chain = choose_action_prompt | llm | JsonOutputParser()
generate_prompt_from_action_prompt_chain = generate_prompt_from_action_prompt | llm | StrOutputParser()
final_output_chain = final_output_prompt | llm | StrOutputParser()

In [9]:
api_extract_input_chain.invoke({
  "query": "Create a Jira issue with the title 'Bug in the system', description 'There is a bug in the system'",
  "action_name": example_action['action_name'],
  "action_desc": example_action['action_description'],
  "action_input": str(example_action['input'])
  })

{'issue title': 'Bug in the system',
 'issue description': 'There is a bug in the system',
 'issue type': '',
 'issue priority': ''}

In [10]:
final_output_chain.invoke({"query": "Summarize the sales for Singapore Outlet 1."})

"In summary, the product 'Eau de Toilette' with the product ID 'ES41' had the most sales at Singapore Outlet 1. Additionally, the customer 'John Doe' with the customer ID 'JD123' bought the most items from this outlet. This information provides insight into the top-selling product and customer at Singapore Outlet 1. If you need more detailed information or specific data regarding sales at this outlet, feel free to ask for further details."

In [11]:
multi_db_router_chain.invoke({"query": "What is the name of the customer that made the biggest purchase?"})

{'choice': ['Sales_database123', 'Customer_database']}

Test action router chain. Preliminary testing seems good?

In [12]:
action_router_chain.invoke({"query": "Summarize the sales for Singapore Outlet 1."})

{'name': ['most_product_sales_for_shop', 'most_customer_sales_for_shop']}

In [13]:
action_router_chain.invoke({"query": "Create a Jira Issue with the title 'Bug in the system' and description 'The system is not responding'."})

{'name': ['create_jira_issue']}

This one below seems inconsistent between NA and most_product_sales_for_shop??

In [14]:
action_router_chain.invoke({"query": "give me the city name with the highest quantity of items sold for Kuala Lumpur"})

{'name': ['most_product_sales_for_shop']}

Test generate action prompt. Seems good for the example_action_list given above also

In [35]:
test_action_1 = actions_list[2]
print(test_action_1)

response = generate_prompt_from_action_prompt_chain.invoke({
    "action_description": test_action_1["action_description"],
    "action_input": test_action_1["input"],
    "action_output": test_action_1["output"],
    "query": "Summarize product Eau de Toilette",
})

print(response)

{'action_type': 'query_database', 'action_name': 'full_description_of_product', 'action_description': 'Give the full description of a product.', 'input': ['product name'], 'output': ['product ID', 'product name', 'product price', 'product category']}
Give me the product ID, product name, product price, and product category of the product named Eau de Toilette.


In [36]:
actions_chosen = action_router_chain.invoke({"query": "Summarize the sales for Singapore Outlet 1."})
print(actions_chosen)

selected_actions_prompts = []
query = "Summarize the sales for Singapore Outlet 1."

# Retrieve objects that match the names in actions_chosen
for name in actions_chosen['name']:
    for obj in actions_list:
        if obj['action_name'] == name:
            action_description = obj["action_description"]
            action_input = obj["input"]
            action_output = obj["output"]
            response = generate_prompt_from_action_prompt_chain.invoke({
                "action_description": action_description,
                "action_input": action_input,
                "action_output": action_output,
                "query": query,
            })
            selected_actions_prompts.append(response)

print(selected_actions_prompts)

{'name': 'most_product_sales_for_shop'}
[]


In [41]:
from utils import Chains
############################################################# Graph State #############################################################
class DBAgentGraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        query: query
        database: database to route query
        verbose: print debug statements?
        output: LLM generation
    """
    query : str
    database : str
    output: str
    verbose : bool

class DBAgent():
    def __init__(self, llm, database_list: List[dict]):
        self.database_list = database_list
        self.llm = llm
        self.chains = Chains(self.llm)
        self.multi_db_router_chain = self.chains.get_multi_db_router_chain(self.database_list)
        self.csvAgent = CSVAgentGPTInstance()

    # Node - db_router
    def db_router_node(self, state: DBAgentGraphState):
        """
        dynamic routing to database listed in database_list

        Args:
            state (dict): The current graph state

        Returns:
            str: Next node to call
        """
        if state['verbose']: print("Step: Routing Query")
        query = state['query']
        output = self.multi_db_router_chain.invoke({"query": query})
        if output['choice'] == "NA":
            if state['verbose']: print("Step: Routing Query to General")
            return {'database': "NA"}
        else:
            if state['verbose']: print("Step: Routing Query to Database")
            return {'database': output['choice']}
    
    def db_router_edge(self, state: DBAgentGraphState):
        """
        dynamic routing to database listed in database_list

        Args:
            state (dict): The current graph state

        Returns:
            str: Next node to call
        """
        if state['database'] == "NA":
            return "general"
        else:
            return "db_query"

    def database_query(self, state: DBAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating DB Response")
        query = state["query"]
        db_paths = []
        for datab in state['database']:
            selected_db = list(filter(lambda db: db.get('database_name') == datab, self.database_list))[0]
            path_to_db = selected_db.get('database_path')
            db_paths.append(path_to_db)
        
            
        if len(db_paths) == 1:
            response = self.csvAgent.get_csv_agent_output(db_paths[0], query)
        else:
            response = self.csvAgent.get_csv_agent_output(db_paths, query)
            

        return {"output": response}

    def general(self, state: DBAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating General Response")
        question = state["query"]

        # Answer Generation
        print(f"Answering general question: {question}")
        return None

    def build_workflow(self):
        """
        Build the workflow for the graph
        """
        # Build the nodes
        workflow = StateGraph(DBAgentGraphState)
        workflow.add_node("db_router_node", self.db_router_node)
        workflow.add_node("general", self.general)
        workflow.add_node("db_query", self.database_query)

        # Set the entry point
        workflow.set_entry_point("db_router_node")

        # Build the edges
        workflow.add_conditional_edges(
            "db_router_node",
            self.db_router_edge,
            {
                "general": "general",
                "db_query": "db_query",
            },
        )
        workflow.add_edge("general", END)
        workflow.add_edge("db_query", END)

        # Compile the workflow
        
        local_agent = workflow.compile()

        return local_agent

    def run_agent(self, query, verbose=True):
        local_agent = self.build_workflow()
        output = local_agent.invoke({"query": query, "verbose": verbose})
        return output

In [38]:
query = "what is the supermarket name of the store located on 13 orchard road"
DBAgent = DBAgent(llm, database_list)
DBAgent.run_agent(query)

Failed to write data to connection IPv4Address(('35d9c34f.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.126.114.186', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.126.114.186', 7687)) (ResolvedIPv4Address(('34.126.114.186', 7687)))


Step: Routing Query
Step: Routing Query to Database
Step: Generating DB Response


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['Address']=='13 Orchard Road']['Shop Name'].values[0]"}`


[0m[36;1m[1;3mSingapore Outlet 9[0m[32;1m[1;3mThe supermarket name of the store located on 13 Orchard Road is "Singapore Outlet 9".[0m

[1m> Finished chain.[0m


{'query': 'what is the supermarket name of the store located on 13 orchard road',
 'database': ['Outlet_database'],
 'output': {'input': 'what is the supermarket name of the store located on 13 orchard road',
  'output': 'The supermarket name of the store located on 13 Orchard Road is "Singapore Outlet 9".'},
 'verbose': True}

In [32]:
from utils import Chains
############################################################# Graph State #############################################################
class ActionAgentGraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        query: query
        database: database to route query
        verbose: print debug statements?
        output: LLM generation
        action_prompts: action prompts
        query_output: query output
    """
    query: str
    actions: str
    actions_prompts: List[str]
    query_output: List[str]
    output: str
    verbose: bool

class ActionAgent():
    def __init__(self, llm, actions_list: List[dict], database_list: List[dict]):
        self.actions_list = actions_list
        self.database_list = database_list
        self.llm = llm
        self.chains = Chains(self.llm)
        self.action_router_chain = self.chains.get_action_router_chain(self.actions_list)
        self.generate_action_prompt_chain = self.chains.get_generate_action_prompt_chain()
        self.api_extract_input_chain = self.chains.get_api_extract_input_chain()
        self.DBAgent = DBAgent(self.llm, self.database_list)

    def actions_router_node(self, state: ActionAgentGraphState):
        """
        dynamic choose which actions to take

        Args:
            state (dict): The current graph state

        Returns:
            str: Next node to call
        """
        if state['verbose']: print("Step: Choosing Actions")
        query = state['query']
        output = self.action_router_chain.invoke({"query": query})
        if output['name'] == "NA":
            if state['verbose']: print("Step: No Actions to Take")
            return {'actions': "NA"}
        else:
            selected_actions = [action for action in self.actions_list if action['action_name'] in output['name']]
            api_type_actions = [action for action in selected_actions if action['action_type'] == 'api_call']
            if len(api_type_actions) > 0:
                if state['verbose']: print("Step: Routing Query to API Call Type Actions Stage")
                api_type_actions_names = [action['action_name'] for action in api_type_actions]
                return {'actions': api_type_actions_names}
            else:
                if state['verbose']: print("Step: Routing Query to DB type Actions Prompting Stage")
                return {'actions': output['name']}
    
    def actions_router_edge(self, state: ActionAgentGraphState):
        """
        dynamic routing to database listed in database_list

        Args:
            state (dict): The current graph state

        Returns:
            str: Next node to call
        """
        if state['actions'] == "NA":
            return "general"
        else:
            selected_actions = [action for action in self.actions_list if action['action_name'] in state['actions']]
            api_type_actions = [action for action in selected_actions if action['action_type'] == 'api_call']
            if len(api_type_actions) > 0:
                return "api_type_node"
            else:
                return "generate_action_prompt"
    
    def api_type_node(self, state: ActionAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating API Call Response")
        selected_actions = [action for action in self.actions_list if action['action_name'] in state['actions']]
        response = self.api_extract_input_chain.invoke({
            "query": state["query"],
            "action_name": selected_actions[0]['action_name'],
            "action_desc": selected_actions[0]['action_description'],
            "action_input": str(selected_actions[0]['input'])
        })
        payload = {
            "action_name": selected_actions[0]['action_name'],
            "action_desc": selected_actions[0]['action_description'],
            'extracted_inputs': response
        }
        return {"output": payload}
    
    def generate_action_prompt(self, state: ActionAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating Action Prompt")
        print("Actions: " + str(state['actions']))
        action_prompts = []
        for action in state['actions']:
            selected_action = list(filter(lambda act: act.get('action_name') == action, self.actions_list))[0]
            
            response = self.generate_action_prompt_chain.invoke({
                "action_description": selected_action["action_description"],
                "action_input": selected_action["input"],
                "action_output": selected_action["output"],
                "query": state["query"],
            })
            action_prompts.append(response)

        return {"actions_prompts": action_prompts}
    
    def db_query(self, state: ActionAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating DB Response")
        query_output = []
        for action_prompt in state['actions_prompts']:
            response = self.DBAgent.run_agent(action_prompt)
            query_output.append(response)

        return {"query_output": query_output}
    
    def generate_final_output(self, state: ActionAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating Final Response")
        query_output = state["query_output"]
        output_strings = [item['output']['output'] for item in query_output]
        final_output_chain = self.chains.get_final_output_chain(output_strings)
        response = final_output_chain.invoke({"query": state["query"]})
        return {"output": response}
    
    def general(self, state: ActionAgentGraphState):
        """
        Generate answer

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        
        if state['verbose']: print("Step: Generating General Response")
        question = state["query"]

        # Answer Generation
        print(f"Answering general question: {question}")
        return None

    def build_workflow(self):
        """
        Build the workflow for the graph
        """
        # Build the nodes
        workflow = StateGraph(ActionAgentGraphState)
        workflow.add_node("actions_router_node", self.actions_router_node)
        workflow.add_node("general", self.general)
        workflow.add_node("generate_action_prompt", self.generate_action_prompt)
        workflow.add_node("db_query", self.db_query)
        workflow.add_node("generate_final_output", self.generate_final_output)
        workflow.add_node("api_type_node", self.api_type_node)

        # Set the entry point
        workflow.set_entry_point("actions_router_node")

        # Build the edges
        workflow.add_conditional_edges(
            "actions_router_node",
            self.actions_router_edge,
            {
                "general": "general",
                "generate_action_prompt": "generate_action_prompt",
                "api_type_node": "api_type_node",
            },
        )
        workflow.add_edge("general", END)
        workflow.add_edge("generate_action_prompt", "db_query")
        workflow.add_edge("db_query", "generate_final_output")
        workflow.add_edge("generate_final_output", END)
        workflow.add_edge("api_type_node", END)

        # Compile the workflow
        
        local_agent = workflow.compile()

        return local_agent

    def run_agent(self, query, verbose=True):
        local_agent = self.build_workflow()
        output = local_agent.invoke({"query": query, "verbose": verbose})
        return output

In [33]:
ActionAgent = ActionAgent(llm, actions_list, database_list)



In [None]:
query = "Summarize the sales for Singapore Outlet 1."

output = ActionAgent.run_agent(query)

Step: Choosing Actions
Step: Routing Query to DB type Actions Prompting Stage
Step: Generating Action Prompt
Actions: ['most_product_sales_for_shop', 'most_customer_sales_for_shop']
Step: Generating DB Response
Step: Routing Query
Step: Routing Query to Database
Step: Generating DB Response


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df = df1.merge(df2, on='Product ID')\nmost_sold_product = df[df['Shop ID'] == 'SG1'].groupby('Product Name')['Quantity'].sum().idxmax()\nmost_sold_product"}`


[0m[36;1m[1;3mEau de Toilette[0m[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['Product Name'] == 'Eau de Toilette']['Product ID'].iloc[0]"}`


[0m[36;1m[1;3mES41[0m[32;1m[1;3mThe product name that had the most sales from Singapore Outlet 1 is "Eau de Toilette". The product ID for this product is ES41.[0m

[1m> Finished chain.[0m
Step: Routing Query
Step: Routing Query to Database
Step: Generating DB Respon

In [35]:
output['output']

{'action_name': 'create_jira_issue',
 'action_desc': 'Creates a Jira Issue with the given details.',
 'extracted_inputs': {'issue title': 'Bug in the system',
  'issue description': 'The system is not responding',
  'issue type': '',
  'issue priority': ''}}

In [34]:
query = "Help me create a Jira Issue with the title 'Bug in the system' and description 'The system is not responding'."

output = ActionAgent.run_agent(query)

Step: Choosing Actions
Step: Routing Query to API Call Type Actions Stage
Step: Generating API Call Response


In [None]:
query = "Hi, how are you?"

output = ActionAgent.run_agent(query)

Step: Choosing Actions
Step: No Actions to Take
Step: Generating General Response
Answering general question: Hi, how are you?


: 