# Query via RAG 

In [1]:
from dotenv import load_dotenv
from IPython.display import display, Markdown, Latex
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_community.tools import DuckDuckGoSearchRun, BingSearchRun
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, BingSearchAPIWrapper
from langchain_core.tools import tool
from langchain_core.pydantic_v1 import BaseModel, Field
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import TokenTextSplitter
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

In [2]:
# Initialise the LLM and neo4j graph
load_dotenv()

llm = ChatOpenAI(temperature=0, model="gpt-4o")
llm_json = ChatOpenAI(model="gpt-4o", temperature=0, model_kwargs={"response_format": {"type": "json_object"}})


graph = Neo4jGraph()

In [24]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)


class Entities(BaseModel):
    """Identifying information about entities."""

    names: List[str] = Field(
        ...,
        description="All the person, object, location, or event entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting person, object, location, or event entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

entity_chain = prompt | llm.with_structured_output(Entities)

def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~2 changed characters) to each word, then combines
    them using the AND operator. Useful for mapping entities from user questions
    to database values, and allows for some misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """
            CALL db.index.fulltext.queryNodes('keyword', $query, {limit: 2})
            YIELD node, score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    
    return result

def retriever(question: str):
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""Structured data:
        {structured_data}
        Unstructured data:
        {"#Document ". join(unstructured_data)}
    """
    return final_data

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be as elaborate as possible.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

def invoke_chain(question: str, chat_history):
    print("invoke chain called")
    graph.query(
        "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")
    print("fulltext index created")
    if chat_history:
        return chain.invoke(
            {
                "question": question,
                "chat_history": chat_history
            }
        )
    else:
        return chain.invoke(
            {
                "question": question,
            }
        )
    
########################################################### Web Search Tool ###########################################################
wrapper = DuckDuckGoSearchAPIWrapper(max_results=25)
web_search_tool = DuckDuckGoSearchRun(api_wrapper=wrapper)

########################################################### Query Transformation ###########################################################
query_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
            You are an expert at crafting web search queries for research questions.
            More often than not, a user will ask a basic question that they wish to learn more about, however it might not be in the best format. 
            Reword their query to be the most effective web search string possible.
            Return the JSON with a single key 'query' with no premable or explanation. 
            
            Question to transform: {question} 
         """)
    ]
)

# Chain
query_chain = query_prompt | llm | JsonOutputParser()

############################################################# Graph State #############################################################
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        search_query: revised question for web search
        context: web_search result
    """
    question : str
    generation : str
    search_query : str
    context : str

# Node - Generate

def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    
    print("Step: Generating Final Response")
    question = state["question"]

    # Answer Generation
    generation = invoke_chain(question, None)
    return {"generation": generation}

# Node - Query Transformation

def transform_query(state):
    """
    Transform user question to web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended search query
    """
    
    print("Step: Optimizing Query for Web Search")
    question = state['question']
    gen_query = query_chain.invoke({"question": question})
    search_query = gen_query["query"]
    return {"search_query": search_query}


# Node - Web Search

def web_search(state):
    """
    Web search based on the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to context
    """

    search_query = state['search_query']
    print(f'Step: Searching the Web for: "{search_query}"')
    
    # Web search tool call
    search_result = web_search_tool.invoke(search_query)
    return {"context": search_result}
    
    # let the function do nothing and return the same state
    # return state
    


# Conditional Edge, Routing

def route_question(state):
    """
    route question to web search or generation.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call
    """

    print("Step: Routing Query")
    question = state['question']
    structured_data = structured_retriever(question)
    
    if len(structured_data) != 0:
        print("Step: Context Found, Routing to Generation")
        return "generate"
    elif len(structured_data) == 0:
        print("Step: Context Not Found, Routing to Web Search")
        return "websearch"
    
def build_workflow():
    """
    Build the workflow for the graph
    """
    # Build the nodes
    workflow = StateGraph(GraphState)
    workflow.add_node("websearch", web_search)
    workflow.add_node("transform_query", transform_query)
    workflow.add_node("generate", generate)

    # Build the edges
    workflow.set_conditional_entry_point(
        route_question,
        {
            "websearch": "transform_query",
            "generate": "generate",
        },
    )
    workflow.add_edge("transform_query", "websearch")
    workflow.add_edge("websearch", "generate")
    workflow.add_edge("generate", END)

    # Compile the workflow
    local_agent = workflow.compile()

    return local_agent

def run_agent(query, local_agent):
    output = local_agent.invoke({"question": query})
    print("=======")
    display(Markdown(output["generation"]))



In [26]:
# Test it out!
local_agent = build_workflow()
test_query = "What are some examples of novel initiatives kickstarted by governments regarding public transportation? Please list a few, together with their respective cities."
run_agent(test_query, local_agent)

Step: Routing Query
Step: Context Not Found, Routing to Web Search
Step: Optimizing Query for Web Search
Step: Searching the Web for: "recent government initiatives in public transportation with examples and cities"
Step: Generating Final Response
invoke chain called
fulltext index created


Several cities have introduced novel initiatives to enhance public transportation, focusing on sustainability, efficiency, and citizen well-being. Here are a few examples:

1. **Solaris - "Eco-Rail" Metro System and "Cycle Share" Bike-Sharing Program**:
   - **Eco-Rail Metro System**: Solaris has developed an extensive metro system that prioritizes eco-friendly mobility. This initiative has encouraged more active lifestyles among residents, leading to a 25% increase in the number of people commuting by public transit or cycling.
   - **Cycle Share Bike-Sharing Program**: Complementing the metro system, Solaris introduced a bike-sharing program that has further promoted sustainable transportation. This program has contributed to a significant reduction in air pollution and improved public health, with a 15% drop in respiratory-related illnesses.

2. **Elysia - Solar-Powered Streetlights**:
   - **Solar-Powered Streetlights**: Elysia has replaced conventional streetlights with solar-powered alternatives. These streetlights collect sunlight during the day and store energy in batteries to illuminate the city at night. This initiative not only reduces energy consumption but also engages local schools in the design and installation process, fostering community involvement and education.

3. **Utopolis - Solar-Powered Streetlights and Rainwater Harvesting**:
   - **Solar-Powered Streetlights**: Similar to Elysia, Utopolis has implemented solar-powered streetlights throughout the city. These streetlights are strategically placed to ensure adequate illumination in streets, parks, and public areas, promoting sustainable lighting solutions.
   - **Rainwater Harvesting**: Utopolis has also introduced rainwater harvesting systems in public buildings. This initiative reduces water usage by collecting and storing rainwater for non-potable purposes such as irrigation and cleaning, demonstrating a commitment to sustainable water management.

These initiatives reflect a broader trend among cities to innovate and invest in sustainable public transportation and infrastructure, ultimately enhancing the quality of life for their residents.