In [1]:
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough

# Colab-only setup: turn on Colab's custom widget manager when the notebook is
# running inside Google Colab. Outside Colab the import fails and is skipped.
try:
  import google.colab
  from google.colab import output
  output.enable_custom_widget_manager()
except ImportError:
  # Not running in Colab. Only the missing-module case is ignored; any other
  # error is allowed to surface instead of being silently swallowed.
  pass

In [3]:
import os
from getpass import getpass

from dotenv import load_dotenv

# Load settings/secrets from a local .env file (if one exists) into os.environ.
load_dotenv()

# Defaults for a local Neo4j instance. setdefault() keeps any value that the
# environment or .env already provides instead of overwriting it.
os.environ.setdefault("NEO4J_URI", "neo4j://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")

# Credentials must not be hardcoded in the notebook: take the password from the
# environment/.env, and only prompt interactively when it is missing.
if not os.environ.get("NEO4J_PASSWORD"):
    os.environ["NEO4J_PASSWORD"] = getpass("Neo4j password: ")

# Fail fast, with a readable message, when the OpenAI key is not configured.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file"
    )

# No arguments are passed, so the connection settings are expected to come from
# the NEO4J_* environment variables prepared above.
graph = Neo4jGraph()

# Data Ingestion

In [5]:
# Fetch the Wikipedia pages returned for the "Elizabeth I" query
raw_documents = WikipediaLoader(query="Elizabeth I").load()

# Token-based chunking: 512-token chunks with a 24-token overlap
text_splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=24,
)

# Only the first three loaded pages are split into chunks
documents = text_splitter.split_documents(raw_documents[:3])





  lis = BeautifulSoup(html).find_all('li')


In [6]:
# Chat model used for graph extraction and, later, for answering questions.
# (gpt-4-0125-preview occasionally has issues)
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Convert the text chunks into graph documents with the LLM-backed transformer
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

# Write the extracted graph to Neo4j.
# NOTE(review): baseEntityLabel presumably adds the shared __Entity__ label that
# the full-text index relies on, and include_source presumably links entities to
# their source Document nodes -- confirm against the Neo4jGraph documentation.
graph.add_graph_documents(
    graph_documents,
    include_source=True,
    baseEntityLabel=True,
)

In [19]:
# Vector store over the existing `Document` nodes: the `text` property is
# embedded with OpenAI embeddings and stored on the `embedding` property;
# searches use the "hybrid" search type.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

In [7]:
# Retriever

# Full-text index named `entity` over the `id` property of __Entity__ nodes;
# IF NOT EXISTS makes the statement safe to re-run.
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS "
    "FOR (e:__Entity__) ON EACH [e.id]"
)

# Extract entities from text
# Schema handed to `llm.with_structured_output(...)` when building the entity
# chain. NOTE(review): the class docstring and the Field description are
# presumably forwarded to the model as part of the schema, so treat them as
# prompt text rather than plain documentation -- confirm before rewording.
class Entities(BaseModel):
    """Identifying information about entities."""

    # Required field (`...` means no default): the entity names found in the text.
    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

# Chat prompt for entity extraction: a fixed system instruction followed by a
# human message that carries the user's text in the {question} slot.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are extracting organization and person entities from the text."),
        (
            "human",
            "Use the given format to extract information from the following input: {question}",
        ),
    ]
)

# Pipe the prompt into the model constrained to return an `Entities` object.
entity_chain = prompt | llm.with_structured_output(Entities)

In [10]:
# Smoke test: extract the entity names mentioned in a sample question.
entity_chain.invoke({"question": "Where was Amelia Earhart born?"}).names

['Amelia Earhart']

In [13]:
def generate_full_text_query(input: str) -> str:
    """
    Generate a fuzzy full-text search query for a given input string.

    The input is stripped of Lucene special characters and split on whitespace.
    Each remaining word gets a similarity threshold (~2 changed characters) and
    the words are combined with the AND operator. Useful for mapping entities
    from user questions to database values while allowing some misspellings.

    Args:
        input: Free text, typically an entity name extracted from a question.

    Returns:
        The query string, e.g. "Elizabeth~2 AND I~2". An empty string is
        returned when no searchable word remains (empty input, or input made
        only of whitespace / special characters) instead of raising IndexError.
    """
    # str.split() without arguments never yields empty strings, so no extra
    # filtering of blank tokens is required.
    words = remove_lucene_chars(input).split()
    full_text_query = " AND ".join(f"{word}~2" for word in words)
    print(f'generate full text query: {full_text_query}')
    return full_text_query

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collect the graph neighborhood of the entities mentioned in the question.

    Entities are extracted from the question with `entity_chain`. For each one,
    the `entity` full-text index is queried (top 2 matches) and the incoming and
    outgoing relationships of the matched nodes, excluding MENTIONS, are
    rendered as "source - TYPE -> target" lines (at most 50 per entity).

    Args:
        question: The user's natural-language question.

    Returns:
        All relationship lines joined with newlines, one line per relationship,
        or an empty string when nothing was found.
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # No searchable word is left for this entity; there is nothing to
            # look up in the full-text index.
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": full_text_query},
        )
        entity_rows = "\n".join(el["output"] for el in response)
        if entity_rows:
            # Separate the rows of successive entities with a newline so the
            # last row of one entity is not fused with the first row of the next.
            result = f"{result}\n{entity_rows}" if result else entity_rows
        print(f"structured_retriever result: {result}")
    return result

In [14]:
# Smoke test of the graph-side retriever. structured_retriever also prints its
# own progress lines, so the relationships appear in the cell output.
print(structured_retriever("Who is Elizabeth I?"))

generate full text query: Elizabeth~2 AND I~2
structured_retriever result: Elizabeth I - INTERACTED_WITH -> France
Elizabeth I - INTERACTED_WITH -> Netherlands
Elizabeth I - AIDED_BY -> Sir Francis Walsingham
Elizabeth I - QUEEN_OF -> England
Elizabeth I - SIBLING_OF -> Edward Vi
Elizabeth I - SUCCEEDED_BY -> James Vi Of Scotland
Elizabeth I - QUEEN_OF -> Ireland
Elizabeth I - SUPREME_GOVERNOR_OF -> Church Of England
Elizabeth I - ADVISED_BY -> William Cecil
Elizabeth I - SIBLING_OF -> Mary I
Elizabeth I - INTERACTED_WITH -> Spain
Elizabeth I - PARTICIPANT -> Elizabethan Religious Settlement
Elizabeth I - ADVISOR -> Sir Francis Walsingham
Elizabeth I - SUCCESSOR -> James Vi Of Scotland
Elizabeth I - SUPREME_GOVERNOR -> Church Of England
Elizabeth I - ADVISOR -> William Cecil
Elizabeth I - SIBLING -> Mary
Elizabeth I - SIBLING -> Edward Vi
Elizabeth I - CHILD_OF -> Anne Boleyn
Elizabeth I - CHILD_OF -> Henry Viii
Elizabeth I - MEMBER_OF -> House Of Tudor
Elizabeth I - QUEEN -> Ireland
E

In [15]:
def retriever(question: str):
    """
    Build the combined context string for a question.

    Logs the search query, gathers graph relationships via
    `structured_retriever`, gathers the page contents returned by
    `vector_index.similarity_search`, and returns both under the
    "Structured data:" / "Unstructured data:" headings.
    """
    print(f"Search query: {question}")
    graph_context = structured_retriever(question)

    passages = []
    for doc in vector_index.similarity_search(question):
        passages.append(doc.page_content)
    # "#Document " is used as the separator between consecutive passages.
    vector_context = "#Document ".join(passages)

    return (
        "Structured data:\n"
        f"{graph_context}\n"
        "Unstructured data:\n"
        f"{vector_context}\n"
        "    "
    )

In [16]:
# Condense a chat history and follow-up question into a standalone question
# The template text is runtime prompt content: {chat_history} and {question}
# are the two variables filled in when the prompt is formatted.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
# String prompt template built from the text above.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    """
    Flatten (human, ai) turn pairs into an alternating list of messages.

    Each pair contributes a HumanMessage followed by an AIMessage, in the
    order the turns appear in `chat_history`.
    """
    return [
        message
        for human, ai in chat_history
        for message in (HumanMessage(content=human), AIMessage(content=ai))
    ]

# Produces the string used as the search query from the chain's input dict:
# with chat history, the follow-up is rewritten into a standalone question;
# without it, the "question" value is returned unchanged.
_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        # Branch condition: true only when "chat_history" is present and non-empty.
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        # Replace the raw (human, ai) tuples with message objects, render the
        # condense prompt, and ask a chat model for the rewritten question.
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        # A separate ChatOpenAI instance (no model name given), not the `llm`
        # object used elsewhere in the notebook.
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [17]:
# Answer-generation prompt. The template text is runtime prompt content with
# two variables: {context} (retrieved data) and {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
# NOTE: this rebinds the name `prompt`, which the entity-extraction cell also
# uses; `entity_chain` was built from the earlier object and is unaffected
# unless that cell is re-run out of order.
prompt = ChatPromptTemplate.from_template(template)

# Question-answering pipeline.
# Input: a dict with "question" and, optionally, "chat_history" (a list of
# (human, ai) tuples). Output: the model's answer as a plain string.
chain = (
    # Resolve the standalone question first, so that retrieval and the answer
    # prompt both receive the same plain string. Previously the prompt's
    # {question} slot was filled with the entire input dict, and the condensed
    # question never reached the answer prompt.
    _search_query
    | RunnableParallel(
        {
            "context": RunnableLambda(retriever),
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [20]:
# Single-turn question: no chat history, so the question is used as-is for retrieval.
chain.invoke({"question": "Which house did Elizabeth I belong to?"})

Search query: Which house did Elizabeth I belong to?
generate full text query: Elizabeth~2 AND I~2
structured_retriever result: Elizabeth I - INTERACTED_WITH -> France
Elizabeth I - INTERACTED_WITH -> Netherlands
Elizabeth I - AIDED_BY -> Sir Francis Walsingham
Elizabeth I - QUEEN_OF -> England
Elizabeth I - SIBLING_OF -> Edward Vi
Elizabeth I - SUCCEEDED_BY -> James Vi Of Scotland
Elizabeth I - QUEEN_OF -> Ireland
Elizabeth I - SUPREME_GOVERNOR_OF -> Church Of England
Elizabeth I - ADVISED_BY -> William Cecil
Elizabeth I - SIBLING_OF -> Mary I
Elizabeth I - INTERACTED_WITH -> Spain
Elizabeth I - PARTICIPANT -> Elizabethan Religious Settlement
Elizabeth I - ADVISOR -> Sir Francis Walsingham
Elizabeth I - SUCCESSOR -> James Vi Of Scotland
Elizabeth I - SUPREME_GOVERNOR -> Church Of England
Elizabeth I - ADVISOR -> William Cecil
Elizabeth I - SIBLING -> Mary
Elizabeth I - SIBLING -> Edward Vi
Elizabeth I - CHILD_OF -> Anne Boleyn
Elizabeth I - CHILD_OF -> Henry Viii
Elizabeth I - MEMBER_

'Elizabeth I belonged to the House of Tudor.'

In [21]:
# Follow-up question: the chat history lets the chain rewrite "she" into a
# standalone question before retrieval (see the "Search query" line in the output).
chain.invoke(
    {
        "question": "When was she born?",
        "chat_history": [("Which house did Elizabeth I belong to?", "House Of Tudor")],
    }
)

Search query: When was Elizabeth I born?
generate full text query: Elizabeth~2 AND I~2
structured_retriever result: Elizabeth I - INTERACTED_WITH -> France
Elizabeth I - INTERACTED_WITH -> Netherlands
Elizabeth I - AIDED_BY -> Sir Francis Walsingham
Elizabeth I - QUEEN_OF -> England
Elizabeth I - SIBLING_OF -> Edward Vi
Elizabeth I - SUCCEEDED_BY -> James Vi Of Scotland
Elizabeth I - QUEEN_OF -> Ireland
Elizabeth I - SUPREME_GOVERNOR_OF -> Church Of England
Elizabeth I - ADVISED_BY -> William Cecil
Elizabeth I - SIBLING_OF -> Mary I
Elizabeth I - INTERACTED_WITH -> Spain
Elizabeth I - PARTICIPANT -> Elizabethan Religious Settlement
Elizabeth I - ADVISOR -> Sir Francis Walsingham
Elizabeth I - SUCCESSOR -> James Vi Of Scotland
Elizabeth I - SUPREME_GOVERNOR -> Church Of England
Elizabeth I - ADVISOR -> William Cecil
Elizabeth I - SIBLING -> Mary
Elizabeth I - SIBLING -> Edward Vi
Elizabeth I - CHILD_OF -> Anne Boleyn
Elizabeth I - CHILD_OF -> Henry Viii
Elizabeth I - MEMBER_OF -> House 

'Elizabeth I was born on 7 September 1533.'

In [22]:
# Ambiguous entity: a bare "Elizabeth" matches a node whose relationships appear
# to mix more than one historical person, so the answer merges two birthplaces.
chain.invoke({"question": "Where was Elizabeth born?"})

Search query: Where was Elizabeth born?
generate full text query: Elizabeth~2
structured_retriever result: Elizabeth - CHILD_OF -> Ann
Elizabeth - CHILD_OF -> Henry Viii
Elizabeth - CHILD_OF -> Peter The Great
Elizabeth - CHILD_OF -> Catherine
Elizabeth - SIBLING -> Alexei
Elizabeth - SUPPORTED_MILITARY_CAMPAIGN -> Ireland
Elizabeth - SUPPORTED_MILITARY_CAMPAIGN -> France
Elizabeth - SUPPORTED_MILITARY_CAMPAIGN -> Spain
Elizabeth - SUPPORTED_MILITARY_CAMPAIGN -> Netherlands
Elizabeth - REIGN -> Elizabethan Era
Elizabeth - ALIAS -> Gloriana
Elizabeth - ALIAS -> Good Queen Bess
Elizabeth - ALIAS -> Empress Of Russia
Elizabeth - ALIAS -> Elizaveta Petrovna
Elizabeth - BORN_IN -> Greenwich Palace
Elizabeth - BORN_IN -> Kolomenskoye
Elizabeth - NAMED_AFTER -> Elizabeth Of York
Elizabeth - NAMED_AFTER -> Lady Elizabeth Howard
Elizabeth - CHILD -> Catherine I Of Russia
Elizabeth - CHILD -> Tsar Peter The Great
Elizabeth - AUNT -> Peter Iii
Elizabeth - AUNT -> Peter Ii
Elizabeth - COUSIN -> An

'Elizabeth was born at Greenwich Palace and Kolomenskoye.'