In [8]:
pip install -r requirements.txt

Collecting langchain-community>=0.3.27 (from -r requirements.txt (line 3))
  Using cached langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-experimental>=0.3.4 (from -r requirements.txt (line 4))
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting wikipedia>=1.4.0 (from -r requirements.txt (line 5))
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community>=0.3.27->-r requirements.txt (line 3))
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community>=0.3.27->-r requirements.txt (line 3))
  Using cached pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community>=0.3.27->-r requirements.txt (line 3))
  Using cached httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting 

In [2]:
pip install neo4j

Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
from neo4j import GraphDatabase
from langchain_groq import ChatGroq
from langchain_neo4j  import Neo4jGraph
from dotenv import load_dotenv

load_dotenv(override=True)

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.prompts import ChatPromptTemplate

In [59]:
# Connect to Neo4j. Connection settings are read from the environment (which
# load_dotenv populates from a .env file) so that no credentials are stored in
# the notebook. NEO4J_PASSWORD is required; URI and username fall back to the
# local defaults this notebook was developed against.
kg = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ["NEO4J_PASSWORD"],
    enhanced_schema=True,
)

TypeError: Neo4jGraph.__init__() got an unexpected keyword argument 'database_config'

# Wipe Out The DB

In [40]:
# DESTRUCTIVE: matches every node in the connected database and deletes it
# together with all of its relationships. Only run against a scratch database.
kg.query("MATCH(n) DETACH DELETE n")

[]

# Initialize LLM

In [42]:
# Groq-hosted chat model used for both graph extraction and question answering.
# The API key is taken from the GROQ_KEY environment variable; temperature is 0.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    groq_api_key=os.environ["GROQ_KEY"],
    temperature=0,
)

In [43]:
# Pull the Wikipedia search results for "Ramayana".
raw_documents = WikipediaLoader(query="Ramayana").load()

# Split only the first three loaded pages into token-based chunks
# (512 tokens per chunk, 24 tokens of overlap between neighbours).
text_splitter = TokenTextSplitter(
    chunk_overlap=24,
    chunk_size=512,
)
documents = text_splitter.split_documents(raw_documents[:3])



  lis = BeautifulSoup(html).find_all('li')


# Generate Knowledge Graph

In [44]:
# Wrap the chat model in an LLMGraphTransformer and convert the chunked
# documents into graph documents for ingestion into Neo4j.
# NOTE(review): this presumably issues LLM calls per chunk and may be slow or
# rate-limited — confirm before re-running on larger inputs.
transformer = LLMGraphTransformer(llm=llm)
graph_documents = transformer.convert_to_graph_documents(documents)

# Ingest Data To KG

In [45]:
# Write the extracted graph into Neo4j.
# - include_source: also store the source documents alongside the entities
#   (presumably as Document nodes linked via MENTIONS — verify in the schema).
# - baseEntityLabel: presumably tags every entity node with the shared
#   __Entity__ label that the full-text index is created on — TODO confirm.
kg.add_graph_documents(
    graph_documents,
    include_source=True,
    baseEntityLabel=True,
)

# Create FULL TEXT INDEX

In [46]:
# Create (once) the full-text index named "entity" over the `id` property of
# __Entity__ nodes; IF NOT EXISTS makes re-running this cell a no-op.
kg.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS "
    "FOR (e:__Entity__) ON EACH [e.id]"
)

[]

# Schema Visualization

In [56]:
# print() rather than a bare expression so the multi-line schema string is
# rendered with real line breaks instead of as an escaped repr.
print(kg.schema)

Node properties:
- **Document**
  - `id`: STRING Available options: ['95376fe4974ad6af7ed541292732f6e6', '943f711142ed09016566c94df41c4289', '077c764928f6df391f2bc56c8fb1fb60', '16bf162388d87cdbbf595b5e856b3857', 'c19c9b670ba929dd23fa7f760fc29b77', '98926dcad461da97ee60f6dfa7a3dbfa', '692722004c6121da2b2db5da2984e8fe', 'c9ced462c75058643845b3a325db5f33']
  - `text`: STRING Available options: [' Buddhist. Its most important moral influence was ', ' narrative contradictions between these two volume', 'Ramayana: The Legend of Prince Rama is a 1993 anim', ' Bulbul Mukherjee – Dasharatha Madhulika Varma – K', ' B. B. Lal near Allahabad in Uttar Pradesh (India)', 'Rama (; Sanskrit: राम, IAST: Rāma, Sanskrit: [ˈraː', ' significantly from the Hindu versions. Jain Texts', 'The Ramayana (; Sanskrit: रामायणम्, romanized: Rām']
  - `summary`: STRING Available options: ['The Ramayana (; Sanskrit: रामायणम्, romanized: Rām', 'Ramayana: The Legend of Prince Rama is a 1993 anim', 'Rama (; Sanskrit: राम

In [48]:
from pydantic import BaseModel, Field
from typing import List

class Entities(BaseModel):
    """Identifying information about entities."""

    # Required list of entity names. The class docstring and this field's
    # description are part of the schema passed to the model through
    # `with_structured_output`, so their wording affects extraction.
    names: List[str] = Field(
        default=...,
        description="All the person, organization, or business entities that appear in the text",
    )

# Two-message prompt: a system instruction plus a human turn that carries the
# user's question in the `{question}` placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are extracting organization and person entities from the text."),
    ("human", "Use the given format to extract information from the following input: {question}"),
])

# Pipe the prompt into the LLM constrained to return an `Entities` object.
entity_chain = prompt | llm.with_structured_output(Entities)

In [49]:
def generate_full_text_query(input: str) -> str:
    """
    Generate a fuzzy full-text search query for a given input string.

    Lucene special characters are removed from the input, the remainder is
    split on whitespace, and a similarity threshold (``~2``, i.e. up to two
    changed characters) is appended to each word. The words are then combined
    with the AND operator. Useful for mapping entities from user questions to
    database values while tolerating some misspellings.

    Args:
        input: Free text, typically an entity name extracted from a question.

    Returns:
        The query string, e.g. ``"Rama~2 AND Sita~2"``. An empty string is
        returned when the input contains no searchable words (empty,
        whitespace-only, or nothing left after removing Lucene characters);
        callers should skip the index lookup in that case.
    """
    # str.split() with no arguments never yields empty strings, so no extra
    # filtering is needed, and joining an empty list safely gives "".
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

In [50]:
def structured_retriever(question: str) -> str:
    """
    Collect the graph neighborhood of the entities mentioned in the question.

    Entities are extracted from the question with `entity_chain`. Each entity
    is looked up in the `entity` full-text index (at most two hits), and every
    non-MENTIONS relationship touching a hit is rendered as one line of the
    form ``source - TYPE -> target``.

    Args:
        question: The user's natural-language question.

    Returns:
        All relationship lines for all entities, one per line. Empty string
        when no entity is found or none of them has matching neighbors.
    """
    entities = entity_chain.invoke({"question": question})
    lines = []
    for entity in entities.names:
        # Blank names cannot be turned into a full-text query; skip them
        # instead of sending an empty query to the index.
        if not entity.strip():
            continue
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            continue
        response = kg.query(
            # `WITH node` imports the matched node into each branch of the
            # subquery. Without it the inner `node` is a fresh variable and the
            # subquery is not restricted to the full-text hit.
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output
            """,
            {"query": full_text_query},
        )
        lines.extend(el["output"] for el in response)
    # Join once at the end so lines belonging to different entities are also
    # separated by a newline instead of being fused together.
    return "\n".join(lines)

In [51]:
def retriever(question: str):
    """
    Build the context block for a question.

    Echoes the question to stdout, fetches the structured graph data for it
    via `structured_retriever`, and returns that data under a
    "Structured data:" heading.
    """
    print(f"Search query: {question}")
    neighborhood = structured_retriever(question)
    # Layout: heading line, the data, then a newline followed by four spaces.
    return "Structured data:\n{}\n    ".format(neighborhood)

In [52]:
def answer_question(question: str):
    """
    Answer a question using only the knowledge-graph context retrieved for it.

    The context from `retriever` and the question are filled into a
    two-message chat prompt (system instruction + human turn), which is sent
    to the module-level `llm`.

    Args:
        question: The user's natural-language question.

    Returns:
        The `content` of the model's response.
    """
    context = retriever(question)
    template = """Answer the question based only on the following context:
{context}
Question: {question}"""
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are to convert structured data into a clean, human-readable answer. Also print the relevent structure data you are refering from the context."),
        ("human", template),
    ])
    messages = prompt.format_messages(context=context, question=question)
    # Use the Runnable `invoke` API; calling the chat model object directly
    # (`llm(messages)`) is deprecated in LangChain.
    response = llm.invoke(messages)
    return response.content

In [58]:
print(answer_question("how is Kritarth Trivedi related to Rama"))
print(answer_question("who is the author of ramayana"))

Search query: how is Kritarth Trivedi related to Rama




There is no information in the provided structured data about Kritarth Trivedi or their relation to Rama. 

Relevant structured data: None
Search query: who is the author of ramayana




The author of the Ramayana is Valmiki.

Relevant structured data:
Valmiki - AUTHOR -> Ramayana
