### Import libraries and configure settings

In [None]:
import os
from dotenv import load_dotenv
from pprint import pprint

from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain_neo4j.vectorstores.neo4j_vector import Neo4jVector
from langchain_neo4j.chains.graph_qa.cypher import GraphCypherQAChain
from langchain.prompts import PromptTemplate

from src.factory.llm import fetch_llm
from src.config import Source, ChunkerConf, LLMConf, EmbedderConf, KnowledgeGraphConfig
from src.graph.knowledge_graph import KnowledgeGraph
from src.ingestion.embedder import ChunkEmbedder

# Load settings from the local env file; override=True lets values in the file
# replace variables already present in the process environment.
# load_dotenv returns a bool (whether any variable was set), not the values.
env = load_dotenv(dotenv_path='config.env', override=True)

In [6]:
# --- Neo4j connection -------------------------------------------------------
# Credentials are read from the environment; os.getenv yields None for any
# variable that is not set.
kg_config = KnowledgeGraphConfig(
    index_name="vector",
    uri=os.getenv("NEO4J_URI"),
    user=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

# --- Chunking ---------------------------------------------------------------
# NOTE(review): size/overlap units (characters vs. tokens) depend on the
# project's chunker implementation - confirm.
chunker_conf = ChunkerConf(type="recursive", chunk_size=1000, chunk_overlap=100)

# --- Azure OpenAI chat model ------------------------------------------------
llm_conf = LLMConf(
    type="azure-openai",
    model=os.getenv("AZURE_OPENAI_LLM_MODEL_NAME"),
    deployment=os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_NAME"),
    endpoint=os.getenv("AZURE_OPENAI_LLM_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_LLM_VERSION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    temperature=0,
)

# --- Azure OpenAI embedding model -------------------------------------------
# Uses the same API key variable as the chat model, with its own endpoint,
# deployment and API version.
embedder_conf = EmbedderConf(
    type="azure-openai",
    model=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_NAME"),
    deployment=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
    endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_EMBEDDING_VERSION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

In [7]:
# Build the embedder first: the knowledge graph is handed its embeddings model.
embedder = ChunkEmbedder(conf=embedder_conf)
knowledge_graph = KnowledgeGraph(conf=kg_config, embeddings_model=embedder.embeddings)

# Check the Neo4j connection and credentials up front, before any query cell
# runs. This reaches into the private `_driver` attribute of KnowledgeGraph.
knowledge_graph._driver.verify_connectivity()
# Last expression of the cell: its result is what the notebook displays.
knowledge_graph._driver.verify_authentication()

2026-01-13 10:37:05,140 - src.ingestion.embedder - INFO - Embedder of type 'ModelType.AZURE_OPENAI' initialized.


True

### Query against both vector store and knowledge graph

In [31]:
# Single question reused by both approaches below (vector-only RAG and the
# graph-based Cypher QA chain) so their answers can be compared side by side.
query = "Where was Lucy born?"

### Vanilla RAG (vector search only, without the graph)

In [32]:
# Baseline: answer the question from vector-retrieved chunks only (no graph traversal).
prompt="""
You are a helpful assistant. Write clear, natural, and concise answers.
Ground your response primarily in the provided CONTEXT. If the context is insufficient
to answer the question, say so briefly and indicate what's missing. Follow safety and
content policies at all times.

CONTEXT:
{context}

QUESTION:
{question}

Instructions:
- Use only what's relevant from the CONTEXT to answer the QUESTION.
- Do not invent facts beyond the CONTEXT.
- If needed, say you don't have enough information.
- Write in a friendly, human tone.
"""
# from_template infers the input variables ({context}, {question}) from the
# placeholders, so they do not need to be assigned by hand afterwards.
template = PromptTemplate.from_template(prompt)

# Top-5 similarity search over the knowledge graph's vector store.
retriever = knowledge_graph.vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

llm = fetch_llm(conf=llm_conf)

# Feed the model the text of the retrieved chunks. Interpolating the list of
# Document objects directly would insert their Python repr (metadata included)
# into the prompt instead of clean context.
retrieved_docs = retriever.invoke(query)
context = "\n\n".join(doc.page_content for doc in retrieved_docs)

# Format through the PromptTemplate built above rather than the raw string.
response = llm.invoke(
    input=template.format(context=context, question=query),
)
print(response.content)

2026-01-13 10:55:08,323 - src.factory.llm - INFO - Fetching LLM model 'gpt-4o-mini'..
2026-01-13 10:55:08,908 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.AZURE_OPENAI'


I'm sorry, but the provided context does not mention anyone named Lucy or where she was born. It only discusses Marco Rossi and his background in Rome, Italy. If you have more information or another question, feel free to ask!


### Cypher query generation (graph QA)

In [33]:
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain

In [34]:
# Prompt that asks the LLM to restate the user's question in the vocabulary of
# the graph schema before it is handed to the Cypher-generating chain.
prompt = """
Your task is to rephrase a user's question based on the schema of a graph database that will be given to you. 

Do not mention anything else, just rephrase the question from the user to be as coherent as possible with the schema of the graph.
Do not make things up or add any information on your own. 

SCHEMA: {schema}
QUESTION: {question}

REPHRASED_QUESTION: 
"""
# Bind the schema as a partial variable at construction time so that only
# {question} remains a required input. Mutating input_variables and
# partial_variables after the fact leaves "schema" listed as both.
# NOTE(review): `get_structured_schema` is passed exactly as before (no call
# parentheses); confirm whether it is a property or a method on KnowledgeGraph.
template = PromptTemplate.from_template(
    prompt,
    partial_variables={"schema": knowledge_graph.get_structured_schema},
)

In [35]:
# Ask the LLM to restate the original query using the schema-aware template;
# only the text of the reply is kept.
rephrase_response = llm.invoke(input=template.format(question=query))
rephrased_question = rephrase_response.content
rephrased_question

'In which city or country was Lucy born?'

In [36]:
# Graph QA chain: the LLM writes a Cypher query, the chain runs it against the
# knowledge graph, and the LLM answers from the returned rows.
graph_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=knowledge_graph,
    # Print the generated Cypher and the retrieved context while running.
    verbose=True,
    # Ask the chain to validate the generated Cypher before running it.
    validate_cypher=True,
    # Explicit opt-in: the chain executes LLM-generated Cypher against the
    # database, so the Neo4j credentials should be narrowly scoped.
    allow_dangerous_requests=True,
)

# Query the graph with the schema-aligned question rather than the raw one.
graph_answer = graph_chain.invoke(rephrased_question)
graph_answer



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person {name: 'Lucy'})-[:BORN_IN]->(location)
RETURN location.name AS birthplace
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'In which city or country was Lucy born?',
 'result': "I don't know the answer."}