# Q&A Agent With Graph DB

Building a Question and Answering application over a Graph Database

# Setup 

In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

## Openai
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")


# Nvidia
os.environ["NVIDIA_API_KEY"] = os.getenv("NVIDIA_API_KEY")

## langsmith tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

## Huggingface
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

## Neo4j
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")


In [7]:
from langchain_community.graphs import Neo4jGraph

graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

## Import dataset

I woul like to cite and thanks [Tomaz Bratanic](https://github.com/tomasonjo) -- Graph ML and GenAI research at Neo4j -- to provide your public [datasets](https://github.com/tomasonjo/blog-datasets).

In [12]:
## Dataset Movie reviews

movie_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' AS row

MERGE (m:Movie{movieId: toInteger(row.movieId)})
SET m.title = row.title,
    m.release = datetime(row.release),
    m.imdbRating = toFloat(row.imdbRating)

FOREACH (director IN split(row.director, '|') |
    MERGE (d:Director{name: trim(director)})
    MERGE (m)-[:DIRECTED_BY]->(d)
    MERGE (d)-[:DIRECTOR_OF]->(m)
)

FOREACH (actor IN split(row.actors, '|') |
    MERGE (a:Actor{name: trim(actor)})
    MERGE (a)-[:ACTOR_OF]->(m)
    MERGE (m)-[:HAS_ACTOR]->(a)
)

FOREACH (genre IN split(row.genres, '|') |
    MERGE (g:Genre{name: trim(genre)})
    MERGE (m)-[:HAS_GENRE]->(g)
    MERGE (g)-[:GENRE_OF]->(m)
)
"""


In [13]:
graph.query(movie_query)

Failed to read from defunct connection IPv4Address(('si-01de3140-caed.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687)))


[]

#### 📊 **Gaph Relatioship Illustration of `ACTOR_OF`:**

![Relação ACTOR_OF](image/actor_of.png)


#### 📊 **Gaph Relatioship Illustration of `HAS_GENRE`:**

![Relação ACTOR_OF](image/genre.png)

In [15]:
graph.refresh_schema()
print(graph.schema)

Failed to read from defunct connection IPv4Address(('si-01de3140-caed.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687)))


Node properties:
CEO {name: STRING, POB: STRING, YOB: INTEGER}
Company {name: STRING}
User {name: STRING, city: STRING, userId: INTEGER, age: INTEGER}
Post {userId: INTEGER}
Movie {movieId: INTEGER, title: STRING, imdbRating: FLOAT}
Director {name: STRING}
Actor {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:CEO)-[:CEO]->(:Company)
(:User)-[:POSTED]->(:Post)
(:User)-[:FRIEND]->(:User)
(:User)-[:LIKES]->(:User)
(:Movie)-[:DIRECTED_BY]->(:Director)
(:Movie)-[:HAS_ACTOR]->(:Actor)
(:Movie)-[:HAS_GENRE]->(:Genre)
(:Director)-[:DIRECTOR_OF]->(:Movie)
(:Actor)-[:ACTOR_OF]->(:Movie)
(:Genre)-[:GENRE_OF]->(:Movie)


### [Langchain Cypher Query framework](https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/)

In [None]:
#from langchain_groq import ChatGroq

#llm = ChatGroq(model_name="llama3-8b-8192", temperature=0)

In [None]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [None]:
from langchain.chains import GraphCypherQAChain

chain = GraphCypherQAChain.from_llm(
    llm=llm, 
    graph=graph, 
    top_k=3, 
    verbose=True,
    allow_dangerous_requests=True, 
    return_intermediate_steps=True,
    use_function_response=True,  # This parameter passes results as function output, very important
)

chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7de4dadbfbb0>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x7de493edbc40>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7de493d2a9e0>, root_client=<openai.OpenAI object at 0x7de493d

In [57]:
response = chain.invoke({"query": "What are the movies directed by Christopher Nolan?"})
response

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


Generated Cypher:
[32;1m[1;3mMATCH (d:Director {name: 'Christopher Nolan'})-[:DIRECTOR_OF]->(m:Movie)
RETURN m.title, m.imdbRating[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'What are the movies directed by Christopher Nolan?',
 'result': "Sorry, I couldn't find information on movies directed by Christopher Nolan.",
 'intermediate_steps': [{'query': "MATCH (d:Director {name: 'Christopher Nolan'})-[:DIRECTOR_OF]->(m:Movie)\nRETURN m.title, m.imdbRating"},
  {'context': []}]}

In [58]:
response = chain.invoke({"query": "What are the movies directed by Joe Johnston?"})
response

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


Generated Cypher:
[32;1m[1;3mMATCH (d:Director {name: 'Joe Johnston'})-[:DIRECTOR_OF]->(m:Movie)
RETURN m.title, m.imdbRating[0m
Full Context:
[32;1m[1;3m[{'m.title': 'Jumanji', 'm.imdbRating': 6.9}][0m

[1m> Finished chain.[0m


{'query': 'What are the movies directed by Joe Johnston?',
 'result': 'Joe Johnston directed the movie "Jumanji."',
 'intermediate_steps': [{'query': "MATCH (d:Director {name: 'Joe Johnston'})-[:DIRECTOR_OF]->(m:Movie)\nRETURN m.title, m.imdbRating"},
  {'context': [{'m.title': 'Jumanji', 'm.imdbRating': 6.9}]}]}

`use_function_response=True` tells the chain to *trust and use the tool’s (Cypher)* response directly when forming the final answer, instead of relying on the LLM’s default “be conservative if unsure” behavior. In practice, it:

- Injects the query result (rows returned by Neo4j) into the answer step in a structured way.

- Reduces “I don’t know” fallbacks, because the LLM now sees a clear, machine-formatted function/tool result.

- Acts similarly to return_direct=True, but still lets the LLM phrase the result in natural language.

In [60]:
response = chain.invoke({"query": "What are the best movies ?"})
response

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


Generated Cypher:
[32;1m[1;3mMATCH (m:Movie)
RETURN m.title, m.imdbRating
ORDER BY m.imdbRating DESC
LIMIT 10[0m


Failed to read from defunct connection IPv4Address(('si-01de3140-caed.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687)))


Full Context:
[32;1m[1;3m[{'m.title': 'Shawshank Redemption, The', 'm.imdbRating': 9.3}, {'m.title': 'Pulp Fiction', 'm.imdbRating': 8.9}, {'m.title': 'Star Wars: Episode IV - A New Hope', 'm.imdbRating': 8.7}][0m

[1m> Finished chain.[0m


{'query': 'What are the best movies ?',
 'result': 'The best movies are "The Shawshank Redemption" (IMDB 9.3), "Pulp Fiction" (IMDB 8.9), and "Star Wars: Episode IV - A New Hope" (IMDB 8.7).',
 'intermediate_steps': [{'query': 'MATCH (m:Movie)\nRETURN m.title, m.imdbRating\nORDER BY m.imdbRating DESC\nLIMIT 10'},
  {'context': [{'m.title': 'Shawshank Redemption, The', 'm.imdbRating': 9.3},
    {'m.title': 'Pulp Fiction', 'm.imdbRating': 8.9},
    {'m.title': 'Star Wars: Episode IV - A New Hope', 'm.imdbRating': 8.7}]}]}

In [55]:
# Fix the GraphCypherQAChain with correct parameters
print("🔧 Creating a working chain with correct parameters...")
print("="*60)

from langchain.prompts import PromptTemplate
from langchain.chains import GraphCypherQAChain

# Create a custom QA prompt that explicitly tells the LLM to use the context
custom_qa_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""You are a helpful assistant that answers questions about movies using the provided graph database context.

Context from the database: {context}

Question: {question}

IMPORTANT: Use the information from the context above to answer the question. If the context contains relevant data, provide a clear answer based on that data. If the context is empty or doesn't contain relevant information, say "I don't have enough information to answer this question."

Answer:"""
)

# Create a new chain with the correct qa_prompt parameter
working_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    qa_prompt=custom_qa_prompt,  # This is the correct parameter name
    top_k=5,
    verbose=True,
    allow_dangerous_requests=True,
    return_intermediate_steps=True
)

print("✅ Working chain created with correct qa_prompt parameter!")

# Test the working chain
print("\n🧪 Testing the working chain...")
test_query = "What are the movies directed by Joe Johnston?"

try:
    response = working_chain.invoke({"query": test_query})
    print(f"✅ Query: {test_query}")
    print(f"✅ Answer: {response.get('result', 'No result')}")
    
    # Show the intermediate steps
    if 'intermediate_steps' in response:
        print("\n🔍 Intermediate steps:")
        for i, step in enumerate(response['intermediate_steps']):
            if isinstance(step, dict):
                if 'query' in step:
                    print(f"  Generated Cypher: {step['query']}")
                if 'context' in step:
                    print(f"  Context: {step['context']}")
            else:
                print(f"  Step {i+1}: {step}")
                
except Exception as e:
    print(f"❌ Error: {str(e)}")


Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


🔧 Creating a working chain with correct parameters...
✅ Working chain created with correct qa_prompt parameter!

🧪 Testing the working chain...
Generated Cypher:
[32;1m[1;3mMATCH (d:Director {name: 'Joe Johnston'})-[:DIRECTOR_OF]->(m:Movie)
RETURN m.title[0m
Full Context:
[32;1m[1;3m[{'m.title': 'Jumanji'}][0m

[1m> Finished chain.[0m
✅ Query: What are the movies directed by Joe Johnston?
✅ Answer: I don't have enough information to answer this question.

🔍 Intermediate steps:
  Generated Cypher: MATCH (d:Director {name: 'Joe Johnston'})-[:DIRECTOR_OF]->(m:Movie)
RETURN m.title
  Context: [{'m.title': 'Jumanji'}]


In [51]:
# Alternative approach: Use use_function_response parameter
print("🔧 Testing use_function_response parameter...")
print("="*60)

# Create a chain with use_function_response parameter
function_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    use_function_response=True,  # This parameter passes results as function output
    allow_dangerous_requests=True,
    return_intermediate_steps=True
)

print("✅ Function response chain created!")

# Test the function response chain
print("\n🧪 Testing function response chain...")
test_queries = [
    "What are the movies directed by Joe Johnston?",
    "Who directed Jumanji?",
    "What are some action movies?"
]

for query in test_queries:
    print(f"\n❓ Query: {query}")
    try:
        response = function_chain.invoke({"query": query})
        print(f"✅ Answer: {response.get('result', 'No result')}")
        
        # Show context if available
        if 'intermediate_steps' in response and response['intermediate_steps']:
            for step in response['intermediate_steps']:
                if isinstance(step, dict) and 'context' in step:
                    context = step['context']
                    if context:
                        print(f"🔍 Context used: {context}")
                    else:
                        print("🔍 No context found")
                    break
                    
    except Exception as e:
        print(f"❌ Error: {str(e)}")
    
    print("-" * 40)


  return cls(**value_as_dict)
Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


🔧 Testing use_function_response parameter...
✅ Function response chain created!

🧪 Testing function response chain...

❓ Query: What are the movies directed by Joe Johnston?
Generated Cypher:
[32;1m[1;3mMATCH (d:Director {name: 'Joe Johnston'})-[:DIRECTOR_OF]->(m:Movie)
RETURN m.title[0m


Failed to read from defunct connection IPv4Address(('si-01de3140-caed.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687)))


Full Context:
[32;1m[1;3m[{'m.title': 'Jumanji'}][0m


Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")



[1m> Finished chain.[0m
✅ Answer: Joe Johnston directed the movie "Jumanji."
🔍 Context used: [{'m.title': 'Jumanji'}]
----------------------------------------

❓ Query: Who directed Jumanji?
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTOR_OF]->(m:Movie {title: 'Jumanji'}) RETURN d.name[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Joe Johnston'}][0m


Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")



[1m> Finished chain.[0m
✅ Answer: Jumanji was directed by Joe Johnston.
🔍 Context used: [{'d.name': 'Joe Johnston'}]
----------------------------------------

❓ Query: What are some action movies?
Generated Cypher:
[32;1m[1;3mMATCH (g:Genre {name: 'Action'})<-[:HAS_GENRE]-(m:Movie)
RETURN m.title, m.imdbRating[0m
Full Context:
[32;1m[1;3m[{'m.title': 'Heat', 'm.imdbRating': 8.2}, {'m.title': 'Sudden Death', 'm.imdbRating': 5.7}, {'m.title': 'GoldenEye', 'm.imdbRating': 7.2}, {'m.title': 'Cutthroat Island', 'm.imdbRating': 5.6}, {'m.title': 'Money Train', 'm.imdbRating': 5.6}, {'m.title': 'Assassins', 'm.imdbRating': 6.3}, {'m.title': 'Dead Presidents', 'm.imdbRating': 6.8}, {'m.title': 'Mortal Kombat', 'm.imdbRating': 5.8}, {'m.title': 'Lawnmower Man 2: Beyond Cyberspace', 'm.imdbRating': 2.4}, {'m.title': 'From Dusk Till Dawn', 'm.imdbRating': 7.3}][0m

[1m> Finished chain.[0m
✅ Answer: Some action movies include "Heat," "Sudden Death," "GoldenEye," "From Dusk Till Dawn," a