In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.graphs import Neo4jGraph
import os
from dotenv import load_dotenv
from typing import List
load_dotenv()
# Warning control
import warnings
warnings.filterwarnings("ignore")

**Load the environment variables from your `.env` file**

**Load the LLM (e.g: GEMINI 2.5 Flash)**

In [2]:
import spacy

client = ChatGoogleGenerativeAI(
    model = os.getenv("MODEL_NAME"),
    temperature = os.getenv("TEMPERATURE"),
    google_api_key=os.getenv("GOOGLE_API_KEY"),
)

nlp = spacy.load(os.getenv("EMBEDDING_MODEL_NAME"))

def embed_text(text:str)->List:
    """
    Embeds the given text using the specified model.

    Parameters:
        text (str): The text to be embedded.

    Returns:
        List: A list containing the embedding of the text.
    """
    doc = nlp(text)
    return doc.vector # 300-dim vector

**Add Neo4j credentials (These information need to be kept secret)**

In [3]:
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "12345678"
NEO4J_DATABASE = 'neo4j'

In [4]:
graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE)

  graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE)


**Sample question for RAG:**

In [5]:
question = "What movies are about love?"

**Get the questions embedding:**

In [6]:
question_embedding = embed_text(question)
question_embedding[:10]

array([-0.67266995,  0.08774167, -0.54867333, -0.36176834, -0.04267567,
       -0.02948567,  0.11805334, -0.40875998,  0.13430484,  1.8334517 ],
      dtype=float32)

**Perform Similarity Search using the question's embedding on the vector index of the graph database and get the results**

In [7]:
result = graph.query("""
    with $question_embedding as question_embedding      // Use the provided question embedding as 'question_embedding'
    CALL db.index.vector.queryNodes(                    // Call the vector index query function
        'movie_tagline_embeddings',                     // Name of the vector index to query against
        $top_k,                                         // Number of top results to retrieve
        question_embedding                              // The question embedding to compare against
        ) YIELD node AS movie, score                    // Yield each matched node and its similarity score
    RETURN movie.title, movie.tagline, score            // Return the title, tagline, and similarity score of each movie
    """,
    params={
        "question_embedding": question_embedding,       # Pass the question embedding as a parameter
        "top_k": 3                                      # Specify the number of top results to retrieve
    })
result

[{'movie.title': 'Waiting to Exhale',
  'movie.tagline': 'Friends are the people who let you be yourself... and never let you forget it.',
  'score': 0.9490612745285034},
 {'movie.title': 'Grumpier Old Men',
  'movie.tagline': 'Still Yelling. Still Fighting. Still Ready for Love.',
  'score': 0.9432224035263062},
 {'movie.title': 'GoldenEye',
  'movie.tagline': 'No limits. No fears. No substitutes.',
  'score': 0.9357520937919617}]

**Pass the results to an LLM for the final answer**

In [8]:
prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"

system_msg = SystemMessage(
            content="You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer."
        )
user_msg = HumanMessage(content=prompt)
response = client.invoke([system_msg, user_msg])
print(response.content)

Based on the search results, here are the movies that appear to be about love:

*   **Grumpier Old Men** (Tagline: "Still Yelling. Still Fighting. Still Ready for Love.")
*   **Waiting to Exhale** (While the tagline focuses on friendship, the movie itself explores the romantic relationships and lives of its main characters.)


-----------------------

**Note: In this usecase, there is a higher chance of hallucination due to lack of enough evidence for the LLM to use its own judgment. The contents of the vector DB and the system role can address this issue to some extent.**

-----------------------

**Second example (in one go):**

In [9]:
question = "What movies are about adventure?"
question_embedding = embed_text(question)
result = graph.query("""
    with $question_embedding as question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings', 
        $top_k, 
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """,
    params={
        "question_embedding": question_embedding,
        "top_k": 5
    })

prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"

system_msg = SystemMessage(
            content="You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer."
        )
user_msg = HumanMessage(content=prompt)
response = client.invoke([system_msg, user_msg])

print(response.content)

Based on the search results, the movies that are about adventure are:

*   **GoldenEye**
*   **Balto**
*   **Tom and Huck**
