In [3]:
# Make OPENAI_API_KEY available to later cells without writing the key into
# the notebook. The previous `%set_env OPENAI_API_KEY=${OPENAI_API_KEY}` stored
# the literal text "${OPENAI_API_KEY}" as the key (see the recorded cell
# output) and overwrote any real key already present in the environment.
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    # Prompt only when the kernel did not inherit a key; input is not echoed.
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")


env: OPENAI_API_KEY=${OPENAI_API_KEY}


In [None]:
# `os` is imported here because this cell runs before the main import cell;
# without it a fresh kernel fails with NameError on os.getenv.
import os

from langchain_anthropic import ChatAnthropic

# Chat model used later to answer the question with and without retrieved context.
llm = ChatAnthropic(
    api_key=os.getenv("ANTHROPIC_API_KEY"),
    model="claude-3-7-sonnet-20250219",
    max_tokens=2000
)


In [4]:
import os
from neo4j import GraphDatabase
from openai import OpenAI
import numpy as np

# Initialize clients
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Neo4j connection. Each setting can be overridden through the environment;
# the fallbacks are the values this notebook originally hard-coded, so
# existing local setups keep working unchanged.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(review): the fallback below is a credential literal kept only for
# backward compatibility. Set NEO4J_PASSWORD and remove the literal before
# sharing or committing this notebook.
password = os.environ.get("NEO4J_PASSWORD", "sktelecom")
driver = GraphDatabase.driver(uri, auth=(username, password))

class Neo4jVectorExample:
    """Small chunk-level vector store built on Neo4j and OpenAI embeddings.

    Documents are stored as ``(:Document)`` nodes and their chunks as
    ``(:Chunk)-[:CONTAINED_IN]->(:Document)``. Both node kinds carry an
    ``embedding`` property; only ``Chunk.embedding`` is covered by the
    vector index.

    Args:
        driver: Neo4j driver; only ``driver.session()`` is used, and the
            session must be a context manager exposing ``run(query, params)``.
        embedding_model: Model name sent to the OpenAI embeddings endpoint.
            The index is created with 1536 dimensions, so the chosen model
            should produce 1536-dimensional vectors.
    """

    def __init__(self, driver, embedding_model="text-embedding-3-small"):
        self.driver = driver
        self.embedding_model = embedding_model

    def create_vector_index(self):
        """Create the ``chunk_embeddings`` vector index on ``Chunk.embedding``.

        The statement uses ``IF NOT EXISTS``, so calling this repeatedly is
        harmless; the confirmation message is printed either way.
        """
        with self.driver.session() as session:
            # Create vector index for chunks
            session.run("""
                CREATE VECTOR INDEX chunk_embeddings IF NOT EXISTS
                FOR (c:Chunk) ON c.embedding
                OPTIONS {
                  indexConfig: {
                    `vector.dimensions`: 1536,
                    `vector.similarity_function`: 'cosine'
                  }
                }
            """)

            print("Vector index created successfully")

    def generate_embedding(self, text):
        """Return the embedding vector for ``text``.

        Uses the module-level ``openai_client`` and ``self.embedding_model``.

        Args:
            text: Text to embed.

        Returns:
            The ``embedding`` of the first item in the API response.
        """
        response = openai_client.embeddings.create(
            input=text,
            model=self.embedding_model
        )
        return response.data[0].embedding

    def add_document_with_chunks(self, title, chunks):
        """Store a document and its chunks, each with an embedding.

        Every embedding is computed before anything is written, and the
        document plus all of its chunks are created by one Cypher statement.
        A failure while embedding therefore leaves the graph untouched, and
        the write itself is all-or-nothing instead of one query per chunk.

        Args:
            title: Title stored on the ``Document`` node.
            chunks: Iterable of chunk texts, in document order. The position
                of each chunk in this iterable is stored as ``position``.

        Raises:
            ValueError: If ``chunks`` is empty (there is nothing to embed).
        """
        # Materialise once so generators work and len() is available below.
        chunks = list(chunks)
        if not chunks:
            raise ValueError("chunks must contain at least one text chunk")

        # Document embedding is computed from all chunk text joined by spaces.
        doc_embedding = self.generate_embedding(" ".join(chunks))

        # Embed every chunk up front: an API error here must not leave a
        # half-written document behind.
        chunk_rows = [
            {
                "text": chunk_text,
                "position": position,
                "embedding": self.generate_embedding(chunk_text),
            }
            for position, chunk_text in enumerate(chunks)
        ]

        with self.driver.session() as session:
            # One statement creates the document and fans out over the chunks,
            # so no node-id round trip is needed to link them.
            write_result = session.run("""
                CREATE (d:Document {
                    title: $title,
                    embedding: $embedding
                })
                WITH d
                UNWIND $chunks AS chunk
                CREATE (c:Chunk {
                    text: chunk.text,
                    position: chunk.position,
                    embedding: chunk.embedding
                })
                CREATE (c)-[:CONTAINED_IN]->(d)
            """, {
                "title": title,
                "embedding": doc_embedding,
                "chunks": chunk_rows
            })
            # Drain the result so a server-side failure surfaces here rather
            # than after the success message has been printed.
            write_result.consume()

            print(f"Added document '{title}' with {len(chunks)} chunks")

    def vector_search(self, query_text, k=5):
        """Return the chunks most similar to ``query_text``.

        Args:
            query_text: Natural-language query; it is embedded with
                ``generate_embedding`` before searching.
            k: Number of nearest chunks requested from the vector index.

        Returns:
            A list of dicts with keys ``"document"`` (document title),
            ``"chunk"`` (chunk text) and ``"similarity"`` (index score),
            ordered by descending similarity. Chunks that have no
            ``CONTAINED_IN`` relationship to a ``Document`` are not returned.
        """
        # Generate embedding for query
        query_embedding = self.generate_embedding(query_text)

        with self.driver.session() as session:
            # Vector search using the index
            result = session.run("""
                // Vector similarity search on chunks
                CALL db.index.vector.queryNodes('chunk_embeddings', $k, $embedding)
                YIELD node, score
                
                // Get document info
                MATCH (node)-[:CONTAINED_IN]->(doc:Document)
                
                RETURN 
                    doc.title AS document_title,
                    node.text AS chunk_text,
                    score AS similarity
                ORDER BY similarity DESC
            """, {
                "embedding": query_embedding,
                "k": k
            })

            # Records must be read while the session is still open.
            return [
                {
                    "document": record["document_title"],
                    "chunk": record["chunk_text"],
                    "similarity": record["similarity"],
                }
                for record in result
            ]


example = Neo4jVectorExample(driver)

# One-time database setup, off by default. Step 2 uses CREATE, so each run
# inserts another copy of the sample document; enable the flag only for a
# database that has not been populated yet.
RUN_SETUP = False

if RUN_SETUP:
    # 1. Create vector index
    example.create_vector_index()

    # 2. Add sample document with chunks
    sample_doc = "Introduction to Graph Databases"
    sample_chunks = [
        "A graph database is a database that uses graph structures for semantic queries with nodes, edges, and properties to represent and store data.",
        "Neo4j is a graph database management system developed by Neo4j, Inc. It is a native graph database that uses a property graph data model.",
        "Vector search in Neo4j combines the power of graph relationships with vector similarity for more contextual retrieval.",
        "Graph-based RAG systems can leverage both semantic similarity and explicit relationships between document chunks."
    ]
    example.add_document_with_chunks(sample_doc, sample_chunks)
    


In [17]:
# 3. Perform vector search
user_query = "How can I use vector search with graphs?"
context = example.vector_search(user_query, k=3)

# Render the hits as numbered plain-text passages. Interpolating `context`
# directly would paste the Python repr of a list of dicts (braces, quotes,
# raw similarity floats) into the prompt.
context_text = "\n\n".join(
    f"[{rank}] {hit['document']}: {hit['chunk']}"
    for rank, hit in enumerate(context, 1)
)

prompt = f"""Based on the following information, please answer the user's question:

CONTEXT:
{context_text}

USER QUESTION: {user_query}

Answer the question using only the provided context. If you cannot answer from the context, say so.
"""

# 4. Display results
print("\nSearch Results for:", user_query)
print("-" * 60)
for i, result in enumerate(context, 1):
    print(f"{i}. Document: {result['document']}")
    print(f"   Chunk: {result['chunk']}")
    print(f"   Similarity: {result['similarity']:.4f}")
    print()


Search Results for: How can I use vector search with graphs?
------------------------------------------------------------
1. Document: Introduction to Graph Databases
   Chunk: Vector search in Neo4j combines the power of graph relationships with vector similarity for more contextual retrieval.
   Similarity: 0.8459

2. Document: Introduction to Graph Databases
   Chunk: A graph database is a database that uses graph structures for semantic queries with nodes, edges, and properties to represent and store data.
   Similarity: 0.7191

3. Document: Introduction to Graph Databases
   Chunk: Graph-based RAG systems can leverage both semantic similarity and explicit relationships between document chunks.
   Similarity: 0.6808



In [18]:
# Baseline: send the bare question to the model, with no retrieved context.
baseline_response = llm.invoke(user_query)
print(baseline_response.content)

# Vector Search with Graphs

Vector search can be combined with graph database techniques to create powerful search and recommendation systems. Here are ways to integrate these approaches:

## Key Implementation Approaches

1. **Hybrid Search Architecture**
   - Use vector search for semantic similarity matching
   - Use graph traversal for relationship-based queries
   - Combine results for more contextually relevant answers

2. **Knowledge Graphs with Vector Embeddings**
   - Store vector embeddings as properties on graph nodes
   - Perform vector similarity search to find entry points
   - Use graph traversal to explore connected entities

3. **Graph Neural Networks (GNNs)**
   - Generate node embeddings that incorporate both content and structural information
   - Allows for similarity search that considers graph topology

## Practical Implementation Strategies

- **Two-Stage Query Processing**:
  ```
  1. Find relevant nodes via vector similarity search
  2. Explore graph connecti

In [19]:
# Retrieval-augmented: same question, wrapped in the prompt that carries the retrieved chunks.
rag_response = llm.invoke(prompt)
print(rag_response.content)

Based on the provided context, vector search in Neo4j combines the power of graph relationships with vector similarity to enable more contextual retrieval. Graph-based RAG (Retrieval Augmented Generation) systems can leverage both semantic similarity and explicit relationships between document chunks. This suggests that when using vector search with graphs, you can benefit from both the semantic similarity capabilities of vector search and the relational structure that graphs provide, resulting in more contextually relevant retrieval of information.

The context doesn't provide specific implementation details or step-by-step instructions on how to use vector search with graphs.
