# Neo4j Visualization Feature Test

This notebook tests the new visualization feature for the Text2Cypher workflow. It demonstrates:
1. LLM detection of visualization requests
2. Enhanced generation with visualization-specific prompts
3. Graph execution with the `Result.graph` transformer
4. Side-by-side comparison of regular vs visualization queries

In [9]:
import sys

# Make the project root importable from this notebook. The path is relative,
# so it is resolved against the kernel's current working directory.
project_root = "../.."
sys.path.append(project_root)

In [10]:
from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph
from langchain_openai import ChatOpenAI

from neo4j_text2cypher.retrievers import ConfigCypherExampleRetriever
from neo4j_text2cypher.utils.config import ConfigLoader
from neo4j_text2cypher.utils.debug import setup_debug_logging

# Import enhanced components for visualization
from neo4j_text2cypher.components.text2cypher.visualization_detector import detect_visualization_request
from neo4j_text2cypher.components.text2cypher.generation.node_with_viz import create_text2cypher_generation_node_with_viz
from neo4j_text2cypher.components.text2cypher.execution.node_with_graph import create_text2cypher_execution_with_graph_node

In [11]:
# Load environment variables via python-dotenv before any client is created.
# The Neo4jGraph and ChatOpenAI objects in later cells are constructed without
# explicit credentials, so they presumably read them from the environment
# (e.g. NEO4J_* / OPENAI_API_KEY) - confirm against your .env file.
# The call is the last expression in the cell, so its return value is shown
# as the cell output.
load_dotenv()

True

## Initialize Components with Visualization Support

In [12]:
# One config file drives the demo: it is read here for debug settings and in
# later cells for example queries and the Streamlit settings.
config_path = "app-config.yml"
config_loader = ConfigLoader(config_path)

# Configure debug logging from the settings found in the config file
debug_config = config_loader.get_debug_config()
setup_debug_logging(debug_config)

# Open the Neo4j connection (no credentials are passed explicitly here)
graph = Neo4jGraph(enhanced_schema=True)
print("Connected to Neo4j")

# The schema text is long, so only its first 500 characters are previewed
schema_preview = graph.schema[:500]
print(f"Schema: {schema_preview}...")

Connected to Neo4j
Schema: Node properties:
- **Customer**
  - `id`: STRING Example: "9A79438B"
  - `ageBucket`: STRING Example: ">=70"
  - `gender`: STRING Available options: ['Male', 'Female', '']
- **Category**
  - `id`: STRING Available options: ['Exterior', 'Driving Experience', 'Features/Controls/Displays (FCD)', 'Driving Assistance', 'Infotainment', 'Seats', 'Climate', 'Interior', 'Powertrain']
- **Problem**
  - `id`: STRING Example: "EXT01"
  - `problem`: STRING Example: "EXT01: Doors - Hard to open/close"
- **Que...


In [13]:
# Initialize the chat model shared by the detection and generation cells below.
# temperature=0 is requested to keep responses as repeatable as possible; the
# model name is hard-coded here rather than read from app-config.yml.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
print("LLM initialized")

LLM initialized


In [None]:
# Build the example retriever from the same config file used above
cypher_example_retriever = ConfigCypherExampleRetriever(config_path=config_path)

# Streamlit settings supply the app title and scope description shown below
streamlit_config = config_loader.get_streamlit_config()

# Report how many examples of each kind the config file provides
regular_query_count = len(config_loader.get_example_queries())
print(f"Loaded {regular_query_count} regular example queries from: {config_path}")
viz_query_count = len(config_loader.get_visualization_examples())
print(f"Loaded {viz_query_count} visualization example queries from: {config_path}")

print(f"App: {streamlit_config.title}")
scope_preview = streamlit_config.scope_description[:100]
print(f"Scope: {scope_preview}...")

# Exercise both retriever methods; the results are treated as text below
# (len() is reported in characters and the text is split on blank lines)
regular_examples = cypher_example_retriever.get_examples()
viz_examples = cypher_example_retriever.get_visualization_examples()

print("\nRetriever results:")
print(f"Regular examples length: {len(regular_examples)} chars")
print(f"Visualization examples length: {len(viz_examples)} chars")

# Show only the first example, i.e. everything before the first blank line
print("\nFirst visualization example:")
if viz_examples:
    first_viz_example = viz_examples.split('\n\n')[0]
else:
    first_viz_example = "No visualization examples found"
print(first_viz_example)

## Test 1: Visualization Detection

Test the LLM's ability to detect when questions request visualization.

In [None]:
# Test questions - mix of regular and visualization requests
test_questions = [
    "How many Honda Civic responses are there?",  # Regular
    "Visualize the connections between Honda Pilot complaints and categories",  # Visualization
    "Show me the network of problems for Acura models",  # Visualization
    "What is the average age of complainants?",  # Regular
    "Display the relationship between categories and problems",  # Visualization
    "Count the number of problems per category"  # Regular
]

print("Testing Visualization Detection:")
print("=" * 50)

for question in test_questions:
    is_viz = await detect_visualization_request(llm, question)
    status = "🔍 VISUALIZATION" if is_viz else "📊 REGULAR"
    print(f"{status}: {question}")

print("\nDetection complete!")

## Test 2: Enhanced Generation Node

Test the generation node that uses different prompts and examples for visualization requests.

In [None]:
# Create the enhanced generation node from the shared LLM, Neo4j connection
# and example retriever. The returned object is awaited with a state dict in
# the cells below (`await enhanced_generation(state)`), and its result is read
# with .get('statement'), .get('visualization_requested') and .get('cypher_steps').
enhanced_generation = create_text2cypher_generation_node_with_viz(
    llm=llm,
    graph=graph,
    cypher_example_retriever=cypher_example_retriever
)

print("Enhanced generation node created!")

In [None]:
# Test both regular and visualization generation
test_cases = [
    ("How many Honda Civic complaints are there?", "Regular Question"),
    ("Visualize how Honda Pilot complaints connect to categories", "Visualization Question")
]

print("Testing Enhanced Generation:")
print("=" * 60)

for question, question_type in test_cases:
    print(f"\n{question_type}: {question}")
    print("-" * 60)
    
    # Create state for generation
    state = {
        "task": question,
        "prev_steps": []
    }
    
    # Generate Cypher
    result = await enhanced_generation(state)
    
    print(f"Visualization Requested: {result.get('visualization_requested', False)}")
    print(f"Generated Cypher:")
    print(result.get('statement', 'No statement generated'))
    print(f"Steps: {result.get('cypher_steps', [])}")

## Test 3: Enhanced Execution Node

Test the execution node that can return both regular results and graph data.

In [None]:
# Create the enhanced execution node bound to the shared Neo4j connection.
# The returned object is awaited with a state dict in the cells below
# (task, statement, cypher_steps, visualization_requested, errors), and its
# result is read with .get('cyphers'); each entry there is read for 'records'
# and 'graph_result'.
enhanced_execution = create_text2cypher_execution_with_graph_node(graph)

print("Enhanced execution node created!")

In [None]:
# Test execution with a visualization query
viz_question = "Visualize Honda Pilot complaints and their categories"

print(f"Testing visualization execution for: {viz_question}")
print("=" * 60)

# First generate the query
gen_state = {
    "task": viz_question,
    "prev_steps": []
}

gen_result = await enhanced_generation(gen_state)
print(f"Generated query: {gen_result.get('statement')}")
print(f"Visualization requested: {gen_result.get('visualization_requested')}")

# Now execute it
exec_state = {
    "task": viz_question,
    "statement": gen_result.get('statement'),
    "cypher_steps": gen_result.get('cypher_steps', []),
    "visualization_requested": gen_result.get('visualization_requested', False),
    "errors": []
}

exec_result = await enhanced_execution(exec_state)

# Check results
cyphers = exec_result.get('cyphers', [])
if cyphers:
    cypher_output = cyphers[0]
    records = cypher_output.get('records', [])
    graph_result = cypher_output.get('graph_result')
    
    print(f"\nExecution Results:")
    print(f"Regular records: {len(records)} records")
    print(f"Graph result available: {graph_result is not None}")
    
    if graph_result:
        if hasattr(graph_result, 'nodes') and hasattr(graph_result, 'relationships'):
            print(f"Graph contains: {len(graph_result.nodes)} nodes, {len(graph_result.relationships)} relationships")
            
            # Show first few nodes (fix the slicing issue)
            nodes_list = list(graph_result.nodes)
            relationships_list = list(graph_result.relationships)
            
            print("\nFirst 3 nodes:")
            for i, node in enumerate(nodes_list[:3]):
                labels = list(node.labels) if hasattr(node, 'labels') else []
                properties = dict(list(node.items())[:2]) if hasattr(node, 'items') else {}
                print(f"  Node {i+1}: {labels} - {properties}")
            
            # Show first few relationships
            print("\nFirst 3 relationships:")
            for i, rel in enumerate(relationships_list[:3]):
                print(f"  Rel {i+1}: {rel.type}")
        else:
            print(f"Graph result type: {type(graph_result)}")
    
    # Show some regular records too
    print(f"\nFirst 3 regular records:")
    for i, record in enumerate(records[:3]):
        print(f"  Record {i+1}: {record}")
else:
    print("No cypher results found")

## Test 4: Complete Workflow Comparison

Compare regular vs visualization workflows side by side.

In [None]:
# Test the same semantic question both ways
base_question = "Honda Pilot complaints and their categories"
regular_question = f"How many {base_question} are there?"
viz_question = f"Visualize {base_question}"

print("SIDE-BY-SIDE COMPARISON")
print("=" * 60)
print(f"Base topic: {base_question}")
print(f"Regular question: {regular_question}")
print(f"Visualization question: {viz_question}")
print("\n")

# Process both questions
questions = [(regular_question, "REGULAR"), (viz_question, "VISUALIZATION")]

for question, qtype in questions:
    print(f"\n{qtype} WORKFLOW: {question}")
    print("-" * 60)
    
    # Generation
    gen_state = {"task": question, "prev_steps": []}
    gen_result = await enhanced_generation(gen_state)
    
    viz_requested = gen_result.get('visualization_requested', False)
    statement = gen_result.get('statement', '')
    
    print(f"1. Detection: {'🔍 VISUALIZATION' if viz_requested else '📊 REGULAR'}")
    print(f"2. Generated Cypher: {statement}")
    
    # Execution
    exec_state = {
        "task": question,
        "statement": statement,
        "cypher_steps": gen_result.get('cypher_steps', []),
        "visualization_requested": viz_requested,
        "errors": []
    }
    
    exec_result = await enhanced_execution(exec_state)
    cyphers = exec_result.get('cyphers', [])
    
    if cyphers:
        cypher_output = cyphers[0]
        records = cypher_output.get('records', [])
        graph_result = cypher_output.get('graph_result')
        
        print(f"3. Execution:")
        print(f"   - Regular records: {len(records)}")
        print(f"   - Graph result: {'Yes' if graph_result else 'No'}")
        
        if graph_result and hasattr(graph_result, 'nodes'):
            print(f"   - Graph nodes: {len(graph_result.nodes)}")
            print(f"   - Graph relationships: {len(graph_result.relationships)}")
        
        # Show actual results/answers
        print(f"4. Results:")
        if qtype == "REGULAR":
            # Show the actual answer for regular queries (usually a count or aggregation)
            for i, record in enumerate(records[:3]):
                print(f"   Answer: {record}")
        else:
            # For visualization queries, show sample nodes/relationships
            if graph_result and hasattr(graph_result, 'nodes'):
                nodes_list = list(graph_result.nodes)
                relationships_list = list(graph_result.relationships)
                
                print(f"   Sample nodes:")
                for i, node in enumerate(nodes_list[:2]):
                    labels = list(node.labels) if hasattr(node, 'labels') else []
                    props = dict(list(node.items())[:2]) if hasattr(node, 'items') else {}
                    print(f"     Node {i+1}: {labels[0] if labels else 'Unknown'} - {props}")
                
                print(f"   Sample relationships:")
                for i, rel in enumerate(relationships_list[:2]):
                    print(f"     Rel {i+1}: {rel.type}")
            else:
                # Fallback to regular records if graph result not available
                for i, record in enumerate(records[:2]):
                    print(f"     Record {i+1}: {record}")
    
    print(f"\n✅ {qtype} workflow completed")

print("\n🎉 Side-by-side comparison completed!")

## Test 5: Conversation with Mixed Questions

Test a conversation that mixes regular and visualization questions.

In [None]:
# Simulate a mixed conversation
conversation_questions = [
    "How many Honda Civic complaints are there?",  # Regular
    "Visualize the relationship between these complaints and their categories",  # Visualization
    "What's the most common problem category?",  # Regular
    "Show me the network of problems connected to that category"  # Visualization
]

print("MIXED CONVERSATION TEST")
print("=" * 50)

for i, question in enumerate(conversation_questions, 1):
    print(f"\nStep {i}: {question}")
    print("-" * 40)
    
    # Full generation and execution test
    gen_state = {"task": question, "prev_steps": []}
    gen_result = await enhanced_generation(gen_state)
    
    viz_requested = gen_result.get('visualization_requested', False)
    statement = gen_result.get('statement', '')
    
    print(f"Type: {'🔍 VISUALIZATION' if viz_requested else '📊 REGULAR'}")
    print(f"Query: {statement[:100]}{'...' if len(statement) > 100 else ''}")
    
    # Execute the query to show actual results
    exec_state = {
        "task": question,
        "statement": statement,
        "cypher_steps": gen_result.get('cypher_steps', []),
        "visualization_requested": viz_requested,
        "errors": []
    }
    
    exec_result = await enhanced_execution(exec_state)
    cyphers = exec_result.get('cyphers', [])
    
    if cyphers:
        cypher_output = cyphers[0]
        records = cypher_output.get('records', [])
        graph_result = cypher_output.get('graph_result')
        
        if viz_requested:
            print(f"🎯 Executed with graph transformer")
            if graph_result and hasattr(graph_result, 'nodes'):
                print(f"   Graph: {len(graph_result.nodes)} nodes, {len(graph_result.relationships)} relationships")
                
                # Show sample from graph
                nodes_list = list(graph_result.nodes)
                if nodes_list:
                    sample_node = nodes_list[0]
                    labels = list(sample_node.labels) if hasattr(sample_node, 'labels') else []
                    print(f"   Sample: {labels[0] if labels else 'Unknown'} node")
            else:
                print(f"   No graph data returned")
        else:
            print(f"📈 Standard execution")
            if records:
                print(f"   Result: {records[0] if len(records) == 1 else f'{len(records)} records'}")
            else:
                print(f"   No results returned")
    else:
        print(f"❌ Execution failed")

print("\n✅ Mixed conversation test completed!")
print("\n📝 Summary: The system successfully detects when to use visualization vs regular processing")
print("and executes the appropriate workflow with actual results.")