In [None]:
# Data Pipeline Testing

This notebook tests the complete data pipeline:
1. Note ingestion via FastAPI
2. LLM processing (tagging, entity extraction)
3. Graph storage in Neo4j
4. Retrieval and querying

## Pipeline Steps
- **Input**: Raw text notes
- **Processing**: LLM analysis and structuring
- **Storage**: Neo4j graph database
- **Output**: Structured, searchable knowledge graph


In [None]:
import json
import os
import sys
import time
from datetime import datetime

import requests

# Add src to path
sys.path.append('../src')

from dotenv import load_dotenv
load_dotenv()

# Import pipeline components
# from src.backend.ingestion import NoteIngestionService
# from src.backend.tagging import TaggingService
# from src.graph.neo4j_client import Neo4jClient

# API endpoints (adjust based on your FastAPI setup).
# Each value can be overridden through the environment, including the .env
# file loaded by load_dotenv() above; when a variable is unset or empty the
# local-development default is used.
# NOTE(review): the variable names API_BASE_URL / NEO4J_URL are assumed —
# confirm they match the keys used in the project's .env file.
# The trailing slash is stripped so f"{API_BASE_URL}/notes" never yields "//notes".
API_BASE_URL = (os.environ.get("API_BASE_URL") or "http://localhost:8000").rstrip("/")
NEO4J_URL = os.environ.get("NEO4J_URL") or "bolt://localhost:7687"

print("Pipeline testing setup complete!")


In [None]:
def test_note_ingestion(note_content, title=None):
    """Test note ingestion via API"""
    payload = {
        "content": note_content,
        "title": title or f"Test Note {datetime.now().strftime('%Y%m%d_%H%M%S')}",
        "created_at": datetime.now().isoformat()
    }
    
    try:
        response = requests.post(f"{API_BASE_URL}/notes", json=payload)
        if response.status_code == 200:
            print(f"✅ Note ingested successfully: {response.json()}")
            return response.json()
        else:
            print(f"❌ Ingestion failed: {response.status_code} - {response.text}")
            return None
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to API. Make sure FastAPI server is running.")
        return None
    except Exception as e:
        print(f"❌ Error during ingestion: {e}")
        return None

def test_pipeline_end_to_end(note_content):
    """Test the complete pipeline"""
    print("=== TESTING COMPLETE PIPELINE ===")
    print(f"Input note: {note_content[:100]}...")
    
    # Step 1: Ingest note
    print("\n1. Testing note ingestion...")
    result = test_note_ingestion(note_content)
    
    if result:
        note_id = result.get('id')
        print(f"Note ID: {note_id}")
        
        # Step 2: Check processing status
        print("\n2. Checking processing status...")
        time.sleep(2)  # Wait for background processing
        
        # Step 3: Verify in Neo4j
        print("\n3. Verifying storage in Neo4j...")
        # Add Neo4j verification code here
        
        print("✅ Pipeline test completed!")
    else:
        print("❌ Pipeline test failed at ingestion step")

# Sample notes fed to the pipeline: a meeting, a reading note and a deadline change.
# Adjacent string literals are joined at compile time, so each entry is one string.
test_notes = [
    (
        "Meeting with John about the new AI project scheduled for "
        "next Friday at 2 PM in the conference room."
    ),
    (
        "Read an interesting article about quantum computing applications "
        "in cryptography. Need to research more about Shor's algorithm."
    ),
    (
        "Project deadline moved to December 20th. "
        "Need to update timeline and inform the team."
    ),
]

print("Pipeline testing functions ready!")
