# Tutorial 02: Interactive Refinement (Deep Dive)

Goal: Demonstrate the ability to "talk to your knowledge base" by refining specific nodes.

We will:
1.  Load the knowledge graph created in Tutorial 01.
2.  Select a specific "Knowledge" node.
3.  Refine it with an instruction (e.g., "Explain this to a 5-year-old").
4.  Verify the update and trace back to source data.

In [None]:
import logging
import os
import uuid
from pathlib import Path
from typing import Any
from collections.abc import Iterable, Iterator
import numpy as np

from domain_models.config import ProcessingConfig
from domain_models.manifest import SummaryNode, Chunk
from matome.engines.embedder import EmbeddingService
from matome.engines.interactive_raptor import InteractiveRaptorEngine
from matome.agents.summarizer import SummarizationAgent
from matome.agents.strategies import BaseSummaryStrategy
from matome.utils.store import DiskChunkStore

# Logging: print bare messages (no level/timestamp prefix) at INFO and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)
logger = logging.getLogger("matome")

# Force Mock Mode: set a placeholder API key for this process
# (presumably the library treats the value "mock" as the switch; confirm).
os.environ.update({"OPENROUTER_API_KEY": "mock"})

In [None]:
# Mock Embedding Service (Same as Tutorial 01)
class MockEmbeddingService(EmbeddingService):
    """Stand-in embedding service that emits random unit-length vectors."""

    def __init__(self, config):
        """Initialise the parent service with ``config`` and fix the vector size at 384."""
        super().__init__(config)
        self.dim = 384

    def embed_strings(self, texts: Iterable[str]) -> Iterator[list[float]]:
        """Lazily yield one random, L2-normalised vector per input string.

        The text content itself is ignored; each vector is drawn from
        ``np.random.rand`` and therefore differs between runs.
        """
        for _ignored_text in texts:
            raw = np.random.rand(self.dim)
            magnitude = np.linalg.norm(raw)
            yield (raw / magnitude).tolist()

# Mock Agent with specific response for refinement
class RefinementMockAgent(SummarizationAgent):
    """Summarization agent whose mock-mode output echoes the refinement instruction."""

    def _handle_mock_mode(
        self,
        safe_text_str: str,
        context: dict[str, Any] | None,
        request_id: str,
        strategy: Any = None,
    ) -> SummaryNode:
        """Build a canned ``SummaryNode`` instead of producing a real summary.

        Args:
            safe_text_str: Input text. Only its first 20 characters are used,
                and only when no instruction is present.
            context: Optional dict read for the keys ``"instruction"``,
                ``"level"``, ``"children_indices"`` and ``"metadata"``.
                ``None`` or an empty dict selects the defaults for all four.
            request_id: Not used by this mock.
            strategy: Not used by this mock.

        Returns:
            A new ``SummaryNode`` with a fresh UUID. Its text starts with
            ``"REFINED: "`` when a non-empty instruction was supplied, and is
            a short ``"Summary of ..."`` stub otherwise.
        """
        # Normalise once so every lookup below is safe when context is None.
        # Previously the metadata lookup was unguarded and raised
        # AttributeError for a None context.
        ctx = context or {}

        # A non-empty instruction means we are refining an existing node.
        instruction = ctx.get("instruction", "")
        if instruction:
            summary = f"REFINED: {instruction} -> content updated."
        else:
            summary = f"Summary of {safe_text_str[:20]}..."

        return SummaryNode(
            id=str(uuid.uuid4()),
            text=summary,
            level=ctx.get("level", 1),
            children_indices=ctx.get("children_indices", []),
            metadata=ctx.get("metadata", {}),
        )

## 1. Load Session
We connect to the existing database.

In [None]:
# Location of the chunk database that Tutorial 01 is expected to have produced.
db_path = Path("tutorials/chunks.db")
if not db_path.exists():
    # Only report the problem; execution deliberately continues so the
    # notebook can still be run standalone.
    print("Error: tutorials/chunks.db not found. Please run Tutorial 01 first.")

store = DiskChunkStore(db_path)
config = ProcessingConfig(summarization_model="gpt-4o-mini")

# Wire the mock agent into the interactive engine.
agent = RefinementMockAgent(
    config,
    strategy=BaseSummaryStrategy(),
)
engine = InteractiveRaptorEngine(
    store=store,
    agent=agent,
    config=config,
)

## 2. Select a Node
We pick a Knowledge node (Level 1) to refine.

In [None]:
# Ask the store for every node it files under the "knowledge" level.
knowledge_nodes = [*store.get_nodes_by_level("knowledge")]

if knowledge_nodes:
    # Normal path: refine the first Knowledge node the store returned.
    target_node = knowledge_nodes[0]
    print(f"Selected Node: {target_node.id}")
    print(f"Original Text: {target_node.text}")
else:
    # Nothing came back, so fall back to a placeholder node that keeps the
    # rest of the tutorial runnable.
    # NOTE(review): the first message mentions fetching all summaries, but no
    # such fetch is performed here.
    print("No KNOWLEDGE nodes found via metadata query. Fetching all summaries.")
    print("Warning: Could not find Knowledge nodes. Creating a dummy one for demonstration.")
    from domain_models.data_schema import NodeMetadata, DIKWLevel

    dummy_metadata = NodeMetadata(dikw_level=DIKWLevel.KNOWLEDGE)
    target_node = SummaryNode(
        id="dummy-node",
        text="Original complex text.",
        level=1,
        children_indices=[],
        metadata=dummy_metadata,
    )
    # Persist the placeholder so the engine can look it up by id.
    store.add_summary(target_node)

## 3. Refine the Node
We send an instruction to rewrite the node.

In [None]:
# Natural-language instruction that drives the rewrite.
instruction = "Explain this to a 5-year-old"
print(f"Refining with instruction: '{instruction}'...")

# The engine returns the refined node; the selected node is addressed by id.
updated_node = engine.refine_node(
    target_node.id,
    instruction,
)

# Show the new text and the refinement history recorded in the node's metadata.
print(f"\nUpdated Text: {updated_node.text}")
print(f"Refinement History: {updated_node.metadata.refinement_history}")

## 4. Verify Traceability
We verify that we can still trace back to the original source chunks.

In [None]:
# Look up the source chunks behind the refined node.
source_chunks = engine.get_source_chunks(updated_node.id)
print(f"\nSource Chunks Found: {len(source_chunks)}")

if not source_chunks:
    print("No source chunks found (dummy node has no children).")
else:
    # Preview only the first 50 characters of the first chunk.
    print(f"First Source Chunk: {source_chunks[0].text[:50]}...")

# Close the store now that the tutorial is finished with it.
store.close()