# Tutorial 01: Quickstart (The "Aha! Moment")

Goal: Demonstrate how "Wisdom" can be extracted instantly from a complex text, and how Semantic Zooming reveals the detail beneath it.

We will:
1.  Set up a mock environment (to run without API keys).
2.  Ingest a sample text.
3.  Run the RAPTOR engine to build a knowledge tree.
4.  Visualize the root "Wisdom" node.
5.  Zoom in to reveal "Knowledge" nodes.

In [None]:
import logging
import os
import uuid
import shutil
from pathlib import Path
from typing import Any
from collections.abc import Iterable, Iterator
import numpy as np

from domain_models.config import ProcessingConfig
from domain_models.data_schema import DIKWLevel, NodeMetadata
from domain_models.manifest import SummaryNode, Chunk
from matome.engines.embedder import EmbeddingService
from matome.engines.cluster import GMMClusterer
from matome.engines.raptor import RaptorEngine
from matome.engines.token_chunker import JapaneseTokenChunker
from matome.agents.summarizer import SummarizationAgent
from matome.agents.strategies import WisdomStrategy, KnowledgeStrategy, BaseSummaryStrategy
from matome.utils.store import DiskChunkStore

# Logging: emit bare messages (no timestamp or level prefix) at INFO and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)
logger = logging.getLogger("matome")

# Mock mode: unconditionally overwrite the API key with the placeholder "mock".
# NOTE(review): presumably the library treats this value as its mock-mode
# switch -- confirm against the agent implementation.
os.environ.update(OPENROUTER_API_KEY="mock")

## 1. Mock Services
Since we want this tutorial to run instantly and without cost, we mock the Embedding Service and the Agent's response logic to simulate DIKW levels.

In [None]:
class MockEmbeddingService(EmbeddingService):
    """Offline stand-in for ``EmbeddingService`` that fabricates embeddings.

    Every input string is mapped to a random unit-length vector. The text
    itself is never inspected, so the vectors carry no semantic signal and
    differ from run to run (the global NumPy random state is not seeded here).
    """

    def __init__(self, config):
        super().__init__(config)
        # Length of every fabricated embedding vector.
        self.dim = 384

    def embed_strings(self, texts: Iterable[str]) -> Iterator[list[float]]:
        """Lazily yield one random, L2-normalised vector per item of *texts*."""
        for _text in texts:
            sample = np.random.rand(self.dim)
            length = np.linalg.norm(sample)
            yield (sample / length).tolist()

class LevelAwareMockAgent(SummarizationAgent):
    """Mock agent whose canned summary depends on the requested tree level.

    * ``level >= 2`` -> a fixed WISDOM summary
    * anything lower -> a KNOWLEDGE summary quoting the first 20 characters
      of the input text

    NOTE(review): ``_handle_mock_mode`` presumably overrides the hook the base
    agent calls in mock mode -- confirm against ``SummarizationAgent``.
    """

    def _handle_mock_mode(
        self,
        safe_text_str: str,
        context: dict[str, Any] | None,
        request_id: str,
        strategy: Any = None,
    ) -> SummaryNode:
        """Build a canned ``SummaryNode`` for the level found in *context*.

        Args:
            safe_text_str: Text being "summarised"; only its first 20
                characters are used, and only for KNOWLEDGE summaries.
            context: Optional mapping. ``"level"`` (default 1) selects the
                summary flavour; ``"children_indices"`` (default ``[]``) is
                copied onto the node.
            request_id: Unused by this mock.
            strategy: Unused by this mock.

        Returns:
            A ``SummaryNode`` with a fresh UUID4 id and matching DIKW metadata.
        """
        # A missing or empty context behaves exactly like "no keys supplied".
        ctx = context or {}
        level = ctx.get("level", 1)

        dikw, summary = (
            (
                DIKWLevel.WISDOM,
                "WISDOM: The essence of the text is that hierarchical understanding enables better decision making.",
            )
            if level >= 2
            else (
                DIKWLevel.KNOWLEDGE,
                f"KNOWLEDGE: This section explains the mechanism of {safe_text_str[:20]}...",
            )
        )

        node_id = str(uuid.uuid4())
        return SummaryNode(
            id=node_id,
            text=summary,
            level=level,
            children_indices=ctx.get("children_indices", []),
            metadata=NodeMetadata(dikw_level=dikw),
        )

## 2. Configuration & Initialization
We configure the engine to use our mocks and GMM clustering.

In [None]:
# Engine settings for the tutorial run.
config = ProcessingConfig(
    summarization_model="gpt-4o-mini",
    clustering_algorithm="gmm",
    n_clusters=2,  # Force branching to create levels
    chunk_buffer_size=10,
)

# Start every run from a fresh chunk database.
db_path = Path("tutorials/chunks.db")
# The path is relative to the current working directory; make sure its folder
# exists so the store can create the file wherever the notebook is launched.
db_path.parent.mkdir(parents=True, exist_ok=True)
# Remove any leftover database in one step (no exists()/unlink() race).
db_path.unlink(missing_ok=True)

# Wire the pipeline components together.
store = DiskChunkStore(db_path)
chunker = JapaneseTokenChunker()
embedder = MockEmbeddingService(config)
clusterer = GMMClusterer()
# Inject our level-aware agent in place of a real LLM-backed summarizer.
agent = LevelAwareMockAgent(config, strategy=WisdomStrategy())

engine = RaptorEngine(chunker, embedder, clusterer, agent, config)

## 3. Run Pipeline
We generate some dummy text and run the pipeline.

In [None]:
# Build a 200-line corpus, one numbered sentence per line, so there is enough
# text to be split into several chunks.
# NOTE(review): per the original author's note, the Japanese chunker splits on
# newlines when punctuation is missing -- confirm against JapaneseTokenChunker.
dummy_text = "\n".join(
    f"Matome 2.0 is a system for semantic zooming. Part {i}" for i in range(200)
)

print("Running RAPTOR...")
# Run the full pipeline over the corpus, using `store` as its chunk store.
tree = engine.run(dummy_text, store=store)
print("Done!")

## 4. Visualizing Wisdom (The "Aha! Moment")
We inspect the root node.

In [None]:
root = tree.root_node

if not isinstance(root, Chunk):
    # Summary root: show its DIKW label, tree level and summary text.
    dikw = root.metadata.dikw_level.value.upper()
    print(f"[{dikw}] (Level {root.level})")
    print(f"Summary: {root.text}")
else:
    # A raw Chunk at the root means no summary layer was built above the text.
    print("Tree did not generate summaries (only 1 chunk). Try increasing text length.")
    print(f"Chunk Content: {root.text[:100]}...")

## 5. Semantic Zooming
Now we zoom in to see the child nodes (Knowledge).

In [None]:
# Zooming only makes sense when the root is a summary node with children.
if not isinstance(root, Chunk):
    child_ids = root.children_indices
    children_map = store.get_nodes(child_ids)

    for child_id in child_ids:
        child = children_map.get(child_id)
        if not child:
            # Child missing from the store result: skip it silently.
            continue

        # Raw chunks are labelled DATA and truncated; summaries keep their
        # own DIKW label and full text.
        if isinstance(child, Chunk):
            label, text = "DATA", child.text[:100] + "..."
        else:
            label, text = child.metadata.dikw_level.value.upper(), child.text

        # Nodes without a `level` attribute are reported as level 0.
        child_level = getattr(child, "level", 0)
        print(f"\n[{label}] (Level {child_level})")
        print(f"Summary: {text}")

# Close the store whether or not any zooming happened.
store.close()