# Tutorial 3: Full Raptor Pipeline
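End-to-end summarization: wire a chunker, embedder, clusterer, and summarizer into a `RaptorEngine` and run it on a sample text file.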


In [None]:
import sys
from pathlib import Path
# Make the project's `src` directory importable. The project root is taken to
# be the parent of the current working directory.
project_root = Path.cwd().parent
_src_dir = str(project_root.joinpath('src'))
if _src_dir not in sys.path:
    # Only append when absent so re-running the cell does not add duplicates.
    sys.path.append(_src_dir)

import logging
# Configure the root logger at INFO level. Per the stdlib docs, basicConfig
# does nothing if the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)
from matome.engines.raptor import RaptorEngine
from matome.engines.semantic_chunker import JapaneseSemanticChunker
from matome.engines.embedder import EmbeddingService
from matome.engines.cluster import GMMClusterer
from matome.agents.summarizer import SummarizationAgent
from matome.utils.store import DiskChunkStore
from domain_models.config import ProcessingConfig
import os


In [None]:
# Shared processing configuration, reused by the embedder, summarizer and engine
# below. Both values are set to 2 here.
# NOTE(review): presumably kept small to suit the short sample input — confirm.
config = ProcessingConfig(n_clusters=2, umap_n_neighbors=2)
# Chunk store built with default arguments; it is passed to `engine.run` later.
# NOTE(review): the original note calls this a temporary DB — where it lives and
# whether it needs explicit cleanup is not visible from this notebook.
store = DiskChunkStore() # Temp DB


In [None]:
# Initialize the pipeline components.
# The embedder is configured from the shared `config`.
embedder = EmbeddingService(config)
# The chunker is given the embedder instance created above.
chunker = JapaneseSemanticChunker(embedder)
# The clusterer is built with default arguments; it does not receive `config`.
# NOTE(review): `config` sets n_clusters, so presumably the engine supplies the
# config when clustering — confirm against RaptorEngine/GMMClusterer.
clusterer = GMMClusterer()
# The summarizer is configured from the same shared `config`.
summarizer = SummarizationAgent(config)

# Wire the four components and the shared config into the engine.
engine = RaptorEngine(
    chunker=chunker,
    embedder=embedder,
    clusterer=clusterer,
    summarizer=summarizer,
    config=config
)


In [None]:
# Run the pipeline on the sample document.
# The text is read up front (as UTF-8), so a missing sample file raises even
# when the run itself is skipped.
data_path = project_root / 'test_data' / 'sample.txt'
text = data_path.read_text(encoding='utf-8')

# The engine is only invoked when OPENROUTER_API_KEY is set to a non-empty
# value; otherwise a notice is printed and nothing is run.
if os.environ.get('OPENROUTER_API_KEY'):
    tree = engine.run(text, store=store)
    print(f'Tree Root: {tree.root_node.id}')
else:
    print('Skipping run due to missing API Key (Mocking not implemented in this notebook)')
