# Tutorial 1: Quickstart
This tutorial loads a sample text file and splits it into chunks with the semantic chunker.


In [None]:
import sys
from pathlib import Path
# Make the project's `src` directory importable.
# The project root is taken to be the parent of the current working
# directory, so this expects the notebook to be launched from a folder
# directly beneath the root.
project_root = Path.cwd().parent
src_dir = str(project_root / 'src')
if src_dir not in sys.path:
    # Only add the entry once, even if this cell is re-run.
    sys.path.append(src_dir)

import logging
logging.basicConfig(level=logging.INFO)
from matome.engines.semantic_chunker import JapaneseSemanticChunker
from matome.engines.embedder import EmbeddingService
from domain_models.config import ProcessingConfig
import os


In [None]:
# Initialize Config
# Build the processing configuration from its defaults, then warn when the
# OPENROUTER_API_KEY environment variable is unset or empty.
config = ProcessingConfig()
has_api_key = bool(os.getenv('OPENROUTER_API_KEY'))
if not has_api_key:
    print('Warning: No API Key found. Using default config (might fail without mocks).')


In [None]:
# Initialize Components
# Note: Real embedding service requires internet/model download
# The embedding service is built from the shared config, and the chunker is
# then constructed around that embedder instance.
embedder = EmbeddingService(config)
chunker = JapaneseSemanticChunker(embedder)


In [None]:
# Load Data
# Read the sample document under <project_root>/test_data in a single call.
# UTF-8 is requested explicitly rather than relying on the platform's
# default encoding.
data_path = project_root / 'test_data' / 'sample.txt'
text = data_path.read_text(encoding='utf-8')
print(f'Loaded {len(text)} chars')


In [None]:
# Chunk Text
# Materialise the result of split_text into a list so it can be counted
# and sliced below.
chunks = [*chunker.split_text(text, config)]
print(f'Generated {len(chunks)} chunks.')
# Preview at most the first five chunks, showing up to 50 characters each.
for chunk in chunks[:5]:
    preview = chunk.text[:50]
    print(f'[{chunk.index}] {preview}...')
