diff --git a/.env.example b/.env.example
index 442da0c0..7c4e121b 100644
--- a/.env.example
+++ b/.env.example
@@ -22,5 +22,36 @@ FORECAST_MAX_HORIZON=90
 FORECAST_MODEL_ARTIFACTS_DIR=./artifacts/models
 FORECAST_ENABLE_LIGHTGBM=false
 
+# RAG Configuration
+# Embedding Provider: "openai" or "ollama"
+RAG_EMBEDDING_PROVIDER=openai
+
+# OpenAI Configuration (when RAG_EMBEDDING_PROVIDER=openai)
+OPENAI_API_KEY=sk-your-openai-api-key-here
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+
+# Ollama Configuration (when RAG_EMBEDDING_PROVIDER=ollama)
+# OLLAMA_BASE_URL=http://localhost:11434
+# OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Embedding dimension (must match your model: text-embedding-3-small=1536, nomic-embed-text=768, etc.)
+RAG_EMBEDDING_DIMENSION=1536
+RAG_EMBEDDING_BATCH_SIZE=100
+
+# Chunking settings
+RAG_CHUNK_SIZE=512
+RAG_CHUNK_OVERLAP=50
+RAG_MIN_CHUNK_SIZE=100
+
+# Retrieval settings
+RAG_TOP_K=5
+RAG_SIMILARITY_THRESHOLD=0.7
+RAG_MAX_CONTEXT_TOKENS=4000
+
+# pgvector index settings
+RAG_INDEX_TYPE=hnsw
+RAG_HNSW_M=16
+RAG_HNSW_EF_CONSTRUCTION=64
+
 # Frontend (Vite)
 VITE_API_BASE_URL=http://localhost:8123
diff --git a/README.md b/README.md
index 82e24494..9d1285a3 100644
--- a/README.md
+++ b/README.md
@@ -454,6 +454,59 @@ curl -X POST http://localhost:8123/jobs \
 - JSONB storage for flexible params and results
 - Links to model_run for train/backtest jobs
 
+### RAG Knowledge Base
+
+- `POST /rag/index` - Index a document into the knowledge base
+- `POST /rag/retrieve` - Semantic search across indexed documents
+- `GET /rag/sources` - List indexed sources
+- `DELETE /rag/sources/{source_id}` - Delete a source and its chunks
+
+**Embedding Providers:**
+
+The RAG system supports two embedding providers:
+
+1. **OpenAI** (default):
+```bash
+RAG_EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+RAG_EMBEDDING_DIMENSION=1536
+```
+
+2. **Ollama** (local/LAN):
+```bash
+RAG_EMBEDDING_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+RAG_EMBEDDING_DIMENSION=768
+```
+
+**Example Index Request:**
+```bash
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "docs/ARCHITECTURE.md"
+  }'
+```
+
+**Example Retrieve Request:**
+```bash
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "How does backtesting work?",
+    "top_k": 5
+  }'
+```
+
+**Features:**
+- pgvector HNSW index for cosine-similarity search
+- Idempotent indexing via content hash
+- Markdown and OpenAPI chunking strategies
+- Configurable embedding dimensions
+
 ### Error Responses (RFC 7807)
 
 All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`:
diff --git a/alembic/env.py b/alembic/env.py
index b3d317b0..8d9890f3 100644
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -14,6 +14,7 @@
 # Import all models for Alembic autogenerate detection
 from app.features.data_platform import models as data_platform_models  # noqa: F401
 from app.features.jobs import models as jobs_models  # noqa: F401
+from app.features.rag import models as rag_models  # noqa: F401
 from app.features.registry import models as registry_models  # noqa: F401
 
 # Alembic Config object
diff --git a/alembic/versions/b4c8d9e0f123_create_rag_tables.py b/alembic/versions/b4c8d9e0f123_create_rag_tables.py
new file mode 100644
index 00000000..e0d76cbc
--- /dev/null
+++ b/alembic/versions/b4c8d9e0f123_create_rag_tables.py
@@ -0,0 +1,153 @@
+"""create_rag_tables
+
+Revision ID: b4c8d9e0f123
+Revises: 37e16ecef223
+Create Date: 2026-02-01 12:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from pgvector.sqlalchemy import Vector
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = "b4c8d9e0f123"
+down_revision: Union[str, None] = "37e16ecef223"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Apply migration - create document_source and document_chunk tables with pgvector."""
+    # Enable pgvector (provides the Vector column type and hnsw index used below)
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # document_source: one row per indexed document, unique per (source_type, source_path)
+    op.create_table(
+        "document_source",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("source_id", sa.String(length=32), nullable=False),
+        sa.Column("source_type", sa.String(length=50), nullable=False),
+        sa.Column("source_path", sa.Text(), nullable=False),
+        sa.Column("content_hash", sa.String(length=64), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=False),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),
+    )
+
+    # Indexes for document_source: unique source_id, non-unique source_type
+    op.create_index(
+        op.f("ix_document_source_source_id"),
+        "document_source",
+        ["source_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_source_source_type"),
+        "document_source",
+        ["source_type"],
+        unique=False,
+    )
+
+    # document_chunk: chunk rows with a nullable, fixed 1536-dimension embedding column
+    op.create_table(
+        "document_chunk",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("chunk_id", sa.String(length=32), nullable=False),
+        sa.Column("source_id", sa.Integer(), nullable=False),
+        sa.Column("chunk_index", sa.Integer(), nullable=False),
+        sa.Column("content", sa.Text(), nullable=False),
+        sa.Column("embedding", Vector(1536), nullable=True),
+        sa.Column("token_count", sa.Integer(), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints (chunks are removed with their source via ON DELETE CASCADE)
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["source_id"],
+            ["document_source.id"],
+            ondelete="CASCADE",
+        ),
+        sa.UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+    )
+
+    # Indexes for document_chunk: unique chunk_id, non-unique source_id
+    op.create_index(
+        op.f("ix_document_chunk_chunk_id"),
+        "document_chunk",
+        ["chunk_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_chunk_source_id"),
+        "document_chunk",
+        ["source_id"],
+        unique=False,
+    )
+
+    # Create HNSW index for vector similarity search (cosine distance)
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+    # GIN index on the JSONB metadata column for metadata filtering
+    op.create_index(
+        "ix_chunk_metadata_gin",
+        "document_chunk",
+        ["metadata"],
+        unique=False,
+        postgresql_using="gin",
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - drop document_source and document_chunk tables."""
+    # Drop document_chunk first: it holds the foreign key to document_source
+    op.drop_index("ix_chunk_metadata_gin", table_name="document_chunk")
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_source_id"), table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_chunk_id"), table_name="document_chunk")
+    op.drop_table("document_chunk")
+
+    # Drop document_source indexes and table
+    op.drop_index(op.f("ix_document_source_source_type"), table_name="document_source")
+    op.drop_index(op.f("ix_document_source_source_id"), table_name="document_source")
+    op.drop_table("document_source")
+
+    # Note: We don't drop the vector extension as it might be used by other tables
diff --git a/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py
new file mode 100644
index 00000000..33d046b1
--- /dev/null
+++ b/alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py
@@ -0,0 +1,75 @@
+"""rag_dynamic_embedding_dimension
+
+Revision ID: c5d9e1f2g345
+Revises: b4c8d9e0f123
+Create Date: 2026-02-01 12:49:28.000000
+
+CRITICAL: This migration alters the embedding column dimension.
+If changing from 1536 to a different dimension, existing embeddings
+will be incompatible and re-indexing is required.
+"""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "c5d9e1f2g345"
+down_revision: str | None = "b4c8d9e0f123"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Apply migration - alter embedding column to configurable dimension.
+
+    Reads RAG_EMBEDDING_DIMENSION (default: 1536, must be 1..2000). Embeddings of any
+    other dimension are set to NULL so the type change succeeds - re-index afterwards.
+    """
+    dim = int(os.environ.get("RAG_EMBEDDING_DIMENSION", "1536"))
+    if not 1 <= dim <= 2000:  # pgvector HNSW indexes support at most 2000 dimensions
+        raise ValueError(f"RAG_EMBEDDING_DIMENSION must be in 1..2000, got {dim}")
+    # Drop the HNSW index first (required before altering column type)
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Clear vectors of any other size (pgvector rejects them), then change the type
+    op.execute(f"UPDATE document_chunk SET embedding = NULL WHERE vector_dims(embedding) <> {dim}")
+    op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dim})")
+
+    # Recreate the HNSW index with the new dimension
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - restore embedding column to 1536 dimensions.
+
+    Embeddings of any other dimension are set to NULL first; re-index afterwards.
+    """
+    # Drop the HNSW index
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Clear non-1536 vectors (pgvector rejects them), then restore the original dimension
+    op.execute("UPDATE document_chunk SET embedding = NULL WHERE vector_dims(embedding) <> 1536")
+    op.execute("ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector(1536)")
+
+    # Recreate the HNSW index
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
diff --git a/app/core/config.py b/app/core/config.py
index 46d5c9c9..ba912fa8 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -64,6 +64,32 @@ class Settings(BaseSettings):
     # Jobs
     jobs_retention_days: int = 30
 
+    # RAG Embedding Configuration
+    rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+    openai_api_key: str = ""
+    rag_embedding_model: str = "text-embedding-3-small"
+    rag_embedding_dimension: int = 1536
+    rag_embedding_batch_size: int = 100
+
+    # Ollama Configuration (when rag_embedding_provider = "ollama")
+    ollama_base_url: str = "http://localhost:11434"
+    ollama_embedding_model: str = "nomic-embed-text"
+
+    # RAG Chunking Configuration
+    rag_chunk_size: int = 512  # tokens
+    rag_chunk_overlap: int = 50  # tokens
+    rag_min_chunk_size: int = 100  # minimum tokens per chunk
+
+    # RAG Retrieval Configuration
+    rag_top_k: int = 5
+    rag_similarity_threshold: float = 0.7
+    rag_max_context_tokens: int = 4000
+
+    # RAG Index Configuration
+    rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+    rag_hnsw_m: int = 16
+    rag_hnsw_ef_construction: int = 64
+
     @property
     def is_development(self) -> bool:
         """Check if running in development mode."""
diff --git a/app/features/rag/__init__.py b/app/features/rag/__init__.py
new file mode 100644
index 00000000..918ac064
--- /dev/null
+++ b/app/features/rag/__init__.py
@@ -0,0 +1,5 @@
+"""RAG (Retrieval-Augmented Generation) knowledge base feature."""
+
+from app.features.rag.routes import router
+
+__all__ = ["router"]
diff --git a/app/features/rag/chunkers.py b/app/features/rag/chunkers.py
new file mode 100644
index 00000000..15c0ecfd
--- /dev/null
+++ b/app/features/rag/chunkers.py
@@ -0,0 +1,650 @@
+"""Document chunking strategies for RAG indexing.
+
+Provides heading-aware and content-aware chunking:
+- MarkdownChunker: Splits on heading boundaries
+- OpenAPIChunker: One chunk per endpoint
+
+CRITICAL: Uses tiktoken for accurate token counting.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any
+
+import tiktoken
+
+from app.core.config import get_settings
+
+
+@dataclass
+class ChunkData:
+    """One chunk of a source document, as produced by a chunker.
+
+    Args:
+        content: The text content of the chunk.
+        index: Position of this chunk in the source document (chunkers assign it).
+        token_count: Number of tokens in the content.
+        metadata: Additional context (heading, section_path, etc.); a fresh dict per instance.
+    """
+
+    content: str
+    index: int
+    token_count: int
+    metadata: dict[str, Any] = field(default_factory=lambda: {})
+
+
+class BaseChunker(ABC):
+    """Abstract base class for document chunkers.
+
+    All chunkers must:
+    - Use tiktoken for token counting (cl100k_base encoding)
+    - Respect chunk_size and chunk_overlap settings
+    - Never exceed 8191 tokens per chunk (OpenAI limit)
+    """
+
+    MAX_TOKENS_PER_CHUNK = 8191  # OpenAI embedding input limit
+
+    def __init__(self) -> None:
+        """Initialize chunker with settings and tokenizer."""
+        self.settings = get_settings()
+        self.chunk_size = self.settings.rag_chunk_size
+        self.chunk_overlap = self.settings.rag_chunk_overlap
+        self.min_chunk_size = self.settings.rag_min_chunk_size
+        self._encoder = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text using tiktoken, without raising on special-token text.
+
+        Args:
+            text: Text to count; special-token text like <|endoftext|> is counted as plain text.
+
+        Returns:
+            Number of tokens.
+        """
+        return len(self._encoder.encode(text, disallowed_special=()))
+
+    def _truncate_to_tokens(self, text: str, max_tokens: int) -> str:
+        """Truncate text to a maximum number of tokens.
+
+        Args:
+            text: Text to truncate (special-token text is treated as plain text).
+            max_tokens: Maximum number of tokens.
+
+        Returns:
+            The text unchanged if it fits, otherwise the decoded first max_tokens tokens.
+        """
+        tokens = self._encoder.encode(text, disallowed_special=())
+        if len(tokens) <= max_tokens:
+            return text
+        return self._encoder.decode(tokens[:max_tokens])
+
+    @abstractmethod
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split content into chunks.
+
+        Args:
+            content: Full document content.
+
+        Returns:
+            List of ChunkData objects.
+        """
+        pass
+
+
+class MarkdownChunker(BaseChunker):
+    """Chunks markdown documents by heading boundaries.
+
+    Splits content at heading boundaries (# ## ### etc.) while:
+    - Respecting chunk_size limits
+    - Including heading hierarchy in metadata
+    - Preserving context through overlap
+    """
+
+    # Regex to match markdown headings
+    HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split markdown content into heading-aware chunks.
+
+        Sections are merged until chunk_size would be exceeded; each emitted chunk is
+        tagged with the heading path of the last section it actually contains.
+
+        Args:
+            content: Markdown document content.
+
+        Returns:
+            List of ChunkData with heading metadata.
+        """
+        chunks: list[ChunkData] = []
+        sections = self._split_by_headings(content)
+
+        current_chunk = ""
+        current_heading_path: list[str] = []
+        # Heading path of the last section merged into current_chunk
+        chunk_heading_path: list[str] = []
+        chunk_index = 0
+
+        for section in sections:
+            section_content = section["content"]
+            heading = section.get("heading")
+            level = section.get("level", 0)
+
+            # Update heading path based on level
+            if heading:
+                current_heading_path = self._update_heading_path(
+                    current_heading_path, heading, level
+                )
+
+            section_tokens = self.count_tokens(section_content)
+
+            # If section alone exceeds chunk size, split it further
+            if section_tokens > self.chunk_size:
+                # Flush current chunk if any (tagged with its own sections, not the new one)
+                if current_chunk.strip():
+                    chunks.append(
+                        self._create_chunk(current_chunk.strip(), chunk_index, chunk_heading_path)
+                    )
+                    chunk_index += 1
+                    current_chunk = ""
+
+                # Split large section into smaller chunks
+                sub_chunks = self._split_large_section(section_content, current_heading_path.copy())
+                for sub_chunk in sub_chunks:
+                    sub_chunk.index = chunk_index
+                    chunks.append(sub_chunk)
+                    chunk_index += 1
+                continue
+
+            # Check if adding this section exceeds chunk size
+            combined = current_chunk + section_content
+            combined_tokens = self.count_tokens(combined)
+
+            if combined_tokens > self.chunk_size:
+                # Save current chunk and start new one
+                if current_chunk.strip():
+                    chunks.append(
+                        self._create_chunk(current_chunk.strip(), chunk_index, chunk_heading_path)
+                    )
+                    chunk_index += 1
+
+                # Add overlap from previous chunk
+                overlap_text = self._get_overlap_text(current_chunk)
+                current_chunk = overlap_text + section_content
+            else:
+                current_chunk = combined
+            chunk_heading_path = current_heading_path.copy()
+
+        # Don't forget the last chunk
+        # Include it even if small when it's the only content
+        if current_chunk.strip():
+            token_count = self.count_tokens(current_chunk.strip())
+            # Include small chunks if: we have no other chunks OR it meets min size
+            if len(chunks) == 0 or token_count >= self.min_chunk_size:
+                chunks.append(
+                    self._create_chunk(current_chunk.strip(), chunk_index, chunk_heading_path)
+                )
+
+        return chunks
+
+    def _split_by_headings(self, content: str) -> list[dict[str, Any]]:
+        """Split content at heading boundaries, ignoring "#" lines inside fenced code blocks.
+
+        Args:
+            content: Markdown content.
+
+        Returns:
+            List of sections, each a dict with "content", "heading" and "level" keys.
+        """
+        sections: list[dict[str, Any]] = []
+        current_section: dict[str, Any] = {"content": "", "heading": None, "level": 0}
+        fence: str | None = None  # opening marker of the fenced code block we are inside
+
+        for line in content.split("\n"):
+            stripped = line.lstrip()
+            if fence is None and stripped.startswith(("```", "~~~")):
+                fence = stripped[:3]
+            elif fence is not None and stripped.startswith(fence):
+                fence = None
+            match = None if fence is not None else self.HEADING_PATTERN.match(line)
+            if match:
+                # Save current section if it has content
+                if current_section["content"].strip():
+                    sections.append(current_section)
+                # Start new section with this heading
+                level = len(match.group(1))
+                heading = match.group(2).strip()
+                current_section = {"content": line + "\n", "heading": heading, "level": level}
+            else:
+                current_section["content"] += line + "\n"
+
+        # Add final section
+        if current_section["content"].strip():
+            sections.append(current_section)
+
+        return sections
+
+    def _update_heading_path(self, current_path: list[str], heading: str, level: int) -> list[str]:
+        """Return the heading hierarchy after entering a heading at the given level.
+
+        Ancestors are the first ``level - 1`` entries of ``current_path``; deeper or
+        sibling entries are dropped. The input list is not modified.
+
+        Args:
+            current_path: Heading texts leading to the previous position.
+            heading: Text of the heading being entered.
+            level: Heading level (1-6).
+
+        Returns:
+            A new list ending with ``heading``.
+        """
+        return [*current_path[: level - 1], heading]
+
+    def _split_large_section(self, content: str, heading_path: list[str]) -> list[ChunkData]:
+        """Break an oversized section into paragraph-sized chunks.
+
+        Paragraphs (blank-line separated) are packed greedily up to chunk_size; a
+        paragraph that is itself too large is delegated to _split_by_sentences.
+
+        Args:
+            content: Section content that exceeds chunk size.
+            heading_path: Current heading hierarchy.
+
+        Returns:
+            List of smaller chunks; every chunk is created with index 0, so the
+            caller is expected to renumber them.
+        """
+        result: list[ChunkData] = []
+        paragraphs = (raw.strip() for raw in content.split("\n\n"))
+        buffer = ""
+
+        for paragraph in paragraphs:
+            if not paragraph:
+                continue
+
+            if self.count_tokens(paragraph) > self.chunk_size:
+                # Oversized paragraph: emit what is buffered, then split by sentences
+                if buffer.strip():
+                    result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+                    buffer = ""
+
+                result.extend(self._split_by_sentences(paragraph, heading_path))
+                continue
+
+            candidate = f"{buffer}\n\n{paragraph}" if buffer else paragraph
+            if self.count_tokens(candidate) <= self.chunk_size:
+                buffer = candidate
+                continue
+
+            # Candidate would overflow: close the buffered chunk and restart with this paragraph
+            if buffer.strip():
+                result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+            buffer = paragraph
+
+        if buffer.strip():
+            result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+
+        return result
+
+    def _split_by_sentences(self, text: str, heading_path: list[str]) -> list[ChunkData]:
+        """Pack the sentences of an oversized paragraph into chunks.
+
+        Sentences are packed greedily up to chunk_size. A single sentence longer than
+        MAX_TOKENS_PER_CHUNK is truncated to that limit and emitted on its own.
+
+        Args:
+            text: Text to split.
+            heading_path: Current heading hierarchy.
+
+        Returns:
+            List of sentence-based chunks, each created with index 0.
+        """
+        result: list[ChunkData] = []
+        buffer = ""
+
+        # Break after ., ! or ? followed by whitespace
+        sentences = re.split(r"(?<=[.!?])\s+", text)
+        for sentence in (part.strip() for part in sentences):
+            if not sentence:
+                continue
+
+            if self.count_tokens(sentence) > self.MAX_TOKENS_PER_CHUNK:
+                # Too long even alone: flush the buffer, then emit a truncated copy
+                if buffer.strip():
+                    result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+                    buffer = ""
+
+                clipped = self._truncate_to_tokens(sentence, self.MAX_TOKENS_PER_CHUNK)
+                result.append(self._create_chunk(clipped, 0, heading_path))
+                continue
+
+            candidate = f"{buffer} {sentence}" if buffer else sentence
+            if self.count_tokens(candidate) <= self.chunk_size:
+                buffer = candidate
+                continue
+
+            # Candidate would overflow: close the buffered chunk and restart with this sentence
+            if buffer.strip():
+                result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+            buffer = sentence
+
+        if buffer.strip():
+            result.append(self._create_chunk(buffer.strip(), 0, heading_path))
+
+        return result
+
+    def _get_overlap_text(self, text: str) -> str:
+        """Get the last chunk_overlap tokens of text for overlap.
+
+        Args:
+            text: Text to get overlap from; special-token text is encoded as plain text.
+
+        Returns:
+            Overlap text ("" if text is empty or overlap is disabled; all of text if short).
+        """
+        if not text or self.chunk_overlap <= 0:
+            return ""
+
+        # disallowed_special=() keeps tiktoken from raising on text such as "<|endoftext|>"
+        tokens = self._encoder.encode(text, disallowed_special=())
+        if len(tokens) <= self.chunk_overlap:
+            return text
+
+        return self._encoder.decode(tokens[-self.chunk_overlap :])
+
+    def _create_chunk(self, content: str, index: int, heading_path: list[str]) -> ChunkData:
+        """Create a ChunkData object with heading metadata.
+
+        Args:
+            content: Chunk content.
+            index: Chunk index.
+            heading_path: Heading hierarchy (copied into the metadata, not stored by reference).
+
+        Returns:
+            ChunkData instance; metadata is empty when heading_path is empty.
+        """
+        token_count = self.count_tokens(content)
+        metadata: dict[str, Any] = {}
+
+        if heading_path:
+            metadata["heading"] = heading_path[-1]
+            metadata["section_path"] = list(heading_path)  # copy: callers reuse one list
+
+        return ChunkData(
+            content=content,
+            index=index,
+            token_count=token_count,
+            metadata=metadata,
+        )
+
+
+class OpenAPIChunker(BaseChunker):
+    """Chunks OpenAPI specifications by endpoint.
+
+    Creates one chunk per endpoint containing:
+    - Path and method
+    - Operation summary and description
+    - Parameters and request body schema
+    - Response schemas
+    """
+
+    def chunk(self, content: str) -> list[ChunkData]:
+        """Split OpenAPI spec into endpoint-based chunks.
+
+        Args:
+            content: OpenAPI JSON/YAML content.
+
+        Returns:
+            List of ChunkData, one per endpoint (markdown chunks if content is not a mapping).
+        """
+        chunks: list[ChunkData] = []
+
+        parsed: Any
+        try:
+            parsed = json.loads(content)
+        except json.JSONDecodeError:
+            # Try YAML if JSON fails
+            try:
+                import yaml  # type: ignore[import-untyped]
+                parsed = yaml.safe_load(content)
+            except Exception:
+                # yaml missing or content unparsable: fall back to treating as markdown
+                return MarkdownChunker().chunk(content)
+
+        # Both parsers can yield scalars or lists; only a mapping can be a spec
+        if not isinstance(parsed, dict):
+            return MarkdownChunker().chunk(content)
+        spec_data: dict[str, Any] = parsed  # pyright: ignore[reportUnknownVariableType]
+        # "or" guards against keys that are present but null (e.g. an empty "paths:" in YAML)
+        paths: dict[str, Any] = spec_data.get("paths") or {}
+        chunk_index = 0
+
+        # Also include info section as first chunk
+        info: dict[str, Any] = spec_data.get("info") or {}
+        if info:
+            servers: list[dict[str, Any]] = spec_data.get("servers") or []
+            info_chunk = self._create_info_chunk(info, servers)
+            info_chunk.index = chunk_index
+            chunks.append(info_chunk)
+            chunk_index += 1
+
+        # Create chunk for each endpoint
+        for path_key, methods in paths.items():
+            path: str = str(path_key)
+            if not isinstance(methods, dict):
+                continue
+
+            methods_dict: dict[str, Any] = dict(methods)  # pyright: ignore[reportUnknownArgumentType]
+            for method_name, operation in methods_dict.items():
+                if method_name.startswith("x-") or not isinstance(operation, dict):
+                    continue
+
+                operation_dict: dict[str, Any] = dict(operation)  # pyright: ignore[reportUnknownArgumentType]
+                chunk = self._create_endpoint_chunk(path, method_name, operation_dict, spec_data)
+                chunk.index = chunk_index
+                chunks.append(chunk)
+                chunk_index += 1
+
+        return chunks
+
+    def _create_info_chunk(self, info: dict[str, Any], servers: list[dict[str, Any]]) -> ChunkData:
+        """Build the API overview chunk from the spec's info and servers sections.
+
+        The text is a small markdown document: title heading, optional version and
+        description, then a "Servers" list when servers are given.
+
+        Args:
+            info: OpenAPI info object.
+            servers: OpenAPI servers array.
+
+        Returns:
+            ChunkData with index 0 and metadata type "api_info".
+        """
+        title = info.get("title", "API")
+        lines: list[str] = [f"# {title}"]
+        version = info.get("version", "")
+        if version:
+            lines.append(f"Version: {version}")
+
+        description = info.get("description")
+        if description:
+            lines.append(f"\n{description}")
+
+        if servers:
+            lines.append("\n## Servers")
+            for entry in servers:
+                label = entry.get("description", "")
+                suffix = f" ({label})" if label else ""
+                lines.append(f"- {entry.get('url', '')}{suffix}")
+
+        text = "\n".join(lines)
+        meta = {"type": "api_info", "title": title}
+        return ChunkData(content=text, index=0, token_count=self.count_tokens(text), metadata=meta)
+
+    def _create_endpoint_chunk(
+        self,
+        path: str,
+        method: str,
+        operation: dict[str, Any],
+        spec: dict[str, Any],
+    ) -> ChunkData:
+        """Render one OpenAPI operation as a markdown-style chunk.
+
+        Args:
+            path: Endpoint path.
+            method: HTTP method (upper-cased in the output and metadata).
+            operation: OpenAPI operation object for this path/method.
+            spec: Full OpenAPI spec, passed on for schema ``$ref`` resolution.
+
+        Returns:
+            ChunkData for the endpoint; the text is truncated when it exceeds
+            MAX_TOKENS_PER_CHUNK tokens.
+        """
+        http_method = method.upper()
+        operation_id = operation.get("operationId", f"{method}_{path}")
+
+        # Heading, optional bold summary, optional free-text description
+        lines: list[str] = [f"## {http_method} {path}"]
+        summary = operation.get("summary", "")
+        if summary:
+            lines.append(f"**{summary}**")
+        if operation.get("description"):
+            lines.append(f"\n{operation['description']}")
+
+        tags = operation.get("tags", [])
+        if tags:
+            lines.append(f"\nTags: {', '.join(tags)}")
+
+        # One bullet per declared parameter
+        parameters = operation.get("parameters", [])
+        if parameters:
+            lines.append("\n### Parameters")
+            for param in parameters:
+                required_marker = " (required)" if param.get("required", False) else ""
+                lines.append(
+                    f"- `{param.get('name', '')}` ({param.get('in', '')})"
+                    f"{required_marker}: {param.get('description', '')}"
+                )
+
+        # Request body: each content type plus its (possibly dereferenced) schema
+        request_body = operation.get("requestBody", {})
+        if request_body:
+            lines.append("\n### Request Body")
+            for content_type, media in request_body.get("content", {}).items():
+                lines.append(f"Content-Type: {content_type}")
+                if "schema" in media:
+                    rendered = self._format_schema(media["schema"], spec)
+                    lines.append(f"```json\n{rendered}\n```")
+
+        # Responses keyed by status code
+        responses = operation.get("responses", {})
+        if responses:
+            lines.append("\n### Responses")
+            lines.extend(
+                f"- **{status}**: {response.get('description', '')}"
+                for status, response in responses.items()
+            )
+
+        content = "\n".join(lines)
+        token_count = self.count_tokens(content)
+        if token_count > self.MAX_TOKENS_PER_CHUNK:
+            # Oversized endpoint: cut the text, then recount so the stored count matches
+            content = self._truncate_to_tokens(content, self.MAX_TOKENS_PER_CHUNK)
+            token_count = self.count_tokens(content)
+
+        return ChunkData(
+            content=content,
+            index=0,
+            token_count=token_count,
+            metadata={
+                "type": "endpoint",
+                "path": path,
+                "method": http_method,
+                "operation_id": operation_id,
+                "tags": tags,
+            },
+        )
+
+    def _format_schema(self, schema: dict[str, Any], spec: dict[str, Any], depth: int = 0) -> str:
+        """Render a JSON schema as a short display string.
+
+        Args:
+            schema: JSON schema object, possibly a ``$ref`` wrapper.
+            spec: Full OpenAPI spec used to resolve ``$ref`` pointers.
+            depth: Number of ``$ref`` hops followed so far.
+
+        Returns:
+            Pretty-printed schema capped at 500 characters, ``"{...}"`` once
+            depth exceeds 3, or the raw ``$ref`` object when the pointer
+            cannot be resolved.
+        """
+        # Bail out on long (or cyclic) reference chains
+        if depth > 3:
+            return "{...}"
+
+        if "$ref" not in schema:
+            try:
+                rendered = json.dumps(schema, indent=2)
+            except (TypeError, ValueError):
+                rendered = str(schema)
+            return rendered[:500]  # Limit size
+
+        pointer = schema["$ref"]
+        target = self._resolve_ref(pointer, spec)
+        if not target:
+            return f'{{"$ref": "{pointer}"}}'
+        return self._format_schema(target, spec, depth + 1)
+
+    def _resolve_ref(self, ref: str, spec: dict[str, Any]) -> dict[str, Any] | None:
+        """Resolve a local ``$ref`` JSON pointer against the OpenAPI spec.
+
+        Reference tokens are unescaped per RFC 6901 (``~1`` -> ``/``,
+        ``~0`` -> ``~``) so pointers into keys that contain slashes, such as
+        ``#/paths/~1users/get``, resolve correctly.
+
+        Args:
+            ref: Reference string (e.g., "#/components/schemas/User").
+            spec: Full OpenAPI spec.
+
+        Returns:
+            Shallow copy of the referenced object, or None when the reference
+            is not local ("#/..."), a segment is missing, or the target is
+            not a dict.
+        """
+        if not ref.startswith("#/"):
+            return None
+
+        current: Any = spec
+        for raw_token in ref[2:].split("/"):
+            # Order matters: decode "~1" before "~0" so "~01" becomes "~1", not "/"
+            token = raw_token.replace("~1", "/").replace("~0", "~")
+            if not isinstance(current, dict) or token not in current:
+                return None
+            current = current[token]  # pyright: ignore[reportUnknownVariableType]
+
+        if isinstance(current, dict):
+            return dict(current)  # pyright: ignore[reportUnknownArgumentType]
+        return None
+
+
+def get_chunker(source_type: str) -> BaseChunker:
+    """Instantiate the chunker registered for a source type.
+
+    Args:
+        source_type: Type of source; "markdown" and "openapi" are supported.
+
+    Returns:
+        A new chunker instance for that source type.
+
+    Raises:
+        ValueError: If source_type is not supported.
+    """
+    registry = {
+        "markdown": MarkdownChunker,
+        "openapi": OpenAPIChunker,
+    }
+
+    chunker_cls = registry.get(source_type)
+    if chunker_cls is None:
+        raise ValueError(f"Unsupported source type: {source_type}")
+    return chunker_cls()
diff --git a/app/features/rag/embeddings.py b/app/features/rag/embeddings.py
new file mode 100644
index 00000000..69e4d42b
--- /dev/null
+++ b/app/features/rag/embeddings.py
@@ -0,0 +1,534 @@
+"""Embedding providers for RAG knowledge base.
+
+Provides async embedding generation with multiple backends:
+- OpenAI API (default): Batch processing with rate limit handling
+- Ollama: Local/LAN embedding generation via HTTP API
+
+CRITICAL: Provider selection via RAG_EMBEDDING_PROVIDER config.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+import httpx
+import structlog
+import tiktoken
+from openai import AsyncOpenAI, RateLimitError
+
+from app.core.config import get_settings
+
+if TYPE_CHECKING:
+    pass
+
+logger = structlog.get_logger()
+
+
+class EmbeddingError(Exception):
+    """Raised when an embedding provider fails to generate embeddings."""
+
+
+class EmbeddingProvider(ABC):
+    """Interface every embedding backend implements.
+
+    Concrete providers must supply ``embed_texts``, ``embed_query`` and the
+    ``dimension`` property; the class cannot be instantiated otherwise.
+    """
+
+    @abstractmethod
+    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
+        """Embed a list of texts.
+
+        Args:
+            texts: Texts to embed.
+
+        Returns:
+            One embedding vector per input text, in input order.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> list[float]:
+        """Embed a single query string.
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            The embedding vector for the query.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+
+    @property
+    @abstractmethod
+    def dimension(self) -> int:
+        """Length of the vectors this provider produces.
+
+        Returns:
+            Embedding dimension (e.g., 1536 for OpenAI, 768 for nomic-embed-text).
+        """
+
+
+class OpenAIEmbeddingProvider(EmbeddingProvider):
+    """Embedding provider using OpenAI API.
+
+    Handles:
+    - Async batch embedding generation
+    - Rate limit handling with exponential backoff
+    - Token counting and truncation of over-long inputs
+    - Usage tracking via logging
+
+    CRITICAL: Inputs longer than MAX_TOKENS_PER_INPUT tokens are truncated
+    before being sent to the API.
+    """
+
+    MAX_TOKENS_PER_INPUT = 8191  # OpenAI limit
+    MAX_INPUTS_PER_BATCH = 2048  # OpenAI batch limit
+
+    def __init__(self) -> None:
+        """Initialize OpenAI embedding provider.
+
+        The API client is created lazily by ``_get_client`` so construction
+        itself does not require an API key.
+        """
+        self.settings = get_settings()
+        self._encoder = tiktoken.get_encoding("cl100k_base")
+        self._client: AsyncOpenAI | None = None
+
+    def _get_client(self) -> AsyncOpenAI:
+        """Get or create the async OpenAI client.
+
+        Returns:
+            AsyncOpenAI client instance.
+
+        Raises:
+            EmbeddingError: If OpenAI API key is not configured.
+        """
+        if self._client is None:
+            if not self.settings.openai_api_key:
+                raise EmbeddingError(
+                    "OpenAI API key not configured. Set OPENAI_API_KEY environment variable."
+                )
+            self._client = AsyncOpenAI(api_key=self.settings.openai_api_key)
+        return self._client
+
+    @property
+    def dimension(self) -> int:
+        """Return configured embedding dimension.
+
+        Returns:
+            Embedding dimension from settings.
+        """
+        return self.settings.rag_embedding_dimension
+
+    def count_tokens(self, text: str) -> int:
+        """Count tokens in text using tiktoken.
+
+        Args:
+            text: Text to count tokens for.
+
+        Returns:
+            Number of tokens.
+        """
+        return len(self._encoder.encode(text))
+
+    def truncate_to_tokens(self, text: str, max_tokens: int) -> str:
+        """Truncate text to a maximum number of tokens.
+
+        Args:
+            text: Text to truncate.
+            max_tokens: Maximum number of tokens.
+
+        Returns:
+            The original text when it already fits, otherwise the decoded
+            first ``max_tokens`` tokens.
+        """
+        tokens = self._encoder.encode(text)
+        if len(tokens) <= max_tokens:
+            return text
+        return self._encoder.decode(tokens[:max_tokens])
+
+    async def embed_texts(
+        self,
+        texts: list[str],
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> list[list[float]]:
+        """Generate embeddings for multiple texts.
+
+        Processes texts in batches according to settings and OpenAI limits.
+        Handles rate limits with exponential backoff.
+
+        Args:
+            texts: List of texts to embed.
+            max_retries: Maximum retry attempts per batch.
+            retry_delay: Initial delay between retries (doubles each retry).
+
+        Returns:
+            List of embeddings in same order as input texts.
+
+        Raises:
+            EmbeddingError: If embedding generation fails after retries.
+        """
+        if not texts:
+            return []
+
+        client = self._get_client()
+        batch_size = min(self.settings.rag_embedding_batch_size, self.MAX_INPUTS_PER_BATCH)
+
+        # Validate and truncate texts if needed
+        validated_texts: list[str] = []
+        total_tokens = 0
+
+        for text in texts:
+            token_count = self.count_tokens(text)
+            if token_count > self.MAX_TOKENS_PER_INPUT:
+                # Keep the pre-truncation count: it is what the warning must report
+                original_tokens = token_count
+                text = self.truncate_to_tokens(text, self.MAX_TOKENS_PER_INPUT)
+                token_count = self.count_tokens(text)
+                logger.warning(
+                    "rag.embedding_text_truncated",
+                    original_tokens=original_tokens,
+                    truncated_to=self.MAX_TOKENS_PER_INPUT,
+                )
+            validated_texts.append(text)
+            total_tokens += token_count
+
+        embeddings: list[list[float]] = []
+
+        # Process in batches
+        for i in range(0, len(validated_texts), batch_size):
+            batch = validated_texts[i : i + batch_size]
+            batch_embeddings = await self._embed_batch(client, batch, max_retries, retry_delay)
+            embeddings.extend(batch_embeddings)
+
+        logger.info(
+            "rag.embeddings_generated",
+            text_count=len(texts),
+            total_tokens=total_tokens,
+            model=self.settings.rag_embedding_model,
+            provider="openai",
+        )
+
+        return embeddings
+
+    async def embed_query(self, query: str) -> list[float]:
+        """Generate embedding for a single query.
+
+        Delegates to ``embed_texts`` with a one-element list.
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            Embedding vector.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        embeddings = await self.embed_texts([query])
+        return embeddings[0]
+
+    async def _embed_batch(
+        self,
+        client: AsyncOpenAI,
+        texts: list[str],
+        max_retries: int,
+        retry_delay: float,
+    ) -> list[list[float]]:
+        """Embed a single batch of texts with retry logic.
+
+        Only rate-limit errors are retried; any other exception is logged and
+        re-raised immediately as EmbeddingError.
+
+        Args:
+            client: OpenAI async client.
+            texts: Batch of texts to embed.
+            max_retries: Maximum retry attempts.
+            retry_delay: Initial delay between retries.
+
+        Returns:
+            List of embeddings.
+
+        Raises:
+            EmbeddingError: If all retries fail.
+        """
+        last_error: Exception | None = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                response = await client.embeddings.create(
+                    model=self.settings.rag_embedding_model,
+                    input=texts,
+                    dimensions=self.settings.rag_embedding_dimension,
+                )
+
+                # Extract embeddings in order
+                embeddings = [item.embedding for item in response.data]
+
+                # Log token usage
+                if response.usage:
+                    logger.debug(
+                        "rag.embedding_batch_completed",
+                        batch_size=len(texts),
+                        prompt_tokens=response.usage.prompt_tokens,
+                        total_tokens=response.usage.total_tokens,
+                    )
+
+                return embeddings
+
+            except RateLimitError as e:
+                last_error = e
+                if attempt < max_retries:
+                    # Exponential backoff: retry_delay, 2x, 4x, ...
+                    wait_time = retry_delay * (2**attempt)
+                    logger.warning(
+                        "rag.embedding_rate_limit",
+                        attempt=attempt + 1,
+                        max_retries=max_retries,
+                        wait_seconds=wait_time,
+                    )
+                    await asyncio.sleep(wait_time)
+                continue
+
+            except Exception as e:
+                last_error = e
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                    batch_size=len(texts),
+                )
+                raise EmbeddingError(f"Failed to generate embeddings: {e}") from e
+
+        # Reached only when every attempt hit the rate limit
+        raise EmbeddingError(
+            f"Failed to generate embeddings after {max_retries} retries: {last_error}"
+        )
+
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    """Embedding provider using Ollama's OpenAI-compatible API.
+
+    Provides local/LAN-based embedding generation without OpenAI dependency.
+    Posts to the /v1/embeddings endpoint (OpenAI-compatible) and sends the
+    `dimensions` parameter for output dimension control.
+
+    CRITICAL: Requires Ollama server running with an embedding model pulled.
+    """
+
+    def __init__(self) -> None:
+        """Initialize Ollama embedding provider.
+
+        The HTTP client is created lazily by ``_get_client``.
+        """
+        self.settings = get_settings()
+        self._client: httpx.AsyncClient | None = None
+
+    def _get_client(self) -> httpx.AsyncClient:
+        """Get or create the async HTTP client.
+
+        Returns:
+            httpx AsyncClient instance bound to the configured Ollama base URL.
+        """
+        if self._client is None:
+            self._client = httpx.AsyncClient(
+                base_url=self.settings.ollama_base_url,
+                timeout=httpx.Timeout(60.0, connect=10.0),
+            )
+        return self._client
+
+    @property
+    def dimension(self) -> int:
+        """Return configured embedding dimension.
+
+        Returns:
+            Embedding dimension from settings.
+        """
+        return self.settings.rag_embedding_dimension
+
+    async def embed_texts(
+        self,
+        texts: list[str],
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> list[list[float]]:
+        """Generate embeddings for multiple texts via Ollama's OpenAI-compatible API.
+
+        All texts are sent in a single request to /v1/embeddings. Only HTTP
+        5xx responses are retried (with exponential backoff); a 404, a
+        connection failure or any other error fails immediately.
+
+        Args:
+            texts: List of texts to embed.
+            max_retries: Maximum retry attempts.
+            retry_delay: Initial delay between retries (doubles each retry).
+
+        Returns:
+            List of embeddings in same order as input texts.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        if not texts:
+            return []
+
+        client = self._get_client()
+        last_error: Exception | None = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                # Use OpenAI-compatible endpoint with dimensions parameter
+                response = await client.post(
+                    "/v1/embeddings",
+                    json={
+                        "model": self.settings.ollama_embedding_model,
+                        "input": texts,
+                        "dimensions": self.settings.rag_embedding_dimension,
+                    },
+                )
+                response.raise_for_status()
+
+                data = response.json()
+
+                # OpenAI-compatible response format: {"data": [{"embedding": [...], "index": 0}, ...]}
+                embedding_data = data.get("data", [])
+
+                if len(embedding_data) != len(texts):
+                    raise EmbeddingError(
+                        f"Embedding count mismatch: expected {len(texts)}, got {len(embedding_data)}"
+                    )
+
+                # Sort by index to ensure correct order and extract embeddings
+                sorted_data = sorted(embedding_data, key=lambda x: x.get("index", 0))
+                embeddings: list[list[float]] = [item["embedding"] for item in sorted_data]
+
+                logger.info(
+                    "rag.embeddings_generated",
+                    text_count=len(texts),
+                    model=self.settings.ollama_embedding_model,
+                    dimension=self.settings.rag_embedding_dimension,
+                    provider="ollama",
+                )
+
+                return embeddings
+
+            except httpx.HTTPStatusError as e:
+                last_error = e
+                if e.response.status_code == 404:
+                    # Model not found - don't retry
+                    raise EmbeddingError(
+                        f"Ollama model '{self.settings.ollama_embedding_model}' not found. "
+                        f"Run: ollama pull {self.settings.ollama_embedding_model}"
+                    ) from e
+                if e.response.status_code >= 500 and attempt < max_retries:
+                    # Server error - retry
+                    wait_time = retry_delay * (2**attempt)
+                    logger.warning(
+                        "rag.ollama_server_error",
+                        attempt=attempt + 1,
+                        max_retries=max_retries,
+                        wait_seconds=wait_time,
+                        status_code=e.response.status_code,
+                    )
+                    await asyncio.sleep(wait_time)
+                    continue
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                    status_code=e.response.status_code,
+                )
+                raise EmbeddingError(f"Ollama API error: {e}") from e
+
+            except httpx.ConnectError as e:
+                last_error = e
+                logger.error(
+                    "rag.ollama_connection_error",
+                    error=str(e),
+                    base_url=self.settings.ollama_base_url,
+                )
+                raise EmbeddingError(
+                    f"Failed to connect to Ollama at {self.settings.ollama_base_url}. "
+                    "Ensure Ollama is running."
+                ) from e
+
+            except EmbeddingError:
+                # Already a domain error (e.g. count mismatch): propagate it
+                # unchanged instead of re-wrapping it in the generic handler
+                raise
+
+            except Exception as e:
+                last_error = e
+                logger.error(
+                    "rag.embedding_error",
+                    error=str(e),
+                    error_type=type(e).__name__,
+                )
+                raise EmbeddingError(f"Failed to generate embeddings: {e}") from e
+
+        # Safety net: every loop path above returns, raises or retries
+        raise EmbeddingError(
+            f"Failed to generate embeddings after {max_retries} retries: {last_error}"
+        )
+
+    async def embed_query(self, query: str) -> list[float]:
+        """Generate embedding for a single query.
+
+        Delegates to ``embed_texts`` with a one-element list.
+
+        Args:
+            query: Query text to embed.
+
+        Returns:
+            Embedding vector.
+
+        Raises:
+            EmbeddingError: If embedding generation fails.
+        """
+        embeddings = await self.embed_texts([query])
+        return embeddings[0]
+
+    async def close(self) -> None:
+        """Close the HTTP client.
+
+        Should be called when done using the provider. Safe to call when no
+        client was ever created; a later call recreates the client lazily.
+        """
+        if self._client is not None:
+            await self._client.aclose()
+            self._client = None
+
+
+# Legacy alias for backwards compatibility: EmbeddingService is the OpenAI provider class
+EmbeddingService = OpenAIEmbeddingProvider
+
+
+# Cached provider instance for dependency injection; created lazily by
+# get_embedding_service() and cleared by reset_embedding_service()
+_embedding_provider: EmbeddingProvider | None = None
+
+
+def get_embedding_service() -> EmbeddingProvider:
+    """Return the shared embedding provider, creating it on first call.
+
+    The backend is chosen from the RAG_EMBEDDING_PROVIDER setting:
+    - "ollama": Local Ollama server
+    - anything else: OpenAI API (default)
+
+    Returns:
+        EmbeddingProvider instance.
+    """
+    global _embedding_provider
+    if _embedding_provider is not None:
+        return _embedding_provider
+
+    settings = get_settings()
+    if settings.rag_embedding_provider == "ollama":
+        _embedding_provider = OllamaEmbeddingProvider()
+        log_fields = {
+            "provider": "ollama",
+            "base_url": settings.ollama_base_url,
+            "model": settings.ollama_embedding_model,
+        }
+    else:
+        _embedding_provider = OpenAIEmbeddingProvider()
+        log_fields = {
+            "provider": "openai",
+            "model": settings.rag_embedding_model,
+        }
+
+    logger.info("rag.embedding_provider_initialized", **log_fields)
+    return _embedding_provider
+
+
+def reset_embedding_service() -> None:
+    """Reset the singleton embedding provider.
+
+    Only drops the cached reference, so the next get_embedding_service() call
+    constructs a new provider. The previous provider is not closed here.
+
+    Useful for testing or reconfiguration.
+
+    NOTE(review): an OllamaEmbeddingProvider may hold an open httpx client;
+    callers presumably need to await its close() before resetting - confirm.
+    """
+    global _embedding_provider
+    _embedding_provider = None
diff --git a/app/features/rag/models.py b/app/features/rag/models.py
new file mode 100644
index 00000000..ba185b88
--- /dev/null
+++ b/app/features/rag/models.py
@@ -0,0 +1,115 @@
+"""RAG knowledge base ORM models.
+
+This module defines:
+- DocumentSource: Registry of indexed document sources
+- DocumentChunk: Indexed document chunks with embeddings
+
+CRITICAL: Uses PostgreSQL pgvector for embedding storage and similarity search.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+from pgvector.sqlalchemy import Vector  # type: ignore[import-untyped]
+from sqlalchemy import (
+    DateTime,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.core.database import Base
+from app.shared.models import TimestampMixin
+
+if TYPE_CHECKING:
+    pass
+
+
+class DocumentSource(TimestampMixin, Base):
+    """Registered document source for indexing.
+
+    CRITICAL: Tracks indexed sources with content hash for idempotent re-indexing.
+
+    A source is unique per (source_type, source_path); see ``__table_args__``.
+
+    Attributes:
+        id: Primary key.
+        source_id: Unique external identifier (UUID hex, 32 chars).
+        source_type: Type of source (markdown, openapi, run_report).
+        source_path: Path or identifier for the source.
+        content_hash: SHA-256 hash for change detection.
+        metadata_: Custom metadata as JSONB (stored in the "metadata" column).
+        indexed_at: When the source was last indexed.
+        chunks: Related document chunks.
+    """
+
+    __tablename__ = "document_source"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    # External identifier: unique and indexed, at most 32 characters
+    source_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    source_type: Mapped[str] = mapped_column(String(50), index=True)
+    source_path: Mapped[str] = mapped_column(Text, nullable=False)
+    # 64 characters fits a hex-encoded SHA-256 digest
+    content_hash: Mapped[str] = mapped_column(String(64), nullable=False)
+    # Attribute is metadata_ because SQLAlchemy's declarative base reserves the
+    # name "metadata"; the database column itself is still called "metadata"
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+    indexed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+
+    # Relationship to chunks; ORM cascade removes a source's chunks with the source
+    chunks: Mapped[list[DocumentChunk]] = relationship(
+        back_populates="source", cascade="all, delete-orphan"
+    )
+
+    # One row per (source_type, source_path) pair
+    __table_args__ = (UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),)
+
+
+class DocumentChunk(TimestampMixin, Base):
+    """Indexed document chunk with embedding.
+
+    CRITICAL: Stores vector embeddings for semantic similarity search.
+
+    Attributes:
+        id: Primary key.
+        chunk_id: Unique external identifier (UUID hex, 32 chars).
+        source_id: Foreign key to parent source.
+        chunk_index: Position within the source document.
+        content: Chunk text content.
+        embedding: Vector embedding (1536 dimensions for text-embedding-3-small).
+        token_count: Number of tokens in the chunk.
+        metadata_: Heading hierarchy, section path, etc.
+        source: Related document source.
+    """
+
+    __tablename__ = "document_chunk"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    chunk_id: Mapped[str] = mapped_column(String(32), unique=True, index=True)
+    # References document_source.id (the integer PK, not the external source_id
+    # string); ON DELETE CASCADE removes chunks at the database level too
+    source_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("document_source.id", ondelete="CASCADE"), index=True
+    )
+    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
+    # Vector column for embeddings - dimension is fixed at 1536 in this model.
+    # NOTE(review): it is not read from the RAG_EMBEDDING_DIMENSION setting, so a
+    # provider configured for another size (e.g. 768) presumably cannot store
+    # its vectors here - confirm against the migration.
+    embedding: Mapped[list[float] | None] = mapped_column(Vector(1536), nullable=True)
+    token_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    # Attribute is metadata_ because SQLAlchemy's declarative base reserves the
+    # name "metadata"; the database column itself is still called "metadata"
+    metadata_: Mapped[dict[str, Any] | None] = mapped_column("metadata", JSONB, nullable=True)
+
+    # Relationship to source
+    source: Mapped[DocumentSource] = relationship(back_populates="chunks")
+
+    __table_args__ = (
+        # A chunk position may appear only once per source
+        UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+        # HNSW index for cosine similarity search; m and ef_construction are
+        # literals here rather than values read from settings
+        Index(
+            "ix_chunk_embedding_hnsw",
+            "embedding",
+            postgresql_using="hnsw",
+            postgresql_with={"m": 16, "ef_construction": 64},
+            postgresql_ops={"embedding": "vector_cosine_ops"},
+        ),
+        # GIN index for metadata filtering
+        Index("ix_chunk_metadata_gin", "metadata", postgresql_using="gin"),
+    )
diff --git a/app/features/rag/routes.py b/app/features/rag/routes.py
new file mode 100644
index 00000000..403edd37
--- /dev/null
+++ b/app/features/rag/routes.py
@@ -0,0 +1,345 @@
+"""RAG API routes for document indexing and semantic retrieval."""
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.database import get_db
+from app.core.exceptions import DatabaseError
+from app.core.logging import get_logger
+from app.features.rag.embeddings import EmbeddingError
+from app.features.rag.schemas import (
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+)
+from app.features.rag.service import RAGService, SourceNotFoundError
+
+logger = get_logger(__name__)
+
+router = APIRouter(prefix="/rag", tags=["rag"])
+
+
+# =============================================================================
+# Index Endpoint
+# =============================================================================
+
+
+@router.post(
+    "/index",
+    response_model=IndexResponse,
+    status_code=status.HTTP_201_CREATED,
+    summary="Index a document",
+    description="""
+Index a document into the RAG knowledge base.
+
+**Source Types:**
+- `markdown`: Markdown documents (split by headings)
+- `openapi`: OpenAPI specifications (split by endpoint)
+
+**Content Source:**
+- Provide `content` directly in the request, OR
+- Provide `source_path` to read from file system
+
+**Idempotent Updates:**
+- Documents are identified by `source_type` + `source_path`
+- Content hash is compared to detect changes
+- If unchanged, returns `status: "unchanged"` without re-indexing
+- If changed, old chunks are deleted and new ones created
+
+**Returns:**
+- `source_id`: Unique identifier for the indexed source
+- `chunks_created`: Number of chunks created
+- `tokens_processed`: Total tokens processed
+- `status`: "indexed", "updated", or "unchanged"
+""",
+)
+async def index_document(
+    request: IndexRequest,
+    db: AsyncSession = Depends(get_db),
+) -> IndexResponse:
+    """Handle ``POST /rag/index`` by delegating to ``RAGService``.
+
+    Args:
+        request: Validated payload (source type, path, optional inline content).
+        db: Async database session supplied by the ``get_db`` dependency.
+
+    Returns:
+        The indexing result produced by the service, unchanged.
+
+    Raises:
+        HTTPException: 404 for a missing source file, 502 for an embedding failure.
+        DatabaseError: If the service raises a ``SQLAlchemyError``.
+    """
+    logger.info(
+        "rag.index_request_received",
+        source_type=request.source_type,
+        source_path=request.source_path,
+        has_content=request.content is not None,
+    )
+
+    rag = RAGService()
+
+    try:
+        outcome = await rag.index_document(db=db, request=request)
+
+    except FileNotFoundError as e:
+        logger.warning(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            source_path=request.source_path,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        ) from e
+
+    except EmbeddingError as e:
+        logger.error(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Embedding generation failed: {e}",
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.index_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to index document",
+            details={"error": str(e)},
+        ) from e
+
+    else:
+        logger.info(
+            "rag.index_request_completed",
+            source_id=outcome.source_id,
+            chunks_created=outcome.chunks_created,
+            status=outcome.status,
+        )
+        return outcome
+
+
+# =============================================================================
+# Retrieve Endpoint
+# =============================================================================
+
+
+@router.post(
+    "/retrieve",
+    response_model=RetrieveResponse,
+    summary="Semantic search",
+    description="""
+Perform semantic search across indexed documents.
+
+**Query:**
+- Natural language query (1-2000 characters)
+- Converted to embedding for similarity search
+
+**Parameters:**
+- `top_k`: Number of results (1-50, default: 5)
+- `similarity_threshold`: Minimum similarity (0.0-1.0, default: 0.7)
+- `filters`: Optional metadata filters
+
+**Filters:**
+- `source_type`: List of source types to search
+- `category`: Category from source metadata
+
+**Returns:**
+- List of matching chunks with relevance scores
+- Performance metrics (embedding time, search time)
+- Total chunks searched
+
+**Evidence-Grounded:**
+Returns raw chunks with citations - no answer generation.
+""",
+)
+async def retrieve(
+    request: RetrieveRequest,
+    db: AsyncSession = Depends(get_db),
+) -> RetrieveResponse:
+    """Handle ``POST /rag/retrieve`` by delegating to ``RAGService``.
+
+    Args:
+        request: Validated query, ``top_k``, threshold and optional filters.
+        db: Async database session supplied by the ``get_db`` dependency.
+
+    Returns:
+        The search results and timings produced by the service, unchanged.
+
+    Raises:
+        HTTPException: 502 when the service raises ``EmbeddingError``.
+        DatabaseError: If the service raises a ``SQLAlchemyError``.
+    """
+    logger.info(
+        "rag.retrieve_request_received",
+        query_length=len(request.query),
+        top_k=request.top_k,
+        threshold=request.similarity_threshold,
+        has_filters=request.filters is not None,
+    )
+
+    rag = RAGService()
+
+    try:
+        outcome = await rag.retrieve(db=db, request=request)
+
+    except EmbeddingError as e:
+        logger.error(
+            "rag.retrieve_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"Embedding generation failed: {e}",
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.retrieve_request_failed",
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to retrieve documents",
+            details={"error": str(e)},
+        ) from e
+
+    else:
+        logger.info(
+            "rag.retrieve_request_completed",
+            results_count=len(outcome.results),
+            query_embedding_time_ms=outcome.query_embedding_time_ms,
+            search_time_ms=outcome.search_time_ms,
+        )
+        return outcome
+
+
+# =============================================================================
+# Sources Endpoints
+# =============================================================================
+
+
+@router.get(
+    "/sources",
+    response_model=SourceListResponse,
+    summary="List indexed sources",
+    description="""
+List all indexed document sources with statistics.
+
+Returns:
+- List of sources with chunk counts
+- Total source count
+- Total chunk count across all sources
+""",
+)
+async def list_sources(
+    db: AsyncSession = Depends(get_db),
+) -> SourceListResponse:
+    """List all indexed sources with per-source chunk counts and totals.
+
+    Raises:
+        DatabaseError: If the listing query fails (wraps ``SQLAlchemyError``).
+    """
+    try:
+        response = await RAGService().list_sources(db=db)
+    except SQLAlchemyError as e:
+        # Mirror the other /rag routes: log, then raise the app-level error.
+        logger.error("rag.list_sources_failed", error=str(e), exc_info=True)
+        raise DatabaseError(message="Failed to list sources", details={"error": str(e)}) from e
+
+    logger.info(
+        "rag.list_sources_completed",
+        total_sources=response.total_sources,
+        total_chunks=response.total_chunks,
+    )
+    return response
+
+
+@router.delete(
+    "/sources/{source_id}",
+    response_model=DeleteResponse,
+    summary="Delete a source",
+    description="""
+Delete an indexed source and all its chunks.
+
+**Cascade Delete:**
+All chunks belonging to the source are automatically deleted.
+
+**Returns:**
+- `source_id`: Deleted source identifier
+- `chunks_deleted`: Number of chunks removed
+- `status`: Always "deleted"
+""",
+)
+async def delete_source(
+    source_id: str,
+    db: AsyncSession = Depends(get_db),
+) -> DeleteResponse:
+    """Handle ``DELETE /rag/sources/{source_id}`` via ``RAGService``.
+
+    Args:
+        source_id: External identifier of the source to remove.
+        db: Async database session supplied by the ``get_db`` dependency.
+
+    Returns:
+        The deletion result produced by the service, unchanged.
+
+    Raises:
+        HTTPException: 404 when the service raises ``SourceNotFoundError``.
+        DatabaseError: If the service raises a ``SQLAlchemyError``.
+    """
+    logger.info("rag.delete_source_request_received", source_id=source_id)
+
+    rag = RAGService()
+
+    try:
+        outcome = await rag.delete_source(db=db, source_id=source_id)
+
+    except SourceNotFoundError as e:
+        logger.warning(
+            "rag.delete_source_request_failed",
+            source_id=source_id,
+            error=str(e),
+            error_type=type(e).__name__,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        ) from e
+
+    except SQLAlchemyError as e:
+        logger.error(
+            "rag.delete_source_request_failed",
+            source_id=source_id,
+            error=str(e),
+            error_type=type(e).__name__,
+            exc_info=True,
+        )
+        raise DatabaseError(
+            message="Failed to delete source",
+            details={"error": str(e)},
+        ) from e
+
+    else:
+        logger.info(
+            "rag.delete_source_request_completed",
+            source_id=source_id,
+            chunks_deleted=outcome.chunks_deleted,
+        )
+        return outcome
diff --git a/app/features/rag/schemas.py b/app/features/rag/schemas.py
new file mode 100644
index 00000000..3c350c31
--- /dev/null
+++ b/app/features/rag/schemas.py
@@ -0,0 +1,181 @@
+"""Pydantic schemas for RAG API contracts.
+
+Schemas are designed to be:
+- Validated for data integrity
+- Compatible with SQLAlchemy models via from_attributes
+- Evidence-grounded (citations include source metadata)
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class IndexRequest(BaseModel):
+    """Request body for indexing one document into the knowledge base.
+
+    Attributes:
+        source_type: Type of document to index (markdown or openapi).
+        source_path: File path to read, or an identifier when content is inline.
+        content: Inline document text; when absent the service reads source_path.
+        metadata: Custom metadata to attach to the source.
+    """
+
+    model_config = ConfigDict(extra="forbid")  # unknown request fields are rejected
+
+    source_type: Literal["markdown", "openapi"] = Field(
+        ..., description="Type of document to index"
+    )
+    source_path: str = Field(
+        ...,
+        min_length=1,
+        max_length=500,
+        description="Path to the document or unique identifier",
+    )
+    content: str | None = Field(
+        None, description="Optional content override (if not reading from path)"
+    )
+    metadata: dict[str, Any] | None = Field(
+        None, description="Custom metadata to attach to the source"
+    )
+
+
+class IndexResponse(BaseModel):
+    """Response from document indexing operation.
+
+    Attributes:
+        source_id: Unique identifier for the indexed source.
+        source_path: Path of the indexed document.
+        chunks_created: Chunks written; for "unchanged" the existing chunk count.
+        tokens_processed: Total chunk tokens; 0 when status is "unchanged".
+        duration_ms: Time taken to index the document, in milliseconds.
+        status: Indexing status (indexed, updated, unchanged).
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_path: str
+    chunks_created: int
+    tokens_processed: int
+    duration_ms: float
+    status: Literal["indexed", "updated", "unchanged"]
+
+
+class RetrieveRequest(BaseModel):
+    """Request for semantic search across indexed documents.
+
+    Attributes:
+        query: Search query text (1-2000 characters).
+        top_k: Number of results to return (1-50, default 5).
+        similarity_threshold: Minimum similarity score (0.0-1.0, default 0.7).
+        filters: Optional filters; the service reads "source_type" and "category".
+    """
+
+    model_config = ConfigDict(extra="forbid")  # unknown request fields are rejected
+
+    query: str = Field(..., min_length=1, max_length=2000, description="Search query text")
+    top_k: int = Field(default=5, ge=1, le=50, description="Number of results to return")
+    similarity_threshold: float = Field(
+        default=0.7, ge=0.0, le=1.0, description="Minimum similarity score"
+    )
+    filters: dict[str, Any] | None = Field(
+        None, description="Metadata filters (source_type, category, etc.)"
+    )
+
+
+class ChunkResult(BaseModel):
+    """Single chunk in retrieval results with citation metadata.
+
+    CRITICAL: Provides evidence-grounded context with stable citations.
+
+    Attributes:
+        chunk_id: Unique identifier for the chunk.
+        source_id: External identifier of the parent source.
+        source_path: Path of the source document.
+        source_type: Type of source document.
+        content: Chunk text content.
+        relevance_score: 1 - cosine distance, validated to lie within 0.0-1.0.
+        metadata: Chunk-level metadata (heading hierarchy, section path, etc.).
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    chunk_id: str
+    source_id: str
+    source_path: str
+    source_type: str
+    content: str
+    relevance_score: float = Field(..., ge=0.0, le=1.0)
+    metadata: dict[str, Any] | None = None
+
+
+class RetrieveResponse(BaseModel):
+    """Response from semantic search operation.
+
+    Attributes:
+        results: Matching chunks with relevance scores, most similar first.
+        query_embedding_time_ms: Time to generate the query embedding (ms).
+        search_time_ms: Time to count chunks and run the similarity search (ms).
+        total_chunks_searched: Count of all stored chunks (filters not applied).
+    """
+
+    results: list[ChunkResult]
+    query_embedding_time_ms: float
+    search_time_ms: float
+    total_chunks_searched: int
+
+
+class SourceResponse(BaseModel):
+    """Details of an indexed document source.
+
+    Attributes:
+        source_id: Unique (external) identifier for the source.
+        source_type: Type of document (markdown, openapi).
+        source_path: Path to the document.
+        chunk_count: Number of chunks currently stored for this source.
+        content_hash: SHA-256 hex digest of the content, for change detection.
+        indexed_at: When the source was last indexed.
+        metadata: Custom metadata attached to the source.
+    """
+
+    model_config = ConfigDict(from_attributes=True)
+
+    source_id: str
+    source_type: str
+    source_path: str
+    chunk_count: int
+    content_hash: str
+    indexed_at: datetime
+    metadata: dict[str, Any] | None = None
+
+
+class SourceListResponse(BaseModel):
+    """List of all indexed sources with summary statistics.
+
+    Attributes:
+        sources: Indexed sources, each with its own chunk count.
+        total_sources: Number of entries in ``sources``.
+        total_chunks: Sum of the chunk counts of all listed sources.
+    """
+
+    sources: list[SourceResponse]
+    total_sources: int
+    total_chunks: int
+
+
+class DeleteResponse(BaseModel):
+    """Response from source deletion operation.
+
+    Attributes:
+        source_id: External identifier of the deleted source.
+        chunks_deleted: Chunk count taken just before the source was deleted.
+        status: Always "deleted".
+    """
+
+    source_id: str
+    chunks_deleted: int
+    status: Literal["deleted"]
diff --git a/app/features/rag/service.py b/app/features/rag/service.py
new file mode 100644
index 00000000..2b311386
--- /dev/null
+++ b/app/features/rag/service.py
@@ -0,0 +1,584 @@
+"""RAG service for document indexing and semantic retrieval.
+
+Orchestrates:
+- Document indexing with chunking and embedding
+- Semantic retrieval with similarity search
+- Source management (list, delete)
+- Idempotent re-indexing via content hash comparison
+
+CRITICAL: Uses pgvector cosine_distance for similarity search.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import time
+import uuid
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Literal
+
+import structlog
+from sqlalchemy import delete, func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.config import get_settings
+from app.features.rag.chunkers import ChunkData, get_chunker
+from app.features.rag.embeddings import EmbeddingProvider, get_embedding_service
+from app.features.rag.models import DocumentChunk, DocumentSource
+from app.features.rag.schemas import (
+    ChunkResult,
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+    SourceResponse,
+)
+
+logger = structlog.get_logger()
+
+
+class SourceNotFoundError(ValueError):
+    """Raised when no source in the knowledge base has the given ``source_id``.
+
+    """
+
+
+class RAGService:
+    """Service for RAG knowledge base operations.
+
+    Provides:
+    - Document indexing with automatic chunking and embedding
+    - Semantic retrieval with configurable similarity threshold
+    - Source management and statistics
+    - Idempotent re-indexing based on content hash
+
+    CRITICAL: Uses cosine_distance for similarity (not l2_distance).
+    """
+
+    def __init__(
+        self,
+        embedding_service: EmbeddingProvider | None = None,
+    ) -> None:
+        """Load application settings and pick the embedding provider.
+
+        Args:
+            embedding_service: Provider to use; a falsy value selects get_embedding_service().
+        """
+        self.settings = get_settings()
+        self._embedding_service = embedding_service or get_embedding_service()
+
+    def _compute_content_hash(self, content: str) -> str:
+        """Return the SHA-256 hex digest of ``content`` for change detection.
+
+        The digest is always 64 lowercase hexadecimal characters.
+
+        Args:
+            content: Document text, hashed as its UTF-8 encoding.
+        """
+        utf8_bytes = content.encode("utf-8")
+        return hashlib.sha256(utf8_bytes).hexdigest()
+
+    def _read_content_from_path(self, source_path: str) -> str:
+        """Return the text of the UTF-8 file at ``source_path``.
+
+        NOTE(review): the path arrives unchecked from the API request; consider
+        confining reads to an allow-listed base directory.
+
+        Args:
+            source_path: Path to the file.
+
+        Raises:
+            FileNotFoundError: If the path is missing or is not a regular file.
+        """
+        path = Path(source_path)
+        if not path.is_file():  # a directory used to escape as IsADirectoryError
+            raise FileNotFoundError(f"Source file not found: {source_path}")
+        return path.read_text(encoding="utf-8")
+
+    async def index_document(
+        self,
+        db: AsyncSession,
+        request: IndexRequest,
+    ) -> IndexResponse:
+        """Index a document into the knowledge base.
+
+        Resolves content (inline ``request.content`` or the file at
+        ``request.source_path``), returns early with status "unchanged" when
+        the stored hash matches, otherwise chunks, embeds and upserts.
+
+        Args:
+            db: Database session.
+            request: Index request with source info.
+
+        Returns:
+            Indexing result with statistics.
+
+        Raises:
+            FileNotFoundError: If no inline content is given and the file is missing.
+        """
+        start_time = time.perf_counter()  # monotonic clock for durations
+
+        logger.info(
+            "rag.index_document_started",
+            source_type=request.source_type,
+            source_path=request.source_path,
+        )
+
+        # Inline content wins whenever it is supplied, even when it is empty
+        if request.content is not None:
+            content = request.content
+        else:
+            content = self._read_content_from_path(request.source_path)
+
+        # Compute content hash
+        content_hash = self._compute_content_hash(content)
+
+        # Check if source already exists
+        existing_source = await self._find_source_by_path(
+            db, request.source_type, request.source_path
+        )
+
+        if existing_source and existing_source.content_hash == content_hash:
+            # Content unchanged - skip re-indexing
+            chunk_count = await self._get_chunk_count(db, existing_source.id)
+            duration_ms = (time.perf_counter() - start_time) * 1000
+
+            logger.info(
+                "rag.index_document_unchanged",
+                source_id=existing_source.source_id,
+                source_path=request.source_path,
+            )
+
+            return IndexResponse(
+                source_id=existing_source.source_id,
+                source_path=request.source_path,
+                chunks_created=chunk_count,
+                tokens_processed=0,
+                duration_ms=duration_ms,
+                status="unchanged",
+            )
+
+        # Chunk the content
+        chunker = get_chunker(request.source_type)
+        chunks = chunker.chunk(content)
+
+        if not chunks:
+            logger.warning(
+                "rag.index_document_no_chunks",
+                source_path=request.source_path,
+            )
+            chunks = []  # normalise any falsy chunker result to an empty list
+
+        # Generate embeddings for all chunks
+        chunk_texts = [chunk.content for chunk in chunks]
+        embeddings: list[list[float]] = []
+
+        if chunk_texts:
+            embeddings = await self._embedding_service.embed_texts(chunk_texts)
+
+        # Calculate total tokens
+        total_tokens = sum(chunk.token_count for chunk in chunks)
+
+        # Upsert source and chunks
+        source_id = existing_source.source_id if existing_source else uuid.uuid4().hex
+        status: Literal["indexed", "updated", "unchanged"] = (
+            "updated" if existing_source else "indexed"
+        )
+
+        await self._upsert_source_and_chunks(
+            db=db,
+            source_id=source_id,
+            source_type=request.source_type,
+            source_path=request.source_path,
+            content_hash=content_hash,
+            metadata=request.metadata,
+            chunks=chunks,
+            embeddings=embeddings,
+            existing_source=existing_source,
+        )
+
+        duration_ms = (time.perf_counter() - start_time) * 1000
+
+        logger.info(
+            "rag.index_document_completed",
+            source_id=source_id,
+            source_path=request.source_path,
+            chunks_created=len(chunks),
+            tokens_processed=total_tokens,
+            duration_ms=duration_ms,
+            status=status,
+        )
+
+        return IndexResponse(
+            source_id=source_id,
+            source_path=request.source_path,
+            chunks_created=len(chunks),
+            tokens_processed=total_tokens,
+            duration_ms=duration_ms,
+            status=status,
+        )
+
+    async def retrieve(
+        self,
+        db: AsyncSession,
+        request: RetrieveRequest,
+    ) -> RetrieveResponse:
+        """Perform semantic search across indexed documents.
+
+        Uses pgvector cosine_distance for similarity ranking:
+        - relevance_score = 1 - cosine_distance
+        - Filters by similarity threshold
+        - Supports metadata filtering
+
+        Args:
+            db: Database session.
+            request: Retrieval request with query and filters.
+
+        Returns:
+            Search results with relevance scores and timing statistics.
+        """
+        embed_start = time.perf_counter()  # monotonic clock for durations
+
+        logger.info(
+            "rag.retrieve_started",
+            query_length=len(request.query),
+            top_k=request.top_k,
+            threshold=request.similarity_threshold,
+        )
+
+        # Generate query embedding
+        query_embedding = await self._embedding_service.embed_query(request.query)
+        embed_time_ms = (time.perf_counter() - embed_start) * 1000
+
+        search_start = time.perf_counter()
+
+        # Count all stored chunks for the statistics (request filters not applied)
+        total_chunks = await self._get_total_chunk_count(db)
+
+        # Run the similarity search. pgvector cosine distance is
+        # 1 - cosine_similarity (range 0-2); the helper reports relevance as
+        # 1 - distance and drops anything below the requested threshold.
+        results = await self._search_similar_chunks(
+            db=db,
+            query_embedding=query_embedding,
+            top_k=request.top_k,
+            threshold=request.similarity_threshold,
+            filters=request.filters,
+        )
+
+        search_time_ms = (time.perf_counter() - search_start) * 1000
+
+        logger.info(
+            "rag.retrieve_completed",
+            results_count=len(results),
+            query_embedding_time_ms=embed_time_ms,
+            search_time_ms=search_time_ms,
+        )
+
+        return RetrieveResponse(
+            results=results,
+            query_embedding_time_ms=embed_time_ms,
+            search_time_ms=search_time_ms,
+            total_chunks_searched=total_chunks,
+        )
+
+    async def list_sources(
+        self,
+        db: AsyncSession,
+    ) -> SourceListResponse:
+        """Return every indexed source together with its chunk count.
+
+        Sources are ordered by ``indexed_at`` descending; the outer join keeps
+        sources that have no chunks, reported with a count of zero.
+
+        Args:
+            db: Database session.
+
+        Returns:
+            One entry per source plus the overall source and chunk totals.
+        """
+        # COUNT(chunk.id) ignores the NULLs produced by the outer join.
+        chunk_total = func.count(DocumentChunk.id).label("chunk_count")
+        query = (
+            select(
+                DocumentSource,
+                chunk_total,
+            )
+            .outerjoin(DocumentChunk, DocumentSource.id == DocumentChunk.source_id)
+            .group_by(DocumentSource.id)
+            .order_by(DocumentSource.indexed_at.desc())
+        )
+
+        rows = (await db.execute(query)).all()
+
+        # Map each (source, chunk count) row onto the API schema.
+        listed = [
+            SourceResponse(
+                source_id=src.source_id,
+                source_type=src.source_type,
+                source_path=src.source_path,
+                chunk_count=n_chunks,
+                content_hash=src.content_hash,
+                indexed_at=src.indexed_at,
+                metadata=src.metadata_,
+            )
+            for src, n_chunks in rows
+        ]
+
+        return SourceListResponse(
+            sources=listed,
+            total_sources=len(listed),
+            total_chunks=sum(n_chunks for _, n_chunks in rows),
+        )
+
+    async def delete_source(
+        self,
+        db: AsyncSession,
+        source_id: str,
+    ) -> DeleteResponse:
+        """Remove one source, and with it every chunk it owns.
+
+        Args:
+            db: Database session.
+            source_id: External identifier of the source to remove.
+
+        Returns:
+            Deletion result carrying the number of chunks counted beforehand.
+
+        Raises:
+            SourceNotFoundError: If no source has this ``source_id``.
+        """
+        logger.info("rag.delete_source_started", source_id=source_id)
+
+        # Resolve the external id to its row; a missing row is an error.
+        lookup = select(DocumentSource).where(DocumentSource.source_id == source_id)
+        record = (await db.execute(lookup)).scalar_one_or_none()
+        if record is None:
+            raise SourceNotFoundError(f"Source not found: {source_id}")
+
+        # The count has to be taken first: once the source is gone its chunks
+        # can no longer be counted.
+        removed = await self._get_chunk_count(db, record.id)
+
+        # Delete source (cascades to chunks)
+        await db.delete(record)
+        await db.flush()
+
+        logger.info(
+            "rag.delete_source_completed",
+            source_id=source_id,
+            chunks_deleted=removed,
+        )
+
+        summary = DeleteResponse(
+            source_id=source_id,
+            chunks_deleted=removed,
+            status="deleted",
+        )
+        return summary
+
+    async def _find_source_by_path(
+        self,
+        db: AsyncSession,
+        source_type: str,
+        source_path: str,
+    ) -> DocumentSource | None:
+        """Look up the source registered under a (type, path) pair.
+
+        Args:
+            db: Database session.
+            source_type: Source type to match exactly.
+            source_path: Source path to match exactly.
+
+        Returns:
+            The matching source, or ``None`` when nothing matches.
+        """
+        query = select(DocumentSource).where(
+            DocumentSource.source_type == source_type,
+            DocumentSource.source_path == source_path,
+        )
+        found = await db.execute(query)
+        return found.scalar_one_or_none()
+
+    async def _get_chunk_count(self, db: AsyncSession, source_id: int) -> int:
+        """Count the chunks that belong to one source.
+
+        Args:
+            db: Database session.
+            source_id: Internal (integer primary-key) id of the source.
+
+        Returns:
+            Number of chunk rows whose ``source_id`` matches.
+        """
+        belongs_to_source = DocumentChunk.source_id == source_id
+        count_query = (
+            select(func.count()).select_from(DocumentChunk).where(belongs_to_source)
+        )
+        # An aggregate COUNT yields exactly one row, so scalar_one() fits.
+        counted = await db.execute(count_query)
+        return counted.scalar_one()
+
+    async def _get_total_chunk_count(self, db: AsyncSession) -> int:
+        """Count every chunk in the knowledge base, across all sources.
+
+        Args:
+            db: Database session.
+
+        Returns:
+            Total number of rows in the chunk table.
+        """
+        # Single-row COUNT over the whole chunk table.
+        everything = select(func.count()).select_from(DocumentChunk)
+        return (await db.execute(everything)).scalar_one()
+
+    async def _upsert_source_and_chunks(
+        self,
+        db: AsyncSession,
+        source_id: str,
+        source_type: str,
+        source_path: str,
+        content_hash: str,
+        metadata: dict[str, Any] | None,
+        chunks: list[ChunkData],
+        embeddings: list[list[float]],
+        existing_source: DocumentSource | None,
+    ) -> None:
+        """Persist a source row and replace its chunks in the current session.
+
+        Args:
+            db: Database session.
+            source_id: External source identifier (used only for a new source).
+            source_type: Type of source.
+            source_path: Path to source.
+            content_hash: SHA-256 hash of content.
+            metadata: Custom metadata stored on the source row.
+            chunks: Chunked content, in document order.
+            embeddings: One embedding per chunk, in the same order as ``chunks``.
+            existing_source: Existing source if updating.
+        """
+        indexed_at = datetime.now(UTC)
+
+        if not existing_source:
+            # New source: flush right away so ``id`` is populated for the chunks.
+            fresh = DocumentSource(
+                source_id=source_id,
+                source_type=source_type,
+                source_path=source_path,
+                content_hash=content_hash,
+                metadata_=metadata,
+                indexed_at=indexed_at,
+            )
+            db.add(fresh)
+            await db.flush()
+            parent_pk = fresh.id
+        else:
+            # Re-index: refresh the stored hash, metadata and timestamp.
+            existing_source.content_hash = content_hash
+            existing_source.metadata_ = metadata
+            existing_source.indexed_at = indexed_at
+
+            # Remove the previous chunks before the replacements are added.
+            await db.execute(
+                delete(DocumentChunk).where(DocumentChunk.source_id == existing_source.id)
+            )
+            parent_pk = existing_source.id
+
+        # Insert one row per chunk; strict zip raises if the lengths differ.
+        for position, (piece, vector) in enumerate(zip(chunks, embeddings, strict=True)):
+            row = DocumentChunk(
+                chunk_id=uuid.uuid4().hex,
+                source_id=parent_pk,
+                chunk_index=position,
+                content=piece.content,
+                embedding=vector,
+                token_count=piece.token_count,
+                metadata_=piece.metadata or None,
+            )
+            db.add(row)
+
+        await db.flush()
+
+    async def _search_similar_chunks(
+        self,
+        db: AsyncSession,
+        query_embedding: list[float],
+        top_k: int,
+        threshold: float,
+        filters: dict[str, Any] | None,
+    ) -> list[ChunkResult]:
+        """Search for similar chunks using cosine distance.
+
+        Args:
+            db: Database session.
+            query_embedding: Query embedding vector.
+            top_k: Maximum results to return.
+            threshold: Minimum similarity (1 - cosine distance) a chunk must reach.
+            filters: Optional filters; only "source_type" and "category" are used.
+
+        Returns:
+            Up to ``top_k`` chunk results, most similar first.
+        """
+        # pgvector cosine distance is 1 - cosine_similarity: 0 for identical
+        # direction, up to 2 for opposite vectors.
+        distance = DocumentChunk.embedding.cosine_distance(query_embedding)
+
+        # Rows are ordered by ascending distance, so chunks that pass the
+        # threshold form a prefix of the result set: ``top_k`` rows suffice and
+        # over-fetching would only load embeddings that are never returned.
+        stmt = (
+            select(
+                DocumentChunk,
+                DocumentSource,
+                distance.label("distance"),
+            )
+            .join(DocumentSource, DocumentChunk.source_id == DocumentSource.id)
+            .where(DocumentChunk.embedding.isnot(None))
+            .order_by(distance)
+            .limit(top_k)
+        )
+
+        # Apply metadata filters if provided
+        if filters:
+            if "source_type" in filters:
+                source_types = filters["source_type"]
+                if isinstance(source_types, str):
+                    source_types = [source_types]
+                stmt = stmt.where(DocumentSource.source_type.in_(source_types))
+
+            if "category" in filters:
+                # Filter by metadata category
+                stmt = stmt.where(
+                    DocumentSource.metadata_.op("->>")("category") == filters["category"]
+                )
+
+        result = await db.execute(stmt)
+        rows = result.all()
+
+        results: list[ChunkResult] = []
+        for chunk, source, dist in rows:
+            # For cosine distance: similarity = 1 - distance
+            relevance_score = 1.0 - float(dist)
+
+            # Written as a negated ">=" so a NaN score (pgvector can yield a NaN
+            # distance, e.g. for a zero vector) is dropped instead of slipping
+            # past a plain "<" comparison and failing ChunkResult validation.
+            if not relevance_score >= threshold:
+                continue
+
+            results.append(
+                ChunkResult(
+                    chunk_id=chunk.chunk_id,
+                    source_id=source.source_id,
+                    source_path=source.source_path,
+                    source_type=source.source_type,
+                    content=chunk.content,
+                    # Clamp so float drift cannot break the schema's 0-1 bound.
+                    relevance_score=round(min(1.0, max(0.0, relevance_score)), 4),
+                    metadata=chunk.metadata_,
+                )
+            )
+
+        return results
diff --git a/app/features/rag/tests/__init__.py b/app/features/rag/tests/__init__.py
new file mode 100644
index 00000000..041e4941
--- /dev/null
+++ b/app/features/rag/tests/__init__.py
@@ -0,0 +1 @@
+"""RAG feature tests."""
diff --git a/app/features/rag/tests/conftest.py b/app/features/rag/tests/conftest.py
new file mode 100644
index 00000000..3bf7f318
--- /dev/null
+++ b/app/features/rag/tests/conftest.py
@@ -0,0 +1,265 @@
+"""Test fixtures for RAG module."""
+
+from collections.abc import AsyncGenerator
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+from sqlalchemy import delete
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+from app.core.config import get_settings
+from app.core.database import get_db
+from app.features.rag.embeddings import EmbeddingService
+from app.features.rag.models import DocumentChunk, DocumentSource
+from app.features.rag.schemas import IndexRequest, RetrieveRequest
+from app.main import app
+
+# =============================================================================
+# Database Fixtures for Integration Tests
+# =============================================================================
+
+
+@pytest.fixture
+async def db_session() -> AsyncGenerator[AsyncSession, None]:
+    """Provide an async database session for integration tests.
+
+    Does not create tables itself. On teardown it rolls back any pending
+    transaction, then deletes sources whose path starts with "test-".
+    Requires PostgreSQL to be running (docker-compose up -d).
+    """
+    engine = create_async_engine(get_settings().database_url, echo=False)
+    session_factory = async_sessionmaker(
+        engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
+    )
+    try:
+        async with session_factory() as session:
+            try:
+                yield session
+            finally:
+                # A failed or unfinished transaction would block the cleanup DELETE.
+                await session.rollback()
+                await session.execute(
+                    delete(DocumentSource).where(DocumentSource.source_path.like("test-%"))
+                )
+                await session.commit()
+    finally:
+        # Release pooled connections even when the cleanup above raised.
+        await engine.dispose()
+
+
+@pytest.fixture
+async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
+    """Yield an HTTP client for the app with ``get_db`` bound to ``db_session``.
+
+    All dependency overrides on the app are cleared once the client is closed.
+    """
+
+    async def _session_dependency() -> AsyncGenerator[AsyncSession, None]:
+        # Commit when the dependency is released normally; roll back and re-raise on error.
+        try:
+            yield db_session
+            await db_session.commit()
+        except Exception:
+            await db_session.rollback()
+            raise
+
+    app.dependency_overrides[get_db] = _session_dependency
+    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as http:
+        yield http
+
+    app.dependency_overrides.clear()
+
+
+# =============================================================================
+# Mock Embedding Service
+# =============================================================================
+
+
+@pytest.fixture
+def mock_embedding_service() -> EmbeddingService:
+    """Build an EmbeddingService stand-in that never calls a real provider.
+
+    Every embedding is a constant 1536-element vector of 0.1; token counting
+    is approximated by whitespace splitting and truncation is a no-op.
+    """
+    dimension = 1536
+
+    async def fake_embed_texts(texts, **kwargs):
+        # One identical vector per input text.
+        return [[0.1] * dimension for _ in texts]
+
+    async def fake_embed_query(query):
+        return [0.1] * dimension
+
+    stub = MagicMock(spec=EmbeddingService)
+    stub.embed_texts = AsyncMock(side_effect=fake_embed_texts)
+    stub.embed_query = AsyncMock(side_effect=fake_embed_query)
+    # Crude whitespace token count; truncation returns the text unchanged.
+    stub.count_tokens = MagicMock(side_effect=lambda text: len(text.split()))
+    stub.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text)
+    return stub
+
+
+# =============================================================================
+# Sample Content Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_markdown_content() -> str:
+    """Sample markdown content with headings for testing."""
+    return """# Main Title
+
+This is the introduction paragraph with some content.
+
+## Section One
+
+First section content goes here. It has multiple sentences.
+This is the second sentence. And a third one.
+
+### Subsection 1.1
+
+Subsection content with details about the topic.
+
+### Subsection 1.2
+
+More subsection content here.
+
+## Section Two
+
+Second section with different content.
+
+### Subsection 2.1
+
+Final subsection content.
+"""
+
+
+@pytest.fixture
+def sample_openapi_content() -> str:
+    """Sample OpenAPI JSON content for testing."""
+    return """{
+  "openapi": "3.0.0",
+  "info": {
+    "title": "Test API",
+    "version": "1.0.0",
+    "description": "A test API for unit testing"
+  },
+  "servers": [
+    {"url": "https://api.example.com", "description": "Production"}
+  ],
+  "paths": {
+    "/users": {
+      "get": {
+        "operationId": "listUsers",
+        "summary": "List all users",
+        "description": "Returns a paginated list of users",
+        "tags": ["users"],
+        "parameters": [
+          {
+            "name": "page",
+            "in": "query",
+            "description": "Page number",
+            "required": false
+          }
+        ],
+        "responses": {
+          "200": {"description": "Success"}
+        }
+      },
+      "post": {
+        "operationId": "createUser",
+        "summary": "Create a user",
+        "tags": ["users"],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {"type": "object", "properties": {"name": {"type": "string"}}}
+            }
+          }
+        },
+        "responses": {
+          "201": {"description": "Created"}
+        }
+      }
+    }
+  }
+}"""
+
+
+@pytest.fixture
+def sample_large_markdown_content() -> str:
+    """Return a generated 50-section markdown document for multi-chunk tests."""
+    # Every section is a level-2 heading followed by one four-sentence paragraph;
+    # each ends in a newline, so the join leaves a blank line between sections.
+    sections = [
+        f"## Section {n}\n\n"
+        f"This is paragraph {n} with enough content to make it substantial. "
+        f"It contains multiple sentences to ensure proper chunking behavior. "
+        f"The content is designed to test the chunker's ability to handle large documents. "
+        f"Each section has similar structure but different section numbers.\n"
+        for n in range(50)
+    ]
+    return "\n".join(sections)
+
+
+# =============================================================================
+# Schema Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_index_request() -> IndexRequest:
+    """Sample index request for testing."""
+    return IndexRequest(
+        source_type="markdown",
+        source_path="test-document.md",
+        content="# Test\n\nThis is test content.",
+        metadata={"category": "testing"},
+    )
+
+
+@pytest.fixture
+def sample_retrieve_request() -> RetrieveRequest:
+    """Sample retrieve request for testing."""
+    return RetrieveRequest(
+        query="What is the test about?",
+        top_k=5,
+        similarity_threshold=0.7,
+    )
+
+
+# =============================================================================
+# Model Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_document_source() -> DocumentSource:
+    """Sample DocumentSource ORM object for testing."""
+    return DocumentSource(
+        source_id="test123456789012345678901234",
+        source_type="markdown",
+        source_path="test-sample.md",
+        content_hash="a" * 64,
+        metadata_={"category": "testing"},
+        indexed_at=datetime.now(UTC),
+    )
+
+
+@pytest.fixture
+def sample_document_chunk() -> DocumentChunk:
+    """Sample DocumentChunk ORM object for testing."""
+    return DocumentChunk(
+        chunk_id="chunk12345678901234567890123",
+        source_id=1,
+        chunk_index=0,
+        content="Test chunk content",
+        embedding=[0.1] * 1536,
+        token_count=3,
+        metadata_={"heading": "Test"},
+    )
diff --git a/app/features/rag/tests/test_chunkers.py b/app/features/rag/tests/test_chunkers.py
new file mode 100644
index 00000000..77d63141
--- /dev/null
+++ b/app/features/rag/tests/test_chunkers.py
@@ -0,0 +1,295 @@
+"""Unit tests for RAG chunkers."""
+
+import json
+
+import pytest
+
+from app.features.rag.chunkers import (
+    BaseChunker,
+    ChunkData,
+    MarkdownChunker,
+    OpenAPIChunker,
+    get_chunker,
+)
+
+
+class TestMarkdownChunker:
+    """Tests for MarkdownChunker."""
+
+    def test_chunk_simple_document(self, sample_markdown_content):
+        """Test chunking a simple markdown document."""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(sample_markdown_content)
+
+        assert len(chunks) > 0
+        for chunk in chunks:
+            assert isinstance(chunk, ChunkData)
+            assert chunk.content
+            assert chunk.token_count > 0
+
+    def test_chunk_respects_heading_boundaries(self):
+        """Every heading of a small document survives chunking."""
+        content = """# Title
+
+Introduction.
+
+## Section One
+
+Content one.
+
+## Section Two
+
+Content two.
+"""
+        chunker = MarkdownChunker()
+        # Large enough that no single section has to be split.
+        chunker.chunk_size = 1000
+
+        # Only presence of each heading text in the combined output is checked.
+        combined = "\n".join(piece.content for piece in chunker.chunk(content))
+
+        # "Title" also matches when the chunk keeps the leading "# " marker.
+        assert "Title" in combined
+        assert "Section One" in combined
+        assert "Section Two" in combined
+
+    def test_chunk_extracts_heading_metadata(self):
+        """Test that heading metadata is extracted."""
+        content = """# Main
+
+## Sub
+
+Content here.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        # Find chunk with heading metadata
+        chunks_with_headings = [c for c in chunks if c.metadata.get("heading")]
+        assert len(chunks_with_headings) > 0
+
+        # Check section_path is populated
+        for chunk in chunks_with_headings:
+            if chunk.metadata.get("section_path"):
+                assert isinstance(chunk.metadata["section_path"], list)
+
+    def test_chunk_respects_chunk_size(self, sample_large_markdown_content):
+        """Test that chunks respect the configured chunk size."""
+        chunker = MarkdownChunker()
+        chunker.chunk_size = 200  # Small chunk size
+        chunks = chunker.chunk(sample_large_markdown_content)
+
+        # Chunks should not vastly exceed chunk size
+        for chunk in chunks:
+            # Allow some tolerance for overlap and heading context
+            assert chunk.token_count <= chunker.chunk_size * 2
+
+    def test_chunk_handles_empty_content(self):
+        """Test handling of empty content."""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk("")
+
+        assert len(chunks) == 0
+
+    def test_chunk_handles_content_without_headings(self):
+        """Test handling content without headings."""
+        content = "This is just plain text without any headings. It has multiple sentences."
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        assert len(chunks) >= 1
+        assert chunks[0].content.strip() == content.strip()
+
+    def test_chunk_updates_heading_path_correctly(self):
+        """Test heading path updates with nested headings."""
+        content = """# Level 1
+
+## Level 2
+
+### Level 3
+
+Back to level 2 content.
+
+## Another Level 2
+
+Content here.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        # Find chunks with section_path
+        paths = [c.metadata.get("section_path") for c in chunks if c.metadata.get("section_path")]
+
+        # Should have various heading depths
+        assert len(paths) > 0
+
+    def test_chunk_token_counting(self):
+        """Token count for a short sentence is positive and below its length."""
+        sample = "Hello, this is a test."
+
+        n_tokens = MarkdownChunker().count_tokens(sample)
+
+        # Bounds the count rather than pinning an exact value: there should be
+        # at least one token and fewer tokens than characters.
+        assert n_tokens > 0
+        assert n_tokens < len(sample)
+
+    def test_chunk_indices_are_sequential(self):
+        """Test that chunk indices are sequential."""
+        content = """# One
+
+Content one.
+
+# Two
+
+Content two.
+
+# Three
+
+Content three.
+"""
+        chunker = MarkdownChunker()
+        chunks = chunker.chunk(content)
+
+        indices = [c.index for c in chunks]
+        expected = list(range(len(chunks)))
+        assert indices == expected
+
+    def test_overlap_text_extraction(self):
+        """Overlap extracted from a sentence is non-empty and taken from that sentence."""
+        source_text = "This is a longer piece of text that we want to extract overlap from."
+        chunker = MarkdownChunker()
+        chunker.chunk_overlap = 10
+
+        tail = chunker._get_overlap_text(source_text)
+        assert len(tail) > 0
+        # A suffix is also a substring, so one containment check covers both cases.
+        assert tail in source_text
+
+
+class TestOpenAPIChunker:
+    """Tests for OpenAPIChunker."""
+
+    def test_chunk_openapi_json(self, sample_openapi_content):
+        """Test chunking OpenAPI JSON content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        assert len(chunks) >= 2  # At least info + endpoints
+
+        # Check for endpoint metadata
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+        assert len(endpoint_chunks) >= 2  # GET and POST /users
+
+    def test_chunk_creates_info_chunk(self, sample_openapi_content):
+        """Test that an info chunk is created."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        info_chunks = [c for c in chunks if c.metadata.get("type") == "api_info"]
+        assert len(info_chunks) == 1
+        assert "Test API" in info_chunks[0].content
+
+    def test_chunk_extracts_endpoint_metadata(self, sample_openapi_content):
+        """Test endpoint metadata extraction."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+
+        # Check GET /users endpoint
+        get_users = [
+            c
+            for c in endpoint_chunks
+            if c.metadata.get("path") == "/users" and c.metadata.get("method") == "GET"
+        ]
+        assert len(get_users) == 1
+        assert get_users[0].metadata.get("operation_id") == "listUsers"
+
+    def test_chunk_includes_parameters(self, sample_openapi_content):
+        """Test that parameters are included in chunk content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        endpoint_chunks = [c for c in chunks if c.metadata.get("type") == "endpoint"]
+        get_users = next(c for c in endpoint_chunks if c.metadata.get("method") == "GET")
+
+        assert "Parameters" in get_users.content
+        assert "page" in get_users.content
+
+    def test_chunk_handles_invalid_json(self):
+        """Test handling of invalid JSON content."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk("not valid json")
+
+        # Should fall back to markdown chunking
+        assert len(chunks) >= 1
+
+    def test_chunk_handles_minimal_spec(self):
+        """Test handling minimal OpenAPI spec."""
+        minimal_spec = json.dumps(
+            {
+                "openapi": "3.0.0",
+                "info": {"title": "Minimal", "version": "1.0"},
+                "paths": {},
+            }
+        )
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(minimal_spec)
+
+        # Should at least have info chunk
+        assert len(chunks) >= 1
+
+    def test_chunk_respects_token_limit(self, sample_openapi_content):
+        """Test that chunks don't exceed token limit."""
+        chunker = OpenAPIChunker()
+        chunks = chunker.chunk(sample_openapi_content)
+
+        for chunk in chunks:
+            assert chunk.token_count <= BaseChunker.MAX_TOKENS_PER_CHUNK
+
+
+class TestGetChunker:
+    """Tests for get_chunker factory function."""
+
+    def test_get_markdown_chunker(self):
+        """Test getting markdown chunker."""
+        chunker = get_chunker("markdown")
+        assert isinstance(chunker, MarkdownChunker)
+
+    def test_get_openapi_chunker(self):
+        """Test getting openapi chunker."""
+        chunker = get_chunker("openapi")
+        assert isinstance(chunker, OpenAPIChunker)
+
+    def test_invalid_source_type_raises(self):
+        """An unknown source type is rejected with a descriptive ValueError."""
+        # ``match`` performs a regex search against str(exception).
+        with pytest.raises(ValueError, match="Unsupported source type"):
+            get_chunker("invalid_type")
+
+
+class TestChunkData:
+    """Tests for ChunkData dataclass."""
+
+    def test_chunk_data_creation(self):
+        """Test creating ChunkData."""
+        chunk = ChunkData(
+            content="Test content",
+            index=0,
+            token_count=2,
+            metadata={"heading": "Test"},
+        )
+        assert chunk.content == "Test content"
+        assert chunk.index == 0
+        assert chunk.token_count == 2
+        assert chunk.metadata == {"heading": "Test"}
+
+    def test_chunk_data_default_metadata(self):
+        """Test default metadata is empty dict."""
+        chunk = ChunkData(
+            content="Test",
+            index=0,
+            token_count=1,
+        )
+        assert chunk.metadata == {}
diff --git a/app/features/rag/tests/test_embeddings.py b/app/features/rag/tests/test_embeddings.py
new file mode 100644
index 00000000..2eb59b70
--- /dev/null
+++ b/app/features/rag/tests/test_embeddings.py
@@ -0,0 +1,452 @@
+"""Unit tests for RAG embedding providers."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from app.features.rag.embeddings import (
+    EmbeddingError,
+    EmbeddingProvider,
+    EmbeddingService,
+    OllamaEmbeddingProvider,
+    OpenAIEmbeddingProvider,
+    get_embedding_service,
+    reset_embedding_service,
+)
+
+
+class TestEmbeddingProvider:
+    """Tests for EmbeddingProvider abstract base class."""
+
+    def test_cannot_instantiate_directly(self):
+        """Test that EmbeddingProvider cannot be instantiated directly."""
+        with pytest.raises(TypeError):
+            EmbeddingProvider()  # type: ignore[abstract]
+
+
+class TestOpenAIEmbeddingProvider:
+    """Tests for OpenAIEmbeddingProvider."""
+
+    def test_init_without_api_key(self):
+        """Test initialization without API key."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = ""
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            provider = OpenAIEmbeddingProvider()
+            # Should not raise during init
+            assert provider._client is None
+
+    def test_get_client_raises_without_api_key(self):
+        """_get_client raises EmbeddingError when the API key is empty."""
+        with patch("app.features.rag.embeddings.get_settings") as settings_factory:
+            settings_factory.return_value.openai_api_key = ""
+            provider = OpenAIEmbeddingProvider()
+
+            # ``match`` is a regex search over str(exception).
+            with pytest.raises(EmbeddingError, match="API key not configured"):
+                provider._get_client()
+
+    def test_dimension_property(self):
+        """Test dimension property returns configured value."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_dimension = 768
+            provider = OpenAIEmbeddingProvider()
+
+            assert provider.dimension == 768
+
+    def test_count_tokens(self):
+        """Test token counting."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            count = provider.count_tokens("Hello, world!")
+            assert count > 0
+            assert count < 20  # Should be a reasonable count
+
+    def test_count_tokens_empty_string(self):
+        """Test token counting for empty string."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            count = provider.count_tokens("")
+            assert count == 0
+
+    def test_truncate_to_tokens(self):
+        """Test token truncation."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            long_text = "This is a longer piece of text that will be truncated."
+            truncated = provider.truncate_to_tokens(long_text, 5)
+
+            assert len(truncated) < len(long_text)
+            assert provider.count_tokens(truncated) <= 5
+
+    def test_truncate_to_tokens_no_truncation_needed(self):
+        """Test truncation when text is already within limit."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            short_text = "Hi"
+            truncated = provider.truncate_to_tokens(short_text, 100)
+
+            assert truncated == short_text
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_empty_list(self):
+        """Test embedding empty list returns empty list."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            provider = OpenAIEmbeddingProvider()
+
+            result = await provider.embed_texts([])
+            assert result == []
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_batching(self):
+        """Test that texts are batched correctly."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 2
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+
+            # Need to adjust mock to handle multiple calls
+            mock_response_1 = MagicMock()
+            mock_response_1.data = [
+                MagicMock(embedding=[0.1] * 1536),
+                MagicMock(embedding=[0.2] * 1536),
+            ]
+            mock_response_1.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+
+            mock_response_2 = MagicMock()
+            mock_response_2.data = [
+                MagicMock(embedding=[0.3] * 1536),
+                MagicMock(embedding=[0.4] * 1536),
+            ]
+            mock_response_2.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+
+            mock_client.embeddings.create = AsyncMock(
+                side_effect=[mock_response_1, mock_response_2]
+            )
+            provider._client = mock_client
+
+            # Test with 4 texts (should be 2 batches)
+            texts = ["text1", "text2", "text3", "text4"]
+            result = await provider.embed_texts(texts)
+
+            assert len(result) == 4
+            assert mock_client.embeddings.create.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_embed_query_returns_single_embedding(self):
+        """Test embed_query returns single embedding."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+            mock_response = MagicMock()
+            mock_response.data = [MagicMock(embedding=[0.1] * 1536)]
+            mock_response.usage = MagicMock(prompt_tokens=5, total_tokens=5)
+            mock_client.embeddings.create = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_query("test query")
+
+            assert len(result) == 1536
+            assert result == [0.1] * 1536
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_truncates_long_input(self):
+        """Embed one short text via a mocked client; truncation itself is not asserted."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.openai_api_key = "test-key"
+            mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+            mock_settings.return_value.rag_embedding_dimension = 1536
+            mock_settings.return_value.rag_embedding_batch_size = 100
+
+            provider = OpenAIEmbeddingProvider()
+
+            # Mock the client
+            mock_client = MagicMock()
+            mock_response = MagicMock()
+            mock_response.data = [MagicMock(embedding=[0.1] * 1536)]
+            mock_response.usage = MagicMock(prompt_tokens=100, total_tokens=100)
+            mock_client.embeddings.create = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            # NOTE(review): the input is short, so no truncation path is exercised here.
+            result = await provider.embed_texts(["short text"])
+
+            assert len(result) == 1
+
+
+class TestOllamaEmbeddingProvider:
+    """Tests for OllamaEmbeddingProvider."""
+
+    def test_init(self):
+        """Test initialization."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            assert provider._client is None
+
+    def test_dimension_property(self):
+        """Test dimension property returns configured value."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            assert provider.dimension == 768
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_empty_list(self):
+        """Test embedding empty list returns empty list."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+            result = await provider.embed_texts([])
+            assert result == []
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_success(self):
+        """Test successful embedding generation."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock the HTTP client with OpenAI-compatible response format
+            mock_response = MagicMock()
+            mock_response.json.return_value = {
+                "data": [
+                    {"embedding": [0.1] * 768, "index": 0},
+                    {"embedding": [0.2] * 768, "index": 1},
+                ]
+            }
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_texts(["text1", "text2"])
+
+            assert len(result) == 2
+            assert result[0] == [0.1] * 768
+            assert result[1] == [0.2] * 768
+            mock_client.post.assert_called_once_with(
+                "/v1/embeddings",
+                json={
+                    "model": "nomic-embed-text",
+                    "input": ["text1", "text2"],
+                    "dimensions": 768,
+                },
+            )
+
+    @pytest.mark.asyncio
+    async def test_embed_query_returns_single_embedding(self):
+        """Test embed_query returns single embedding."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock the HTTP client with OpenAI-compatible response format
+            mock_response = MagicMock()
+            mock_response.json.return_value = {"data": [{"embedding": [0.5] * 768, "index": 0}]}
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            result = await provider.embed_query("test query")
+
+            assert len(result) == 768
+            assert result == [0.5] * 768
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_model_not_found(self):
+        """Test error handling when model not found."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nonexistent-model"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock 404 response
+            mock_response = MagicMock()
+            mock_response.status_code = 404
+            error = httpx.HTTPStatusError(
+                "Not Found",
+                request=MagicMock(),
+                response=mock_response,
+            )
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(side_effect=error)
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["test"])
+            assert "not found" in str(exc_info.value).lower()
+            assert "ollama pull" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_connection_error(self):
+        """Test error handling when Ollama not reachable."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock connection error
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(side_effect=httpx.ConnectError("Connection refused"))
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["test"])
+            assert "Failed to connect to Ollama" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_embed_texts_count_mismatch(self):
+        """Test error when embedding count doesn't match input count."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock response with wrong count (OpenAI-compatible format)
+            mock_response = MagicMock()
+            mock_response.json.return_value = {
+                "data": [{"embedding": [0.1] * 768, "index": 0}]  # Only 1 embedding for 2 texts
+            }
+            mock_response.raise_for_status = MagicMock()
+
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.post = AsyncMock(return_value=mock_response)
+            provider._client = mock_client
+
+            with pytest.raises(EmbeddingError) as exc_info:
+                await provider.embed_texts(["text1", "text2"])
+            assert "mismatch" in str(exc_info.value).lower()
+
+    @pytest.mark.asyncio
+    async def test_close(self):
+        """Test close method properly closes HTTP client."""
+        with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+            mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+            mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+            mock_settings.return_value.rag_embedding_dimension = 768
+
+            provider = OllamaEmbeddingProvider()
+
+            # Mock client
+            mock_client = MagicMock(spec=httpx.AsyncClient)
+            mock_client.aclose = AsyncMock()
+            provider._client = mock_client
+
+            await provider.close()
+
+            mock_client.aclose.assert_called_once()
+            assert provider._client is None
+
+
+class TestGetEmbeddingService:
+    """Tests for get_embedding_service factory."""
+
+    def test_returns_openai_by_default(self):
+        """Test that OpenAI provider is returned by default."""
+        reset_embedding_service()
+        try:  # reset in finally so a failing assertion cannot leak the service
+            with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+                mock_settings.return_value.rag_embedding_provider = "openai"
+                mock_settings.return_value.openai_api_key = ""
+                mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+                mock_settings.return_value.rag_embedding_dimension = 1536
+                mock_settings.return_value.rag_embedding_batch_size = 100
+
+                provider = get_embedding_service()
+                assert isinstance(provider, OpenAIEmbeddingProvider)
+        finally:
+            reset_embedding_service()
+
+    def test_returns_ollama_when_configured(self):
+        """Test that Ollama provider is returned when configured."""
+        reset_embedding_service()
+        try:  # reset in finally so a failing assertion cannot leak the service
+            with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+                mock_settings.return_value.rag_embedding_provider = "ollama"
+                mock_settings.return_value.ollama_base_url = "http://localhost:11434"
+                mock_settings.return_value.ollama_embedding_model = "nomic-embed-text"
+                mock_settings.return_value.rag_embedding_dimension = 768
+
+                provider = get_embedding_service()
+                assert isinstance(provider, OllamaEmbeddingProvider)
+        finally:
+            reset_embedding_service()
+
+    def test_returns_same_instance(self):
+        """Test that two consecutive factory calls return the same instance."""
+        reset_embedding_service()
+        try:  # reset in finally so a failing assertion cannot leak the service
+            with patch("app.features.rag.embeddings.get_settings") as mock_settings:
+                mock_settings.return_value.rag_embedding_provider = "openai"
+                mock_settings.return_value.openai_api_key = ""
+                mock_settings.return_value.rag_embedding_model = "text-embedding-3-small"
+                mock_settings.return_value.rag_embedding_dimension = 1536
+                mock_settings.return_value.rag_embedding_batch_size = 100
+
+                provider1 = get_embedding_service()
+                provider2 = get_embedding_service()
+                assert provider1 is provider2
+        finally:
+            reset_embedding_service()
+
+
+class TestEmbeddingServiceAlias:
+    """Tests for backwards compatibility alias."""
+
+    def test_embedding_service_is_openai_provider(self):
+        """Test that EmbeddingService alias points to OpenAIEmbeddingProvider."""
+        assert EmbeddingService is OpenAIEmbeddingProvider
diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py
new file mode 100644
index 00000000..ce09a05a
--- /dev/null
+++ b/app/features/rag/tests/test_routes.py
@@ -0,0 +1,433 @@
+"""Integration tests for RAG API routes.
+
+These tests require:
+- PostgreSQL running with pgvector extension (docker-compose up -d)
+- Migrations applied (uv run alembic upgrade head)
+
+Note: These tests mock the OpenAI embedding service to avoid API calls.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from httpx import AsyncClient
+
+from app.features.rag.embeddings import EmbeddingService
+
+# =============================================================================
+# Mock Embedding Service for Integration Tests
+# =============================================================================
+
+
+def create_mock_embedding_service() -> EmbeddingService:
+    """Create a mock embedding service for integration tests."""
+    service = MagicMock(spec=EmbeddingService)
+
+    async def mock_embed_texts(texts, **kwargs):
+        return [[0.1 + i * 0.01] * 1536 for i, _ in enumerate(texts)]
+
+    async def mock_embed_query(query):
+        return [0.1] * 1536
+
+    service.embed_texts = AsyncMock(side_effect=mock_embed_texts)
+    service.embed_query = AsyncMock(side_effect=mock_embed_query)
+    service.count_tokens = MagicMock(side_effect=lambda text: len(text.split()))
+    service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text)
+
+    return service
+
+
+# =============================================================================
+# Index Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestIndexEndpoint:
+    """Integration tests for POST /rag/index endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_index_markdown_creates_chunks(self, client: AsyncClient):
+        """Test that indexing markdown creates chunks in database."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-index-md-001",
+                    "content": "# Test Document\n\nThis is test content for indexing.",
+                    "metadata": {"category": "testing"},
+                },
+            )
+
+        assert response.status_code == 201
+        data = response.json()
+        assert data["status"] == "indexed"
+        assert data["chunks_created"] >= 1
+        assert data["source_path"] == "test-index-md-001"
+        assert "source_id" in data
+
+    @pytest.mark.asyncio
+    async def test_index_same_content_returns_unchanged(self, client: AsyncClient):
+        """Test that re-indexing unchanged content returns 'unchanged' status."""
+        mock_service = create_mock_embedding_service()
+
+        content = "# Unchanged\n\nSame content twice."
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First index
+            response1 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-unchanged-001",
+                    "content": content,
+                },
+            )
+            assert response1.status_code == 201
+            assert response1.json()["status"] == "indexed"
+
+            # Second index with same content
+            response2 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-unchanged-001",
+                    "content": content,
+                },
+            )
+            assert response2.status_code == 201
+            assert response2.json()["status"] == "unchanged"
+
+    @pytest.mark.asyncio
+    async def test_index_updated_content_re_indexes(self, client: AsyncClient):
+        """Test that updated content triggers re-indexing."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First index
+            response1 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-updated-001",
+                    "content": "# Original\n\nOriginal content.",
+                },
+            )
+            assert response1.status_code == 201
+            source_id = response1.json()["source_id"]
+
+            # Second index with different content
+            response2 = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-updated-001",
+                    "content": "# Updated\n\nNew updated content.",
+                },
+            )
+            assert response2.status_code == 201
+            assert response2.json()["status"] == "updated"
+            assert response2.json()["source_id"] == source_id
+
+    @pytest.mark.asyncio
+    async def test_index_invalid_source_type(self, client: AsyncClient):
+        """Test that invalid source type returns 422."""
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "invalid",
+                "source_path": "test.txt",
+                "content": "test",
+            },
+        )
+        assert response.status_code == 422
+
+    @pytest.mark.asyncio
+    async def test_index_file_not_found(self, client: AsyncClient):
+        """Test that missing file returns 404."""
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "markdown",
+                "source_path": "/nonexistent/path/file.md",
+            },
+        )
+        assert response.status_code == 404
+
+
+# =============================================================================
+# Retrieve Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestRetrieveEndpoint:
+    """Integration tests for POST /rag/retrieve endpoint."""
+
+    @pytest.mark.asyncio
+    async def test_retrieve_returns_relevant_chunks(self, client: AsyncClient):
+        """Test that retrieval succeeds and the response has results and timing fields."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # First, index a document
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-retrieve-001",
+                    "content": "# Backtesting Guide\n\nBacktesting prevents data leakage by using time-based splits.",
+                },
+            )
+
+            # Then retrieve
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "How does backtesting prevent leakage?",
+                    "top_k": 5,
+                    "similarity_threshold": 0.0,  # Low threshold to ensure results
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "results" in data
+        assert "query_embedding_time_ms" in data
+        assert "search_time_ms" in data
+        assert "total_chunks_searched" in data
+
+    @pytest.mark.asyncio
+    async def test_retrieve_respects_threshold(self, client: AsyncClient):
+        """Test that every returned chunk scores at or above the requested threshold."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-threshold-001",
+                    "content": "# Test Content\n\nSome test content here.",
+                },
+            )
+
+            # Retrieve with very high threshold
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "unrelated query",
+                    "top_k": 5,
+                    "similarity_threshold": 0.99,  # Very high threshold
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert isinstance(data["results"], list)  # may legitimately be empty
+        assert all(chunk["relevance_score"] >= 0.99 for chunk in data["results"])
+
+    @pytest.mark.asyncio
+    async def test_retrieve_empty_database(self, client: AsyncClient):
+        """Test retrieval without prior indexing in this test returns 200 and a results list."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/retrieve",
+                json={
+                    "query": "anything",
+                    "top_k": 5,
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert isinstance(data["results"], list)
+
+    @pytest.mark.asyncio
+    async def test_retrieve_validates_query(self, client: AsyncClient):
+        """Test that empty query is rejected."""
+        response = await client.post(
+            "/rag/retrieve",
+            json={
+                "query": "",
+                "top_k": 5,
+            },
+        )
+        assert response.status_code == 422
+
+
+# =============================================================================
+# Sources Endpoint Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestSourcesEndpoint:
+    """Integration tests for /rag/sources endpoints."""
+
+    @pytest.mark.asyncio
+    async def test_list_sources_returns_all(self, client: AsyncClient):
+        """Test listing all indexed sources."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a couple of documents
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-list-001",
+                    "content": "# First Doc",
+                },
+            )
+            await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-list-002",
+                    "content": "# Second Doc",
+                },
+            )
+
+            # List sources
+            response = await client.get("/rag/sources")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "sources" in data
+        assert "total_sources" in data
+        assert "total_chunks" in data
+        assert data["total_sources"] >= 2
+
+    @pytest.mark.asyncio
+    async def test_delete_source_removes_chunks(self, client: AsyncClient):
+        """Test that deleting a source removes all its chunks."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            index_response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-delete-001",
+                    "content": "# Delete Me\n\nThis will be deleted.",
+                },
+            )
+            source_id = index_response.json()["source_id"]
+
+            # Delete the source
+            delete_response = await client.delete(f"/rag/sources/{source_id}")
+
+        assert delete_response.status_code == 200
+        data = delete_response.json()
+        assert data["status"] == "deleted"
+        assert data["chunks_deleted"] >= 1
+
+    @pytest.mark.asyncio
+    async def test_delete_nonexistent_returns_404(self, client: AsyncClient):
+        """Test that deleting non-existent source returns 404."""
+        response = await client.delete("/rag/sources/nonexistent123456789012")
+        assert response.status_code == 404
+
+    @pytest.mark.asyncio
+    async def test_source_not_in_list_after_delete(self, client: AsyncClient):
+        """Test that deleted source no longer appears in list."""
+        mock_service = create_mock_embedding_service()
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            # Index a document
+            index_response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "markdown",
+                    "source_path": "test-delete-verify-001",
+                    "content": "# Verify Delete",
+                },
+            )
+            source_id = index_response.json()["source_id"]
+
+            # Delete the source
+            await client.delete(f"/rag/sources/{source_id}")
+
+            # Verify not in list
+            list_response = await client.get("/rag/sources")
+            source_ids = [s["source_id"] for s in list_response.json()["sources"]]
+            assert source_id not in source_ids
+
+
+# =============================================================================
+# OpenAPI Indexing Tests
+# =============================================================================
+
+
+@pytest.mark.integration
+class TestOpenAPIIndexing:
+    """Integration tests for OpenAPI document indexing."""
+
+    @pytest.mark.asyncio
+    async def test_index_openapi_creates_endpoint_chunks(self, client: AsyncClient):
+        """Test that OpenAPI spec creates endpoint-based chunks."""
+        mock_service = create_mock_embedding_service()
+
+        openapi_spec = """{
+            "openapi": "3.0.0",
+            "info": {"title": "Test API", "version": "1.0"},
+            "paths": {
+                "/users": {
+                    "get": {"summary": "List users", "operationId": "listUsers", "responses": {"200": {"description": "OK"}}},
+                    "post": {"summary": "Create user", "operationId": "createUser", "responses": {"201": {"description": "Created"}}}
+                }
+            }
+        }"""
+
+        with patch(
+            "app.features.rag.service.get_embedding_service",
+            return_value=mock_service,
+        ):
+            response = await client.post(
+                "/rag/index",
+                json={
+                    "source_type": "openapi",
+                    "source_path": "test-openapi-001",
+                    "content": openapi_spec,
+                },
+            )
+
+        assert response.status_code == 201
+        data = response.json()
+        # Should have at least: info chunk + 2 endpoint chunks
+        assert data["chunks_created"] >= 3
diff --git a/app/features/rag/tests/test_schemas.py b/app/features/rag/tests/test_schemas.py
new file mode 100644
index 00000000..a3bb0292
--- /dev/null
+++ b/app/features/rag/tests/test_schemas.py
@@ -0,0 +1,345 @@
+"""Unit tests for RAG schemas."""
+
+import pytest
+from pydantic import ValidationError
+
+from app.features.rag.schemas import (
+    ChunkResult,
+    DeleteResponse,
+    IndexRequest,
+    IndexResponse,
+    RetrieveRequest,
+    RetrieveResponse,
+    SourceListResponse,
+    SourceResponse,
+)
+
+
+class TestIndexRequest:
+    """Tests for IndexRequest schema."""
+
+    def test_valid_markdown_request(self):
+        """Test valid markdown index request."""
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="docs/README.md",
+            content="# Hello\n\nWorld",
+            metadata={"category": "docs"},
+        )
+        assert request.source_type == "markdown"
+        assert request.source_path == "docs/README.md"
+        assert request.content == "# Hello\n\nWorld"
+        assert request.metadata == {"category": "docs"}
+
+    def test_valid_openapi_request(self):
+        """Test valid openapi index request."""
+        request = IndexRequest(
+            source_type="openapi",
+            source_path="api/openapi.json",
+        )
+        assert request.source_type == "openapi"
+        assert request.content is None
+        assert request.metadata is None
+
+    def test_invalid_source_type(self):
+        """Test invalid source type is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="invalid",  # type: ignore[arg-type]
+                source_path="test.txt",
+            )
+        assert "source_type" in str(exc_info.value)
+
+    def test_empty_source_path_rejected(self):
+        """Test empty source path is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="",
+            )
+        assert "source_path" in str(exc_info.value)
+
+    def test_source_path_max_length(self):
+        """Test source path max length is enforced."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="x" * 501,
+            )
+        assert "source_path" in str(exc_info.value)
+
+    def test_extra_fields_rejected(self):
+        """Test extra fields are rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            IndexRequest(
+                source_type="markdown",
+                source_path="test.md",
+                extra_field="not allowed",  # type: ignore[call-arg]
+            )
+        assert "extra_field" in str(exc_info.value)
+
+
+class TestRetrieveRequest:
+    """Tests for RetrieveRequest schema."""
+
+    def test_valid_request_defaults(self):
+        """Test valid request with defaults."""
+        request = RetrieveRequest(query="What is forecasting?")
+        assert request.query == "What is forecasting?"
+        assert request.top_k == 5
+        assert request.similarity_threshold == 0.7
+        assert request.filters is None
+
+    def test_valid_request_custom_params(self):
+        """Test valid request with custom parameters."""
+        request = RetrieveRequest(
+            query="How does backtesting work?",
+            top_k=10,
+            similarity_threshold=0.8,
+            filters={"source_type": ["markdown"]},
+        )
+        assert request.top_k == 10
+        assert request.similarity_threshold == 0.8
+        assert request.filters == {"source_type": ["markdown"]}
+
+    def test_empty_query_rejected(self):
+        """Test empty query is rejected."""
+        with pytest.raises(ValidationError) as exc_info:
+            RetrieveRequest(query="")
+        assert "query" in str(exc_info.value)
+
+    def test_query_max_length(self):
+        """Test query max length is enforced."""
+        with pytest.raises(ValidationError) as exc_info:
+            RetrieveRequest(query="x" * 2001)
+        assert "query" in str(exc_info.value)
+
+    def test_top_k_bounds(self):
+        """Test top_k bounds are enforced."""
+        # Below minimum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", top_k=0)
+
+        # Above maximum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", top_k=51)
+
+        # Valid bounds
+        request_min = RetrieveRequest(query="test", top_k=1)
+        assert request_min.top_k == 1
+
+        request_max = RetrieveRequest(query="test", top_k=50)
+        assert request_max.top_k == 50
+
+    def test_similarity_threshold_bounds(self):
+        """Test similarity threshold bounds are enforced."""
+        # Below minimum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", similarity_threshold=-0.1)
+
+        # Above maximum
+        with pytest.raises(ValidationError):
+            RetrieveRequest(query="test", similarity_threshold=1.1)
+
+        # Valid bounds
+        request_min = RetrieveRequest(query="test", similarity_threshold=0.0)
+        assert request_min.similarity_threshold == 0.0
+
+        request_max = RetrieveRequest(query="test", similarity_threshold=1.0)
+        assert request_max.similarity_threshold == 1.0
+
+
+class TestIndexResponse:
+    """Tests for IndexResponse schema."""
+
+    def test_indexed_status(self):
+        """Test indexed status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=5,
+            tokens_processed=1000,
+            duration_ms=123.45,
+            status="indexed",
+        )
+        assert response.status == "indexed"
+        assert response.chunks_created == 5
+
+    def test_updated_status(self):
+        """Test updated status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=3,
+            tokens_processed=500,
+            duration_ms=50.0,
+            status="updated",
+        )
+        assert response.status == "updated"
+
+    def test_unchanged_status(self):
+        """Test unchanged status response."""
+        response = IndexResponse(
+            source_id="abc123",
+            source_path="test.md",
+            chunks_created=5,
+            tokens_processed=0,
+            duration_ms=10.0,
+            status="unchanged",
+        )
+        assert response.status == "unchanged"
+        assert response.tokens_processed == 0
+
+
+class TestChunkResult:
+    """Tests for ChunkResult schema."""
+
+    def test_valid_chunk_result(self):
+        """Test valid chunk result."""
+        result = ChunkResult(
+            chunk_id="chunk123",
+            source_id="src123",
+            source_path="docs/test.md",
+            source_type="markdown",
+            content="This is chunk content",
+            relevance_score=0.95,
+            metadata={"heading": "Introduction"},
+        )
+        assert result.relevance_score == 0.95
+        assert result.metadata == {"heading": "Introduction"}
+
+    def test_relevance_score_bounds(self):
+        """Test relevance score bounds."""
+        # Valid bounds
+        result_zero = ChunkResult(
+            chunk_id="c1",
+            source_id="s1",
+            source_path="test.md",
+            source_type="markdown",
+            content="test",
+            relevance_score=0.0,
+        )
+        assert result_zero.relevance_score == 0.0
+
+        result_one = ChunkResult(
+            chunk_id="c1",
+            source_id="s1",
+            source_path="test.md",
+            source_type="markdown",
+            content="test",
+            relevance_score=1.0,
+        )
+        assert result_one.relevance_score == 1.0
+
+        # Out of bounds
+        with pytest.raises(ValidationError):
+            ChunkResult(
+                chunk_id="c1",
+                source_id="s1",
+                source_path="test.md",
+                source_type="markdown",
+                content="test",
+                relevance_score=1.5,
+            )
+
+
+class TestRetrieveResponse:
+    """Tests for RetrieveResponse schema."""
+
+    def test_valid_response(self):
+        """Test valid retrieve response."""
+        response = RetrieveResponse(
+            results=[
+                ChunkResult(
+                    chunk_id="c1",
+                    source_id="s1",
+                    source_path="test.md",
+                    source_type="markdown",
+                    content="test content",
+                    relevance_score=0.9,
+                )
+            ],
+            query_embedding_time_ms=45.5,
+            search_time_ms=12.3,
+            total_chunks_searched=100,
+        )
+        assert len(response.results) == 1
+        assert response.total_chunks_searched == 100
+
+    def test_empty_results(self):
+        """Test response with no results."""
+        response = RetrieveResponse(
+            results=[],
+            query_embedding_time_ms=50.0,
+            search_time_ms=10.0,
+            total_chunks_searched=0,
+        )
+        assert len(response.results) == 0
+
+
+class TestSourceResponse:
+    """Tests for SourceResponse schema."""
+
+    def test_valid_source_response(self):
+        """Test valid source response."""
+        from datetime import UTC, datetime
+
+        response = SourceResponse(
+            source_id="src123",
+            source_type="markdown",
+            source_path="docs/README.md",
+            chunk_count=10,
+            content_hash="a" * 64,
+            indexed_at=datetime.now(UTC),
+            metadata={"category": "docs"},
+        )
+        assert response.chunk_count == 10
+        assert response.source_type == "markdown"
+
+
+class TestSourceListResponse:
+    """Tests for SourceListResponse schema."""
+
+    def test_valid_list_response(self):
+        """Test valid source list response."""
+        from datetime import UTC, datetime
+
+        response = SourceListResponse(
+            sources=[
+                SourceResponse(
+                    source_id="src1",
+                    source_type="markdown",
+                    source_path="doc1.md",
+                    chunk_count=5,
+                    content_hash="a" * 64,
+                    indexed_at=datetime.now(UTC),
+                )
+            ],
+            total_sources=1,
+            total_chunks=5,
+        )
+        assert response.total_sources == 1
+        assert response.total_chunks == 5
+
+    def test_empty_list_response(self):
+        """Test empty source list response."""
+        response = SourceListResponse(
+            sources=[],
+            total_sources=0,
+            total_chunks=0,
+        )
+        assert len(response.sources) == 0
+
+
+class TestDeleteResponse:
+    """Tests for DeleteResponse schema."""
+
+    def test_valid_delete_response(self):
+        """Test valid delete response."""
+        response = DeleteResponse(
+            source_id="src123",
+            chunks_deleted=10,
+            status="deleted",
+        )
+        assert response.status == "deleted"
+        assert response.chunks_deleted == 10
diff --git a/app/features/rag/tests/test_service.py b/app/features/rag/tests/test_service.py
new file mode 100644
index 00000000..e68036fc
--- /dev/null
+++ b/app/features/rag/tests/test_service.py
@@ -0,0 +1,263 @@
+"""Unit tests for RAG service."""
+
+import hashlib
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from app.features.rag.schemas import IndexRequest, RetrieveRequest
+from app.features.rag.service import RAGService, SourceNotFoundError
+
+
+class TestRAGServiceUnit:
+    """Unit tests for RAGService helpers that need no database session."""
+
+    def test_compute_content_hash(self):
+        """The content hash is 64 lowercase hex chars, stable, and content-sensitive."""
+        service = RAGService()
+
+        content = "Test content"
+        hash1 = service._compute_content_hash(content)
+
+        # Should be SHA-256 hex (64 characters)
+        assert len(hash1) == 64
+        assert all(c in "0123456789abcdef" for c in hash1)
+
+        # Same content should produce same hash
+        hash2 = service._compute_content_hash(content)
+        assert hash1 == hash2
+
+        # Different content should produce different hash
+        hash3 = service._compute_content_hash("Different content")
+        assert hash1 != hash3
+
+    def test_compute_content_hash_deterministic(self):
+        """The hash equals hashlib's SHA-256 hex digest of the encoded content."""
+        service = RAGService()
+
+        content = "# Test\n\nWith some content."
+        expected = hashlib.sha256(content.encode()).hexdigest()
+
+        result = service._compute_content_hash(content)
+        assert result == expected
+
+    def test_read_content_from_path_not_found(self, tmp_path):
+        """Reading a path that does not exist raises FileNotFoundError."""
+        service = RAGService()
+        # A file under tmp_path is guaranteed absent on every platform.
+        with pytest.raises(FileNotFoundError):
+            service._read_content_from_path(str(tmp_path / "missing.md"))
+
+    def test_read_content_from_path_success(self, tmp_path):
+        """Reading an existing file returns its text unchanged."""
+        service = RAGService()
+
+        # Create test file
+        test_file = tmp_path / "test.md"
+        test_file.write_text("# Test Content")
+
+        content = service._read_content_from_path(str(test_file))
+        assert content == "# Test Content"
+
+
+class TestRAGServiceIndexDocument:
+    """Tests for RAGService.index_document with the lookup/upsert helpers patched."""
+
+    @pytest.mark.asyncio
+    async def test_index_with_content_provided(self, mock_embedding_service):
+        """Inline content with no existing source is reported as 'indexed'."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-direct-content.md",
+            content="# Test\n\nDirect content.",
+        )
+
+        # Mock session: execute() resolves to a result with no matching row
+        mock_db = AsyncMock()
+        mock_db.execute = AsyncMock(
+            return_value=MagicMock(scalar_one_or_none=MagicMock(return_value=None))
+        )
+        mock_db.flush = AsyncMock()
+        mock_db.add = MagicMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=None):
+            with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "indexed"
+        assert response.source_path == "test-direct-content.md"
+        assert response.chunks_created > 0
+
+    @pytest.mark.asyncio
+    async def test_index_unchanged_content(self, mock_embedding_service):
+        """A source whose stored hash matches the content is reported as 'unchanged'."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        content = "# Test\n\nContent."
+        content_hash = service._compute_content_hash(content)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-unchanged.md",
+            content=content,
+        )
+
+        # Mock existing source with same hash
+        mock_source = MagicMock()
+        mock_source.source_id = "existing123"
+        mock_source.content_hash = content_hash
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=mock_source):
+            with patch.object(service, "_get_chunk_count", return_value=5):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "unchanged"
+        assert response.tokens_processed == 0
+        assert response.chunks_created == 5
+
+    @pytest.mark.asyncio
+    async def test_index_updated_content(self, mock_embedding_service):
+        """A stored-hash mismatch yields 'updated' and reuses the existing source_id."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = IndexRequest(
+            source_type="markdown",
+            source_path="test-updated.md",
+            content="# Updated\n\nNew content.",
+        )
+
+        # Mock existing source with different hash
+        mock_source = MagicMock()
+        mock_source.source_id = "existing123"
+        mock_source.content_hash = "different_hash"
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_find_source_by_path", return_value=mock_source):
+            with patch.object(service, "_upsert_source_and_chunks", new_callable=AsyncMock):
+                response = await service.index_document(db=mock_db, request=request)
+
+        assert response.status == "updated"
+        assert response.source_id == "existing123"
+
+
+class TestRAGServiceRetrieve:
+    """Tests for RAGService.retrieve with the chunk-count and search helpers patched."""
+
+    @pytest.mark.asyncio
+    async def test_retrieve_calls_embedding_service(self, mock_embedding_service):
+        """retrieve() embeds the query text exactly once and reports the chunk total."""
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = RetrieveRequest(
+            query="Test query",
+            top_k=5,
+            similarity_threshold=0.7,
+        )
+
+        mock_db = AsyncMock()
+
+        with patch.object(service, "_get_total_chunk_count", return_value=100):
+            with patch.object(service, "_search_similar_chunks", return_value=[]):
+                response = await service.retrieve(db=mock_db, request=request)
+
+        # Verify embedding service was called
+        mock_embedding_service.embed_query.assert_called_once_with("Test query")
+
+        assert response.total_chunks_searched == 100
+        assert len(response.results) == 0
+
+    @pytest.mark.asyncio
+    async def test_retrieve_returns_results(self, mock_embedding_service):
+        """retrieve() returns the chunk produced by the patched similarity search."""
+        from app.features.rag.schemas import ChunkResult
+
+        service = RAGService(embedding_service=mock_embedding_service)
+
+        request = RetrieveRequest(
+            query="Test query",
+            top_k=5,
+        )
+
+        mock_db = AsyncMock()
+
+        mock_results = [
+            ChunkResult(
+                chunk_id="chunk1",
+                source_id="src1",
+                source_path="test.md",
+                source_type="markdown",
+                content="Result content",
+                relevance_score=0.95,
+            )
+        ]
+
+        with patch.object(service, "_get_total_chunk_count", return_value=50):
+            with patch.object(service, "_search_similar_chunks", return_value=mock_results):
+                response = await service.retrieve(db=mock_db, request=request)
+
+        assert len(response.results) == 1
+        assert response.results[0].relevance_score == 0.95
+
+
+class TestRAGServiceListSources:
+    """Tests for RAGService.list_sources against a mocked session."""
+
+    @pytest.mark.asyncio
+    async def test_list_sources_empty(self):
+        """With no rows from the session, list_sources reports zero sources and chunks."""
+        service = RAGService()
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.all.return_value = []
+        mock_db.execute = AsyncMock(return_value=mock_result)
+
+        response = await service.list_sources(db=mock_db)
+
+        assert response.total_sources == 0
+        assert response.total_chunks == 0
+        assert len(response.sources) == 0
+
+
+class TestRAGServiceDeleteSource:
+    """Tests for RAGService.delete_source against a mocked session."""
+
+    @pytest.mark.asyncio
+    async def test_delete_source_not_found(self):
+        """delete_source raises SourceNotFoundError when the lookup finds no row."""
+        service = RAGService()
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.scalar_one_or_none.return_value = None
+        mock_db.execute = AsyncMock(return_value=mock_result)
+
+        with pytest.raises(SourceNotFoundError):
+            await service.delete_source(db=mock_db, source_id="nonexistent")
+
+    @pytest.mark.asyncio
+    async def test_delete_source_success(self):
+        """delete_source deletes the found row and reports the patched chunk count."""
+        service = RAGService()
+
+        mock_source = MagicMock()
+        mock_source.id = 1
+
+        mock_db = AsyncMock()
+        mock_result = MagicMock()
+        mock_result.scalar_one_or_none.return_value = mock_source
+        mock_db.execute = AsyncMock(return_value=mock_result)
+        mock_db.delete = AsyncMock()
+        mock_db.flush = AsyncMock()
+
+        with patch.object(service, "_get_chunk_count", return_value=10):
+            response = await service.delete_source(db=mock_db, source_id="test123")
+
+        assert response.status == "deleted"
+        assert response.chunks_deleted == 10
+        mock_db.delete.assert_called_once_with(mock_source)
diff --git a/app/main.py b/app/main.py
index 4b425db3..323c7987 100644
--- a/app/main.py
+++ b/app/main.py
@@ -17,6 +17,7 @@
 from app.features.forecasting.routes import router as forecasting_router
 from app.features.ingest.routes import router as ingest_router
 from app.features.jobs.routes import router as jobs_router
+from app.features.rag.routes import router as rag_router
 from app.features.registry.routes import router as registry_router
 
 logger = get_logger(__name__)
@@ -82,6 +83,7 @@ def create_app() -> FastAPI:
     app.include_router(forecasting_router)
     app.include_router(backtesting_router)
     app.include_router(registry_router)
+    app.include_router(rag_router)
 
     return app
 
diff --git a/docs/PHASE-index.md b/docs/PHASE-index.md
index b655d0c9..836c63ef 100644
--- a/docs/PHASE-index.md
+++ b/docs/PHASE-index.md
@@ -16,7 +16,7 @@ This document indexes all implementation phases of the ForecastLabAI project.
 | 5 | Backtesting | Completed | PRP-6 | [5-BACKTESTING.md](./PHASE/5-BACKTESTING.md) |
 | 6 | Model Registry | Completed | PRP-7 | [6-MODEL_REGISTRY.md](./PHASE/6-MODEL_REGISTRY.md) |
 | 7 | Serving Layer | Completed | PRP-8 | [7-SERVING_LAYER.md](./PHASE/7-SERVING_LAYER.md) |
-| 8 | RAG Knowledge Base | Pending | PRP-9 | - |
+| 8 | RAG Knowledge Base | Completed | PRP-9 | [8-RAG_KNOWLEDGE_BASE.md](./PHASE/8-RAG_KNOWLEDGE_BASE.md) |
 | 9 | Agentic Layer | Pending | PRP-10 | - |
 | 10 | ForecastLab Dashboard | Pending | PRP-11 | - |
 
@@ -273,17 +273,50 @@ jobs_retention_days: int = 30
 - Pyright: 0 errors
 - Pytest: 426 unit tests passed
 
----
+### [Phase 8: RAG Knowledge Base](./PHASE/8-RAG_KNOWLEDGE_BASE.md)
 
-## Pending Phases
+**Date Completed**: 2026-02-01
 
-### Phase 8: RAG Knowledge Base ("The Memory")
-Vector storage, document ingestion, and semantic retrieval infrastructure.
-- PostgreSQL 16 + pgvector extension
-- OpenAI text-embedding-3-small embeddings (1536 dimensions)
+**Summary**: RAG Knowledge Base with pgvector and multiple embedding providers:
+- PostgreSQL pgvector for HNSW similarity search
+- Embedding Provider Pattern: OpenAI (default) and Ollama (local/LAN)
+- Ollama uses the OpenAI-compatible `/v1/embeddings` endpoint with the `dimensions` parameter
 - Markdown-aware and OpenAPI endpoint-aware chunking
-- HNSW index for cosine similarity search
-- Endpoints: POST /rag/index, POST /rag/retrieve, GET /rag/sources, DELETE /rag/sources/{id}
+- Idempotent indexing via SHA-256 content hash
+- Configurable embedding dimensions (1536 default, 768 for nomic-embed-text, etc.)
+
+**Key Deliverables**:
+- `app/features/rag/embeddings.py` - EmbeddingProvider, OpenAIEmbeddingProvider, OllamaEmbeddingProvider
+- `app/features/rag/chunkers.py` - MarkdownChunker, OpenAPIChunker
+- `app/features/rag/models.py` - DocumentSource, DocumentChunk ORM models
+- `app/features/rag/service.py` - RAGService (index, retrieve, list, delete)
+- `app/features/rag/routes.py` - API endpoints
+- `alembic/versions/b4c8d9e0f123_create_rag_tables.py` - Base RAG tables
+- `alembic/versions/c5d9e1f2g345_rag_dynamic_embedding_dimension.py` - Dynamic dimension
+
+**API Endpoints**:
+- `POST /rag/index` - Index document into knowledge base
+- `POST /rag/retrieve` - Semantic search with similarity threshold
+- `GET /rag/sources` - List indexed sources
+- `DELETE /rag/sources/{source_id}` - Delete source and chunks
+
+**Configuration (Settings)**:
+```python
+rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+rag_embedding_dimension: int = 1536
+ollama_base_url: str = "http://localhost:11434"
+ollama_embedding_model: str = "nomic-embed-text"
+```
+
+**Validation Results**:
+- Ruff: All checks passed
+- MyPy: 0 errors (117 source files)
+- Pyright: 0 errors
+- Pytest: 82 unit tests + 14 integration tests
+
+---
+
+## Pending Phases
 
 ### Phase 9: Agentic Layer ("The Brain")
 Autonomous decision-making, tool orchestration, and structured outputs using PydanticAI.
@@ -346,3 +379,4 @@ Each phase document (`docs/PHASE/X-PHASE_NAME.md`) contains:
 | 2026-01-31 | 5 | Backtesting module with time-series CV completed |
 | 2026-02-01 | 6 | Model Registry with run tracking and deployment aliases completed |
 | 2026-02-01 | 7 | Serving Layer with RFC 7807, dimensions, analytics, and jobs completed |
+| 2026-02-01 | 8 | RAG Knowledge Base with pgvector and Ollama embedding provider completed |
diff --git a/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md
new file mode 100644
index 00000000..aec1f984
--- /dev/null
+++ b/docs/PHASE/8-RAG_KNOWLEDGE_BASE.md
@@ -0,0 +1,398 @@
+# Phase 8: RAG Knowledge Base
+
+**Date Completed**: 2026-02-01
+**PRP**: PRP-9
+**Status**: ✅ Completed
+
+---
+
+## Executive Summary
+
+Phase 8 implements the RAG (Retrieval-Augmented Generation) Knowledge Base for ForecastLabAI with PostgreSQL pgvector for semantic similarity search, multiple embedding providers (OpenAI and Ollama), and evidence-grounded retrieval with citations.
+
+### Objectives Achieved
+
+1. **pgvector Integration** - HNSW index for fast cosine similarity search
+2. **Embedding Provider Pattern** - Abstract base class with OpenAI and Ollama implementations
+3. **Document Indexing** - Markdown and OpenAPI-aware chunking with content hash for idempotency
+4. **Semantic Retrieval** - Configurable top-k retrieval with similarity threshold
+5. **Source Management** - List, index, and delete document sources
+
+---
+
+## Deliverables
+
+### 1. Embedding Provider Pattern
+
+**File**: `app/features/rag/embeddings.py`
+
+Implements abstract `EmbeddingProvider` base class with two concrete implementations:
+
+```python
+class EmbeddingProvider(ABC):
+    """Abstract base class for embedding providers."""
+
+    @abstractmethod
+    async def embed_texts(self, texts: list[str]) -> list[list[float]]: ...
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> list[float]: ...
+
+    @property
+    @abstractmethod
+    def dimension(self) -> int: ...
+```
+
+**Providers**:
+
+| Provider | Endpoint | Features |
+|----------|----------|----------|
+| `OpenAIEmbeddingProvider` | OpenAI API | Batch processing, rate limit handling, token validation |
+| `OllamaEmbeddingProvider` | `/v1/embeddings` | OpenAI-compatible, configurable dimensions, local/LAN |
+
+**Factory Function**:
+
+```python
+def get_embedding_service() -> EmbeddingProvider:
+    """Returns provider based on RAG_EMBEDDING_PROVIDER config."""
+    settings = get_settings()
+    if settings.rag_embedding_provider == "ollama":
+        return OllamaEmbeddingProvider()
+    return OpenAIEmbeddingProvider()
+```
+
+### 2. Document Chunking
+
+**File**: `app/features/rag/chunkers.py`
+
+| Chunker | Source Type | Strategy |
+|---------|-------------|----------|
+| `MarkdownChunker` | `markdown` | Respects heading boundaries, extracts heading hierarchy metadata |
+| `OpenAPIChunker` | `openapi` | Chunks by endpoint, extracts method/path/parameters metadata |
+
+**ChunkData Structure**:
+
+```python
+@dataclass
+class ChunkData:
+    content: str              # Chunk text
+    token_count: int          # Token count for the chunk
+    chunk_index: int          # Position in source document
+    metadata: dict | None     # Heading path, endpoint info, etc.
+```
+
+### 3. RAG Service
+
+**File**: `app/features/rag/service.py`
+
+| Method | Description |
+|--------|-------------|
+| `index_document()` | Index document with chunking and embedding |
+| `retrieve()` | Semantic search with similarity scoring |
+| `list_sources()` | List indexed sources with statistics |
+| `delete_source()` | Delete source and its chunks |
+
+**Idempotent Indexing**:
+- SHA-256 content hash for change detection
+- Returns `"unchanged"` status if content matches existing source
+- Re-indexes only when content changes
+
+### 4. ORM Models
+
+**File**: `app/features/rag/models.py`
+
+```python
+class DocumentSource(TimestampMixin, Base):
+    """Registry of indexed document sources."""
+    __tablename__ = "document_source"
+
+    id: Mapped[int]
+    source_id: Mapped[str]      # UUID hex (32 chars)
+    source_type: Mapped[str]    # markdown, openapi
+    source_path: Mapped[str]    # File path or identifier
+    content_hash: Mapped[str]   # SHA-256 for change detection
+    metadata_: Mapped[dict]     # JSONB custom metadata
+    indexed_at: Mapped[datetime]
+
+
+class DocumentChunk(TimestampMixin, Base):
+    """Indexed document chunk with embedding."""
+    __tablename__ = "document_chunk"
+
+    id: Mapped[int]
+    chunk_id: Mapped[str]       # UUID hex (32 chars)
+    source_id: Mapped[int]      # FK to document_source
+    chunk_index: Mapped[int]    # Position in document
+    content: Mapped[str]        # Chunk text
+    embedding: Mapped[list[float]]  # Vector(dimension)
+    token_count: Mapped[int]
+    metadata_: Mapped[dict]     # Heading hierarchy, etc.
+```
+
+### 5. API Endpoints
+
+**File**: `app/features/rag/routes.py`
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/rag/index` | Index a document into the knowledge base |
+| POST | `/rag/retrieve` | Semantic search across indexed documents |
+| GET | `/rag/sources` | List all indexed sources |
+| DELETE | `/rag/sources/{source_id}` | Delete source and its chunks |
+
+---
+
+## Configuration
+
+### New Settings in `app/core/config.py`
+
+```python
+# Embedding Provider
+rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+
+# OpenAI Configuration
+openai_api_key: str = ""
+rag_embedding_model: str = "text-embedding-3-small"
+
+# Ollama Configuration
+ollama_base_url: str = "http://localhost:11434"
+ollama_embedding_model: str = "nomic-embed-text"
+
+# Common Embedding Settings
+rag_embedding_dimension: int = 1536
+rag_embedding_batch_size: int = 100
+
+# Chunking Configuration
+rag_chunk_size: int = 512         # tokens
+rag_chunk_overlap: int = 50       # tokens
+rag_min_chunk_size: int = 100     # minimum tokens per chunk
+
+# Retrieval Configuration
+rag_top_k: int = 5
+rag_similarity_threshold: float = 0.7
+rag_max_context_tokens: int = 4000
+
+# Index Configuration
+rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+rag_hnsw_m: int = 16
+rag_hnsw_ef_construction: int = 64
+```
+
+### Environment Variables
+
+**OpenAI Provider (default)**:
+```bash
+RAG_EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+RAG_EMBEDDING_DIMENSION=1536
+```
+
+**Ollama Provider (local/LAN)**:
+```bash
+RAG_EMBEDDING_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+RAG_EMBEDDING_DIMENSION=768
+```
+
+---
+
+## Database Changes
+
+### Migration: `b4c8d9e0f123_create_rag_tables.py`
+
+Creates base RAG tables with pgvector:
+
+**Tables**:
+- `document_source` - Source registry with content hash
+- `document_chunk` - Chunks with vector embeddings
+
+**Indexes**:
+- `ix_document_source_source_id` (unique)
+- `ix_document_source_source_type`
+- `ix_document_chunk_chunk_id` (unique)
+- `ix_document_chunk_source_id`
+- `ix_chunk_embedding_hnsw` - HNSW index for cosine similarity
+- `ix_chunk_metadata_gin` - GIN index for metadata filtering
+
+### Migration: `c5d9e1f2g345_rag_dynamic_embedding_dimension.py`
+
+Enables configurable embedding dimension:
+
+```python
+def upgrade() -> None:
+    dimension = int(os.environ.get("RAG_EMBEDDING_DIMENSION", "1536"))
+    op.drop_index("ix_chunk_embedding_hnsw")
+    op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dimension})")
+    op.create_index("ix_chunk_embedding_hnsw", ...)
+```
+
+**Note**: Changing the embedding dimension requires re-indexing all documents.
+
+---
+
+## Integration
+
+### Router Registration in `app/main.py`
+
+```python
+from app.features.rag.routes import router as rag_router
+
+# In create_app():
+app.include_router(rag_router)
+```
+
+### Alembic Model Import in `alembic/env.py`
+
+```python
+from app.features.rag import models as rag_models  # noqa: F401
+```
+
+---
+
+## Test Coverage
+
+### Test Files
+
+| File | Tests | Description |
+|------|-------|-------------|
+| `test_embeddings.py` | 25 | Provider pattern, OpenAI, Ollama, factory |
+| `test_chunkers.py` | 22 | Markdown and OpenAPI chunking |
+| `test_schemas.py` | 22 | Request/response validation |
+| `test_service.py` | 12 | Service unit tests |
+| `test_routes.py` | 14 | Integration tests (require DB) |
+
+### Validation Results
+
+```
+Ruff:    All checks passed
+MyPy:    0 errors (117 source files)
+Pyright: 0 errors
+Pytest:  82 unit tests passed + 14 integration tests
+```
+
+---
+
+## Directory Structure
+
+```
+app/
+├── core/
+│   └── config.py              # MODIFIED: Added RAG and Ollama settings
+├── features/
+│   └── rag/                   # NEW: RAG Knowledge Base
+│       ├── __init__.py
+│       ├── models.py          # DocumentSource, DocumentChunk ORM
+│       ├── schemas.py         # Request/response Pydantic schemas
+│       ├── embeddings.py      # EmbeddingProvider, OpenAI, Ollama
+│       ├── chunkers.py        # MarkdownChunker, OpenAPIChunker
+│       ├── service.py         # RAGService
+│       ├── routes.py          # API endpoints
+│       └── tests/
+│           ├── __init__.py
+│           ├── conftest.py
+│           ├── test_embeddings.py
+│           ├── test_chunkers.py
+│           ├── test_schemas.py
+│           ├── test_service.py
+│           └── test_routes.py
+└── main.py                    # MODIFIED: Router registration
+
+alembic/
+├── env.py                     # MODIFIED: RAG model import
+└── versions/
+    ├── b4c8d9e0f123_create_rag_tables.py         # NEW
+    └── c5d9e1f2g345_rag_dynamic_embedding_dimension.py  # NEW
+```
+
+---
+
+## API Usage Examples
+
+### Index Documents
+
+```bash
+# Index a markdown file
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "docs/ARCHITECTURE.md"
+  }'
+
+# Index with inline content
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "inline/readme",
+    "content": "# Project Overview\n\nThis is the project readme...",
+    "metadata": {"category": "documentation"}
+  }'
+
+# Index OpenAPI spec
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "openapi",
+    "source_path": "openapi.json"
+  }'
+```
+
+### Semantic Retrieval
+
+```bash
+# Basic query
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "How does backtesting work?"
+  }'
+
+# Query with filters
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "API endpoints for forecasting",
+    "top_k": 10,
+    "similarity_threshold": 0.8,
+    "filters": {
+      "source_type": "openapi"
+    }
+  }'
+```
+
+### Source Management
+
+```bash
+# List all sources
+curl http://localhost:8123/rag/sources
+
+# Delete a source
+curl -X DELETE http://localhost:8123/rag/sources/abc123def456...
+```
+
+---
+
+## Embedding Provider Comparison
+
+| Feature | OpenAI | Ollama |
+|---------|--------|--------|
+| Endpoint | OpenAI API | `/v1/embeddings` |
+| Authentication | API key required | None |
+| Rate Limiting | Yes, with backoff | No |
+| Token Validation | Yes (8191 max) | No |
+| Batch Size | Configurable (2048 max) | Native batch support |
+| Dimensions | 1536 (text-embedding-3-small) | Model-dependent |
+| Network | Internet required | Local/LAN |
+
+---
+
+## Next Phase Preparation
+
+Phase 9 (Agentic Layer) will build on this RAG infrastructure to:
+- Create RAG Assistant Agent for evidence-grounded Q&A
+- Implement citation formatting with source references
+- Add WebSocket streaming for real-time responses
+- Integrate with Experiment Orchestrator Agent
diff --git a/examples/rag/index_docs.py b/examples/rag/index_docs.py
new file mode 100644
index 00000000..3aac7722
--- /dev/null
+++ b/examples/rag/index_docs.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+"""Example: Index documentation into RAG knowledge base.
+
+This script demonstrates how to index markdown documentation
+from the docs/ directory into the RAG knowledge base.
+
+Usage:
+    # Make sure the API is running
+    uv run uvicorn app.main:app --reload --port 8123
+
+    # Run this script
+    uv run python examples/rag/index_docs.py
+
+Requirements:
+    - OPENAI_API_KEY must be set (unless RAG_EMBEDDING_PROVIDER=ollama)
+    - PostgreSQL with pgvector must be running (docker-compose up -d)
+    - Migrations applied (uv run alembic upgrade head)
+"""
+
+import asyncio
+from pathlib import Path
+
+import httpx
+
+
+async def index_markdown_docs(base_url: str = "http://localhost:8123") -> None:
+    """Index every markdown file under docs/ via POST /rag/index and print a summary.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    docs_dir = Path("docs")
+
+    if not docs_dir.exists():
+        print(f"Error: {docs_dir} directory not found")
+        return
+
+    async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client:
+        # Find all markdown files; sorted so every run visits them in the same order
+        md_files = sorted(docs_dir.rglob("*.md"))
+        print(f"Found {len(md_files)} markdown files to index")
+
+        total_chunks = 0
+        total_tokens = 0
+        indexed = 0
+        unchanged = 0
+        failed = 0
+
+        for md_file in md_files:
+            try:
+                # Read file content
+                content = md_file.read_text(encoding="utf-8")
+
+                # Index the document; as_posix() keeps source_path identical across platforms
+                response = await client.post(
+                    "/rag/index",
+                    json={
+                        "source_type": "markdown",
+                        "source_path": md_file.as_posix(),
+                        "content": content,
+                        "metadata": {
+                            "category": "documentation",
+                            "file_type": "markdown",
+                        },
+                    },
+                )
+
+                if response.status_code == 201:
+                    result = response.json()
+                    status = result["status"]
+
+                    if status == "unchanged":
+                        unchanged += 1
+                        print(f"  [unchanged] {md_file}")
+                    else:
+                        indexed += 1
+                        total_chunks += result["chunks_created"]
+                        total_tokens += result["tokens_processed"]
+                        print(
+                            f"  [{status}] {md_file}: "
+                            f"{result['chunks_created']} chunks, "
+                            f"{result['tokens_processed']} tokens"
+                        )
+                else:
+                    failed += 1
+                    print(f"  [FAILED] {md_file}: {response.status_code} - {response.text}")
+
+            except Exception as e:
+                failed += 1
+                print(f"  [ERROR] {md_file}: {e}")
+
+        print("\n" + "=" * 50)
+        print("Indexing Summary:")
+        print(f"  Indexed: {indexed}")
+        print(f"  Unchanged: {unchanged}")
+        print(f"  Failed: {failed}")
+        print(f"  Total chunks created: {total_chunks}")
+        print(f"  Total tokens processed: {total_tokens}")
+
+
+async def index_readme(base_url: str = "http://localhost:8123") -> None:
+    """Index the repository's README.md via POST /rag/index and print the outcome.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    readme_path = Path("README.md")
+
+    if not readme_path.exists():
+        print("README.md not found")
+        return
+
+    async with httpx.AsyncClient(base_url=base_url, timeout=60.0) as client:
+        content = readme_path.read_text(encoding="utf-8")
+
+        response = await client.post(
+            "/rag/index",
+            json={
+                "source_type": "markdown",
+                "source_path": str(readme_path),
+                "content": content,
+                "metadata": {"category": "overview"},
+            },
+        )
+
+        if response.status_code == 201:
+            result = response.json()
+            print(f"README.md indexed: {result['chunks_created']} chunks ({result['status']})")
+        else:
+            print(f"Failed to index README.md: {response.status_code} - {response.text}")
+
+
+async def list_sources(base_url: str = "http://localhost:8123") -> None:
+    """Print the totals and per-source chunk counts returned by GET /rag/sources.
+
+    Args:
+        base_url: Base URL of the API server.
+    """
+    async with httpx.AsyncClient(base_url=base_url) as client:
+        response = await client.get("/rag/sources")
+
+        if response.status_code == 200:
+            data = response.json()
+            print(f"\nIndexed Sources: {data['total_sources']}")
+            print(f"Total Chunks: {data['total_chunks']}")
+            print("\nSources:")
+            for source in data["sources"]:
+                print(f"  - {source['source_path']} ({source['chunk_count']} chunks)")
+        else:
+            print(f"Failed to list sources: {response.status_code}")
+
+
+async def main() -> None:
+    """Index README.md, then the docs/ directory, then print the indexed sources."""
+    print("RAG Knowledge Base - Document Indexer")
+    print("=" * 50)
+
+    # Index README first
+    print("\n1. Indexing README.md...")
+    await index_readme()
+
+    # Index documentation
+    print("\n2. Indexing docs/ directory...")
+    await index_markdown_docs()
+
+    # List all sources
+    print("\n3. Listing indexed sources...")
+    await list_sources()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/rag/query.http b/examples/rag/query.http
new file mode 100644
index 00000000..04937945
--- /dev/null
+++ b/examples/rag/query.http
@@ -0,0 +1,123 @@
+### RAG Knowledge Base - HTTP Client Examples
+### Use with VS Code REST Client or similar tools
+
+### =============================================================================
+### Index Endpoints
+### =============================================================================
+
+### Index a markdown document (with content)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "docs/example.md",
+  "content": "# Example Document\n\nThis is an example markdown document for testing the RAG indexing pipeline.\n\n## Section One\n\nFirst section with some content about forecasting.\n\n## Section Two\n\nSecond section about backtesting strategies.",
+  "metadata": {
+    "category": "documentation",
+    "author": "test"
+  }
+}
+
+### Index a markdown document (read from file path)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "README.md"
+}
+
+### Index an OpenAPI specification
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "openapi",
+  "source_path": "api/openapi.json",
+  "content": "{\"openapi\":\"3.0.0\",\"info\":{\"title\":\"Test API\",\"version\":\"1.0\"},\"paths\":{\"/users\":{\"get\":{\"summary\":\"List users\",\"operationId\":\"listUsers\",\"responses\":{\"200\":{\"description\":\"OK\"}}}}}}"
+}
+
+### =============================================================================
+### Retrieve Endpoints
+### =============================================================================
+
+### Semantic search - basic query
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "How does backtesting prevent data leakage?",
+  "top_k": 5,
+  "similarity_threshold": 0.7
+}
+
+### Semantic search - with filters
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "What forecasting models are available?",
+  "top_k": 10,
+  "similarity_threshold": 0.6,
+  "filters": {
+    "source_type": ["markdown"],
+    "category": "documentation"
+  }
+}
+
+### Semantic search - lower threshold for more results
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "time series cross validation",
+  "top_k": 20,
+  "similarity_threshold": 0.5
+}
+
+### =============================================================================
+### Sources Endpoints
+### =============================================================================
+
+### List all indexed sources
+GET http://localhost:8123/rag/sources
+
+### Delete a specific source (replace the ID in the URL with an actual source_id)
+DELETE http://localhost:8123/rag/sources/abc123def456789012345678901234
+
+### =============================================================================
+### Example Workflows
+### =============================================================================
+
+### Workflow 1: Index and then query
+### Step 1: Index a document
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "test-workflow.md",
+  "content": "# Backtesting Guide\n\nBacktesting is a method to evaluate forecasting models using historical data.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies to prevent data leakage.\n\n## Metrics\n\nKey metrics include MAE, sMAPE, WAPE, and Bias."
+}
+
+### Step 2: Query the indexed content
+POST http://localhost:8123/rag/retrieve
+Content-Type: application/json
+
+{
+  "query": "What metrics are used in backtesting?",
+  "top_k": 3,
+  "similarity_threshold": 0.6
+}
+
+### Workflow 2: Re-index with updated content
+### (Using the same source_path will update the existing chunks)
+POST http://localhost:8123/rag/index
+Content-Type: application/json
+
+{
+  "source_type": "markdown",
+  "source_path": "test-workflow.md",
+  "content": "# Backtesting Guide (Updated)\n\nBacktesting evaluates forecasting models.\n\n## Time-Based Splits\n\nWe use expanding or sliding window strategies.\n\n## Metrics\n\nKey metrics: MAE, sMAPE, WAPE, Bias, and Stability Index."
+}
diff --git a/pyproject.toml b/pyproject.toml
index 187facf4..5244b1b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,11 @@ dependencies = [
     "numpy>=2.4.1",
     "scikit-learn>=1.6.0",
     "joblib>=1.4.0",
+    # RAG dependencies
+    "pgvector>=0.3.0",
+    "openai>=1.40.0",
+    "tiktoken>=0.7.0",
+    "httpx>=0.28.0",
 ]
 
 [project.optional-dependencies]
diff --git a/uv.lock b/uv.lock
index 85d3d0c8..df06e69b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -104,6 +104,63 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
+    { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
+    { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
+    { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
+    { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
+    { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
+    { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
+    { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
+    { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
+    { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
+    { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
+    { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
+    { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
+    { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
+    { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
+    { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
+    { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
+    { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
+    { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -199,6 +256,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" },
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
 [[package]]
 name = "fastapi"
 version = "0.128.0"
@@ -216,21 +282,25 @@ wheels = [
 
 [[package]]
 name = "forecastlabai"
-version = "0.1.8"
+version = "0.2.1"
 source = { editable = "." }
 dependencies = [
     { name = "alembic" },
     { name = "asyncpg" },
     { name = "fastapi" },
+    { name = "httpx" },
     { name = "joblib" },
     { name = "numpy" },
+    { name = "openai" },
     { name = "pandas" },
+    { name = "pgvector" },
     { name = "pydantic" },
     { name = "pydantic-settings" },
     { name = "python-dotenv" },
     { name = "scikit-learn" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "structlog" },
+    { name = "tiktoken" },
     { name = "uvicorn", extra = ["standard"] },
 ]
 
@@ -255,11 +325,14 @@ requires-dist = [
     { name = "alembic", specifier = ">=1.14.0" },
     { name = "asyncpg", specifier = ">=0.30.0" },
     { name = "fastapi", specifier = ">=0.115.0" },
+    { name = "httpx", specifier = ">=0.28.0" },
     { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
     { name = "joblib", specifier = ">=1.4.0" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13.0" },
     { name = "numpy", specifier = ">=2.4.1" },
+    { name = "openai", specifier = ">=1.40.0" },
     { name = "pandas", specifier = ">=3.0.0" },
+    { name = "pgvector", specifier = ">=0.3.0" },
     { name = "pydantic", specifier = ">=2.10.0" },
     { name = "pydantic-settings", specifier = ">=2.6.0" },
     { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.390" },
@@ -271,6 +344,7 @@ requires-dist = [
     { name = "scikit-learn", specifier = ">=1.6.0" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
     { name = "structlog", specifier = ">=24.4.0" },
+    { name = "tiktoken", specifier = ">=0.7.0" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" },
 ]
 provides-extras = ["dev"]
@@ -405,6 +479,74 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
+[[package]]
+name = "jiter"
+version = "0.12.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" },
+    { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" },
+    { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" },
+    { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" },
+    { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" },
+    { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" },
+    { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = "2025-11-09T20:47:41.794Z" },
+    { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658, upload-time = "2025-11-09T20:47:44.424Z" },
+    { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605, upload-time = "2025-11-09T20:47:45.973Z" },
+    { url = "https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803, upload-time = "2025-11-09T20:47:47.535Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120, upload-time = "2025-11-09T20:47:49.284Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918, upload-time = "2025-11-09T20:47:50.807Z" },
+    { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008, upload-time = "2025-11-09T20:47:52.211Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785, upload-time = "2025-11-09T20:47:53.512Z" },
+    { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108, upload-time = "2025-11-09T20:47:54.893Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937, upload-time = "2025-11-09T20:47:56.253Z" },
+    { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853, upload-time = "2025-11-09T20:47:58.32Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699, upload-time = "2025-11-09T20:47:59.686Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258, upload-time = "2025-11-09T20:48:01.01Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503, upload-time = "2025-11-09T20:48:02.35Z" },
+    { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965, upload-time = "2025-11-09T20:48:03.783Z" },
+    { url = "https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831, upload-time = "2025-11-09T20:48:05.55Z" },
+    { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272, upload-time = "2025-11-09T20:48:06.951Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size = 204604, upload-time = "2025-11-09T20:48:08.328Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628, upload-time = "2025-11-09T20:48:09.572Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478, upload-time = "2025-11-09T20:48:10.898Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706, upload-time = "2025-11-09T20:48:12.266Z" },
+    { url = "https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894, upload-time = "2025-11-09T20:48:13.673Z" },
+    { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714, upload-time = "2025-11-09T20:48:15.083Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989, upload-time = "2025-11-09T20:48:16.706Z" },
+    { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615, upload-time = "2025-11-09T20:48:18.614Z" },
+    { url = "https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745, upload-time = "2025-11-09T20:48:20.117Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502, upload-time = "2025-11-09T20:48:21.543Z" },
+    { url = "https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845, upload-time = "2025-11-09T20:48:22.964Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701, upload-time = "2025-11-09T20:48:24.483Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029, upload-time = "2025-11-09T20:48:25.749Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960, upload-time = "2025-11-09T20:48:27.415Z" },
+    { url = "https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529, upload-time = "2025-11-09T20:48:29.125Z" },
+    { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974, upload-time = "2025-11-09T20:48:30.87Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932, upload-time = "2025-11-09T20:48:32.658Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243, upload-time = "2025-11-09T20:48:34.093Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315, upload-time = "2025-11-09T20:48:35.507Z" },
+    { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714, upload-time = "2025-11-09T20:48:40.014Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168, upload-time = "2025-11-09T20:48:41.462Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893, upload-time = "2025-11-09T20:48:42.921Z" },
+    { url = "https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828, upload-time = "2025-11-09T20:48:44.278Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009, upload-time = "2025-11-09T20:48:45.726Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110, upload-time = "2025-11-09T20:48:47.033Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223, upload-time = "2025-11-09T20:48:49.076Z" },
+    { url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" },
+    { url = "https://files.pythonhosted.org/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" },
+]
+
 [[package]]
 name = "joblib"
 version = "1.5.3"
@@ -653,6 +795,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" },
 ]
 
+[[package]]
+name = "openai"
+version = "2.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" },
+]
+
 [[package]]
 name = "packaging"
 version = "26.0"
@@ -736,6 +897,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
 ]
 
+[[package]]
+name = "pgvector"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/26/6cee8a1ce8c43625ec561aff19df07f9776b7525d9002c86bceb3e0ac970/pgvector-0.4.2-py3-none-any.whl", hash = "sha256:549d45f7a18593783d5eec609ea1684a724ba8405c4cb182a0b2b08aeff04e08", size = 27441, upload-time = "2025-12-05T01:07:16.536Z" },
+]
+
 [[package]]
 name = "pluggy"
 version = "1.6.0"
@@ -977,6 +1150,109 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
 ]
 
+[[package]]
+name = "regex"
+version = "2026.1.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0b/86/07d5056945f9ec4590b518171c4254a5925832eb727b56d3c38a7476f316/regex-2026.1.15.tar.gz", hash = "sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5", size = 414811, upload-time = "2026-01-14T23:18:02.775Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/92/81/10d8cf43c807d0326efe874c1b79f22bfb0fb226027b0b19ebc26d301408/regex-2026.1.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1", size = 489398, upload-time = "2026-01-14T23:14:43.741Z" },
+    { url = "https://files.pythonhosted.org/packages/90/b0/7c2a74e74ef2a7c32de724658a69a862880e3e4155cba992ba04d1c70400/regex-2026.1.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681", size = 291339, upload-time = "2026-01-14T23:14:45.183Z" },
+    { url = "https://files.pythonhosted.org/packages/19/4d/16d0773d0c818417f4cc20aa0da90064b966d22cd62a8c46765b5bd2d643/regex-2026.1.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f", size = 289003, upload-time = "2026-01-14T23:14:47.25Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/e4/1fc4599450c9f0863d9406e944592d968b8d6dfd0d552a7d569e43bceada/regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa", size = 798656, upload-time = "2026-01-14T23:14:48.77Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/e6/59650d73a73fa8a60b3a590545bfcf1172b4384a7df2e7fe7b9aab4e2da9/regex-2026.1.15-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804", size = 864252, upload-time = "2026-01-14T23:14:50.528Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/ab/1d0f4d50a1638849a97d731364c9a80fa304fec46325e48330c170ee8e80/regex-2026.1.15-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c", size = 912268, upload-time = "2026-01-14T23:14:52.952Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/df/0d722c030c82faa1d331d1921ee268a4e8fb55ca8b9042c9341c352f17fa/regex-2026.1.15-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5", size = 803589, upload-time = "2026-01-14T23:14:55.182Z" },
+    { url = "https://files.pythonhosted.org/packages/66/23/33289beba7ccb8b805c6610a8913d0131f834928afc555b241caabd422a9/regex-2026.1.15-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3", size = 775700, upload-time = "2026-01-14T23:14:56.707Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/65/bf3a42fa6897a0d3afa81acb25c42f4b71c274f698ceabd75523259f6688/regex-2026.1.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb", size = 787928, upload-time = "2026-01-14T23:14:58.312Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f5/13bf65864fc314f68cdd6d8ca94adcab064d4d39dbd0b10fef29a9da48fc/regex-2026.1.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410", size = 858607, upload-time = "2026-01-14T23:15:00.657Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/31/040e589834d7a439ee43fb0e1e902bc81bd58a5ba81acffe586bb3321d35/regex-2026.1.15-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4", size = 763729, upload-time = "2026-01-14T23:15:02.248Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/84/6921e8129687a427edf25a34a5594b588b6d88f491320b9de5b6339a4fcb/regex-2026.1.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d", size = 850697, upload-time = "2026-01-14T23:15:03.878Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/87/3d06143d4b128f4229158f2de5de6c8f2485170c7221e61bf381313314b2/regex-2026.1.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22", size = 789849, upload-time = "2026-01-14T23:15:06.102Z" },
+    { url = "https://files.pythonhosted.org/packages/77/69/c50a63842b6bd48850ebc7ab22d46e7a2a32d824ad6c605b218441814639/regex-2026.1.15-cp312-cp312-win32.whl", hash = "sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913", size = 266279, upload-time = "2026-01-14T23:15:07.678Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/36/39d0b29d087e2b11fd8191e15e81cce1b635fcc845297c67f11d0d19274d/regex-2026.1.15-cp312-cp312-win_amd64.whl", hash = "sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a", size = 277166, upload-time = "2026-01-14T23:15:09.257Z" },
+    { url = "https://files.pythonhosted.org/packages/28/32/5b8e476a12262748851fa8ab1b0be540360692325975b094e594dfebbb52/regex-2026.1.15-cp312-cp312-win_arm64.whl", hash = "sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056", size = 270415, upload-time = "2026-01-14T23:15:10.743Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/2e/6870bb16e982669b674cce3ee9ff2d1d46ab80528ee6bcc20fb2292efb60/regex-2026.1.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e69d0deeb977ffe7ed3d2e4439360089f9c3f217ada608f0f88ebd67afb6385e", size = 489164, upload-time = "2026-01-14T23:15:13.962Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/67/9774542e203849b0286badf67199970a44ebdb0cc5fb739f06e47ada72f8/regex-2026.1.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3601ffb5375de85a16f407854d11cca8fe3f5febbe3ac78fb2866bb220c74d10", size = 291218, upload-time = "2026-01-14T23:15:15.647Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/87/b0cda79f22b8dee05f774922a214da109f9a4c0eca5da2c9d72d77ea062c/regex-2026.1.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4c5ef43b5c2d4114eb8ea424bb8c9cec01d5d17f242af88b2448f5ee81caadbc", size = 288895, upload-time = "2026-01-14T23:15:17.788Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/6a/0041f0a2170d32be01ab981d6346c83a8934277d82c780d60b127331f264/regex-2026.1.15-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:968c14d4f03e10b2fd960f1d5168c1f0ac969381d3c1fcc973bc45fb06346599", size = 798680, upload-time = "2026-01-14T23:15:19.342Z" },
+    { url = "https://files.pythonhosted.org/packages/58/de/30e1cfcdbe3e891324aa7568b7c968771f82190df5524fabc1138cb2d45a/regex-2026.1.15-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56a5595d0f892f214609c9f76b41b7428bed439d98dc961efafdd1354d42baae", size = 864210, upload-time = "2026-01-14T23:15:22.005Z" },
+    { url = "https://files.pythonhosted.org/packages/64/44/4db2f5c5ca0ccd40ff052ae7b1e9731352fcdad946c2b812285a7505ca75/regex-2026.1.15-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf650f26087363434c4e560011f8e4e738f6f3e029b85d4904c50135b86cfa5", size = 912358, upload-time = "2026-01-14T23:15:24.569Z" },
+    { url = "https://files.pythonhosted.org/packages/79/b6/e6a5665d43a7c42467138c8a2549be432bad22cbd206f5ec87162de74bd7/regex-2026.1.15-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18388a62989c72ac24de75f1449d0fb0b04dfccd0a1a7c1c43af5eb503d890f6", size = 803583, upload-time = "2026-01-14T23:15:26.526Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/53/7cd478222169d85d74d7437e74750005e993f52f335f7c04ff7adfda3310/regex-2026.1.15-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d220a2517f5893f55daac983bfa9fe998a7dbcaee4f5d27a88500f8b7873788", size = 775782, upload-time = "2026-01-14T23:15:29.352Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/b5/75f9a9ee4b03a7c009fe60500fe550b45df94f0955ca29af16333ef557c5/regex-2026.1.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9c08c2fbc6120e70abff5d7f28ffb4d969e14294fb2143b4b5c7d20e46d1714", size = 787978, upload-time = "2026-01-14T23:15:31.295Z" },
+    { url = "https://files.pythonhosted.org/packages/72/b3/79821c826245bbe9ccbb54f6eadb7879c722fd3e0248c17bfc90bf54e123/regex-2026.1.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7ef7d5d4bd49ec7364315167a4134a015f61e8266c6d446fc116a9ac4456e10d", size = 858550, upload-time = "2026-01-14T23:15:33.558Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/85/2ab5f77a1c465745bfbfcb3ad63178a58337ae8d5274315e2cc623a822fa/regex-2026.1.15-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e42844ad64194fa08d5ccb75fe6a459b9b08e6d7296bd704460168d58a388f3", size = 763747, upload-time = "2026-01-14T23:15:35.206Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/84/c27df502d4bfe2873a3e3a7cf1bdb2b9cc10284d1a44797cf38bed790470/regex-2026.1.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cfecdaa4b19f9ca534746eb3b55a5195d5c95b88cac32a205e981ec0a22b7d31", size = 850615, upload-time = "2026-01-14T23:15:37.523Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/b7/658a9782fb253680aa8ecb5ccbb51f69e088ed48142c46d9f0c99b46c575/regex-2026.1.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:08df9722d9b87834a3d701f3fca570b2be115654dbfd30179f30ab2f39d606d3", size = 789951, upload-time = "2026-01-14T23:15:39.582Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/2a/5928af114441e059f15b2f63e188bd00c6529b3051c974ade7444b85fcda/regex-2026.1.15-cp313-cp313-win32.whl", hash = "sha256:d426616dae0967ca225ab12c22274eb816558f2f99ccb4a1d52ca92e8baf180f", size = 266275, upload-time = "2026-01-14T23:15:42.108Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/16/5bfbb89e435897bff28cf0352a992ca719d9e55ebf8b629203c96b6ce4f7/regex-2026.1.15-cp313-cp313-win_amd64.whl", hash = "sha256:febd38857b09867d3ed3f4f1af7d241c5c50362e25ef43034995b77a50df494e", size = 277145, upload-time = "2026-01-14T23:15:44.244Z" },
+    { url = "https://files.pythonhosted.org/packages/56/c1/a09ff7392ef4233296e821aec5f78c51be5e91ffde0d163059e50fd75835/regex-2026.1.15-cp313-cp313-win_arm64.whl", hash = "sha256:8e32f7896f83774f91499d239e24cebfadbc07639c1494bb7213983842348337", size = 270411, upload-time = "2026-01-14T23:15:45.858Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/38/0cfd5a78e5c6db00e6782fdae70458f89850ce95baa5e8694ab91d89744f/regex-2026.1.15-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ec94c04149b6a7b8120f9f44565722c7ae31b7a6d2275569d2eefa76b83da3be", size = 492068, upload-time = "2026-01-14T23:15:47.616Z" },
+    { url = "https://files.pythonhosted.org/packages/50/72/6c86acff16cb7c959c4355826bbf06aad670682d07c8f3998d9ef4fee7cd/regex-2026.1.15-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40c86d8046915bb9aeb15d3f3f15b6fd500b8ea4485b30e1bbc799dab3fe29f8", size = 292756, upload-time = "2026-01-14T23:15:49.307Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/58/df7fb69eadfe76526ddfce28abdc0af09ffe65f20c2c90932e89d705153f/regex-2026.1.15-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:726ea4e727aba21643205edad8f2187ec682d3305d790f73b7a51c7587b64bdd", size = 291114, upload-time = "2026-01-14T23:15:51.484Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/6c/a4011cd1cf96b90d2cdc7e156f91efbd26531e822a7fbb82a43c1016678e/regex-2026.1.15-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1cb740d044aff31898804e7bf1181cc72c03d11dfd19932b9911ffc19a79070a", size = 807524, upload-time = "2026-01-14T23:15:53.102Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/25/a53ffb73183f69c3e9f4355c4922b76d2840aee160af6af5fac229b6201d/regex-2026.1.15-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05d75a668e9ea16f832390d22131fe1e8acc8389a694c8febc3e340b0f810b93", size = 873455, upload-time = "2026-01-14T23:15:54.956Z" },
+    { url = "https://files.pythonhosted.org/packages/66/0b/8b47fc2e8f97d9b4a851736f3890a5f786443aa8901061c55f24c955f45b/regex-2026.1.15-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d991483606f3dbec93287b9f35596f41aa2e92b7c2ebbb935b63f409e243c9af", size = 915007, upload-time = "2026-01-14T23:15:57.041Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/fa/97de0d681e6d26fabe71968dbee06dd52819e9a22fdce5dac7256c31ed84/regex-2026.1.15-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:194312a14819d3e44628a44ed6fea6898fdbecb0550089d84c403475138d0a09", size = 812794, upload-time = "2026-01-14T23:15:58.916Z" },
+    { url = "https://files.pythonhosted.org/packages/22/38/e752f94e860d429654aa2b1c51880bff8dfe8f084268258adf9151cf1f53/regex-2026.1.15-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe2fda4110a3d0bc163c2e0664be44657431440722c5c5315c65155cab92f9e5", size = 781159, upload-time = "2026-01-14T23:16:00.817Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/a7/d739ffaef33c378fc888302a018d7f81080393d96c476b058b8c64fd2b0d/regex-2026.1.15-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:124dc36c85d34ef2d9164da41a53c1c8c122cfb1f6e1ec377a1f27ee81deb794", size = 795558, upload-time = "2026-01-14T23:16:03.267Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/c4/542876f9a0ac576100fc73e9c75b779f5c31e3527576cfc9cb3009dcc58a/regex-2026.1.15-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1774cd1981cd212506a23a14dba7fdeaee259f5deba2df6229966d9911e767a", size = 868427, upload-time = "2026-01-14T23:16:05.646Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/0f/d5655bea5b22069e32ae85a947aa564912f23758e112cdb74212848a1a1b/regex-2026.1.15-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:b5f7d8d2867152cdb625e72a530d2ccb48a3d199159144cbdd63870882fb6f80", size = 769939, upload-time = "2026-01-14T23:16:07.542Z" },
+    { url = "https://files.pythonhosted.org/packages/20/06/7e18a4fa9d326daeda46d471a44ef94201c46eaa26dbbb780b5d92cbfdda/regex-2026.1.15-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:492534a0ab925d1db998defc3c302dae3616a2fc3fe2e08db1472348f096ddf2", size = 854753, upload-time = "2026-01-14T23:16:10.395Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/67/dc8946ef3965e166f558ef3b47f492bc364e96a265eb4a2bb3ca765c8e46/regex-2026.1.15-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c661fc820cfb33e166bf2450d3dadbda47c8d8981898adb9b6fe24e5e582ba60", size = 799559, upload-time = "2026-01-14T23:16:12.347Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/61/1bba81ff6d50c86c65d9fd84ce9699dd106438ee4cdb105bf60374ee8412/regex-2026.1.15-cp313-cp313t-win32.whl", hash = "sha256:99ad739c3686085e614bf77a508e26954ff1b8f14da0e3765ff7abbf7799f952", size = 268879, upload-time = "2026-01-14T23:16:14.049Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/5e/cef7d4c5fb0ea3ac5c775fd37db5747f7378b29526cc83f572198924ff47/regex-2026.1.15-cp313-cp313t-win_amd64.whl", hash = "sha256:32655d17905e7ff8ba5c764c43cb124e34a9245e45b83c22e81041e1071aee10", size = 280317, upload-time = "2026-01-14T23:16:15.718Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/52/4317f7a5988544e34ab57b4bde0f04944c4786128c933fb09825924d3e82/regex-2026.1.15-cp313-cp313t-win_arm64.whl", hash = "sha256:b2a13dd6a95e95a489ca242319d18fc02e07ceb28fa9ad146385194d95b3c829", size = 271551, upload-time = "2026-01-14T23:16:17.533Z" },
+    { url = "https://files.pythonhosted.org/packages/52/0a/47fa888ec7cbbc7d62c5f2a6a888878e76169170ead271a35239edd8f0e8/regex-2026.1.15-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d920392a6b1f353f4aa54328c867fec3320fa50657e25f64abf17af054fc97ac", size = 489170, upload-time = "2026-01-14T23:16:19.835Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/c4/d000e9b7296c15737c9301708e9e7fbdea009f8e93541b6b43bdb8219646/regex-2026.1.15-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b5a28980a926fa810dbbed059547b02783952e2efd9c636412345232ddb87ff6", size = 291146, upload-time = "2026-01-14T23:16:21.541Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/b6/921cc61982e538682bdf3bdf5b2c6ab6b34368da1f8e98a6c1ddc503c9cf/regex-2026.1.15-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:621f73a07595d83f28952d7bd1e91e9d1ed7625fb7af0064d3516674ec93a2a2", size = 288986, upload-time = "2026-01-14T23:16:23.381Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/33/eb7383dde0bbc93f4fb9d03453aab97e18ad4024ac7e26cef8d1f0a2cff0/regex-2026.1.15-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d7d92495f47567a9b1669c51fc8d6d809821849063d168121ef801bbc213846", size = 799098, upload-time = "2026-01-14T23:16:25.088Z" },
+    { url = "https://files.pythonhosted.org/packages/27/56/b664dccae898fc8d8b4c23accd853f723bde0f026c747b6f6262b688029c/regex-2026.1.15-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dd16fba2758db7a3780a051f245539c4451ca20910f5a5e6ea1c08d06d4a76b", size = 864980, upload-time = "2026-01-14T23:16:27.297Z" },
+    { url = "https://files.pythonhosted.org/packages/16/40/0999e064a170eddd237bae9ccfcd8f28b3aa98a38bf727a086425542a4fc/regex-2026.1.15-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1e1808471fbe44c1a63e5f577a1d5f02fe5d66031dcbdf12f093ffc1305a858e", size = 911607, upload-time = "2026-01-14T23:16:29.235Z" },
+    { url = "https://files.pythonhosted.org/packages/07/78/c77f644b68ab054e5a674fb4da40ff7bffb2c88df58afa82dbf86573092d/regex-2026.1.15-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0751a26ad39d4f2ade8fe16c59b2bf5cb19eb3d2cd543e709e583d559bd9efde", size = 803358, upload-time = "2026-01-14T23:16:31.369Z" },
+    { url = "https://files.pythonhosted.org/packages/27/31/d4292ea8566eaa551fafc07797961c5963cf5235c797cc2ae19b85dfd04d/regex-2026.1.15-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0f0c7684c7f9ca241344ff95a1de964f257a5251968484270e91c25a755532c5", size = 775833, upload-time = "2026-01-14T23:16:33.141Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/b2/cff3bf2fea4133aa6fb0d1e370b37544d18c8350a2fa118c7e11d1db0e14/regex-2026.1.15-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:74f45d170a21df41508cb67165456538425185baaf686281fa210d7e729abc34", size = 788045, upload-time = "2026-01-14T23:16:35.005Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/99/2cb9b69045372ec877b6f5124bda4eb4253bc58b8fe5848c973f752bc52c/regex-2026.1.15-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f1862739a1ffb50615c0fde6bae6569b5efbe08d98e59ce009f68a336f64da75", size = 859374, upload-time = "2026-01-14T23:16:36.919Z" },
+    { url = "https://files.pythonhosted.org/packages/09/16/710b0a5abe8e077b1729a562d2f297224ad079f3a66dce46844c193416c8/regex-2026.1.15-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:453078802f1b9e2b7303fb79222c054cb18e76f7bdc220f7530fdc85d319f99e", size = 763940, upload-time = "2026-01-14T23:16:38.685Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/d1/7585c8e744e40eb3d32f119191969b91de04c073fca98ec14299041f6e7e/regex-2026.1.15-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:a30a68e89e5a218b8b23a52292924c1f4b245cb0c68d1cce9aec9bbda6e2c160", size = 850112, upload-time = "2026-01-14T23:16:40.646Z" },
+    { url = "https://files.pythonhosted.org/packages/af/d6/43e1dd85df86c49a347aa57c1f69d12c652c7b60e37ec162e3096194a278/regex-2026.1.15-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9479cae874c81bf610d72b85bb681a94c95722c127b55445285fb0e2c82db8e1", size = 789586, upload-time = "2026-01-14T23:16:42.799Z" },
+    { url = "https://files.pythonhosted.org/packages/93/38/77142422f631e013f316aaae83234c629555729a9fbc952b8a63ac91462a/regex-2026.1.15-cp314-cp314-win32.whl", hash = "sha256:d639a750223132afbfb8f429c60d9d318aeba03281a5f1ab49f877456448dcf1", size = 271691, upload-time = "2026-01-14T23:16:44.671Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/a9/ab16b4649524ca9e05213c1cdbb7faa85cc2aa90a0230d2f796cbaf22736/regex-2026.1.15-cp314-cp314-win_amd64.whl", hash = "sha256:4161d87f85fa831e31469bfd82c186923070fc970b9de75339b68f0c75b51903", size = 280422, upload-time = "2026-01-14T23:16:46.607Z" },
+    { url = "https://files.pythonhosted.org/packages/be/2a/20fd057bf3521cb4791f69f869635f73e0aaf2b9ad2d260f728144f9047c/regex-2026.1.15-cp314-cp314-win_arm64.whl", hash = "sha256:91c5036ebb62663a6b3999bdd2e559fd8456d17e2b485bf509784cd31a8b1705", size = 273467, upload-time = "2026-01-14T23:16:48.967Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/77/0b1e81857060b92b9cad239104c46507dd481b3ff1fa79f8e7f865aae38a/regex-2026.1.15-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ee6854c9000a10938c79238de2379bea30c82e4925a371711af45387df35cab8", size = 492073, upload-time = "2026-01-14T23:16:51.154Z" },
+    { url = "https://files.pythonhosted.org/packages/70/f3/f8302b0c208b22c1e4f423147e1913fd475ddd6230565b299925353de644/regex-2026.1.15-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c2b80399a422348ce5de4fe40c418d6299a0fa2803dd61dc0b1a2f28e280fcf", size = 292757, upload-time = "2026-01-14T23:16:53.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/f0/ef55de2460f3b4a6da9d9e7daacd0cb79d4ef75c64a2af316e68447f0df0/regex-2026.1.15-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:dca3582bca82596609959ac39e12b7dad98385b4fefccb1151b937383cec547d", size = 291122, upload-time = "2026-01-14T23:16:55.383Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/55/bb8ccbacabbc3a11d863ee62a9f18b160a83084ea95cdfc5d207bfc3dd75/regex-2026.1.15-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef71d476caa6692eea743ae5ea23cde3260677f70122c4d258ca952e5c2d4e84", size = 807761, upload-time = "2026-01-14T23:16:57.251Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/84/f75d937f17f81e55679a0509e86176e29caa7298c38bd1db7ce9c0bf6075/regex-2026.1.15-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c243da3436354f4af6c3058a3f81a97d47ea52c9bd874b52fd30274853a1d5df", size = 873538, upload-time = "2026-01-14T23:16:59.349Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/d9/0da86327df70349aa8d86390da91171bd3ca4f0e7c1d1d453a9c10344da3/regex-2026.1.15-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8355ad842a7c7e9e5e55653eade3b7d1885ba86f124dd8ab1f722f9be6627434", size = 915066, upload-time = "2026-01-14T23:17:01.607Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/5e/f660fb23fc77baa2a61aa1f1fe3a4eea2bbb8a286ddec148030672e18834/regex-2026.1.15-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f192a831d9575271a22d804ff1a5355355723f94f31d9eef25f0d45a152fdc1a", size = 812938, upload-time = "2026-01-14T23:17:04.366Z" },
+    { url = "https://files.pythonhosted.org/packages/69/33/a47a29bfecebbbfd1e5cd3f26b28020a97e4820f1c5148e66e3b7d4b4992/regex-2026.1.15-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:166551807ec20d47ceaeec380081f843e88c8949780cd42c40f18d16168bed10", size = 781314, upload-time = "2026-01-14T23:17:06.378Z" },
+    { url = "https://files.pythonhosted.org/packages/65/ec/7ec2bbfd4c3f4e494a24dec4c6943a668e2030426b1b8b949a6462d2c17b/regex-2026.1.15-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f9ca1cbdc0fbfe5e6e6f8221ef2309988db5bcede52443aeaee9a4ad555e0dac", size = 795652, upload-time = "2026-01-14T23:17:08.521Z" },
+    { url = "https://files.pythonhosted.org/packages/46/79/a5d8651ae131fe27d7c521ad300aa7f1c7be1dbeee4d446498af5411b8a9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b30bcbd1e1221783c721483953d9e4f3ab9c5d165aa709693d3f3946747b1aea", size = 868550, upload-time = "2026-01-14T23:17:10.573Z" },
+    { url = "https://files.pythonhosted.org/packages/06/b7/25635d2809664b79f183070786a5552dd4e627e5aedb0065f4e3cf8ee37d/regex-2026.1.15-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2a8d7b50c34578d0d3bf7ad58cde9652b7d683691876f83aedc002862a35dc5e", size = 769981, upload-time = "2026-01-14T23:17:12.871Z" },
+    { url = "https://files.pythonhosted.org/packages/16/8b/fc3fcbb2393dcfa4a6c5ffad92dc498e842df4581ea9d14309fcd3c55fb9/regex-2026.1.15-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9d787e3310c6a6425eb346be4ff2ccf6eece63017916fd77fe8328c57be83521", size = 854780, upload-time = "2026-01-14T23:17:14.837Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/38/dde117c76c624713c8a2842530be9c93ca8b606c0f6102d86e8cd1ce8bea/regex-2026.1.15-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:619843841e220adca114118533a574a9cd183ed8a28b85627d2844c500a2b0db", size = 799778, upload-time = "2026-01-14T23:17:17.369Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/0d/3a6cfa9ae99606afb612d8fb7a66b245a9d5ff0f29bb347c8a30b6ad561b/regex-2026.1.15-cp314-cp314t-win32.whl", hash = "sha256:e90b8db97f6f2c97eb045b51a6b2c5ed69cedd8392459e0642d4199b94fabd7e", size = 274667, upload-time = "2026-01-14T23:17:19.301Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/b2/297293bb0742fd06b8d8e2572db41a855cdf1cae0bf009b1cb74fe07e196/regex-2026.1.15-cp314-cp314t-win_amd64.whl", hash = "sha256:5ef19071f4ac9f0834793af85bd04a920b4407715624e40cb7a0631a11137cdf", size = 284386, upload-time = "2026-01-14T23:17:21.231Z" },
+    { url = "https://files.pythonhosted.org/packages/95/e4/a3b9480c78cf8ee86626cb06f8d931d74d775897d44201ccb813097ae697/regex-2026.1.15-cp314-cp314t-win_arm64.whl", hash = "sha256:ca89c5e596fc05b015f27561b3793dc2fa0917ea0d7507eebb448efd35274a70", size = 274837, upload-time = "2026-01-14T23:17:23.146Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "ruff"
 version = "0.14.14"
@@ -1117,6 +1393,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
 ]
 
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
 [[package]]
 name = "sqlalchemy"
 version = "2.0.46"
@@ -1195,6 +1480,65 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
 ]
 
+[[package]]
+name = "tiktoken"
+version = "0.12.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "regex" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" },
+    { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" },
+    { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" },
+    { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" },
+    { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" },
+    { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" },
+    { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" },
+    { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" },
+    { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" },
+    { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" },
+    { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" },
+    { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" },
+    { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" },
+    { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" },
+    { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" },
+    { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" },
+]
+
+[[package]]
+name = "tqdm"
+version = "4.67.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/27/89/4b0001b2dab8df0a5ee2787dcbe771de75ded01f18f1f8d53dedeea2882b/tqdm-4.67.2.tar.gz", hash = "sha256:649aac53964b2cb8dec76a14b405a4c0d13612cb8933aae547dd144eacc99653", size = 169514, upload-time = "2026-01-30T23:12:06.555Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f5/e2/31eac96de2915cf20ccaed0225035db149dfb9165a9ed28d4b252ef3f7f7/tqdm-4.67.2-py3-none-any.whl", hash = "sha256:9a12abcbbff58b6036b2167d9d3853042b9d436fe7330f06ae047867f2f8e0a7", size = 78354, upload-time = "2026-01-30T23:12:04.368Z" },
+]
+
 [[package]]
 name = "types-pytz"
 version = "2025.2.0.20251108"
@@ -1234,6 +1578,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
 ]
 
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+
 [[package]]
 name = "uvicorn"
 version = "0.40.0"