w7-mgfcode · w7-mgfcode · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026
@@ -22,5 +22,36 @@ FORECAST_MAX_HORIZON=90
 FORECAST_MODEL_ARTIFACTS_DIR=./artifacts/models
 FORECAST_ENABLE_LIGHTGBM=false
 
+# RAG Configuration
+# Embedding Provider: "openai" or "ollama"
+RAG_EMBEDDING_PROVIDER=openai
+
+# OpenAI Configuration (when RAG_EMBEDDING_PROVIDER=openai)
+OPENAI_API_KEY=sk-your-openai-api-key-here
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+
+# Ollama Configuration (when RAG_EMBEDDING_PROVIDER=ollama)
+# OLLAMA_BASE_URL=http://localhost:11434
+# OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
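+# Embedding dimension (must match your model: text-embedding-3-small=1536, nomic-embed-text=768, etc.)
+# Also read by the Alembic migration that sizes the embedding column, so changing it later requires re-indexing all documents.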
+RAG_EMBEDDING_DIMENSION=1536
+RAG_EMBEDDING_BATCH_SIZE=100
+
+# Chunking settings
+RAG_CHUNK_SIZE=512
+RAG_CHUNK_OVERLAP=50
+RAG_MIN_CHUNK_SIZE=100
+
+# Retrieval settings
+RAG_TOP_K=5
+RAG_SIMILARITY_THRESHOLD=0.7
+RAG_MAX_CONTEXT_TOKENS=4000
+
+# pgvector index settings
+RAG_INDEX_TYPE=hnsw
+RAG_HNSW_M=16
+RAG_HNSW_EF_CONSTRUCTION=64
+
 # Frontend (Vite)
 VITE_API_BASE_URL=http://localhost:8123
@@ -454,6 +454,59 @@ curl -X POST http://localhost:8123/jobs \
 - JSONB storage for flexible params and results
 - Links to model_run for train/backtest jobs
 
+### RAG Knowledge Base
+
+- `POST /rag/index` - Index a document into the knowledge base
+- `POST /rag/retrieve` - Semantic search across indexed documents
+- `GET /rag/sources` - List indexed sources
+- `DELETE /rag/sources/{source_id}` - Delete a source and its chunks
+
+**Embedding Providers:**
+
+The RAG system supports two embedding providers:
+
+1. **OpenAI** (default):
+```bash
+RAG_EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+RAG_EMBEDDING_MODEL=text-embedding-3-small
+RAG_EMBEDDING_DIMENSION=1536
+```
+
+2. **Ollama** (local/LAN):
+```bash
+RAG_EMBEDDING_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+RAG_EMBEDDING_DIMENSION=768
+```
+
+**Example Index Request:**
+```bash
+curl -X POST http://localhost:8123/rag/index \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source_type": "markdown",
+    "source_path": "docs/ARCHITECTURE.md"
+  }'
+```
+
+**Example Retrieve Request:**
+```bash
+curl -X POST http://localhost:8123/rag/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "How does backtesting work?",
+    "top_k": 5
+  }'
+```
+
+**Features:**
+- pgvector HNSW index for cosine-similarity search
+- Idempotent indexing via content hash
+- Markdown and OpenAPI chunking strategies
+- Configurable embedding dimension via `RAG_EMBEDDING_DIMENSION` (changing it requires re-indexing all documents)
+
 ### Error Responses (RFC 7807)
 
 All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`:

@@ -14,6 +14,7 @@
 # Import all models for Alembic autogenerate detection
 from app.features.data_platform import models as data_platform_models  # noqa: F401
 from app.features.jobs import models as jobs_models  # noqa: F401
+from app.features.rag import models as rag_models  # noqa: F401
 from app.features.registry import models as registry_models  # noqa: F401
 
 # Alembic Config object

@@ -0,0 +1,153 @@
+"""create_rag_tables
+
+Revision ID: b4c8d9e0f123
+Revises: 37e16ecef223
+Create Date: 2026-02-01 12:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from pgvector.sqlalchemy import Vector
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+# down_revision chains this migration directly after 37e16ecef223; no branch
+# labels or cross-branch dependencies are declared.
+revision: str = "b4c8d9e0f123"
+down_revision: Union[str, None] = "37e16ecef223"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Apply migration - create document_source and document_chunk tables with pgvector."""
+    # Enable pgvector extension
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # Create document_source table
+    op.create_table(
+        "document_source",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("source_id", sa.String(length=32), nullable=False),
+        sa.Column("source_type", sa.String(length=50), nullable=False),
+        sa.Column("source_path", sa.Text(), nullable=False),
+        sa.Column("content_hash", sa.String(length=64), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=False),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),
+    )
+
+    # Create indexes for document_source
+    op.create_index(
+        op.f("ix_document_source_source_id"),
+        "document_source",
+        ["source_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_source_source_type"),
+        "document_source",
+        ["source_type"],
+        unique=False,
+    )
+
+    # Create document_chunk table with Vector column
+    op.create_table(
+        "document_chunk",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("chunk_id", sa.String(length=32), nullable=False),
+        sa.Column("source_id", sa.Integer(), nullable=False),
+        sa.Column("chunk_index", sa.Integer(), nullable=False),
+        sa.Column("content", sa.Text(), nullable=False),
+        sa.Column("embedding", Vector(1536), nullable=True),
+        sa.Column("token_count", sa.Integer(), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        # Timestamps (from TimestampMixin)
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["source_id"],
+            ["document_source.id"],
+            ondelete="CASCADE",
+        ),
+        sa.UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
+    )
+
+    # Create indexes for document_chunk
+    op.create_index(
+        op.f("ix_document_chunk_chunk_id"),
+        "document_chunk",
+        ["chunk_id"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_document_chunk_source_id"),
+        "document_chunk",
+        ["source_id"],
+        unique=False,
+    )
+
+    # Create HNSW index for vector similarity search (cosine distance)
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+    # Create GIN index for metadata filtering
+    op.create_index(
+        "ix_chunk_metadata_gin",
+        "document_chunk",
+        ["metadata"],
+        unique=False,
+        postgresql_using="gin",
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - drop document_source and document_chunk tables."""
+    # Drop document_chunk indexes and table
+    op.drop_index("ix_chunk_metadata_gin", table_name="document_chunk")
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_source_id"), table_name="document_chunk")
+    op.drop_index(op.f("ix_document_chunk_chunk_id"), table_name="document_chunk")
+    op.drop_table("document_chunk")
+
+    # Drop document_source indexes and table
+    op.drop_index(op.f("ix_document_source_source_type"), table_name="document_source")
+    op.drop_index(op.f("ix_document_source_source_id"), table_name="document_source")
+    op.drop_table("document_source")
+
+    # Note: We don't drop the vector extension as it might be used by other tables
@@ -0,0 +1,75 @@
+"""rag_dynamic_embedding_dimension
+
+Revision ID: c5d9e1f2g345
+Revises: b4c8d9e0f123
+Create Date: 2026-02-01 12:49:28.000000
+
+CRITICAL: This migration alters the embedding column dimension.
+If changing from 1536 to a different dimension, existing embeddings
+will be incompatible and re-indexing is required.
+"""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+# down_revision chains this migration directly after b4c8d9e0f123; no branch
+# labels or cross-branch dependencies are declared.
+revision: str = "c5d9e1f2g345"
+down_revision: str | None = "b4c8d9e0f123"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Apply migration - alter embedding column to configurable dimension.
+
+    Reads RAG_EMBEDDING_DIMENSION from environment (default: 1536).
+    WARNING: Changing dimension requires re-indexing all documents.
+    """
+    # Get dimension from environment or use default
+    dimension = int(os.environ.get("RAG_EMBEDDING_DIMENSION", "1536"))
+
+    # Drop the HNSW index first (required before altering column type)
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Alter the embedding column type with new dimension
+    # Note: This will invalidate any existing embeddings if dimension changes
+    op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dimension})")
+
+    # Recreate the HNSW index with the new dimension
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+
+
+def downgrade() -> None:
+    """Revert migration - restore embedding column to 1536 dimensions.
+
+    WARNING: This will invalidate any embeddings that were generated
+    with a different dimension.
+    """
+    # Drop the HNSW index
+    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")
+
+    # Restore to original 1536 dimension
+    op.execute("ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector(1536)")
+
+    # Recreate the HNSW index
+    op.create_index(
+        "ix_chunk_embedding_hnsw",
+        "document_chunk",
+        ["embedding"],
+        unique=False,
+        postgresql_using="hnsw",
+        postgresql_with={"m": 16, "ef_construction": 64},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
@@ -64,6 +64,32 @@ class Settings(BaseSettings):
     # Jobs
     jobs_retention_days: int = 30
 
+    # RAG Embedding Configuration
+    rag_embedding_provider: Literal["openai", "ollama"] = "openai"
+    openai_api_key: str = ""
+    rag_embedding_model: str = "text-embedding-3-small"
+    rag_embedding_dimension: int = 1536
+    rag_embedding_batch_size: int = 100
+
+    # Ollama Configuration (when rag_embedding_provider = "ollama")
+    ollama_base_url: str = "http://localhost:11434"
+    ollama_embedding_model: str = "nomic-embed-text"
+
+    # RAG Chunking Configuration
+    rag_chunk_size: int = 512  # tokens
+    rag_chunk_overlap: int = 50  # tokens
+    rag_min_chunk_size: int = 100  # minimum tokens per chunk
+
+    # RAG Retrieval Configuration
+    rag_top_k: int = 5
+    rag_similarity_threshold: float = 0.7
+    rag_max_context_tokens: int = 4000
+
+    # RAG Index Configuration
+    rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
+    rag_hnsw_m: int = 16
+    rag_hnsw_ef_construction: int = 64
+
     @property
     def is_development(self) -> bool:
         """Check if running in development mode."""

@@ -0,0 +1,5 @@
+"""RAG (Retrieval-Augmented Generation) knowledge base feature."""
+
+from app.features.rag.routes import router
+
+__all__ = ["router"]