Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,36 @@ FORECAST_MAX_HORIZON=90
FORECAST_MODEL_ARTIFACTS_DIR=./artifacts/models
FORECAST_ENABLE_LIGHTGBM=false

# RAG Configuration
# Embedding Provider: "openai" or "ollama"
RAG_EMBEDDING_PROVIDER=openai

# OpenAI Configuration (when RAG_EMBEDDING_PROVIDER=openai)
OPENAI_API_KEY=sk-your-openai-api-key-here
RAG_EMBEDDING_MODEL=text-embedding-3-small

# Ollama Configuration (when RAG_EMBEDDING_PROVIDER=ollama)
# OLLAMA_BASE_URL=http://localhost:11434
# OLLAMA_EMBEDDING_MODEL=nomic-embed-text

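# Embedding dimension (must match your model: text-embedding-3-small=1536, nomic-embed-text=768, etc.)
# The embedding column is sized from this value when migrations run; changing it requires re-indexing all documents.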
RAG_EMBEDDING_DIMENSION=1536
RAG_EMBEDDING_BATCH_SIZE=100

# Chunking settings
RAG_CHUNK_SIZE=512
RAG_CHUNK_OVERLAP=50
RAG_MIN_CHUNK_SIZE=100

# Retrieval settings
RAG_TOP_K=5
RAG_SIMILARITY_THRESHOLD=0.7
RAG_MAX_CONTEXT_TOKENS=4000

# pgvector index settings
RAG_INDEX_TYPE=hnsw
RAG_HNSW_M=16
RAG_HNSW_EF_CONSTRUCTION=64

# Frontend (Vite)
VITE_API_BASE_URL=http://localhost:8123
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,59 @@ curl -X POST http://localhost:8123/jobs \
- JSONB storage for flexible params and results
- Links to model_run for train/backtest jobs

### RAG Knowledge Base

- `POST /rag/index` - Index a document into the knowledge base
- `POST /rag/retrieve` - Semantic search across indexed documents
- `GET /rag/sources` - List indexed sources
- `DELETE /rag/sources/{source_id}` - Delete a source and its chunks

**Embedding Providers:**

The RAG system supports two embedding providers:

1. **OpenAI** (default):
```bash
RAG_EMBEDDING_PROVIDER=openai
OPENAI_API_KEY=sk-your-key
RAG_EMBEDDING_MODEL=text-embedding-3-small
RAG_EMBEDDING_DIMENSION=1536
```

2. **Ollama** (local/LAN):
```bash
RAG_EMBEDDING_PROVIDER=ollama
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_EMBEDDING_MODEL=nomic-embed-text
RAG_EMBEDDING_DIMENSION=768
```

**Example Index Request:**
```bash
curl -X POST http://localhost:8123/rag/index \
-H "Content-Type: application/json" \
-d '{
"source_type": "markdown",
"source_path": "docs/ARCHITECTURE.md"
}'
```

**Example Retrieve Request:**
```bash
curl -X POST http://localhost:8123/rag/retrieve \
-H "Content-Type: application/json" \
-d '{
"query": "How does backtesting work?",
"top_k": 5
}'
```

**Features:**
- pgvector HNSW index for cosine-similarity search
- Idempotent indexing via content hash
- Markdown and OpenAPI chunking strategies
- Configurable embedding dimensions

### Error Responses (RFC 7807)

All error responses follow RFC 7807 Problem Details format with `Content-Type: application/problem+json`:
Expand Down
1 change: 1 addition & 0 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Import all models for Alembic autogenerate detection
from app.features.data_platform import models as data_platform_models # noqa: F401
from app.features.jobs import models as jobs_models # noqa: F401
from app.features.rag import models as rag_models # noqa: F401
from app.features.registry import models as registry_models # noqa: F401

# Alembic Config object
Expand Down
153 changes: 153 additions & 0 deletions alembic/versions/b4c8d9e0f123_create_rag_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""create_rag_tables

Revision ID: b4c8d9e0f123
Revises: 37e16ecef223
Create Date: 2026-02-01 12:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "b4c8d9e0f123"
down_revision: Union[str, None] = "37e16ecef223"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Set up the RAG schema.

    Enables the pgvector extension, then creates ``document_source`` and
    ``document_chunk`` (the latter with a 1536-dimension ``embedding``
    column) together with their lookup, HNSW and GIN indexes.
    """

    def timestamp_columns():
        """Return new created_at/updated_at columns; each table gets its own objects."""
        return [
            sa.Column(
                column_name,
                sa.DateTime(timezone=True),
                server_default=sa.text("now()"),
                nullable=False,
            )
            for column_name in ("created_at", "updated_at")
        ]

    def metadata_column():
        """Return a new nullable JSONB ``metadata`` column."""
        return sa.Column(
            "metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        )

    # The Vector column type below needs the extension to exist first.
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # --- document_source -------------------------------------------------
    op.create_table(
        "document_source",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("source_id", sa.String(length=32), nullable=False),
        sa.Column("source_type", sa.String(length=50), nullable=False),
        sa.Column("source_path", sa.Text(), nullable=False),
        sa.Column("content_hash", sa.String(length=64), nullable=False),
        metadata_column(),
        sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=False),
        *timestamp_columns(),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("source_type", "source_path", name="uq_source_type_path"),
    )

    source_indexes = (
        ("ix_document_source_source_id", "source_id", True),
        ("ix_document_source_source_type", "source_type", False),
    )
    for index_name, column_name, is_unique in source_indexes:
        op.create_index(
            op.f(index_name),
            "document_source",
            [column_name],
            unique=is_unique,
        )

    # --- document_chunk --------------------------------------------------
    op.create_table(
        "document_chunk",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("chunk_id", sa.String(length=32), nullable=False),
        sa.Column("source_id", sa.Integer(), nullable=False),
        sa.Column("chunk_index", sa.Integer(), nullable=False),
        sa.Column("content", sa.Text(), nullable=False),
        sa.Column("embedding", Vector(1536), nullable=True),
        sa.Column("token_count", sa.Integer(), nullable=False),
        metadata_column(),
        *timestamp_columns(),
        sa.PrimaryKeyConstraint("id"),
        # Chunks are removed together with their parent source row.
        sa.ForeignKeyConstraint(
            ["source_id"],
            ["document_source.id"],
            ondelete="CASCADE",
        ),
        sa.UniqueConstraint("source_id", "chunk_index", name="uq_source_chunk_index"),
    )

    chunk_indexes = (
        ("ix_document_chunk_chunk_id", "chunk_id", True),
        ("ix_document_chunk_source_id", "source_id", False),
    )
    for index_name, column_name, is_unique in chunk_indexes:
        op.create_index(
            op.f(index_name),
            "document_chunk",
            [column_name],
            unique=is_unique,
        )

    # HNSW index over the embeddings, using the cosine-distance operator class.
    op.create_index(
        "ix_chunk_embedding_hnsw",
        "document_chunk",
        ["embedding"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_with={"m": 16, "ef_construction": 64},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )

    # GIN index so chunks can be filtered on their JSONB metadata.
    op.create_index(
        "ix_chunk_metadata_gin",
        "document_chunk",
        ["metadata"],
        unique=False,
        postgresql_using="gin",
    )


def downgrade() -> None:
    """Tear down the RAG schema created by this revision.

    Drops the indexes and tables for ``document_chunk`` and then
    ``document_source``. The ``vector`` extension is deliberately left
    installed because other tables might be using it.
    """
    # document_chunk goes first: it holds the foreign key to document_source.
    chunk_index_names = (
        "ix_chunk_metadata_gin",
        "ix_chunk_embedding_hnsw",
        op.f("ix_document_chunk_source_id"),
        op.f("ix_document_chunk_chunk_id"),
    )
    for index_name in chunk_index_names:
        op.drop_index(index_name, table_name="document_chunk")
    op.drop_table("document_chunk")

    source_index_names = (
        op.f("ix_document_source_source_type"),
        op.f("ix_document_source_source_id"),
    )
    for index_name in source_index_names:
        op.drop_index(index_name, table_name="document_source")
    op.drop_table("document_source")
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""rag_dynamic_embedding_dimension

Revision ID: c5d9e1f2g345
Revises: b4c8d9e0f123
Create Date: 2026-02-01 12:49:28.000000

CRITICAL: This migration alters the embedding column dimension.
If changing from 1536 to a different dimension, existing embeddings
will be incompatible and re-indexing is required.
"""

from __future__ import annotations

import os
from collections.abc import Sequence

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "c5d9e1f2g345"
down_revision: str | None = "b4c8d9e0f123"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Apply migration - alter embedding column to configurable dimension.

    Reads RAG_EMBEDDING_DIMENSION from the environment (default: 1536),
    validates it, then drops the HNSW index, changes the column type to
    ``vector(<dimension>)`` and recreates the index.

    WARNING: Changing dimension requires re-indexing all documents.

    Raises:
        ValueError: If RAG_EMBEDDING_DIMENSION is not an integer, or is
            outside the 1..2000 range that a pgvector HNSW index on a
            ``vector`` column can hold.
    """
    raw_dimension = os.environ.get("RAG_EMBEDDING_DIMENSION", "1536")
    try:
        dimension = int(raw_dimension)
    except ValueError as exc:
        raise ValueError(
            f"RAG_EMBEDDING_DIMENSION must be an integer, got {raw_dimension!r}"
        ) from exc

    # Validate before touching the schema: an out-of-range value would
    # otherwise only surface as a database error after the index was dropped.
    max_hnsw_dimension = 2000  # pgvector limit for HNSW indexes on `vector`
    if not 1 <= dimension <= max_hnsw_dimension:
        raise ValueError(
            "RAG_EMBEDDING_DIMENSION must be between 1 and "
            f"{max_hnsw_dimension} for an HNSW-indexed vector column, "
            f"got {dimension}"
        )

    # Drop the HNSW index first (required before altering column type)
    op.drop_index("ix_chunk_embedding_hnsw", table_name="document_chunk")

    # `dimension` is a validated int, so interpolating it into the DDL is safe.
    # NOTE(review): rows that already store embeddings of a different
    # dimension will presumably make this ALTER fail rather than be silently
    # invalidated - confirm, and clear/re-index embeddings before switching.
    op.execute(f"ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector({dimension})")

    # Recreate the HNSW index with the new dimension
    op.create_index(
        "ix_chunk_embedding_hnsw",
        "document_chunk",
        ["embedding"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_with={"m": 16, "ef_construction": 64},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )


def downgrade() -> None:
    """Revert migration - put the embedding column back to 1536 dimensions.

    WARNING: Embeddings that were generated with a different dimension
    are not compatible with the restored column and must be re-indexed.
    """
    table_name = "document_chunk"
    hnsw_index_name = "ix_chunk_embedding_hnsw"
    hnsw_options = {"m": 16, "ef_construction": 64}
    operator_classes = {"embedding": "vector_cosine_ops"}

    # The index has to go before the column type can change.
    op.drop_index(hnsw_index_name, table_name=table_name)

    # Back to the dimension used by the initial RAG migration.
    op.execute("ALTER TABLE document_chunk ALTER COLUMN embedding TYPE vector(1536)")

    # Rebuild the HNSW index on the restored column.
    op.create_index(
        hnsw_index_name,
        table_name,
        ["embedding"],
        unique=False,
        postgresql_using="hnsw",
        postgresql_with=hnsw_options,
        postgresql_ops=operator_classes,
    )
26 changes: 26 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,32 @@ class Settings(BaseSettings):
# Jobs
jobs_retention_days: int = 30

# RAG Embedding Configuration
rag_embedding_provider: Literal["openai", "ollama"] = "openai"
openai_api_key: str = ""
rag_embedding_model: str = "text-embedding-3-small"
rag_embedding_dimension: int = 1536
rag_embedding_batch_size: int = 100

# Ollama Configuration (when rag_embedding_provider = "ollama")
ollama_base_url: str = "http://localhost:11434"
ollama_embedding_model: str = "nomic-embed-text"

# RAG Chunking Configuration
rag_chunk_size: int = 512 # tokens
rag_chunk_overlap: int = 50 # tokens
rag_min_chunk_size: int = 100 # minimum tokens per chunk

# RAG Retrieval Configuration
rag_top_k: int = 5
rag_similarity_threshold: float = 0.7
rag_max_context_tokens: int = 4000

# RAG Index Configuration
rag_index_type: Literal["hnsw", "ivfflat"] = "hnsw"
rag_hnsw_m: int = 16
rag_hnsw_ef_construction: int = 64

@property
def is_development(self) -> bool:
"""Check if running in development mode."""
Expand Down
5 changes: 5 additions & 0 deletions app/features/rag/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""RAG (Retrieval-Augmented Generation) knowledge base feature."""

from app.features.rag.routes import router

__all__ = ["router"]
Loading