In [1]:
# Automatically reload modules when code changes.
# `%autoreload 2` asks IPython's autoreload extension to re-import modules before
# each cell runs, so edits to project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pgmcp.settings import get_settings
from rich.console import Console
from rich.table import Table
from rich.theme import Theme

# Project settings, resolved once and shared by the rest of the notebook.
SETTINGS = get_settings()

# Shared rich console: automatic highlighting is switched off and Jupyter
# rendering is forced on rather than auto-detected.
console = Console(highlight=False, force_jupyter=True)

In [3]:

from pgmcp.models.library import Library


KNOWLEDGE_BASE_LIBRARY_NAME = "Knowledge Base"

# Kernel-wide cache so the lookup/creation round-trip happens at most once.
_kb_library: Library | None = None


async def get_knowledge_base_library() -> Library:
    """Get or create the knowledge base library.

    The library named ``KNOWLEDGE_BASE_LIBRARY_NAME`` is looked up first; when
    the lookup yields nothing, a new ``Library`` with that name is created and
    saved. The result is cached in the module-level ``_kb_library`` so later
    calls return the same object without opening another context.

    Returns:
        The cached, found, or newly saved ``Library``.

    Raises:
        Any exception raised by the lookup, by ``save()``, or by leaving
        ``Library.async_context()`` propagates unchanged. The cache is only
        written after all of them succeed, so a failed call is retried on the
        next invocation instead of handing back an unsaved instance.
    """
    global _kb_library

    # Compare against None explicitly so a Library instance that happens to be
    # falsy can never be mistaken for "not cached yet".
    if _kb_library is not None:
        return _kb_library

    async with Library.async_context():
        # Work on a local until everything succeeded; assigning the global
        # before `save()` would poison the cache if the save raised.
        library = await Library.query().where(Library.name == KNOWLEDGE_BASE_LIBRARY_NAME).first()
        # NOTE(review): assumes `first()` yields None when no row matches — confirm.
        if library is None:
            library = Library(name=KNOWLEDGE_BASE_LIBRARY_NAME)
            await library.save()

    _kb_library = library
    return _kb_library

In [4]:
from sqlalchemy.future import select
from pgmcp.models import Corpus, Document, Chunk 
import openai

QUERY = "on save callbacks"

async with Corpus.async_context():
    library = await get_knowledge_base_library()

    # 2. We need to embed the query
    from openai import AsyncOpenAI
    client = AsyncOpenAI()

    response = await client.embeddings.create(
        model="text-embedding-3-small",
        input=QUERY
    )

    if not response or not response.data or not isinstance(response.data, list):
        raise ValueError(f"Invalid response from OpenAI: {response}")

    query_embedding = response.data[0].embedding    
    
    if not query_embedding or not isinstance(query_embedding, list):
        raise ValueError(f"Invalid embedding in response: {response.data[0]}")

QUERY_EMBEDDING = query_embedding


# Vector Similarity

## The Metaphor: Archery Exhibition

### Setting

- Holodeck: A simulated, multi-dimensional environment.
- Gravity: None; Arrows travel in straight lines.
- Air Resistance: Present; More draw on the bowstring results in greater distance traveled in space.

### Vocabulary

- **Vector** -- The set of coordinates where the arrow lands after you shoot it. For example, (x₁, x₂, ..., xₙ).
- **Origin** -- The starting point (0, 0, ..., 0) where every shot begins.
- **Distance (Magnitude)** -- The straight-line length from the origin to where the arrow lands. This is just the Pythagorean theorem, extended to as many dimensions as you have:
    - In 2D: `distance = math.sqrt(a**2 + b**2)`
        - Classic `a² + b² = c²`
    - In nD: `distance = math.sqrt(sum(x**2 for x in vector))`
        - Replace `c` with `DISTANCE`, so `a² + b² = DISTANCE²`, allowing `SUM([xₙ²,...]) == DISTANCE²`
    - Each coordinate is like a "side" in its own dimension. The formula always gives you the shortest path from the origin to the landing point, no matter how many dimensions you have!
- **Direction** -- The "way" the arrow points from the origin, no matter how far it goes. In programming terms, you get the direction by dividing each coordinate by the distance (magnitude). This gives you a "unit vector" that always has length 1, but points in the same direction as the original arrow.

    ```python
    # Example: get direction for vector v
    v = [x1, x2, ..., xn]
    magnitude = math.sqrt(sum(x**2 for x in v))
    direction = [x / magnitude for x in v]
    ```

### pgvector Comparators in Metaphorical Context

Setup: Two archers show off their trick "shots" and they get recorded as vectors.

We can now compare those archers' shots using a few different pgvector comparators.

- **l2_distance**:
    - Measures the straight-line distance between where two arrows landed, just like using a ruler in multi-dimensional space.
    - Answers "how close did these two shots land to each other?"—ignoring the path, only caring about the shortest possible gap.
    - Use this when you want to find the most similar or nearest shots, regardless of direction or how each got there.
- **max_inner_product**:
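    - Measures how much two arrows point in the same direction and how far they both traveled—combining direction and magnitude. Note that pgvector's underlying `<#>` operator returns the *negative* inner product, so the best matches have the smallest (most negative) values.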
    - Useful for ranking which arrows (vectors) are most "aligned" and powerful compared to a reference shot.
    - Great when you want to find shots that not only aim the same way, but also have the most "force" behind them (largest combined effect).
- **cosine_distance**:
    - Measures how closely two arrows point in the same direction, completely ignoring how far they traveled.
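    - Calculated as one minus the cosine similarity, where cosine similarity is the dot product of the two vectors divided by the product of their magnitudes: `cosine_distance = 1 - dot(a, b) / (||a|| * ||b||)`. A value of 0 means the two arrows point in exactly the same direction.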
    - Use this when you want to find arrows (vectors) that are aimed the same way, even if they landed at very different distances from the origin—perfect for matching "intent" or "approach" rather than exact landing spots.
- **l1_distance**:
    - Adds up the differences between each coordinate of two arrows—like counting the total number of steps you'd take moving along a grid to match one shot to another.
    - Use this when you care about the total adjustment needed in every direction, not just the straight-line distance—great for comparing feature-by-feature changes or when every axis matters.
    - Ideal if you want to know "how much work" it would take to transform one shot into another by moving only along the axes, not diagonally.
- **hamming_distance**:
    - Counts how many coordinates are different between two arrows—like tallying up how many settings you changed on your bow between shots.
    - Best for comparing arrows (vectors) in a space where each coordinate is a discrete choice (like on/off, yes/no, or categories). In pgvector this comparator operates on binary (`bit`) vectors.
    - Use this when you care about the number of differences, not how big those differences are—great for error detection or comparing categorical data.
- **jaccard_distance**:
    - Measures how different two sets of coordinates are by comparing what they have in common versus what they have in total.
    - Think of it as checking how much overlap there is between two arrows' landing spots—perfect overlap means they're identical, no overlap means they're totally different.
    - Use this when your arrows are defined by sets of features or categories, and you want to know how much their features overlap. In pgvector this comparator operates on binary (`bit`) vectors.


## Idea

**LLM Routing to choose best approach to use based on user query intent.**

It would be highly beneficial to have an LLM choose the most appropriate similarity metric based on the user’s query intent.

- Different user questions naturally map to different notions of “similarity”—an LLM can infer this intent and select the optimal metric, improving result relevance.
- This approach lets you support a wider range of search behaviors (e.g., intent-matching, feature-matching, nearest-neighbor) without forcing users to know or care about the underlying math.
- It also enables more advanced workflows, like hybrid or fallback strategies (e.g., try cosine, then l2 if no strong matches), all orchestrated by the LLM.
- In short: letting an LLM dynamically select the comparator makes your search system smarter, more flexible, and more user-aligned.


## LLM Decision Matrix Prompt: Choosing the Best pgvector Comparator for Knowledge Base Search

You are an expert search assistant. Your task is to select the most appropriate pgvector similarity metric for searching a documentation knowledge base, based on the user's query and intent. 

## Available Comparators

| Comparator           | Description |
|----------------------|-------------|
| **l2_distance**      | Use when the user wants the most similar or nearest documentation, regardless of approach or context. Best for "find the closest match" scenarios. |
| **max_inner_product**| Use when the user wants results that are both highly relevant and information-rich—prioritize docs that are strongly aligned and comprehensive. |
| **cosine_distance**  | Use when the user cares about matching the intent or conceptual direction of their query, even if the amount of detail differs. Best for "find docs with the same approach or intent." |
| **l1_distance**      | Use when the user wants to minimize total differences across all features or aspects—great for "feature-by-feature" or "stepwise" similarity. |
| **hamming_distance** | Use when the user cares about the number of exact differences in discrete or categorical features (e.g., toggles, flags, categories). |
| **jaccard_distance** | Use when the user wants to maximize overlap in sets of features, tags, or categories—best for set-based or tag-based matching. |

## Instructions
1. Analyze the user's query and infer their intent (e.g., are they seeking the closest match, conceptual alignment, feature overlap, etc.) using a chain of thought that considers the specific wording and context of the query. It should be between 3 and 10 connected thoughts long.
2. Select the single most appropriate pgvector comparator from the table above.
3. The response must validate against the JSON schema in the Response Schema section and must contain nothing but valid, parsable JSON.

## Response Schema
```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Pgvector Comparator Selection Process",
  "type": "object",
  "properties": {
    "a__chain_of_thought": {
      "type": "array",
      "description": "A step-by-step chain of thought (3-10 connected thoughts) showing how the comparator was selected.",
      "minItems": 3,
      "maxItems": 10,
      "items": {
        "type": "string"
      }
    },
    "b__reason": {
      "type": "string",
      "description": "A one-sentence justification for why this comparator was chosen, based on the chain_of_thought property array thinking"
    },
    "c__comparator": {
      "type": "string",
      "enum": [
        "l2_distance",
        "max_inner_product",
        "cosine_distance",
        "l1_distance",
        "hamming_distance",
        "jaccard_distance"
      ],
      "description": "The selected pgvector comparator for the search, chosen after using chain_of_thought and reason to justify its use."
    }
  },
  "required": ["a__chain_of_thought", "b__reason", "c__comparator"]
}
```
  

## Example Input / Output
<examples>
    <example>
        <conversation>
            <user>Find documentation on how to use the new API features of SQLAlchemy 2.x</user>
            <assistant>{
                "a__chain_of_thought": [
                    "The user is looking for documentation on a specific version of SQLAlchemy.",
                    "They are interested in new API features, which suggests they want the latest information.",
                    "The query is straightforward and does not imply a need for deep contextual understanding."
                ],
                "b__reason": "The l2_distance comparator is best for finding the closest match without needing to understand the context deeply.",
                "c__comparator": "l2_distance"
            }
            </assistant>
        </conversation>
    </example>
    <example>
        <conversation>
            <user>How do I implement pagination in FastAPI?</user>
            <assistant>{
                "a__chain_of_thought": [
                    "The user is looking for information on implementing pagination.",
                    "They are using FastAPI, which suggests they want a solution that fits within that framework.",
                    "The query is specific and likely requires examples or detailed explanations.",
                    "The user may also want an idiomatic solution that follows FastAPI best practices."
                ],
                "b__reason": "The max_inner_product comparator is best for finding highly relevant and information-rich documentation.",
                "c__comparator": "max_inner_product"
            }
            </assistant>
        </conversation>
    </example>
</examples>


In [6]:
# Search the postgresql database using similarity search with pgvector

"""
pgvector offers l2_distance, max_inner_product, cosine_distance, l1_distance, hamming_distance, and jaccard_distance

## Metaphor Time:

### Premise: 
- You are an Archer, standing at `origin` of the entire vector space.
- You fire an arrow wildly into multi-dimensional space.
- The direction and distance of that shot are your `reference` from which you will
  _measure_ how close or distant it was to other historical paths of the _shot_.
- You can think of each shot as a vector in this space, with its own unique direction and magnitude.
    - Direction: The angle at which the arrow was shot.
    - Magnitude: The distance the arrow traveled.
- We don't care where the shot originated at all -- we only care about its direction (angle shot at) and magnitude (distance traveled).
- With those two pieces of information we can then compare _past_ shots to any other shot, and from that we can likely determine how similar they are.
- That covers shot similarity. But, we also know what it takes to "hit" a `target` which is defined by its own unique set of coordinates in this multi-dimensional space.
- We can tell how "close" any given shot is to a `target` by comparing a shot's vector to the target's vector. The closer they are in direction and magnitude, the more likely the shot is to hit the target.
- We can actually factor out distance and just compare `direction` (angle of the shot taken in multi-dimensional space) and the angle of the target's vector in multi-dimensional space. (from the theoretical origin of the multi-dimensional space)






"""

from pgmcp.models.chunk import Chunk
from pgmcp.models.document import Document

results = []
async with Chunk.async_context() as session:
    qb = Chunk.query()
    qb = qb.joins(Chunk.document)
    qb = qb.where(Chunk.embedding.l2_distance(query_embedding) < 0.5)

# Search all chunks in the corpus using similarity search with pgvector