In [1]:
from typing import Literal, Optional
from pydantic import BaseModel

In [2]:
class IntPropertyFilter(BaseModel):
    # Schema for a filter on a numeric property.
    # property_name: name of the property the filter refers to.
    # operator: restricted to equality or one of the four ordering comparisons.
    # value: the number to compare against; despite the class name, floats are
    #        accepted as well as ints.
    property_name: str
    operator: Literal["=", "<", ">", "<=", ">="]
    value: int | float

class TextPropertyFilter(BaseModel):
    # Schema for a filter on a text property.
    # property_name: name of the property the filter refers to.
    # operator: either exact equality ("=") or "LIKE".
    #           NOTE(review): "LIKE" presumably means wildcard/pattern matching;
    #           nothing in this notebook defines it — confirm with the consumer.
    # value: the string to compare against.
    property_name: str
    operator: Literal["=", "LIKE"]
    value: str

class BooleanPropertyFilter(BaseModel):
    # Schema for a filter on a boolean property.
    # property_name: name of the property the filter refers to.
    # operator: equality ("=") or inequality ("!=").
    # value: the boolean to compare against.
    property_name: str
    operator: Literal["=", "!="]
    value: bool

class IntAggregation(BaseModel):
    # Schema for an aggregation over a numeric property.
    # `metrics` holds exactly one metric name (a single Literal value, not a
    # list), chosen from the options below.
    property_name: str
    metrics: Literal[
        "COUNT",
        "TYPE",
        "MIN",
        "MAX",
        "MEAN",
        "MEDIAN",
        "MODE",
        "SUM",
    ]

class TextAggregation(BaseModel):
    # Schema for an aggregation over a text property.
    # property_name: name of the property to aggregate.
    # metrics: exactly one metric name (a single Literal value, not a list).
    # top_occurrences_limit: optional integer, defaults to None.
    #     NOTE(review): presumably only meaningful when metrics is
    #     "TOP_OCCURRENCES"; no validation here enforces that — confirm.
    property_name: str
    metrics: Literal["COUNT", "TYPE", "TOP_OCCURRENCES"]
    top_occurrences_limit: Optional[int] = None

class BooleanAggregation(BaseModel):
    # Schema for an aggregation over a boolean property.
    # `metrics` holds exactly one metric name (a single Literal value, not a
    # list), chosen from the options below.
    property_name: str
    metrics: Literal[
        "COUNT",
        "TYPE",
        "TOTAL_TRUE",
        "TOTAL_FALSE",
        "PERCENTAGE_TRUE",
        "PERCENTAGE_FALSE",
    ]

In [3]:
class WeaviateQuery(BaseModel):
    # Top-level structured-output schema: this class is what the notebook
    # passes as `response_format` when asking the model for a query.
    #
    # Each query component below is typed Optional, so it may be None when the
    # request does not need it; only the natural-language restatement is a
    # plain `str`.
    # NOTE(review): none of the Optional fields declares a default, so under
    # pydantic v2 each must still be supplied explicitly (None is allowed) —
    # confirm against the installed pydantic version.
    search_query: Optional[str]
    # At most one filter per value type (numeric / text / boolean).
    integer_property_filter: Optional[IntPropertyFilter]
    text_property_filter: Optional[TextPropertyFilter]
    boolean_property_filter: Optional[BooleanPropertyFilter]
    # At most one aggregation per value type (numeric / text / boolean).
    integer_property_aggregation: Optional[IntAggregation]
    text_property_aggregation: Optional[TextAggregation]
    boolean_property_aggregation: Optional[BooleanAggregation]
    # Name of a property to group results by, if any.
    groupby_property: Optional[str]
    # The natural-language question this structured query is meant to answer.
    corresponding_natural_language_query: str

In [5]:
import os

import openai

# Build the OpenAI client used for the structured-output request below.
# The API key is taken from the OPENAI_API_KEY environment variable instead of
# being written into the notebook, so the secret never ends up in the saved
# file or its history. If the variable is unset, `.get` yields None and the
# SDK raises its own error describing the missing credential at construction
# time, rather than failing later with an authentication error on first use.
lm_client = openai.OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [6]:
# User prompt for the structured-output request: it embeds the schema of a
# "Products" collection (text `name`, text `description`, boolean `inStock`)
# and asks for a query that searches for "laptop" and keeps only in-stock
# items. The string is sent verbatim, so whitespace inside it is significant.
prompt = """
You are working with a Weaviate collection called "Products" that has the following schema:

{
  "name": "Products",
  "properties": [
    {
      "name": "name",
      "dataType": ["text"],
      "description": "The name of the product"
    },
    {
      "name": "description", 
      "dataType": ["text"],
      "description": "Description of the product"
    },
    {
      "name": "inStock",
      "dataType": ["boolean"],
      "description": "Whether the product is currently in stock"
    }
  ]
}

Generate a Weaviate query that:
1. Searches for products containing the word "laptop" in their name or description
2. Filters for only products that are in stock (inStock = true)

The query should help answer the natural language question: "What laptops do we have available in stock?"
"""

In [8]:
# Ask the model for a reply that conforms to the WeaviateQuery schema.
# Passing the pydantic class as `response_format` makes the SDK's `parse`
# helper return the reply already deserialized into that class.
response = lm_client.beta.chat.completions.parse(
    model="gpt-4o",
    response_format=WeaviateQuery,
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant. Follow the response format instructions.",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
)

In [11]:
# Show the first choice's reply as a WeaviateQuery instance (see the output
# below). NOTE(review): `parsed` is presumably None when the model refuses
# to answer — confirm against the SDK docs before relying on it downstream.
response.choices[0].message.parsed

WeaviateQuery(search_query='laptop', integer_property_filter=None, text_property_filter=None, boolean_property_filter=BooleanPropertyFilter(property_name='inStock', operator='=', value=True), integer_property_aggregation=None, text_property_aggregation=None, boolean_property_aggregation=None, groupby_property=None, corresponding_natural_language_query='What laptops do we have available in stock?')