# Create Schema with a POST request

In [10]:
import weaviate

In [11]:
import json
import os

# Read the aquarium schema JSON file
schemas_path = os.path.join("..", "data", "simple-3-collection-schemas.json")
with open(schemas_path, "r") as f:
    schemas = json.load(f)

In [12]:
import json
json_schema = json.loads(schemas[0])
json_schema["weaviate_collections"][0]

{'properties': [{'name': 'patientName',
   'data_type': ['text'],
   'description': 'Name of the patient.'},
  {'name': 'age',
   'data_type': ['number'],
   'description': 'Age of the patient.'},
  {'name': 'admitted',
   'data_type': ['boolean'],
   'description': 'Indicates if the patient is currently admitted to the hospital.'}],
 'class': 'HospitalPatients',
 'description': 'The HospitalPatients collection tracks patient information, including their demographics and admission status. This is crucial for managing patient records and coordinating care with medical staff.',
 'vectorIndexType': 'hnsw',
 'vectorizer': 'text2vec-transformers'}

In [13]:
json_schema

{'weaviate_collections': [{'properties': [{'name': 'patientName',
     'data_type': ['text'],
     'description': 'Name of the patient.'},
    {'name': 'age',
     'data_type': ['number'],
     'description': 'Age of the patient.'},
    {'name': 'admitted',
     'data_type': ['boolean'],
     'description': 'Indicates if the patient is currently admitted to the hospital.'}],
   'class': 'HospitalPatients',
   'description': 'The HospitalPatients collection tracks patient information, including their demographics and admission status. This is crucial for managing patient records and coordinating care with medical staff.',
   'vectorIndexType': 'hnsw',
   'vectorizer': 'text2vec-transformers'},
  {'properties': [{'name': 'staffName',
     'data_type': ['text'],
     'description': 'Name of the medical staff member.'},
    {'name': 'yearsOfExperience',
     'data_type': ['number'],
     'description': 'Number of years of experience the staff has.'},
    {'name': 'onDuty',
     'data_type'

In [16]:
import requests

url = "http://localhost:8080/v1/schema"

# Post each class schema separately
for class_schema in json_schema['weaviate_collections']:
    # Make sure the schema has all required fields
    if 'class' not in class_schema:
        print("Error: Schema missing required 'class' field")
        continue
        
    # Create a clean schema object with only the required fields
    clean_schema = {
        'class': class_schema['class'],  # Use existing 'class' field
        'description': class_schema.get('description', ''),
        'properties': [
            {
                'name': prop['name'],
                'description': prop.get('description', ''),
                'dataType': prop['data_type']  # Weaviate expects dataType, not data_type
            }
            for prop in class_schema.get('properties', [])
        ],
        'vectorizer': class_schema.get('vectorizer', 'text2vec-transformers'),
        'vectorIndexType': class_schema.get('vectorIndexType', 'hnsw'),
    }
    
    # Convert to string
    schema_str = json.dumps(clean_schema)
    
    print(f"Sending schema for class {clean_schema['class']}:")
    print(schema_str)
    
    response = requests.post(
        url, 
        data=schema_str,
        headers={'Content-Type': 'application/json'}
    )
    
    print(f"Response status: {response.status_code}")
    if response.status_code != 200:
        print(f"Error response: {response.text}")

Sending schema for class HospitalPatients:
{"class": "HospitalPatients", "description": "The HospitalPatients collection tracks patient information, including their demographics and admission status. This is crucial for managing patient records and coordinating care with medical staff.", "properties": [{"name": "patientName", "description": "Name of the patient.", "dataType": ["text"]}, {"name": "age", "description": "Age of the patient.", "dataType": ["number"]}, {"name": "admitted", "description": "Indicates if the patient is currently admitted to the hospital.", "dataType": ["boolean"]}], "vectorizer": "text2vec-transformers", "vectorIndexType": "hnsw"}
Response status: 200
Sending schema for class MedicalStaff:
{"class": "MedicalStaff", "description": "The MedicalStaff collection maintains records of hospital staff members, their experience, and current duty status. This allows efficient scheduling and assignment of staff to patients based on their needs and availability.", "proper

# Parse `collections.list_all()`

In [19]:
import weaviate

weaviate_client = weaviate.connect_to_local()

weaviate_client.collections.list_all()

  weaviate_client = weaviate.connect_to_local()


{'HospitalDepartments': _CollectionConfigSimple(name='HospitalDepartments', description='HospitalDepartments provides information on each department within the hospital, including their capacity and operational status. This helps in optimizing resource allocation and ensuring each department can handle patient load effectively.', generative_config=None, properties=[_Property(name='departmentName', description='Name of the hospital department.', data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False), vectorizer='text2vec-transformers'), _Property(name='bedCapacity', description='Number of beds available in the department.', data_type=<DataType.NUMBER: 'number'>, index_filterable=True, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=_PropertyVectorizerConfig(skip=False, vecto

In [26]:
def print_collections_info(client: weaviate.WeaviateClient) -> tuple[str, list[str]]:
    """
    Get detailed information about all collections in a Weaviate instance.
    
    Args:
        client: A Weaviate client instance
    
    Returns:
        tuple[str, list[str]]: Tuple containing formatted collection details string and list of collection names
    """
    
    collections = client.collections.list_all()
    
    # Get collection names as list
    collection_names = list(collections.keys())
    
    # Build output string
    output = []
    for collection_name, config in collections.items():
        output.append(f"\nCollection Name: {collection_name}")
        output.append(f"Description: {config.description}")
        output.append("\nProperties:")
        for prop in config.properties:
            output.append(f"- {prop.name}: {prop.description} (type: {prop.data_type.value})")
    
    return "\n".join(output), collection_names

# Call the function with our client and print result
info_str, collections_list = print_collections_info(weaviate_client)
print(info_str)


Collection Name: MedicalStaff
Description: The MedicalStaff collection maintains records of hospital staff members, their experience, and current duty status. This allows efficient scheduling and assignment of staff to patients based on their needs and availability.

Properties:
- staffName: Name of the medical staff member. (type: text)
- yearsOfExperience: Number of years of experience the staff has. (type: number)
- onDuty: Indicates if the staff member is currently on duty. (type: boolean)

Collection Name: HospitalPatients
Description: The HospitalPatients collection tracks patient information, including their demographics and admission status. This is crucial for managing patient records and coordinating care with medical staff.

Properties:
- patientName: Name of the patient. (type: text)
- age: Age of the patient. (type: number)
- admitted: Indicates if the patient is currently admitted to the hospital. (type: boolean)

Collection Name: HospitalDepartments
Description: Hospita

# Create Function

OpenAI wants to take enum as an argument with the following:

`"enum": ["c", "f"]`

Function Schema:

```python
{
    "name": "search_weaviate_collection",
    "description": "Search a Weaviate Collection",
    "parameters": {
        "type": "object",
        "properties": {
            "collection_name": {
                "type": "enum": collections_list,
                "description": "The Weaviate Collection to search through."
            },
            "search_query": {
                "type": "string",
                "description": "The search query."
            }
        },
        "required": ["collection_name", "search_query"],
        "additionalProperties": False
    }
}
```

In [38]:
from typing import Literal, Optional, Dict, List
from pydantic import BaseModel

class ParameterProperty(BaseModel):
    type: str
    description: str

class Parameters(BaseModel):
    type: Literal["object"]
    properties: Dict[str, ParameterProperty]
    required: Optional[List[str]]

class Function(BaseModel):
    name: str
    description: str
    parameters: Parameters

class Tool(BaseModel):
    type: Literal["function"]
    function: Function

# Example usage:
search_tool = Tool(
    type="function",
    function=Function(
        name="search_weaviate_collection",
        description="Search for the most relevant items to the provided `search_query` in a Weaviate Database Collection.",
        parameters=Parameters(
            type="object",
            properties={
                "collection_name": ParameterProperty(
                    type="string",
                    description="The Weaviate Collection to search through."
                ),
                "search_query": ParameterProperty(
                    type="string",
                    description="The search query."
                )
            },
            required=["collection_name", "search_query"]
        )
    )
)

# Example of another tool:
calculate_avg_tool = Tool(
    type="function", 
    function=Function(
        name="calculate_average",
        description="Calculate the average of a numeric property across collection items",
        parameters=Parameters(
            type="object",
            properties={
                "collection_name": ParameterProperty(
                    type="string",
                    description="The Weaviate Collection to analyze"
                ),
                "property_name": ParameterProperty(
                    type="string",
                    description="The numeric property to average"
                )
            },
            required=["collection_name", "property_name"]
        )
    )
)

In [39]:
print(search_tool.model_dump_json(indent=2))

{
  "type": "function",
  "function": {
    "name": "search_weaviate_collection",
    "description": "Search for the most relevant items to the provided `search_query` in a Weaviate Database Collection.",
    "parameters": {
      "type": "object",
      "properties": {
        "collection_name": {
          "type": "string",
          "description": "The Weaviate Collection to search through."
        },
        "search_query": {
          "type": "string",
          "description": "The search query."
        }
      },
      "required": [
        "collection_name",
        "search_query"
      ]
    }
  }
}


In [40]:
tools = [search_tool]

# Function Calling Test - Collection Query Routing

In [41]:
import openai

openai_client = openai.OpenAI(
    api_key = ""
)

messages = [
    {"role": "system", "content": "You are a helpful hospital support assistant. Use the supplied tools to assist the user."},
    {"role": "user", "content": "Hi, can you tell me who works at this hospital?"}
]

response = openai_client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=messages,
    tools=tools,
)

In [47]:
print(response.choices[0].message.tool_calls[0].function.collection_name)

print(response.choices[0].message.tool_calls[0].function.search_query)



Function(arguments='{"collection_name":"hospital_staff","search_query":"hospital staff"}', name='search_weaviate_collection')
