In [None]:
! pip install -r requirements.txt

In [3]:
import weaviate
from weaviate.embedded import EmbeddedOptions

client = weaviate.Client(
  embedded_options=EmbeddedOptions()
)

data_obj = {
  "name": "Chardonnay",
  "description": "Goes with fish"
}

client.data_object.create(data_obj, "Wine")

Binary /home/samos/.cache/weaviate-embedded did not exist. Downloading binary from https://github.com/weaviate/weaviate/releases/download/v1.19.12/weaviate-v1.19.12-Linux-amd64.tar.gz
Started /home/samos/.cache/weaviate-embedded: process ID 335876


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2023-08-03T22:44:41-07:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2023-08-03T22:44:41-07:00"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50051","time":"2023-08-03T22:44:41-07:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:6666","time":"2023-08-03T22:44:41-07:00"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"wine_9VdgrtkjFBrV","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-03T22:44:41-07:00","took":78642}


'eb844551-b5e8-4d5a-9833-070d4ccb7b7d'

## Check what incorrect GraphQL query result looks like

In [6]:
query = """
{
    Get {
        Article(limit: 2) {
        title
        hasAuthors {
            ... on Author {
                name
                }
            }
        }
    }
}
"""
result = client.query.raw(query)
result

{'errors': [{'locations': [{'column': 9, 'line': 4}],
   'message': 'Cannot query field "Article" on type "GetObjectsObj".',
   'path': None}]}

## Lets validate the schemas
For each Schema try to load it into Weaviate

In [7]:
! ls -lash ToySchemas

total 56K
4.0K drwxr-xr-x 2 samos samos 4.0K Aug  3 22:23 .
4.0K drwxr-xr-x 7 samos samos 4.0K Aug  3 22:49 ..
4.0K -rw-r--r-- 1 samos samos 1.7K Aug  3 22:23 books.json
4.0K -rw-r--r-- 1 samos samos 2.1K Aug  3 22:23 clothing.json
4.0K -rw-r--r-- 1 samos samos 1.8K Aug  3 22:23 coderepos.json
4.0K -rw-r--r-- 1 samos samos 1.1K Aug  3 22:23 crm.json
4.0K -rw-r--r-- 1 samos samos 2.0K Aug  3 22:23 eventplanning.json
4.0K -rw-r--r-- 1 samos samos 1.7K Aug  3 22:23 movies.json
4.0K -rw-r--r-- 1 samos samos 1.7K Aug  3 22:23 music.json
4.0K -rw-r--r-- 1 samos samos 2.4K Aug  3 22:23 podcast.json
4.0K -rw-r--r-- 1 samos samos 1.7K Aug  3 22:23 socialmedia.json
4.0K -rw-r--r-- 1 samos samos 1.8K Aug  3 22:23 supplements.json
4.0K -rw-r--r-- 1 samos samos 1.8K Aug  3 22:23 traveldestination.json
4.0K -rw-r--r-- 1 samos samos 1.7K Aug  3 22:23 workoutlog.json


In [10]:
from pathlib import Path

schema_filenames = list(Path("ToySchemas").glob("*.json"))
schema_filenames

[PosixPath('ToySchemas/books.json'),
 PosixPath('ToySchemas/workoutlog.json'),
 PosixPath('ToySchemas/podcast.json'),
 PosixPath('ToySchemas/socialmedia.json'),
 PosixPath('ToySchemas/movies.json'),
 PosixPath('ToySchemas/eventplanning.json'),
 PosixPath('ToySchemas/music.json'),
 PosixPath('ToySchemas/supplements.json'),
 PosixPath('ToySchemas/traveldestination.json'),
 PosixPath('ToySchemas/coderepos.json'),
 PosixPath('ToySchemas/clothing.json'),
 PosixPath('ToySchemas/crm.json')]

In [15]:
import json
schemas = []
for f in schema_filenames:
    schemas.append(json.loads(f.read_text(encoding="UTF-8")))

schemas[0:1]

[{'class': 'Book',
  'description': 'A book in the library.',
  'properties': [{'name': 'bookId',
    'dataType': ['uuid'],
    'description': 'A unique identifier for each book.',
    'moduleConfig': {'text2vec-transformers': {'skip': True,
      'vectorizeClassName': False,
      'vectorizePropertyName': False}}},
   {'name': 'title',
    'dataType': ['text'],
    'description': 'The title of the book.',
    'moduleConfig': {'text2vec-transformers': {'skip': False,
      'vectorizeClassName': False,
      'vectorizePropertyName': False}}},
   {'name': 'publicationDate',
    'dataType': ['date'],
    'description': 'The publication date of the book.',
    'moduleConfig': {'text2vec-transformers': {'skip': True,
      'vectorizeClassName': False,
      'vectorizePropertyName': False}}},
   {'name': 'genre',
    'dataType': ['text'],
    'description': 'The genre of the book.',
    'moduleConfig': {'text2vec-transformers': {'skip': False,
      'vectorizeClassName': False,
      'vector

In [23]:
# try creating and deleting the schema to ensure it's a correct schema
import logging
client.schema.delete_all()

classes_with_exceptions = []

for schema in schemas:
    try:
        print(f"Creating schema for class {schema['class']}")
        client.schema.create_class(schema)
        client.schema.delete_class(schema["class"])
    except:
        classes_with_exceptions.append({"class": schema["class"], "schema": schema})
        logging.exception(f"Exception for class {schema['class']}")

{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"book_ncI3yGTGYZGv","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-03T23:02:42-07:00","took":86812}
ERROR:root:Exception for class Book
Traceback (most recent call last):
  File "/tmp/ipykernel_335787/2715742918.py", line 10, in <module>
    client.schema.create_class(schema)
  File "/home/samos/workspace/weaviate-gorilla/.venv/lib/python3.11/site-packages/weaviate/schema/crud_schema.py", line 203, in create_class
    self._create_complex_properties_from_class(loaded_schema_class)
  File "/home/samos/workspace/weaviate-gorilla/.venv/lib/python3.11/site-packages/weaviate/schema/crud_schema.py", line 716, in _create_complex_properties_from_class
    raise UnexpectedStatusCodeException("Add properties to classes", response)
weaviate.exceptions.UnexpectedStatusCodeException: Add properties to classes! Unexpected status code: 422, with response body: {'error': [{'message': 'none vectorizer mod

Creating schema for class Book
Creating schema for class Workout
Creating schema for class PodClip
Creating schema for class User
Creating schema for class Movie
Creating schema for class Event
Creating schema for class Track
Creating schema for class Supplement
Creating schema for class Destination
Creating schema for class Repository
Creating schema for class ClothingItem
Creating schema for class Customer


{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"customer_AEeow2tDRtcx","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-08-03T23:02:42-07:00","took":86184}


In [27]:
[s["class"] for s in classes_with_exceptions]

['Book',
 'Workout',
 'PodClip',
 'User',
 'Movie',
 'Event',
 'Track',
 'Supplement',
 'Destination',
 'Repository',
 'ClothingItem']