In [None]:
import weaviate
from weaviate.classes.config import Property, DataType, Tokenization, Configure
from weaviate.classes.query import Filter

# Open the client used by every later cell, via weaviate's connect_to_local()
# helper called with no arguments (i.e. whatever its defaults are).
# NOTE(review): no client.close() is visible in this section - confirm the
# connection is released elsewhere.
client = weaviate.connect_to_local()

In [None]:
# Tokenization methods to compare; one TEXT property is created per entry.
tkn_options = [
    Tokenization.WORD,
    Tokenization.LOWERCASE,
    Tokenization.WHITESPACE,
    Tokenization.FIELD,
]

# Derive the properties from tkn_options instead of spelling each one out, so
# the list above is the single place to add or remove a tokenization method.
# Each property is named after its enum member in lowercase:
# text_word, text_lowercase, text_whitespace, text_field.
properties = [
    Property(
        name=f"text_{tkn.name.lower()}",
        data_type=DataType.TEXT,
        tokenization=tkn,
    )
    for tkn in tkn_options
]

# Show the resulting property-name / tokenization pairs.
for p in properties:
    print(p.name, p.tokenization)

In [None]:
# Drop the collection before creating it, so the create call below always
# starts from an empty "TokenExample" collection.
collection_name = "TokenExample"
client.collections.delete(collection_name)

# Create the collection with the tokenization properties and the
# Configure.Vectorizer.none() vectorizer setting.
collection = client.collections.create(
    name=collection_name,
    properties=properties,
    vectorizer_config=Configure.Vectorizer.none(),
)

print("Created TokenExample collection")

In [None]:
# Names of the collection's properties; also read by token_test_query.
property_names = [prop.name for prop in properties]

# Sample phrases to store in the collection.
sample_phrases = [
    "Lois & Clark: The New Adventures of Superman",
    "SW1A 1AA",
    "15-30",
    "30-15",
    "Beyoncé - Single Ladies (Put a Ring on It)",
]

# Insert one object per phrase, with the same text stored under every
# property so that only the tokenization differs between them.
for text in sample_phrases:
    row = dict.fromkeys(property_names, text)
    print(row)
    collection.data.insert(properties=row)

In [None]:
def token_test_query(query_term):
    """Run a `like` filter for `query_term` against each property and print the hits.

    Reads the module-level `property_names` and `collection`. For every
    property name, up to 5 objects are fetched with a filter on that property
    alone; each hit is printed with the property it was found in, or a
    "No matches" line is printed when the query returns nothing.

    Args:
        query_term: The text passed to the `like` filter.
    """
    print(f"\nHits for: '{query_term}'")

    # One independent query per property.
    for prop_name in property_names:
        prop_filter = Filter.by_property(prop_name).like(query_term)
        matches = collection.query.fetch_objects(
            filters=prop_filter,
            limit=5,
        ).objects

        if not matches:
            print(f"No matches for {prop_name}")
            continue

        for hit in matches:
            print(f"'{hit.properties[prop_name]}' found in {prop_name}")

In [None]:
# Single word, capitalised exactly as it appears in the stored phrase
# "Lois & Clark: The New Adventures of Superman".
token_test_query("Superman")

In [None]:
# Variations on "Superman": mixed casing, then split into two words.
for term in ("SUPERman", "Super man"):
    token_test_query(term)

In [None]:
# Two words from the stored title that are not adjacent in it, joined by "&".
token_test_query("Lois & Superman")

In [None]:
# Leading part of the stored title, without and then with the trailing colon.
for term in ("Lois & Clark", "Lois & Clark:"):
    token_test_query(term)

In [None]:
# The stored phrase "SW1A 1AA": the full string, its second half only, and
# both halves in reversed order.
for term in ("SW1A 1AA", "1AA", "1AA SW1A"):
    token_test_query(term)

In [None]:
# Two stored phrases made of the same numbers in opposite order.
for term in ("15-30", "30-15"):
    token_test_query(term)