diff --git a/ci/docker-compose-azure-cc.yml b/ci/docker-compose-azure-cc.yml index 6c31d492..b7926d4d 100644 --- a/ci/docker-compose-azure-cc.yml +++ b/ci/docker-compose-azure-cc.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 ports: - 8081:8081 restart: on-failure:0 diff --git a/ci/docker-compose-cluster.yml b/ci/docker-compose-cluster.yml index be2a29f0..b8c043de 100644 --- a/ci/docker-compose-cluster.yml +++ b/ci/docker-compose-cluster.yml @@ -2,7 +2,7 @@ version: '3.4' services: weaviate-node-1: - image: semitechnologies/weaviate:preview-gql-handler-consistency-level-integration-4a12f55 + image: semitechnologies/weaviate:1.19.0 restart: on-failure:0 ports: - "8087:8080" @@ -25,7 +25,7 @@ services: - '8080' - --scheme - http - image: semitechnologies/weaviate:preview-gql-handler-consistency-level-integration-4a12f55 + image: semitechnologies/weaviate:1.19.0 ports: - 8088:8080 - 6061:6060 @@ -38,7 +38,7 @@ services: PERSISTENCE_DATA_PATH: './weaviate-node-2' DEFAULT_VECTORIZER_MODULE: text2vec-contextionary ENABLE_MODULES: text2vec-contextionary - CLUSTER_HOSTNAME: 'node2' + CLUSTER_HOSTNAME: 'node2' CLUSTER_GOSSIP_BIND_PORT: '7112' CLUSTER_DATA_BIND_PORT: '7113' CLUSTER_JOIN: 'weaviate-node-1:7110' diff --git a/ci/docker-compose-okta-cc.yml b/ci/docker-compose-okta-cc.yml index 628aaa31..36f477a5 100644 --- a/ci/docker-compose-okta-cc.yml +++ b/ci/docker-compose-okta-cc.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 ports: - 8082:8082 restart: on-failure:0 diff --git a/ci/docker-compose-okta-users.yml b/ci/docker-compose-okta-users.yml index bb2ac5fe..1f3c6322 100644 --- a/ci/docker-compose-okta-users.yml +++ b/ci/docker-compose-okta-users.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 ports: - 8083:8083 restart: on-failure:0 diff --git a/ci/docker-compose-openai.yml b/ci/docker-compose-openai.yml index 5ffe4e64..ee39ea5c 100644 --- a/ci/docker-compose-openai.yml +++ b/ci/docker-compose-openai.yml @@ -9,7 +9,7 @@ services: - '8086' - --scheme - http - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 ports: - 8086:8086 restart: on-failure:0 diff --git a/ci/docker-compose-wcs.yml b/ci/docker-compose-wcs.yml index ce206d95..064cebff 100644 --- a/ci/docker-compose-wcs.yml +++ b/ci/docker-compose-wcs.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 ports: - 8085:8085 restart: on-failure:0 diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index eee1c5ef..bd2e73f6 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -2,7 +2,7 @@ version: '3.4' services: weaviate: - image: semitechnologies/weaviate:1.18.2 + image: semitechnologies/weaviate:1.19.0 restart: on-failure:0 ports: - "8080:8080" @@ -17,7 +17,7 @@ services: CLUSTER_GOSSIP_BIND_PORT: "7100" CLUSTER_DATA_BIND_PORT: "7101" contextionary: - image: semitechnologies/contextionary:en0.16.0-v1.2.0 + image: semitechnologies/contextionary:en0.16.0-v1.2.1 ports: - "9999:9999" environment: diff --git a/src/batch/journey.test.ts b/src/batch/journey.test.ts index bf68c3c1..25fadc9c 100644 --- a/src/batch/journey.test.ts +++ b/src/batch/journey.test.ts @@ -359,7 +359,7 @@ describe('batch deleting', () => { .withClassName(thingClassName) .withWhere({ operator: 'Equal', - valueString: 'bar1', + valueText: 'bar1', path: ['stringProp'], }) .withDryRun(true) @@ -373,7 +373,7 @@ describe('batch deleting', () => { where: { operands: null, operator: 'Equal', - valueString: 'bar1', + valueText: 'bar1', path: ['stringProp'], }, }); @@ -397,7 +397,7 @@ describe('batch deleting', () => { .withClassName(otherThingClassName) .withWhere({ operator: 'Like', - valueString: 'foo3', + valueText: 'foo3', path: ['stringProp'], }) .withDryRun(true) @@ -411,7 +411,7 @@ describe('batch deleting', () => { where: { operands: null, operator: 'Like', - valueString: 'foo3', + valueText: 'foo3', path: ['stringProp'], }, }); @@ -430,7 +430,7 @@ describe('batch deleting', () => { .withClassName(otherThingClassName) .withWhere({ operator: 'Equal', - valueString: 'doesNotExist', + valueText: 'doesNotExist', path: ['stringProp'], }) .do() @@ -442,7 +442,7 @@ describe('batch deleting', () => { where: { operands: null, operator: 'Equal', - valueString: 'doesNotExist', + valueText: 'doesNotExist', path: ['stringProp'], }, }); @@ -462,7 +462,7 @@ describe('batch deleting', () => { .withClassName(otherThingClassName) .withWhere({ operator: 'LessThan', - valueString: inAMinute, + valueText: inAMinute, path: ['_creationTimeUnix'], }) .withOutput('verbose') @@ -476,7 +476,7 @@ describe('batch deleting', () => { where: { operands: null, operator: 'LessThan', - valueString: inAMinute, + valueText: inAMinute, path: ['_creationTimeUnix'], }, }); diff --git a/src/cluster/journey.test.ts b/src/cluster/journey.test.ts index 5c359c78..3e793c22 100644 --- a/src/cluster/journey.test.ts +++ b/src/cluster/journey.test.ts @@ -7,8 +7,8 @@ const { SOUP_CLASS_NAME, } = require('../utils/testData'); -const EXPECTED_WEAVIATE_VERSION = '1.18.2'; -const EXPECTED_WEAVIATE_GIT_HASH = '723b88a'; +const EXPECTED_WEAVIATE_VERSION = '1.19.0'; +const EXPECTED_WEAVIATE_GIT_HASH = '48456a1'; describe('cluster nodes endpoint', () => { const client = weaviate.client({ diff --git a/src/data/journey.test.ts b/src/data/journey.test.ts index ac3ab2c7..dd5a9d64 100644 --- a/src/data/journey.test.ts +++ b/src/data/journey.test.ts @@ -45,7 +45,7 @@ describe('data', () => { .do() .catch((e: Error) => { expect(e.message).toEqual( - `usage error (422): {"error":[{"message":"invalid object: invalid string property 'stringProp' on class 'DataJourneyTestThing': not a string, but json.Number"}]}` + `usage error (422): {"error":[{"message":"invalid object: invalid text property 'stringProp' on class 'DataJourneyTestThing': not a string, but json.Number"}]}` ); }); }); diff --git a/src/graphql/getter.test.ts b/src/graphql/getter.test.ts index 64f5312c..49b7d0f7 100644 --- a/src/graphql/getter.test.ts +++ b/src/graphql/getter.test.ts @@ -119,6 +119,23 @@ describe('where filters', () => { query: jest.fn(), }; + const expectedQuery = `{Get{Person(where:{operator:Equal,valueText:"John Doe",path:["name"]}){name}}}`; + const where: WhereFilter = { + operator: 'Equal', + valueText: 'John Doe', + path: ['name'], + }; + + new Getter(mockClient).withClassName('Person').withFields('name').withWhere(where).do(); + + expect(mockClient.query).toHaveBeenCalledWith(expectedQuery); + }); + + test('a query with a deprecated valueString', () => { + const mockClient: any = { + query: jest.fn(), + }; + const expectedQuery = `{Get{Person(where:{operator:Equal,valueString:"John Doe",path:["name"]}){name}}}`; const where: WhereFilter = { operator: 'Equal', @@ -1243,9 +1260,9 @@ describe('generative search', () => { new Getter(mockClient) .withClassName('Mammal') .withGenerate({ - singlePrompt: `Which mammals -can survive -in Antarctica?`, + singlePrompt: `Which mammals + can survive + in Antarctica?`, }) .withFields('name taxonomy') .do(); @@ -1278,14 +1295,14 @@ in Antarctica?`, .withClassName('Mammal') .withFields('name taxonomy') .withGenerate({ - groupedTask: `Tell -me -about -how -polar -bears -keep -warm`, + groupedTask: `Tell + me + about + how + polar + bears + keep + warm`, }) .do(); @@ -1311,3 +1328,22 @@ warm`, expect(mockClient.query).toHaveBeenCalledWith(expectedQuery); }); }); + +describe('groupBy valid searchers', () => { + const mockClient: any = { + query: jest.fn(), + }; + + test('valid groupBy', () => { + const groupByQuery = `(groupBy:{path:["property"],groups:2,objectsPerGroup:3})`; + const expectedQuery = `{Get{Person` + groupByQuery + `{name}}}`; + + new Getter(mockClient) + .withClassName('Person') + .withFields('name') + .withGroupBy({ path: ['property'], groups: 2, objectsPerGroup: 3 }) + .do(); + + expect(mockClient.query).toHaveBeenCalledWith(expectedQuery); + }); +}); diff --git a/src/graphql/getter.ts b/src/graphql/getter.ts index c1de0ebc..c6416abc 100644 --- a/src/graphql/getter.ts +++ b/src/graphql/getter.ts @@ -13,6 +13,7 @@ import { CommandBase } from '../validation/commandBase'; import { WhereFilter } from '../openapi/types'; import { GenerateArgs, GraphQLGenerate } from './generate'; import { ConsistencyLevel } from '../data'; +import GroupBy, { GroupByArgs } from './groupBy'; export default class GraphQLGetter extends CommandBase { private after?: string; @@ -33,6 +34,7 @@ export default class GraphQLGetter extends CommandBase { private whereString?: string; private generateString?: string; private consistencyLevel?: ConsistencyLevel; + private groupByString?: string; constructor(client: Connection) { super(client); @@ -183,6 +185,15 @@ export default class GraphQLGetter extends CommandBase { return this; }; + withGroupBy = (args: GroupByArgs) => { + try { + this.groupByString = new GroupBy(args).toString(); + } catch (e: any) { + this.addError(e.toString()); + } + return this; + }; + validateIsSet = (prop: string | undefined | null, name: string, setter: string) => { if (prop == undefined || prop == null || prop.length == 0) { this.addError(`${name} must be set - set with ${setter}`); @@ -267,6 +278,10 @@ export default class GraphQLGetter extends CommandBase { args = [...args, `consistencyLevel:${this.consistencyLevel}`]; } + if (this.groupByString) { + args = [...args, `groupBy:${this.groupByString}`]; + } + if (args.length > 0) { params = `(${args.join(',')})`; } diff --git a/src/graphql/groupBy.ts b/src/graphql/groupBy.ts new file mode 100644 index 00000000..8b4696d0 --- /dev/null +++ b/src/graphql/groupBy.ts @@ -0,0 +1,31 @@ +export interface GroupByArgs { + path: string[]; + groups: number; + objectsPerGroup: number; +} + +export default class GraphQLGroupBy { + private args: GroupByArgs; + + constructor(args: GroupByArgs) { + this.args = args; + } + + toString() { + let parts: string[] = []; + + if (this.args.path) { + parts = [...parts, `path:${JSON.stringify(this.args.path)}`]; + } + + if (this.args.groups) { + parts = [...parts, `groups:${this.args.groups}`]; + } + + if (this.args.objectsPerGroup) { + parts = [...parts, `objectsPerGroup:${this.args.objectsPerGroup}`]; + } + + return `{${parts.join(',')}}`; + } +} diff --git a/src/graphql/journey.test.ts b/src/graphql/journey.test.ts index 270cb90c..1408be7d 100644 --- a/src/graphql/journey.test.ts +++ b/src/graphql/journey.test.ts @@ -1,4 +1,4 @@ -import weaviate, { WeaviateClient } from '..'; +import weaviate, { Reference, WeaviateClient, WeaviateError, WeaviateObject } from '..'; describe('the graphql journey', () => { let client: WeaviateClient; @@ -481,7 +481,7 @@ describe('the graphql journey', () => { .withClassName('Article') .withWhere({ path: ['title'], - valueString: 'apple', + valueText: 'apple', operator: 'Equal', }) .withLimit(10) @@ -757,7 +757,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -819,7 +819,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -843,7 +843,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -867,7 +867,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -888,7 +888,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -909,7 +909,7 @@ describe('the graphql journey', () => { .withWhere({ operator: 'Equal', path: ['_id'], - valueString: 'abefd256-8574-442b-9293-9205193737e0', + valueText: 'abefd256-8574-442b-9293-9205193737e0', }) .withFields('meta { count }') .do() @@ -1127,7 +1127,7 @@ describe('the graphql journey', () => { .withWhere({ path: ['_creationTimeUnix'], operator: 'Equal', - valueString: expected.data.Get.Article[0]._additional.creationTimeUnix, + valueText: expected.data.Get.Article[0]._additional.creationTimeUnix, }) .do() .then((res: any) => { @@ -1156,7 +1156,7 @@ describe('the graphql journey', () => { .withWhere({ path: ['_lastUpdateTimeUnix'], operator: 'Equal', - valueString: expected.data.Get.Article[0]._additional.lastUpdateTimeUnix, + valueText: expected.data.Get.Article[0]._additional.lastUpdateTimeUnix, }) .do() .then((res: any) => { @@ -1344,6 +1344,97 @@ describe('query cluster with consistency level', () => { }); }); +describe('query with group by', () => { + let client: WeaviateClient; + + beforeEach(() => { + client = weaviate.client({ + scheme: 'http', + host: 'localhost:8080', + }); + }); + + it('creates Document Passage schema classes', () => { + // this is just test setup, not part of what we want to test here + return setupGroupBy(client); + }); + + test('should return 3 groups', async () => { + interface GroupHit { + passageIds: string[]; + ofDocumentId: string; + } + const hits = 'hits{ofDocument{... on Document{_additional{id}}} _additional{id distance}}'; + const group = `group{id groupedBy{value path} count maxDistance minDistance ${hits}}`; + const _additional = `_additional{${group}}`; + const expectedGroupHits1: GroupHit = { + passageIds: [ + '00000000-0000-0000-0000-000000000001', + '00000000-0000-0000-0000-000000000009', + '00000000-0000-0000-0000-000000000007', + '00000000-0000-0000-0000-000000000008', + '00000000-0000-0000-0000-000000000006', + '00000000-0000-0000-0000-000000000010', + '00000000-0000-0000-0000-000000000005', + '00000000-0000-0000-0000-000000000004', + '00000000-0000-0000-0000-000000000003', + '00000000-0000-0000-0000-000000000002', + ], + ofDocumentId: '00000000-0000-0000-0000-00000000000a', + }; + const expectedGroupHits2: GroupHit = { + passageIds: [ + '00000000-0000-0000-0000-000000000011', + '00000000-0000-0000-0000-000000000013', + '00000000-0000-0000-0000-000000000012', + '00000000-0000-0000-0000-000000000014', + ], + ofDocumentId: '00000000-0000-0000-0000-00000000000b', + }; + const expectedGroupHits: GroupHit[] = [expectedGroupHits1, expectedGroupHits2]; + + await client.graphql + .get() + .withClassName('Passage') + .withGroupBy({ path: ['ofDocument'], groups: 3, objectsPerGroup: 10 }) + .withNearObject({ id: '00000000-0000-0000-0000-000000000001' }) + .withFields(_additional) + .do() + .then((res: any) => { + expect(res.data.Get.Passage).toHaveLength(3); + expect(res.data.Get.Passage[0]._additional.group.hits).toHaveLength(10); + expect(res.data.Get.Passage[1]._additional.group.hits).toHaveLength(4); + expect(res.data.Get.Passage[2]._additional.group.hits).toHaveLength(6); + for (let i = 0; i < 3; i++) { + expect(res.data.Get.Passage[i]._additional.group).toBeDefined(); + expect(res.data.Get.Passage[i]._additional.group.minDistance).toBe( + res.data.Get.Passage[i]._additional.group.hits[0]._additional.distance + ); + expect(res.data.Get.Passage[i]._additional.group.maxDistance).toBe( + res.data.Get.Passage[i]._additional.group.hits[ + res.data.Get.Passage[i]._additional.group.hits.length - 1 + ]._additional.distance + ); + } + for (let i = 0; i < 2; i++) { + const expectedResults = expectedGroupHits[i]; + const hits = res.data.Get.Passage[i]._additional.group.hits; + for (let j = 0; j < hits.length; j++) { + expect(hits[j]._additional.id).toBe(expectedResults.passageIds[j]); + expect(hits[j].ofDocument[0]._additional.id).toBe(expectedResults.ofDocumentId); + } + } + }); + }); + + it('tears down Document Passage schema', () => { + return Promise.all([ + client.schema.classDeleter().withClassName('Passage').do(), + client.schema.classDeleter().withClassName('Document').do(), + ]); + }); +}); + const setup = async (client: WeaviateClient) => { const thing = { class: 'Article', @@ -1474,3 +1565,127 @@ const setupReplicated = async (client: WeaviateClient) => { await batch.do(); return new Promise((resolve) => setTimeout(resolve, 1000)); }; + +const setupGroupBy = async (client: WeaviateClient) => { + const document = { + class: 'Document', + invertedIndexConfig: { indexTimestamps: true }, + properties: [ + { + name: 'title', + dataType: ['text'], + }, + ], + }; + + const passage = { + class: 'Passage', + invertedIndexConfig: { indexTimestamps: true }, + properties: [ + { + name: 'content', + dataType: ['text'], + }, + { + name: 'type', + dataType: ['text'], + }, + { + name: 'ofDocument', + dataType: ['Document'], + }, + ], + }; + + await Promise.all([client.schema.classCreator().withClass(document).do()]); + await Promise.all([client.schema.classCreator().withClass(passage).do()]); + + // document, passage uuids + const documentIds: string[] = [ + '00000000-0000-0000-0000-00000000000a', + '00000000-0000-0000-0000-00000000000b', + '00000000-0000-0000-0000-00000000000c', + '00000000-0000-0000-0000-00000000000d', + ]; + + const passageIds: string[] = [ + '00000000-0000-0000-0000-000000000001', + '00000000-0000-0000-0000-000000000002', + '00000000-0000-0000-0000-000000000003', + '00000000-0000-0000-0000-000000000004', + '00000000-0000-0000-0000-000000000005', + '00000000-0000-0000-0000-000000000006', + '00000000-0000-0000-0000-000000000007', + '00000000-0000-0000-0000-000000000008', + '00000000-0000-0000-0000-000000000009', + '00000000-0000-0000-0000-000000000010', + '00000000-0000-0000-0000-000000000011', + '00000000-0000-0000-0000-000000000012', + '00000000-0000-0000-0000-000000000013', + '00000000-0000-0000-0000-000000000014', + '00000000-0000-0000-0000-000000000015', + '00000000-0000-0000-0000-000000000016', + '00000000-0000-0000-0000-000000000017', + '00000000-0000-0000-0000-000000000018', + '00000000-0000-0000-0000-000000000019', + '00000000-0000-0000-0000-000000000020', + ]; + + const documents: WeaviateObject[] = []; + for (let i = 0; i < documentIds.length; i++) { + documents.push({ + id: documentIds[i], + class: 'Document', + properties: { + title: `Title of the document ${i}`, + }, + }); + } + + const passages: WeaviateObject[] = []; + for (let i = 0; i < passageIds.length; i++) { + passages.push({ + id: passageIds[i], + class: 'Passage', + properties: { + content: `Passage content ${i}`, + type: 'document-passage', + }, + }); + } + + let batch = client.batch.objectsBatcher(); + [...documents, ...passages].forEach((elem) => { + batch = batch.withObject(elem); + }); + await batch.do(); + + const createReferences = ( + client: WeaviateClient, + document: WeaviateObject, + passages: WeaviateObject[] + ): void => { + const ref: Reference = client.data + .referencePayloadBuilder() + .withId(document.id!) + .withClassName(document.class!) + .payload(); + for (const passage of passages) { + client.data + .referenceCreator() + .withId(passage.id!) + .withClassName(passage.class!) + .withReferenceProperty('ofDocument') + .withReference(ref) + .do() + .catch((e: WeaviateError) => { + throw new Error('it should not have errord: ' + e); + }); + } + }; + + createReferences(client, documents[0], passages.slice(0, 10)); + createReferences(client, documents[1], passages.slice(10, 14)); + + return new Promise((resolve) => setTimeout(resolve, 1000)); +}; diff --git a/src/openapi/schema.ts b/src/openapi/schema.ts index 4285e5e9..21adf321 100644 --- a/src/openapi/schema.ts +++ b/src/openapi/schema.ts @@ -202,7 +202,7 @@ export interface definitions { * @example foobarium */ concept?: string; - /** @description A list of space-delimited words or a sentence describing what the custom concept is about. Avoid using the custom concept itself. An Example definition for the custom concept 'foobarium': would be 'a naturally occourring element which can only be seen by programmers' */ + /** @description A list of space-delimited words or a sentence describing what the custom concept is about. Avoid using the custom concept itself. An Example definition for the custom concept 'foobarium': would be 'a naturally occurring element which can only be seen by programmers' */ definition?: string; /** * Format: float @@ -451,13 +451,17 @@ export interface definitions { moduleConfig?: { [key: string]: unknown }; /** @description Name of the property as URI relative to the schema URL. */ name?: string; - /** @description Optional. Should this property be indexed in the inverted index. Defaults to true. If you choose false, you will not be able to use this property in where filters. This property has no affect on vectorization decisions done by modules */ + /** @description Optional. Should this property be indexed in the inverted index. Defaults to true. If you choose false, you will not be able to use this property in where filters, bm25 or hybrid search. This property has no affect on vectorization decisions done by modules (deprecated as of v1.19; use indexFilterable or/and indexSearchable instead) */ indexInverted?: boolean; + /** @description Optional. Should this property be indexed in the inverted index. Defaults to true. If you choose false, you will not be able to use this property in where filters. This property has no affect on vectorization decisions done by modules */ + indexFilterable?: boolean; + /** @description Optional. Should this property be indexed in the inverted index. Defaults to true. Applicable only to properties of data type text and text[]. If you choose false, you will not be able to use this property in bm25 or hybrid search. This property has no affect on vectorization decisions done by modules */ + indexSearchable?: boolean; /** - * @description Determines tokenization of the property as separate words or whole field. Optional. Applies to string, string[], text and text[] data types. Allowed values are `word` (default) and `field` for string and string[], `word` (default) for text and text[]. Not supported for remaining data types + * @description Determines tokenization of the property as separate words or whole field. Optional. Applies to text and text[] data types. Allowed values are `word` (default; splits on any non-alphanumerical, lowercases), `lowercase` (splits on white spaces, lowercases), `whitespace` (splits on white spaces), `field` (trims). Not supported for remaining data types * @enum {string} */ - tokenization?: 'word' | 'field'; + tokenization?: 'word' | 'lowercase' | 'whitespace' | 'field'; }; /** @description The status of all the shards of a Class */ ShardStatusList: definitions['ShardStatusGetResponse'][]; @@ -651,7 +655,7 @@ export interface definitions { winningCount?: number; /** * Format: int64 - * @description size of the losing group, can be 0 if the winning group size euqals k + * @description size of the losing group, can be 0 if the winning group size equals k */ losingCount?: number; /** @@ -1017,12 +1021,12 @@ export interface definitions { */ valueBoolean?: boolean; /** - * @description value as string + * @description value as text (deprecated as of v1.19; alias for valueText) * @example my search term */ valueString?: string; /** - * @description value as text (on text props) + * @description value as text * @example my search term */ valueText?: string; diff --git a/src/schema/journey.test.ts b/src/schema/journey.test.ts index bb597c62..5c2d6c70 100644 --- a/src/schema/journey.test.ts +++ b/src/schema/journey.test.ts @@ -33,9 +33,11 @@ describe('schema', () => { it('extends the thing class with a new property', () => { const className = 'MyThingClass'; const prop: Property = { - dataType: ['string'], + dataType: ['text'], name: 'anotherProp', tokenization: 'field', + indexFilterable: true, + indexSearchable: true, moduleConfig: { 'text2vec-contextionary': { skip: false, @@ -54,32 +56,6 @@ describe('schema', () => { }); }); - it('fails to extend the thing class with property having not supported tokenization (1)', () => { - const className = 'MyThingClass'; - const prop: Property = { - dataType: ['text'], - name: 'yetAnotherProp', - tokenization: 'field', - moduleConfig: { - 'text2vec-contextionary': { - skip: false, - vectorizePropertyName: false, - }, - }, - }; - - return client.schema - .propertyCreator() - .withClassName(className) - .withProperty(prop) - .do() - .catch((err: Error) => { - expect(err.message).toEqual( - 'usage error (422): {"error":[{"message":"Tokenization \'field\' is not allowed for data type \'text\'"}]}' - ); - }); - }); - it('fails to extend the thing class with property having not supported tokenization (2)', () => { const className = 'MyThingClass'; const prop: Property = { @@ -101,7 +77,7 @@ describe('schema', () => { .do() .catch((err: Error) => { expect(err.message).toEqual( - 'usage error (422): {"error":[{"message":"Tokenization \'word\' is not allowed for data type \'int[]\'"}]}' + 'usage error (422): {"error":[{"message":"Tokenization is not allowed for data type \'int[]\'"}]}' ); }); }); @@ -117,9 +93,11 @@ describe('schema', () => { class: 'MyThingClass', properties: [ { - dataType: ['string'], + dataType: ['text'], name: 'stringProp', tokenization: 'word', + indexFilterable: true, + indexSearchable: true, moduleConfig: { 'text2vec-contextionary': { skip: false, @@ -128,9 +106,11 @@ describe('schema', () => { }, }, { - dataType: ['string'], + dataType: ['text'], name: 'anotherProp', tokenization: 'field', + indexFilterable: true, + indexSearchable: true, moduleConfig: { 'text2vec-contextionary': { skip: false, @@ -398,14 +378,257 @@ describe('schema', () => { }); }); +describe('property setting defaults and migrations', () => { + const client = weaviate.client({ + scheme: 'http', + host: 'localhost:8080', + }); + + test.each([ + ['text', null, 'text', 'word'], + ['text', '', 'text', 'word'], + ['text', 'word', 'text', 'word'], + ['text', 'lowercase', 'text', 'lowercase'], + ['text', 'whitespace', 'text', 'whitespace'], + ['text', 'field', 'text', 'field'], + + ['text[]', null, 'text[]', 'word'], + ['text[]', '', 'text[]', 'word'], + ['text[]', 'word', 'text[]', 'word'], + ['text[]', 'lowercase', 'text[]', 'lowercase'], + ['text[]', 'whitespace', 'text[]', 'whitespace'], + ['text[]', 'field', 'text[]', 'field'], + + ['string', null, 'text', 'whitespace'], + ['string', '', 'text', 'whitespace'], + ['string', 'word', 'text', 'whitespace'], + ['string', 'field', 'text', 'field'], + + ['string[]', null, 'text[]', 'whitespace'], + ['string[]', '', 'text[]', 'whitespace'], + ['string[]', 'word', 'text[]', 'whitespace'], + ['string[]', 'field', 'text[]', 'field'], + + ['int', null, 'int', null], + ['int', '', 'int', null], + + ['int[]', null, 'int[]', null], + ['int[]', '', 'int[]', null], + ])( + 'succeeds creating prop with data type and tokenization', + async ( + dataType: string, + tokenization: string | null, + expectedDataType: string, + expectedTokenization: string | null + ) => { + await client.schema + .classCreator() + .withClass({ + class: 'SomeClass', + properties: [ + { + dataType: [dataType], + name: 'property', + tokenization: tokenization, + }, + ], + }) + .do() + .then((res: WeaviateClass) => { + expect(res).toBeDefined(); + expect(res.properties).toHaveLength(1); + expect(res.properties![0]).toHaveProperty('dataType', [expectedDataType]); + if (expectedTokenization != null) { + expect(res.properties![0]).toHaveProperty('tokenization', expectedTokenization); + } else { + expect(res.properties![0]).not.toHaveProperty('tokenization'); + } + }); + + return deleteClass(client, 'SomeClass'); + } + ); + + test.each([ + ['string', 'whitespace'], + ['string', 'lowercase'], + + ['string[]', 'whitespace'], + ['string[]', 'lowercase'], + + ['int', 'word'], + ['int', 'whitespace'], + ['int', 'lowercase'], + ['int', 'field'], + + ['int[]', 'word'], + ['int[]', 'whitespace'], + ['int[]', 'lowercase'], + ['int[]', 'field'], + ])( + 'fails creating prop with data type and tokenization', + async (dataType: string, tokenization: string | null) => { + await client.schema + .classCreator() + .withClass({ + class: 'SomeClass', + properties: [ + { + dataType: [dataType], + name: 'property', + tokenization: tokenization, + }, + ], + }) + .do() + .catch((e: Error) => { + expect(e.message).toContain('is not allowed for data type'); + }); + } + ); + + test.each([ + ['text', null, null, null, true, true], + ['text', null, null, false, true, false], + ['text', null, null, true, true, true], + ['text', null, false, null, false, true], + ['text', null, false, false, false, false], + ['text', null, false, true, false, true], + ['text', null, true, null, true, true], + ['text', null, true, false, true, false], + ['text', null, true, true, true, true], + ['text', false, null, null, false, false], + ['text', true, null, null, true, true], + + ['int', null, null, null, true, false], + ['int', null, null, false, true, false], + ['int', null, false, null, false, false], + ['int', null, false, false, false, false], + ['int', null, true, null, true, false], + ['int', null, true, false, true, false], + ['int', false, null, null, false, false], + ['int', true, null, null, true, false], + ])( + 'succeeds creating prop with data type and indexing', + async ( + dataType: string, + inverted: boolean | null, + filterable: boolean | null, + searchable: boolean | null, + expectedFilterable: boolean, + expectedSearchable: boolean + ) => { + await client.schema + .classCreator() + .withClass({ + class: 'SomeClass', + properties: [ + { + dataType: [dataType], + name: 'property', + indexInverted: inverted, + indexFilterable: filterable, + indexSearchable: searchable, + }, + ], + }) + .do() + .then((res: WeaviateClass) => { + expect(res).toBeDefined(); + expect(res.properties).toHaveLength(1); + expect(res.properties![0]).toHaveProperty('indexFilterable', expectedFilterable); + expect(res.properties![0]).toHaveProperty('indexSearchable', expectedSearchable); + expect(res.properties![0]).not.toHaveProperty('indexInverted'); + }); + + return deleteClass(client, 'SomeClass'); + } + ); + + const errMsg1 = + '`indexInverted` is deprecated and can not be set together with `indexFilterable` or `indexSearchable`.'; + const errMsg2 = + '`indexSearchable` is allowed only for text/text[] data types. For other data types set false or leave empty'; + test.each([ + ['text', false, null, false, errMsg1], + ['text', false, null, true, errMsg1], + ['text', false, false, null, errMsg1], + ['text', false, false, false, errMsg1], + ['text', false, false, true, errMsg1], + ['text', false, true, null, errMsg1], + ['text', false, true, false, errMsg1], + ['text', false, true, true, errMsg1], + ['text', true, null, false, errMsg1], + ['text', true, null, true, errMsg1], + ['text', true, false, null, errMsg1], + ['text', true, false, false, errMsg1], + ['text', true, false, true, errMsg1], + ['text', true, true, null, errMsg1], + ['text', true, true, false, errMsg1], + ['text', true, true, true, errMsg1], + + ['int', false, null, false, errMsg1], + ['int', false, null, true, errMsg1], + ['int', false, false, null, errMsg1], + ['int', false, false, false, errMsg1], + ['int', false, false, true, errMsg1], + ['int', false, true, null, errMsg1], + ['int', false, true, false, errMsg1], + ['int', false, true, true, errMsg1], + ['int', true, null, false, errMsg1], + ['int', true, null, true, errMsg1], + ['int', true, false, null, errMsg1], + ['int', true, false, false, errMsg1], + ['int', true, false, true, errMsg1], + ['int', true, true, null, errMsg1], + ['int', true, true, false, errMsg1], + ['int', true, true, true, errMsg1], + + ['int', null, null, true, errMsg2], + ['int', null, false, true, errMsg2], + ['int', null, true, true, errMsg2], + ])( + 'fails creating prop with data type and indexing', + async ( + dataType: string, + inverted: boolean | null, + filterable: boolean | null, + searchable: boolean | null, + errMsg: string + ) => { + await client.schema + .classCreator() + .withClass({ + class: 'SomeClass', + properties: [ + { + dataType: [dataType], + name: 'property', + indexInverted: inverted, + indexFilterable: filterable, + indexSearchable: searchable, + }, + ], + }) + .do() + .catch((e: Error) => { + expect(e.message).toContain(errMsg); + }); + } + ); +}); + function newClassObject(className: string) { return { class: className, properties: [ { - dataType: ['string'], + dataType: ['text'], name: 'stringProp', tokenization: 'word', + indexFilterable: true, + indexSearchable: true, moduleConfig: { 'text2vec-contextionary': { skip: false,