diff --git a/.github/workflows/python-pytest.yml b/.github/workflows/python-pytest.yml index 5a38ea4d..a0ff7f32 100644 --- a/.github/workflows/python-pytest.yml +++ b/.github/workflows/python-pytest.yml @@ -49,4 +49,4 @@ jobs: API_KEY: ${{ secrets.API_KEY }} VI_USERNAME: ${{ secrets.VI_USERNAME }} VI_API_KEY: ${{ secrets.VI_API_KEY }} - run: python -m pytest ${{ matrix.test-path }} --use_client + run: python -m pytest ${{ matrix.test-path }} --use_client --reruns 5 --reruns-delay 1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4f8aefe2..a85ed90b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -103,9 +103,15 @@ try to re-run the notebooks to ensure that your submission is not causing errors ### Style guide -For documentation strings, `Vector AI` follows the [google style](https://google.github.io/styleguide/pyguide.html). +For documentation strings, `Vector AI` aims to follow the [google style](https://google.github.io/styleguide/pyguide.html) as closely as possible. For internal attributes, we use _ in front of the name of the attribute. -We also prefer being as explicit as possible and try to avoid args and kwargs as much as we can. +However, we will not reject PRs if they do not follow this style. +### Running Automated API creation + +To run the automated API creation, simply run: +``` +python utils/automate_api.py +``` #### This guide was inspired by Transformers [transformers guide to contributing](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md) which was influenced by Scikit-learn [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/master/CONTRIBUTING.md). 
diff --git a/setup.py b/setup.py index c9b4c881..06cd79f7 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ core_req = ["requests", "numpy", "pandas", "appdirs>=1.4.4", "tqdm>=4.27.0", "plotly>=4.0.0"] extras_req = { - "dev" : ["twine", "black", "pytest", "pytest-cov", "vectorai"], - "test" : ["pytest", "pytest-cov"], + "dev" : ["twine", "black", "pytest", "pytest-cov", "vectorai", "openapi-to-sdk"], + "test" : ["pytest", "pytest-cov", "pytest-rerunfailures"], "docs" : ["sphinx-rtd-theme>=0.5.0", "nbsphinx>=0.7.1"] } extras_req["all"] = [p for r in extras_req.values() for p in r] diff --git a/tests/conftest.py b/tests/conftest.py index b2685034..a9f93ba5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ """ import pytest import os -from vectorai.client import ViClient, ViCollectionClient +from vectorai.client import ViClient from vectorai.analytics.client import ViAnalyticsClient from vectorai.models.deployed import ViText2Vec import random @@ -54,12 +54,12 @@ def test_client(test_username, test_api_key): def test_collection_name(): return "test_colour_col_" + str(get_random_string(3)) -@pytest.fixture -def test_collection_client(test_username, test_api_key, test_collection_name): - """Testing for the client login. - """ - client = ViCollectionClient(username=test_username, api_key=test_api_key, collection_name=test_collection_name) - return client +# @pytest.fixture +# def test_collection_client(test_username, test_api_key, test_collection_name): +# """Testing for the client login. 
+# """ +# client = ViCollectionClient(username=test_username, api_key=test_api_key, collection_name=test_collection_name) +# return client @pytest.fixture def test_analytics_client(test_username, test_api_key): diff --git a/tests/test_read.py b/tests/test_read.py index 14e8e975..aa21ec7d 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -34,7 +34,7 @@ def test_advanced_search_by_id(self, test_client, test_collection_name): ] results = test_client.advanced_search_by_id(test_collection_name, document_id=test_client.random_documents(test_collection_name)['documents'][0]['_id'], - fields={'color_vector_':1}, filters=filter_query) + search_fields={'color_vector_':1}, filters=filter_query) assert len(results) > 0 @pytest.mark.use_client @@ -102,6 +102,7 @@ def test_search_collections(test_client): cn = 'example_collection_123y8io' if cn not in test_client.list_collections(): test_client.create_collection(cn) + time.sleep(2) assert len(test_client.search_collections('123y8io')) > 0, "Not searching collections properly." test_client.delete_collection(cn) @@ -114,7 +115,7 @@ def test_random_recommendation_smoke_test(test_client, test_collection_name): time.sleep(2) results = test_client.random_recommendation( test_collection_name, - field='color_vector_') + search_field='color_vector_') assert len(results['results']) > 0, "Random recommendation fails." 
@pytest.mark.use_client diff --git a/tests/test_search.py b/tests/test_search.py index 41ffd1e0..bbba44d5 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -5,6 +5,7 @@ import pytest import time +@pytest.mark.skip(reason="Chunk Search being altered.") @pytest.mark.use_client def test_chunk_search(test_client, test_collection_name): if test_collection_name in test_client.list_collections(): @@ -13,7 +14,9 @@ def test_chunk_search(test_client, test_collection_name): test_client.create_sample_documents(10, include_chunks=True)) time.sleep(5) vec = np.random.rand(1, 30).tolist()[0] - results = test_client.chunk_search(test_collection_name, - vector=vec, - search_fields=['chunk.color_chunkvector_']) + results = test_client.chunk_search( + test_collection_name, + vector=vec, + search_fields=['chunk.color_chunkvector_'], + ) assert 'error' not in results.keys() diff --git a/tests/test_write.py b/tests/test_write.py index 2bfabc84..0d4e854f 100644 --- a/tests/test_write.py +++ b/tests/test_write.py @@ -54,7 +54,8 @@ def assert_json_serializable(document, temp_json_file="test.json"): class TestInsert: @pytest.mark.use_client - def test_insert_documents_simple_and_collection_stats_match(self, test_client, test_collection_name): + def test_insert_documents_simple_and_collection_stats_match(self, test_client, + test_collection_name): """ Testing for simple document insertion """ @@ -84,7 +85,8 @@ def test_inserting_documents_without_id_fields(self, test_client, test_collectio time.sleep(3) @pytest.mark.use_client - def test_inserting_documents_without_id_fields_with_overwrite(self, test_client, test_collection_name): + def test_inserting_documents_without_id_fields_with_overwrite(self, test_client, + test_collection_name): """ Test inserting documents if they do not have an ID field. 
""" @@ -117,7 +119,8 @@ def test_inserting_documents_when_id_is_not_a_string(self, test_client, test_col time.sleep(3) @pytest.mark.use_client - def test_inserting_documents_when_id_is_not_a_string_with_overwrite(self, test_client, test_collection_name): + def test_inserting_documents_when_id_is_not_a_string_with_overwrite(self, test_client, + test_collection_name): """ Test inserting documents when ID is not a string """ @@ -204,7 +207,7 @@ def test_edit_document(self, test_client, test_collection_name): collection_name=test_collection_name, edits=edits ) time.sleep(2) - doc = client.id(test_collection_name, document_id="1") + doc = client.id(collection_name=test_collection_name, document_id="1") assert doc["location"] == "Paris" @pytest.mark.use_client @@ -256,9 +259,9 @@ def test_edit_documents(self, test_client, test_collection_name): {"_id": "1", "location": "New York",}, ] test_client.edit_documents(test_collection_name, edits) - doc = test_client.id(test_collection_name, document_id="2") + doc = test_client.id(collection_name=test_collection_name, document_id="2") assert doc["location"] == "Sydney" - doc = test_client.id(test_collection_name, document_id="1") + doc = test_client.id(collection_name=test_collection_name, document_id="1") assert doc['location'] == 'New York' def test__write_document_nested_field(): @@ -350,37 +353,37 @@ def test_multiprocess_insert_with_error_with_overwrite(test_client, test_collect assert test_client.collection_stats(test_collection_name)['number_of_documents'] == NUM_OF_DOCUMENTS_INSERTED test_client.delete_collection(test_collection_name) -@pytest.mark.use_client -def test_multiprocess_with_collection_client(test_collection_client, test_collection_name): - NUM_OF_DOCUMENTS_INSERTED = 100 - if test_collection_client.collection_name in test_collection_client.list_collections(): - test_collection_client.delete_collection() - time.sleep(5) - documents = test_collection_client.create_sample_documents(NUM_OF_DOCUMENTS_INSERTED) - 
results = test_collection_client.insert_documents(documents, workers=5) - time.sleep(10) - assert len(results['failed_document_ids']) == 0 - assert test_collection_client.collection_name in test_collection_client.list_collections() - assert test_collection_client.collection_stats()['number_of_documents'] == NUM_OF_DOCUMENTS_INSERTED - test_collection_client.delete_collection() - -@pytest.mark.use_client -def test_multiprocess__with_error_with_collection_client(test_collection_client): - NUM_OF_DOCUMENTS_INSERTED = 100 - if test_collection_client.collection_name in test_collection_client.list_collections(): - test_collection_client.delete_collection() - time.sleep(5) - documents = test_collection_client.create_sample_documents(NUM_OF_DOCUMENTS_INSERTED) - documents.append({ - '_id': 9993, - 'color': np.nan - }) - # This should result in 1 failure - results = test_collection_client.insert_documents(documents, workers=5, overwrite=True) - time.sleep(10) - assert len(results['failed_document_ids']) == 1 - assert test_collection_client.collection_name in test_collection_client.list_collections() - assert test_collection_client.collection_stats()['number_of_documents'] == NUM_OF_DOCUMENTS_INSERTED +# @pytest.mark.use_client +# def test_multiprocess_with_collection_client(test_collection_client, test_collection_name): +# NUM_OF_DOCUMENTS_INSERTED = 100 +# if test_collection_client.collection_name in test_collection_client.list_collections(): +# test_collection_client.delete_collection() +# time.sleep(5) +# documents = test_collection_client.create_sample_documents(NUM_OF_DOCUMENTS_INSERTED) +# results = test_collection_client.insert_documents(documents, workers=5) +# time.sleep(10) +# assert len(results['failed_document_ids']) == 0 +# assert test_collection_client.collection_name in test_collection_client.list_collections() +# assert test_collection_client.collection_stats()['number_of_documents'] == NUM_OF_DOCUMENTS_INSERTED +# test_collection_client.delete_collection() 
+ +# @pytest.mark.use_client +# def test_multiprocess__with_error_with_collection_client(test_collection_client): +# NUM_OF_DOCUMENTS_INSERTED = 100 +# if test_collection_client.collection_name in test_collection_client.list_collections(): +# test_collection_client.delete_collection() +# time.sleep(5) +# documents = test_collection_client.create_sample_documents(NUM_OF_DOCUMENTS_INSERTED) +# documents.append({ +# '_id': 9993, +# 'color': np.nan +# }) +# # This should result in 1 failure +# results = test_collection_client.insert_documents(documents, workers=5, overwrite=True) +# time.sleep(10) +# assert len(results['failed_document_ids']) == 1 +# assert test_collection_client.collection_name in test_collection_client.list_collections() +# assert test_collection_client.collection_stats()['number_of_documents'] == NUM_OF_DOCUMENTS_INSERTED @pytest.mark.use_client def test_multiprocess_with_overwrite(test_client, test_collection_name): @@ -409,21 +412,21 @@ def test_multiprocess_with_overwrite_insert(test_client, test_collection_name): @pytest.mark.use_client def test_multiprocess_overwrite(test_client, test_collection_name): if test_collection_name in test_client.list_collections(): - test_client.delete_collection() + test_client.delete_collection(test_collection_name) time.sleep(5) NUM_OF_DOCS = 100 docs = test_client.create_sample_documents(NUM_OF_DOCS) test_client.insert_documents(test_collection_name, docs[0:5], workers=1, overwrite=False) # For document with id '3' TEST_ID = '3' - id_document = test_client.id(test_collection_name, TEST_ID) + id_document = test_client.id(collection_name=test_collection_name, document_id=TEST_ID) test_client.set_field('test.field', id_document, 'stranger') docs[3] = id_document print(docs[3]) docs[3].update({'_id': '3'}) response = test_client.insert_documents(test_collection_name, docs[3:5], workers=1, overwrite=True) - id_document = test_client.id(test_collection_name, TEST_ID) + id_document = 
test_client.id(collection_name=test_collection_name, document_id=TEST_ID) assert test_client.get_field('test.field', id_document) == 'stranger' time.sleep(5) test_client.delete_collection(test_collection_name) @@ -438,57 +441,57 @@ def test_multiprocess_not_overwrite(test_client, test_collection_name): test_client.insert_documents(test_collection_name, docs[0:5], workers=1, overwrite=False) # For document with id '3' TEST_ID = '3' - id_document = test_client.id(test_collection_name, TEST_ID) + id_document = test_client.id(collection_name=test_collection_name, document_id=TEST_ID) test_client.set_field('test.field', id_document, 'stranger') docs[3] = id_document docs[3].update({'_id': '3'}) response = test_client.insert_documents(test_collection_name, docs[3:5], workers=1, overwrite=False) - id_document = test_client.id(test_collection_name, TEST_ID) + id_document = test_client.id(collection_name=test_collection_name, document_id=TEST_ID) with pytest.raises(MissingFieldError): test_client.get_field('test.field', id_document) time.sleep(5) test_client.delete_collection(test_collection_name) -@pytest.mark.use_client -def test_multiprocess_overwrite_collection_client(test_collection_client, test_collection_name): - if test_collection_client.collection_name in test_collection_client.list_collections(): - test_collection_client.delete_collection() - time.sleep(5) - NUM_OF_DOCS = 10 - docs = test_collection_client.create_sample_documents(NUM_OF_DOCS) - test_collection_client.insert_documents(docs[0:5], workers=1, overwrite=False) - # For document with id '3' - TEST_ID = '3' - id_document = test_collection_client.id(TEST_ID) - test_collection_client.set_field('test.field', id_document, 'stranger') - docs[3] = id_document - docs[3].update({'_id': '3'}) - response = test_collection_client.insert_documents(docs[3:5], workers=1, - overwrite=True) - id_document = test_collection_client.id(TEST_ID) - assert test_collection_client.get_field('test.field', id_document) == 
'stranger' - time.sleep(5) - test_collection_client.delete_collection() - -@pytest.mark.use_client -def test_multiprocess_not_overwrite_collection_client(test_collection_client, test_collection_name): - NUM_OF_DOCS = 10 - docs = test_collection_client.create_sample_documents(NUM_OF_DOCS) - test_collection_client.insert_documents(docs[0:5], workers=1, overwrite=False) - # For document with id '3' - TEST_ID = '3' - id_document = test_collection_client.id(TEST_ID) - test_collection_client.set_field('test.field', id_document, 'stranger') - docs[3] = id_document - docs[3].update({'_id': '3'}) - response = test_collection_client.insert_documents(docs[3:5], workers=1, - overwrite=False) - id_document = test_collection_client.id(TEST_ID) - with pytest.raises(MissingFieldError): - test_collection_client.get_field('test.field', id_document) - time.sleep(5) - test_collection_client.delete_collection() +# @pytest.mark.use_client +# def test_multiprocess_overwrite_collection_client(test_collection_client, test_collection_name): +# if test_collection_client.collection_name in test_collection_client.list_collections(): +# test_collection_client.delete_collection() +# time.sleep(5) +# NUM_OF_DOCS = 10 +# docs = test_collection_client.create_sample_documents(NUM_OF_DOCS) +# test_collection_client.insert_documents(docs[0:5], workers=1, overwrite=False) +# # For document with id '3' +# TEST_ID = '3' +# id_document = test_collection_client.id(document_id=TEST_ID) +# test_collection_client.set_field('test.field', id_document, 'stranger') +# docs[3] = id_document +# docs[3].update({'_id': '3'}) +# response = test_collection_client.insert_documents(docs[3:5], workers=1, +# overwrite=True) +# id_document = test_collection_client.id(document_id=TEST_ID) +# assert test_collection_client.get_field('test.field', id_document) == 'stranger' +# time.sleep(5) +# test_collection_client.delete_collection() + +# @pytest.mark.use_client +# def 
test_multiprocess_not_overwrite_collection_client(test_collection_client, test_collection_name): +# NUM_OF_DOCS = 10 +# docs = test_collection_client.create_sample_documents(NUM_OF_DOCS) +# test_collection_client.insert_documents(docs[0:5], workers=1, overwrite=False) +# # For document with id '3' +# TEST_ID = '3' +# id_document = test_collection_client.id(document_id=TEST_ID) +# test_collection_client.set_field('test.field', id_document, 'stranger') +# docs[3] = id_document +# docs[3].update({'_id': '3'}) +# response = test_collection_client.insert_documents(docs[3:5], workers=1, +# overwrite=False) +# id_document = test_collection_client.id(document_id=TEST_ID) +# with pytest.raises(MissingFieldError): +# test_collection_client.get_field('test.field', id_document) +# time.sleep(5) +# test_collection_client.delete_collection() def test_dummy_vector(test_client): """ diff --git a/tests/utils.py b/tests/utils.py index 4e67706d..17a96dbe 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,15 +1,15 @@ import time import random import string -from vectorai import ViClient, ViCollectionClient +from vectorai import ViClient class TempClient: def __init__(self, client, collection_name: str=None): self.client = client if isinstance(client, ViClient): self.collection_name = collection_name - elif isinstance(client, ViCollectionClient): - self.collection_name = self.client.collection_name + # elif isinstance(client, ViCollectionClient): + # self.collection_name = self.client.collection_name def teardown_collection(self): if self.collection_name in self.client.list_collections(): @@ -40,6 +40,9 @@ def __init__(self, client, collection_name: str=None, num_of_docs: int=10): self.collection_name = collection_name self.client.collection_name = collection_name self.num_of_docs = num_of_docs + self.teardown_collection() + self.client.insert_documents(self.collection_name, + self.client.create_sample_documents(self.num_of_docs)) def generate_random_collection_name(self): return 
self.generate_random_string(20) @@ -49,7 +52,7 @@ def generate_random_string(self, num_of_letters): return ''.join(random.choice(letters) for i in range(num_of_letters)) def __enter__(self): - self.teardown_collection() - self.client.insert_documents(self.collection_name, - self.client.create_sample_documents(self.num_of_docs)) + # self.teardown_collection() + # self.client.insert_documents(self.collection_name, + # self.client.create_sample_documents(self.num_of_docs)) return self.client diff --git a/utils/automate_api.py b/utils/automate_api.py new file mode 100644 index 00000000..d890dff7 --- /dev/null +++ b/utils/automate_api.py @@ -0,0 +1,28 @@ +if __name__=="__main__": + import os + from vectorai.api import ViAPIClient + from openapi_to_sdk.sdk_automation import PythonSDKBuilder + sdk = PythonSDKBuilder( + url="https://api.vctr.ai", + inherited_properties=['username', 'api_key'], + decorators=[ + 'retry()', + "return_curl_or_response('json')"], + override_param_defaults={'min_score': None, 'cursor': None}, + internal_functions=[ + "list_collections", + "create_collection", + "search", + "delete_collection", + "edit_document", + "create_collection_from_document" + ], + ) + sdk.to_python_file( + class_name="ViAPIClient", + filename='vectorai/api/api.py', + import_strings=['import requests', 'from vectorai.api.utils import retry, return_curl_or_response'], + include_response_parsing=False, + ) + vi = ViAPIClient(os.environ['VI_USERNAME'], os.environ['VI_API_KEY']) + print(vi._list_collections()) \ No newline at end of file diff --git a/vectorai/analytics/api/comparator.py b/vectorai/analytics/api/comparator.py index 152a6708..a77fb889 100644 --- a/vectorai/analytics/api/comparator.py +++ b/vectorai/analytics/api/comparator.py @@ -10,6 +10,8 @@ def __init__(self, username: str=None, api_key: str=None, self.url = url self.analytics_url = analytics_url + + @return_curl_or_response('content') @retry() def _compare_ranks( self, @@ -58,7 +60,6 @@ def _compare_ranks( 
"colors": colors, } params.update(kwargs) - response = requests.post( + return requests.post( url= f"{self.analytics_url}/comparator/compare_ranks/", json=params) - return return_curl_or_response(response, 'content', return_curl=return_curl) diff --git a/vectorai/analytics/comparator.py b/vectorai/analytics/comparator.py index cb096b01..9a570ca5 100644 --- a/vectorai/analytics/comparator.py +++ b/vectorai/analytics/comparator.py @@ -82,11 +82,11 @@ def compare_search( ranked_list_1 = self.search( collection_name, vector=vector, - field=vector_fields[0]) + search_field=vector_fields[0]) ranked_list_2 = self.search_by_id( collection_name, vector=vector, - field=vector_fields[1]) + search_field=vector_fields[1]) return self.compare_ranks( ranked_list_1, ranked_list_2, @@ -120,14 +120,14 @@ def random_compare_search_by_id( include_fields=fields_to_include)['documents'] random_id = random_docs[0]['_id'] ranked_list_1 = self.search_by_id( - collection_name, - random_id, - field=vector_fields[0], + document_id=random_id, + collection_name=collection_name, + search_field=vector_fields[0], page_size=page_size)['results'] ranked_list_2 = self.search_by_id( - collection_name, - random_id, - field=vector_fields[1], + document_id=random_id, + collection_name=collection_name, + search_field=vector_fields[1], page_size=page_size)['results'] return self.compare_ranks( ranked_list_1, @@ -163,11 +163,11 @@ def compare_search_by_id( ranked_list_1 = self.search_by_id( collection_name, document_id, - field=vector_fields[0]) + search_field=vector_fields[0]) ranked_list_2 = self.search_by_id( collection_name, document_id, - field=vector_fields[1]) + search_field=vector_fields[1]) return self.compare_ranks( ranked_list_1, ranked_list_2, diff --git a/vectorai/api/__init__.py b/vectorai/api/__init__.py index 9ad81c44..f77d5060 100644 --- a/vectorai/api/__init__.py +++ b/vectorai/api/__init__.py @@ -1,9 +1 @@ -from .search import * -from .read import * -from .write import * -from .text 
import * -from .audio import * -from .text import * -from .cluster import * -from .dimensionality_reduction import * -from .array_dict_vectorizer import * \ No newline at end of file +from .api import * \ No newline at end of file diff --git a/vectorai/api/api.py b/vectorai/api/api.py new file mode 100644 index 00000000..a21c47f4 --- /dev/null +++ b/vectorai/api/api.py @@ -0,0 +1,2654 @@ +# This python file is auto-generated. Please do not edit. +import requests +import requests +from vectorai.api.utils import retry, return_curl_or_response + + +class ViAPIClient: + def __init__(self, username, api_key, ): + self.username = username + self.api_key = api_key + + @retry() + @return_curl_or_response('json') + def request_api_key(self, email, description, referral_code="api_referred", **kwargs): + """Request an api key +Make sure to save the api key somewhere safe. If you have a valid referral code, you can recieve the api key more quickly. + +Args +======== +username: Username you'd like to create, lowercase only +email: Email you are using to sign up +description: Description of your intended use case +referral_code: The referral code you've been given to allow you to register for an api key before others + +""" + return requests.post( + url='https://api.vctr.ai/project/request_api_key', + json=dict( + username=self.username, + email=email, + description=description, + referral_code=referral_code, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def request_read_api_key(self, read_username, **kwargs): + """Request a read api key for your collections +Creates a read only key for your collections. Make sure to save the api key somewhere safe. When doing a search the admin username should still be used. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +read_username: Username for read only key + +""" + return requests.post( + url='https://api.vctr.ai/project/request_read_api_key', + json=dict( + username=self.username, + api_key=self.api_key, + read_username=read_username, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def _create_collection(self, collection_name, collection_schema={}, **kwargs): + """Creates a collection +A collection can store documents to be **searched, retrieved, filtered and aggregated** _(similar to Collections in MongoDB, Tables in SQL, Indexes in ElasticSearch)_. + +If you are inserting your own vector use the suffix (ends with) **"\_vector\_"** for the field name. and specify the length of the vector in colletion_schema like below example: + + { + "collection_schema": { + "celebrity_image_vector_": 1024, + "celebrity_audio_vector" : 512, + "product_description_vector" : 128 + } + } + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +collection_schema: Schema for specifying the field that are vectors and its length + +""" + return requests.post( + url='https://api.vctr.ai/project/create_collection', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + collection_schema=collection_schema, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def _create_collection_from_document(self, collection_name, document={}, **kwargs): + """Creates a collection by infering the schema from a document +If you are inserting your own vector use the suffix (ends with) **"\_vector\_"** for the field name. e.g. "product\_description\_vector\_" + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document: A Document is a JSON-like data that we store our metadata and vectors with. 
For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' + +""" + return requests.post( + url='https://api.vctr.ai/project/create_collection_from_document', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document=document, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def _delete_collection(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/delete_collection', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def _list_collections(self,**kwargs): + return requests.get( + url='https://api.vctr.ai/project/list_collections', + params=dict( + username=self.username, + api_key=self.api_key, + )) + + @retry() + @return_curl_or_response('json') + def collection_stats(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/collection_stats', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def collection_schema(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/collection_schema', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def collection_vector_health(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/collection_vector_health', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def add_collection_metadata(self, collection_name, metadata, **kwargs): + """Add metadata about a collection +Add metadata about a collection. 
notably description, data source, etc + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +metadata: Metadata for a collection, e.g. {'description' : 'collection for searching products'} + +""" + return requests.post( + url='https://api.vctr.ai/project/add_collection_metadata', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + metadata=metadata, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def collection_metadata(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/collection_metadata', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def copy_collection_from_another_user(self, collection_name, source_collection_name, source_username, source_api_key, **kwargs): + """Copy a collection from another user's projects into your project +Copy a collection from another user's projects into your project. 
This is considered a project job +Args +======== +collection_name: Collection to copy into +username: Your username +api_key: Your api key to access the username +source_collection_name: Collection to copy frpm +source_username: Source username of whom the collection belongs to +source_api_key: Api key to access the source username + +""" + return requests.post( + url='https://api.vctr.ai/project/copy_collection_from_another_user', + json=dict( + collection_name=collection_name, + username=self.username, + api_key=self.api_key, + source_collection_name=source_collection_name, + source_username=source_username, + source_api_key=source_api_key, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def job_status(self,job_id, **kwargs): + return requests.get( + url='https://api.vctr.ai/project/job_status', + params=dict( + job_id=job_id, + username=self.username, + api_key=self.api_key, + )) + + @retry() + @return_curl_or_response('json') + def _search(self,vector, collection_name, search_fields, approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/search', + params=dict( + vector=vector, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_fields=search_fields, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def search_by_id(self,document_id, collection_name, search_field, approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return 
requests.get( + url='https://api.vctr.ai/collection/search_by_id', + params=dict( + document_id=document_id, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_field=search_field, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def search_by_ids(self,document_ids, collection_name, search_field, vector_operation="sum", approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/search_by_ids', + params=dict( + document_ids=document_ids, + vector_operation=vector_operation, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_field=search_field, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def search_by_positive_negative_ids(self,positive_document_ids, negative_document_ids, collection_name, search_field, vector_operation="sum", approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/search_by_positive_negative_ids', + params=dict( + positive_document_ids=positive_document_ids, + negative_document_ids=negative_document_ids, + vector_operation=vector_operation, + username=self.username, + 
api_key=self.api_key, + collection_name=collection_name, + search_field=search_field, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def search_with_positive_negative_ids_as_history(self,vector, positive_document_ids, negative_document_ids, collection_name, search_field, vector_operation="sum", approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/search_with_positive_negative_ids_as_history', + params=dict( + vector=vector, + positive_document_ids=positive_document_ids, + negative_document_ids=negative_document_ids, + vector_operation=vector_operation, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_field=search_field, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def hybrid_search(self,text, vector, collection_name, search_fields, text_fields=[], traditional_weight=0.075, fuzzy=1, join=True, approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/hybrid_search', + params=dict( + text=text, + vector=vector, + text_fields=text_fields, + traditional_weight=traditional_weight, + fuzzy=fuzzy, + join=join, + 
username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_fields=search_fields, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def insert(self, collection_name, document={}, insert_date=True, overwrite=True, update_schema=True, **kwargs): + """Insert a document into a Collection +When inserting the document you can specify your own id for a document by using the field name **"\_id"**. +For specifying your own vector use the suffix (ends with) **"\_vector\_"** for the field name. +e.g. "product\_description\_vector\_" + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document: A Document is a JSON-like data that we store our metadata and vectors with. For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +insert_date: Whether to include insert date as a field 'insert_date_'. +overwrite: Whether to overwrite document if it exists. +update_schema: Whether the api should check the documents for vector datatype to update the schema. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/insert', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document=document, + insert_date=insert_date, + overwrite=overwrite, + update_schema=update_schema, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def bulk_insert(self, collection_name, documents={}, insert_date=True, overwrite=True, update_schema=True, quick=False, **kwargs): + """Insert multiple documents into a Collection +When inserting the document you can specify your own id for a document by using the field name **"\_id"**. +For specifying your own vector use the suffix (ends with) **"\_vector\_"** for the field name. +e.g. "product\_description\_vector\_" + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +documents: A list of documents. Document is a JSON-like data that we store our metadata and vectors with. For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +insert_date: Whether to include insert date as a field 'insert_date_'. +overwrite: Whether to overwrite document if it exists. +update_schema: Whether the api should check the documents for vector datatype to update the schema. +quick: This will run the quickest insertion possible, which means there will be no schema checks or collection checks. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/bulk_insert', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + documents=documents, + insert_date=insert_date, + overwrite=overwrite, + update_schema=update_schema, + quick=quick, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def delete_by_id(self,document_id, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/delete_by_id', + params=dict( + document_id=document_id, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_delete_by_id(self, collection_name, document_ids, **kwargs): + """Delete multiple documents in a Collection by ids +Delete a document by its id. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document_ids: IDs of documents + +""" + return requests.post( + url='https://api.vctr.ai/collection/bulk_delete_by_id', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document_ids=document_ids, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def _edit_document(self, collection_name, document_id, edits, insert_date=True, **kwargs): + """Edit a document in a Collection by its id +Edit by providing a key value pair of fields you are adding or changing. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document_id: ID of a document +edits: A dictionary to edit and add fields to a document. +insert_date: Whether to include insert date as a field 'insert_date_'. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/edit_document', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document_id=document_id, + edits=edits, + insert_date=insert_date, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def bulk_edit_document(self, collection_name, documents={}, insert_date=True, **kwargs): + """Edits multiple documents in a Collection by its ids +Edits documents by providing a key value pair of fields you are adding or changing, make sure to include the "_id" in the documents. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +documents: A list of documents. Document is a JSON-like data that we store our metadata and vectors with. For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +insert_date: Whether to include insert date as a field 'insert_date_'. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/bulk_edit_document', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + documents=documents, + insert_date=insert_date, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def delete_document_fields(self,document_id, fields_to_delete, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/delete_document_fields', + params=dict( + document_id=document_id, + fields_to_delete=fields_to_delete, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def id(self,document_id, collection_name, include_vector=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/id', + params=dict( + document_id=document_id, + include_vector=include_vector, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_id(self,document_ids, collection_name, include_vector=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/bulk_id', + params=dict( + document_ids=document_ids, + include_vector=include_vector, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_missing_id(self, collection_name, document_ids, **kwargs): + """Look up in bulk if the ids exists in the collection, returns all the missing one as a list +Look up in bulk if the ids exists in the collection, returns all the missing one as a list. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document_ids: IDs of documents + +""" + return requests.post( + url='https://api.vctr.ai/collection/bulk_missing_id', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document_ids=document_ids, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def retrieve_documents(self,collection_name, include_fields=[], cursor=None, page_size=20, sort=[], asc=False, include_vector=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/retrieve_documents', + params=dict( + include_fields=include_fields, + cursor=cursor, + page_size=page_size, + sort=sort, + asc=asc, + include_vector=include_vector, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def random_documents(self,collection_name, seed=10, include_fields=[], page_size=20, include_vector=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/random_documents', + params=dict( + seed=seed, + include_fields=include_fields, + page_size=page_size, + include_vector=include_vector, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def retrieve_documents_with_filters(self, collection_name, include_fields=[], cursor=None, page_size=20, sort=[], asc=False, include_vector=False, filters=[], **kwargs): + """Retrieve some documents with filters +Cursor is provided to retrieve even more documents. Loop through it to retrieve all documents in the database. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +include_fields: Fields to include in the search results, empty array/list means all fields. 
+cursor: Cursor to paginate the document retrieval +page_size: Size of each page of results +sort: Fields to sort by +asc: Whether to sort results by ascending or descending order +include_vector: Include vectors in the search results +filters: Query for filtering the search results + +""" + return requests.post( + url='https://api.vctr.ai/collection/retrieve_documents_with_filters', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + include_fields=include_fields, + cursor=cursor, + page_size=page_size, + sort=sort, + asc=asc, + include_vector=include_vector, + filters=filters, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def random_documents_with_filters(self, collection_name, seed=10, include_fields=[], page_size=20, include_vector=False, filters=[], **kwargs): + """Retrieve some documents randomly with filters +Mainly for testing purposes. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +seed: Random Seed for retrieving random documents. +include_fields: Fields to include in the search results, empty array/list means all fields. 
+page_size: Size of each page of results +include_vector: Include vectors in the search results +filters: Query for filtering the search results + +""" + return requests.post( + url='https://api.vctr.ai/collection/random_documents_with_filters', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + seed=seed, + include_fields=include_fields, + page_size=page_size, + include_vector=include_vector, + filters=filters, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def facets(self,collection_name, facets_fields=[], date_interval="monthly", page_size=1000, page=1, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/facets', + params=dict( + facets_fields=facets_fields, + date_interval=date_interval, + page_size=page_size, + page=page, + asc=asc, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def filters(self, collection_name, filters=[], page=1, page_size=20, asc=False, include_vector=False, sort=[], **kwargs): + """Filters a collection +Filter is used to retrieve documents that match the conditions set in a filter query. This is used in advance search to filter the documents that are searched. + +The filters query is a json body that follows the schema of: + + [ + {'field' : , 'filter_type' : , "condition":"==", "condition_value":"america"}, + {'field' : , 'filter_type' : , "condition":">=", "condition_value":90}, + ] + +These are the available filter_type types: + +1. "contains": for filtering documents that contains a string. + {'field' : 'category', 'filter_type' : 'contains', "condition":"==", "condition_value": "bluetoo"]} +2. "exact_match"/"category": for filtering documents that matches a string or list of strings exactly. + {'field' : 'category', 'filter_type' : 'categories', "condition":"==", "condition_value": "tv"]} +3. 
"categories": for filtering documents that contains any of a category from a list of categories. + {'field' : 'category', 'filter_type' : 'categories', "condition":"==", "condition_value": ["tv", "smart", "bluetooth_compatible"]} +4. "exists": for filtering documents that contains a field. + {'field' : 'purchased', 'filter_type' : 'exists', "condition":">=", "condition_value":" "} +5. "date": for filtering date by date range. + {'field' : 'insert_date_', 'filter_type' : 'date', "condition":">=", "condition_value":"2020-01-01"} +6. "numeric": for filtering by numeric range. + {'field' : 'price', 'filter_type' : 'date', "condition":">=", "condition_value":90} +7. "ids": for filtering by document ids. + {'field' : 'ids', 'filter_type' : 'ids', "condition":"==", "condition_value":["1", "10"]} + +These are the available conditions: + + "==", "!=", ">=", ">", "<", "<=" + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +filters: Query for filtering the search results +page: Page of the results +page_size: Size of each page of results +asc: Whether to sort results by ascending or descending order +include_vector: Include vectors in the search results +sort: Fields to sort by + +""" + return requests.post( + url='https://api.vctr.ai/collection/filters', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + filters=filters, + page=page, + page_size=page_size, + asc=asc, + include_vector=include_vector, + sort=sort, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_search(self, collection_name, multivector_query, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Vector Similarity Search. 
Support for multiple vectors, vector weightings, facets and filtering +Advanced Vector Similarity Search, enables machine learning search with vector search. Search with a multiple vectors for the most similar documents. + +For example: Search with a product image and description vectors to find the most similar products by what it looks like and what its described to do. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. + + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +multivector_query: Query for advance search that allows for multiple vector and field querying + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_search', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + multivector_query=multivector_query, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_search_by_id(self, collection_name, document_id, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Single Product Recommendations +Single Product Recommendations (Search by an id). + +For example: Search with id of a product in the database, and using the product's image and description vectors to find the most similar products by what it looks like and what its described to do. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. 
+ + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +document_id: ID of a document +search_fields: Vector fields to search against, and the weightings for them. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_search_by_id', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + document_id=document_id, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_search_by_ids(self, collection_name, document_ids, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, vector_operation="sum", **kwargs): + """Advanced Multi Product Recommendations +Advanced Multi Product Recommendations (Search by ids). + +For example: Search with multiple ids of products in the database, and using the product's image and description vectors to find the most similar products by what it looks like and what its described to do. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +You can also give weightings of on each product as well e.g. product ID-A weights 100% whilst product ID-B 50%. + +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. 
+ + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +document_ids: Document IDs to get recommendations for, and the weightings of each document +search_fields: Vector fields to search against, and the weightings for them. 
+vector_operation: Aggregation for the vectors, choose from ['mean', 'sum', 'min', 'max'] + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_search_by_ids', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + document_ids=document_ids, + search_fields=search_fields, + vector_operation=vector_operation, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_search_by_positive_negative_ids(self, collection_name, positive_document_ids, negative_document_ids, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, vector_operation="sum", **kwargs): + """Advanced Multi Product Recommendations with likes and dislikes +Advanced Multi Product Recommendations with Likes and Dislikes (Search by ids). + +For example: Search with multiple ids of liked and dislike products in the database. Then using the product's image and description vectors to find the most similar products by what it looks like and what its described to do against the positives and most disimilar products for the negatives. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +You can also give weightings of on each product as well e.g. product ID-A weights 100% whilst product ID-B 50%. 
+ +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. + + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +positive_document_ids: Positive Document IDs to get recommendations for, and the weightings of each document +negative_document_ids: Negative Document IDs to get recommendations for, and the weightings of each document +search_fields: Vector fields to search against, and the weightings for them. 
+vector_operation: Aggregation for the vectors, choose from ['mean', 'sum', 'min', 'max'] + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_search_by_positive_negative_ids', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + positive_document_ids=positive_document_ids, + negative_document_ids=negative_document_ids, + search_fields=search_fields, + vector_operation=vector_operation, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_search_with_positive_negative_ids_as_history(self, collection_name, positive_document_ids, negative_document_ids, search_fields, vector, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, vector_operation="sum", **kwargs): + """Advanced Search with Likes and Dislikes as history +For example: Vector search of a query vector with multiple ids of liked and dislike products in the database. Then using the product's image and description vectors to find the most similar products by what it looks like and what its described to do against the positives and most disimilar products for the negatives. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +You can also give weightings of on each product as well e.g. product ID-A weights 100% whilst product ID-B 50%. 
+ +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. + + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +positive_document_ids: Positive Document IDs to get recommendations for, and the weightings of each document +negative_document_ids: Negative Document IDs to get recommendations for, and the weightings of each document +search_fields: Vector fields to search against, and the weightings for them. 
+vector_operation: Aggregation for the vectors, choose from ['mean', 'sum', 'min', 'max'] +vector: Vector, a list/array of floats that represents a piece of data + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_search_with_positive_negative_ids_as_history', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + positive_document_ids=positive_document_ids, + negative_document_ids=negative_document_ids, + search_fields=search_fields, + vector_operation=vector_operation, + vector=vector, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_hybrid_search(self, collection_name, multivector_query, text, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, text_fields=[], traditional_weight=0.075, fuzzy=1, join=True, **kwargs): + """Advanced Search a text field with vector and text using Vector Search and Traditional Search +Advanced Vector similarity search + Traditional Fuzzy Search with text and vector. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. 
+ + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +multivector_query: Query for advance search that allows for multiple vector and field querying +text: Text Search Query (not encoded as vector) +text_fields: Text fields to search against +traditional_weight: Multiplier of traditional search. A value of 0.025~0.1 is good. +fuzzy: Fuzziness of the search. A value of 1-3 is good. +join: Whether to consider cases where there is a space in the word. E.g. Go Pro vs GoPro. 
+ +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_hybrid_search', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + multivector_query=multivector_query, + text=text, + text_fields=text_fields, + traditional_weight=traditional_weight, + fuzzy=fuzzy, + join=join, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def aggregate(self, collection_name, aggregation_query, page_size=20, page=1, asc=False, flatten=True, **kwargs): + """Aggregate a collection +Aggregation/Groupby of a collection using an aggregation query. +The aggregation query is a json body that follows the schema of: + + { + "groupby" : [ + {"name": , "field": , "agg": "category"}, + {"name": , "field": , "agg": "category"} + ], + "metrics" : [ + {"name": , "field": , "agg": "avg"} + ] + } +- "groupby" is the fields you want to split the data into. These are the available groupby types: + - category" : groupby a field that is a category +- "metrics" is the fields you want to metrics you want to calculate in each of those, every aggregation includes a frequency metric. 
These are the available metric types: + - "avg", "max", "min", "sum", "cardinality" + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +aggregation_query: Aggregation query to aggregate data +page_size: Size of each page of results +page: Page of the results +asc: Whether to sort results by ascending or descending order +flatten: + +""" + return requests.post( + url='https://api.vctr.ai/collection/aggregate', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + aggregation_query=aggregation_query, + page_size=page_size, + page=page, + asc=asc, + flatten=flatten, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def publish_aggregation(self, source_collection, dest_collection, aggregation_name, description, aggregation_query, date_field="insert_date_", refresh_time="160s", start_immediately=True, **kwargs): + """Publishes your aggregation query to a new collection +Publish and schedules your aggregation query and saves it to a new collection. +This new collection is just like any other collection and you can read, filter and aggregate it. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +source_collection: The collection where the data to aggregate comes from +dest_collection: The name of collection of where the data will be aggregated to +aggregation_name: The name for the published scheduled aggregation +description: The description for the published scheduled aggregation +aggregation_query: The aggregation query to schedule +date_field: The date field to check whether there is new data coming in +refresh_time: How often should the aggregation check for new data +start_immediately: Whether to start the published aggregation immediately + +""" + return requests.post( + url='https://api.vctr.ai/collection/publish_aggregation', + json=dict( + username=self.username, + api_key=self.api_key, + source_collection=source_collection, + dest_collection=dest_collection, + aggregation_name=aggregation_name, + description=description, + aggregation_query=aggregation_query, + date_field=date_field, + refresh_time=refresh_time, + start_immediately=start_immediately, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def delete_published_aggregation(self,aggregation_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/delete_published_aggregation', + params=dict( + aggregation_name=aggregation_name, + username=self.username, + api_key=self.api_key, + )) + + @retry() + @return_curl_or_response('json') + def start_aggregation(self,aggregation_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/start_aggregation', + params=dict( + aggregation_name=aggregation_name, + username=self.username, + api_key=self.api_key, + )) + + @retry() + @return_curl_or_response('json') + def stop_aggregation(self,aggregation_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/stop_aggregation', + params=dict( + aggregation_name=aggregation_name, + username=self.username, + api_key=self.api_key, + )) + 
+ +    @retry() +    @return_curl_or_response('json') +    def vector_aggregation(self, source_collection, dest_collection, source_to_dest_fields_mapping, vector_fields, aggregation_type="mean", refresh=True, **kwargs): +        """Aggregate vectors from one collection into another published aggregation collection +This is useful for getting vectors of a category. e.g. You have "product\_description\_vector\_" and you want the vector for a brand samsung. The "samsung" brand's vector can be the aggregate of all the samsung "product\_description\_vector\_". + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +source_collection: The collection where the data to aggregate comes from +dest_collection: The collection that a scheduled aggregation is creating +source_to_dest_fields_mapping: Mapping of fields in the source collection to fields in the destination collection +vector_fields: Vector fields to aggregate to form 1 aggregated vector for each split the groupby creates +aggregation_type: Aggregation for the vectors, choose from ['mean', 'sum', 'min', 'max'] +refresh: Whether to refresh the aggregation and recalculate the vectors for every single groupby + +""" +        return requests.post( +            url='https://api.vctr.ai/collection/vector_aggregation', +            json=dict( +                username=self.username, +                api_key=self.api_key, +                source_collection=source_collection, +                dest_collection=dest_collection, +                source_to_dest_fields_mapping=source_to_dest_fields_mapping, +                vector_fields=vector_fields, +                aggregation_type=aggregation_type, +                refresh=refresh, +                **kwargs)) + +    @retry() +    @return_curl_or_response('json') +    def id_lookup_joined(self, join_query, doc_id, **kwargs): +        """Look up a document by its id with joins +Look up a document by its id with joins.
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +join_query: +doc_id: ID of a Document + +""" + return requests.post( + url='https://api.vctr.ai/collection/id_lookup_joined', + json=dict( + username=self.username, + api_key=self.api_key, + join_query=join_query, + doc_id=doc_id, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def join_collections(self, join_query, joined_collection_name, **kwargs): + """Join collections with a query +Perform a join query on a whole collection and write the results to a new collection. We currently only support left joins. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +join_query: +joined_collection_name: Name of the new collection that contains the joined results + +""" + return requests.post( + url='https://api.vctr.ai/collection/join_collections', + json=dict( + username=self.username, + api_key=self.api_key, + join_query=join_query, + joined_collection_name=joined_collection_name, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def chunk_search(self, collection_name, chunk_field, vector, search_fields, chunk_scoring="max", page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Vector Similarity Search on Chunks. +Vector Similarity Search on chunks. + +For example: Search with a person's characteristics, who are the most similar (querying the "persons_characteristics_vector" field): + + Query person's characteristics as a vector: + [180, 40, 70] representing [height, age, weight] + + Search Results: + [ + {"name": Adam Levine, "persons_characteristics_vector" : [180, 56, 71]}, + {"name": Brad Pitt, "persons_characteristics_vector" : [180, 56, 65]}, + ...] 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +chunk_field: Field that the array of chunked documents are. +chunk_scoring: Scoring method for determining for ranking between document chunks. +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +vector: Vector, a list/array of floats that represents a piece of data +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/chunk_search', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + chunk_field=chunk_field, + chunk_scoring=chunk_scoring, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + vector=vector, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_chunk_search(self, collection_name, chunk_field, multivector_query, chunk_scoring="max", page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_vector=False, include_count=True, 
include_facets=False, hundred_scale=False, asc=False, **kwargs): +        """Advanced Vector Similarity Search on Chunks. Support for multiple vectors, vector weightings, facets and filtering +Advanced Vector Similarity Search, enables machine learning search with vector search. Search with multiple vectors for the most similar documents. + +For example: Search with a product image and description vectors to find the most similar products by what it looks like and what it's described to do. + +You can also give weightings of each vector field towards the search, e.g. image\_vector\_ weights 100%, whilst description\_vector\_ 50%. + +Advanced search also supports filtering to only search through filtered results and facets to get the overview of products available when a minimum score is set. + + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +chunk_field: Field that contains the array of chunked documents. +chunk_scoring: Scoring method for ranking between document chunks.
+page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +multivector_query: Query for advance search that allows for multiple vector and field querying + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_chunk_search', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + chunk_field=chunk_field, + chunk_scoring=chunk_scoring, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + multivector_query=multivector_query, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def cluster_aggregate(self, collection_name, aggregation_query, page_size=20, page=1, asc=False, flatten=True, **kwargs): + """Aggregate every cluster in a collection +Takes an aggregation query and gets the aggregate of each cluster in a collection. This helps you interpret each cluster and what is in them. + +Only can be used after a vector field has been clustered with /cluster. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +aggregation_query: Aggregation query to aggregate data +page_size: Size of each page of results +page: Page of the results +asc: Whether to sort results by ascending or descending order +flatten: + +""" + return requests.post( + url='https://api.vctr.ai/collection/cluster_aggregate', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + aggregation_query=aggregation_query, + page_size=page_size, + page=page, + asc=asc, + flatten=flatten, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def cluster_facets(self,collection_name, facets_fields=[], page_size=1000, page=1, asc=False, date_interval="monthly", **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/cluster_facets', + params=dict( + facets_fields=facets_fields, + page_size=page_size, + page=page, + asc=asc, + date_interval=date_interval, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def cluster_centroids(self,vector_field, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/cluster_centroids', + params=dict( + vector_field=vector_field, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def cluster_centroid_documents(self,vector_field, collection_name, metric="cosine", include_vector=False, page=1, page_size=20, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/cluster_centroid_documents', + params=dict( + vector_field=vector_field, + metric=metric, + include_vector=include_vector, + page=page, + page_size=page_size, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def 
advanced_cluster(self,vector_field, collection_name, alias="default", n_clusters=0, n_iter=10, n_init=5, gpu=True, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/advanced_cluster', + params=dict( + vector_field=vector_field, + alias=alias, + n_clusters=n_clusters, + n_iter=n_iter, + n_init=n_init, + gpu=gpu, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def advanced_cluster_aggregate(self, collection_name, aggregation_query, vector_field, alias, page_size=20, page=1, asc=False, flatten=True, filters=[], **kwargs): + """Aggregate every cluster in a collection +Takes an aggregation query and gets the aggregate of each cluster in a collection. This helps you interpret each cluster and what is in them. + +Only can be used after a vector field has been clustered with /advanced_cluster. + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +aggregation_query: Aggregation query to aggregate data +page_size: Size of each page of results +page: Page of the results +asc: Whether to sort results by ascending or descending order +flatten: +vector_field: Clustered vector field +alias: Alias of a cluster +filters: Query for filtering the search results + +""" + return requests.post( + url='https://api.vctr.ai/collection/advanced_cluster_aggregate', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + aggregation_query=aggregation_query, + page_size=page_size, + page=page, + asc=asc, + flatten=flatten, + vector_field=vector_field, + alias=alias, + filters=filters, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def advanced_cluster_facets(self,vector_field, collection_name, alias="default", facets_fields=[], page_size=1000, page=1, asc=False, date_interval="monthly", **kwargs): + return 
requests.get( + url='https://api.vctr.ai/collection/advanced_cluster_facets', + params=dict( + vector_field=vector_field, + alias=alias, + facets_fields=facets_fields, + page_size=page_size, + page=page, + asc=asc, + date_interval=date_interval, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def advanced_cluster_centroids(self,vector_field, collection_name, alias="default", **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/advanced_cluster_centroids', + params=dict( + vector_field=vector_field, + alias=alias, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def advanced_cluster_centroid_documents(self,vector_field, collection_name, alias="default", metric="cosine", include_vector=False, page=1, page_size=20, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/advanced_cluster_centroid_documents', + params=dict( + vector_field=vector_field, + alias=alias, + metric=metric, + include_vector=include_vector, + page=page, + page_size=page_size, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def insert_cluster_centroids(self, collection_name, cluster_centers, vector_field, alias="default", job=False, job_metric="cosine", **kwargs): + """Insert cluster centroids +Insert your own cluster centroids for it to be used in approximate search settings and cluster aggregations. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +cluster_centers: Cluster centers with the key being the index number +vector_field: Clustered vector field +alias: Alias is used to name a cluster +job: Whether to run a job where each document is assigned a cluster from the cluster_center +job_metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] + +""" + return requests.post( + url='https://api.vctr.ai/collection/insert_cluster_centroids', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + cluster_centers=cluster_centers, + vector_field=vector_field, + alias=alias, + job=job, + job_metric=job_metric, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def dimensionality_reduction(self,vector_field, collection_name, alias="default", n_components=0, gpu=True, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/dimensionality_reduction', + params=dict( + vector_field=vector_field, + alias=alias, + n_components=n_components, + gpu=gpu, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def dimensionality_reduce(self, collection_name, vectors, vector_field, alias="default", n_components=1, **kwargs): + """Reduces the dimension of a list of vectors +Reduce the dimensions of a list of vectors you input into a desired dimension. + +This can only reduce to dimensions less than or equal to the n_components that the dimensionality reduction model is trained on. 
+ +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +vectors: Vectors to perform dimensionality reduction on +vector_field: Vector field to perform dimensionality reduction on +alias: Alias of the dimensionality reduced vectors +n_components: The size/length to reduce the vector down to. + +""" + return requests.post( + url='https://api.vctr.ai/collection/dimensionality_reduce', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + vectors=vectors, + vector_field=vector_field, + alias=alias, + n_components=n_components, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_array_field(self,array_fields, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_array_field', + params=dict( + array_fields=array_fields, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def encode_array(self,array_field, array, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_array', + params=dict( + array_field=array_field, + array=array, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def search_with_array(self,array_field, array, collection_name, search_fields, approx=0, sum_fields=True, page_size=20, page=1, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/search_with_array', + params=dict( + array_field=array_field, + array=array, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_fields=search_fields, + approx=approx, + sum_fields=sum_fields, + page_size=page_size, + page=page, + 
metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + )) + + @retry() + @return_curl_or_response('json') + def encode_dictionary_field(self,dictionary_fields, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_dictionary_field', + params=dict( + dictionary_fields=dictionary_fields, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def encode_dictionary(self, collection_name, dictionary, dictionary_field, **kwargs): + """Encode an dictionary into a vector +For example: a dictionary that represents a **person's characteristics visiting a store, field "person_characteristics"**: + + {"height":180, "age":40, "weight":70} + + -> -> + +| height | age | weight | purchases | visits | +|--------|-----|--------|-----------|--------| +| 180 | 40 | 70 | 0 | 0 | + + dictionary vector: [180, 40, 70, 0, 0] + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +dictionary: A dictionary to encode into vectors +dictionary_field: The dictionary field that encoding of the dictionary is trained on + +""" + return requests.post( + url='https://api.vctr.ai/collection/encode_dictionary', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + dictionary=dictionary, + dictionary_field=dictionary_field, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def search_with_dictionary(self, collection_name, search_fields, dictionary, dictionary_field, page_size=20, page=1, approx=0, sum_fields=True, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + """Search a dictionary field with a dictionary using Vector Search 
+Vector similarity search with a dictionary directly. + +For example: a dictionary that represents a **person's characteristics visiting a store, field "person_characteristics"**: + + {"height":180, "age":40, "weight":70} + + -> -> + +| height | age | weight | purchases | visits | +|--------|-----|--------|-----------|--------| +| 180 | 40 | 70 | 0 | 0 | + + dictionary vector: [180, 40, 70, 0, 0] + + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +search_fields: Vector fields to search against +page_size: Size of each page of results +page: Page of the results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +dictionary: A dictionary to encode into vectors +dictionary_field: The dictionary field that encoding of the dictionary is trained on + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_dictionary', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_fields=search_fields, + page_size=page_size, + page=page, + approx=approx, + sum_fields=sum_fields, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + dictionary=dictionary, + dictionary_field=dictionary_field, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_text(self,text, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_text', + params=dict( + text=text, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_encode_text(self,texts, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/bulk_encode_text', + params=dict( + texts=texts, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def search_with_text(self, collection_name, text, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Search text fields with text using Vector Search +Vector similarity search with text directly. 
+ +For example: "product_description" represents the description of a product: + + "AirPods deliver effortless, all-day audio on the go. And AirPods Pro bring Active Noise Cancellation to an in-ear headphone — with a customisable fit" + + -> -> + + i.e. text vector, "product_description_vector_": [0.794617772102356, 0.3581121861934662, 0.21113917231559753, 0.24878688156604767, 0.9741804003715515 ...] + + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +text: Text to encode into vector and vector search with +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_text', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + text=text, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_image(self,image_url, model_url, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_image', + params=dict( + image_url=image_url, + model_url=model_url, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_encode_image(self,image_urls, model_url, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/bulk_encode_image', + params=dict( + image_urls=image_urls, + model_url=model_url, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def search_with_image(self, collection_name, image_url, model_url, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, 
hundred_scale=False, asc=False, **kwargs): + """Advanced Search an image field with image using Vector Search +Vector similarity search with an image directly. + +_note: image has to be stored somewhere and be provided as image_url, a url that stores the image_ + +For example: an image_url represents an image of a celebrity: + + "https://www.celebrity_images.com/brad_pitt.png" + + -> -> + + image vector: [0.794617772102356, 0.3581121861934662, 0.21113917231559753, 0.24878688156604767, 0.9741804003715515 ...] + + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +image_url: The image url of an image to encode into a vector +model_url: The model url of a deployed vectorhub model +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_image', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + image_url=image_url, + model_url=model_url, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def search_with_image_upload(self, collection_name, image, model_url, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Search an image field with uploaded image using Vector Search +Vector similarity search with an uploaded image directly. + +_note: image has to be sent as a base64 encoded string_ + +For example: an image represents an image of a celebrity: + + "https://www.celebrity_images.com/brad_pitt.png" + + -> -> + + image vector: [0.794617772102356, 0.3581121861934662, 0.21113917231559753, 0.24878688156604767, 0.9741804003715515 ...] 
+ + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +image: Image represented as a base64 encoded string +model_url: The model url of a deployed vectorhub model +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_image_upload', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + image=image, + model_url=model_url, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_audio(self,audio_url, model_url, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/encode_audio', + params=dict( + audio_url=audio_url, + model_url=model_url, + 
username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def bulk_encode_audio(self,audio_urls, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/bulk_encode_audio', + params=dict( + audio_urls=audio_urls, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def search_with_audio(self, collection_name, audio_url, model_url, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Search an audio field with audio using Vector Search +Vector similarity search with an audio directly. + +_note: audio has to be stored somewhere and be provided as audio_url, a url that stores the audio_ + +For example: an audio_url represents sounds that a pokemon make: + + "https://play.pokemonshowdown.com/audio/cries/pikachu.mp3" + + -> -> + + audio vector: [0.794617772102356, 0.3581121861934662, 0.21113917231559753, 0.24878688156604767, 0.9741804003715515 ...] + + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +audio_url: The audio url of an audio to encode into a vector +model_url: The model url of a deployed vectorhub model +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_audio', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + audio_url=audio_url, + model_url=model_url, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def search_with_audio_upload(self, collection_name, audio, model_url, search_fields, page=1, page_size=20, approx=0, sum_fields=True, metric="cosine", filters=[], facets=[], min_score=None, include_fields=[], include_vector=False, include_count=True, include_facets=False, hundred_scale=False, asc=False, **kwargs): + """Advanced Search audio fields with uploaded audio using Vector Search +Vector similarity search with an uploaded audio directly. + +_note: audio has to be sent as a base64 encoded string_ + +For example: an audio represents sounds that a pokemon make: + + "https://play.pokemonshowdown.com/audio/cries/pikachu.mp3" + + -> -> + + audio vector: [0.794617772102356, 0.3581121861934662, 0.21113917231559753, 0.24878688156604767, 0.9741804003715515 ...] 
+ -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +page: Page of the results +page_size: Size of each page of results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +filters: Query for filtering the search results +facets: Fields to include in the facets, if [] then all +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. +include_vector: Include vectors in the search results +include_count: Include count in the search results +include_facets: Include facets in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +audio: Audio represented as a base64 encoded string +model_url: The model url of a deployed vectorhub model +search_fields: Vector fields to search against + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_audio_upload', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + page=page, + page_size=page_size, + approx=approx, + sum_fields=sum_fields, + metric=metric, + filters=filters, + facets=facets, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + include_facets=include_facets, + hundred_scale=hundred_scale, + asc=asc, + audio=audio, + model_url=model_url, + search_fields=search_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_fields_to_vector(self, collection_name, vector_name, selected_fields, **kwargs): + """Encode all selected fields for a collection into vectors +Within a collection encode the specified fields in every document into 
vectors. + +For example: we choose the fields ["height", "age", "weight"] + document 1 field: {"height":180, "age":40, "weight":70, "purchases":20, "visits": 12} + + document 2 field: {"height":160, "age":32, "weight":50, "purchases":10, "visits": 24} + + -> -> + +| height | age | weight | +|--------|-----|--------| +| 180 | 40 | 70 | +| 160 | 32 | 50 | + + document 1 vector: {"person_characteristics_vector_": [180, 40, 70]} + + document 2 vector: {"person_characteristics_vector_": [160, 32, 50]} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +vector_name: The name of the vector that the fields turn into +selected_fields: The fields to turn into vectors + +""" + return requests.post( + url='https://api.vctr.ai/collection/encode_fields_to_vector', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + vector_name=vector_name, + selected_fields=selected_fields, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def encode_fields(self, collection_name, document, vector_name, **kwargs): + """Encode fields into a vector +For example: we choose the fields ["height", "age", "weight"] + document field: {"height":180, "age":40, "weight":70, "purchases":20, "visits": 12} + + -> -> + +| height | age | weight | +|--------|-----|--------| +| 180 | 40 | 70 | + + document vector: {"person_characteristics_vector_": [180, 40, 70]} + + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document: A document to encode into vectors +vector_name: The name of the vector that the fields turn into + +""" + return requests.post( + url='https://api.vctr.ai/collection/encode_fields', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document=document, + vector_name=vector_name, + **kwargs)) + + @retry() + 
@return_curl_or_response('json') + def search_with_fields(self, collection_name, search_fields, document, selected_fields, vector_name, page_size=20, page=1, approx=0, sum_fields=True, metric="cosine", min_score=None, include_fields=[], include_vector=False, include_count=True, hundred_scale=False, asc=False, **kwargs): + """Search with fields with a document using Vector Search +Vector similarity search with fields directly. + +For example: we choose the fields ["height", "age", "weight"] + document field: {"height":180, "age":40, "weight":70, "purchases":20, "visits": 12} + + -> -> + +| height | age | weight | +|--------|-----|--------| +| 180 | 40 | 70 | + + document dictionary vector: {"person_characteristics_vector_": [180, 40, 70]} + + -> -> + + Search Results: {...} + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +search_fields: Vector fields to search against +page_size: Size of each page of results +page: Page of the results +approx: Used for approximate search +sum_fields: Whether to sum the multiple vectors similarity search score as 1 or seperate +metric: Similarity Metric, choose from ['cosine', 'l1', 'l2', 'dp'] +min_score: Minimum score for similarity metric +include_fields: Fields to include in the search results, empty array/list means all fields. 
+include_vector: Include vectors in the search results +include_count: Include count in the search results +hundred_scale: Whether to scale up the metric by 100 +asc: Whether to sort results by ascending or descending order +document: A document to encode into vectors +selected_fields: The fields to turn into vectors +vector_name: A name to call the vector that the fields turn into + +""" + return requests.post( + url='https://api.vctr.ai/collection/search_with_fields', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + search_fields=search_fields, + page_size=page_size, + page=page, + approx=approx, + sum_fields=sum_fields, + metric=metric, + min_score=min_score, + include_fields=include_fields, + include_vector=include_vector, + include_count=include_count, + hundred_scale=hundred_scale, + asc=asc, + document=document, + selected_fields=selected_fields, + vector_name=vector_name, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def combine_vectors(self,vector_fields, vector_name, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/combine_vectors', + params=dict( + vector_fields=vector_fields, + vector_name=vector_name, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def collection_vector_mappings(self,collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/collection_vector_mappings', + params=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def insert_and_encode(self, collection_name, models, document={}, insert_date=True, overwrite=True, **kwargs): + """Insert a document into a Collection and encode it as well +When inserting the document you can specify your own id for a document by using the field name **"\_id"**. 
+For specifying your own vector use the suffix (ends with) **"\_vector\_"** for the field name. +e.g. "product\_description\_vector\_" +This method will also encode the specified field with models on the server side + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +document: A Document is a JSON-like data that we store our metadata and vectors with. For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +insert_date: Whether to include insert date as a field 'insert_date_'. +overwrite: Whether to overwrite document if it exists. +models: Field and model to encode it with. e.g.{'image_url':'image', 'audio_url':'audio', 'name':'text'} + +""" + return requests.post( + url='https://api.vctr.ai/collection/insert_and_encode', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + document=document, + insert_date=insert_date, + overwrite=overwrite, + models=models, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def bulk_insert_and_encode(self, collection_name, models, documents={}, insert_date=True, overwrite=True, **kwargs): + """Insert multiple documents into a Collection and encode it as well +When inserting the document you can specify your own id for a document by using the field name **"\_id"**. +For specifying your own vector use the suffix (ends with) **"\_vector\_"** for the field name. +e.g. "product\_description\_vector\_" +This method will also encode the specified field with models on the server side + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: Name of Collection +documents: A list of documents. Document is a JSON-like data that we store our metadata and vectors with. 
For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +insert_date: Whether to include insert date as a field 'insert_date_'. +overwrite: Whether to overwrite document if it exists. +models: Field and model to encode it with. e.g.{'image_url':'image', 'audio_url':'audio', 'name':'text'} + +""" + return requests.post( + url='https://api.vctr.ai/collection/bulk_insert_and_encode', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + documents=documents, + insert_date=insert_date, + overwrite=overwrite, + models=models, + **kwargs)) + + @retry() + @return_curl_or_response('json') + def job_status(self,job_id, job_name, collection_name, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/job_status', + params=dict( + job_id=job_id, + job_name=job_name, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def list_jobs(self,collection_name, show_active_only=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/list_jobs', + params=dict( + show_active_only=show_active_only, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def cluster(self,vector_field, collection_name, n_clusters=0, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/cluster', + params=dict( + vector_field=vector_field, + n_clusters=n_clusters, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def advanced_cluster(self,vector_field, collection_name, alias="default", n_clusters=0, n_iter=10, n_init=5, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/advanced_cluster', + params=dict( + 
vector_field=vector_field, + alias=alias, + n_clusters=n_clusters, + n_iter=n_iter, + n_init=n_init, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def dimensionality_reduction(self,vector_field, collection_name, alias="default", n_components=0, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/dimensionality_reduction', + params=dict( + vector_field=vector_field, + alias=alias, + n_components=n_components, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def encode_text_field(self,text_field, collection_name, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/encode_text_field', + params=dict( + text_field=text_field, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def encode_audio_field(self,audio_field, collection_name, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/encode_audio_field', + params=dict( + audio_field=audio_field, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + + @retry() + @return_curl_or_response('json') + def encode_image_field(self,image_field, collection_name, refresh=True, **kwargs): + return requests.get( + url='https://api.vctr.ai/collection/jobs/encode_image_field', + params=dict( + image_field=image_field, + refresh=refresh, + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + )) + diff --git a/vectorai/api/utils.py b/vectorai/api/utils.py index 62e44965..72b71be4 100644 --- a/vectorai/api/utils.py +++ b/vectorai/api/utils.py @@ -71,6 +71,22 @@ def _return_curl(response): headers = " -H 
".join(headers) return command.format(method=method, headers=headers, data=data, uri=uri).replace('-H "Accept-Encoding: gzip, deflate"', '') -def return_curl_or_response(response, return_type='json', return_curl=False): +def _return_curl_or_response(response, return_type='json', return_curl=False): if return_curl: return _return_curl(response) return return_response(response, return_type=return_type) + +def return_curl_or_response(return_type): + """ + Return a curl or response once the request is received + Args: + num_of_retries: The number of times the function should retry + timeout: The number of seconds to wait between each retry + """ + RETURN_CURL = bool(os.getenv("VI_RETURN_CURL")) + def _return_api_call(func): + @wraps(func) + def function_wrapper(*args, **kwargs): + return _return_curl_or_response(func(*args, **kwargs), + return_type=return_type, return_curl=RETURN_CURL) + return function_wrapper + return _return_api_call diff --git a/vectorai/client.py b/vectorai/client.py index 5ed567cd..0b36dbb6 100644 --- a/vectorai/client.py +++ b/vectorai/client.py @@ -3,6 +3,7 @@ import requests import pandas as pd import os +import warnings from .api.utils import return_curl_or_response from .write import ViWriteClient from .analytics.client import ViAnalyticsClient @@ -107,6 +108,8 @@ class ViCollectionClient(ViClient): >>> vi_client.insert_documents(documents) """ def __init__(self, collection_name: str, username: str, api_key: str, url: str="https://api.vctr.ai", verbose: bool=True) -> None: + warnings.warn("ViCollectionClient is no longer supported and will be deprecated in the near future." 
+ \ + "Stick to using ViClient in the future.") if username is None: if 'VI_USERNAME' not in os.environ.keys(): raise APIError("Specify username of set VI_USERNAME as an environment variable.") diff --git a/vectorai/read.py b/vectorai/read.py index fed82a04..c7b49704 100644 --- a/vectorai/read.py +++ b/vectorai/read.py @@ -8,11 +8,11 @@ import time import warnings from typing import List, Dict, Union, Any -from .api.read import ViReadAPIClient +from .api import ViAPIClient from .utils import UtilsMixin from .doc_utils import DocUtilsMixin from .errors import MissingFieldWarning -class ViReadClient(ViReadAPIClient, UtilsMixin, DocUtilsMixin): +class ViReadClient(ViAPIClient, UtilsMixin, DocUtilsMixin): def __init__(self, username: str, api_key: str, url: str="https://api.vctr.ai"): self.username = username self.api_key = api_key @@ -421,7 +421,7 @@ def search_collections(self, keyword: str) -> List[str]: def random_recommendation(self, collection_name: str, - field: str, + search_field: str, seed=None, sum_fields: bool = True, metric: str = "cosine", @@ -464,7 +464,7 @@ def random_recommendation(self, """ random_id = self.random_documents(collection_name, page_size=1, seed=seed, include_fields=['_id'])['documents'][0]['_id'] - return self.search_by_id(collection_name, document_id=random_id, field=field, + return self.search_by_id(collection_name=collection_name, document_id=random_id, search_field=search_field, approx=approx, sum_fields=sum_fields, page_size=page_size, page=page, metric=metric, min_score=min_score, include_vector=include_vector, include_count=include_count, hundred_scale=hundred_scale, asc=asc, **kwargs) diff --git a/vectorai/utils.py b/vectorai/utils.py index cdb444b4..fe00bc73 100644 --- a/vectorai/utils.py +++ b/vectorai/utils.py @@ -464,7 +464,10 @@ def wrapper(*args, **kw): # new_func = partial(func, collection_name=collection_name) # print(args) # res = new_func(*args, **kw) - res = func(args[0], collection_name, *args[1:], **kw) + # try: + 
res = func(args[0], collection_name=collection_name, *args[1:], **kw) + # except TypeError: + # res = func(args[0], collection_name=collection_name, *args[1:], **kw) + # res = func(args[0], collection_name, *args[1:], **kw) + # res = func(args[0], kw['collection_name'], *args[1:]) + # except: diff --git a/vectorai/write.py b/vectorai/write.py index a43b5910..085b8015 100644 --- a/vectorai/write.py +++ b/vectorai/write.py @@ -16,12 +16,11 @@ from functools import partial from multiprocessing import Pool from .utils import UtilsMixin -from .errors import APIError, MissingFieldError, MissingFieldWarning -from .read import ViReadClient -from .api.write import ViWriteAPIClient +from .errors import APIError, MissingFieldError, MissingFieldWarning, CollectionNameError +from .api import ViAPIClient -class ViWriteClient(ViReadClient, ViWriteAPIClient, UtilsMixin): +class ViWriteClient(ViAPIClient, UtilsMixin): """Class to write to database.""" def __init__(self, username, api_key, url="https://api.vctr.ai" ): @@ -744,3 +743,29 @@ def retrieve_and_encode( pbar.update(1) return failed_all + def _typecheck_collection_name(self, collection_name: str): + """ + Typecheck collection name + """ + ACCEPTABLE_LETTERS = 'abcdefghijklmnopqrstuvwxyz_.' + for letter in collection_name: + if letter not in ACCEPTABLE_LETTERS: + raise CollectionNameError("Collection names must be lower case A-Z and less than 240 characters") + if len(collection_name) > 240: + raise CollectionNameError("Collection names must be lower case A-Z and less than 240 characters") + + def create_collection_from_document(self, collection_name: str, document: dict, **kwargs): + """ +Creates a collection by inferring the schema from a document + +If you are inserting your own vector use the suffix (ends with) **"\_vector\_"** for the field name. e.g. "product\_description\_vector\_" + +Args: + collection_name: + Name of Collection + document: + A Document is a JSON-like data that we store our metadata and vectors with. 
For specifying id of the document use the field '\_id', for specifying vector field use the suffix of '\_vector\_' +""" + self._typecheck_collection_name(collection_name) + return self._create_collection_from_document( + collection_name=collection_name, document=document)