From 281309e604a323652a21a3604c6174bc5cdc5e61 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Tue, 2 Jul 2024 13:49:01 +0000 Subject: [PATCH 01/21] Create vector index in SchemeShard --- ydb/core/base/table_index.cpp | 37 +++++++++--- ydb/core/base/table_index.h | 7 ++- ydb/core/base/table_vector_index.h | 14 +++++ ydb/core/kqp/host/kqp_gateway_proxy.cpp | 13 +---- ydb/core/kqp/provider/yql_kikimr_gateway.cpp | 1 + ydb/core/kqp/provider/yql_kikimr_gateway.h | 36 +++++++++--- ydb/core/protos/flat_scheme_op.proto | 19 ++++++ ydb/core/tx/datashard/datashard_user_table.h | 13 ++++- ydb/core/tx/scheme_board/cache.cpp | 3 + ydb/core/tx/scheme_cache/scheme_cache.h | 3 +- ...emeshard__operation_create_build_index.cpp | 2 +- ...eshard__operation_create_indexed_table.cpp | 2 +- .../schemeshard_build_index__create.cpp | 3 + .../schemeshard_build_index_tx_base.cpp | 3 + .../tx/schemeshard/schemeshard_info_types.h | 8 +++ .../schemeshard_path_describer.cpp | 27 +++++++-- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 49 ++++++++++++++++ ydb/core/tx/schemeshard/schemeshard_utils.h | 49 ++++++++++++---- .../tx/schemeshard/ut_helpers/ls_checks.cpp | 13 +++++ .../tx/schemeshard/ut_helpers/ls_checks.h | 5 ++ .../schemeshard/ut_index/ut_vector_index.cpp | 58 +++++++++++++++++++ ydb/core/tx/schemeshard/ut_index/ya.make | 1 + ydb/core/ydb_convert/table_description.cpp | 7 +++ ydb/core/ydb_convert/table_settings.cpp | 6 ++ ydb/public/api/protos/out/out.cpp | 17 ++++++ ydb/public/api/protos/ydb_table.proto | 39 +++++++++++++ ydb/public/sdk/cpp/client/ydb_table/table.cpp | 3 + .../sdk/cpp/client/ydb_table/table_enum.h | 1 + 28 files changed, 392 insertions(+), 47 deletions(-) create mode 100644 ydb/core/base/table_vector_index.h create mode 100644 ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index fa63ed0f90c7..d1266e4265e3 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ 
-16,14 +16,24 @@ TVector::const_iterator IsUniq(const TVector& names) { namespace NKikimr { namespace NTableIndex { -TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns& index) { +TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) { { TString explain; - Y_ABORT_UNLESS(IsCompatibleIndex(table, index, explain), "explain is %s", explain.c_str()); + Y_ABORT_UNLESS(IsCompatibleIndex(indexType, table, index, explain), "explain is %s", explain.c_str()); } TTableColumns result; + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + result.Keys.push_back(NTableVectorIndex::LevelColumn); + result.Keys.push_back(NTableVectorIndex::IdColumn); + result.Columns.insert(NTableVectorIndex::LevelColumn); + result.Columns.insert(NTableVectorIndex::IdColumn); + result.Columns.insert(NTableVectorIndex::CentroidColumn); + result.Columns.insert(NTableVectorIndex::IdsColumn); + return result; + } + for (const auto& ik: index.KeyColumns) { result.Keys.push_back(ik); result.Columns.insert(ik); @@ -43,7 +53,7 @@ TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexC return result; } -bool IsCompatibleIndex(const TTableColumns& table, const TIndexColumns& index, TString& explain) { +bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) { { auto brokenAt = IsUniq(table.Keys); if (brokenAt != table.Keys.end()) { @@ -71,6 +81,17 @@ bool IsCompatibleIndex(const TTableColumns& table, const TIndexColumns& index, T } } + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (index.KeyColumns.size() != 1) { + explain = "Only single key column is supported for vector index"; + return false; + } + if (index.DataColumns.size() != 0) { + explain = "Data columns are not supported for vector index"; + return false; + } + } + 
THashSet indexKeys; for (const auto& tableKeyName: table.Keys) { @@ -93,10 +114,12 @@ bool IsCompatibleIndex(const TTableColumns& table, const TIndexColumns& index, T } } - if (index.KeyColumns == table.Keys) { - explain = TStringBuilder() - << "table and index keys are the same"; - return false; + if (indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (index.KeyColumns == table.Keys) { + explain = TStringBuilder() + << "table and index keys are the same"; + return false; + } } for (const auto& dataName: index.DataColumns) { diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index ea1328cdb614..7d7af7b915d0 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -1,5 +1,8 @@ #pragma once +#include "table_vector_index.h" +#include + #include #include #include @@ -18,8 +21,8 @@ struct TIndexColumns { TVector DataColumns; }; -bool IsCompatibleIndex(const TTableColumns& table, const TIndexColumns& index, TString& explain); -TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns &index); +bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain); +TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index); } } diff --git a/ydb/core/base/table_vector_index.h b/ydb/core/base/table_vector_index.h new file mode 100644 index 000000000000..fde27891f613 --- /dev/null +++ b/ydb/core/base/table_vector_index.h @@ -0,0 +1,14 @@ +#pragma once + +namespace NKikimr { +namespace NTableIndex { +namespace NTableVectorIndex { + +// Special columns for vector index +inline constexpr char LevelColumn[] = "level"; +inline constexpr char IdColumn[] = "id"; +inline constexpr char CentroidColumn[] = "centroid"; +inline constexpr char IdsColumn[] = "ids"; +} +} +} diff --git a/ydb/core/kqp/host/kqp_gateway_proxy.cpp 
b/ydb/core/kqp/host/kqp_gateway_proxy.cpp index 3c717ee29c7c..61b587e96c10 100644 --- a/ydb/core/kqp/host/kqp_gateway_proxy.cpp +++ b/ydb/core/kqp/host/kqp_gateway_proxy.cpp @@ -572,18 +572,7 @@ class TKqpGatewayProxy : public IKikimrGateway { for (const auto& index : metadata->Indexes) { auto indexDesc = schemeTx.MutableCreateIndexedTable()->AddIndexDescription(); indexDesc->SetName(index.Name); - switch (index.Type) { - case NYql::TIndexDescription::EType::GlobalSync: - indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); - break; - case NYql::TIndexDescription::EType::GlobalAsync: - indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync); - break; - case NYql::TIndexDescription::EType::GlobalSyncUnique: - indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique); - break; - } - + indexDesc->SetType(TIndexDescription::ConvertIndexType(index.Type)); indexDesc->SetState(static_cast<::NKikimrSchemeOp::EIndexState>(index.State)); for (const auto& col : index.KeyColumns) { indexDesc->AddKeyColumnNames(col); diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index c0bf3edf6d61..301e7352db2e 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -116,6 +116,7 @@ void IKikimrGateway::BuildIndexMetadata(TTableMetadataResult& loadTableMetadataR const auto& index = tableMetadata->Indexes[i]; auto indexTablePath = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableName, index.Name); NKikimr::NTableIndex::TTableColumns indexTableColumns = NKikimr::NTableIndex::CalcTableImplDescription( + TIndexDescription::ConvertIndexType(index.Type), tableColumns, NKikimr::NTableIndex::TIndexColumns{index.KeyColumns, {}}); diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index a5c666a23ab1..4d3f2c698de8 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ 
b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -64,6 +64,7 @@ struct TIndexDescription { GlobalSync = 0, GlobalAsync = 1, GlobalSyncUnique = 2, + Vector = 3 }; // Index states here must be in sync with NKikimrSchemeOp::EIndexState protobuf @@ -99,7 +100,7 @@ struct TIndexDescription { : Name(index.GetName()) , KeyColumns(index.GetKeyColumnNames().begin(), index.GetKeyColumnNames().end()) , DataColumns(index.GetDataColumnNames().begin(), index.GetDataColumnNames().end()) - , Type(ConvertIndexType(index)) + , Type(ConvertIndexType(index.GetType())) , State(static_cast(index.GetState())) , SchemaVersion(index.GetSchemaVersion()) , LocalPathId(index.GetLocalPathId()) @@ -117,15 +118,32 @@ struct TIndexDescription { , PathOwnerId(message->GetPathOwnerId()) {} - static TIndexDescription::EType ConvertIndexType(const NKikimrSchemeOp::TIndexDescription& index) { - auto type = NYql::TIndexDescription::EType::GlobalSync; - if (index.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync) { - type = NYql::TIndexDescription::EType::GlobalAsync; - } else if (index.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique) { - type = NYql::TIndexDescription::EType::GlobalSyncUnique; + static TIndexDescription::EType ConvertIndexType(const NKikimrSchemeOp::EIndexType indexType) { + switch (indexType) { + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobal: + return TIndexDescription::EType::GlobalSync; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync: + return TIndexDescription::EType::GlobalAsync; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique: + return TIndexDescription::EType::GlobalSyncUnique; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: + return TIndexDescription::EType::Vector; + case NKikimrSchemeOp::EIndexType::EIndexTypeInvalid: + YQL_ENSURE(false, "Unexpected NKikimrSchemeOp::EIndexType::EIndexTypeInvalid"); } + } - return type; + static NKikimrSchemeOp::EIndexType ConvertIndexType(const TIndexDescription::EType 
indexType) { + switch (indexType) { + case TIndexDescription::EType::GlobalSync: + return NKikimrSchemeOp::EIndexType::EIndexTypeGlobal; + case TIndexDescription::EType::GlobalAsync: + return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync; + case TIndexDescription::EType::GlobalSyncUnique: + return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique; + case NYql::TIndexDescription::EType::Vector: + return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector; + } } void ToMessage(NKikimrKqp::TIndexDescriptionProto* message) const { @@ -160,6 +178,8 @@ struct TIndexDescription { return true; case EType::GlobalAsync: return false; + case EType::Vector: + return false; } } }; diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index aa66c44d3ea0..74511b888e77 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -13,6 +13,7 @@ import "ydb/core/protos/follower_group.proto"; import "ydb/core/protos/blob_depot_config.proto"; import "ydb/public/api/protos/ydb_coordination.proto"; import "ydb/public/api/protos/ydb_export.proto"; +import "ydb/public/api/protos/ydb_table.proto"; import "ydb/public/api/protos/ydb_value.proto"; import "ydb/library/actors/protos/actors.proto"; import "ydb/library/mkql_proto/protos/minikql.proto"; @@ -973,6 +974,7 @@ enum EIndexType { EIndexTypeGlobal = 1; EIndexTypeGlobalAsync = 2; EIndexTypeGlobalUnique = 3; + EIndexTypeGlobalVector = 4; } enum EIndexState { @@ -982,6 +984,15 @@ enum EIndexState { EIndexStateWriteOnly = 3; } +message TVectorIndexDescription { + optional Ydb.Table.GlobalVectorIndex.IndexType IndexType = 1; + oneof DistanceSimilarity { + Ydb.Table.GlobalVectorIndex.Distance Distance = 2; + Ydb.Table.GlobalVectorIndex.Similarity Similarity = 3; + } + optional Ydb.Table.GlobalVectorIndex.VectorType VectorType = 4; +} + message TIndexDescription { optional string Name = 1; optional uint64 LocalPathId = 2; @@ -1000,6 +1011,10 @@ message 
TIndexDescription { // DataSize + IndexSize of indexImplTable optional uint64 DataSize = 9; repeated TTableDescription IndexImplTableDescriptions = 10; + + oneof SpecializedIndexDescription { + TVectorIndexDescription VectorIndexDescription = 11; + } } message TIndexCreationConfig { @@ -1009,6 +1024,9 @@ message TIndexCreationConfig { repeated TTableDescription IndexImplTableDescriptions = 4; //description for index impl tables optional EIndexState State = 5; //state of index at the creation time repeated string DataColumnNames = 6; //columns to be denormalized to read data just from index + oneof SpecializedIndexDescription { + TVectorIndexDescription VectorIndexDescription = 7; + } } message TIndexAlteringConfig { @@ -1849,6 +1867,7 @@ enum EPathSubType { EPathSubTypeSyncIndexImplTable = 1; EPathSubTypeAsyncIndexImplTable = 2; EPathSubTypeStreamImpl = 3; + EPathSubTypeVectorIndexImplTable = 4; } enum EPathState { diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index af7c9ab2a6d8..0aca764d01e1 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -273,10 +274,20 @@ struct TUserTable : public TThrRefBase { nameToId.emplace(column.Name, id); } - auto fillColumnIds = [&nameToId](const auto& columnNames, TVector& columnIds) { + auto fillColumnIds = [&nameToId, type = Type](const auto& columnNames, TVector& columnIds) { columnIds.reserve(columnNames.size()); for (const auto& columnName : columnNames) { auto it = nameToId.find(columnName); + + if (it == nameToId.end() && type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + Y_ABORT_UNLESS(columnName == NTableIndex::NTableVectorIndex::IdColumn || + columnName == NTableIndex::NTableVectorIndex::LevelColumn || + columnName == NTableIndex::NTableVectorIndex::CentroidColumn || + columnName == 
NTableIndex::NTableVectorIndex::IdsColumn + ); + continue; + } + Y_ABORT_UNLESS(it != nameToId.end()); columnIds.push_back(it->second); } diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 07062b4ce553..01fb1a054f87 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -876,6 +876,8 @@ class TSchemeCache: public TMonitorableActor { return TResolve::KindSyncIndexTable; case NKikimrSchemeOp::EPathSubTypeAsyncIndexImplTable: return TResolve::KindAsyncIndexTable; + case NKikimrSchemeOp::EPathSubTypeVectorIndexImplTable: + return TResolve::KindVectorIndexTable; default: return TResolve::KindRegularTable; } @@ -887,6 +889,7 @@ class TSchemeCache: public TMonitorableActor { switch (subType) { case NKikimrSchemeOp::EPathSubTypeSyncIndexImplTable: case NKikimrSchemeOp::EPathSubTypeAsyncIndexImplTable: + case NKikimrSchemeOp::EPathSubTypeVectorIndexImplTable: return true; default: return false; diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index b81736613daa..7d459f4bd3c4 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -359,7 +359,8 @@ struct TSchemeCacheRequest { KindUnknown = 0, KindRegularTable = 1, KindSyncIndexTable = 2, - KindAsyncIndexTable= 3, + KindAsyncIndexTable = 3, + KindVectorIndexTable = 4 }; struct TEntry { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index ade4b00ae014..b04e166ba3fe 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -117,7 +117,7 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa // This description provided by user to override partition policy const auto& userIndexDesc = indexDesc.GetIndexImplTableDescriptions(0); - 
indexImplTableDescription = CalcImplTableDesc(tableInfo, implTableColumns, userIndexDesc); + indexImplTableDescription = CalcImplTableDesc(indexDesc.GetType(), tableInfo, implTableColumns, userIndexDesc); indexImplTableDescription.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); indexImplTableDescription.MutablePartitionConfig()->SetShadowData(true); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index e0eb084f8f65..6e58632ee3cb 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -254,7 +254,7 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr // This description provided by user to override partition policy userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); } - indexImplTableDescription = CalcImplTableDesc(baseTableDescription, implTableColumns, userIndexDesc); + indexImplTableDescription = CalcImplTableDesc(indexDescription.GetType(), baseTableDescription, implTableColumns, userIndexDesc); result.push_back(CreateNewTable(NextPartId(nextId, result), scheme)); } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 1eef982c605c..0986cca309f9 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -226,6 +226,9 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: explain = "unsupported index type to build"; return false; + case Ydb::Table::TableIndex::TypeCase::kGlobalVectorIndex: + explain = "unsupported vector index type to build"; + return false; case Ydb::Table::TableIndex::TypeCase::TYPE_NOT_SET: explain = "invalid or unset 
index type"; return false; diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp index fe94174526d7..fd14a7ad7f53 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp @@ -274,6 +274,9 @@ void TSchemeShard::TIndexBuilder::TTxBase::Fill(NKikimrIndexBuilder::TIndexBuild case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync: *index.mutable_global_async_index() = Ydb::Table::GlobalAsyncIndex(); break; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: + //TODO build vector index + break; default: Y_ABORT("Unreachable"); } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index b5297bf20037..8000d2d3359f 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2365,8 +2365,14 @@ struct TTableIndexInfo : public TSimpleRefCount { alterData->IndexKeys.assign(config.GetKeyColumnNames().begin(), config.GetKeyColumnNames().end()); Y_ABORT_UNLESS(alterData->IndexKeys.size()); alterData->IndexDataColumns.assign(config.GetDataColumnNames().begin(), config.GetDataColumnNames().end()); + Y_ABORT_UNLESS(alterData->IndexDataColumns.empty() || config.GetType() != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector); + alterData->State = config.HasState() ? 
config.GetState() : EState::EIndexStateReady; + if (config.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + alterData->VectorIndexDescription = config.GetVectorIndexDescription(); + } + return result; } @@ -2378,6 +2384,8 @@ struct TTableIndexInfo : public TSimpleRefCount { TVector IndexDataColumns; TTableIndexInfo::TPtr AlterData = nullptr; + + NKikimrSchemeOp::TVectorIndexDescription VectorIndexDescription; }; struct TCdcStreamInfo : public TSimpleRefCount { diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index c1259db27f56..8d5a770b871c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -131,10 +131,16 @@ TPathElement::EPathSubType TPathDescriber::CalcPathSubType(const TPath& path) { auto indexInfo = Self->Indexes.at(pathId); switch (indexInfo->Type) { - case NKikimrSchemeOp::EIndexTypeGlobalAsync: - return TPathElement::EPathSubType::EPathSubTypeAsyncIndexImplTable; - default: - return TPathElement::EPathSubType::EPathSubTypeSyncIndexImplTable; + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + return TPathElement::EPathSubType::EPathSubTypeAsyncIndexImplTable; + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + return TPathElement::EPathSubType::EPathSubTypeSyncIndexImplTable; + case NKikimrSchemeOp::EIndexTypeGlobalVector: + return TPathElement::EPathSubType::EPathSubTypeVectorIndexImplTable; + default: + Y_DEBUG_ABORT("%s", (TStringBuilder() << "unexpected indexInfo->Type# " << indexInfo->Type).data()); + return TPathElement::EPathSubType::EPathSubTypeEmpty; } } else if (parentPath.IsCdcStream()) { return TPathElement::EPathSubType::EPathSubTypeStreamImpl; @@ -1256,6 +1262,19 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name if (fillBoundaries) { FillTableBoundaries(*tableInfo, 
*tableDescription->MutableSplitBoundary()); } + + if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVector) { + auto& vectorIndexDescription = *entry.MutableVectorIndexDescription(); + vectorIndexDescription.SetIndexType(indexInfo->VectorIndexDescription.GetIndexType()); + if (indexInfo->VectorIndexDescription.HasDistance()) + vectorIndexDescription.SetDistance(indexInfo->VectorIndexDescription.GetDistance()); + else if (indexInfo->VectorIndexDescription.HasSimilarity()) + vectorIndexDescription.SetSimilarity(indexInfo->VectorIndexDescription.GetSimilarity()); + else + Y_FAIL_S("Either distance or similarity should be set in VectorIndexDescription: " << indexInfo->VectorIndexDescription); + vectorIndexDescription.SetVectorType(indexInfo->VectorIndexDescription.GetVectorType()); + } + } void TSchemeShard::DescribeCdcStream(const TPathId& pathId, const TString& name, diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 13642fc1054e..99f438e93b4d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -255,7 +255,41 @@ TTableColumns ExtractInfo(const NSchemeShard::TTableInfo::TPtr &tableInfo) { return result; } +void FillVectorIndexColumns(NKikimrSchemeOp::TTableDescription& result) { + { + auto levelColumn = result.AddColumns(); + levelColumn->SetName(NTableVectorIndex::LevelColumn); + levelColumn->SetType("Uint8"); + levelColumn->SetTypeId(NScheme::NTypeIds::Uint8); + levelColumn->SetId(0); + } + { + auto idColumn = result.AddColumns(); + idColumn->SetName(NTableVectorIndex::IdColumn); + idColumn->SetType("Uint32"); + idColumn->SetTypeId(NScheme::NTypeIds::Uint32); + idColumn->SetId(1); + } + { + auto centroidColumn = result.AddColumns(); + centroidColumn->SetName(NTableVectorIndex::CentroidColumn); + centroidColumn->SetType("String"); + centroidColumn->SetTypeId(NScheme::NTypeIds::String); + centroidColumn->SetId(2); + } + { + auto idsColumn = 
result.AddColumns(); + idsColumn->SetName(NTableVectorIndex::IdsColumn); + idsColumn->SetType("String"); + idsColumn->SetTypeId(NScheme::NTypeIds::String); + idsColumn->SetId(3); + } + result.AddKeyColumnNames(NTableVectorIndex::LevelColumn); + result.AddKeyColumnNames(NTableVectorIndex::IdColumn); +} + NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NKikimrSchemeOp::EIndexType indexType, const NSchemeShard::TTableInfo::TPtr& baseTableInfo, const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc) @@ -274,6 +308,11 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableInfo, indexTableDesc); + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + FillVectorIndexColumns(result); + return result; + } + //Columns and KeyColumnNames order is really important //the order of implTableColumns.Keys is the right one @@ -321,6 +360,7 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( } NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NKikimrSchemeOp::EIndexType indexType, const NKikimrSchemeOp::TTableDescription &baseTableDescr, const TTableColumns &implTableColumns, const NKikimrSchemeOp::TTableDescription &indexTableDesc) @@ -339,6 +379,11 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableDescr, indexTableDesc); + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + FillVectorIndexColumns(result); + return result; + } + //Columns and KeyColumnNames order is really important //the order of implTableColumns.Keys is the right one @@ -512,11 +557,15 @@ bool ExtractTypes(const NSchemeShard::TTableInfo::TPtr& baseTableInfo, TColumnTy } bool IsCompatibleKeyTypes( + const NKikimrSchemeOp::EIndexType indexType, const TColumnTypes& baseTableColumnTypes, const TTableColumns& implTableColumns, bool uniformTable, 
TString& explain) { + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) + return true; + const NScheme::TTypeRegistry* typeRegistry = AppData()->TypeRegistry; Y_ABORT_UNLESS(typeRegistry); diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index b74161e931ef..7de99117bace 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -137,14 +137,17 @@ class PQGroupReserve { namespace NTableIndex { NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NKikimrSchemeOp::EIndexType indexType, const NSchemeShard::TTableInfo::TPtr& baseTableInfo, const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc); NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NKikimrSchemeOp::EIndexType indexType, const NKikimrSchemeOp::TTableDescription& baseTableDesc, const NTableIndex::TTableColumns& implTableColumns, - const NKikimrSchemeOp::TTableDescription& indexTableDesc); + const NKikimrSchemeOp::TTableDescription& indexTableDesc +); NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( const NSchemeShard::TTableInfo::TPtr& baseTableInfo, @@ -164,6 +167,7 @@ bool ExtractTypes(const NSchemeShard::TTableInfo::TPtr& baseTableInfo, TColumnTy bool ExtractTypes(const NKikimrSchemeOp::TTableDescription& baseTableDesc, TColumnTypes& columnsTypes, TString& explain); bool IsCompatibleKeyTypes( + const NKikimrSchemeOp::EIndexType indexType, const TColumnTypes& baseTableColumnsTypes, const TTableColumns& implTableColumns, bool uniformTable, @@ -183,25 +187,50 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - if (!indexKeys.DataColumns.empty() && !AppData()->FeatureFlags.GetEnableDataColumnForIndexTable()) { - status = NKikimrScheme::EStatus::StatusPreconditionFailed; - error = "It is not allowed to create index with data column"; - return false; + if 
(!indexKeys.DataColumns.empty()) { + if (!AppData()->FeatureFlags.GetEnableDataColumnForIndexTable()) { + status = NKikimrScheme::EStatus::StatusPreconditionFailed; + error = "It is not allowed to create index with data column"; + return false; + } + if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + status = NKikimrScheme::EStatus::StatusPreconditionFailed; + error = "It is not allowed to create vector index with data column"; + return false; + } } - if (!IsCompatibleIndex(baseTableColumns, indexKeys, error)) { + if (!IsCompatibleIndex(indexDesc.GetType(), baseTableColumns, indexKeys, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return false; } - TColumnTypes columnsTypes; - if (!ExtractTypes(tableDesc, columnsTypes, error)) { + TColumnTypes baseColumnTypes; + if (!ExtractTypes(tableDesc, baseColumnTypes, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return false; } - implTableColumns = CalcTableImplDescription(baseTableColumns, indexKeys); - if (!IsCompatibleKeyTypes(columnsTypes, implTableColumns, uniformTable, error)) { + if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexKeys.KeyColumns.size() != 1) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = "Only single column is supported for vector index"; + return false; + } + + const TString& indexColumnName = indexKeys.KeyColumns[0]; + Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); + auto typeInfo = baseColumnTypes.at(indexColumnName); + + if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + return false; + } + } + + implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); + if (!IsCompatibleKeyTypes(indexDesc.GetType(), 
baseColumnTypes, implTableColumns, uniformTable, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return false; } diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 8a415d0cad6c..4284fce760b3 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -830,6 +830,19 @@ TCheckFunc IndexDataColumns(const TVector& dataColumnNames) { }; } +TCheckFunc VectorIndexDescription(Ydb::Table::GlobalVectorIndex_IndexType indexType, + Ydb::Table::GlobalVectorIndex_Distance distance, + Ydb::Table::GlobalVectorIndex_Similarity similarity, + Ydb::Table::GlobalVectorIndex_VectorType vectorType) { + return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetIndexType(), indexType); + UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetDistance(), distance); + UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetSimilarity(), similarity); + UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetVectorType(), vectorType); + }; +} + + TCheckFunc SequenceName(const TString& name) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetSequenceDescription().GetName(), name); diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index 3cb8f39c5754..8dafa9bf5a6a 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -138,6 +138,11 @@ namespace NLs { TCheckFunc IndexState(NKikimrSchemeOp::EIndexState state); TCheckFunc IndexKeys(const TVector& keyNames); TCheckFunc IndexDataColumns(const TVector& dataColumnNames); + + 
TCheckFunc VectorIndexDescription(Ydb::Table::GlobalVectorIndex_IndexType indexType, + Ydb::Table::GlobalVectorIndex_Distance distance, + Ydb::Table::GlobalVectorIndex_Similarity similarity, + Ydb::Table::GlobalVectorIndex_VectorType vectorType); TCheckFunc SequenceName(const TString& name); TCheckFunc SequenceIncrement(i64 increment); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp new file mode 100644 index 000000000000..e66ea203e358 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace NKikimr; +using namespace NSchemeShard; +using namespace NSchemeShardUT_Private; + +Y_UNIT_TEST_SUITE(TVectorIndexTests) { + Y_UNIT_TEST(CreateTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "vectors" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "embedding" Type: "String" } + Columns { Name: "otherColumn1" Type: "String" } + Columns { Name: "otherColumn2" Type: "String" } + Columns { Name: "otherColumn3" Type: "String" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_vector" + KeyColumnNames: ["embedding"] + Type: EIndexTypeGlobalVector + VectorIndexDescription { + IndexType: INDEX_TYPE_KMEANS_TREE, + Distance: DISTANCE_COSINE, + VectorType: VECTOR_TYPE_FLOAT + } + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector"), + { NLs::PathExist, + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVector), + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), + NLs::IndexKeys({"embedding"}), + NLs::VectorIndexDescription(Ydb::Table::GlobalVectorIndex::INDEX_TYPE_KMEANS_TREE, + Ydb::Table::GlobalVectorIndex::DISTANCE_COSINE, + 
Ydb::Table::GlobalVectorIndex::SIMILARITY_UNSPECIFIED, + Ydb::Table::GlobalVectorIndex::VECTOR_TYPE_FLOAT + ), + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplTable"), + { NLs::PathExist, + NLs::CheckColumns("indexImplTable", {"level", "id", "centroid", "ids"}, {}, {"level", "id"}) }); + } +} diff --git a/ydb/core/tx/schemeshard/ut_index/ya.make b/ydb/core/tx/schemeshard/ut_index/ya.make index ed4a5fdd9f7b..3c3a129aef5f 100644 --- a/ydb/core/tx/schemeshard/ut_index/ya.make +++ b/ydb/core/tx/schemeshard/ut_index/ya.make @@ -24,6 +24,7 @@ PEERDIR( SRCS( ut_async_index.cpp ut_unique_index.cpp + ut_vector_index.cpp ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index a1a0eb6e0ccb..a401a8586738 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -880,6 +880,9 @@ void FillIndexDescriptionImpl(TYdbProto& out, const NKikimrSchemeOp::TTableDescr tableIndex.GetIndexImplTableDescriptions() ); break; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: + *index->mutable_global_vector_index() = Ydb::Table::GlobalVectorIndex(); + break; default: break; }; @@ -946,6 +949,10 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique); break; + case Ydb::Table::TableIndex::kGlobalVectorIndex: + indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector); + break; + default: // pass through // TODO: maybe return BAD_REQUEST? 
diff --git a/ydb/core/ydb_convert/table_settings.cpp b/ydb/core/ydb_convert/table_settings.cpp index 9ee281f3f648..fa8ecd4cf74d 100644 --- a/ydb/core/ydb_convert/table_settings.cpp +++ b/ydb/core/ydb_convert/table_settings.cpp @@ -442,6 +442,12 @@ bool FillIndexTablePartitioning( } break; + case Ydb::Table::TableIndex::kGlobalVectorIndex: + if (!fillIndexPartitioning(index.global_vector_index().settings())) { + return false; + } + break; + case Ydb::Table::TableIndex::TYPE_NOT_SET: break; } diff --git a/ydb/public/api/protos/out/out.cpp b/ydb/public/api/protos/out/out.cpp index c8c5c7abfaff..3ba6d0a478b9 100644 --- a/ydb/public/api/protos/out/out.cpp +++ b/ydb/public/api/protos/out/out.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -29,3 +30,19 @@ Y_DECLARE_OUT_SPEC(, Ydb::Export::ExportProgress::Progress, stream, value) { Y_DECLARE_OUT_SPEC(, Ydb::Import::ImportProgress::Progress, stream, value) { stream << Ydb::Import::ImportProgress_Progress_Name(value); } + +Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::IndexType, stream, value) { + stream << Ydb::Table::GlobalVectorIndex::IndexType_Name(value); +} + +Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::Distance, stream, value) { + stream << Ydb::Table::GlobalVectorIndex::Distance_Name(value); +} + +Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::Similarity, stream, value) { + stream << Ydb::Table::GlobalVectorIndex::Similarity_Name(value); +} + +Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::VectorType, stream, value) { + stream << Ydb::Table::GlobalVectorIndex::VectorType_Name(value); +} \ No newline at end of file diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index a698b71c8d66..3c3f2869bbc3 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -71,6 +71,43 @@ message GlobalUniqueIndex { GlobalIndexSettings settings = 1; } +message GlobalVectorIndex { + optional GlobalIndexSettings 
settings = 1; + + enum IndexType { + INDEX_TYPE_UNSPECIFIED = 0; + INDEX_TYPE_KMEANS_TREE = 1; + } + + enum Distance { + DISTANCE_UNSPECIFIED = 0; + DISTANCE_COSINE = 1; + DISTANCE_MANHATTAN = 2; + DISTANCE_EUCLIDEAN = 3; + } + + enum Similarity { + SIMILARITY_UNSPECIFIED = 0; + SIMILARITY_COSINE = 1; + SIMILARITY_INNER_PRODUCT = 2; + } + + enum VectorType { + VECTOR_TYPE_UNSPECIFIED = 0; + VECTOR_TYPE_FLOAT = 1; + VECTOR_TYPE_UINT8 = 2; + VECTOR_TYPE_INT8 = 3; + VECTOR_TYPE_BIT = 4; + } + + IndexType indexType = 2; + oneof distance_similarity { + Distance distance = 3; + Similarity similarity = 4; + } + VectorType vectorType = 5; +} + // Represent secondary index message TableIndex { // Name of index @@ -82,6 +119,7 @@ message TableIndex { GlobalIndex global_index = 3; GlobalAsyncIndex global_async_index = 4; GlobalUniqueIndex global_unique_index = 6; + GlobalVectorIndex global_vector_index = 7; } // list of columns content to be copied in to index table repeated string data_columns = 5; @@ -105,6 +143,7 @@ message TableIndexDescription { GlobalIndex global_index = 3; GlobalAsyncIndex global_async_index = 5; GlobalUniqueIndex global_unique_index = 8; + GlobalVectorIndex global_vector_index = 9; } Status status = 4; // list of columns content to be copied in to index table diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.cpp b/ydb/public/sdk/cpp/client/ydb_table/table.cpp index 7067585f5663..01957cc90167 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/table.cpp @@ -2346,6 +2346,9 @@ void TIndexDescription::SerializeTo(Ydb::Table::TableIndex& proto) const { case EIndexType::GlobalUnique: GlobalIndexSettings_.SerializeTo(*proto.mutable_global_unique_index()->mutable_settings()); break; + case EIndexType::GlobalVector: + *proto.mutable_global_vector_index() = Ydb::Table::GlobalVectorIndex(); + break; case EIndexType::Unknown: break; } diff --git a/ydb/public/sdk/cpp/client/ydb_table/table_enum.h 
b/ydb/public/sdk/cpp/client/ydb_table/table_enum.h index 25b57b005b79..a86f884408fd 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table_enum.h +++ b/ydb/public/sdk/cpp/client/ydb_table/table_enum.h @@ -28,6 +28,7 @@ enum class EIndexType { GlobalSync, GlobalAsync, GlobalUnique, + GlobalVector, Unknown = std::numeric_limits::max() }; From c140e0aa96e21b585af7a1fa6d902ce3555cb492 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Mon, 8 Jul 2024 14:40:37 +0000 Subject: [PATCH 02/21] Review fixes --- ydb/core/base/table_index.cpp | 10 ++-- ydb/core/kqp/provider/yql_kikimr_gateway.h | 16 +++--- ydb/core/protos/flat_scheme_op.proto | 15 ++---- ydb/core/tx/datashard/datashard_user_table.h | 2 +- ydb/core/tx/scheme_cache/scheme_cache.h | 2 +- .../schemeshard_build_index__create.cpp | 2 +- .../schemeshard_build_index_tx_base.cpp | 2 +- .../tx/schemeshard/schemeshard_info_types.h | 8 +-- .../schemeshard_path_describer.cpp | 28 ++++++---- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 8 +-- ydb/core/tx/schemeshard/schemeshard_utils.h | 21 +++----- .../tx/schemeshard/ut_helpers/ls_checks.cpp | 25 +++++---- .../tx/schemeshard/ut_helpers/ls_checks.h | 11 ++-- .../schemeshard/ut_index/ut_vector_index.cpp | 24 +++++---- ydb/core/ydb_convert/table_description.cpp | 8 +-- ydb/core/ydb_convert/table_settings.cpp | 4 +- ydb/public/api/protos/out/out.cpp | 16 +++--- ydb/public/api/protos/ydb_table.proto | 51 +++++++++---------- ydb/public/sdk/cpp/client/ydb_table/table.cpp | 4 +- .../sdk/cpp/client/ydb_table/table_enum.h | 2 +- 20 files changed, 127 insertions(+), 132 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index d1266e4265e3..519086f21ebd 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -24,7 +24,7 @@ TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexTy TTableColumns result; - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexType == 
NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { result.Keys.push_back(NTableVectorIndex::LevelColumn); result.Keys.push_back(NTableVectorIndex::IdColumn); result.Columns.insert(NTableVectorIndex::LevelColumn); @@ -81,15 +81,11 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } } - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { if (index.KeyColumns.size() != 1) { explain = "Only single key column is supported for vector index"; return false; } - if (index.DataColumns.size() != 0) { - explain = "Data columns are not supported for vector index"; - return false; - } } THashSet indexKeys; @@ -114,7 +110,7 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } } - if (indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { if (index.KeyColumns == table.Keys) { explain = TStringBuilder() << "table and index keys are the same"; diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index 4d3f2c698de8..ca7cfecf0f3a 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -64,7 +64,7 @@ struct TIndexDescription { GlobalSync = 0, GlobalAsync = 1, GlobalSyncUnique = 2, - Vector = 3 + GlobalSyncVectorKMeansTree = 3 }; // Index states here must be in sync with NKikimrSchemeOp::EIndexState protobuf @@ -126,9 +126,9 @@ struct TIndexDescription { return TIndexDescription::EType::GlobalAsync; case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique: return TIndexDescription::EType::GlobalSyncUnique; - case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: - return TIndexDescription::EType::Vector; - case NKikimrSchemeOp::EIndexType::EIndexTypeInvalid: + case 
NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: + return TIndexDescription::EType::GlobalSyncVectorKMeansTree; + default: YQL_ENSURE(false, "Unexpected NKikimrSchemeOp::EIndexType::EIndexTypeInvalid"); } } @@ -141,8 +141,8 @@ struct TIndexDescription { return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync; case TIndexDescription::EType::GlobalSyncUnique: return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique; - case NYql::TIndexDescription::EType::Vector: - return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector; + case NYql::TIndexDescription::EType::GlobalSyncVectorKMeansTree: + return NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree; } } @@ -178,8 +178,8 @@ struct TIndexDescription { return true; case EType::GlobalAsync: return false; - case EType::Vector: - return false; + case EType::GlobalSyncVectorKMeansTree: + return true; } } }; diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 74511b888e77..2f40f656a684 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -974,7 +974,7 @@ enum EIndexType { EIndexTypeGlobal = 1; EIndexTypeGlobalAsync = 2; EIndexTypeGlobalUnique = 3; - EIndexTypeGlobalVector = 4; + EIndexTypeGlobalVectorKmeansTree = 4; } enum EIndexState { @@ -984,13 +984,8 @@ enum EIndexState { EIndexStateWriteOnly = 3; } -message TVectorIndexDescription { - optional Ydb.Table.GlobalVectorIndex.IndexType IndexType = 1; - oneof DistanceSimilarity { - Ydb.Table.GlobalVectorIndex.Distance Distance = 2; - Ydb.Table.GlobalVectorIndex.Similarity Similarity = 3; - } - optional Ydb.Table.GlobalVectorIndex.VectorType VectorType = 4; +message TVectorIndexKmeansTreeDescription { + optional Ydb.Table.VectorIndexSettings Settings = 1; } message TIndexDescription { @@ -1013,7 +1008,7 @@ message TIndexDescription { repeated TTableDescription IndexImplTableDescriptions = 10; oneof SpecializedIndexDescription { - TVectorIndexDescription 
VectorIndexDescription = 11; + TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 11; } } @@ -1025,7 +1020,7 @@ message TIndexCreationConfig { optional EIndexState State = 5; //state of index at the creation time repeated string DataColumnNames = 6; //columns to be denormalized to read data just from index oneof SpecializedIndexDescription { - TVectorIndexDescription VectorIndexDescription = 7; + TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 7; } } diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index 0aca764d01e1..10198a97dc5a 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -279,7 +279,7 @@ struct TUserTable : public TThrRefBase { for (const auto& columnName : columnNames) { auto it = nameToId.find(columnName); - if (it == nameToId.end() && type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (it == nameToId.end() && type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { Y_ABORT_UNLESS(columnName == NTableIndex::NTableVectorIndex::IdColumn || columnName == NTableIndex::NTableVectorIndex::LevelColumn || columnName == NTableIndex::NTableVectorIndex::CentroidColumn || diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index 7d459f4bd3c4..570f47e67f4d 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -360,7 +360,7 @@ struct TSchemeCacheRequest { KindRegularTable = 1, KindSyncIndexTable = 2, KindAsyncIndexTable = 3, - KindVectorIndexTable = 4 + KindVectorIndexTable = 4, }; struct TEntry { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 0986cca309f9..8f921e8245be 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -226,7 +226,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: explain = "unsupported index type to build"; return false; - case Ydb::Table::TableIndex::TypeCase::kGlobalVectorIndex: + case Ydb::Table::TableIndex::TypeCase::kGlobalVectorKmeansTreeIndex: explain = "unsupported vector index type to build"; return false; case Ydb::Table::TableIndex::TypeCase::TYPE_NOT_SET: diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp index fd14a7ad7f53..2cd71e68602b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp @@ -274,7 +274,7 @@ void TSchemeShard::TIndexBuilder::TTxBase::Fill(NKikimrIndexBuilder::TIndexBuild case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync: *index.mutable_global_async_index() = Ydb::Table::GlobalAsyncIndex(); break; - case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: //TODO build vector index break; default: diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 8000d2d3359f..43cbd2287db7 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2365,12 +2365,12 @@ struct TTableIndexInfo : public TSimpleRefCount { alterData->IndexKeys.assign(config.GetKeyColumnNames().begin(), config.GetKeyColumnNames().end()); Y_ABORT_UNLESS(alterData->IndexKeys.size()); alterData->IndexDataColumns.assign(config.GetDataColumnNames().begin(), config.GetDataColumnNames().end()); - Y_ABORT_UNLESS(alterData->IndexDataColumns.empty() || config.GetType() != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector); + 
Y_ABORT_UNLESS(alterData->IndexDataColumns.empty() || config.GetType() != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); alterData->State = config.HasState() ? config.GetState() : EState::EIndexStateReady; - if (config.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { - alterData->VectorIndexDescription = config.GetVectorIndexDescription(); + if (config.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + alterData->VectorIndexKmeansTreeDescription = config.GetVectorIndexKmeansTreeDescription(); } return result; @@ -2385,7 +2385,7 @@ struct TTableIndexInfo : public TSimpleRefCount { TTableIndexInfo::TPtr AlterData = nullptr; - NKikimrSchemeOp::TVectorIndexDescription VectorIndexDescription; + std::optional VectorIndexKmeansTreeDescription; }; struct TCdcStreamInfo : public TSimpleRefCount { diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 8d5a770b871c..3736a36f2e61 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -136,7 +136,7 @@ TPathElement::EPathSubType TPathDescriber::CalcPathSubType(const TPath& path) { case NKikimrSchemeOp::EIndexTypeGlobal: case NKikimrSchemeOp::EIndexTypeGlobalUnique: return TPathElement::EPathSubType::EPathSubTypeSyncIndexImplTable; - case NKikimrSchemeOp::EIndexTypeGlobalVector: + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: return TPathElement::EPathSubType::EPathSubTypeVectorIndexImplTable; default: Y_DEBUG_ABORT("%s", (TStringBuilder() << "unexpected indexInfo->Type# " << indexInfo->Type).data()); @@ -1263,16 +1263,22 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary()); } - if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVector) { - auto& vectorIndexDescription = 
*entry.MutableVectorIndexDescription(); - vectorIndexDescription.SetIndexType(indexInfo->VectorIndexDescription.GetIndexType()); - if (indexInfo->VectorIndexDescription.HasDistance()) - vectorIndexDescription.SetDistance(indexInfo->VectorIndexDescription.GetDistance()); - else if (indexInfo->VectorIndexDescription.HasSimilarity()) - vectorIndexDescription.SetSimilarity(indexInfo->VectorIndexDescription.GetSimilarity()); - else - Y_FAIL_S("Either distance or similarity should be set in VectorIndexDescription: " << indexInfo->VectorIndexDescription); - vectorIndexDescription.SetVectorType(indexInfo->VectorIndexDescription.GetVectorType()); + if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { + const auto& indexInfoDescription = indexInfo->VectorIndexKmeansTreeDescription; + if (indexInfoDescription) { + const auto& indexInfoSettings = indexInfoDescription->GetSettings(); + auto entrySettings = entry.MutableVectorIndexKmeansTreeDescription()->MutableSettings(); + if (indexInfoSettings.Hasdistance()) + entrySettings->set_distance(indexInfoSettings.distance()); + else if (indexInfoSettings.Hassimilarity()) + entrySettings->set_similarity(indexInfoSettings.similarity()); + else + Y_FAIL_S("Either distance or similarity should be set in VectorIndexKmeansTreeDescription: " << *indexInfoDescription); + entrySettings->set_vector_type(indexInfoSettings.vector_type()); + entrySettings->set_vector_dimension(indexInfoSettings.vector_dimension()); + } else { + Y_FAIL_S("VectorIndexKmeansTreeDescription should be set"); + } } } diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 99f438e93b4d..9b6bad19a1a3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -308,7 +308,7 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableInfo, indexTableDesc); - if (indexType == 
NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { FillVectorIndexColumns(result); return result; } @@ -379,7 +379,7 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableDescr, indexTableDesc); - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { FillVectorIndexColumns(result); return result; } @@ -557,15 +557,11 @@ bool ExtractTypes(const NSchemeShard::TTableInfo::TPtr& baseTableInfo, TColumnTy } bool IsCompatibleKeyTypes( - const NKikimrSchemeOp::EIndexType indexType, const TColumnTypes& baseTableColumnTypes, const TTableColumns& implTableColumns, bool uniformTable, TString& explain) { - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) - return true; - const NScheme::TTypeRegistry* typeRegistry = AppData()->TypeRegistry; Y_ABORT_UNLESS(typeRegistry); diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 7de99117bace..3bb62f0e3a97 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -167,7 +167,6 @@ bool ExtractTypes(const NSchemeShard::TTableInfo::TPtr& baseTableInfo, TColumnTy bool ExtractTypes(const NKikimrSchemeOp::TTableDescription& baseTableDesc, TColumnTypes& columnsTypes, TString& explain); bool IsCompatibleKeyTypes( - const NKikimrSchemeOp::EIndexType indexType, const TColumnTypes& baseTableColumnsTypes, const TTableColumns& implTableColumns, bool uniformTable, @@ -193,11 +192,6 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat error = "It is not allowed to create index with data column"; return false; } - if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { - status = 
NKikimrScheme::EStatus::StatusPreconditionFailed; - error = "It is not allowed to create vector index with data column"; - return false; - } } if (!IsCompatibleIndex(indexDesc.GetType(), baseTableColumns, indexKeys, error)) { @@ -211,7 +205,9 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector) { + implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); + + if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { if (indexKeys.KeyColumns.size() != 1) { status = NKikimrScheme::EStatus::StatusInvalidParameter; error = "Only single column is supported for vector index"; @@ -227,12 +223,11 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); return false; } - } - - implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); - if (!IsCompatibleKeyTypes(indexDesc.GetType(), baseColumnTypes, implTableColumns, uniformTable, error)) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - return false; + } else { + if (!IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + return false; + } } if (implTableColumns.Keys.size() > schemeLimits.MaxTableKeyColumns) { diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 4284fce760b3..94edf3bf3d2e 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -830,16 +830,23 @@ TCheckFunc IndexDataColumns(const TVector& dataColumnNames) { }; } -TCheckFunc VectorIndexDescription(Ydb::Table::GlobalVectorIndex_IndexType 
indexType, - Ydb::Table::GlobalVectorIndex_Distance distance, - Ydb::Table::GlobalVectorIndex_Similarity similarity, - Ydb::Table::GlobalVectorIndex_VectorType vectorType) { - return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { - UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetIndexType(), indexType); - UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetDistance(), distance); - UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetSimilarity(), similarity); - UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetTableIndex().GetVectorIndexDescription().GetVectorType(), vectorType); +TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist, + Ydb::Table::VectorIndexSettings_Similarity similarity, + Ydb::Table::VectorIndexSettings_VectorType vectorType, + ui32 vectorDimension + ) { + return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + if (record.GetPathDescription().GetTableIndex().HasVectorIndexKmeansTreeDescription()) { + const auto& settings = record.GetPathDescription().GetTableIndex().GetVectorIndexKmeansTreeDescription().GetSettings(); + UNIT_ASSERT_VALUES_EQUAL(settings.distance(), dist); + UNIT_ASSERT_VALUES_EQUAL(settings.similarity(), similarity); + UNIT_ASSERT_VALUES_EQUAL(settings.vector_type(), vectorType); + UNIT_ASSERT_VALUES_EQUAL(settings.vector_dimension(), vectorDimension); + } else { + UNIT_FAIL("oneof SpecializedIndexDescription should be set."); + } }; + } diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index 8dafa9bf5a6a..c4cb9c8df26d 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -9,6 +9,8 @@ #include #include +#include + #include namespace NSchemeShardUT_Private { @@ -139,10 +141,11 @@ namespace NLs { TCheckFunc 
IndexKeys(const TVector& keyNames); TCheckFunc IndexDataColumns(const TVector& dataColumnNames); - TCheckFunc VectorIndexDescription(Ydb::Table::GlobalVectorIndex_IndexType indexType, - Ydb::Table::GlobalVectorIndex_Distance distance, - Ydb::Table::GlobalVectorIndex_Similarity similarity, - Ydb::Table::GlobalVectorIndex_VectorType vectorType); + TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist, + Ydb::Table::VectorIndexSettings_Similarity similarity, + Ydb::Table::VectorIndexSettings_VectorType vectorType, + ui32 vectorDimension + ); TCheckFunc SequenceName(const TString& name); TCheckFunc SequenceIncrement(i64 increment); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index e66ea203e358..ab2bb121bef5 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -29,25 +29,27 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { IndexDescription { Name: "idx_vector" KeyColumnNames: ["embedding"] - Type: EIndexTypeGlobalVector - VectorIndexDescription { - IndexType: INDEX_TYPE_KMEANS_TREE, - Distance: DISTANCE_COSINE, - VectorType: VECTOR_TYPE_FLOAT - } + Type: EIndexTypeGlobalVectorKmeansTree + VectorIndexKmeansTreeDescription { + Settings : { + distance: DISTANCE_COSINE, + vector_type: VECTOR_TYPE_FLOAT, + vector_dimension: 1024 + } + } } )"); env.TestWaitNotification(runtime, txId); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector"), { NLs::PathExist, - NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVector), + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), NLs::IndexKeys({"embedding"}), - NLs::VectorIndexDescription(Ydb::Table::GlobalVectorIndex::INDEX_TYPE_KMEANS_TREE, - Ydb::Table::GlobalVectorIndex::DISTANCE_COSINE, - Ydb::Table::GlobalVectorIndex::SIMILARITY_UNSPECIFIED, - 
Ydb::Table::GlobalVectorIndex::VECTOR_TYPE_FLOAT + NLs::VectorIndexDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, + Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED, + Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, + 1024 ), }); diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index a401a8586738..0627da38e686 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -880,8 +880,8 @@ void FillIndexDescriptionImpl(TYdbProto& out, const NKikimrSchemeOp::TTableDescr tableIndex.GetIndexImplTableDescriptions() ); break; - case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector: - *index->mutable_global_vector_index() = Ydb::Table::GlobalVectorIndex(); + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: + *index->mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); break; default: break; @@ -949,8 +949,8 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique); break; - case Ydb::Table::TableIndex::kGlobalVectorIndex: - indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVector); + case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: + indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); break; default: diff --git a/ydb/core/ydb_convert/table_settings.cpp b/ydb/core/ydb_convert/table_settings.cpp index fa8ecd4cf74d..9af62a1a4d81 100644 --- a/ydb/core/ydb_convert/table_settings.cpp +++ b/ydb/core/ydb_convert/table_settings.cpp @@ -442,8 +442,8 @@ bool FillIndexTablePartitioning( } break; - case Ydb::Table::TableIndex::kGlobalVectorIndex: - if (!fillIndexPartitioning(index.global_vector_index().settings())) { + case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: + if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().settings())) { 
return false; } break; diff --git a/ydb/public/api/protos/out/out.cpp b/ydb/public/api/protos/out/out.cpp index 3ba6d0a478b9..1e96598f2e47 100644 --- a/ydb/public/api/protos/out/out.cpp +++ b/ydb/public/api/protos/out/out.cpp @@ -31,18 +31,14 @@ Y_DECLARE_OUT_SPEC(, Ydb::Import::ImportProgress::Progress, stream, value) { stream << Ydb::Import::ImportProgress_Progress_Name(value); } -Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::IndexType, stream, value) { - stream << Ydb::Table::GlobalVectorIndex::IndexType_Name(value); +Y_DECLARE_OUT_SPEC(, Ydb::Table::VectorIndexSettings::Distance, stream, value) { + stream << Ydb::Table::VectorIndexSettings::Distance_Name(value); } -Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::Distance, stream, value) { - stream << Ydb::Table::GlobalVectorIndex::Distance_Name(value); +Y_DECLARE_OUT_SPEC(, Ydb::Table::VectorIndexSettings::Similarity, stream, value) { + stream << Ydb::Table::VectorIndexSettings::Similarity_Name(value); } -Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::Similarity, stream, value) { - stream << Ydb::Table::GlobalVectorIndex::Similarity_Name(value); -} - -Y_DECLARE_OUT_SPEC(, Ydb::Table::GlobalVectorIndex::VectorType, stream, value) { - stream << Ydb::Table::GlobalVectorIndex::VectorType_Name(value); +Y_DECLARE_OUT_SPEC(, Ydb::Table::VectorIndexSettings::VectorType, stream, value) { + stream << Ydb::Table::VectorIndexSettings::VectorType_Name(value); } \ No newline at end of file diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index 3c3f2869bbc3..20df240f38b7 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -59,26 +59,7 @@ message GlobalIndexSettings { PartitioningSettings partitioning_settings = 3; } -message GlobalIndex { - GlobalIndexSettings settings = 1; -} - -message GlobalAsyncIndex { - GlobalIndexSettings settings = 1; -} - -message GlobalUniqueIndex { - GlobalIndexSettings settings = 1; -} - -message 
GlobalVectorIndex { - optional GlobalIndexSettings settings = 1; - - enum IndexType { - INDEX_TYPE_UNSPECIFIED = 0; - INDEX_TYPE_KMEANS_TREE = 1; - } - +message VectorIndexSettings { enum Distance { DISTANCE_UNSPECIFIED = 0; DISTANCE_COSINE = 1; @@ -100,12 +81,30 @@ message GlobalVectorIndex { VECTOR_TYPE_BIT = 4; } - IndexType indexType = 2; oneof distance_similarity { - Distance distance = 3; - Similarity similarity = 4; + Distance distance = 1; + Similarity similarity = 2; } - VectorType vectorType = 5; + VectorType vector_type = 3; + + uint32 vector_dimension = 4; +} + +message GlobalIndex { + GlobalIndexSettings settings = 1; +} + +message GlobalAsyncIndex { + GlobalIndexSettings settings = 1; +} + +message GlobalUniqueIndex { + GlobalIndexSettings settings = 1; +} + +message GlobalVectorKMeansTreeIndex { + GlobalIndexSettings settings = 1; + VectorIndexSettings vector_settings = 2; } // Represent secondary index @@ -119,7 +118,7 @@ message TableIndex { GlobalIndex global_index = 3; GlobalAsyncIndex global_async_index = 4; GlobalUniqueIndex global_unique_index = 6; - GlobalVectorIndex global_vector_index = 7; + GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 7; } // list of columns content to be copied in to index table repeated string data_columns = 5; @@ -143,7 +142,7 @@ message TableIndexDescription { GlobalIndex global_index = 3; GlobalAsyncIndex global_async_index = 5; GlobalUniqueIndex global_unique_index = 8; - GlobalVectorIndex global_vector_index = 9; + GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 9; } Status status = 4; // list of columns content to be copied in to index table diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.cpp b/ydb/public/sdk/cpp/client/ydb_table/table.cpp index 01957cc90167..ced89b6a673b 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/table.cpp @@ -2346,8 +2346,8 @@ void TIndexDescription::SerializeTo(Ydb::Table::TableIndex& proto) const { 
case EIndexType::GlobalUnique: GlobalIndexSettings_.SerializeTo(*proto.mutable_global_unique_index()->mutable_settings()); break; - case EIndexType::GlobalVector: - *proto.mutable_global_vector_index() = Ydb::Table::GlobalVectorIndex(); + case EIndexType::GlobalVectorKMeansTree: + *proto.mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); break; case EIndexType::Unknown: break; diff --git a/ydb/public/sdk/cpp/client/ydb_table/table_enum.h b/ydb/public/sdk/cpp/client/ydb_table/table_enum.h index a86f884408fd..1660706f57a7 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table_enum.h +++ b/ydb/public/sdk/cpp/client/ydb_table/table_enum.h @@ -28,7 +28,7 @@ enum class EIndexType { GlobalSync, GlobalAsync, GlobalUnique, - GlobalVector, + GlobalVectorKMeansTree, Unknown = std::numeric_limits::max() }; From 80a5fbdef22f3aa721b9ecbbe89cd87e4ef69114 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Tue, 9 Jul 2024 07:03:26 +0000 Subject: [PATCH 03/21] Review fixes 2 --- ydb/core/base/table_index.cpp | 17 +---------------- ydb/core/base/table_index.h | 2 +- ydb/core/kqp/provider/yql_kikimr_gateway.cpp | 12 ++++++++++-- ydb/core/tx/datashard/datashard_user_table.h | 12 +----------- .../schemeshard_build_index_tx_base.cpp | 2 +- .../tx/schemeshard/schemeshard_info_types.h | 4 ++-- .../schemeshard/schemeshard_path_describer.cpp | 9 ++++----- ydb/core/tx/schemeshard/schemeshard_utils.h | 11 ++++------- ydb/public/api/protos/ydb_table.proto | 2 +- 9 files changed, 25 insertions(+), 46 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index 519086f21ebd..d56c01eed33b 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -16,24 +16,9 @@ TVector::const_iterator IsUniq(const TVector& names) { namespace NKikimr { namespace NTableIndex { -TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) { - { - 
TString explain; - Y_ABORT_UNLESS(IsCompatibleIndex(indexType, table, index, explain), "explain is %s", explain.c_str()); - } - +TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns& index) { TTableColumns result; - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - result.Keys.push_back(NTableVectorIndex::LevelColumn); - result.Keys.push_back(NTableVectorIndex::IdColumn); - result.Columns.insert(NTableVectorIndex::LevelColumn); - result.Columns.insert(NTableVectorIndex::IdColumn); - result.Columns.insert(NTableVectorIndex::CentroidColumn); - result.Columns.insert(NTableVectorIndex::IdsColumn); - return result; - } - for (const auto& ik: index.KeyColumns) { result.Keys.push_back(ik); result.Columns.insert(ik); diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index 7d7af7b915d0..7cfc9eae3363 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -22,7 +22,7 @@ struct TIndexColumns { }; bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain); -TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index); +TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns& index); } } diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index 301e7352db2e..0b6fb77fd48b 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -115,10 +115,18 @@ void IKikimrGateway::BuildIndexMetadata(TTableMetadataResult& loadTableMetadataR for (size_t i = 0; i < indexesCount; i++) { const auto& index = tableMetadata->Indexes[i]; auto indexTablePath = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableName, index.Name); + + NKikimr::NTableIndex::TIndexColumns 
indexColumns{index.KeyColumns, {}}; + + TString error; + YQL_ENSURE(IsCompatibleIndex(TIndexDescription::ConvertIndexType(index.Type), + tableColumns, + indexColumns, + error), "Index is not compatible: " << error); + NKikimr::NTableIndex::TTableColumns indexTableColumns = NKikimr::NTableIndex::CalcTableImplDescription( - TIndexDescription::ConvertIndexType(index.Type), tableColumns, - NKikimr::NTableIndex::TIndexColumns{index.KeyColumns, {}}); + indexColumns); TKikimrTableMetadataPtr indexTableMetadata = new TKikimrTableMetadata(cluster, indexTablePath); indexTableMetadata->DoesExist = true; diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index 10198a97dc5a..fc77adf9919e 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -274,20 +274,10 @@ struct TUserTable : public TThrRefBase { nameToId.emplace(column.Name, id); } - auto fillColumnIds = [&nameToId, type = Type](const auto& columnNames, TVector& columnIds) { + auto fillColumnIds = [&nameToId](const auto& columnNames, TVector& columnIds) { columnIds.reserve(columnNames.size()); for (const auto& columnName : columnNames) { auto it = nameToId.find(columnName); - - if (it == nameToId.end() && type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - Y_ABORT_UNLESS(columnName == NTableIndex::NTableVectorIndex::IdColumn || - columnName == NTableIndex::NTableVectorIndex::LevelColumn || - columnName == NTableIndex::NTableVectorIndex::CentroidColumn || - columnName == NTableIndex::NTableVectorIndex::IdsColumn - ); - continue; - } - Y_ABORT_UNLESS(it != nameToId.end()); columnIds.push_back(it->second); } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp index 2cd71e68602b..b766761905e8 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp @@ -275,7 +275,7 @@ void TSchemeShard::TIndexBuilder::TTxBase::Fill(NKikimrIndexBuilder::TIndexBuild *index.mutable_global_async_index() = Ydb::Table::GlobalAsyncIndex(); break; case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: - //TODO build vector index + *index.mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); break; default: Y_ABORT("Unreachable"); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 43cbd2287db7..668ae758e12b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2370,7 +2370,7 @@ struct TTableIndexInfo : public TSimpleRefCount { alterData->State = config.HasState() ? config.GetState() : EState::EIndexStateReady; if (config.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - alterData->VectorIndexKmeansTreeDescription = config.GetVectorIndexKmeansTreeDescription(); + alterData->SpecializedIndexDescription = config.GetVectorIndexKmeansTreeDescription(); } return result; @@ -2385,7 +2385,7 @@ struct TTableIndexInfo : public TSimpleRefCount { TTableIndexInfo::TPtr AlterData = nullptr; - std::optional VectorIndexKmeansTreeDescription; + std::variant SpecializedIndexDescription; }; struct TCdcStreamInfo : public TSimpleRefCount { diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 3736a36f2e61..bb32a958aa67 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -1264,20 +1264,19 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name } if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { - const auto& indexInfoDescription = indexInfo->VectorIndexKmeansTreeDescription; - if 
(indexInfoDescription) { - const auto& indexInfoSettings = indexInfoDescription->GetSettings(); + if (const auto* vectorIndexKmeansTreeDescription = std::get_if(&indexInfo->SpecializedIndexDescription)) { + const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings(); auto entrySettings = entry.MutableVectorIndexKmeansTreeDescription()->MutableSettings(); if (indexInfoSettings.Hasdistance()) entrySettings->set_distance(indexInfoSettings.distance()); else if (indexInfoSettings.Hassimilarity()) entrySettings->set_similarity(indexInfoSettings.similarity()); else - Y_FAIL_S("Either distance or similarity should be set in VectorIndexKmeansTreeDescription: " << *indexInfoDescription); + Y_FAIL_S("Either distance or similarity should be set in index settings: " << indexInfoSettings); entrySettings->set_vector_type(indexInfoSettings.vector_type()); entrySettings->set_vector_dimension(indexInfoSettings.vector_dimension()); } else { - Y_FAIL_S("VectorIndexKmeansTreeDescription should be set"); + Y_FAIL_S("SpecializedIndexDescription should be set"); } } diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 3bb62f0e3a97..19a9e207313b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -205,14 +205,9 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); - if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - if (indexKeys.KeyColumns.size() != 1) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = "Only single column is supported for vector index"; - return false; - } + //We have already checked this in IsCompatibleIndex + Y_ABORT_UNLESS(indexKeys.KeyColumns.size() == 1); const TString& indexColumnName = indexKeys.KeyColumns[0]; 
Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); @@ -224,6 +219,8 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } } else { + implTableColumns = CalcTableImplDescription(baseTableColumns, indexKeys); + if (!IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return false; diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index 20df240f38b7..7d556b1a844c 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -81,7 +81,7 @@ message VectorIndexSettings { VECTOR_TYPE_BIT = 4; } - oneof distance_similarity { + oneof metric { Distance distance = 1; Similarity similarity = 2; } From 35e3b5d0c2d689ef9c9e725118e290f4143b2961 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 05:58:36 +0000 Subject: [PATCH 04/21] Two tables --- ydb/core/base/table_index.cpp | 14 + ydb/core/base/table_vector_index.h | 20 +- ...emeshard__operation_create_build_index.cpp | 21 +- ...eshard__operation_create_indexed_table.cpp | 22 +- ...emeshard__operation_drop_indexed_table.cpp | 8 +- .../schemeshard_build_index__create.cpp | 4 +- .../tx/schemeshard/schemeshard_info_types.cpp | 4 +- .../tx/schemeshard/schemeshard_info_types.h | 3 +- .../schemeshard_path_describer.cpp | 28 +- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 367 ++++++++++-------- ydb/core/tx/schemeshard/schemeshard_utils.h | 21 +- .../schemeshard/ut_index/ut_vector_index.cpp | 97 ++++- ydb/core/ydb_convert/table_description.cpp | 4 +- ydb/core/ydb_convert/table_settings.cpp | 19 +- ydb/core/ydb_convert/table_settings.h | 2 +- ydb/public/api/protos/ydb_table.proto | 5 +- 16 files changed, 415 insertions(+), 224 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index d56c01eed33b..1ba8cd9f5377 100644 --- a/ydb/core/base/table_index.cpp +++ 
b/ydb/core/base/table_index.cpp @@ -13,6 +13,10 @@ TVector::const_iterator IsUniq(const TVector& names) { return names.end(); } +bool Contains(const TVector& names, TString str) { + return std::find(names.begin(), names.end(), str) != names.end(); +} + namespace NKikimr { namespace NTableIndex { @@ -71,6 +75,16 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable explain = "Only single key column is supported for vector index"; return false; } + + if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) { + explain = TStringBuilder() << "Key column should not have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn; + return false; + } + + if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) { + explain = TStringBuilder() << "Data column should not have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn; + return false; + } } THashSet indexKeys; diff --git a/ydb/core/base/table_vector_index.h b/ydb/core/base/table_vector_index.h index fde27891f613..688e45cc2652 100644 --- a/ydb/core/base/table_vector_index.h +++ b/ydb/core/base/table_vector_index.h @@ -2,13 +2,21 @@ namespace NKikimr { namespace NTableIndex { -namespace NTableVectorIndex { +namespace NTableVectorKmeansTreeIndex { + +// Vector KmeansTree index tables description + +// Levels table +inline constexpr char LevelTable[] = "indexImplLevelTable"; +inline constexpr char LevelTable_ParentIdColumn[] = "-parent"; +inline constexpr char LevelTable_IdColumn[] = "-id"; +inline constexpr char LevelTable_EmbeddingColumn[] = "-embedding"; + +// Posting table +inline constexpr char PostingTable[] = "indexImplPostingTable"; +inline constexpr char PostingTable_ParentIdColumn[] = "-parent"; + -// Special columns for vector index -inline constexpr char LevelColumn[] = "level"; -inline constexpr char IdColumn[] = "id"; -inline constexpr char CentroidColumn[] = 
"centroid"; -inline constexpr char IdsColumn[] = "ids"; } } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index b04e166ba3fe..b42e6b6ca04d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -10,6 +10,8 @@ namespace NKikimr::NSchemeShard { +using namespace NTableIndex; + TVector CreateBuildColumn(TOperationId opId, const TTxTransaction& tx, TOperationContext& context) { Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::EOperationType::ESchemeOpCreateColumnBuild); @@ -111,18 +113,21 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa result.push_back(CreateInitializeBuildIndexMainTable(NextPartId(opId, result), outTx)); } - { + auto createIndexImplTable = [&] (NKikimrSchemeOp::TTableDescription&& implTableDesc) { auto outTx = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpInitiateBuildIndexImplTable); - auto& indexImplTableDescription = *outTx.MutableCreateTable(); + *outTx.MutableCreateTable() = implTableDesc; - // This description provided by user to override partition policy - const auto& userIndexDesc = indexDesc.GetIndexImplTableDescriptions(0); - indexImplTableDescription = CalcImplTableDesc(indexDesc.GetType(), tableInfo, implTableColumns, userIndexDesc); - - indexImplTableDescription.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); - indexImplTableDescription.MutablePartitionConfig()->SetShadowData(true); + outTx.MutableCreateTable()->MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); + outTx.MutableCreateTable()->MutablePartitionConfig()->SetShadowData(true); result.push_back(CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx)); + }; + + if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { +
createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexDesc.GetIndexImplTableDescriptions(0))); + createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexDesc.GetIndexImplTableDescriptions(1))); + } else { + createIndexImplTable(CalcImplTableDesc(tableInfo, implTableColumns, indexDesc.GetIndexImplTableDescriptions(0))); } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index 6e58632ee3cb..ca88b763a219 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -238,25 +238,37 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr result.push_back(CreateNewTableIndex(NextPartId(nextId, result), scheme)); } - { + auto createIndexImplTable = [&] (const NKikimrSchemeOp::TTableDescription&& implTableDesc) { auto scheme = TransactionTemplate( tx.GetWorkingDir() + "/" + baseTableDescription.GetName() + "/" + indexDescription.GetName(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateTable); scheme.SetFailOnExist(tx.GetFailOnExist()); scheme.SetAllowCreateInTempDir(tx.GetAllowCreateInTempDir()); - const auto& implTableColumns = indexes.at(indexDescription.GetName()); + *scheme.MutableCreateTable() = implTableDesc; - auto& indexImplTableDescription = *scheme.MutableCreateTable(); + return result.push_back(CreateNewTable(NextPartId(nextId, result), scheme)); + }; + const auto& implTableColumns = indexes.at(indexDescription.GetName()); + if (indexDescription.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + NKikimrSchemeOp::TTableDescription userLevelDesc, userPostingDesc; + if (indexDescription.IndexImplTableDescriptionsSize() == 2) { + // This description provided by user to override partition policy 
+ userLevelDesc = indexDescription.GetIndexImplTableDescriptions(0); + userPostingDesc = indexDescription.GetIndexImplTableDescriptions(1); + } + + createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc)); + createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPostingDesc)); + } else { NKikimrSchemeOp::TTableDescription userIndexDesc; if (indexDescription.IndexImplTableDescriptionsSize()) { // This description provided by user to override partition policy userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); } - indexImplTableDescription = CalcImplTableDesc(indexDescription.GetType(), baseTableDescription, implTableColumns, userIndexDesc); - result.push_back(CreateNewTable(NextPartId(nextId, result), scheme)); + createIndexImplTable(CalcImplTableDesc(baseTableDescription, implTableColumns, userIndexDesc)); } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp index 5f32acec8c03..51bf6f69a0b9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp @@ -494,10 +494,12 @@ TVector CreateDropIndexedTable(TOperationId nextId, const T result.push_back(CreateDropCdcStreamImpl(NextPartId(nextId, result), dropStream)); } - Y_ABORT_UNLESS(child.Base()->GetChildren().size() == 1); for (auto& [implName, implPathId] : child.Base()->GetChildren()) { - Y_ABORT_UNLESS(implName == "indexImplTable" || implName == "streamImpl", - "unexpected name %s", implName.c_str()); + Y_ABORT_UNLESS(implName == "indexImplTable" + || implName == "streamImpl" + || implName == NTableIndex::NTableVectorKmeansTreeIndex::LevelTable + || implName == NTableIndex::NTableVectorKmeansTreeIndex::PostingTable + , "unexpected name %s", 
implName.c_str()); TPath implPath = child.Child(implName); { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 8f921e8245be..faa86875499d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -219,9 +219,11 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder switch (index.type_case()) { case Ydb::Table::TableIndex::TypeCase::kGlobalIndex: buildInfo->IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobal; + buildInfo->ImplTableDescriptions.resize(1); break; case Ydb::Table::TableIndex::TypeCase::kGlobalAsyncIndex: buildInfo->IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync; + buildInfo->ImplTableDescriptions.resize(1); break; case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: explain = "unsupported index type to build"; @@ -239,7 +241,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder buildInfo->DataColumns.assign(index.data_columns().begin(), index.data_columns().end()); Ydb::StatusIds::StatusCode status; - if (!FillIndexTablePartitioning(buildInfo->ImplTableDescription, index, status, explain)) { + if (!FillIndexTablePartitioning(buildInfo->ImplTableDescriptions, index, status, explain)) { return false; } } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 4d1338e6e254..0d32fd8dbfbb 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -2114,7 +2114,9 @@ void TIndexBuildInfo::SerializeToProto(TSchemeShard* ss, NKikimrSchemeOp::TIndex *index.AddDataColumnNames() = x; } - *index.AddIndexImplTableDescriptions() = ImplTableDescription; + for (const auto& implTableDescription : ImplTableDescriptions) { + *index.AddIndexImplTableDescriptions() = 
implTableDescription; + } } void TIndexBuildInfo::SerializeToProto(TSchemeShard* ss, NKikimrIndexBuilder::TColumnBuildSettings* result) const { diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 668ae758e12b..a7f3a37aec04 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2365,7 +2365,6 @@ struct TTableIndexInfo : public TSimpleRefCount { alterData->IndexKeys.assign(config.GetKeyColumnNames().begin(), config.GetKeyColumnNames().end()); Y_ABORT_UNLESS(alterData->IndexKeys.size()); alterData->IndexDataColumns.assign(config.GetDataColumnNames().begin(), config.GetDataColumnNames().end()); - Y_ABORT_UNLESS(alterData->IndexDataColumns.empty() || config.GetType() != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); alterData->State = config.HasState() ? config.GetState() : EState::EIndexStateReady; @@ -2923,7 +2922,7 @@ struct TIndexBuildInfo: public TSimpleRefCount { TString ImplTablePath; NTableIndex::TTableColumns ImplTableColumns; - NKikimrSchemeOp::TTableDescription ImplTableDescription; + TVector ImplTableDescriptions; EState State = EState::Invalid; TString Issue; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index bb32a958aa67..495857fd76b9 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -1246,22 +1246,26 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name auto* indexPath = PathsById.FindPtr(pathId); Y_ABORT_UNLESS(indexPath); - Y_ABORT_UNLESS((*indexPath)->GetChildren().size() == 1); - const auto& indexImplTablePathId = (*indexPath)->GetChildren().begin()->second; + const ui8 expectedIndexImplTableCount = indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree ? 
2 : 1; + Y_ABORT_UNLESS((*indexPath)->GetChildren().size() == expectedIndexImplTableCount); - auto* tableInfo = Tables.FindPtr(indexImplTablePathId); - Y_ABORT_UNLESS(tableInfo); + ui64 dataSize = 0; + for (const auto& indexImplTablePathId : (*indexPath)->GetChildren()) { + auto* tableInfo = Tables.FindPtr(indexImplTablePathId.second); + Y_ABORT_UNLESS(tableInfo); - const auto& tableStats = (*tableInfo)->GetStats().Aggregated; - entry.SetDataSize(tableStats.DataSize + tableStats.IndexSize); + const auto& tableStats = (*tableInfo)->GetStats().Aggregated; + dataSize += tableStats.DataSize + tableStats.IndexSize; - auto* tableDescription = entry.AddIndexImplTableDescriptions(); - if (fillConfig) { - FillPartitionConfig((*tableInfo)->PartitionConfig(), *tableDescription->MutablePartitionConfig()); - } - if (fillBoundaries) { - FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary()); + auto* tableDescription = entry.AddIndexImplTableDescriptions(); + if (fillConfig) { + FillPartitionConfig((*tableInfo)->PartitionConfig(), *tableDescription->MutablePartitionConfig()); + } + if (fillBoundaries) { + FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary()); + } } + entry.SetDataSize(dataSize); if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { if (const auto* vectorIndexKmeansTreeDescription = std::get_if(&indexInfo->SpecializedIndexDescription)) { diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 9b6bad19a1a3..b7e1ac50086e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -255,64 +255,104 @@ TTableColumns ExtractInfo(const NSchemeShard::TTableInfo::TPtr &tableInfo) { return result; } -void FillVectorIndexColumns(NKikimrSchemeOp::TTableDescription& result) { - { - auto levelColumn = result.AddColumns(); - levelColumn->SetName(NTableVectorIndex::LevelColumn); - 
levelColumn->SetType("Uint8"); - levelColumn->SetTypeId(NScheme::NTypeIds::Uint8); - levelColumn->SetId(0); +NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + // KIKIMR-6687 + NKikimrSchemeOp::TPartitionConfig result; + + if (baseTablePartitionConfig.HasNamedCompactionPolicy()) { + result.SetNamedCompactionPolicy(baseTablePartitionConfig.GetNamedCompactionPolicy()); } - { - auto idColumn = result.AddColumns(); - idColumn->SetName(NTableVectorIndex::IdColumn); - idColumn->SetType("Uint32"); - idColumn->SetTypeId(NScheme::NTypeIds::Uint32); - idColumn->SetId(1); + if (baseTablePartitionConfig.HasCompactionPolicy()) { + result.MutableCompactionPolicy()->CopyFrom(baseTablePartitionConfig.GetCompactionPolicy()); } - { - auto centroidColumn = result.AddColumns(); - centroidColumn->SetName(NTableVectorIndex::CentroidColumn); - centroidColumn->SetType("String"); - centroidColumn->SetTypeId(NScheme::NTypeIds::String); - centroidColumn->SetId(2); + // skip optional uint64 FollowerCount = 3; + if (baseTablePartitionConfig.HasExecutorCacheSize()) { + result.SetExecutorCacheSize(baseTablePartitionConfig.GetExecutorCacheSize()); } - { - auto idsColumn = result.AddColumns(); - idsColumn->SetName(NTableVectorIndex::IdsColumn); - idsColumn->SetType("String"); - idsColumn->SetTypeId(NScheme::NTypeIds::String); - idsColumn->SetId(3); + // skip optional bool AllowFollowerPromotion = 5 [default = true]; + if (baseTablePartitionConfig.HasTxReadSizeLimit()) { + result.SetTxReadSizeLimit(baseTablePartitionConfig.GetTxReadSizeLimit()); + } + // skip optional uint32 CrossDataCenterFollowerCount = 8; + if (baseTablePartitionConfig.HasChannelProfileId()) { + result.SetChannelProfileId(baseTablePartitionConfig.GetChannelProfileId()); } - result.AddKeyColumnNames(NTableVectorIndex::LevelColumn); - result.AddKeyColumnNames(NTableVectorIndex::IdColumn); 
-} -NKikimrSchemeOp::TTableDescription CalcImplTableDesc( - const NKikimrSchemeOp::EIndexType indexType, - const NSchemeShard::TTableInfo::TPtr& baseTableInfo, - const NTableIndex::TTableColumns& implTableColumns, - const NKikimrSchemeOp::TTableDescription& indexTableDesc) -{ - NKikimrSchemeOp::TTableDescription result; + if (indexTableDesc.GetPartitionConfig().HasPartitioningPolicy()) { + result.MutablePartitioningPolicy()->CopyFrom(indexTableDesc.GetPartitionConfig().GetPartitioningPolicy()); + } else { + result.MutablePartitioningPolicy()->SetSizeToSplit(2_GB); + result.MutablePartitioningPolicy()->SetMinPartitionsCount(1); + } + if (baseTablePartitionConfig.HasPipelineConfig()) { + result.MutablePipelineConfig()->CopyFrom(baseTablePartitionConfig.GetPipelineConfig()); + } + if (baseTablePartitionConfig.ColumnFamiliesSize()) { + // Indexes don't need column families unless it's the default column family + for (const auto& family : baseTablePartitionConfig.GetColumnFamilies()) { + const bool isDefaultFamily = ( + (!family.HasId() && !family.HasName()) || + (family.HasId() && family.GetId() == 0) || + (family.HasName() && family.GetName() == "default")); + if (isDefaultFamily) { + result.AddColumnFamilies()->CopyFrom(family); + } + } + } + if (baseTablePartitionConfig.HasResourceProfile()) { + result.SetResourceProfile(baseTablePartitionConfig.GetResourceProfile()); + } + if (baseTablePartitionConfig.HasDisableStatisticsCalculation()) { + result.SetDisableStatisticsCalculation(baseTablePartitionConfig.GetDisableStatisticsCalculation()); + } + if (baseTablePartitionConfig.HasEnableFilterByKey()) { + result.SetEnableFilterByKey(baseTablePartitionConfig.GetEnableFilterByKey()); + } + if (baseTablePartitionConfig.HasExecutorFastLogPolicy()) { + result.SetExecutorFastLogPolicy(baseTablePartitionConfig.GetExecutorFastLogPolicy()); + } + if (baseTablePartitionConfig.HasEnableEraseCache()) { + result.SetEnableEraseCache(baseTablePartitionConfig.GetEnableEraseCache()); + } 
+ if (baseTablePartitionConfig.HasEraseCacheMinRows()) { + result.SetEraseCacheMinRows(baseTablePartitionConfig.GetEraseCacheMinRows()); + } + if (baseTablePartitionConfig.HasEraseCacheMaxBytes()) { + result.SetEraseCacheMaxBytes(baseTablePartitionConfig.GetEraseCacheMaxBytes()); + } + if (baseTablePartitionConfig.HasKeepSnapshotTimeout()) { + result.SetKeepSnapshotTimeout(baseTablePartitionConfig.GetKeepSnapshotTimeout()); + } + // skip repeated NKikimrStorageSettings.TStorageRoom StorageRooms = 17; + // skip optional NKikimrHive.TFollowerGroup FollowerGroup = 23; - result.SetName("indexImplTable"); + return result; +} +void SetImplTablePartitionConfig( + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NKikimrSchemeOp::TTableDescription& indexTableDesc, + NKikimrSchemeOp::TTableDescription& tableDescription) +{ if (indexTableDesc.HasUniformPartitionsCount()) { - result.SetUniformPartitionsCount(indexTableDesc.GetUniformPartitionsCount()); + tableDescription.SetUniformPartitionsCount(indexTableDesc.GetUniformPartitionsCount()); } if (indexTableDesc.SplitBoundarySize()) { - result.MutableSplitBoundary()->CopyFrom(indexTableDesc.GetSplitBoundary()); + tableDescription.MutableSplitBoundary()->CopyFrom(indexTableDesc.GetSplitBoundary()); } - *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableInfo, indexTableDesc); - - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - FillVectorIndexColumns(result); - return result; - } + *tableDescription.MutablePartitionConfig() = PartitionConfigForIndexes(baseTablePartitionConfig, indexTableDesc); +} + void FillIndexImplTableColumns( + const THashMap& baseTableColumns, + const NTableIndex::TTableColumns& implTableColumns, + NKikimrSchemeOp::TTableDescription& implTableDesc) +{ //Columns and KeyColumnNames order is really important //the order of implTableColumns.Keys is the right one @@ -321,15 +361,14 @@ NKikimrSchemeOp::TTableDescription 
CalcImplTableDesc( implKeyToImplColumn[implTableColumns.Keys[keyId]] = keyId; } - result.ClearColumns(); - for (auto& iter: baseTableInfo->Columns) { + for (auto& iter: baseTableColumns) { const NSchemeShard::TTableInfo::TColumn& column = iter.second; if (column.IsDropped()) { continue; } if (implTableColumns.Columns.contains(column.Name)) { - auto item = result.AddColumns(); + auto item = implTableDesc.AddColumns(); item->SetName(column.Name); item->SetType(NScheme::TypeName(column.PType, column.PTypeMod)); item->SetNotNull(column.NotNull); @@ -341,49 +380,26 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( } } - std::sort(result.MutableColumns()->begin(), - result.MutableColumns()->end(), + std::sort(implTableDesc.MutableColumns()->begin(), + implTableDesc.MutableColumns()->end(), [] (auto& left, auto& right) { return left.GetId() < right.GetId(); }); - for (auto& column: *result.MutableColumns()) { + for (auto& column: *implTableDesc.MutableColumns()) { column.ClearId(); } - result.ClearKeyColumnNames(); for (auto& keyName: implTableColumns.Keys) { - result.AddKeyColumnNames(keyName); + implTableDesc.AddKeyColumnNames(keyName); } - - return result; } -NKikimrSchemeOp::TTableDescription CalcImplTableDesc( - const NKikimrSchemeOp::EIndexType indexType, - const NKikimrSchemeOp::TTableDescription &baseTableDescr, - const TTableColumns &implTableColumns, - const NKikimrSchemeOp::TTableDescription &indexTableDesc) +void FillIndexImplTableColumns( + const ::google::protobuf::RepeatedPtrField& baseTableColumns, + const NTableIndex::TTableColumns& implTableColumns, + NKikimrSchemeOp::TTableDescription& implTableDesc) { - NKikimrSchemeOp::TTableDescription result; - - result.SetName("indexImplTable"); - - if (indexTableDesc.HasUniformPartitionsCount()) { - result.SetUniformPartitionsCount(indexTableDesc.GetUniformPartitionsCount()); - } - - if (indexTableDesc.SplitBoundarySize()) { - result.MutableSplitBoundary()->CopyFrom(indexTableDesc.GetSplitBoundary()); - } - 
- *result.MutablePartitionConfig() = PartitionConfigForIndexes(baseTableDescr, indexTableDesc); - - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - FillVectorIndexColumns(result); - return result; - } - //Columns and KeyColumnNames order is really important //the order of implTableColumns.Keys is the right one @@ -392,11 +408,10 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( implKeyToImplColumn[implTableColumns.Keys[keyId]] = keyId; } - result.ClearColumns(); - for (auto& column: baseTableDescr.GetColumns()) { + for (auto& column: baseTableColumns) { auto& columnName = column.GetName(); if (implTableColumns.Columns.contains(columnName)) { - auto item = result.AddColumns(); + auto item = implTableDesc.AddColumns(); *item = column; // Indexes don't use column families @@ -414,115 +429,147 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( } } - std::sort(result.MutableColumns()->begin(), - result.MutableColumns()->end(), + std::sort(implTableDesc.MutableColumns()->begin(), + implTableDesc.MutableColumns()->end(), [] (auto& left, auto& right) { return left.GetId() < right.GetId(); }); - for (auto& column: *result.MutableColumns()) { + for (auto& column: *implTableDesc.MutableColumns()) { column.ClearId(); } - result.ClearKeyColumnNames(); for (auto& keyName: implTableColumns.Keys) { - result.AddKeyColumnNames(keyName); + implTableDesc.AddKeyColumnNames(keyName); } +} - return result; +NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NSchemeShard::TTableInfo::TPtr& baseTableInfo, + const NTableIndex::TTableColumns& implTableColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + NKikimrSchemeOp::TTableDescription implTableDesc; + + implTableDesc.SetName("indexImplTable"); + + SetImplTablePartitionConfig(baseTableInfo->PartitionConfig(), indexTableDesc, implTableDesc); + + FillIndexImplTableColumns(baseTableInfo->Columns, implTableColumns, implTableDesc); + + return implTableDesc; } 
-NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( - const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, - const NKikimrSchemeOp::TTableDescription& indexTableDesc) +NKikimrSchemeOp::TTableDescription CalcImplTableDesc( + const NKikimrSchemeOp::TTableDescription &baseTableDescr, + const TTableColumns &implTableColumns, + const NKikimrSchemeOp::TTableDescription &indexTableDesc) { - // KIKIMR-6687 - NKikimrSchemeOp::TPartitionConfig result; + NKikimrSchemeOp::TTableDescription implTableDesc; - if (baseTablePartitionConfig.HasNamedCompactionPolicy()) { - result.SetNamedCompactionPolicy(baseTablePartitionConfig.GetNamedCompactionPolicy()); - } - if (baseTablePartitionConfig.HasCompactionPolicy()) { - result.MutableCompactionPolicy()->CopyFrom(baseTablePartitionConfig.GetCompactionPolicy()); - } - // skip optional uint64 FollowerCount = 3; - if (baseTablePartitionConfig.HasExecutorCacheSize()) { - result.SetExecutorCacheSize(baseTablePartitionConfig.GetExecutorCacheSize()); - } - // skip optional bool AllowFollowerPromotion = 5 [default = true]; - if (baseTablePartitionConfig.HasTxReadSizeLimit()) { - result.SetTxReadSizeLimit(baseTablePartitionConfig.GetTxReadSizeLimit()); - } - // skip optional uint32 CrossDataCenterFollowerCount = 8; - if (baseTablePartitionConfig.HasChannelProfileId()) { - result.SetChannelProfileId(baseTablePartitionConfig.GetChannelProfileId()); - } + implTableDesc.SetName("indexImplTable"); - if (indexTableDesc.GetPartitionConfig().HasPartitioningPolicy()) { - result.MutablePartitioningPolicy()->CopyFrom(indexTableDesc.GetPartitionConfig().GetPartitioningPolicy()); - } else { - result.MutablePartitioningPolicy()->SetSizeToSplit(2_GB); - result.MutablePartitioningPolicy()->SetMinPartitionsCount(1); - } - if (baseTablePartitionConfig.HasPipelineConfig()) { - result.MutablePipelineConfig()->CopyFrom(baseTablePartitionConfig.GetPipelineConfig()); - } - if (baseTablePartitionConfig.ColumnFamiliesSize()) { - // Indexes don't 
need column families unless it's the default column family - for (const auto& family : baseTablePartitionConfig.GetColumnFamilies()) { - const bool isDefaultFamily = ( - (!family.HasId() && !family.HasName()) || - (family.HasId() && family.GetId() == 0) || - (family.HasName() && family.GetName() == "default")); - if (isDefaultFamily) { - result.AddColumnFamilies()->CopyFrom(family); - } - } - } - if (baseTablePartitionConfig.HasResourceProfile()) { - result.SetResourceProfile(baseTablePartitionConfig.GetResourceProfile()); - } - if (baseTablePartitionConfig.HasDisableStatisticsCalculation()) { - result.SetDisableStatisticsCalculation(baseTablePartitionConfig.GetDisableStatisticsCalculation()); - } - if (baseTablePartitionConfig.HasEnableFilterByKey()) { - result.SetEnableFilterByKey(baseTablePartitionConfig.GetEnableFilterByKey()); - } - if (baseTablePartitionConfig.HasExecutorFastLogPolicy()) { - result.SetExecutorFastLogPolicy(baseTablePartitionConfig.GetExecutorFastLogPolicy()); - } - if (baseTablePartitionConfig.HasEnableEraseCache()) { - result.SetEnableEraseCache(baseTablePartitionConfig.GetEnableEraseCache()); - } - if (baseTablePartitionConfig.HasEraseCacheMinRows()) { - result.SetEraseCacheMinRows(baseTablePartitionConfig.GetEraseCacheMinRows()); + SetImplTablePartitionConfig(baseTableDescr.GetPartitionConfig(), indexTableDesc, implTableDesc); + + FillIndexImplTableColumns(baseTableDescr.GetColumns(), implTableColumns, implTableDesc); + + return implTableDesc; +} + +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc( + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + NKikimrSchemeOp::TTableDescription implTableDesc; + + implTableDesc.SetName(NTableVectorKmeansTreeIndex::LevelTable); + + SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, implTableDesc); + + { + auto parentIdColumn = implTableDesc.AddColumns(); + 
parentIdColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_ParentIdColumn); + parentIdColumn->SetType("Uint32"); + parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint32); + parentIdColumn->SetId(0); } - if (baseTablePartitionConfig.HasEraseCacheMaxBytes()) { - result.SetEraseCacheMaxBytes(baseTablePartitionConfig.GetEraseCacheMaxBytes()); + { + auto idColumn = implTableDesc.AddColumns(); + idColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_IdColumn); + idColumn->SetType("Uint32"); + idColumn->SetTypeId(NScheme::NTypeIds::Uint32); + idColumn->SetId(1); } - if (baseTablePartitionConfig.HasKeepSnapshotTimeout()) { - result.SetKeepSnapshotTimeout(baseTablePartitionConfig.GetKeepSnapshotTimeout()); + { + auto centroidColumn = implTableDesc.AddColumns(); + centroidColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn); + centroidColumn->SetType("String"); + centroidColumn->SetTypeId(NScheme::NTypeIds::String); + centroidColumn->SetId(2); } - // skip repeated NKikimrStorageSettings.TStorageRoom StorageRooms = 17; - // skip optional NKikimrHive.TFollowerGroup FollowerGroup = 23; - return result; + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_ParentIdColumn); + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_IdColumn); + + return implTableDesc; } -NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( const NSchemeShard::TTableInfo::TPtr& baseTableInfo, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc) { - return PartitionConfigForIndexes(baseTableInfo->PartitionConfig(), indexTableDesc); + NKikimrSchemeOp::TTableDescription implTableDesc; + + implTableDesc.SetName(NTableVectorKmeansTreeIndex::PostingTable); + + SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, 
implTableDesc); + + { + auto parentIdColumn = implTableDesc.AddColumns(); + parentIdColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + parentIdColumn->SetType("Uint32"); + parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint8); + parentIdColumn->SetId(0); + } + + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + + FillIndexImplTableColumns(baseTableInfo->Columns, implTableColumns, implTableDesc); + + return implTableDesc; } -NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( - const NKikimrSchemeOp::TTableDescription& baseTableDescr, +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( + const NKikimrSchemeOp::TTableDescription &baseTableDescr, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc) { - return PartitionConfigForIndexes(baseTableDescr.GetPartitionConfig(), indexTableDesc); + NKikimrSchemeOp::TTableDescription implTableDesc; + + implTableDesc.SetName(NTableVectorKmeansTreeIndex::PostingTable); + + SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, implTableDesc); + + { + auto parentIdColumn = implTableDesc.AddColumns(); + parentIdColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + parentIdColumn->SetType("Uint32"); + parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint8); + parentIdColumn->SetId(0); + } + + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + + FillIndexImplTableColumns(baseTableDescr.GetColumns(), implTableColumns, implTableDesc); + + return implTableDesc; } + + bool ExtractTypes(const NKikimrSchemeOp::TTableDescription& baseTableDescr, TColumnTypes& columnTypes, TString& explain) { const NScheme::TTypeRegistry* typeRegistry = AppData()->TypeRegistry; Y_ABORT_UNLESS(typeRegistry); diff --git 
a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 19a9e207313b..0849dcdcd1dd 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -137,26 +137,31 @@ class PQGroupReserve { namespace NTableIndex { NKikimrSchemeOp::TTableDescription CalcImplTableDesc( - const NKikimrSchemeOp::EIndexType indexType, const NSchemeShard::TTableInfo::TPtr& baseTableInfo, const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc); NKikimrSchemeOp::TTableDescription CalcImplTableDesc( - const NKikimrSchemeOp::EIndexType indexType, const NKikimrSchemeOp::TTableDescription& baseTableDesc, const NTableIndex::TTableColumns& implTableColumns, - const NKikimrSchemeOp::TTableDescription& indexTableDesc -); + const NKikimrSchemeOp::TTableDescription& indexTableDesc); -NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( - const NSchemeShard::TTableInfo::TPtr& baseTableInfo, +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc( + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, const NKikimrSchemeOp::TTableDescription& indexTableDesc); -NKikimrSchemeOp::TPartitionConfig PartitionConfigForIndexes( - const NKikimrSchemeOp::TTableDescription& baseTableDesc, +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( + const NSchemeShard::TTableInfo::TPtr& baseTableInfo, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NTableIndex::TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc); +NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( + const NKikimrSchemeOp::TTableDescription &baseTableDescr, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const NTableIndex::TTableColumns& implTableColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc); + TTableColumns 
ExtractInfo(const NSchemeShard::TTableInfo::TPtr& tableInfo); TTableColumns ExtractInfo(const NKikimrSchemeOp::TTableDescription& tableDesc); TIndexColumns ExtractInfo(const NKikimrSchemeOp::TIndexCreationConfig& indexDesc); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index ab2bb121bef5..683ffc8cacfc 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,9 +7,11 @@ #include #include + using namespace NKikimr; using namespace NSchemeShard; using namespace NSchemeShardUT_Private; +using namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex; Y_UNIT_TEST_SUITE(TVectorIndexTests) { Y_UNIT_TEST(CreateTable) { @@ -21,21 +24,21 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { Name: "vectors" Columns { Name: "id" Type: "Uint64" } Columns { Name: "embedding" Type: "String" } - Columns { Name: "otherColumn1" Type: "String" } - Columns { Name: "otherColumn2" Type: "String" } - Columns { Name: "otherColumn3" Type: "String" } + Columns { Name: "covered" Type: "String" } + Columns { Name: "another" Type: "String" } KeyColumnNames: ["id"] } IndexDescription { Name: "idx_vector" KeyColumnNames: ["embedding"] + DataColumnNames: ["covered"] Type: EIndexTypeGlobalVectorKmeansTree VectorIndexKmeansTreeDescription { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 - } + } } } )"); @@ -46,6 +49,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), NLs::IndexKeys({"embedding"}), + NLs::IndexDataColumns({"covered"}), NLs::VectorIndexDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED, Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, @@ -53,8 +57,89 @@ 
Y_UNIT_TEST_SUITE(TVectorIndexTests) { ), }); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplTable"), + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), + { NLs::PathExist, + NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}) }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), + { NLs::PathExist, + NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id", "covered"}, {}, {PostingTable_ParentIdColumn, "id"}) }); + + + TVector dropTxIds; + TestDropTable(runtime, dropTxIds.emplace_back(++txId), "/MyRoot", "vectors"); + env.TestWaitNotification(runtime, dropTxIds); + } + + Y_UNIT_TEST(CreateTableMultiColumn) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "vectors" + Columns { Name: "id1" Type: "String" } + Columns { Name: "id2" Type: "String" } + Columns { Name: "embedding" Type: "String" } + Columns { Name: "covered1" Type: "String" } + Columns { Name: "covered2" Type: "String" } + Columns { Name: "another1" Type: "String" } + Columns { Name: "another2" Type: "String" } + KeyColumnNames: ["id1", "id2"] + } + IndexDescription { + Name: "idx_vector" + KeyColumnNames: ["embedding"] + DataColumnNames: ["covered1", "covered2"] + Type: EIndexTypeGlobalVectorKmeansTree + VectorIndexKmeansTreeDescription { + Settings : { + distance: DISTANCE_COSINE, + vector_type: VECTOR_TYPE_FLOAT, + vector_dimension: 1024 + } + } + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector"), + { NLs::PathExist, + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), + 
NLs::IndexKeys({"embedding"}), + NLs::IndexDataColumns({"covered1", "covered2"}), + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns("indexImplTable", {"level", "id", "centroid", "ids"}, {}, {"level", "id"}) }); + NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}) }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), + { NLs::PathExist, + NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentIdColumn, "id1", "id2"}) }); } + + + Y_UNIT_TEST(CreateTableWithError) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + // base table column should not contains reserved name '-parent' + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "vectors" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "-parent" Type: "String" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_vector" + KeyColumnNames: ["-parent"] + Type: EIndexTypeGlobalVectorKmeansTree + } + )", {NKikimrScheme::StatusInvalidParameter}); + } } diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index 0627da38e686..3e526bb93300 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -959,9 +959,11 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, break; } - if (!FillIndexTablePartitioning(*indexDesc->AddIndexImplTableDescriptions(), index, status, error)) { + std::vector indexImplTableDescriptionsVector(indexDesc->MutableIndexImplTableDescriptions()->begin(), indexDesc->MutableIndexImplTableDescriptions()->end()); + if 
(!FillIndexTablePartitioning(indexImplTableDescriptionsVector, index, status, error)) { return false; } + *indexDesc->MutableIndexImplTableDescriptions() = {indexImplTableDescriptionsVector.begin(), indexImplTableDescriptionsVector.end()}; } return true; diff --git a/ydb/core/ydb_convert/table_settings.cpp b/ydb/core/ydb_convert/table_settings.cpp index 9af62a1a4d81..028c0fb3e4fa 100644 --- a/ydb/core/ydb_convert/table_settings.cpp +++ b/ydb/core/ydb_convert/table_settings.cpp @@ -405,18 +405,18 @@ bool FillAlterTableSettingsDesc(NKikimrSchemeOp::TTableDescription& tableDesc, } bool FillIndexTablePartitioning( - NKikimrSchemeOp::TTableDescription& out, + std::vector& indexImplTableDescriptions, const Ydb::Table::TableIndex& index, Ydb::StatusIds::StatusCode& code, TString& error ) { - auto fillIndexPartitioning = [&](const Ydb::Table::GlobalIndexSettings& settings) { + auto fillIndexPartitioning = [&](const Ydb::Table::GlobalIndexSettings& settings, NKikimrSchemeOp::TTableDescription& indexImplTableDescription) { if (settings.has_partitioning_settings()) { - if (!FillPartitioningPolicy(*out.MutablePartitionConfig(), settings, code, error)) { + if (!FillPartitioningPolicy(*indexImplTableDescription.MutablePartitionConfig(), settings, code, error)) { return false; } } if (settings.partitions_case() != Ydb::Table::GlobalIndexSettings::PARTITIONS_NOT_SET) { - if (!FillPartitions(out, settings, code, error)) { + if (!FillPartitions(indexImplTableDescription, settings, code, error)) { return false; } } @@ -425,25 +425,28 @@ bool FillIndexTablePartitioning( switch (index.type_case()) { case Ydb::Table::TableIndex::kGlobalIndex: - if (!fillIndexPartitioning(index.global_index().settings())) { + if (!fillIndexPartitioning(index.global_index().settings(), indexImplTableDescriptions.at(0))) { return false; } break; case Ydb::Table::TableIndex::kGlobalAsyncIndex: - if (!fillIndexPartitioning(index.global_async_index().settings())) { + if 
(!fillIndexPartitioning(index.global_async_index().settings(), indexImplTableDescriptions.at(0))) { return false; } break; case Ydb::Table::TableIndex::kGlobalUniqueIndex: - if (!fillIndexPartitioning(index.global_unique_index().settings())) { + if (!fillIndexPartitioning(index.global_unique_index().settings(), indexImplTableDescriptions.at(0))) { return false; } break; case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: - if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().settings())) { + if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().level_table_settings(), indexImplTableDescriptions.at(0))) { + return false; + } + if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().posting_table_settings(), indexImplTableDescriptions.at(1))) { return false; } break; diff --git a/ydb/core/ydb_convert/table_settings.h b/ydb/core/ydb_convert/table_settings.h index 46713ceefa36..49c36c0f6820 100644 --- a/ydb/core/ydb_convert/table_settings.h +++ b/ydb/core/ydb_convert/table_settings.h @@ -70,7 +70,7 @@ bool FillTtlSettings(TTtlSettingsEnabled& out, const Ydb::Table::TtlSettings& in } bool FillIndexTablePartitioning( - NKikimrSchemeOp::TTableDescription& out, + std::vector& indexImplTableDescriptions, const Ydb::Table::TableIndex& index, Ydb::StatusIds::StatusCode& code, TString& error); diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index 7d556b1a844c..bd2ce670da82 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -103,8 +103,9 @@ message GlobalUniqueIndex { } message GlobalVectorKMeansTreeIndex { - GlobalIndexSettings settings = 1; - VectorIndexSettings vector_settings = 2; + GlobalIndexSettings level_table_settings = 1; + GlobalIndexSettings posting_table_settings = 2; + VectorIndexSettings vector_settings = 3; } // Represent secondary index From 932e91a31b6d4df91b06ac42446570bf77723ded Mon Sep 17 00:00:00 2001 From: azevaykin Date: 
Wed, 10 Jul 2024 07:57:07 +0000 Subject: [PATCH 05/21] EPathSubType --- ydb/core/protos/flat_scheme_op.proto | 2 +- ydb/core/tx/scheme_board/cache.cpp | 4 ++-- ydb/core/tx/schemeshard/schemeshard_path_describer.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 2f40f656a684..481d7f5d3c6a 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -1862,7 +1862,7 @@ enum EPathSubType { EPathSubTypeSyncIndexImplTable = 1; EPathSubTypeAsyncIndexImplTable = 2; EPathSubTypeStreamImpl = 3; - EPathSubTypeVectorIndexImplTable = 4; + EPathSubTypeVectorKmeansTreeIndexImplTable = 4; } enum EPathState { diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 01fb1a054f87..11ef95fdb7b6 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -876,7 +876,7 @@ class TSchemeCache: public TMonitorableActor { return TResolve::KindSyncIndexTable; case NKikimrSchemeOp::EPathSubTypeAsyncIndexImplTable: return TResolve::KindAsyncIndexTable; - case NKikimrSchemeOp::EPathSubTypeVectorIndexImplTable: + case NKikimrSchemeOp::EPathSubTypeVectorKmeansTreeIndexImplTable: return TResolve::KindVectorIndexTable; default: return TResolve::KindRegularTable; @@ -889,7 +889,7 @@ class TSchemeCache: public TMonitorableActor { switch (subType) { case NKikimrSchemeOp::EPathSubTypeSyncIndexImplTable: case NKikimrSchemeOp::EPathSubTypeAsyncIndexImplTable: - case NKikimrSchemeOp::EPathSubTypeVectorIndexImplTable: + case NKikimrSchemeOp::EPathSubTypeVectorKmeansTreeIndexImplTable: return true; default: return false; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 495857fd76b9..949a0de359ae 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ 
-137,7 +137,7 @@ TPathElement::EPathSubType TPathDescriber::CalcPathSubType(const TPath& path) { case NKikimrSchemeOp::EIndexTypeGlobalUnique: return TPathElement::EPathSubType::EPathSubTypeSyncIndexImplTable; case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: - return TPathElement::EPathSubType::EPathSubTypeVectorIndexImplTable; + return TPathElement::EPathSubType::EPathSubTypeVectorKmeansTreeIndexImplTable; default: Y_DEBUG_ABORT("%s", (TStringBuilder() << "unexpected indexInfo->Type# " << indexInfo->Type).data()); return TPathElement::EPathSubType::EPathSubTypeEmpty; From 851adeb11d7dd13a9fb77dfca818c637bc776704 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 08:45:43 +0000 Subject: [PATCH 06/21] Table with vector indexes doesn't support TTL --- ydb/core/tx/schemeshard/schemeshard_info_types.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 0d32fd8dbfbb..c54f1ae38f40 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -507,6 +507,13 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( } if (op.HasTTLSettings()) { + for (const auto& indexDescription : op.GetTableIndexes()) { + if (indexDescription.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + errStr = "Table with vector indexes doesn't support TTL"; + return nullptr; + } + } + const auto& ttl = op.GetTTLSettings(); if (!ValidateTtlSettings(ttl, source ? 
source->Columns : THashMap(), alterData->Columns, colName2Id, subDomain, errStr)) { From eb7d8df0749fd4f0e69674ab79ddaea189e4068b Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 11:07:37 +0000 Subject: [PATCH 07/21] Review fixes 3 --- ydb/core/base/table_vector_index.h | 19 ++++++++----------- ...emeshard__operation_create_build_index.cpp | 5 +++-- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 6 +++--- ydb/core/tx/schemeshard/schemeshard_utils.h | 10 ++++------ 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/ydb/core/base/table_vector_index.h b/ydb/core/base/table_vector_index.h index 688e45cc2652..1b6e24508314 100644 --- a/ydb/core/base/table_vector_index.h +++ b/ydb/core/base/table_vector_index.h @@ -1,22 +1,19 @@ #pragma once -namespace NKikimr { -namespace NTableIndex { -namespace NTableVectorKmeansTreeIndex { +namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex { // Vector KmeansTree index tables description // Levels table -inline constexpr char LevelTable[] = "indexImplLevelTable"; -inline constexpr char LevelTable_ParentIdColumn[] = "-parent"; -inline constexpr char LevelTable_IdColumn[] = "-id"; -inline constexpr char LevelTable_EmbeddingColumn[] = "-embedding"; +inline constexpr const char* LevelTable = "indexImplLevelTable"; +inline constexpr const char* LevelTable_ParentIdColumn = "-parent"; +inline constexpr const char* LevelTable_IdColumn = "-id"; +inline constexpr const char* LevelTable_EmbeddingColumn = "-embedding"; // Posting table -inline constexpr char PostingTable[] = "indexImplPostingTable"; -inline constexpr char PostingTable_ParentIdColumn[] = "-parent"; +inline constexpr const char* PostingTable = "indexImplPostingTable"; +inline constexpr const char* PostingTable_ParentIdColumn = "-parent"; } -} -} + diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index b42e6b6ca04d..d0ca5e3ca27a 100644 --- 
a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -117,7 +117,6 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa auto outTx = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpInitiateBuildIndexImplTable); *outTx.MutableCreateTable() = implTableDesc; - implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); implTableDesc.MutablePartitionConfig()->SetShadowData(true); result.push_back(CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx)); @@ -127,7 +126,9 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexDesc.GetIndexImplTableDescriptions(0))); createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexDesc.GetIndexImplTableDescriptions(1))); } else { - createIndexImplTable(CalcImplTableDesc(tableInfo, implTableColumns, indexDesc.GetIndexImplTableDescriptions(0))); + NKikimrSchemeOp::TTableDescription implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexDesc.GetIndexImplTableDescriptions(0)); + implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); + createIndexImplTable(std::move(implTableDesc)); } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index b7e1ac50086e..c1f144fb0193 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -461,9 +461,9 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( } NKikimrSchemeOp::TTableDescription CalcImplTableDesc( - const NKikimrSchemeOp::TTableDescription &baseTableDescr, - const TTableColumns &implTableColumns, - const NKikimrSchemeOp::TTableDescription 
&indexTableDesc) + const NKikimrSchemeOp::TTableDescription& baseTableDescr, + const TTableColumns& implTableColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) { NKikimrSchemeOp::TTableDescription implTableDesc; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 0849dcdcd1dd..1316a8974170 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -191,12 +191,10 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - if (!indexKeys.DataColumns.empty()) { - if (!AppData()->FeatureFlags.GetEnableDataColumnForIndexTable()) { - status = NKikimrScheme::EStatus::StatusPreconditionFailed; - error = "It is not allowed to create index with data column"; - return false; - } + if (!indexKeys.DataColumns.empty() && !AppData()->FeatureFlags.GetEnableDataColumnForIndexTable()) { + status = NKikimrScheme::EStatus::StatusPreconditionFailed; + error = "It is not allowed to create index with data column"; + return false; } if (!IsCompatibleIndex(indexDesc.GetType(), baseTableColumns, indexKeys, error)) { From 3e33b6d23ab8ef48914c2e3d7e312884e109ae30 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 12:12:40 +0000 Subject: [PATCH 08/21] Uint32 --- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index c1f144fb0193..5dc9d9779219 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -530,7 +530,7 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( auto parentIdColumn = implTableDesc.AddColumns(); parentIdColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); parentIdColumn->SetType("Uint32"); - 
parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint8); + parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint32); parentIdColumn->SetId(0); } @@ -557,7 +557,7 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( auto parentIdColumn = implTableDesc.AddColumns(); parentIdColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); parentIdColumn->SetType("Uint32"); - parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint8); + parentIdColumn->SetTypeId(NScheme::NTypeIds::Uint32); parentIdColumn->SetId(0); } From db7fd55e1b8cdaf97c4c63d570ed23ccbcf72bc0 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 13:56:46 +0000 Subject: [PATCH 09/21] CalcTableImplDescription for vector index --- ydb/core/base/table_index.cpp | 13 +++++++++---- ydb/core/base/table_index.h | 2 +- ydb/core/kqp/provider/yql_kikimr_gateway.cpp | 12 ++++-------- ydb/core/tx/schemeshard/schemeshard_utils.cpp | 4 ---- ydb/core/tx/schemeshard/schemeshard_utils.h | 4 ++-- ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp | 2 ++ 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index 1ba8cd9f5377..0b6f1fe7fc17 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -20,12 +20,17 @@ bool Contains(const TVector& names, TString str) { namespace NKikimr { namespace NTableIndex { -TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns& index) { +TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) { TTableColumns result; - for (const auto& ik: index.KeyColumns) { - result.Keys.push_back(ik); - result.Columns.insert(ik); + if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + result.Keys.push_back(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + 
result.Columns.insert(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); + } else { + for (const auto& ik: index.KeyColumns) { + result.Keys.push_back(ik); + result.Columns.insert(ik); + } } for (const auto& tk: table.Keys) { diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index 7cfc9eae3363..7d7af7b915d0 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -22,7 +22,7 @@ struct TIndexColumns { }; bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain); -TTableColumns CalcTableImplDescription(const TTableColumns& table, const TIndexColumns& index); +TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index); } } diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index 0b6fb77fd48b..897bbfc54190 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -119,14 +119,10 @@ void IKikimrGateway::BuildIndexMetadata(TTableMetadataResult& loadTableMetadataR NKikimr::NTableIndex::TIndexColumns indexColumns{index.KeyColumns, {}}; TString error; - YQL_ENSURE(IsCompatibleIndex(TIndexDescription::ConvertIndexType(index.Type), - tableColumns, - indexColumns, - error), "Index is not compatible: " << error); - - NKikimr::NTableIndex::TTableColumns indexTableColumns = NKikimr::NTableIndex::CalcTableImplDescription( - tableColumns, - indexColumns); + NKikimrSchemeOp::EIndexType indexType = TIndexDescription::ConvertIndexType(index.Type); + YQL_ENSURE(IsCompatibleIndex(indexType, tableColumns, indexColumns, error), "Index is not compatible: " << error); + + NKikimr::NTableIndex::TTableColumns indexTableColumns = NKikimr::NTableIndex::CalcTableImplDescription(indexType, tableColumns, indexColumns); TKikimrTableMetadataPtr indexTableMetadata = new 
TKikimrTableMetadata(cluster, indexTablePath); indexTableMetadata->DoesExist = true; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 5dc9d9779219..b94b0647b866 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -534,8 +534,6 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( parentIdColumn->SetId(0); } - implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); - FillIndexImplTableColumns(baseTableInfo->Columns, implTableColumns, implTableDesc); return implTableDesc; @@ -561,8 +559,6 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePostingImplTableDesc( parentIdColumn->SetId(0); } - implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn); - FillIndexImplTableColumns(baseTableDescr.GetColumns(), implTableColumns, implTableDesc); return implTableDesc; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 1316a8974170..b63b33b1a4af 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -208,6 +208,8 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } + implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); + if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { //We have already checked this in IsCompatibleIndex Y_ABORT_UNLESS(indexKeys.KeyColumns.size() == 1); @@ -222,8 +224,6 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } } else { - implTableColumns = CalcTableImplDescription(baseTableColumns, indexKeys); - if (!IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return 
false; diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 94edf3bf3d2e..7abbd259bc7d 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -489,6 +489,7 @@ TCheckFunc CheckColumns(const TString& name, const TSet& columns, const NKikimrSchemeOp::TTableDescription table = descr.GetTable(); UNIT_ASSERT(table.ColumnsSize()); + UNIT_ASSERT(columns.size() - droppedColumns.size() == table.ColumnsSize()); for (auto& col : table.GetColumns()) { UNIT_ASSERT(col.HasName()); UNIT_ASSERT(col.HasId()); @@ -499,6 +500,7 @@ TCheckFunc CheckColumns(const TString& name, const TSet& columns, const UNIT_ASSERT(!droppedColumns.contains(name)); } + UNIT_ASSERT(keyColumns.size() == table.KeyColumnNamesSize()); for (auto& keyName : table.GetKeyColumnNames()) { UNIT_ASSERT(keyColumns.contains(keyName)); } From b336a158e27d67523505fe625f6c6b25c6240320 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 18:26:14 +0000 Subject: [PATCH 10/21] KQP tests fix --- ydb/core/ydb_convert/table_description.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index 3e526bb93300..994e4f28e93d 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -936,30 +936,35 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, } // specific fields + std::vector indexImplTableDescriptionsVector; switch (index.type_case()) { case Ydb::Table::TableIndex::kGlobalIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); + indexImplTableDescriptionsVector.resize(1); break; case Ydb::Table::TableIndex::kGlobalAsyncIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync); + indexImplTableDescriptionsVector.resize(1); break; case 
Ydb::Table::TableIndex::kGlobalUniqueIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique); + indexImplTableDescriptionsVector.resize(1); break; case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + indexImplTableDescriptionsVector.resize(2); break; default: // pass through // TODO: maybe return BAD_REQUEST? + indexImplTableDescriptionsVector.resize(1); break; } - std::vector indexImplTableDescriptionsVector(indexDesc->MutableIndexImplTableDescriptions()->begin(), indexDesc->MutableIndexImplTableDescriptions()->end()); if (!FillIndexTablePartitioning(indexImplTableDescriptionsVector, index, status, error)) { return false; } From 4f20dd35d86c34eac6f363579cc9a68981581d97 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Wed, 10 Jul 2024 18:42:28 +0000 Subject: [PATCH 11/21] strictCount parameter in CheckColumns --- ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp | 9 ++++----- ydb/core/tx/schemeshard/ut_helpers/ls_checks.h | 3 +-- ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 7abbd259bc7d..59b41331ffb2 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -471,8 +471,7 @@ void IsResourcePool(const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_VALUES_EQUAL(selfPath.GetPathType(), NKikimrSchemeOp::EPathTypeResourcePool); } -TCheckFunc CheckColumns(const TString& name, const TSet& columns, const TSet& droppedColumns, const TSet keyColumns, - NKikimrSchemeOp::EPathState pathState) { +TCheckFunc CheckColumns(const TString& name, const TSet& columns, const TSet& droppedColumns, const TSet keyColumns, bool strictCount) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { 
UNIT_ASSERT(record.HasPathDescription()); NKikimrSchemeOp::TPathDescription descr = record.GetPathDescription(); @@ -483,13 +482,13 @@ TCheckFunc CheckColumns(const TString& name, const TSet& columns, const TString curName = self.GetName(); ui32 curPathState = self.GetPathState(); UNIT_ASSERT_STRINGS_EQUAL(curName, name); - UNIT_ASSERT_VALUES_EQUAL(curPathState, (ui32)pathState); + UNIT_ASSERT_VALUES_EQUAL(curPathState, (ui32)NKikimrSchemeOp::EPathState::EPathStateNoChanges); UNIT_ASSERT(descr.HasTable()); NKikimrSchemeOp::TTableDescription table = descr.GetTable(); UNIT_ASSERT(table.ColumnsSize()); - UNIT_ASSERT(columns.size() - droppedColumns.size() == table.ColumnsSize()); + UNIT_ASSERT(!strictCount || columns.size() - droppedColumns.size() == table.ColumnsSize()); for (auto& col : table.GetColumns()) { UNIT_ASSERT(col.HasName()); UNIT_ASSERT(col.HasId()); @@ -500,7 +499,7 @@ TCheckFunc CheckColumns(const TString& name, const TSet& columns, const UNIT_ASSERT(!droppedColumns.contains(name)); } - UNIT_ASSERT(keyColumns.size() == table.KeyColumnNamesSize()); + UNIT_ASSERT(!strictCount || keyColumns.size() == table.KeyColumnNamesSize()); for (auto& keyName : table.GetKeyColumnNames()) { UNIT_ASSERT(keyColumns.contains(keyName)); } diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index c4cb9c8df26d..363422c3a12e 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -96,8 +96,7 @@ namespace NLs { void IsExternalDataSource(const NKikimrScheme::TEvDescribeSchemeResult& record); void IsView(const NKikimrScheme::TEvDescribeSchemeResult& record); void IsResourcePool(const NKikimrScheme::TEvDescribeSchemeResult& record); - TCheckFunc CheckColumns(const TString& name, const TSet& columns, const TSet& droppedColumns, const TSet keyColumns, - NKikimrSchemeOp::EPathState pathState = NKikimrSchemeOp::EPathState::EPathStateNoChanges); + TCheckFunc 
CheckColumns(const TString& name, const TSet& columns, const TSet& droppedColumns, const TSet keyColumns, bool strictCount = false); void CheckBoundaries(const NKikimrScheme::TEvDescribeSchemeResult& record); TCheckFunc PartitionCount(ui32 count); TCheckFunc PartitionKeys(TVector lastShardKeys); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index 683ffc8cacfc..328ba7d7f8c6 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -59,11 +59,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}) }); + NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}, true) }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, - NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id", "covered"}, {}, {PostingTable_ParentIdColumn, "id"}) }); + NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id", "covered"}, {}, {PostingTable_ParentIdColumn, "id"}, true) }); TVector dropTxIds; @@ -114,11 +114,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}) }); + NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, 
LevelTable_IdColumn}, true) }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, - NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentIdColumn, "id1", "id2"}) }); + NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentIdColumn, "id1", "id2"}, true) }); } From d65e3cb968b0f2c110cbfdc942dde8eab7ff6177 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Thu, 11 Jul 2024 05:48:32 +0000 Subject: [PATCH 12/21] Fix index restore --- ydb/core/tx/schemeshard/schemeshard_info_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index a7f3a37aec04..b068493e1d48 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3063,6 +3063,8 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->IndexName = row.template GetValue(); indexInfo->IndexType = row.template GetValue(); + indexInfo->ImplTableDescriptions.resize(indexInfo->IndexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree ? 
2 : 1); + indexInfo->State = TIndexBuildInfo::EState( row.template GetValue()); indexInfo->Issue = From 2f78a7191a8bdcdbfa3d9b2b0284052104fa9639 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Thu, 11 Jul 2024 07:31:47 +0000 Subject: [PATCH 13/21] Some fixes --- ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 15 +++++++++++++++ ydb/core/ydb_convert/table_description.cpp | 22 ++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 4db4ac5dd9fb..25925d616bb2 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -2433,6 +2433,21 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto indexDesc = describe.GetTableDescription(); UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetPartitioningSettings().GetMinPartitionsCount(), minPartitionsCount); } + + constexpr int partitionSizeMb = 555; + { + auto result = session.ExecuteSchemeQuery(Sprintf(R"( + ALTER TABLE `/Root/SecondaryKeys` ALTER INDEX Index SET AUTO_PARTITIONING_PARTITION_SIZE_MB %d; + )", partitionSizeMb) + ).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto describe = session.DescribeTable("/Root/SecondaryKeys/Index/indexImplTable").GetValueSync(); + UNIT_ASSERT_C(describe.IsSuccess(), describe.GetIssues().ToString()); + auto indexDesc = describe.GetTableDescription(); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetPartitioningSettings().GetPartitionSizeMb(), partitionSizeMb); + } } Y_UNIT_TEST(AlterIndexImplTable) { diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index 994e4f28e93d..3853bdfc8e24 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -822,13 +822,8 @@ void FillPartitioningSettingsImpl(TYdbProto& out, } void FillGlobalIndexSettings(Ydb::Table::GlobalIndexSettings& settings, - const 
google::protobuf::RepeatedPtrField& indexImplTables + const NKikimrSchemeOp::TTableDescription& indexImplTableDescription ) { - if (indexImplTables.empty()) { - return; - } - const auto& indexImplTableDescription = indexImplTables.Get(0); - if (indexImplTableDescription.SplitBoundarySize()) { NKikimrMiniKQL::TType splitKeyType; Ydb::Table::DescribeTableResult unused; @@ -865,23 +860,30 @@ void FillIndexDescriptionImpl(TYdbProto& out, const NKikimrSchemeOp::TTableDescr case NKikimrSchemeOp::EIndexType::EIndexTypeGlobal: FillGlobalIndexSettings( *index->mutable_global_index()->mutable_settings(), - tableIndex.GetIndexImplTableDescriptions() + tableIndex.GetIndexImplTableDescriptions(0) ); break; case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync: FillGlobalIndexSettings( *index->mutable_global_async_index()->mutable_settings(), - tableIndex.GetIndexImplTableDescriptions() + tableIndex.GetIndexImplTableDescriptions(0) ); break; case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique: FillGlobalIndexSettings( *index->mutable_global_unique_index()->mutable_settings(), - tableIndex.GetIndexImplTableDescriptions() + tableIndex.GetIndexImplTableDescriptions(0) ); break; case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: - *index->mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); + FillGlobalIndexSettings( + *index->mutable_global_vector_kmeans_tree_index()->mutable_level_table_settings(), + tableIndex.GetIndexImplTableDescriptions(0) + ); + FillGlobalIndexSettings( + *index->mutable_global_vector_kmeans_tree_index()->mutable_posting_table_settings(), + tableIndex.GetIndexImplTableDescriptions(1) + ); break; default: break; From dc91b0f98a78040c645cbf16e93754c3cc3e915b Mon Sep 17 00:00:00 2001 From: azevaykin Date: Thu, 11 Jul 2024 09:26:31 +0000 Subject: [PATCH 14/21] Embedding column can be covered --- ydb/core/base/table_index.cpp | 6 ++- .../schemeshard/ut_index/ut_vector_index.cpp | 51 +++++++++++++++++++ 2 
files changed, 55 insertions(+), 2 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index 0b6f1fe7fc17..e308a412e39b 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -48,6 +48,8 @@ TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexTy } bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) { + const bool isVectorIndex = indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree; + { auto brokenAt = IsUniq(table.Keys); if (brokenAt != table.Keys.end()) { @@ -75,7 +77,7 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } } - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + if (isVectorIndex) { if (index.KeyColumns.size() != 1) { explain = "Only single key column is supported for vector index"; return false; @@ -123,7 +125,7 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } for (const auto& dataName: index.DataColumns) { - if (indexKeys.contains(dataName)) { + if (indexKeys.contains(dataName) && !isVectorIndex) { explain = TStringBuilder() << "The same column can't be used as key column and data column for one index"; return false; diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index 328ba7d7f8c6..f8fffa1ebb91 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -71,6 +71,57 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { env.TestWaitNotification(runtime, dropTxIds); } + Y_UNIT_TEST(CreateTableCoveredEmbedding) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "vectors" + Columns { Name: "id" Type: "Uint64" } + 
Columns { Name: "embedding" Type: "String" } + Columns { Name: "another" Type: "String" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_vector" + KeyColumnNames: ["embedding"] + DataColumnNames: ["embedding"] + Type: EIndexTypeGlobalVectorKmeansTree + VectorIndexKmeansTreeDescription { + Settings : { + distance: DISTANCE_COSINE, + vector_type: VECTOR_TYPE_FLOAT, + vector_dimension: 1024 + } + } + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector"), + { NLs::PathExist, + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree), + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), + NLs::IndexKeys({"embedding"}), + NLs::IndexDataColumns({"embedding"}), + NLs::VectorIndexDescription(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE, + Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED, + Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT, + 1024 + ), + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), + { NLs::PathExist, + NLs::CheckColumns(LevelTable, {LevelTable_ParentIdColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentIdColumn, LevelTable_IdColumn}, true) }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), + { NLs::PathExist, + NLs::CheckColumns(PostingTable, {PostingTable_ParentIdColumn, "id", "embedding"}, {}, {PostingTable_ParentIdColumn, "id"}, true) }); + } + Y_UNIT_TEST(CreateTableMultiColumn) { TTestBasicRuntime runtime; TTestEnv env(runtime); From 0841320a42afbe01ab529c015eda04626e45d8db Mon Sep 17 00:00:00 2001 From: azevaykin Date: Thu, 11 Jul 2024 12:38:11 +0000 Subject: [PATCH 15/21] PK column should be covered --- ydb/core/base/table_index.cpp | 15 ++++--- .../schemeshard/ut_index/ut_vector_index.cpp | 42 +++++++++---------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git 
a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index e308a412e39b..bc6bdea57e40 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -107,7 +107,8 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } for (const auto& indexKeyName: index.KeyColumns) { - indexKeys.insert(indexKeyName); + if (!isVectorIndex) + indexKeys.insert(indexKeyName); if (!table.Columns.contains(indexKeyName)) { explain = TStringBuilder() << "all index keys should be in table columns" @@ -116,16 +117,14 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable } } - if (indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - if (index.KeyColumns == table.Keys) { - explain = TStringBuilder() - << "table and index keys are the same"; - return false; - } + if (index.KeyColumns == table.Keys && !isVectorIndex) { + explain = TStringBuilder() + << "table and index keys are the same"; + return false; } for (const auto& dataName: index.DataColumns) { - if (indexKeys.contains(dataName) && !isVectorIndex) { + if (indexKeys.contains(dataName)) { explain = TStringBuilder() << "The same column can't be used as key column and data column for one index"; return false; diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index f8fffa1ebb91..c3f6914b961f 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -33,13 +33,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { KeyColumnNames: ["embedding"] DataColumnNames: ["covered"] Type: EIndexTypeGlobalVectorKmeansTree - VectorIndexKmeansTreeDescription { - Settings : { - distance: DISTANCE_COSINE, - vector_type: VECTOR_TYPE_FLOAT, - vector_dimension: 1024 - } - } + VectorIndexKmeansTreeDescription: { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } )"); 
env.TestWaitNotification(runtime, txId); @@ -89,13 +83,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { KeyColumnNames: ["embedding"] DataColumnNames: ["embedding"] Type: EIndexTypeGlobalVectorKmeansTree - VectorIndexKmeansTreeDescription { - Settings : { - distance: DISTANCE_COSINE, - vector_type: VECTOR_TYPE_FLOAT, - vector_dimension: 1024 - } - } + VectorIndexKmeansTreeDescription: { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } )"); env.TestWaitNotification(runtime, txId); @@ -144,13 +132,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { KeyColumnNames: ["embedding"] DataColumnNames: ["covered1", "covered2"] Type: EIndexTypeGlobalVectorKmeansTree - VectorIndexKmeansTreeDescription { - Settings : { - distance: DISTANCE_COSINE, - vector_type: VECTOR_TYPE_FLOAT, - vector_dimension: 1024 - } - } + VectorIndexKmeansTreeDescription: { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } )"); env.TestWaitNotification(runtime, txId); @@ -190,6 +172,24 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { Name: "idx_vector" KeyColumnNames: ["-parent"] Type: EIndexTypeGlobalVectorKmeansTree + VectorIndexKmeansTreeDescription: { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } + } + )", {NKikimrScheme::StatusInvalidParameter}); + + // pk should not be covered + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "vectors" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "embedding" Type: "String" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_vector" + KeyColumnNames: ["embedding"] + DataColumnNames: ["id"] + Type: EIndexTypeGlobalVectorKmeansTree + VectorIndexKmeansTreeDescription: { Settings : { distance: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } )", {NKikimrScheme::StatusInvalidParameter}); } From b937ebf586d8e8a37449cad6b33aac2730459782 Mon Sep 17 
00:00:00 2001 From: azevaykin Date: Fri, 12 Jul 2024 05:18:08 +0000 Subject: [PATCH 16/21] Move result out of lambda --- .../schemeshard__operation_create_build_index.cpp | 8 ++++---- .../schemeshard__operation_create_indexed_table.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index d0ca5e3ca27a..9d84a8727ca0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -119,16 +119,16 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa implTableDesc.MutablePartitionConfig()->SetShadowData(true); - result.push_back(CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx)); + return CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx); }; if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexDesc.GetIndexImplTableDescriptions(0))); - createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexDesc.GetIndexImplTableDescriptions(1))); + result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexDesc.GetIndexImplTableDescriptions(0)))); + result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexDesc.GetIndexImplTableDescriptions(1)))); } else { NKikimrSchemeOp::TTableDescription implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexDesc.GetIndexImplTableDescriptions(0)); implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); - createIndexImplTable(std::move(implTableDesc)); + 
result.push_back(createIndexImplTable(std::move(implTableDesc))); } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index ca88b763a219..10366c485db3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -247,7 +247,7 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr *scheme.MutableCreateTable() = implTableDesc; - return result.push_back(CreateNewTable(NextPartId(nextId, result), scheme)); + return CreateNewTable(NextPartId(nextId, result), scheme); }; const auto& implTableColumns = indexes.at(indexDescription.GetName()); @@ -259,8 +259,8 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr userPostingDesc = indexDescription.GetIndexImplTableDescriptions(1); } - createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc)); - createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPostingDesc)); + result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc))); + result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPostingDesc))); } else { NKikimrSchemeOp::TTableDescription userIndexDesc; if (indexDescription.IndexImplTableDescriptionsSize()) { @@ -268,7 +268,7 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); } - createIndexImplTable(CalcImplTableDesc(baseTableDescription, implTableColumns, userIndexDesc)); + result.push_back(createIndexImplTable(CalcImplTableDesc(baseTableDescription, 
implTableColumns, userIndexDesc))); } } From 05c392b92453deec35da07b75a3faca91d32d08f Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 12 Jul 2024 05:19:18 +0000 Subject: [PATCH 17/21] formatting --- ydb/core/tx/schemeshard/schemeshard_path_describer.cpp | 4 ++-- ydb/public/api/protos/out/out.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 949a0de359ae..080212788ebc 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -1271,9 +1271,9 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name if (const auto* vectorIndexKmeansTreeDescription = std::get_if(&indexInfo->SpecializedIndexDescription)) { const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings(); auto entrySettings = entry.MutableVectorIndexKmeansTreeDescription()->MutableSettings(); - if (indexInfoSettings.Hasdistance()) + if (indexInfoSettings.has_distance()) entrySettings->set_distance(indexInfoSettings.distance()); - else if (indexInfoSettings.Hassimilarity()) + else if (indexInfoSettings.has_similarity()) entrySettings->set_similarity(indexInfoSettings.similarity()); else Y_FAIL_S("Either distance or similarity should be set in index settings: " << indexInfoSettings); diff --git a/ydb/public/api/protos/out/out.cpp b/ydb/public/api/protos/out/out.cpp index 1e96598f2e47..c169fef4c329 100644 --- a/ydb/public/api/protos/out/out.cpp +++ b/ydb/public/api/protos/out/out.cpp @@ -41,4 +41,4 @@ Y_DECLARE_OUT_SPEC(, Ydb::Table::VectorIndexSettings::Similarity, stream, value) Y_DECLARE_OUT_SPEC(, Ydb::Table::VectorIndexSettings::VectorType, stream, value) { stream << Ydb::Table::VectorIndexSettings::VectorType_Name(value); -} \ No newline at end of file +} From a7a0df27ca6831a8b225e20d47aeed402f2e2c65 Mon Sep 17 00:00:00 2001 From: azevaykin Date: 
Fri, 12 Jul 2024 05:25:12 +0000 Subject: [PATCH 18/21] Resize ImplTableDescriptions in FillIndexTablePartitioning --- .../schemeshard_build_index__create.cpp | 2 -- .../tx/schemeshard/schemeshard_info_types.h | 2 -- ydb/core/ydb_convert/table_description.cpp | 5 ----- ydb/core/ydb_convert/table_settings.cpp | 17 ++++++++++------- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index faa86875499d..0c0d5f3b25c3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -219,11 +219,9 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder switch (index.type_case()) { case Ydb::Table::TableIndex::TypeCase::kGlobalIndex: buildInfo->IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobal; - buildInfo->ImplTableDescriptions.resize(1); break; case Ydb::Table::TableIndex::TypeCase::kGlobalAsyncIndex: buildInfo->IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync; - buildInfo->ImplTableDescriptions.resize(1); break; case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: explain = "unsupported index type to build"; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index b068493e1d48..a7f3a37aec04 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3063,8 +3063,6 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->IndexName = row.template GetValue(); indexInfo->IndexType = row.template GetValue(); - indexInfo->ImplTableDescriptions.resize(indexInfo->IndexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree ? 
2 : 1); - indexInfo->State = TIndexBuildInfo::EState( row.template GetValue()); indexInfo->Issue = diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index 3853bdfc8e24..a6f3af41a6d0 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -942,28 +942,23 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, switch (index.type_case()) { case Ydb::Table::TableIndex::kGlobalIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); - indexImplTableDescriptionsVector.resize(1); break; case Ydb::Table::TableIndex::kGlobalAsyncIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync); - indexImplTableDescriptionsVector.resize(1); break; case Ydb::Table::TableIndex::kGlobalUniqueIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique); - indexImplTableDescriptionsVector.resize(1); break; case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); - indexImplTableDescriptionsVector.resize(2); break; default: // pass through // TODO: maybe return BAD_REQUEST? 
- indexImplTableDescriptionsVector.resize(1); break; } diff --git a/ydb/core/ydb_convert/table_settings.cpp b/ydb/core/ydb_convert/table_settings.cpp index 028c0fb3e4fa..324fefbca031 100644 --- a/ydb/core/ydb_convert/table_settings.cpp +++ b/ydb/core/ydb_convert/table_settings.cpp @@ -409,7 +409,10 @@ bool FillIndexTablePartitioning( const Ydb::Table::TableIndex& index, Ydb::StatusIds::StatusCode& code, TString& error ) { - auto fillIndexPartitioning = [&](const Ydb::Table::GlobalIndexSettings& settings, NKikimrSchemeOp::TTableDescription& indexImplTableDescription) { + auto fillIndexPartitioning = [&](const Ydb::Table::GlobalIndexSettings& settings, std::vector& indexImplTableDescriptions) { + indexImplTableDescriptions.push_back({}); + auto& indexImplTableDescription = indexImplTableDescriptions.back(); + if (settings.has_partitioning_settings()) { if (!FillPartitioningPolicy(*indexImplTableDescription.MutablePartitionConfig(), settings, code, error)) { return false; @@ -425,34 +428,34 @@ bool FillIndexTablePartitioning( switch (index.type_case()) { case Ydb::Table::TableIndex::kGlobalIndex: - if (!fillIndexPartitioning(index.global_index().settings(), indexImplTableDescriptions.at(0))) { + if (!fillIndexPartitioning(index.global_index().settings(), indexImplTableDescriptions)) { return false; } break; case Ydb::Table::TableIndex::kGlobalAsyncIndex: - if (!fillIndexPartitioning(index.global_async_index().settings(), indexImplTableDescriptions.at(0))) { + if (!fillIndexPartitioning(index.global_async_index().settings(), indexImplTableDescriptions)) { return false; } break; case Ydb::Table::TableIndex::kGlobalUniqueIndex: - if (!fillIndexPartitioning(index.global_unique_index().settings(), indexImplTableDescriptions.at(0))) { + if (!fillIndexPartitioning(index.global_unique_index().settings(), indexImplTableDescriptions)) { return false; } break; case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: - if 
(!fillIndexPartitioning(index.global_vector_kmeans_tree_index().level_table_settings(), indexImplTableDescriptions.at(0))) { + if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().level_table_settings(), indexImplTableDescriptions)) { return false; } - if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().posting_table_settings(), indexImplTableDescriptions.at(1))) { + if (!fillIndexPartitioning(index.global_vector_kmeans_tree_index().posting_table_settings(), indexImplTableDescriptions)) { return false; } break; case Ydb::Table::TableIndex::TYPE_NOT_SET: - break; + break; } return true; From 5361979a50b99149b6620550f1b3e008d9620354 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 12 Jul 2024 05:38:37 +0000 Subject: [PATCH 19/21] TIndexDescription contains TVector --- ydb/public/sdk/cpp/client/ydb_table/table.cpp | 49 +++++++++++++------ ydb/public/sdk/cpp/client/ydb_table/table.h | 6 +-- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.cpp b/ydb/public/sdk/cpp/client/ydb_table/table.cpp index ced89b6a673b..202fab47e35c 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/table.cpp @@ -2215,7 +2215,7 @@ TIndexDescription::TIndexDescription( EIndexType type, const TVector& indexColumns, const TVector& dataColumns, - const TGlobalIndexSettings& settings + const TVector& settings ) : IndexName_(name) , IndexType_(type) , IndexColumns_(indexColumns) @@ -2227,7 +2227,7 @@ TIndexDescription::TIndexDescription( const TString& name, const TVector& indexColumns, const TVector& dataColumns, - const TGlobalIndexSettings& settings + const TVector& settings ) : TIndexDescription(name, EIndexType::GlobalSync, indexColumns, dataColumns, settings) {} @@ -2297,7 +2297,7 @@ TIndexDescription TIndexDescription::FromProto(const TProto& proto) { EIndexType type; TVector indexColumns; TVector dataColumns; - TGlobalIndexSettings globalIndexSettings; + 
TVector globalIndexSettings; indexColumns.assign(proto.index_columns().begin(), proto.index_columns().end()); dataColumns.assign(proto.data_columns().begin(), proto.data_columns().end()); @@ -2305,18 +2305,24 @@ TIndexDescription TIndexDescription::FromProto(const TProto& proto) { switch (proto.type_case()) { case TProto::kGlobalIndex: type = EIndexType::GlobalSync; - globalIndexSettings = TGlobalIndexSettings::FromProto(proto.global_index().settings()); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_index().settings())); break; case TProto::kGlobalAsyncIndex: type = EIndexType::GlobalAsync; - globalIndexSettings = TGlobalIndexSettings::FromProto(proto.global_async_index().settings()); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_async_index().settings())); break; case TProto::kGlobalUniqueIndex: type = EIndexType::GlobalUnique; - globalIndexSettings = TGlobalIndexSettings::FromProto(proto.global_unique_index().settings()); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_unique_index().settings())); + break; + case TProto::kGlobalVectorKmeansTreeIndex: + type = EIndexType::GlobalVectorKMeansTree; + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_vector_kmeans_tree_index().level_table_settings())); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_vector_kmeans_tree_index().posting_table_settings())); break; default: // fallback to global sync type = EIndexType::GlobalSync; + globalIndexSettings.resize(1); break; } @@ -2337,18 +2343,33 @@ void TIndexDescription::SerializeTo(Ydb::Table::TableIndex& proto) const { *proto.mutable_data_columns() = {DataColumns_.begin(), DataColumns_.end()}; switch (IndexType_) { - case EIndexType::GlobalSync: - GlobalIndexSettings_.SerializeTo(*proto.mutable_global_index()->mutable_settings()); + case EIndexType::GlobalSync: { + auto& settings = 
*proto.mutable_global_index()->mutable_settings(); + if (GlobalIndexSettings_.size() == 1) + GlobalIndexSettings_[0].SerializeTo(settings); break; - case EIndexType::GlobalAsync: - GlobalIndexSettings_.SerializeTo(*proto.mutable_global_async_index()->mutable_settings()); + } + case EIndexType::GlobalAsync: { + auto& settings = *proto.mutable_global_async_index()->mutable_settings(); + if (GlobalIndexSettings_.size() == 1) + GlobalIndexSettings_[0].SerializeTo(settings); break; - case EIndexType::GlobalUnique: - GlobalIndexSettings_.SerializeTo(*proto.mutable_global_unique_index()->mutable_settings()); + } + case EIndexType::GlobalUnique: { + auto& settings = *proto.mutable_global_unique_index()->mutable_settings(); + if (GlobalIndexSettings_.size() == 1) + GlobalIndexSettings_[0].SerializeTo(settings); break; - case EIndexType::GlobalVectorKMeansTree: - *proto.mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); + } + case EIndexType::GlobalVectorKMeansTree: { + auto& level_settings = *proto.mutable_global_vector_kmeans_tree_index()->mutable_level_table_settings(); + auto& posting_settings = *proto.mutable_global_vector_kmeans_tree_index()->mutable_posting_table_settings(); + if (GlobalIndexSettings_.size() == 2) { + GlobalIndexSettings_[0].SerializeTo(level_settings); + GlobalIndexSettings_[1].SerializeTo(posting_settings); + } break; + } case EIndexType::Unknown: break; } diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.h b/ydb/public/sdk/cpp/client/ydb_table/table.h index 4e20f5259f38..db6130b123ac 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.h +++ b/ydb/public/sdk/cpp/client/ydb_table/table.h @@ -202,14 +202,14 @@ class TIndexDescription { EIndexType type, const TVector& indexColumns, const TVector& dataColumns = {}, - const TGlobalIndexSettings& settings = {} + const TVector& settings = {} ); TIndexDescription( const TString& name, const TVector& indexColumns, const TVector& dataColumns = {}, - const 
TGlobalIndexSettings& settings = {} + const TVector& settings = {} ); const TString& GetIndexName() const; @@ -234,7 +234,7 @@ class TIndexDescription { EIndexType IndexType_; TVector IndexColumns_; TVector DataColumns_; - TGlobalIndexSettings GlobalIndexSettings_; + TVector GlobalIndexSettings_; ui64 SizeBytes = 0; }; From e1539e245498d8942ec948da529a121965b34d91 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 12 Jul 2024 12:02:19 +0000 Subject: [PATCH 20/21] ImplTableDescriptions should be persisted in future --- ...chemeshard__operation_create_build_index.cpp | 17 ++++++++++++++--- .../tx/schemeshard/schemeshard_build_index.cpp | 2 ++ .../tx/schemeshard/schemeshard_info_types.h | 2 ++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index 9d84a8727ca0..02353a7f8e77 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -123,10 +123,21 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa }; if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexDesc.GetIndexImplTableDescriptions(0)))); - result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexDesc.GetIndexImplTableDescriptions(1)))); + NKikimrSchemeOp::TTableDescription indexLevelTableDesc, indexPostingTableDesc; + // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS + if (indexDesc.IndexImplTableDescriptionsSize() == 2) { + indexLevelTableDesc = indexDesc.GetIndexImplTableDescriptions(0); + indexPostingTableDesc = indexDesc.GetIndexImplTableDescriptions(1); /* [0] = level table, [1] = posting table */ + }
+ result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexLevelTableDesc))); + result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc))); } else { - NKikimrSchemeOp::TTableDescription implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexDesc.GetIndexImplTableDescriptions(0)); + NKikimrSchemeOp::TTableDescription indexTableDesc; + // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS + if (indexDesc.IndexImplTableDescriptionsSize() == 1) { + indexTableDesc = indexDesc.GetIndexImplTableDescriptions(0); + } + NKikimrSchemeOp::TTableDescription implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexTableDesc); implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); result.push_back(createIndexImplTable(std::move(implTableDesc))); } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp index 08f72ca6c04f..39cd6c870eda 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp @@ -47,6 +47,8 @@ void TSchemeShard::PersistCreateBuildIndex(NIceDb::TNiceDb& db, const TIndexBuil NIceDb::TUpdate(info->Limits.MaxShards), NIceDb::TUpdate(info->Limits.MaxRetries), NIceDb::TUpdate(ui32(info->BuildKind)) + + // TODO save info->ImplTableDescriptions ); ui32 columnNo = 0; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index a7f3a37aec04..298bac3eaeeb 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3063,6 +3063,8 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->IndexName = row.template GetValue(); indexInfo->IndexType = row.template 
GetValue(); + // TODO load indexInfo->ImplTableDescriptions + indexInfo->State = TIndexBuildInfo::EState( row.template GetValue()); indexInfo->Issue = From c93567d1c520776f329db7a41df1c0edda8bb293 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 12 Jul 2024 10:56:43 +0000 Subject: [PATCH 21/21] Vector index in SDK --- ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 70 +++++++ ydb/core/ydb_convert/table_description.cpp | 4 + ydb/public/sdk/cpp/client/ydb_table/out.cpp | 66 +++++++ ydb/public/sdk/cpp/client/ydb_table/table.cpp | 180 +++++++++++++++++- ydb/public/sdk/cpp/client/ydb_table/table.h | 54 +++++- 5 files changed, 362 insertions(+), 12 deletions(-) diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 25925d616bb2..7c5d2f282beb 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -2653,6 +2653,76 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } } + Y_UNIT_TEST(CreateTableWithVectorIndexPublicApi) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + { + auto builder = TTableBuilder() + .AddNullableColumn("Key", EPrimitiveType::Uint64) + .AddNullableColumn("Embedding", EPrimitiveType::String) + .SetPrimaryKeyColumn("Key") + .AddVectorKMeansTreeSecondaryIndex("vector_idx", {"Embedding"}, + { NYdb::NTable::TVectorIndexSettings::EDistance::Cosine, + NYdb::NTable::TVectorIndexSettings::EVectorType::Float, + 1024}); + + auto result = session.CreateTable("/Root/TestTable", builder.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto result = session.DescribeTable("/Root/TestTable").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + + UNIT_ASSERT_VALUES_EQUAL(result.GetTableDescription().GetIndexDescriptions().size(), 1); + auto indexDesc = 
result.GetTableDescription().GetIndexDescriptions()[0]; + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexName(), "vector_idx"); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexType(), EIndexType::GlobalVectorKMeansTree); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding"); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 0); + UNIT_ASSERT_VALUES_EQUAL(std::get(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::EDistance::Cosine); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024); + } + } + + Y_UNIT_TEST(CreateTableWithVectorIndexCoveredPublicApi) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + { + auto builder = TTableBuilder() + .AddNullableColumn("Key", EPrimitiveType::Uint64) + .AddNullableColumn("Embedding", EPrimitiveType::String) + .AddNullableColumn("Covered", EPrimitiveType::String) + .SetPrimaryKeyColumn("Key") + .AddVectorKMeansTreeSecondaryIndex("vector_idx", {"Embedding"}, {"Covered"}, + { NYdb::NTable::TVectorIndexSettings::EDistance::Cosine, + NYdb::NTable::TVectorIndexSettings::EVectorType::Float, + 1024}); + + auto result = session.CreateTable("/Root/TestTable", builder.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto result = session.DescribeTable("/Root/TestTable").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + + UNIT_ASSERT_VALUES_EQUAL(result.GetTableDescription().GetIndexDescriptions().size(), 1); + auto indexDesc = result.GetTableDescription().GetIndexDescriptions()[0]; + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexName(), "vector_idx"); + 
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexType(), EIndexType::GlobalVectorKMeansTree); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding"); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns()[0], "Covered"); + UNIT_ASSERT_VALUES_EQUAL(std::get(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::EDistance::Cosine); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float); + UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024); + } + } + Y_UNIT_TEST(AlterTableWithDecimalColumn) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index a6f3af41a6d0..8952209ddeeb 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -884,6 +884,9 @@ void FillIndexDescriptionImpl(TYdbProto& out, const NKikimrSchemeOp::TTableDescr *index->mutable_global_vector_kmeans_tree_index()->mutable_posting_table_settings(), tableIndex.GetIndexImplTableDescriptions(1) ); + + *index->mutable_global_vector_kmeans_tree_index()->mutable_vector_settings() = tableIndex.GetVectorIndexKmeansTreeDescription().GetSettings(); + break; default: break; @@ -954,6 +957,7 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, case Ydb::Table::TableIndex::kGlobalVectorKmeansTreeIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + *indexDesc->MutableVectorIndexKmeansTreeDescription()->MutableSettings() = index.global_vector_kmeans_tree_index().vector_settings(); break; default: diff --git a/ydb/public/sdk/cpp/client/ydb_table/out.cpp b/ydb/public/sdk/cpp/client/ydb_table/out.cpp index 
1f16dc74bf6c..b00212884f79 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/out.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/out.cpp @@ -23,3 +23,69 @@ Y_DECLARE_OUT_SPEC(, NYdb::NTable::TCreateSessionResult, o, x) { Y_DECLARE_OUT_SPEC(, NYdb::NTable::TDescribeTableResult, o, x) { return x.Out(o); } + +Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EDistance, stream, value) { + auto convertDistance = [] (auto value) -> auto { + switch (value) { + case NYdb::NTable::TVectorIndexSettings::EDistance::Cosine: + return "COSINE"; + case NYdb::NTable::TVectorIndexSettings::EDistance::Manhattan: + return "MANHATTAN"; + case NYdb::NTable::TVectorIndexSettings::EDistance::Euclidean: + return "EUCLIDEAN"; + case NYdb::NTable::TVectorIndexSettings::EDistance::Unknown: + return "UNKNOWN"; + } + }; + + stream << convertDistance(value); +} + +Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::ESimilarity, stream, value) { + auto convertSimilarity = [] (auto value) -> auto { + switch (value) { + case NYdb::NTable::TVectorIndexSettings::ESimilarity::Cosine: + return "COSINE"; + case NYdb::NTable::TVectorIndexSettings::ESimilarity::InnerProduct: + return "INNER_PRODUCT"; + case NYdb::NTable::TVectorIndexSettings::ESimilarity::Unknown: + return "UNKNOWN"; + } + }; + + stream << convertSimilarity(value); +} + +Y_DECLARE_OUT_SPEC(, NYdb::NTable::TVectorIndexSettings::EVectorType, stream, value) { + auto convertVectorType = [] (auto value) -> auto { + switch (value) { + case NYdb::NTable::TVectorIndexSettings::EVectorType::Float: + return "FLOAT"; + case NYdb::NTable::TVectorIndexSettings::EVectorType::Uint8: + return "UINT8"; + case NYdb::NTable::TVectorIndexSettings::EVectorType::Int8: + return "INT8"; + case NYdb::NTable::TVectorIndexSettings::EVectorType::Bit: + return "BIT"; + case NYdb::NTable::TVectorIndexSettings::EVectorType::Unknown: + return "UNKNOWN"; + } + }; + + stream << convertVectorType(value); +} + +Y_DECLARE_OUT_SPEC(, 
NYdb::NTable::TVectorIndexSettings, stream, value) { + stream << "{"; + + if (const auto* distance = std::get_if(&value.Metric)) { + stream << " distance: " << *distance << ""; + } else if (const auto* similarity = std::get_if(&value.Metric)) { + stream << " similarity: " << *similarity << ""; + } + + stream << ", vector_type: " << value.VectorType << ""; + stream << ", vector_dimension: " << value.VectorDimension << ""; + + stream << " }"; +} \ No newline at end of file diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.cpp b/ydb/public/sdk/cpp/client/ydb_table/table.cpp index 202fab47e35c..a340d5dbbf84 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/table.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -468,6 +469,14 @@ class TTableDescription::TImpl { Indexes_.emplace_back(TIndexDescription(indexName, type, indexColumns, dataColumns)); } + void AddVectorIndex(const TString& indexName, EIndexType type, const TVector& indexColumns, const TVectorIndexSettings& vectorIndexSettings) { + Indexes_.emplace_back(TIndexDescription(indexName, type, indexColumns, {}, {}, vectorIndexSettings)); + } + + void AddVectorIndex(const TString& indexName, EIndexType type, const TVector& indexColumns, const TVector& dataColumns, const TVectorIndexSettings& vectorIndexSettings) { + Indexes_.emplace_back(TIndexDescription(indexName, type, indexColumns, dataColumns, {}, vectorIndexSettings)); + } + void AddChangefeed(const TString& name, EChangefeedMode mode, EChangefeedFormat format) { Changefeeds_.emplace_back(name, mode, format); } @@ -764,6 +773,14 @@ void TTableDescription::AddUniqueSecondaryIndex(const TString& indexName, const AddSecondaryIndex(indexName, EIndexType::GlobalUnique, indexColumns, dataColumns); } +void TTableDescription::AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVectorIndexSettings& vectorIndexSettings) { + 
Impl_->AddVectorIndex(indexName, EIndexType::GlobalVectorKMeansTree, indexColumns, vectorIndexSettings); +} + +void TTableDescription::AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns, const TVectorIndexSettings& vectorIndexSettings) { + Impl_->AddVectorIndex(indexName, EIndexType::GlobalVectorKMeansTree, indexColumns, dataColumns, vectorIndexSettings); +} + void TTableDescription::AddSecondaryIndex(const TString& indexName, const TVector& indexColumns) { AddSyncSecondaryIndex(indexName, indexColumns); } @@ -1211,6 +1228,16 @@ TTableBuilder& TTableBuilder::AddUniqueSecondaryIndex(const TString& indexName, return AddSecondaryIndex(indexName, EIndexType::GlobalUnique, indexColumns); } +TTableBuilder& TTableBuilder::AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns, const TVectorIndexSettings& vectorIndexSettings) { + TableDescription_.AddVectorKMeansTreeSecondaryIndex(indexName, indexColumns, dataColumns, vectorIndexSettings); + return *this; +} + +TTableBuilder& TTableBuilder::AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVectorIndexSettings& vectorIndexSettings) { + TableDescription_.AddVectorKMeansTreeSecondaryIndex(indexName, indexColumns, vectorIndexSettings); + return *this; +} + TTableBuilder& TTableBuilder::AddSecondaryIndex(const TString& indexName, const TString& indexColumn) { return AddSyncSecondaryIndex(indexName, indexColumn); } @@ -2215,20 +2242,22 @@ TIndexDescription::TIndexDescription( EIndexType type, const TVector& indexColumns, const TVector& dataColumns, - const TVector& settings + const TVector& globalIndexSettings, + const std::optional& vectorIndexSettings ) : IndexName_(name) , IndexType_(type) , IndexColumns_(indexColumns) , DataColumns_(dataColumns) - , GlobalIndexSettings_(settings) + , GlobalIndexSettings_(globalIndexSettings) + , 
VectorIndexSettings_(vectorIndexSettings) {} TIndexDescription::TIndexDescription( const TString& name, const TVector& indexColumns, const TVector& dataColumns, - const TVector& settings -) : TIndexDescription(name, EIndexType::GlobalSync, indexColumns, dataColumns, settings) + const TVector& globalIndexSettings +) : TIndexDescription(name, EIndexType::GlobalSync, indexColumns, dataColumns, globalIndexSettings) {} TIndexDescription::TIndexDescription(const Ydb::Table::TableIndex& tableIndex) @@ -2255,6 +2284,10 @@ const TVector& TIndexDescription::GetDataColumns() const { return DataColumns_; } +const std::optional& TIndexDescription::GetVectorIndexSettings() const { + return VectorIndexSettings_; +} + ui64 TIndexDescription::GetSizeBytes() const { return SizeBytes; } @@ -2292,12 +2325,128 @@ void TGlobalIndexSettings::SerializeTo(Ydb::Table::GlobalIndexSettings& settings std::visit(std::move(variantVisitor), Partitions); } +template +TVectorIndexSettings TVectorIndexSettings::FromProto(const TProto& proto) { + auto convertDistance = [] (auto distance) -> auto { + switch (distance) { + case Ydb::Table::VectorIndexSettings::DISTANCE_COSINE: + return EDistance::Cosine; + case Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN: + return EDistance::Manhattan; + case Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN: + return EDistance::Euclidean; + default: + return EDistance::Unknown; + } + }; + + auto convertSimilarity = [] (auto similarity) -> auto { + switch (similarity) { + case Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE: + return ESimilarity::Cosine; + case Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT: + return ESimilarity::InnerProduct; + default: + return ESimilarity::Unknown; + } + }; + + auto convertVectorType = [] (auto vectorType) -> auto { + switch (vectorType) { + case Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT: + return EVectorType::Float; + case Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UINT8: + return 
EVectorType::Uint8; + case Ydb::Table::VectorIndexSettings::VECTOR_TYPE_INT8: + return EVectorType::Int8; + case Ydb::Table::VectorIndexSettings::VECTOR_TYPE_BIT: + return EVectorType::Bit; + default: + return EVectorType::Unknown; + } + }; + + + auto metricFromProto = [&](const auto& proto) -> TVectorIndexSettings::TMetric { + switch (proto.metric_case()) { + case TProto::kDistance: + return convertDistance(proto.distance()); + case TProto::kSimilarity: + return convertSimilarity(proto.similarity()); + default: + return {}; + } + }; + + return { + .Metric = metricFromProto(proto), + .VectorType = convertVectorType(proto.vector_type()), + .VectorDimension = proto.vector_dimension() + }; +} + +void TVectorIndexSettings::SerializeTo(Ydb::Table::VectorIndexSettings& settings) const { + auto convertDistance = [] (auto distance) -> auto { + switch (distance) { + case EDistance::Cosine: + return Ydb::Table::VectorIndexSettings::DISTANCE_COSINE; + case EDistance::Manhattan: + return Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN; + case EDistance::Euclidean: + return Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN; + case EDistance::Unknown: + return Ydb::Table::VectorIndexSettings::DISTANCE_UNSPECIFIED; + } + }; + + auto convertSimilarity = [] (auto similarity) -> auto { + switch (similarity) { + case ESimilarity::Cosine: + return Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE; + case ESimilarity::InnerProduct: + return Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT; + case ESimilarity::Unknown: + return Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED; + } + }; + + auto convertVectorType = [] (auto vectorType) -> auto { + switch (vectorType) { + case EVectorType::Float: + return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT; + case EVectorType::Uint8: + return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UINT8; + case EVectorType::Int8: + return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_INT8; + case EVectorType::Bit: + return 
Ydb::Table::VectorIndexSettings::VECTOR_TYPE_BIT; + case EVectorType::Unknown: + return Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED; + } + }; + + + if (const auto* distance = std::get_if(&Metric)) { + settings.set_distance(convertDistance(*distance)); + } else if (const auto* similarity = std::get_if(&Metric)) { + settings.set_similarity(convertSimilarity(*similarity)); + } + + settings.set_vector_type(convertVectorType(VectorType)); + settings.set_vector_dimension(VectorDimension); +} + +void TVectorIndexSettings::Out(IOutputStream& o) const { + o << *this; +} + template TIndexDescription TIndexDescription::FromProto(const TProto& proto) { EIndexType type; TVector indexColumns; TVector dataColumns; TVector globalIndexSettings; + std::optional vectorIndexSettings; indexColumns.assign(proto.index_columns().begin(), proto.index_columns().end()); dataColumns.assign(proto.data_columns().begin(), proto.data_columns().end()); @@ -2315,18 +2464,21 @@ TIndexDescription TIndexDescription::FromProto(const TProto& proto) { type = EIndexType::GlobalUnique; globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_unique_index().settings())); break; - case TProto::kGlobalVectorKmeansTreeIndex: + case TProto::kGlobalVectorKmeansTreeIndex: { type = EIndexType::GlobalVectorKMeansTree; - globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_vector_kmeans_tree_index().level_table_settings())); - globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(proto.global_vector_kmeans_tree_index().posting_table_settings())); + const auto &vectorProto = proto.global_vector_kmeans_tree_index(); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(vectorProto.level_table_settings())); + globalIndexSettings.emplace_back(TGlobalIndexSettings::FromProto(vectorProto.posting_table_settings())); + vectorIndexSettings = TVectorIndexSettings::FromProto(vectorProto.vector_settings()); break; + } default: // fallback to 
global sync type = EIndexType::GlobalSync; globalIndexSettings.resize(1); break; } - auto result = TIndexDescription(proto.name(), type, indexColumns, dataColumns, globalIndexSettings); + auto result = TIndexDescription(proto.name(), type, indexColumns, dataColumns, globalIndexSettings, vectorIndexSettings); if constexpr (std::is_same_v) { result.SizeBytes = proto.size_bytes(); } @@ -2362,12 +2514,17 @@ void TIndexDescription::SerializeTo(Ydb::Table::TableIndex& proto) const { break; } case EIndexType::GlobalVectorKMeansTree: { - auto& level_settings = *proto.mutable_global_vector_kmeans_tree_index()->mutable_level_table_settings(); - auto& posting_settings = *proto.mutable_global_vector_kmeans_tree_index()->mutable_posting_table_settings(); + auto* global_vector_kmeans_tree_index = proto.mutable_global_vector_kmeans_tree_index(); + auto& level_settings = *global_vector_kmeans_tree_index->mutable_level_table_settings(); + auto& posting_settings = *global_vector_kmeans_tree_index->mutable_posting_table_settings(); + auto& vector_settings = *global_vector_kmeans_tree_index->mutable_vector_settings(); if (GlobalIndexSettings_.size() == 2) { GlobalIndexSettings_[0].SerializeTo(level_settings); GlobalIndexSettings_[1].SerializeTo(posting_settings); } + if (VectorIndexSettings_) { + VectorIndexSettings_->SerializeTo(vector_settings); + } break; } case EIndexType::Unknown: @@ -2391,6 +2548,9 @@ void TIndexDescription::Out(IOutputStream& o) const { o << ", data_columns: [" << JoinSeq(", ", DataColumns_) << "]"; } + if (VectorIndexSettings_) { + o << ", vector_settings: " << *VectorIndexSettings_ << ""; + } o << " }"; } diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.h b/ydb/public/sdk/cpp/client/ydb_table/table.h index db6130b123ac..9284665cd76b 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.h +++ b/ydb/public/sdk/cpp/client/ydb_table/table.h @@ -25,6 +25,7 @@ class ChangefeedDescription; class DescribeTableResult; class ExplicitPartitions; class 
GlobalIndexSettings; +class VectorIndexSettings; class PartitioningSettings; class DateTypeColumnModeSettings; class TtlSettings; @@ -192,6 +193,45 @@ struct TGlobalIndexSettings { void SerializeTo(Ydb::Table::GlobalIndexSettings& proto) const; }; +struct TVectorIndexSettings { +public: + enum class EDistance { + Cosine, + Manhattan, + Euclidean, + + Unknown = std::numeric_limits::max() + }; + + enum class ESimilarity { + Cosine, + InnerProduct, + + Unknown = std::numeric_limits::max() + }; + + enum class EVectorType { + Float, + Uint8, + Int8, + Bit, + + Unknown = std::numeric_limits::max() + }; + using TMetric = std::variant; + + TMetric Metric; + EVectorType VectorType; + ui32 VectorDimension; + + template + static TVectorIndexSettings FromProto(const TProto& proto); + + void SerializeTo(Ydb::Table::VectorIndexSettings& settings) const; + + void Out(IOutputStream &o) const; +}; + //! Represents index description class TIndexDescription { friend class NYdb::TProtoAccessor; @@ -202,20 +242,22 @@ class TIndexDescription { EIndexType type, const TVector& indexColumns, const TVector& dataColumns = {}, - const TVector& settings = {} + const TVector& globalIndexSettings = {}, + const std::optional& vectorIndexSettings = {} ); TIndexDescription( const TString& name, const TVector& indexColumns, const TVector& dataColumns = {}, - const TVector& settings = {} + const TVector& globalIndexSettings = {} ); const TString& GetIndexName() const; EIndexType GetIndexType() const; const TVector& GetIndexColumns() const; const TVector& GetDataColumns() const; + const std::optional& GetVectorIndexSettings() const; ui64 GetSizeBytes() const; void SerializeTo(Ydb::Table::TableIndex& proto) const; @@ -235,6 +277,7 @@ class TIndexDescription { TVector IndexColumns_; TVector DataColumns_; TVector GlobalIndexSettings_; + std::optional VectorIndexSettings_; ui64 SizeBytes = 0; }; @@ -608,6 +651,9 @@ class TTableDescription { // unique void AddUniqueSecondaryIndex(const TString& indexName, 
const TVector& indexColumns); void AddUniqueSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns); + // vector KMeansTree + void AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVectorIndexSettings& vectorIndexSettings); + void AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns, const TVectorIndexSettings& vectorIndexSettings); // default void AddSecondaryIndex(const TString& indexName, const TVector& indexColumns); @@ -827,6 +873,10 @@ class TTableBuilder { TTableBuilder& AddUniqueSecondaryIndex(const TString& indexName, const TVector& indexColumns); TTableBuilder& AddUniqueSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns); + // vector KMeansTree + TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVectorIndexSettings& vectorIndexSettings); + TTableBuilder& AddVectorKMeansTreeSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns, const TVectorIndexSettings& vectorIndexSettings); + // default TTableBuilder& AddSecondaryIndex(const TString& indexName, const TVector& indexColumns, const TVector& dataColumns); TTableBuilder& AddSecondaryIndex(const TString& indexName, const TVector& indexColumns);