diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index b24e0b607d5e..d82aec792dd9 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -1,5 +1,6 @@ #include "table_index.h" +#include #include namespace NKikimr::NTableIndex { @@ -61,13 +62,40 @@ constexpr std::string_view PrefixedGlobalKMeansTreeImplTables[] = { }; static_assert(std::is_sorted(std::begin(PrefixedGlobalKMeansTreeImplTables), std::end(PrefixedGlobalKMeansTreeImplTables))); +constexpr std::string_view GlobalFulltextImplTables[] = { + ImplTable, +}; +static_assert(std::is_sorted(std::begin(GlobalFulltextImplTables), std::end(GlobalFulltextImplTables))); + +bool IsSecondaryIndex(NKikimrSchemeOp::EIndexType indexType) { + switch (indexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + return true; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + return false; + default: + Y_ENSURE(false, InvalidIndexType(indexType)); + } } -TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) { +} + +TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) { TTableColumns result; - const bool isSecondaryIndex = type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree; - std::for_each(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1), [&] (const auto& ik) { + const bool isSecondaryIndex = IsSecondaryIndex(indexType); + + auto takeKeyColumns = index.KeyColumns.size(); + if (!isSecondaryIndex) { // vector and fulltext indexes have special embedding and text key columns + Y_ASSERT(indexType == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree + || indexType == NKikimrSchemeOp::EIndexTypeGlobalFulltext); + takeKeyColumns--; + } + + std::for_each(index.KeyColumns.begin(), index.KeyColumns.begin() + takeKeyColumns, [&] (const auto& ik) { result.Keys.push_back(ik); result.Columns.emplace(ik); }); @@ -85,6 +113,18 @@ TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const T return result; } +NKikimrSchemeOp::EIndexType GetIndexType(NKikimrSchemeOp::TIndexCreationConfig indexCreation) { + // TODO: always provide EIndexTypeGlobal value instead of null + // TODO: do not cast unknown index types to EIndexTypeGlobal (proto2 specific) + return indexCreation.HasType() + ? indexCreation.GetType() + : NKikimrSchemeOp::EIndexTypeGlobal; +} + +TString InvalidIndexType(NKikimrSchemeOp::EIndexType indexType) { + return TStringBuilder() << "Invalid index type " << static_cast(indexType); +} + bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) { if (const auto* broken = IsContains(table.Keys, table.Columns)) { explain = TStringBuilder() @@ -127,7 +167,7 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn return false; } - const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree; + const bool isSecondaryIndex = IsSecondaryIndex(indexType); if (index.KeyColumns.size() < 1) { explain = "should be at least single index key column"; @@ -157,7 +197,9 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn if (isSecondaryIndex) { tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end()); } else { - // Vector indexes allow to add all columns both to index & data + // Vector and fulltext indexes allow to add all columns both to index & data + Y_ASSERT(indexType == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree + || indexType == NKikimrSchemeOp::EIndexTypeGlobalFulltext); } if (const auto* broken = IsContains(index.DataColumns, tmp, true)) { explain = TStringBuilder() @@ -167,15 +209,37 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn return true; } +bool DoesIndexSupportTTL(NKikimrSchemeOp::EIndexType indexType) { + switch (indexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + return true; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + return false; + default: + Y_DEBUG_ABORT_S(InvalidIndexType(indexType)); + return false; + } +} + std::span GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span indexKeys) { - if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - if (indexKeys.size() == 1) { - return GlobalKMeansTreeImplTables; - } else { - return PrefixedGlobalKMeansTreeImplTables; - } - } else { - return GlobalSecondaryImplTables; + switch (indexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + return GlobalSecondaryImplTables; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + if (indexKeys.size() == 1) { + return GlobalKMeansTreeImplTables; + } else { + return PrefixedGlobalKMeansTreeImplTables; + } + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + return GlobalFulltextImplTables; + default: + Y_ENSURE(false, InvalidIndexType(indexType)); } } diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index ffe3681f0ae2..201e40ae99f6 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -38,8 +38,13 @@ struct TIndexColumns { inline constexpr const char* ImplTable = "indexImplTable"; -bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain); -TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index); +bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain); +TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index); + +bool DoesIndexSupportTTL(NKikimrSchemeOp::EIndexType indexType); + +NKikimrSchemeOp::EIndexType GetIndexType(NKikimrSchemeOp::TIndexCreationConfig indexCreation); +TString InvalidIndexType(NKikimrSchemeOp::EIndexType indexType); std::span GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span indexKeys); bool IsImplTable(std::string_view tableName); @@ -76,6 +81,13 @@ TClusterId SetPostingParentFlag(TClusterId parent); } +namespace NFulltext { + // TODO: support utf-8 in fulltext index + inline constexpr const char* TokenTypeName = "String"; + + inline constexpr const char* TokenColumn = "__ydb_token"; +} + TString ToShortDebugString(const NKikimrTxDataShard::TEvReshuffleKMeansRequest& record); TString ToShortDebugString(const NKikimrTxDataShard::TEvRecomputeKMeansRequest& record); TString ToShortDebugString(const NKikimrTxDataShard::TEvRecomputeKMeansResponse& record); diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index a6096a6b32f9..14b5374e4d8c 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -1158,6 +1158,7 @@ enum EIndexType { EIndexTypeGlobalAsync = 2; EIndexTypeGlobalUnique = 3; EIndexTypeGlobalVectorKmeansTree = 4; + EIndexTypeGlobalFulltext = 5; } enum EIndexState { @@ -1171,6 +1172,10 @@ message TVectorIndexKmeansTreeDescription { optional Ydb.Table.KMeansTreeSettings Settings = 1; } +message TFulltextIndexDescription { + optional Ydb.Table.FulltextIndexSettings Settings = 1; +} + message TIndexDescription { optional string Name = 1; optional uint64 LocalPathId = 2; @@ -1192,6 +1197,7 @@ message TIndexDescription { oneof SpecializedIndexDescription { TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 11; + TFulltextIndexDescription FulltextIndexDescription = 12; } } @@ -1204,6 +1210,7 @@ message TIndexCreationConfig { repeated string DataColumnNames = 6; //columns to be denormalized to read data just from index oneof SpecializedIndexDescription { TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 7; + TFulltextIndexDescription FulltextIndexDescription = 8; } } @@ -1985,6 +1992,7 @@ enum EPathSubType { EPathSubTypeAsyncIndexImplTable = 2; EPathSubTypeStreamImpl = 3; EPathSubTypeVectorKmeansTreeIndexImplTable = 4; + EPathSubTypeFulltextIndexImplTable = 5; } enum EPathState { diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 7be4d189d636..88053314774c 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -898,6 +898,8 @@ class TSchemeCache: public TMonitorableActor { return NSchemeCache::ETableKind::KindAsyncIndexTable; case NKikimrSchemeOp::EPathSubTypeVectorKmeansTreeIndexImplTable: return NSchemeCache::ETableKind::KindVectorIndexTable; + case NKikimrSchemeOp::EPathSubTypeFulltextIndexImplTable: + return NSchemeCache::ETableKind::KindFulltextIndexTable; default: return NSchemeCache::ETableKind::KindRegularTable; } diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index 3c9ecc98b746..28379746a7cf 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -153,6 +153,7 @@ enum class ETableKind { KindSyncIndexTable = 2, KindAsyncIndexTable = 3, KindVectorIndexTable = 4, + KindFulltextIndexTable = 5, }; struct TSchemeCacheNavigate { diff --git a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp index 50dadbd3cbf8..2314e8590f87 100644 --- a/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp @@ -1,6 +1,8 @@ #include "schemeshard_impl.h" #include +#include +#include namespace NKikimr { namespace NSchemeShard { @@ -239,7 +241,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase { auto index = GetIndex(childPath); if (index->Type == NKikimrSchemeOp::EIndexTypeGlobalAsync - || index->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { + || !DoesIndexSupportTTL(index->Type)) { continue; } @@ -276,7 +278,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase { } static TVector> MakeColumnIds(TTableInfo::TPtr mainTable, TTableIndexInfo::TPtr index, TTableInfo::TPtr indexImplTable) { - Y_ABORT_UNLESS(index->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); + Y_ABORT_UNLESS(DoesIndexSupportTTL(index->Type)); TVector> result; THashSet keys; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp index adc9ba3ead1b..b571fdde52ae 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_consistent_copy_tables.cpp @@ -50,11 +50,23 @@ static std::optional CreateIndexTask(NKikimr::NS *operation->MutableDataColumnNames()->Add() = dataColumn; } - if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - *operation->MutableVectorIndexKmeansTreeDescription() = - std::get(indexInfo->SpecializedIndexDescription); - } else if (!std::holds_alternative(indexInfo->SpecializedIndexDescription)) { - return {}; + switch (indexInfo->Type) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(std::holds_alternative(indexInfo->SpecializedIndexDescription)); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + *operation->MutableVectorIndexKmeansTreeDescription() = + std::get(indexInfo->SpecializedIndexDescription); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + *operation->MutableFulltextIndexDescription() = + std::get(indexInfo->SpecializedIndexDescription); + break; + default: + return {}; // reject } return scheme; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index e8103b6a4e60..accf49d53c62 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -852,12 +852,24 @@ TVector CreateCopyTable(TOperationId nextId, const TTxTrans for (const auto& dataColumn: indexInfo->IndexDataColumns) { *operation->MutableDataColumnNames()->Add() = dataColumn; } - if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - *operation->MutableVectorIndexKmeansTreeDescription() = - std::get(indexInfo->SpecializedIndexDescription); - } else if (!std::holds_alternative(indexInfo->SpecializedIndexDescription)) { - return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, - TStringBuilder{} << "Copy table doesn't support table with index type " << indexInfo->Type)}; + + switch (indexInfo->Type) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(std::holds_alternative(indexInfo->SpecializedIndexDescription)); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + *operation->MutableVectorIndexKmeansTreeDescription() = + std::get(indexInfo->SpecializedIndexDescription); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + *operation->MutableFulltextIndexDescription() = + std::get(indexInfo->SpecializedIndexDescription); + break; + default: + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, InvalidIndexType(indexInfo->Type))}; } result.push_back(CreateNewTableIndex(NextPartId(nextId, result), schema)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index a135183dfbcf..133d9670700a 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -40,6 +40,30 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa const auto& op = tx.GetInitiateIndexBuild(); const auto& indexDesc = op.GetIndex(); + switch (GetIndexType(indexDesc)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + // no feature flag, everything is fine + break; + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + if (!context.SS->EnableInitialUniqueIndex) { + return {CreateReject(opId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Adding a unique index to an existing table is disabled")}; + } + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + if (!context.SS->EnableVectorIndex) { + return {CreateReject(opId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Vector index support is disabled")}; + } + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + if (!context.SS->EnableFulltextIndex) { + return {CreateReject(opId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")}; + } + break; + default: + return {CreateReject(opId, NKikimrScheme::EStatus::StatusPreconditionFailed, InvalidIndexType(indexDesc.GetType()))}; + } + const auto table = TPath::Resolve(op.GetTable(), context.SS); const auto index = table.Child(indexDesc.GetName()); { @@ -93,15 +117,14 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa } TVector result; - const NKikimrSchemeOp::EIndexType indexType = indexDesc.HasType() ? indexDesc.GetType() : NKikimrSchemeOp::EIndexTypeGlobal; { auto outTx = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateTableIndex); *outTx.MutableLockGuard() = tx.GetLockGuard(); outTx.MutableCreateTableIndex()->CopyFrom(indexDesc); + outTx.MutableCreateTableIndex()->SetType(GetIndexType(indexDesc)); outTx.MutableCreateTableIndex()->SetState(NKikimrSchemeOp::EIndexStateWriteOnly); outTx.SetInternal(tx.GetInternal()); - outTx.MutableCreateTableIndex()->SetType(indexType); result.push_back(CreateNewTableIndex(NextPartId(opId, result), outTx)); } @@ -118,7 +141,7 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa } auto createImplTable = [&](NKikimrSchemeOp::TTableDescription&& implTableDesc) { - if (indexType != NKikimrSchemeOp::EIndexTypeGlobalUnique) { + if (GetIndexType(indexDesc) != NKikimrSchemeOp::EIndexTypeGlobalUnique) { implTableDesc.MutablePartitionConfig()->SetShadowData(true); } @@ -129,34 +152,56 @@ TVector CreateBuildIndex(TOperationId opId, const TTxTransa return CreateInitializeBuildIndexImplTable(NextPartId(opId, result), outTx); }; - if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - const bool prefixVectorIndex = indexDesc.GetKeyColumnNames().size() > 1; - NKikimrSchemeOp::TTableDescription indexLevelTableDesc, indexPostingTableDesc, indexPrefixTableDesc; - // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS - if (indexDesc.IndexImplTableDescriptionsSize() == 2 + prefixVectorIndex) { - indexLevelTableDesc = indexDesc.GetIndexImplTableDescriptions(0); - indexPostingTableDesc = indexDesc.GetIndexImplTableDescriptions(1); - if (prefixVectorIndex) { - indexPrefixTableDesc = indexDesc.GetIndexImplTableDescriptions(2); + switch (GetIndexType(indexDesc)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: { + NKikimrSchemeOp::TTableDescription indexTableDesc; + // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS + if (indexDesc.IndexImplTableDescriptionsSize() == 1) { + indexTableDesc = indexDesc.GetIndexImplTableDescriptions(0); } + auto implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexTableDesc); + // TODO if keep erase markers also speedup compaction or something else we can enable it for other impl tables too + implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); + result.push_back(createImplTable(std::move(implTableDesc))); + break; } - const THashSet indexDataColumns{indexDesc.GetDataColumnNames().begin(), indexDesc.GetDataColumnNames().end()}; - result.push_back(createImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexLevelTableDesc))); - result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), indexDataColumns, indexPostingTableDesc))); - if (prefixVectorIndex) { - const THashSet prefixColumns{indexDesc.GetKeyColumnNames().begin(), indexDesc.GetKeyColumnNames().end() - 1}; - result.push_back(createImplTable(CalcVectorKmeansTreePrefixImplTableDesc(prefixColumns, tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPrefixTableDesc))); + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { + const bool prefixVectorIndex = indexDesc.GetKeyColumnNames().size() > 1; + NKikimrSchemeOp::TTableDescription indexLevelTableDesc, indexPostingTableDesc, indexPrefixTableDesc; + // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS + if (indexDesc.IndexImplTableDescriptionsSize() == 2 + prefixVectorIndex) { + indexLevelTableDesc = indexDesc.GetIndexImplTableDescriptions(0); + indexPostingTableDesc = indexDesc.GetIndexImplTableDescriptions(1); + if (prefixVectorIndex) { + indexPrefixTableDesc = indexDesc.GetIndexImplTableDescriptions(2); + } + } + const THashSet indexDataColumns{indexDesc.GetDataColumnNames().begin(), indexDesc.GetDataColumnNames().end()}; + result.push_back(createImplTable(CalcVectorKmeansTreeLevelImplTableDesc(tableInfo->PartitionConfig(), indexLevelTableDesc))); + result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), indexDataColumns, indexPostingTableDesc))); + if (prefixVectorIndex) { + const THashSet prefixColumns{indexDesc.GetKeyColumnNames().begin(), indexDesc.GetKeyColumnNames().end() - 1}; + result.push_back(createImplTable(CalcVectorKmeansTreePrefixImplTableDesc(prefixColumns, tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPrefixTableDesc))); + } + break; } - } else { - NKikimrSchemeOp::TTableDescription indexTableDesc; - // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS - if (indexDesc.IndexImplTableDescriptionsSize() == 1) { - indexTableDesc = indexDesc.GetIndexImplTableDescriptions(0); + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: { + NKikimrSchemeOp::TTableDescription indexTableDesc; + // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS + if (indexDesc.IndexImplTableDescriptionsSize() == 1) { + indexTableDesc = indexDesc.GetIndexImplTableDescriptions(0); + } + const THashSet indexDataColumns{indexDesc.GetDataColumnNames().begin(), indexDesc.GetDataColumnNames().end()}; + auto implTableDesc = CalcFulltextImplTableDesc(tableInfo, tableInfo->PartitionConfig(), indexDataColumns, indexTableDesc); + implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); + result.push_back(createImplTable(std::move(implTableDesc))); + break; } - auto implTableDesc = CalcImplTableDesc(tableInfo, implTableColumns, indexTableDesc); - // TODO if keep erase markers also speedup compaction or something else we can enable it for other impl tables too - implTableDesc.MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(true); - result.push_back(createImplTable(std::move(implTableDesc))); + default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(indexDesc.GetType())); + break; } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index 6ef3046f5df3..837b64748184 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -127,6 +127,31 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr TTableColumns baseTableColumns = ExtractInfo(baseTableDescription); for (auto& indexDescription: indexedTable.GetIndexDescription()) { const auto& indexName = indexDescription.GetName(); + + switch (GetIndexType(indexDescription)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + // no feature flag, everything is fine + break; + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + if (!context.SS->EnableInitialUniqueIndex) { + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Unique constraint feature is disabled")}; + } + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + if (!context.SS->EnableVectorIndex) { + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Vector index support is disabled")}; + } + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + if (!context.SS->EnableFulltextIndex) { + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")}; + } + break; + default: + return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, InvalidIndexType(indexDescription.GetType()))}; + } + bool uniformIndexTable = false; if (indexDescription.IndexImplTableDescriptionsSize()) { if (indexDescription.GetIndexImplTableDescriptions(0).HasUniformPartitionsCount()) { @@ -241,29 +266,6 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr } for (auto& indexDescription: indexedTable.GetIndexDescription()) { - const auto indexType = indexDescription.HasType() - ? indexDescription.GetType() - : NKikimrSchemeOp::EIndexTypeGlobal; - - switch (indexType) { - case NKikimrSchemeOp::EIndexTypeInvalid: - return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Invalid index type")}; - case NKikimrSchemeOp::EIndexTypeGlobal: - case NKikimrSchemeOp::EIndexTypeGlobalAsync: - // no feature flag, everything is fine - break; - case NKikimrSchemeOp::EIndexTypeGlobalUnique: - if (!context.SS->EnableInitialUniqueIndex) { - return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Unique constraint feature is disabled")}; - } - break; - case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: - if (!context.SS->EnableVectorIndex) { - return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Vector index support is disabled")}; - } - break; - } - { auto scheme = TransactionTemplate( tx.GetWorkingDir() + "/" + baseTableDescription.GetName(), @@ -273,7 +275,7 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr scheme.SetInternal(tx.GetInternal()); scheme.MutableCreateTableIndex()->CopyFrom(indexDescription); - scheme.MutableCreateTableIndex()->SetType(indexType); + scheme.MutableCreateTableIndex()->SetType(GetIndexType(indexDescription)); result.push_back(CreateNewTableIndex(NextPartId(nextId, result), scheme)); } @@ -292,32 +294,51 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr }; const auto& implTableColumns = indexes.at(indexDescription.GetName()); - if (indexDescription.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - const bool prefixVectorIndex = indexDescription.GetKeyColumnNames().size() > 1; - NKikimrSchemeOp::TTableDescription userLevelDesc, userPostingDesc, userPrefixDesc; - if (indexDescription.IndexImplTableDescriptionsSize() == 2 + prefixVectorIndex) { - // This description provided by user to override partition policy - userLevelDesc = indexDescription.GetIndexImplTableDescriptions(0); - userPostingDesc = indexDescription.GetIndexImplTableDescriptions(1); - if (prefixVectorIndex) { - userPrefixDesc = indexDescription.GetIndexImplTableDescriptions(2); + switch (GetIndexType(indexDescription)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: { + NKikimrSchemeOp::TTableDescription userIndexDesc; + if (indexDescription.IndexImplTableDescriptionsSize()) { + // This description provided by user to override partition policy + userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); } + result.push_back(createIndexImplTable(CalcImplTableDesc(baseTableDescription, implTableColumns, userIndexDesc))); + break; } - const THashSet indexDataColumns{indexDescription.GetDataColumnNames().begin(), indexDescription.GetDataColumnNames().end()}; - result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc))); - result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), indexDataColumns, userPostingDesc))); - if (prefixVectorIndex) { - const THashSet prefixColumns{indexDescription.GetKeyColumnNames().begin(), indexDescription.GetKeyColumnNames().end() - 1}; - result.push_back(createIndexImplTable(CalcVectorKmeansTreePrefixImplTableDesc(prefixColumns, baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPrefixDesc))); + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { + const bool prefixVectorIndex = indexDescription.GetKeyColumnNames().size() > 1; + NKikimrSchemeOp::TTableDescription userLevelDesc, userPostingDesc, userPrefixDesc; + if (indexDescription.IndexImplTableDescriptionsSize() == 2 + prefixVectorIndex) { + // This description provided by user to override partition policy + userLevelDesc = indexDescription.GetIndexImplTableDescriptions(0); + userPostingDesc = indexDescription.GetIndexImplTableDescriptions(1); + if (prefixVectorIndex) { + userPrefixDesc = indexDescription.GetIndexImplTableDescriptions(2); + } + } + const THashSet indexDataColumns{indexDescription.GetDataColumnNames().begin(), indexDescription.GetDataColumnNames().end()}; + result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc))); + result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), indexDataColumns, userPostingDesc))); + if (prefixVectorIndex) { + const THashSet prefixColumns{indexDescription.GetKeyColumnNames().begin(), indexDescription.GetKeyColumnNames().end() - 1}; + result.push_back(createIndexImplTable(CalcVectorKmeansTreePrefixImplTableDesc(prefixColumns, baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPrefixDesc))); + } + break; } - } else { - NKikimrSchemeOp::TTableDescription userIndexDesc; - if (indexDescription.IndexImplTableDescriptionsSize()) { - // This description provided by user to override partition policy - userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: { + NKikimrSchemeOp::TTableDescription userIndexDesc; + if (indexDescription.IndexImplTableDescriptionsSize()) { + // This description provided by user to override partition policy + userIndexDesc = indexDescription.GetIndexImplTableDescriptions(0); + } + const THashSet indexDataColumns{indexDescription.GetDataColumnNames().begin(), indexDescription.GetDataColumnNames().end()}; + result.push_back(createIndexImplTable(CalcFulltextImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), indexDataColumns, userIndexDesc))); + break; } - - result.push_back(createIndexImplTable(CalcImplTableDesc(baseTableDescription, implTableColumns, userIndexDesc))); + default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(indexDescription.GetType())); + break; } } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp index 65d1401d0c49..8cb8de853d30 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp @@ -1,6 +1,7 @@ #include "schemeshard_build_index.h" #include "schemeshard_impl.h" +#include namespace NKikimr { namespace NSchemeShard { @@ -95,11 +96,25 @@ void TSchemeShard::PersistCreateBuildIndex(NIceDb::TNiceDb& db, const TIndexBuil *serializableRepresentation.AddIndexImplTableDescriptions() = description; } - std::visit([&](const T& specializedDescription) { - if constexpr (std::is_same_v) { - *serializableRepresentation.MutableVectorIndexKmeansTreeDescription() = specializedDescription; - } - }, info.SpecializedIndexDescription); + switch (info.IndexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(std::holds_alternative(info.SpecializedIndexDescription)); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + *serializableRepresentation.MutableVectorIndexKmeansTreeDescription() = + std::get(info.SpecializedIndexDescription); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + *serializableRepresentation.MutableFulltextIndexDescription() = + std::get(info.SpecializedIndexDescription); + break; + default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(info.IndexType)); + break; + } persistedBuildIndex.Update( NIceDb::TUpdate(serializableRepresentation.SerializeAsString()) diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index af657956498a..e3c58897e201 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -5,6 +5,7 @@ #include "schemeshard_utils.h" // for NTableIndex::CommonCheck #include "schemeshard_xxport__helpers.h" +#include #include namespace NKikimr::NSchemeShard { @@ -157,6 +158,11 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder return makeReply(explain); } + if (tableInfo->IsTTLEnabled() && !DoesIndexSupportTTL(buildInfo->IndexType)) { + return Reply(Ydb::StatusIds::PRECONDITION_FAILED, + TStringBuilder() << buildInfo->IndexType << " index doesn't support TTL"); + } + NKikimrSchemeOp::TIndexBuildConfig tmpConfig; buildInfo->SerializeToProto(Self, &tmpConfig); const auto indexDesc = tmpConfig.GetIndex(); @@ -234,7 +240,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder buildInfo.BuildKind = TIndexBuildInfo::EBuildKind::BuildSecondaryIndex; buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalAsync; break; - case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: + case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: { if (!Self->EnableAddUniqueIndex) { explain = "Adding a unique index to an existing table is disabled"; return false; @@ -242,6 +248,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder buildInfo.BuildKind = TIndexBuildInfo::EBuildKind::BuildSecondaryUniqueIndex; buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalUnique; break; + } case Ydb::Table::TableIndex::TypeCase::kGlobalVectorKmeansTreeIndex: { if (!Self->EnableVectorIndex) { explain = "Vector index support is disabled"; @@ -264,8 +271,13 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder break; } case Ydb::Table::TableIndex::TypeCase::kGlobalFulltextIndex: { - explain = "Fulltext index support is disabled"; - return false; + if (!Self->EnableFulltextIndex) { + explain = "Fulltext index support is disabled"; + return false; + } + buildInfo.BuildKind = TIndexBuildInfo::EBuildKind::BuildFulltext; + buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext; + break; } }; diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp index 771239e89d89..675b2cde48ef 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index_tx_base.cpp @@ -301,8 +301,11 @@ void TSchemeShard::TIndexBuilder::TTxBase::Fill(NKikimrIndexBuilder::TIndexBuild case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree: *index.mutable_global_vector_kmeans_tree_index() = Ydb::Table::GlobalVectorKMeansTreeIndex(); break; + case NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext: + *index.mutable_global_fulltext_index() = Ydb::Table::GlobalFulltextIndex(); + break; default: - Y_ABORT("Unreachable"); + Y_ENSURE(false, InvalidIndexType(info.IndexType)); } } else if (info.IsBuildColumns()) { for(const auto& column : info.BuildColumns) { diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index ecc7f006b675..9bd9e60c5bbb 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -5041,6 +5041,7 @@ void TSchemeShard::OnActivateExecutor(const TActorContext &ctx) { EnableVectorIndex = appData->FeatureFlags.GetEnableVectorIndex(); EnableInitialUniqueIndex = appData->FeatureFlags.GetEnableUniqConstraint(); EnableAddUniqueIndex = appData->FeatureFlags.GetEnableAddUniqueIndex(); + EnableFulltextIndex = appData->FeatureFlags.GetEnableFulltextIndex(); EnableResourcePoolsOnServerless = appData->FeatureFlags.GetEnableResourcePoolsOnServerless(); EnableExternalDataSourcesOnServerless = appData->FeatureFlags.GetEnableExternalDataSourcesOnServerless(); EnableShred = appData->FeatureFlags.GetEnableDataErasure(); @@ -7762,6 +7763,7 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TFeatureFlags& featu EnableVectorIndex = featureFlags.GetEnableVectorIndex(); EnableInitialUniqueIndex = featureFlags.GetEnableUniqConstraint(); EnableAddUniqueIndex = featureFlags.GetEnableAddUniqueIndex(); + EnableFulltextIndex = featureFlags.GetEnableFulltextIndex(); EnableExternalDataSourcesOnServerless = featureFlags.GetEnableExternalDataSourcesOnServerless(); EnableShred = featureFlags.GetEnableDataErasure(); EnableExternalSourceSchemaInference = featureFlags.GetEnableExternalSourceSchemaInference(); diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index b05cdccce12b..79a22610e74a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -358,6 +358,7 @@ class TSchemeShard bool EnableVectorIndex = false; bool EnableInitialUniqueIndex = false; bool EnableAddUniqueIndex = false; + bool EnableFulltextIndex = false; bool EnableExternalDataSourcesOnServerless = false; bool EnableShred = false; bool EnableExternalSourceSchemaInference = false; diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 41ba2c60bfca..237f71af5a84 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -5,10 +5,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -617,8 +619,8 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( if (op.HasTTLSettings()) { for (const auto& indexDescription : op.GetTableIndexes()) { - if (indexDescription.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - errStr = "Table with vector indexes doesn't support TTL"; + if (!DoesIndexSupportTTL(indexDescription.GetType())) { + errStr = TStringBuilder() << "Table with " << indexDescription.GetType() << " index doesn't support TTL"; return nullptr; } } @@ -2289,8 +2291,22 @@ void TIndexBuildInfo::SerializeToProto(TSchemeShard* ss, NKikimrSchemeOp::TIndex ImplTableDescriptions.end() }; - if (IndexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - *index.MutableVectorIndexKmeansTreeDescription() = std::get(SpecializedIndexDescription); + switch (IndexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(std::holds_alternative(SpecializedIndexDescription)); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + *index.MutableVectorIndexKmeansTreeDescription() = std::get(SpecializedIndexDescription); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + *index.MutableFulltextIndexDescription() = std::get(SpecializedIndexDescription); + break; + default: + Y_DEBUG_ABORT_S(InvalidIndexType(IndexType)); + break; } } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index d1782cbd8ca3..39b9eb963fd5 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -8,6 +8,8 @@ #include "schemeshard_tx_infly.h" #include "schemeshard_types.h" +#include +#include #include #include #include @@ -2443,9 +2445,30 @@ struct TTableIndexInfo : public TSimpleRefCount { , Type(type) , State(state) { - if (type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - Y_ENSURE(SpecializedIndexDescription.emplace() - .ParseFromString(description)); + switch (type) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(description.empty()); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { + auto success = SpecializedIndexDescription + .emplace() + .ParseFromString(description); + Y_ENSURE(success, description); + break; + } + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: { + auto success = SpecializedIndexDescription + .emplace() + .ParseFromString(description); + Y_ENSURE(success, description); + break; + } + default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(type)); + break; } } @@ -2494,8 +2517,21 @@ struct TTableIndexInfo : public TSimpleRefCount { alterData->State = config.HasState() ? config.GetState() : EState::EIndexStateReady; - if (config.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - alterData->SpecializedIndexDescription = config.GetVectorIndexKmeansTreeDescription(); + switch (GetIndexType(config)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + alterData->SpecializedIndexDescription = config.GetVectorIndexKmeansTreeDescription(); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + alterData->SpecializedIndexDescription = config.GetFulltextIndexDescription(); + break; + default: + errMsg += InvalidIndexType(config.GetType()); + return nullptr; } return result; @@ -2510,7 +2546,9 @@ struct TTableIndexInfo : public TSimpleRefCount { TTableIndexInfo::TPtr AlterData = nullptr; - std::variant SpecializedIndexDescription; + std::variant SpecializedIndexDescription; }; struct TCdcStreamSettings { @@ -3112,6 +3150,7 @@ struct TIndexBuildInfo: public TSimpleRefCount { BuildPrefixedVectorIndex = 12, BuildSecondaryUniqueIndex = 13, BuildColumns = 20, + BuildFulltext = 30, }; TActorId CreateSender; @@ -3140,7 +3179,9 @@ struct TIndexBuildInfo: public TSimpleRefCount { TString TargetName; TVector ImplTableDescriptions; - std::variant SpecializedIndexDescription; + std::variant SpecializedIndexDescription; struct TKMeans { // TODO(mbkkt) move to TVectorIndexKmeansTreeDescription @@ -3590,11 +3631,17 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->Clusters = NKikimr::NKMeans::CreateClusters(desc.settings().settings(), indexInfo->KMeans.Rounds, createError); Y_ENSURE(indexInfo->Clusters, createError); indexInfo->SpecializedIndexDescription = std::move(desc); - } break; + break; + } + case NKikimrSchemeOp::TIndexCreationConfig::kFulltextIndexDescription: { + auto& desc = *creationConfig.MutableFulltextIndexDescription(); + indexInfo->SpecializedIndexDescription = std::move(desc); + break; + } case NKikimrSchemeOp::TIndexCreationConfig::SPECIALIZEDINDEXDESCRIPTION_NOT_SET: /* do nothing */ break; - } + } } LOG_DEBUG_S(TlsActivationContext->AsActorContext(), NKikimrServices::BUILD_INDEX, diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index a95baf0632da..f47317b534f6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -1,5 +1,6 @@ #include "schemeshard_path_describer.h" +#include #include #include @@ -216,8 +217,10 @@ TPathElement::EPathSubType TPathDescriber::CalcPathSubType(const TPath& path) { return TPathElement::EPathSubType::EPathSubTypeSyncIndexImplTable; case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: return TPathElement::EPathSubType::EPathSubTypeVectorKmeansTreeIndexImplTable; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + return TPathElement::EPathSubType::EPathSubTypeFulltextIndexImplTable; default: - Y_DEBUG_ABORT("%s", (TStringBuilder() << "unexpected indexInfo->Type# " << indexInfo->Type).data()); + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(indexInfo->Type)); return TPathElement::EPathSubType::EPathSubTypeEmpty; } } else if (parentPath.IsCdcStream()) { @@ -1456,14 +1459,23 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name } entry.SetDataSize(dataSize); - if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) { - if (const auto* vectorIndexKmeansTreeDescription = std::get_if(&indexInfo->SpecializedIndexDescription)) { - *entry.MutableVectorIndexKmeansTreeDescription() = *vectorIndexKmeansTreeDescription; - } else { - Y_FAIL_S("SpecializedIndexDescription should be set"); - } + switch (indexInfo->Type) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + // no specialized index description + Y_ASSERT(std::holds_alternative(indexInfo->SpecializedIndexDescription)); + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: + *entry.MutableVectorIndexKmeansTreeDescription() = std::get(indexInfo->SpecializedIndexDescription); + break; + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + *entry.MutableFulltextIndexDescription() = std::get(indexInfo->SpecializedIndexDescription); + break; + default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(indexInfo->Type)); + break; } - } void TSchemeShard::DescribeCdcStream(const TPathId& pathId, const TString& name, diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index 9535551467e6..29dc797882bb 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -17,6 +17,9 @@ namespace NKikimr::NSchemeShard { inline constexpr auto ClusterIdTypeId = NScheme::NTypeIds::Uint64; +// TODO: support utf-8 in fulltext index +inline constexpr auto TokenTypeId = NScheme::NTypeIds::String; + struct Schema : NIceDb::Schema { struct Paths : Table<1> { struct Id : Column<1, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; }; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 1d1d6b8d66b8..e3062e3f3727 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -315,6 +315,36 @@ auto CalcVectorKmeansTreePrefixImplTableDescImpl( return implTableDesc; } +auto CalcFulltextImplTableDescImpl( + const auto& baseTable, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const THashSet& indexDataColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + auto tableColumns = ExtractInfo(baseTable); + THashSet indexColumns = indexDataColumns; + for (const auto & keyColumn: tableColumns.Keys) { + indexColumns.insert(keyColumn); + } + + NKikimrSchemeOp::TTableDescription implTableDesc; + implTableDesc.SetName(NTableIndex::ImplTable); + SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, implTableDesc); + { + auto tokenColumn = implTableDesc.AddColumns(); + tokenColumn->SetName(NFulltext::TokenColumn); + tokenColumn->SetType(NFulltext::TokenTypeName); + tokenColumn->SetTypeId(NSchemeShard::TokenTypeId); + tokenColumn->SetNotNull(true); + } + implTableDesc.AddKeyColumnNames(NFulltext::TokenColumn); + FillIndexImplTableColumns(GetColumns(baseTable), tableColumns.Keys, indexColumns, implTableDesc); + + implTableDesc.SetSystemColumnNamesAllowed(true); + + return implTableDesc; +} + } void FillIndexTableColumns( @@ -421,6 +451,24 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePrefixImplTableDesc( return CalcVectorKmeansTreePrefixImplTableDescImpl(indexKeyColumns, baseTableDescr, baseTablePartitionConfig, implTableColumns, indexTableDesc); } +NKikimrSchemeOp::TTableDescription CalcFulltextImplTableDesc( + const NSchemeShard::TTableInfo::TPtr& baseTableInfo, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const THashSet& indexDataColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + return CalcFulltextImplTableDescImpl(baseTableInfo, baseTablePartitionConfig, indexDataColumns, indexTableDesc); +} + +NKikimrSchemeOp::TTableDescription CalcFulltextImplTableDesc( + const NKikimrSchemeOp::TTableDescription& baseTableDescr, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const THashSet& indexDataColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc) +{ + return CalcFulltextImplTableDescImpl(baseTableDescr, baseTablePartitionConfig, indexDataColumns, indexTableDesc); +} + bool ExtractTypes(const NKikimrSchemeOp::TTableDescription& baseTableDescr, TColumnTypes& columnTypes, TString& explain) { const NScheme::TTypeRegistry* typeRegistry = AppData()->TypeRegistry; Y_ABORT_UNLESS(typeRegistry); diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index 3283610ba03f..d23dfbdb89a2 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -90,6 +90,18 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreePrefixImplTableDesc( const TTableColumns& implTableColumns, const NKikimrSchemeOp::TTableDescription& indexTableDesc); +NKikimrSchemeOp::TTableDescription CalcFulltextImplTableDesc( + const NSchemeShard::TTableInfo::TPtr& baseTableInfo, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const THashSet& indexDataColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc); + +NKikimrSchemeOp::TTableDescription CalcFulltextImplTableDesc( + const NKikimrSchemeOp::TTableDescription& baseTableDescr, + const NKikimrSchemeOp::TPartitionConfig& baseTablePartitionConfig, + const THashSet& indexDataColumns, + const NKikimrSchemeOp::TTableDescription& indexTableDesc); + TTableColumns ExtractInfo(const NSchemeShard::TTableInfo::TPtr& tableInfo); TTableColumns ExtractInfo(const NKikimrSchemeOp::TTableDescription& tableDesc); TIndexColumns ExtractInfo(const NKikimrSchemeOp::TIndexCreationConfig& indexDesc); @@ -131,7 +143,7 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - if (!IsCompatibleIndex(indexDesc.GetType(), baseTableColumns, indexKeys, error)) { + if (!IsCompatibleIndex(GetIndexType(indexDesc), baseTableColumns, indexKeys, error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; return false; } @@ -142,29 +154,63 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat return false; } - implTableColumns = CalcTableImplDescription(indexDesc.GetType(), baseTableColumns, indexKeys); - - if (indexDesc.GetType() == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - //We have already checked this in IsCompatibleIndex - Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1); - - if (indexKeys.KeyColumns.size() > 1 && !IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - return false; + implTableColumns = CalcTableImplDescription(GetIndexType(indexDesc), baseTableColumns, indexKeys); + + switch (GetIndexType(indexDesc)) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalAsync: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + if (!IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + return false; + } + break; + case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { + //We have already checked this in IsCompatibleIndex + Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1); + + if (indexKeys.KeyColumns.size() > 1 && !IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + return false; + } + + const TString& indexColumnName = indexKeys.KeyColumns.back(); + Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); + auto typeInfo = baseColumnTypes.at(indexColumnName); + + if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + return false; + } + break; } - - const TString& indexColumnName = indexKeys.KeyColumns.back(); - Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); - auto typeInfo = baseColumnTypes.at(indexColumnName); - - if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: { + //We have already checked this in IsCompatibleIndex + Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1); + + if (indexKeys.KeyColumns.size() > 1) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = TStringBuilder() << "fulltext index can only have a single key text column"; + return false; + } + + const TString& indexColumnName = indexKeys.KeyColumns.back(); + Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName)); + auto typeInfo = baseColumnTypes.at(indexColumnName); + + // TODO: support utf-8 in fulltext index + if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + return false; + } + break; + } + default: status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "Index column '" << indexColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + error = InvalidIndexType(indexDesc.GetType()); return false; - } - } else if (!IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - return false; } if (implTableColumns.Keys.size() > schemeLimits.MaxTableKeyColumns) { diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index efa5921b7aa5..a75939072c02 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -912,6 +912,24 @@ TCheckFunc KMeansTreeDescription(Ydb::Table::VectorIndexSettings_Metric metric, }; } +TCheckFunc SpecializedIndexDescription(const TString& proto) { + return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + TString actual; + switch (record.GetPathDescription().GetTableIndex().GetSpecializedIndexDescriptionCase()) { + case NKikimrSchemeOp::TIndexDescription::kVectorIndexKmeansTreeDescription: + actual = record.GetPathDescription().GetTableIndex().GetVectorIndexKmeansTreeDescription().GetSettings().ShortDebugString(); + break; + case NKikimrSchemeOp::TIndexDescription::kFulltextIndexDescription: + actual = record.GetPathDescription().GetTableIndex().GetFulltextIndexDescription().GetSettings().ShortDebugString(); + break; + case NKikimrSchemeOp::TIndexDescription::SPECIALIZEDINDEXDESCRIPTION_NOT_SET: + actual = "SPECIALIZEDINDEXDESCRIPTION_NOT_SET"; + break; + } + + UNIT_ASSERT_VALUES_EQUAL(actual, proto); + }; +} TCheckFunc SequenceName(const TString& name) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index d5b70410349f..e3284c521bf2 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -174,6 +174,8 @@ namespace NLs { ui32 levels ); + TCheckFunc SpecializedIndexDescription(const TString& proto); + TCheckFunc SequenceName(const TString& name); TCheckFunc SequenceIncrement(i64 increment); TCheckFunc SequenceMaxValue(i64 maxValue); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index a3bf538ec36e..de68a4f951bc 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -595,6 +595,7 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.FeatureFlags.SetEnableTableDatetime64(true); app.FeatureFlags.SetEnableVectorIndex(true); app.FeatureFlags.SetEnableAddUniqueIndex(true); + app.FeatureFlags.SetEnableFulltextIndex(true); app.FeatureFlags.SetEnableColumnStore(true); app.FeatureFlags.SetEnableStrictAclCheck(opts.EnableStrictAclCheck_); app.SetEnableMoveIndex(opts.EnableMoveIndex_); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp new file mode 100644 index 000000000000..e9f772e8e3fc --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include + + +using namespace NKikimr; +using namespace NSchemeShard; +using namespace NSchemeShardUT_Private; +using namespace NKikimr::NTableIndex; +using namespace NKikimr::NTableIndex::NFulltext; + +Y_UNIT_TEST_SUITE(TFulltextIndexTests) { + Y_UNIT_TEST(CreateTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "texts" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "text" Type: "String" } + Columns { Name: "covered" Type: "String" } + Columns { Name: "another" Type: "Uint64" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_fulltext" + KeyColumnNames: ["text"] + DataColumnNames: ["covered"] + Type: EIndexTypeGlobalFulltext + FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } } + } + )"); + env.TestWaitNotification(runtime, txId); + + NKikimrSchemeOp::TDescribeOptions opts; + opts.SetReturnChildren(true); + opts.SetShowPrivateTable(true); + Cout << DescribePath(runtime, "/MyRoot/texts/idx_fulltext/indexImplTable", opts).DebugString() << Endl; + + for (ui32 reboot = 0; reboot < 2; reboot++) { + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ + NLs::PathExist, + NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalFulltext), + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady), + NLs::IndexKeys({"text"}), + NLs::IndexDataColumns({"covered"}), + NLs::SpecializedIndexDescription("layout: FLAT tokenizer: STANDARD use_filter_ngram: true filter_ngram_max_length: 42"), + NLs::ChildrenCount(1), + }); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext/indexImplTable"),{ + NLs::PathExist, + NLs::CheckColumns("indexImplTable", + { NFulltext::TokenColumn, "id", "covered" }, {}, + { NFulltext::TokenColumn, "id" }, true) }); + + Cerr << "Reboot SchemeShard.." << Endl; + TActorId sender = runtime.AllocateEdgeActor(); + RebootTablet(runtime, TTestTxConfig::SchemeShard, sender); + } + } + + Y_UNIT_TEST(CreateTablePrefix) { // not supported for now, maybe later + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "texts" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "text" Type: "String" } + Columns { Name: "covered" Type: "String" } + Columns { Name: "another" Type: "Uint64" } + KeyColumnNames: [ "id"] + } + IndexDescription { + Name: "idx_fulltext" + KeyColumnNames: [ "another", "text"] + DataColumnNames: ["covered"] + Type: EIndexTypeGlobalFulltext + FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } } + } + )", {NKikimrScheme::StatusInvalidParameter}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ + NLs::PathNotExist, + }); + } + + Y_UNIT_TEST(CreateTableNotText) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "texts" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "text" Type: "Uint64" } + Columns { Name: "covered" Type: "String" } + Columns { Name: "another" Type: "Uint64" } + KeyColumnNames: ["id"] + } + IndexDescription { + Name: "idx_fulltext" + KeyColumnNames: ["text"] + DataColumnNames: ["covered"] + Type: EIndexTypeGlobalFulltext + FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } } + } + )", {NKikimrScheme::StatusInvalidParameter}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ + NLs::PathNotExist, + }); + } +} diff --git a/ydb/core/tx/schemeshard/ut_index/ya.make b/ydb/core/tx/schemeshard/ut_index/ya.make index ddd365aeee78..cfb8db9df7b4 100644 --- a/ydb/core/tx/schemeshard/ut_index/ya.make +++ b/ydb/core/tx/schemeshard/ut_index/ya.make @@ -22,6 +22,7 @@ SRCS( ut_async_index.cpp ut_unique_index.cpp ut_vector_index.cpp + ut_fulltext_index.cpp ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index bcb66afc0825..ed8cd0eb95fd 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -1083,7 +1084,18 @@ void FillIndexDescriptionImpl(TYdbProto& out, const NKikimrSchemeOp::TTableDescr break; } + case NKikimrSchemeOp::EIndexTypeGlobalFulltext: + FillGlobalIndexSettings( + *index->mutable_global_fulltext_index()->mutable_settings(), + tableIndex.GetIndexImplTableDescriptions(0) + ); + + *index->mutable_global_fulltext_index()->mutable_fulltext_settings() = tableIndex.GetFulltextIndexDescription().GetSettings(); + + break; default: + Y_DEBUG_ABORT_S(NTableIndex::InvalidIndexType(tableIndex.GetType())); + break; }; @@ -1136,7 +1148,6 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, } // specific fields - std::vector indexImplTableDescriptionsVector; switch (index.type_case()) { case Ydb::Table::TableIndex::kGlobalIndex: indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); @@ -1154,17 +1165,23 @@ bool FillIndexDescription(NKikimrSchemeOp::TIndexedTableCreationConfig& out, indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree); *indexDesc->MutableVectorIndexKmeansTreeDescription()->MutableSettings() = index.global_vector_kmeans_tree_index().vector_settings(); break; + + case Ydb::Table::TableIndex::kGlobalFulltextIndex: + indexDesc->SetType(NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext); + *indexDesc->MutableFulltextIndexDescription()->MutableSettings() = index.global_fulltext_index().fulltext_settings(); + break; - default: - // pass through - // TODO: maybe return BAD_REQUEST? + case Ydb::Table::TableIndex::TYPE_NOT_SET: + // FIXME: python sdk can create a table with a secondary index without a type + // so it's not possible to return an invalid index type error here for now break; } - if (!FillIndexTablePartitioning(indexImplTableDescriptionsVector, index, status, error)) { + std::vector indexImplTableDescriptions; + if (!FillIndexTablePartitioning(indexImplTableDescriptions, index, status, error)) { return false; } - *indexDesc->MutableIndexImplTableDescriptions() = {indexImplTableDescriptionsVector.begin(), indexImplTableDescriptionsVector.end()}; + *indexDesc->MutableIndexImplTableDescriptions() = {indexImplTableDescriptions.begin(), indexImplTableDescriptions.end()}; } return true; diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index ec972a571658..ea0bb46f00d5 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -273,6 +273,7 @@ message TableIndexDescription { GlobalAsyncIndex global_async_index = 5; GlobalUniqueIndex global_unique_index = 8; GlobalVectorKMeansTreeIndex global_vector_kmeans_tree_index = 9; + GlobalFulltextIndex global_fulltext_index = 10; } Status status = 4; // list of columns content to be copied in to index table diff --git a/ydb/services/ydb/backup_ut/ydb_backup_ut.cpp b/ydb/services/ydb/backup_ut/ydb_backup_ut.cpp index 69485a6e70e3..d6c29e9e7119 100644 --- a/ydb/services/ydb/backup_ut/ydb_backup_ut.cpp +++ b/ydb/services/ydb/backup_ut/ydb_backup_ut.cpp @@ -2389,6 +2389,9 @@ Y_UNIT_TEST_SUITE(BackupRestore) { case EIndexTypeGlobalUnique: case EIndexTypeGlobalVectorKmeansTree: return TestTableWithIndexBackupRestore(Value); + case EIndexTypeGlobalFulltext: + // TODO: will be added later + break; case EIndexTypeInvalid: break; // not applicable default: @@ -3219,6 +3222,9 @@ Y_UNIT_TEST_SUITE(BackupRestoreS3) { case EIndexTypeGlobalVectorKmeansTree: TestTableWithIndexBackupRestore(Value); break; + case EIndexTypeGlobalFulltext: + // TODO: will be added later + break; case EIndexTypeInvalid: break; // not applicable default: