Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 77 additions & 13 deletions ydb/core/base/table_index.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "table_index.h"

#include <ydb/library/yverify_stream/yverify_stream.h>
#include <ydb/core/protos/tx_datashard.pb.h>

namespace NKikimr::NTableIndex {
Expand Down Expand Up @@ -61,13 +62,40 @@ constexpr std::string_view PrefixedGlobalKMeansTreeImplTables[] = {
};
static_assert(std::is_sorted(std::begin(PrefixedGlobalKMeansTreeImplTables), std::end(PrefixedGlobalKMeansTreeImplTables)));

// Implementation table names of a global fulltext index: only the single ImplTable.
constexpr std::string_view GlobalFulltextImplTables[] = {
ImplTable,
};
// Compile-time check that the list above is in sorted order.
static_assert(std::is_sorted(std::begin(GlobalFulltextImplTables), std::end(GlobalFulltextImplTables)));

// Tells whether indexType is one of the plain global index kinds
// (sync, async or unique). Vector k-means tree and fulltext indexes
// yield false. Any other value is reported through Y_ENSURE together
// with the InvalidIndexType message.
bool IsSecondaryIndex(NKikimrSchemeOp::EIndexType indexType) {
    const bool plainGlobal =
        indexType == NKikimrSchemeOp::EIndexTypeGlobal ||
        indexType == NKikimrSchemeOp::EIndexTypeGlobalAsync ||
        indexType == NKikimrSchemeOp::EIndexTypeGlobalUnique;
    if (plainGlobal) {
        return true;
    }

    const bool specialized =
        indexType == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree ||
        indexType == NKikimrSchemeOp::EIndexTypeGlobalFulltext;
    if (specialized) {
        return false;
    }

    // Unrecognized enum value.
    Y_ENSURE(false, InvalidIndexType(indexType));
}

TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) {
}

TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) {
TTableColumns result;

const bool isSecondaryIndex = type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
std::for_each(index.KeyColumns.begin(), index.KeyColumns.end() - (isSecondaryIndex ? 0 : 1), [&] (const auto& ik) {
const bool isSecondaryIndex = IsSecondaryIndex(indexType);

auto takeKeyColumns = index.KeyColumns.size();
if (!isSecondaryIndex) { // vector and fulltext indexes have special embedding and text key columns
Y_ASSERT(indexType == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree
|| indexType == NKikimrSchemeOp::EIndexTypeGlobalFulltext);
takeKeyColumns--;
}

std::for_each(index.KeyColumns.begin(), index.KeyColumns.begin() + takeKeyColumns, [&] (const auto& ik) {
result.Keys.push_back(ik);
result.Columns.emplace(ik);
});
Expand All @@ -85,6 +113,18 @@ TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const T
return result;
}

// Returns the index type recorded in indexCreation; when the type field
// is not set, EIndexTypeGlobal is returned instead.
NKikimrSchemeOp::EIndexType GetIndexType(NKikimrSchemeOp::TIndexCreationConfig indexCreation) {
    // TODO: always provide EIndexTypeGlobal value instead of null
    // TODO: do not cast unknown index types to EIndexTypeGlobal (proto2 specific)
    if (!indexCreation.HasType()) {
        return NKikimrSchemeOp::EIndexTypeGlobal;
    }
    return indexCreation.GetType();
}

// Builds the diagnostic text "Invalid index type <N>", where <N> is the
// integer value of indexType.
TString InvalidIndexType(NKikimrSchemeOp::EIndexType indexType) {
    TStringBuilder message;
    message << "Invalid index type " << static_cast<int>(indexType);
    return message;
}

bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) {
if (const auto* broken = IsContains(table.Keys, table.Columns)) {
explain = TStringBuilder()
Expand Down Expand Up @@ -127,7 +167,7 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
return false;
}

const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
const bool isSecondaryIndex = IsSecondaryIndex(indexType);

if (index.KeyColumns.size() < 1) {
explain = "should be at least single index key column";
Expand Down Expand Up @@ -157,7 +197,9 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
if (isSecondaryIndex) {
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end());
} else {
// Vector indexes allow to add all columns both to index & data
// Vector and fulltext indexes allow to add all columns both to index & data
Y_ASSERT(indexType == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree
|| indexType == NKikimrSchemeOp::EIndexTypeGlobalFulltext);
}
if (const auto* broken = IsContains(index.DataColumns, tmp, true)) {
explain = TStringBuilder()
Expand All @@ -167,15 +209,37 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
return true;
}

// Reports whether an index of the given type supports TTL: true for the
// global, global-unique and global-async kinds, false for vector k-means
// tree and fulltext indexes. An unrecognized value goes through
// Y_DEBUG_ABORT_S with the InvalidIndexType message and then yields false.
bool DoesIndexSupportTTL(NKikimrSchemeOp::EIndexType indexType) {
    bool supportsTtl = false;

    switch (indexType) {
        case NKikimrSchemeOp::EIndexTypeGlobal:
        case NKikimrSchemeOp::EIndexTypeGlobalUnique:
        case NKikimrSchemeOp::EIndexTypeGlobalAsync:
            supportsTtl = true;
            break;
        case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree:
        case NKikimrSchemeOp::EIndexTypeGlobalFulltext:
            break;
        default:
            Y_DEBUG_ABORT_S(InvalidIndexType(indexType));
            break;
    }

    return supportsTtl;
}

// Returns the list of implementation table names for an index of the
// given type.
//
// indexType - kind of the index.
// indexKeys - index key columns; only their count is inspected, and only
//             for vector k-means tree indexes.
//
// Unrecognized index types are reported through Y_ENSURE with the
// InvalidIndexType message.
std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys) {
    switch (indexType) {
        case NKikimrSchemeOp::EIndexTypeGlobal:
        case NKikimrSchemeOp::EIndexTypeGlobalAsync:
        case NKikimrSchemeOp::EIndexTypeGlobalUnique:
            return GlobalSecondaryImplTables;
        case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree:
            // Exactly one key column selects the non-prefixed k-means tree
            // table set; any other count selects the prefixed one.
            if (indexKeys.size() == 1) {
                return GlobalKMeansTreeImplTables;
            } else {
                return PrefixedGlobalKMeansTreeImplTables;
            }
        case NKikimrSchemeOp::EIndexTypeGlobalFulltext:
            return GlobalFulltextImplTables;
        default:
            Y_ENSURE(false, InvalidIndexType(indexType));
    }
}

Expand Down
16 changes: 14 additions & 2 deletions ydb/core/base/table_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,13 @@ struct TIndexColumns {

// Name of the index implementation table.
inline constexpr const char* ImplTable = "indexImplTable";

bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain);
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index);
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain);
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index);

// Returns true for the Global, GlobalUnique and GlobalAsync index types and
// false for the vector k-means tree and fulltext index types.
bool DoesIndexSupportTTL(NKikimrSchemeOp::EIndexType indexType);

// Returns the type stored in indexCreation, or EIndexTypeGlobal when no type is set.
NKikimrSchemeOp::EIndexType GetIndexType(NKikimrSchemeOp::TIndexCreationConfig indexCreation);
// Builds the message "Invalid index type <N>", where <N> is the integer value of indexType.
TString InvalidIndexType(NKikimrSchemeOp::EIndexType indexType);

std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType indexType, std::span<const TString> indexKeys);
bool IsImplTable(std::string_view tableName);
Expand Down Expand Up @@ -76,6 +81,13 @@ TClusterId SetPostingParentFlag(TClusterId parent);

}

// Constants related to fulltext indexes.
namespace NFulltext {
// TODO: support utf-8 in fulltext index
// Type name used for tokens; presumably the type of TokenColumn — TODO confirm.
inline constexpr const char* TokenTypeName = "String";

// Presumably the name of the token column of a fulltext index — TODO confirm against usage.
inline constexpr const char* TokenColumn = "__ydb_token";
}

TString ToShortDebugString(const NKikimrTxDataShard::TEvReshuffleKMeansRequest& record);
TString ToShortDebugString(const NKikimrTxDataShard::TEvRecomputeKMeansRequest& record);
TString ToShortDebugString(const NKikimrTxDataShard::TEvRecomputeKMeansResponse& record);
Expand Down
8 changes: 8 additions & 0 deletions ydb/core/protos/flat_scheme_op.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,7 @@ enum EIndexType {
EIndexTypeGlobalAsync = 2;
EIndexTypeGlobalUnique = 3;
EIndexTypeGlobalVectorKmeansTree = 4;
EIndexTypeGlobalFulltext = 5;
}

enum EIndexState {
Expand All @@ -1171,6 +1172,10 @@ message TVectorIndexKmeansTreeDescription {
optional Ydb.Table.KMeansTreeSettings Settings = 1;
}

// Specialized description of a fulltext index; carries its Ydb.Table.FulltextIndexSettings.
message TFulltextIndexDescription {
optional Ydb.Table.FulltextIndexSettings Settings = 1;
}

message TIndexDescription {
optional string Name = 1;
optional uint64 LocalPathId = 2;
Expand All @@ -1192,6 +1197,7 @@ message TIndexDescription {

oneof SpecializedIndexDescription {
TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 11;
TFulltextIndexDescription FulltextIndexDescription = 12;
}
}

Expand All @@ -1204,6 +1210,7 @@ message TIndexCreationConfig {
repeated string DataColumnNames = 6; //columns to be denormalized to read data just from index
oneof SpecializedIndexDescription {
TVectorIndexKmeansTreeDescription VectorIndexKmeansTreeDescription = 7;
TFulltextIndexDescription FulltextIndexDescription = 8;
}
}

Expand Down Expand Up @@ -1985,6 +1992,7 @@ enum EPathSubType {
EPathSubTypeAsyncIndexImplTable = 2;
EPathSubTypeStreamImpl = 3;
EPathSubTypeVectorKmeansTreeIndexImplTable = 4;
EPathSubTypeFulltextIndexImplTable = 5;
}

enum EPathState {
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/tx/scheme_board/cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,8 @@ class TSchemeCache: public TMonitorableActor<TSchemeCache> {
return NSchemeCache::ETableKind::KindAsyncIndexTable;
case NKikimrSchemeOp::EPathSubTypeVectorKmeansTreeIndexImplTable:
return NSchemeCache::ETableKind::KindVectorIndexTable;
case NKikimrSchemeOp::EPathSubTypeFulltextIndexImplTable:
return NSchemeCache::ETableKind::KindFulltextIndexTable;
default:
return NSchemeCache::ETableKind::KindRegularTable;
}
Expand Down
1 change: 1 addition & 0 deletions ydb/core/tx/scheme_cache/scheme_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ enum class ETableKind {
KindSyncIndexTable = 2,
KindAsyncIndexTable = 3,
KindVectorIndexTable = 4,
KindFulltextIndexTable = 5,
};

struct TSchemeCacheNavigate {
Expand Down
6 changes: 4 additions & 2 deletions ydb/core/tx/schemeshard/schemeshard__conditional_erase.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "schemeshard_impl.h"

#include <util/string/join.h>
#include <ydb/core/base/table_index.h>
#include <ydb/core/protos/flat_scheme_op.pb.h>

namespace NKikimr {
namespace NSchemeShard {
Expand Down Expand Up @@ -239,7 +241,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase {

auto index = GetIndex(childPath);
if (index->Type == NKikimrSchemeOp::EIndexTypeGlobalAsync
|| index->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) {
|| !DoesIndexSupportTTL(index->Type)) {
continue;
}

Expand Down Expand Up @@ -276,7 +278,7 @@ struct TSchemeShard::TTxRunConditionalErase: public TSchemeShard::TRwTxBase {
}

static TVector<std::pair<ui32, ui32>> MakeColumnIds(TTableInfo::TPtr mainTable, TTableIndexInfo::TPtr index, TTableInfo::TPtr indexImplTable) {
Y_ABORT_UNLESS(index->Type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree);
Y_ABORT_UNLESS(DoesIndexSupportTTL(index->Type));
TVector<std::pair<ui32, ui32>> result;
THashSet<TString> keys;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,23 @@ static std::optional<NKikimrSchemeOp::TModifyScheme> CreateIndexTask(NKikimr::NS
*operation->MutableDataColumnNames()->Add() = dataColumn;
}

if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) {
*operation->MutableVectorIndexKmeansTreeDescription() =
std::get<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(indexInfo->SpecializedIndexDescription);
} else if (!std::holds_alternative<std::monostate>(indexInfo->SpecializedIndexDescription)) {
return {};
switch (indexInfo->Type) {
case NKikimrSchemeOp::EIndexTypeGlobal:
case NKikimrSchemeOp::EIndexTypeGlobalAsync:
case NKikimrSchemeOp::EIndexTypeGlobalUnique:
// no specialized index description
Y_ASSERT(std::holds_alternative<std::monostate>(indexInfo->SpecializedIndexDescription));
break;
case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree:
*operation->MutableVectorIndexKmeansTreeDescription() =
std::get<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(indexInfo->SpecializedIndexDescription);
break;
case NKikimrSchemeOp::EIndexTypeGlobalFulltext:
*operation->MutableFulltextIndexDescription() =
std::get<NKikimrSchemeOp::TFulltextIndexDescription>(indexInfo->SpecializedIndexDescription);
break;
default:
return {}; // reject
}

return scheme;
Expand Down
24 changes: 18 additions & 6 deletions ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,12 +852,24 @@ TVector<ISubOperation::TPtr> CreateCopyTable(TOperationId nextId, const TTxTrans
for (const auto& dataColumn: indexInfo->IndexDataColumns) {
*operation->MutableDataColumnNames()->Add() = dataColumn;
}
if (indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) {
*operation->MutableVectorIndexKmeansTreeDescription() =
std::get<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(indexInfo->SpecializedIndexDescription);
} else if (!std::holds_alternative<std::monostate>(indexInfo->SpecializedIndexDescription)) {
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter,
TStringBuilder{} << "Copy table doesn't support table with index type " << indexInfo->Type)};

switch (indexInfo->Type) {
case NKikimrSchemeOp::EIndexTypeGlobal:
case NKikimrSchemeOp::EIndexTypeGlobalAsync:
case NKikimrSchemeOp::EIndexTypeGlobalUnique:
// no specialized index description
Y_ASSERT(std::holds_alternative<std::monostate>(indexInfo->SpecializedIndexDescription));
break;
case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree:
*operation->MutableVectorIndexKmeansTreeDescription() =
std::get<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(indexInfo->SpecializedIndexDescription);
break;
case NKikimrSchemeOp::EIndexTypeGlobalFulltext:
*operation->MutableFulltextIndexDescription() =
std::get<NKikimrSchemeOp::TFulltextIndexDescription>(indexInfo->SpecializedIndexDescription);
break;
default:
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, InvalidIndexType(indexInfo->Type))};
}

result.push_back(CreateNewTableIndex(NextPartId(nextId, result), schema));
Expand Down
Loading
Loading