diff --git a/ydb/core/base/fulltext.cpp b/ydb/core/base/fulltext.cpp index 51ced489aff1..242484e72af7 100644 --- a/ydb/core/base/fulltext.cpp +++ b/ydb/core/base/fulltext.cpp @@ -137,40 +137,54 @@ TVector Analyze(const TString& text, const Ydb::Table::FulltextIndexSet return tokens; } -bool ValidateSettings(const NProtoBuf::RepeatedPtrField& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) { - if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) { - error = "layout should be set"; - return false; - } +bool ValidateColumnsMatches(const NProtoBuf::RepeatedPtrField& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) { + return ValidateColumnsMatches(TVector{columns.begin(), columns.end()}, settings, error); +} +bool ValidateColumnsMatches(const TVector& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) { + TVector settingsColumns(::Reserve(settings.columns().size())); for (auto column : settings.columns()) { - if (!column.has_column()) { - error = "fulltext index settings should have a column name"; - return false; - } - if (!column.has_analyzers()) { - error = "fulltext index settings should have analyzers"; - return false; - } + settingsColumns.push_back(column.column()); } - if (keyColumns.size() != 1) { - error = TStringBuilder() << "fulltext index should have a single text key column" - << " but have " << keyColumns.size() << " of them"; + + if (columns != settingsColumns) { + error = TStringBuilder() << "columns " << settingsColumns << " should be " << columns; return false; } - if (settings.columns().size() != 1) { - error = TStringBuilder() << "fulltext index should have a single text key column settings" - << " but have " << settings.columns().size() << " of them"; + + error = ""; + return true; +} + +bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error) { + if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) { + error = "layout should be set"; + return false; + } + + if (settings.columns().empty()) { + error = "columns should be set"; return false; } - if (keyColumns.at(0) != settings.columns().at(0).column()) { - error = TStringBuilder() << "fulltext index should have a single text key column " << keyColumns.at(0) << " settings" - << " but have " << settings.columns().at(0).column(); + + // current implementation limitation: + if (settings.columns().size() != 1) { + error = "columns should have a single value"; return false; } for (auto column : settings.columns()) { + if (!column.has_column()) { + error = "column name should be set"; + return false; + } + + // current implementation limitation: + if (!settings.columns().at(0).has_analyzers()) { + error = "column analyzers should be set"; + return false; + } if (!ValidateSettings(column.analyzers(), error)) { return false; } @@ -226,15 +240,18 @@ Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const T columnAnalyzers->mutable_analyzers()->CopyFrom(resultAnalyzers); } - { - NProtoBuf::RepeatedPtrField keyColumns; - TString keyColumn_ = keyColumn; - keyColumns.Add(std::move(keyColumn_)); - ValidateSettings(keyColumns, result, error); - } + ValidateSettings(result, error); return result; } } + +template<> inline +void Out>(IOutputStream& o, const TVector &vec) { + o << "[ "; + for (const auto &x : vec) + o << x << ' '; + o << "]"; +} diff --git a/ydb/core/base/fulltext.h b/ydb/core/base/fulltext.h index 849b52bfc9e4..b7303613ae2d 100644 --- a/ydb/core/base/fulltext.h +++ b/ydb/core/base/fulltext.h @@ -8,7 +8,10 @@ namespace NKikimr::NFulltext { TVector Analyze(const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings); -bool ValidateSettings(const NProtoBuf::RepeatedPtrField& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error); +bool ValidateColumnsMatches(const NProtoBuf::RepeatedPtrField& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error); +bool ValidateColumnsMatches(const TVector& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error); + +bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error); Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector>& values, TString& error); } diff --git a/ydb/core/base/ut/fulltext_ut.cpp b/ydb/core/base/ut/fulltext_ut.cpp index d3cb620dca9d..113c18211539 100644 --- a/ydb/core/base/ut/fulltext_ut.cpp +++ b/ydb/core/base/ut/fulltext_ut.cpp @@ -6,56 +6,54 @@ namespace NKikimr::NFulltext { Y_UNIT_TEST_SUITE(NFulltext) { + Y_UNIT_TEST(ValidateColumnsMatches) { + TString error; + + Ydb::Table::FulltextIndexSettings settings; + settings.add_columns()->set_column("column1"); + settings.add_columns()->set_column("column2"); + + UNIT_ASSERT(!ValidateColumnsMatches(TVector{"column2"}, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "columns [ column1 column2 ] should be [ column2 ]"); + + UNIT_ASSERT(!ValidateColumnsMatches(TVector{"column2", "column1"}, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "columns [ column1 column2 ] should be [ column2 column1 ]"); + + UNIT_ASSERT(ValidateColumnsMatches(TVector{"column1", "column2"}, settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, ""); + } + Y_UNIT_TEST(ValidateSettings) { Ydb::Table::FulltextIndexSettings settings; TString error; - NProtoBuf::RepeatedPtrField keyColumns; - keyColumns.Add("text"); - - UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); + UNIT_ASSERT(!ValidateSettings(settings, error)); UNIT_ASSERT_VALUES_EQUAL(error, "layout should be set"); settings.set_layout(Ydb::Table::FulltextIndexSettings::FLAT); - UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column settings but have 0 of them"); + UNIT_ASSERT(!ValidateSettings(settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "columns should be set"); auto columnSettings = settings.add_columns(); - UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have a column name"); + UNIT_ASSERT(!ValidateSettings(settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "column name should be set"); columnSettings->set_column("text"); - UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have analyzers"); + UNIT_ASSERT(!ValidateSettings(settings, error)); + UNIT_ASSERT_VALUES_EQUAL(error, "column analyzers should be set"); auto columnAnalyzers = columnSettings->mutable_analyzers(); - UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error)); + UNIT_ASSERT(!ValidateSettings(settings, error)); UNIT_ASSERT_VALUES_EQUAL(error, "tokenizer should be set"); columnAnalyzers->set_tokenizer(Ydb::Table::FulltextIndexSettings::STANDARD); - { - NProtoBuf::RepeatedPtrField keyColumns; - UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 0 of them"); - } - - { - NProtoBuf::RepeatedPtrField keyColumns; - keyColumns.Add("text2"); - UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column text2 settings but have text"); - } - - { - NProtoBuf::RepeatedPtrField keyColumns; - keyColumns.Add("text"); - keyColumns.Add("text"); - UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error); - UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 2 of them"); - } - - UNIT_ASSERT_C(ValidateSettings(keyColumns, settings, error), error); + UNIT_ASSERT_C(ValidateSettings(settings, error), error); UNIT_ASSERT_VALUES_EQUAL(error, ""); + + columnSettings = settings.add_columns(); + columnSettings->set_column("text2"); + UNIT_ASSERT_C(!ValidateSettings(settings, error), error); + UNIT_ASSERT_VALUES_EQUAL(error, "columns should have a single value"); } Y_UNIT_TEST(FillSettings) { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index f032d20d8b05..1f34bbdc935c 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1799,14 +1799,12 @@ message TEvBuildFulltextIndexRequest { optional uint64 SeqNoGeneration = 6; optional uint64 SeqNoRound = 7; - optional Ydb.Table.FulltextIndexSettings Settings = 8; + optional string IndexName = 8; - optional string IndexName = 9; + optional Ydb.Table.FulltextIndexSettings Settings = 9; // also has key columns + repeated string DataColumns = 10; - repeated string KeyColumns = 10; - repeated string DataColumns = 11; - - optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 12; + optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 11; } message TEvBuildFulltextIndexResponse { diff --git a/ydb/core/tx/datashard/build_index/fulltext.cpp b/ydb/core/tx/datashard/build_index/fulltext.cpp index 3af00dd7f661..61aa3112c713 100644 --- a/ydb/core/tx/datashard/build_index/fulltext.cpp +++ b/ydb/core/tx/datashard/build_index/fulltext.cpp @@ -65,8 +65,6 @@ class TBuildFulltextIndexScan: public TActor, public IA Y_ENSURE(Request.settings().columns().size() == 1); TextColumn = Request.settings().columns().at(0).column(); TextAnalyzers = Request.settings().columns().at(0).analyzers(); - Y_ENSURE(Request.GetKeyColumns().size() == 1); - Y_ENSURE(Request.GetKeyColumns().at(0) == TextColumn); auto tags = GetAllTags(table); auto types = GetAllTypes(table); @@ -373,9 +371,9 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev } auto tags = GetAllTags(userTable); - for (auto keyColumn : request.GetKeyColumns()) { - if (!tags.contains(keyColumn)) { - badRequest(TStringBuilder() << "Unknown key column: " << keyColumn); + for (auto column : request.GetSettings().columns()) { + if (!tags.contains(column.column())) { + badRequest(TStringBuilder() << "Unknown key column: " << column.column()); } } for (auto dataColumn : request.GetDataColumns()) { @@ -393,7 +391,7 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev badRequest(TStringBuilder() << "Missing fulltext index settings"); } else { TString error; - if (!NKikimr::NFulltext::ValidateSettings(request.keycolumns(), request.GetSettings(), error)) { + if (!NKikimr::NFulltext::ValidateSettings(request.GetSettings(), error)) { badRequest(error); } } diff --git a/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp index 39a9b67eee85..a0f6884f7bfb 100644 --- a/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp +++ b/ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp @@ -54,8 +54,6 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) { request.SetIndexName(kIndexTable); - request.AddKeyColumns("text"); - setupRequest(request); return datashards[0]; @@ -176,24 +174,17 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) { }, "{
: Error: Missing fulltext index settings }"); DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { request.MutableSettings()->clear_columns(); - }, "{
: Error: fulltext index should have a single text key column settings but have 0 of them }"); + }, "{
: Error: columns should be set }"); DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { request.MutableSettings()->mutable_columns()->at(0).mutable_analyzers()->clear_tokenizer(); }, "{
: Error: tokenizer should be set }"); - DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { - request.MutableSettings()->mutable_columns()->at(0).set_column("data"); - }, "{
: Error: fulltext index should have a single text key column text settings but have data }"); DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { request.ClearIndexName(); }, "{
: Error: Empty index table name }"); DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { - request.ClearKeyColumns(); - }, "{
: Error: fulltext index should have a single text key column but have 0 of them }"); - DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { - request.ClearKeyColumns(); - request.AddKeyColumns("some"); + request.MutableSettings()->mutable_columns()->at(0).set_column("some"); }, "{
: Error: Unknown key column: some }"); DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { request.AddDataColumns("some"); @@ -202,9 +193,8 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) { // test multiple issues: DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) { request.ClearIndexName(); - request.ClearKeyColumns(); - request.AddKeyColumns("some"); - }, "[ {
: Error: Empty index table name } {
: Error: Unknown key column: some } ]"); + request.AddDataColumns("some"); + }, "[ {
: Error: Empty index table name } {
: Error: Unknown data column: some } ]"); } Y_UNIT_TEST(Build) { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index 4ee836c4c14e..fc648c9dda86 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -153,7 +153,7 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")}; } TString msg; - if (!NKikimr::NFulltext::ValidateSettings(indexDescription.keycolumnnames(), indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) { + if (!NKikimr::NFulltext::ValidateSettings(indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) { return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, msg)}; } break; diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 6dc406e05527..0b83aae30125 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -282,7 +282,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext; NKikimrSchemeOp::TFulltextIndexDescription fulltextIndexDescription; *fulltextIndexDescription.MutableSettings() = index.global_fulltext_index().fulltext_settings(); - if (!NKikimr::NFulltext::ValidateSettings(index.index_columns(), fulltextIndexDescription.GetSettings(), explain)) { + if (!NKikimr::NFulltext::ValidateSettings(fulltextIndexDescription.GetSettings(), explain)) { return false; } buildInfo.SpecializedIndexDescription = fulltextIndexDescription; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index ad99fca80549..5c490b80771d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -3,6 +3,7 @@ #include "schemeshard_info_types.h" #include "schemeshard_types.h" +#include #include #include @@ -168,7 +169,7 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat } break; case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: { - //We have already checked this in IsCompatibleIndex + // We have already checked this in IsCompatibleIndex Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1); if (indexKeys.KeyColumns.size() > 1 && !IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) { @@ -188,25 +189,28 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat break; } case NKikimrSchemeOp::EIndexTypeGlobalFulltext: { - //We have already checked this in IsCompatibleIndex + // We have already checked this in IsCompatibleIndex Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1); - - if (indexKeys.KeyColumns.size() > 1) { + + // Here we only check that fulltext index columns matches table description + // the rest will be checked in NFulltext::ValidateSettings (called separately outside of CommonCheck) + if (!NKikimr::NFulltext::ValidateColumnsMatches(indexKeys.KeyColumns, indexDesc.GetFulltextIndexDescription().GetSettings(), error)) { status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "fulltext index should have a single text key column"; return false; } - - const TString& textColumnName = indexKeys.KeyColumns.at(0); - Y_ABORT_UNLESS(baseColumnTypes.contains(textColumnName)); - auto typeInfo = baseColumnTypes.at(textColumnName); - - // TODO: support utf-8 in fulltext index - if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { - status = NKikimrScheme::EStatus::StatusInvalidParameter; - error = TStringBuilder() << "Text column '" << textColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); - return false; + + for (const auto& column : indexDesc.GetFulltextIndexDescription().GetSettings().columns()) { + if (column.has_analyzers()) { + auto typeInfo = baseColumnTypes.at(column.column()); + // TODO: support utf-8 in fulltext index + if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) { + status = NKikimrScheme::EStatus::StatusInvalidParameter; + error = TStringBuilder() << "Fulltext column '" << column.column() << "' expected type 'String' but got " << NScheme::TypeName(typeInfo); + return false; + } + } } + break; } default: diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index 8aaa1c40515d..e22ce6145054 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -244,8 +244,9 @@ message FulltextIndexSettings { optional Layout layout = 1; // List of columns and their fulltext settings - // Currently, this list should contain a single entry - // And provided column should be the only one in the TableIndex.index_columns list + // Currently, this list should contain a single entry with specified analyzers + // Later, some columns may not use analyzers and will be indexed as-is + // This list must always match TableIndex.index_columns repeated ColumnAnalyzers columns = 2; }