Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 45 additions & 28 deletions ydb/core/base/fulltext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,40 +137,54 @@ TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSet
return tokens;
}

bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) {
error = "layout should be set";
return false;
}
bool ValidateColumnsMatches(const NProtoBuf::RepeatedPtrField<TString>& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
return ValidateColumnsMatches(TVector<TString>{columns.begin(), columns.end()}, settings, error);
}

bool ValidateColumnsMatches(const TVector<TString>& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
TVector<TString> settingsColumns(::Reserve(settings.columns().size()));
for (auto column : settings.columns()) {
if (!column.has_column()) {
error = "fulltext index settings should have a column name";
return false;
}
if (!column.has_analyzers()) {
error = "fulltext index settings should have analyzers";
return false;
}
settingsColumns.push_back(column.column());
}

if (keyColumns.size() != 1) {
error = TStringBuilder() << "fulltext index should have a single text key column"
<< " but have " << keyColumns.size() << " of them";

if (columns != settingsColumns) {
error = TStringBuilder() << "columns " << settingsColumns << " should be " << columns;
return false;
}
if (settings.columns().size() != 1) {
error = TStringBuilder() << "fulltext index should have a single text key column settings"
<< " but have " << settings.columns().size() << " of them";

error = "";
return true;
}

bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) {
error = "layout should be set";
return false;
}

if (settings.columns().empty()) {
error = "columns should be set";
return false;
}
if (keyColumns.at(0) != settings.columns().at(0).column()) {
error = TStringBuilder() << "fulltext index should have a single text key column " << keyColumns.at(0) << " settings"
<< " but have " << settings.columns().at(0).column();

// current implementation limitation:
if (settings.columns().size() != 1) {
error = "columns should have a single value";
return false;
}

for (auto column : settings.columns()) {
if (!column.has_column()) {
error = "column name should be set";
return false;
}

// current implementation limitation:
if (!settings.columns().at(0).has_analyzers()) {
error = "column analyzers should be set";
return false;
}
if (!ValidateSettings(column.analyzers(), error)) {
return false;
}
Expand Down Expand Up @@ -226,15 +240,18 @@ Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const T
columnAnalyzers->mutable_analyzers()->CopyFrom(resultAnalyzers);
}

{
NProtoBuf::RepeatedPtrField<TString> keyColumns;
TString keyColumn_ = keyColumn;
keyColumns.Add(std::move(keyColumn_));
ValidateSettings(keyColumns, result, error);
}
ValidateSettings(result, error);

return result;
}


}

template<> inline
void Out<TVector<TString>>(IOutputStream& o, const TVector<TString> &vec) {
o << "[ ";
for (const auto &x : vec)
o << x << ' ';
o << "]";
}
5 changes: 4 additions & 1 deletion ydb/core/base/fulltext.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ namespace NKikimr::NFulltext {

TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings);

bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error);
bool ValidateColumnsMatches(const NProtoBuf::RepeatedPtrField<TString>& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error);
bool ValidateColumnsMatches(const TVector<TString>& columns, const Ydb::Table::FulltextIndexSettings& settings, TString& error);

bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error);
Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& values, TString& error);

}
64 changes: 31 additions & 33 deletions ydb/core/base/ut/fulltext_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,56 +6,54 @@ namespace NKikimr::NFulltext {

Y_UNIT_TEST_SUITE(NFulltext) {

Y_UNIT_TEST(ValidateColumnsMatches) {
TString error;

Ydb::Table::FulltextIndexSettings settings;
settings.add_columns()->set_column("column1");
settings.add_columns()->set_column("column2");

UNIT_ASSERT(!ValidateColumnsMatches(TVector<TString>{"column2"}, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "columns [ column1 column2 ] should be [ column2 ]");

UNIT_ASSERT(!ValidateColumnsMatches(TVector<TString>{"column2", "column1"}, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "columns [ column1 column2 ] should be [ column2 column1 ]");

UNIT_ASSERT(ValidateColumnsMatches(TVector<TString>{"column1", "column2"}, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "");
}

Y_UNIT_TEST(ValidateSettings) {
Ydb::Table::FulltextIndexSettings settings;
TString error;

NProtoBuf::RepeatedPtrField<TString> keyColumns;
keyColumns.Add("text");

UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT(!ValidateSettings(settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "layout should be set");
settings.set_layout(Ydb::Table::FulltextIndexSettings::FLAT);

UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column settings but have 0 of them");
UNIT_ASSERT(!ValidateSettings(settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "columns should be set");
auto columnSettings = settings.add_columns();

UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have a column name");
UNIT_ASSERT(!ValidateSettings(settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "column name should be set");
columnSettings->set_column("text");

UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have analyzers");
UNIT_ASSERT(!ValidateSettings(settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "column analyzers should be set");
auto columnAnalyzers = columnSettings->mutable_analyzers();

UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
UNIT_ASSERT(!ValidateSettings(settings, error));
UNIT_ASSERT_VALUES_EQUAL(error, "tokenizer should be set");
columnAnalyzers->set_tokenizer(Ydb::Table::FulltextIndexSettings::STANDARD);

{
NProtoBuf::RepeatedPtrField<TString> keyColumns;
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 0 of them");
}

{
NProtoBuf::RepeatedPtrField<TString> keyColumns;
keyColumns.Add("text2");
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column text2 settings but have text");
}

{
NProtoBuf::RepeatedPtrField<TString> keyColumns;
keyColumns.Add("text");
keyColumns.Add("text");
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 2 of them");
}

UNIT_ASSERT_C(ValidateSettings(keyColumns, settings, error), error);
UNIT_ASSERT_C(ValidateSettings(settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "");

columnSettings = settings.add_columns();
columnSettings->set_column("text2");
UNIT_ASSERT_C(!ValidateSettings(settings, error), error);
UNIT_ASSERT_VALUES_EQUAL(error, "columns should have a single value");
}

Y_UNIT_TEST(FillSettings) {
Expand Down
10 changes: 4 additions & 6 deletions ydb/core/protos/tx_datashard.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1799,14 +1799,12 @@ message TEvBuildFulltextIndexRequest {
optional uint64 SeqNoGeneration = 6;
optional uint64 SeqNoRound = 7;

optional Ydb.Table.FulltextIndexSettings Settings = 8;
optional string IndexName = 8;

optional string IndexName = 9;
optional Ydb.Table.FulltextIndexSettings Settings = 9; // also has key columns
repeated string DataColumns = 10;

repeated string KeyColumns = 10;
repeated string DataColumns = 11;

optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 12;
optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 11;
}

message TEvBuildFulltextIndexResponse {
Expand Down
10 changes: 4 additions & 6 deletions ydb/core/tx/datashard/build_index/fulltext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ class TBuildFulltextIndexScan: public TActor<TBuildFulltextIndexScan>, public IA
Y_ENSURE(Request.settings().columns().size() == 1);
TextColumn = Request.settings().columns().at(0).column();
TextAnalyzers = Request.settings().columns().at(0).analyzers();
Y_ENSURE(Request.GetKeyColumns().size() == 1);
Y_ENSURE(Request.GetKeyColumns().at(0) == TextColumn);

auto tags = GetAllTags(table);
auto types = GetAllTypes(table);
Expand Down Expand Up @@ -373,9 +371,9 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev
}

auto tags = GetAllTags(userTable);
for (auto keyColumn : request.GetKeyColumns()) {
if (!tags.contains(keyColumn)) {
badRequest(TStringBuilder() << "Unknown key column: " << keyColumn);
for (auto column : request.GetSettings().columns()) {
if (!tags.contains(column.column())) {
badRequest(TStringBuilder() << "Unknown key column: " << column.column());
}
}
for (auto dataColumn : request.GetDataColumns()) {
Expand All @@ -393,7 +391,7 @@ void TDataShard::HandleSafe(TEvDataShard::TEvBuildFulltextIndexRequest::TPtr& ev
badRequest(TStringBuilder() << "Missing fulltext index settings");
} else {
TString error;
if (!NKikimr::NFulltext::ValidateSettings(request.keycolumns(), request.GetSettings(), error)) {
if (!NKikimr::NFulltext::ValidateSettings(request.GetSettings(), error)) {
badRequest(error);
}
}
Expand Down
18 changes: 4 additions & 14 deletions ydb/core/tx/datashard/build_index/ut/ut_fulltext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) {

request.SetIndexName(kIndexTable);

request.AddKeyColumns("text");

setupRequest(request);

return datashards[0];
Expand Down Expand Up @@ -176,24 +174,17 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) {
}, "{ <main>: Error: Missing fulltext index settings }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.MutableSettings()->clear_columns();
}, "{ <main>: Error: fulltext index should have a single text key column settings but have 0 of them }");
}, "{ <main>: Error: columns should be set }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.MutableSettings()->mutable_columns()->at(0).mutable_analyzers()->clear_tokenizer();
}, "{ <main>: Error: tokenizer should be set }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.MutableSettings()->mutable_columns()->at(0).set_column("data");
}, "{ <main>: Error: fulltext index should have a single text key column text settings but have data }");

DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.ClearIndexName();
}, "{ <main>: Error: Empty index table name }");

DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.ClearKeyColumns();
}, "{ <main>: Error: fulltext index should have a single text key column but have 0 of them }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.ClearKeyColumns();
request.AddKeyColumns("some");
request.MutableSettings()->mutable_columns()->at(0).set_column("some");
}, "{ <main>: Error: Unknown key column: some }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.AddDataColumns("some");
Expand All @@ -202,9 +193,8 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildFulltextIndexScan) {
// test multiple issues:
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvBuildFulltextIndexRequest& request) {
request.ClearIndexName();
request.ClearKeyColumns();
request.AddKeyColumns("some");
}, "[ { <main>: Error: Empty index table name } { <main>: Error: Unknown key column: some } ]");
request.AddDataColumns("some");
}, "[ { <main>: Error: Empty index table name } { <main>: Error: Unknown data column: some } ]");
}

Y_UNIT_TEST(Build) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ TVector<ISubOperation::TPtr> CreateIndexedTable(TOperationId nextId, const TTxTr
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusPreconditionFailed, "Fulltext index support is disabled")};
}
TString msg;
if (!NKikimr::NFulltext::ValidateSettings(indexDescription.keycolumnnames(), indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) {
if (!NKikimr::NFulltext::ValidateSettings(indexDescription.GetFulltextIndexDescription().GetSettings(), msg)) {
return {CreateReject(nextId, NKikimrScheme::EStatus::StatusInvalidParameter, msg)};
}
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder
buildInfo.IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalFulltext;
NKikimrSchemeOp::TFulltextIndexDescription fulltextIndexDescription;
*fulltextIndexDescription.MutableSettings() = index.global_fulltext_index().fulltext_settings();
if (!NKikimr::NFulltext::ValidateSettings(index.index_columns(), fulltextIndexDescription.GetSettings(), explain)) {
if (!NKikimr::NFulltext::ValidateSettings(fulltextIndexDescription.GetSettings(), explain)) {
return false;
}
buildInfo.SpecializedIndexDescription = fulltextIndexDescription;
Expand Down
34 changes: 19 additions & 15 deletions ydb/core/tx/schemeshard/schemeshard_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "schemeshard_info_types.h"
#include "schemeshard_types.h"

#include <ydb/core/base/fulltext.h>
#include <ydb/core/base/table_index.h>

#include <yql/essentials/minikql/mkql_type_ops.h>
Expand Down Expand Up @@ -168,7 +169,7 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat
}
break;
case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: {
//We have already checked this in IsCompatibleIndex
// We have already checked this in IsCompatibleIndex
Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1);

if (indexKeys.KeyColumns.size() > 1 && !IsCompatibleKeyTypes(baseColumnTypes, implTableColumns, uniformTable, error)) {
Expand All @@ -188,25 +189,28 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat
break;
}
case NKikimrSchemeOp::EIndexTypeGlobalFulltext: {
//We have already checked this in IsCompatibleIndex
// We have already checked this in IsCompatibleIndex
Y_ABORT_UNLESS(indexKeys.KeyColumns.size() >= 1);

if (indexKeys.KeyColumns.size() > 1) {

// Here we only check that fulltext index columns matches table description
// the rest will be checked in NFulltext::ValidateSettings (called separately outside of CommonCheck)
if (!NKikimr::NFulltext::ValidateColumnsMatches(indexKeys.KeyColumns, indexDesc.GetFulltextIndexDescription().GetSettings(), error)) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
error = TStringBuilder() << "fulltext index should have a single text key column";
return false;
}

const TString& textColumnName = indexKeys.KeyColumns.at(0);
Y_ABORT_UNLESS(baseColumnTypes.contains(textColumnName));
auto typeInfo = baseColumnTypes.at(textColumnName);

// TODO: support utf-8 in fulltext index
if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
error = TStringBuilder() << "Text column '" << textColumnName << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
return false;

for (const auto& column : indexDesc.GetFulltextIndexDescription().GetSettings().columns()) {
if (column.has_analyzers()) {
auto typeInfo = baseColumnTypes.at(column.column());
// TODO: support utf-8 in fulltext index
if (typeInfo.GetTypeId() != NScheme::NTypeIds::String) {
status = NKikimrScheme::EStatus::StatusInvalidParameter;
error = TStringBuilder() << "Fulltext column '" << column.column() << "' expected type 'String' but got " << NScheme::TypeName(typeInfo);
return false;
}
}
}

break;
}
default:
Expand Down
5 changes: 3 additions & 2 deletions ydb/public/api/protos/ydb_table.proto
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,9 @@ message FulltextIndexSettings {
optional Layout layout = 1;

// List of columns and their fulltext settings
// Currently, this list should contain a single entry
// And provided column should be the only one in the TableIndex.index_columns list
// Currently, this list should contain a single entry with specified analyzers
// Later, some columns may not use analyzers and will be indexed as-is
// This list must always match TableIndex.index_columns
repeated ColumnAnalyzers columns = 2;
}

Expand Down
Loading