diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_fulltext_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_fulltext_ut.cpp index 52f585f6d936..fda611e85e9c 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_fulltext_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_fulltext_ut.cpp @@ -66,8 +66,7 @@ Y_UNIT_TEST_SUITE(KqpSchemeFulltext) { WITH (layout=flat, tokenizer=whitespace, use_filter_lowercase=true) )"; auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); - // TODO: implement build index - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp index 8cb8de853d30..ab83d454b145 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp @@ -58,6 +58,10 @@ void TSchemeShard::Handle(TEvDataShard::TEvValidateUniqueIndexResponse::TPtr& ev Execute(CreateTxReply(ev), ctx); } +void TSchemeShard::Handle(TEvDataShard::TEvBuildFulltextIndexResponse::TPtr& ev, const TActorContext& ctx) { + Execute(CreateTxReply(ev), ctx); +} + void TSchemeShard::Handle(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev, const TActorContext& ctx) { Execute(CreateTxBilling(ev), ctx); } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index d40ab4575f99..6744ef652b69 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -610,6 +610,20 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil : request.GetTabletId()); } + void FillBuildInfoColumns(TIndexBuildInfo& buildInfo, TTableColumns&& columns) { + buildInfo.FillIndexColumns.clear(); + buildInfo.FillIndexColumns.reserve(columns.Keys.size()); + for (const auto& x: columns.Keys) { + buildInfo.FillIndexColumns.emplace_back(x); + columns.Columns.erase(x); + } + buildInfo.FillDataColumns.clear(); + buildInfo.FillDataColumns.reserve(columns.Columns.size()); + for (const auto& x: columns.Columns) { + buildInfo.FillDataColumns.emplace_back(x); + } + } + void SendSampleKRequest(TShardIdx shardIdx, TIndexBuildInfo& buildInfo) { Y_ENSURE(buildInfo.IsBuildVectorIndex()); auto ev = MakeHolder(); @@ -821,19 +835,7 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil buildInfo.TargetName = implTable.PathString(); const auto& implTableInfo = Self->Tables.at(implTable.Base()->PathId); - auto implTableColumns = NTableIndex::ExtractInfo(implTableInfo); - buildInfo.FillIndexColumns.clear(); - buildInfo.FillIndexColumns.reserve(implTableColumns.Keys.size()); - for (const auto& x: implTableColumns.Keys) { - buildInfo.FillIndexColumns.emplace_back(x); - implTableColumns.Columns.erase(x); - } - // TODO(mbkkt) why order doesn't matter? - buildInfo.FillDataColumns.clear(); - buildInfo.FillDataColumns.reserve(implTableColumns.Columns.size()); - for (const auto& x: implTableColumns.Columns) { - buildInfo.FillDataColumns.emplace_back(x); - } + FillBuildInfoColumns(buildInfo, NTableIndex::ExtractInfo(implTableInfo)); } *ev->Record.MutableIndexColumns() = { buildInfo.FillIndexColumns.begin(), @@ -899,6 +901,34 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil LOG_N("TTxBuildProgress: TUploadSampleK: " << buildInfo); } + void SendBuildFulltextIndexRequest(TShardIdx shardIdx, TIndexBuildInfo& buildInfo) { + auto ev = MakeHolder(); + ev->Record.SetId(ui64(BuildId)); + + buildInfo.TablePathId.ToProto(ev->Record.MutablePathId()); + + if (buildInfo.TargetName.empty()) { + TPath implTable = GetBuildPath(Self, buildInfo, NTableIndex::ImplTable); + buildInfo.TargetName = implTable.PathString(); + + const auto& implTableInfo = Self->Tables.at(implTable.Base()->PathId); + FillBuildInfoColumns(buildInfo, NTableIndex::ExtractInfo(implTableInfo)); + } + ev->Record.SetIndexName(buildInfo.TargetName); + *ev->Record.MutableSettings() = std::get( + buildInfo.SpecializedIndexDescription).GetSettings(); + *ev->Record.MutableDataColumns() = { + buildInfo.FillDataColumns.begin(), + buildInfo.FillDataColumns.end() + }; + + auto shardId = FillScanRequestCommon(ev->Record, shardIdx, buildInfo); + + LOG_N("TTxBuildProgress: TEvBuildFulltextIndexRequest: " << ev->Record.ShortDebugString()); + + ToTabletSend.emplace(shardId, std::move(ev)); + } + void ClearAfterFill(const TActorContext& ctx, TIndexBuildInfo& buildInfo) { buildInfo.DoneShards = {}; buildInfo.InProgressShards = {}; @@ -1328,11 +1358,27 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil return true; } + bool FillFulltextIndex(TIndexBuildInfo& buildInfo) { + LOG_D("FillFulltextIndex Start"); + + if (NoShardsAdded(buildInfo)) { + AddAllShards(buildInfo); + } + + auto done = SendToShards(buildInfo, [&](TShardIdx shardIdx) { SendBuildFulltextIndexRequest(shardIdx, buildInfo); }) && + buildInfo.DoneShards.size() == buildInfo.Shards.size(); + + if (done) { + LOG_D("FillFulltextIndex Done"); + } + + return done; + } + bool FillIndex(TTransactionContext& txc, TIndexBuildInfo& buildInfo) { - // About Level == 1, for now build index impl tables don't need snapshot, + // for now build index impl tables don't need snapshot, // because they're used only by build index - if (buildInfo.KMeans.Level == 1 && !buildInfo.SnapshotTxId) { - Y_ENSURE(!buildInfo.SnapshotStep); + if (!buildInfo.SnapshotTxId && GetShardsPath(buildInfo)->PathId == buildInfo.TablePathId) { Y_ENSURE(Self->TablesWithSnapshots.contains(buildInfo.TablePathId)); Y_ENSURE(Self->TablesWithSnapshots.at(buildInfo.TablePathId) == buildInfo.InitiateTxId); @@ -1357,8 +1403,10 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil return FillVectorIndex(txc, buildInfo); case TIndexBuildInfo::EBuildKind::BuildPrefixedVectorIndex: return FillPrefixedVectorIndex(txc, buildInfo); + case TIndexBuildInfo::EBuildKind::BuildFulltext: + return FillFulltextIndex(buildInfo); default: - Y_ENSURE(false); + Y_ENSURE(false, buildInfo.InvalidBuildKind()); return true; } } @@ -1541,7 +1589,7 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil } break; case TIndexBuildInfo::EState::LockBuild: - Y_ENSURE(buildInfo.IsBuildVectorIndex() || buildInfo.IsValidatingUniqueIndex()); + Y_ENSURE(buildInfo.IsBuildVectorIndex() && buildInfo.KMeans.Level > 1 || buildInfo.IsValidatingUniqueIndex()); if (buildInfo.ApplyTxId == InvalidTxId) { AllocateTxId(BuildId); } else if (buildInfo.ApplyTxStatus == NKikimrScheme::StatusSuccess) { @@ -1722,6 +1770,28 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil return TSerializedTableRange{{&from, 1}, false, {&to, 1}, true}; } + TPath GetShardsPath(TIndexBuildInfo& buildInfo) { + switch (buildInfo.BuildKind) { + case TIndexBuildInfo::EBuildKind::BuildSecondaryIndex: + case TIndexBuildInfo::EBuildKind::BuildColumns: + case TIndexBuildInfo::EBuildKind::BuildFulltext: + return TPath::Init(buildInfo.TablePathId, Self); + case TIndexBuildInfo::EBuildKind::BuildSecondaryUniqueIndex: + return buildInfo.IsValidatingUniqueIndex() + ? GetBuildPath(Self, buildInfo, NTableIndex::ImplTable) + : TPath::Init(buildInfo.TablePathId, Self); + case TIndexBuildInfo::EBuildKind::BuildVectorIndex: + case TIndexBuildInfo::EBuildKind::BuildPrefixedVectorIndex: + if (buildInfo.KMeans.Level == 1) { + return TPath::Init(buildInfo.TablePathId, Self); + } else { + return GetBuildPath(Self, buildInfo, buildInfo.KMeans.ReadFrom()); + } + default: + Y_ENSURE(false, buildInfo.InvalidBuildKind()); + } + } + bool InitiateShards(NIceDb::TNiceDb& db, TIndexBuildInfo& buildInfo) { LOG_D("InitiateShards " << buildInfo.DebugString()); @@ -1730,23 +1800,17 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil Y_ENSURE(buildInfo.InProgressShards.empty()); Y_ENSURE(buildInfo.DoneShards.empty()); - TTableInfo::TPtr table; - if (buildInfo.IsValidatingUniqueIndex()) { - auto path = GetBuildPath(Self, buildInfo, NTableIndex::ImplTable); - table = Self->Tables.at(path->PathId); - } else if (buildInfo.KMeans.Level == 1) { - table = Self->Tables.at(buildInfo.TablePathId); - } else { - auto path = GetBuildPath(Self, buildInfo, buildInfo.KMeans.ReadFrom()); - table = Self->Tables.at(path->PathId); - - if (!path.IsLocked()) { // lock is needed to prevent table shards from being split - ChangeState(buildInfo.Id, TIndexBuildInfo::EState::LockBuild); - Progress(buildInfo.Id); - return false; - } - Y_ENSURE(path.LockedBy() == buildInfo.LockTxId); + TPath path = GetShardsPath(buildInfo); + if (!path.IsLocked()) { // lock is needed to prevent table shards from being split + Y_ENSURE(buildInfo.IsBuildVectorIndex() && buildInfo.KMeans.Level > 1); + ChangeState(buildInfo.Id, TIndexBuildInfo::EState::LockBuild); + Progress(buildInfo.Id); + return false; } + Y_ENSURE(path.LockedBy() == buildInfo.LockTxId); + + TTableInfo::TPtr table = Self->Tables.at(path->PathId); + auto tableColumns = NTableIndex::ExtractInfo(table); // skip dropped columns // In case of unique index validation the real range will arrive after index validation for each shard: // it will describe the first and the last index keys for further validation. @@ -2259,6 +2323,13 @@ struct TSchemeShard::TIndexBuilder::TTxReplyValidateUniqueIndex: public TTxShard } }; +struct TSchemeShard::TIndexBuilder::TTxReplyFulltextIndex: public TTxShardReply { + TTxReplyFulltextIndex(TSelf* self, TEvDataShard::TEvBuildFulltextIndexResponse::TPtr& response) + : TTxShardReply(self, TIndexBuildId(response->Get()->Record.GetId()), response) + { + } +}; + struct TSchemeShard::TIndexBuilder::TTxReplyProgress: public TTxShardReply { explicit TTxReplyProgress(TSelf* self, TEvDataShard::TEvBuildIndexProgressResponse::TPtr& response) : TTxShardReply(self, TIndexBuildId(response->Get()->Record.GetId()), response) @@ -2750,6 +2821,10 @@ ITransaction* TSchemeShard::CreateTxReply(TEvDataShard::TEvValidateUniqueIndexRe return new TIndexBuilder::TTxReplyValidateUniqueIndex(this, response); } +ITransaction* TSchemeShard::CreateTxReply(TEvDataShard::TEvBuildFulltextIndexResponse::TPtr& response) { + return new TIndexBuilder::TTxReplyFulltextIndex(this, response); +} + ITransaction* TSchemeShard::CreatePipeRetry(TIndexBuildId indexBuildId, TTabletId tabletId) { return new TIndexBuilder::TTxReplyRetry(this, indexBuildId, tabletId); } diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index b94c04e214a1..bb140e49706c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -5378,6 +5378,7 @@ void TSchemeShard::StateWork(STFUNC_SIG) { HFuncTraced(TEvDataShard::TEvPrefixKMeansResponse, Handle); HFuncTraced(TEvIndexBuilder::TEvUploadSampleKResponse, Handle); HFuncTraced(TEvDataShard::TEvValidateUniqueIndexResponse, Handle); + HFuncTraced(TEvDataShard::TEvBuildFulltextIndexResponse, Handle); // } // NIndexBuilder //namespace NCdcStreamScan { diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index 6fc6d569ca56..00ac1acf0489 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -1569,6 +1569,7 @@ class TSchemeShard struct TTxReplyPrefixKMeans; struct TTxReplyUploadSample; struct TTxReplyValidateUniqueIndex; + struct TTxReplyFulltextIndex; struct TTxPipeReset; struct TTxBilling; @@ -1590,7 +1591,8 @@ class TSchemeShard NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvLocalKMeansResponse::TPtr& local); NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvPrefixKMeansResponse::TPtr& prefix); NTabletFlatExecutor::ITransaction* CreateTxReply(TEvIndexBuilder::TEvUploadSampleKResponse::TPtr& upload); - NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvValidateUniqueIndexResponse::TPtr& progress); + NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvValidateUniqueIndexResponse::TPtr& response); + NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvBuildFulltextIndexResponse::TPtr& response); NTabletFlatExecutor::ITransaction* CreatePipeRetry(TIndexBuildId indexBuildId, TTabletId tabletId); NTabletFlatExecutor::ITransaction* CreateTxBilling(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev); @@ -1608,6 +1610,7 @@ class TSchemeShard void Handle(TEvDataShard::TEvPrefixKMeansResponse::TPtr& ev, const TActorContext& ctx); void Handle(TEvIndexBuilder::TEvUploadSampleKResponse::TPtr& ev, const TActorContext& ctx); void Handle(TEvDataShard::TEvValidateUniqueIndexResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvDataShard::TEvBuildFulltextIndexResponse::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 7e22800f2a45..048d2dfdc067 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3433,19 +3433,20 @@ struct TIndexBuildInfo: public TSimpleRefCount { std::unique_ptr Clusters; TString DebugString() const { - auto result = TStringBuilder() << BuildKind; + auto result = TStringBuilder() << BuildKind << " " << State << "/" << SubState << " "; if (IsBuildVectorIndex()) { - result << " " - << KMeans.DebugString() << ", " + result << KMeans.DebugString() << ", " << "{ Rows = " << Sample.Rows.size() << ", Sample = " << Sample.State - << ", Clusters = " << Clusters->GetClusters().size() << " }, " - << "{ Done = " << DoneShards.size() - << ", ToUpload = " << ToUploadShards.size() - << ", InProgress = " << InProgressShards.size() << " }"; + << ", Clusters = " << Clusters->GetClusters().size() << " }, "; } + result + << "{ Done = " << DoneShards.size() + << ", ToUpload = " << ToUploadShards.size() + << ", InProgress = " << InProgressShards.size() << " }"; + return result; } @@ -3613,7 +3614,7 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->Billed.SetReadRows(row.template GetValueOrDefault(0)); indexInfo->Billed.SetReadBytes(row.template GetValueOrDefault(0)); indexInfo->Billed.SetCpuTimeUs(row.template GetValueOrDefault(0)); - if (indexInfo->IsFillBuildIndex()) { + if (indexInfo->IsOldBuildIndex()) { TMeteringStatsHelper::TryFixOldFormat(indexInfo->Billed); } @@ -3622,7 +3623,7 @@ struct TIndexBuildInfo: public TSimpleRefCount { indexInfo->Processed.SetReadRows(row.template GetValueOrDefault(0)); indexInfo->Processed.SetReadBytes(row.template GetValueOrDefault(0)); indexInfo->Processed.SetCpuTimeUs(row.template GetValueOrDefault(0)); - if (indexInfo->IsFillBuildIndex()) { + if (indexInfo->IsOldBuildIndex()) { TMeteringStatsHelper::TryFixOldFormat(indexInfo->Processed); } @@ -3702,7 +3703,7 @@ struct TIndexBuildInfo: public TSimpleRefCount { shardStatus.Processed.SetReadRows(row.template GetValueOrDefault(0)); shardStatus.Processed.SetReadBytes(row.template GetValueOrDefault(0)); shardStatus.Processed.SetCpuTimeUs(row.template GetValueOrDefault(0)); - if (IsFillBuildIndex()) { + if (IsOldBuildIndex()) { TMeteringStatsHelper::TryFixOldFormat(shardStatus.Processed); } Processed += shardStatus.Processed; @@ -3712,8 +3713,13 @@ struct TIndexBuildInfo: public TSimpleRefCount { return CancelRequested; } - bool IsFillBuildIndex() const { - return IsBuildSecondaryIndex() || IsBuildSecondaryUniqueIndex() || IsBuildColumns(); + bool IsOldBuildIndex() const { + return IsBuildSecondaryIndex() || IsBuildColumns(); + } + + TString InvalidBuildKind() { + return TStringBuilder() << "Invalid index build kind " << static_cast(BuildKind) + << " for index type " << static_cast(IndexType); } bool IsBuildSecondaryIndex() const { @@ -3732,8 +3738,12 @@ struct TIndexBuildInfo: public TSimpleRefCount { return BuildKind == EBuildKind::BuildVectorIndex || IsBuildPrefixedVectorIndex(); } + bool IsBuildFulltextIndex() const { + return BuildKind == EBuildKind::BuildFulltext; + } + bool IsBuildIndex() const { - return IsBuildSecondaryIndex() || IsBuildSecondaryUniqueIndex() || IsBuildVectorIndex(); + return IsBuildSecondaryIndex() || IsBuildSecondaryUniqueIndex() || IsBuildVectorIndex() || IsBuildFulltextIndex(); } bool IsBuildColumns() const { diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index dd20f176b692..a82e9b25b1f8 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1883,13 +1884,17 @@ namespace NSchemeShardUT_Private { return { step, txId }; } - TEvIndexBuilder::TEvCreateRequest* CreateBuildIndexRequest(ui64 id, const TString& dbName, const TString& src, const TBuildIndexConfig& cfg) { + TEvIndexBuilder::TEvCreateRequest* CreateBuildIndexRequest(ui64 id, const TString& dbName, const TString& src, const Ydb::Table::TableIndex& index) { NKikimrIndexBuilder::TIndexBuildSettings settings; settings.set_source_path(src); settings.MutableScanSettings()->SetMaxBatchRows(1); settings.set_max_shards_in_flight(2); + *settings.mutable_index() = index; + return new TEvIndexBuilder::TEvCreateRequest(id, dbName, std::move(settings)); + } - Ydb::Table::TableIndex& index = *settings.mutable_index(); + TEvIndexBuilder::TEvCreateRequest* CreateBuildIndexRequest(ui64 id, const TString& dbName, const TString& src, const TBuildIndexConfig& cfg) { + Ydb::Table::TableIndex index; index.set_name(cfg.IndexName); *index.mutable_index_columns() = {cfg.IndexColumns.begin(), cfg.IndexColumns.end()}; *index.mutable_data_columns() = {cfg.DataColumns.begin(), cfg.DataColumns.end()}; @@ -1943,7 +1948,7 @@ namespace NSchemeShardUT_Private { UNIT_ASSERT_C(false, "Unknown index type: " << static_cast(cfg.IndexType)); } - return new TEvIndexBuilder::TEvCreateRequest(id, dbName, std::move(settings)); + return CreateBuildIndexRequest(id, dbName, src, index); } std::unique_ptr CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal) { @@ -1959,6 +1964,13 @@ namespace NSchemeShardUT_Private { return std::make_unique(id, dbName, std::move(settings)); } + void AsyncBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const Ydb::Table::TableIndex& index) { + auto sender = runtime.AllocateEdgeActor(); + auto request = CreateBuildIndexRequest(id, dbName, src, index); + + ForwardToTablet(runtime, schemeShard, sender, request); + } + void AsyncBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TBuildIndexConfig &cfg) { auto sender = runtime.AllocateEdgeActor(); auto request = CreateBuildIndexRequest(id, dbName, src, cfg); @@ -2014,6 +2026,23 @@ namespace NSchemeShardUT_Private { << " issues was " << event->Record.GetIssues()); } + void TestBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, + const TString &src, const Ydb::Table::TableIndex& index, Ydb::StatusIds::StatusCode expectedStatus) + { + AsyncBuildIndex(runtime, id, schemeShard, dbName, src, index); + + TAutoPtr handle; + TEvIndexBuilder::TEvCreateResponse* event = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT(event); + + Cerr << "BUILDINDEX RESPONSE CREATE: " << event->ToString() << Endl; + UNIT_ASSERT_EQUAL_C(event->Record.GetStatus(), expectedStatus, + "status mismatch" + << " got " << Ydb::StatusIds::StatusCode_Name(event->Record.GetStatus()) + << " expected " << Ydb::StatusIds::StatusCode_Name(expectedStatus) + << " issues was " << event->Record.GetIssues()); + } + void TestBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TBuildIndexConfig& cfg, Ydb::StatusIds::StatusCode expectedStatus) { @@ -2866,6 +2895,29 @@ namespace NSchemeShardUT_Private { return result; } + TVector ReadShards(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table) { + auto pathDesc = DescribePath(runtime, schemeshardId, table, true, false, true); + auto tableDesc = pathDesc.GetPathDescription().GetTable(); + TVector columns; + for (const auto& c : tableDesc.GetColumns()) { + columns.push_back(c.GetName()); + } + TVector shardRows; + for (const auto& partition : pathDesc.GetPathDescription().GetTablePartitions()) { + auto result = ReadTable(runtime, partition.GetDatashardId(), tableDesc.GetName(), + {tableDesc.GetKeyColumnNames().begin(), tableDesc.GetKeyColumnNames().end()}, {columns.begin(), columns.end()}); + + TStringStream ysonStream; + NYson::TYsonWriter writer(&ysonStream, NYson::EYsonFormat::Text); + NYql::IDataProvider::TFillSettings fillSettings; + bool truncated; + KikimrResultToYson(ysonStream, writer, result, {}, fillSettings, truncated); + UNIT_ASSERT(!truncated); + shardRows.push_back(ysonStream.Str()); + } + return shardRows; + } + ui32 CountRows(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table) { auto tableDesc = DescribePath(runtime, schemeshardId, table, true, false, true); const auto& pathDesc = tableDesc.GetPathDescription(); diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.h b/ydb/core/tx/schemeshard/ut_helpers/helpers.h index d0dfc1c68f99..d0fe94642c4c 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.h +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.h @@ -422,14 +422,17 @@ namespace NSchemeShardUT_Private { }; std::unique_ptr CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal); + TEvIndexBuilder::TEvCreateRequest* CreateBuildIndexRequest(ui64 id, const TString& dbName, const TString& src, const Ydb::Table::TableIndex& index); TEvIndexBuilder::TEvCreateRequest* CreateBuildIndexRequest(ui64 id, const TString& dbName, const TString& src, const TBuildIndexConfig& cfg); void AsyncBuildColumn(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString& columnName, const Ydb::TypedValue& literal); + void AsyncBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const Ydb::Table::TableIndex& index); void AsyncBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TBuildIndexConfig &cfg); void AsyncBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString &name, TVector columns, TVector dataColumns = {}); void AsyncBuildUniqIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString &name, TVector columns, TVector dataColumns = {}); void AsyncBuildVectorIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString &name, TVector columns, TVector dataColumns = {}); void TestBuildColumn(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString& columnName, const Ydb::TypedValue& literal, Ydb::StatusIds::StatusCode expectedStatus); + void TestBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const Ydb::Table::TableIndex& index, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); void TestBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TBuildIndexConfig &cfg, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); void TestBuildIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString &name, TVector columns, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); void TestBuildUniqIndex(TTestActorRuntime& runtime, ui64 id, ui64 schemeShard, const TString &dbName, const TString &src, const TString &name, TVector columns, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); @@ -718,6 +721,7 @@ namespace NSchemeShardUT_Private { NKikimrMiniKQL::TResult ReadTable(TTestActorRuntime& runtime, ui64 tabletId, const TString& table, const TVector& pk, const TVector& columns, const TString& rangeFlags = ""); + TVector ReadShards(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table); ui32 CountRows(TTestActorRuntime& runtime, ui64 schemeshardId, const TString& table); ui32 CountRows(TTestActorRuntime& runtime, const TString& table); diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_fulltext_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_fulltext_build.cpp new file mode 100644 index 000000000000..de6dc02c21a1 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_fulltext_build.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +using namespace NKikimr; +using namespace NSchemeShard; +using namespace NSchemeShardUT_Private; + +Y_UNIT_TEST_SUITE(FulltextIndexBuildTest) { + + Y_UNIT_TEST(Basic) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::BUILD_INDEX, NLog::PRI_TRACE); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "texts" + Columns { Name: "id" Type: "Uint64" } + Columns { Name: "text" Type: "String" } + Columns { Name: "data" Type: "String" } + KeyColumnNames: [ "id" ] + )"); + env.TestWaitNotification(runtime, txId); + + auto fnWriteRow = [&] (ui64 id, TString text, TString data) { + TString writeQuery = Sprintf(R"( + ( + (let key '( '('id (Uint64 '%u) ) ) ) + (let row '( '('text (String '"%s") ) '('data (String '"%s") ) ) ) + (return (AsList (UpdateRow '__user__texts key row) )) + ) + )", id, text.c_str(), data.c_str()); + + NKikimrMiniKQL::TResult result; + TString err; + NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, TTestTxConfig::FakeHiveTablets, writeQuery, result, err); + UNIT_ASSERT_VALUES_EQUAL_C(status, NKikimrProto::EReplyStatus::OK, err); + }; + + fnWriteRow(1, "green apple", "one"); + fnWriteRow(2, "red apple", "two"); + fnWriteRow(3, "yellow apple", "three"); + fnWriteRow(4, "red car", "four"); + + Ydb::Table::TableIndex index; + index.set_name("fulltext_idx"); + index.add_index_columns("text"); + index.add_data_columns("data"); + auto& fulltext = *index.mutable_global_fulltext_index()->mutable_fulltext_settings(); + fulltext.set_layout(Ydb::Table::FulltextIndexSettings::FLAT); + auto& analyzers = *fulltext.add_columns()->mutable_analyzers(); + fulltext.mutable_columns()->at(0).set_column("text"); + analyzers.set_tokenizer(Ydb::Table::FulltextIndexSettings::WHITESPACE); + + const ui64 buildIndexTx = ++txId; + TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/texts", index); + env.TestWaitNotification(runtime, buildIndexTx); + + { + auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx); + UNIT_ASSERT_VALUES_EQUAL_C( + buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE, + buildIndexOperation.DebugString() + ); + } + + auto rows = ReadShards(runtime, TTestTxConfig::SchemeShard, "/MyRoot/texts/fulltext_idx/indexImplTable").at(0); + UNIT_ASSERT_VALUES_EQUAL("[[[[" + R"(["apple";["one"];["1"]];)" + R"(["apple";["two"];["2"]];)" + R"(["apple";["three"];["3"]];)" + R"(["car";["four"];["4"]];)" + R"(["green";["one"];["1"]];)" + R"(["red";["two"];["2"]];)" + R"(["red";["four"];["4"]];)" + R"(["yellow";["three"];["3"]]];)" + "%false]]]", rows); + } + +} diff --git a/ydb/core/tx/schemeshard/ut_index_build/ya.make b/ydb/core/tx/schemeshard/ut_index_build/ya.make index bea890191a49..10294cf81465 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ya.make +++ b/ydb/core/tx/schemeshard/ut_index_build/ya.make @@ -8,6 +8,7 @@ PEERDIR( library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + ydb/core/kqp/ut/common ydb/core/metering ydb/core/testlib/default ydb/core/tx @@ -19,6 +20,7 @@ YQL_LAST_ABI_VERSION() SRCS( ut_schemeshard_build_index_helpers.cpp + ut_fulltext_build.cpp ut_index_build.cpp ut_vector_index_build.cpp )