From 5a188c2510d38d096a565e6043a5c23a9c0c0442 Mon Sep 17 00:00:00 2001
From: kungasc <kungasc@ydb.tech>
Date: Mon, 8 Sep 2025 17:56:06 +0300
Subject: [PATCH 1/4] Support multiple columns in fulltext index

---
 ydb/core/tx/schemeshard/schemeshard_utils.h   |  12 +
 .../tx/schemeshard/ut_helpers/ls_checks.cpp   |  42 +++-
 .../ut_index/ut_fulltext_index.cpp            | 205 ++++++++++++++----
 ydb/public/api/protos/ydb_table.proto         |  85 +++++---
 4 files changed, 258 insertions(+), 86 deletions(-)

diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h
index d23dfbdb89a2..cf299e6564ae 100644
--- a/ydb/core/tx/schemeshard/schemeshard_utils.h
+++ b/ydb/core/tx/schemeshard/schemeshard_utils.h
@@ -194,6 +194,18 @@ bool CommonCheck(const TTableDesc& tableDesc, const NKikimrSchemeOp::TIndexCreat
                 error = TStringBuilder() << "fulltext index can only have a single key text column";
                 return false;
             }
+            if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() != 1) {
+                status = NKikimrScheme::EStatus::StatusInvalidParameter;
+                error = TStringBuilder() << "fulltext index should have single '" << indexKeys.KeyColumns.at(0) << "' column settings"
+                    << " but have " << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().size() << " of them";
+                return false;
+            }
+            if (indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() != indexKeys.KeyColumns.at(0)) {
+                status = NKikimrScheme::EStatus::StatusInvalidParameter;
+                error = TStringBuilder() << "fulltext index should have '" << indexKeys.KeyColumns.at(0) << "' column settings"
+                    << " but have '" << indexDesc.GetFulltextIndexDescription().GetSettings().Getcolumns().at(0).Getcolumn() << "' column settings";
+                return false;
+            }
     
             const TString& indexColumnName = indexKeys.KeyColumns.back();
             Y_ABORT_UNLESS(baseColumnTypes.contains(indexColumnName));
diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
index a75939072c02..cd06a10a1909 100644
--- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
+++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
@@ -1,5 +1,6 @@
 #include "ls_checks.h"
 
+#include <google/protobuf/text_format.h>
 #include <ydb/public/api/protos/ydb_cms.pb.h>
 #include <ydb/public/api/protos/ydb_coordination.pb.h>
 #include <ydb/public/lib/scheme_types/scheme_type_id.h>
@@ -914,20 +915,37 @@ TCheckFunc KMeansTreeDescription(Ydb::Table::VectorIndexSettings_Metric metric,
 
 TCheckFunc SpecializedIndexDescription(const TString& proto) {
     return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
-        TString actual;
         switch (record.GetPathDescription().GetTableIndex().GetSpecializedIndexDescriptionCase()) {
-        case NKikimrSchemeOp::TIndexDescription::kVectorIndexKmeansTreeDescription:
-            actual = record.GetPathDescription().GetTableIndex().GetVectorIndexKmeansTreeDescription().GetSettings().ShortDebugString();
-            break;
-        case NKikimrSchemeOp::TIndexDescription::kFulltextIndexDescription:
-            actual = record.GetPathDescription().GetTableIndex().GetFulltextIndexDescription().GetSettings().ShortDebugString();
-            break;
-        case NKikimrSchemeOp::TIndexDescription::SPECIALIZEDINDEXDESCRIPTION_NOT_SET:
-            actual = "SPECIALIZEDINDEXDESCRIPTION_NOT_SET";
-            break;
+            case NKikimrSchemeOp::TIndexDescription::kVectorIndexKmeansTreeDescription: { // proto is text-format KMeansTreeSettings
+                const auto& actual = record.GetPathDescription().GetTableIndex().GetVectorIndexKmeansTreeDescription().GetSettings(); // borrowed from record, no copy
+                Ydb::Table::KMeansTreeSettings expected;
+                UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(proto, &expected));
+                UNIT_ASSERT_C(google::protobuf::util::MessageDifferencer::Equals(actual, expected), // compares parsed messages, not their text
+                    TStringBuilder() << "Expected "
+                        << expected.ShortDebugString()
+                        << " but got "
+                        << actual.ShortDebugString());
+                break;
+            }
+            case NKikimrSchemeOp::TIndexDescription::kFulltextIndexDescription: { // proto is text-format FulltextIndexSettings
+                const auto& actual = record.GetPathDescription().GetTableIndex().GetFulltextIndexDescription().GetSettings(); // borrowed from record, no copy
+                Ydb::Table::FulltextIndexSettings expected;
+                UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(proto, &expected));
+                UNIT_ASSERT_C(google::protobuf::util::MessageDifferencer::Equals(actual, expected), // compares parsed messages, not their text
+                    TStringBuilder() << "Expected "
+                        << expected.ShortDebugString()
+                        << " but got "
+                        << actual.ShortDebugString());
+                break;
+            }
+            case NKikimrSchemeOp::TIndexDescription::SPECIALIZEDINDEXDESCRIPTION_NOT_SET: { // nothing to parse: proto must be the sentinel name itself
+                UNIT_ASSERT_C(proto == "SPECIALIZEDINDEXDESCRIPTION_NOT_SET",
+                    TStringBuilder() << "Expected "
+                        << proto
+                        << " but got SPECIALIZEDINDEXDESCRIPTION_NOT_SET");
+                break;
+            }
         }
-
-        UNIT_ASSERT_VALUES_EQUAL(actual, proto);
     };
 }
 
diff --git a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
index e9f772e8e3fc..6c147d7af6ad 100644
--- a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
+++ b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
@@ -18,23 +18,38 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
         TTestEnv env(runtime);
         ui64 txId = 100;
 
-        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"(
+        TString fulltextSettings = R"(
+            layout: FLAT
+            columns: {
+                column: "text"
+                settings: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
             TableDescription {
-              Name: "texts"
-              Columns { Name: "id" Type: "Uint64" }
-              Columns { Name: "text" Type: "String" }
-              Columns { Name: "covered" Type: "String" }
-              Columns { Name: "another" Type: "Uint64" }
-              KeyColumnNames: ["id"]
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text" Type: "String" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
             }
             IndexDescription {
-              Name: "idx_fulltext"
-              KeyColumnNames: ["text"]
-              DataColumnNames: ["covered"]
-              Type: EIndexTypeGlobalFulltext
-              FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } }
+                Name: "idx_fulltext"
+                KeyColumnNames: ["text"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
             }
-        )");
+        )", fulltextSettings.c_str()));
         env.TestWaitNotification(runtime, txId);
 
         NKikimrSchemeOp::TDescribeOptions opts;
@@ -49,7 +64,7 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
                 NLs::IndexState(NKikimrSchemeOp::EIndexStateReady),
                 NLs::IndexKeys({"text"}),
                 NLs::IndexDataColumns({"covered"}),
-                NLs::SpecializedIndexDescription("layout: FLAT tokenizer: STANDARD use_filter_ngram: true filter_ngram_max_length: 42"),
+                NLs::SpecializedIndexDescription(fulltextSettings),
                 NLs::ChildrenCount(1),
             });
 
@@ -70,23 +85,38 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
         TTestEnv env(runtime);
         ui64 txId = 100;
 
-        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"(
+        TString fulltextSettings = R"(
+            layout: FLAT
+            columns: {
+                column: "text"
+                settings: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
             TableDescription {
-              Name: "texts"
-              Columns { Name: "id" Type: "Uint64" }
-              Columns { Name: "text" Type: "String" }
-              Columns { Name: "covered" Type: "String" }
-              Columns { Name: "another" Type: "Uint64" }
-              KeyColumnNames: [ "id"]
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text" Type: "String" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
             }
             IndexDescription {
-              Name: "idx_fulltext"
-              KeyColumnNames: [ "another", "text"]
-              DataColumnNames: ["covered"]
-              Type: EIndexTypeGlobalFulltext
-              FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } }
+                Name: "idx_fulltext"
+                KeyColumnNames: [ "another", "text"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
             }
-        )", {NKikimrScheme::StatusInvalidParameter});
+        )", fulltextSettings.c_str()), {NKikimrScheme::StatusInvalidParameter});
         env.TestWaitNotification(runtime, txId);
 
         TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ 
@@ -99,23 +129,118 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
         TTestEnv env(runtime);
         ui64 txId = 100;
 
-        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"(
+        TString fulltextSettings = R"(
+            layout: FLAT
+            columns: {
+                column: "text"
+                settings: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
+            TableDescription {
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text" Type: "Uint64" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
+            }
+            IndexDescription {
+                Name: "idx_fulltext"
+                KeyColumnNames: ["text"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
+            }
+        )", fulltextSettings.c_str()), {NKikimrScheme::StatusInvalidParameter});
+        env.TestWaitNotification(runtime, txId);
+
+        TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ 
+            NLs::PathNotExist,
+        });
+    }
+
+    Y_UNIT_TEST(CreateTableColumnsMismatch) { // column settings name "text_wrong" while the index key column is "text"
+        TTestBasicRuntime runtime;
+        TTestEnv env(runtime);
+        ui64 txId = 100;
+
+        TString fulltextSettings = R"(
+            layout: FLAT
+            columns: {
+                column: "text_wrong"
+                settings: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
+            TableDescription {
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text" Type: "String" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
+            }
+            IndexDescription {
+                Name: "idx_fulltext"
+                KeyColumnNames: ["text"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
+            }
+        )", fulltextSettings.c_str()), {NKikimrScheme::StatusInvalidParameter}); // the request is expected to be rejected
+        env.TestWaitNotification(runtime, txId);
+
+        TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ // the index path must not exist afterwards
+            NLs::PathNotExist,
+        });
+    }
+
+    Y_UNIT_TEST(CreateTableNoColumnsSettings) {
+        TTestBasicRuntime runtime;
+        TTestEnv env(runtime);
+        ui64 txId = 100;
+
+        TString fulltextSettings = R"(
+            layout: FLAT
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
             TableDescription {
-              Name: "texts"
-              Columns { Name: "id" Type: "Uint64" }
-              Columns { Name: "text" Type: "Uint64" }
-              Columns { Name: "covered" Type: "String" }
-              Columns { Name: "another" Type: "Uint64" }
-              KeyColumnNames: ["id"]
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text" Type: "String" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
             }
             IndexDescription {
-              Name: "idx_fulltext"
-              KeyColumnNames: ["text"]
-              DataColumnNames: ["covered"]
-              Type: EIndexTypeGlobalFulltext
-              FulltextIndexDescription: { Settings: { layout: FLAT, tokenizer: STANDARD, use_filter_ngram: true, filter_ngram_max_length: 42 } }
+                Name: "idx_fulltext"
+                KeyColumnNames: ["text"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
             }
-        )", {NKikimrScheme::StatusInvalidParameter});
+        )", fulltextSettings.c_str()), {NKikimrScheme::StatusInvalidParameter});
         env.TestWaitNotification(runtime, txId);
 
         TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ 
diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto
index ea0bb46f00d5..5f784c654794 100644
--- a/ydb/public/api/protos/ydb_table.proto
+++ b/ydb/public/api/protos/ydb_table.proto
@@ -113,35 +113,7 @@ message GlobalVectorKMeansTreeIndex {
     KMeansTreeSettings vector_settings = 3;
 }
 
-message FulltextIndexSettings {
-    // Specifies the layout strategy for storing and updating the full-text index
-    enum Layout {
-        LAYOUT_UNSPECIFIED = 0;
-
-        // Uses a single flat inverted index table (indexImplTable)
-        // Example source table:
-        //     ┌────┬────────────────────────────┐
-        //     │ id │ text                       │
-        //     ├────┼────────────────────────────┤
-        //     │ 1  │ "The quick brown fox"      │
-        //     │ 2  │ "The quick blue hare"      │
-        //     └────┴────────────────────────────┘
-        // Example inverted index table (indexImplTable):
-        //     ┌──────────────┬────┐
-        //     │ __ydb_token  │ id │
-        //     ├──────────────┼────┤
-        //     │ "blue"       │ 2  │
-        //     │ "brown"      │ 1  │
-        //     │ "fox"        │ 1  │
-        //     │ "hare"       │ 2  │
-        //     │ "quick"      │ 1  │
-        //     │ "quick"      │ 2  │
-        //     │ "The"        │ 1  │
-        //     │ "The"        │ 2  │
-        //     └──────────────┴────┘
-        FLAT = 1;
-    }
-
+message FulltextIndexAnalyzerSettings {
     // Specifies how text is tokenized during indexing
     enum Tokenizer {
         TOKENIZER_UNSPECIFIED = 0;
@@ -168,16 +140,13 @@ message FulltextIndexSettings {
         KEYWORD = 3;
     }
 
-    // See Layout enum
-    Layout layout = 1;
-
     // See Tokenizer enum
-    Tokenizer tokenizer = 2;
+    Tokenizer tokenizer = 1;
 
     // Language used for language-sensitive operations like stopword filtering
     // Example: language = "english"
     // By default is not specified and no language-specific logic is applied
-    string language = 3;
+    string language = 2;
 
     // Whether to convert tokens to lowercase
     // Example:
@@ -231,6 +200,54 @@ message FulltextIndexSettings {
     int32 filter_length_max = 132 [(Ydb.value) = ">= 0"];
 }
 
+// Represents fulltext index settings for a single column
+message FulltextIndexColumnSettings {
+    // Name of the column to be indexed
+    string column = 1;
+
+    // Fulltext index analyzer settings specific to this column
+    FulltextIndexAnalyzerSettings settings = 2;
+}
+
+message FulltextIndexSettings {
+    // Specifies the layout strategy for storing and updating the full-text index
+    enum Layout {
+        LAYOUT_UNSPECIFIED = 0;
+
+        // Uses a single flat inverted index table (indexImplTable)
+        // Example source table:
+        //     ┌────┬────────────────────────────┐
+        //     │ id │ text                       │
+        //     ├────┼────────────────────────────┤
+        //     │ 1  │ "The quick brown fox"      │
+        //     │ 2  │ "The quick blue hare"      │
+        //     └────┴────────────────────────────┘
+        // Example inverted index table (indexImplTable):
+        //     ┌──────────────┬────┐
+        //     │ __ydb_token  │ id │
+        //     ├──────────────┼────┤
+        //     │ "blue"       │ 2  │
+        //     │ "brown"      │ 1  │
+        //     │ "fox"        │ 1  │
+        //     │ "hare"       │ 2  │
+        //     │ "quick"      │ 1  │
+        //     │ "quick"      │ 2  │
+        //     │ "The"        │ 1  │
+        //     │ "The"        │ 2  │
+        //     └──────────────┴────┘
+        // Supports single column only
+        FLAT = 1;
+    }
+
+    // See Layout enum
+    Layout layout = 1;
+
+    // List of columns and their fulltext settings
+    // Currently, this list should contain a single entry
+    // And provided column should be the only one in the TableIndex.index_columns list
+    repeated FulltextIndexColumnSettings columns = 2;
+}
+
 message GlobalFulltextIndex {
     GlobalIndexSettings settings = 1;
     FulltextIndexSettings fulltext_settings = 2;

From ca1ada5ea1dab9c7e8f79ec6f317335477f4f9b2 Mon Sep 17 00:00:00 2001
From: kungasc <kungasc@ydb.tech>
Date: Tue, 9 Sep 2025 11:55:22 +0300
Subject: [PATCH 2/4] better structure inside FulltextIndexSettings

---
 .../ut_index/ut_fulltext_index.cpp            |   8 +-
 ydb/public/api/protos/ydb_table.proto         | 195 +++++++++---------
 2 files changed, 102 insertions(+), 101 deletions(-)

diff --git a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
index 6c147d7af6ad..4cf0e1cacc6f 100644
--- a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
+++ b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
@@ -22,7 +22,7 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
             layout: FLAT
             columns: {
                 column: "text"
-                settings: {
+                analyzers: {
                     tokenizer: STANDARD
                     use_filter_ngram: true
                     filter_ngram_max_length: 42
@@ -89,7 +89,7 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
             layout: FLAT
             columns: {
                 column: "text"
-                settings: {
+                analyzers: {
                     tokenizer: STANDARD
                     use_filter_ngram: true
                     filter_ngram_max_length: 42
@@ -133,7 +133,7 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
             layout: FLAT
             columns: {
                 column: "text"
-                settings: {
+                analyzers: {
                     tokenizer: STANDARD
                     use_filter_ngram: true
                     filter_ngram_max_length: 42
@@ -177,7 +177,7 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
             layout: FLAT
             columns: {
                 column: "text_wrong"
-                settings: {
+                analyzers: {
                     tokenizer: STANDARD
                     use_filter_ngram: true
                     filter_ngram_max_length: 42
diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto
index 5f784c654794..59dc37224a0f 100644
--- a/ydb/public/api/protos/ydb_table.proto
+++ b/ydb/public/api/protos/ydb_table.proto
@@ -113,102 +113,6 @@ message GlobalVectorKMeansTreeIndex {
     KMeansTreeSettings vector_settings = 3;
 }
 
-message FulltextIndexAnalyzerSettings {
-    // Specifies how text is tokenized during indexing
-    enum Tokenizer {
-        TOKENIZER_UNSPECIFIED = 0;
-
-        // Splits text only by whitespace
-        // Does not split on punctuation
-        // Example:
-        //   Text: "foo-bar baz_lorem ipsum"
-        //   Tokens: ["foo-bar", "baz_lorem", "ipsum"]
-        WHITESPACE = 1;
-
-        // Applies general language-aware tokenization
-        // Splits text on whitespace and punctuation
-        // Example:
-        //   Text: "foo-bar baz_lorem ipsum"
-        //   Tokens: ["foo", "bar", "baz", "lorem", "ipsum"]
-        STANDARD = 2;
-
-        // Treats the entire input as a single token
-        // No splitting is performed
-        // Example:
-        //   Text: "Hello World!"
-        //   Tokens: ["Hello World!"]
-        KEYWORD = 3;
-    }
-
-    // See Tokenizer enum
-    Tokenizer tokenizer = 1;
-
-    // Language used for language-sensitive operations like stopword filtering
-    // Example: language = "english"
-    // By default is not specified and no language-specific logic is applied
-    string language = 2;
-
-    // Whether to convert tokens to lowercase
-    // Example:
-    //   Token: "Quick"
-    //   Output: "quick"
-    bool use_filter_lowercase = 100;
-
-    // Whether to remove common stopwords like "the", "a", "is"
-    // Example: language = "english"
-    //   Tokens: ["the", "quick", "brown"]
-    //   Output: ["quick", "brown"]
-    bool use_filter_stopwords = 110;
-
-    // Whether to apply character n-gram indexing to each token
-    // Must be used with filter_ngram_min_length and filter_ngram_max_length
-    // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
-    //   Token: "search"
-    //   Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
-    bool use_filter_ngram = 120;
-
-    // Whether to apply edge n-gram indexing (prefix-based) to each token
-    // Used with filter_ngram_min_length and filter_ngram_max_length
-    // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
-    //   Token: "search"
-    //   Output: ["sea", "sear"]
-    bool use_filter_edge_ngram = 121;
-
-    // Minimum length of n-grams to generate (inclusive)
-    // Must be used with use_filter_ngram or use_filter_edge_ngram
-    // Default value is 3
-    int32 filter_ngram_min_length = 122 [(Ydb.value) = ">= 0"];
-
-    // Maximum length of n-grams to generate (inclusive)
-    // Must be used with use_filter_ngram or use_filter_edge_ngram
-    // Default value is 4
-    int32 filter_ngram_max_length = 123 [(Ydb.value) = ">= 0"];
-
-    // Whether to filter tokens by their length
-    // Must be used with filter_length_min or filter_length_max
-    // Example: filter_length_min = 4, filter_length_max = 6
-    //   Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
-    //   Output: ["fooba", "foobar"]
-    bool use_filter_length = 130;
-
-    // Minimum token length to keep (inclusive)
-    // Must be used with use_filter_length
-    int32 filter_length_min = 131 [(Ydb.value) = ">= 0"];
-
-    // Maximum token length to keep (inclusive)
-    // Must be used with use_filter_length
-    int32 filter_length_max = 132 [(Ydb.value) = ">= 0"];
-}
-
-// Represents fulltext index settings for a single column
-message FulltextIndexColumnSettings {
-    // Name of the column to be indexed
-    string column = 1;
-
-    // Fulltext index analyzer settings specific to this column
-    FulltextIndexAnalyzerSettings settings = 2;
-}
-
 message FulltextIndexSettings {
     // Specifies the layout strategy for storing and updating the full-text index
     enum Layout {
@@ -239,13 +143,110 @@ message FulltextIndexSettings {
         FLAT = 1;
     }
 
+    // Specifies how text is tokenized during indexing
+    enum Tokenizer {
+        TOKENIZER_UNSPECIFIED = 0;
+
+        // Splits text only by whitespace
+        // Does not split on punctuation
+        // Example:
+        //   Text: "foo-bar baz_lorem ipsum"
+        //   Tokens: ["foo-bar", "baz_lorem", "ipsum"]
+        WHITESPACE = 1;
+
+        // Applies general language-aware tokenization
+        // Splits text on whitespace and punctuation
+        // Example:
+        //   Text: "foo-bar baz_lorem ipsum"
+        //   Tokens: ["foo", "bar", "baz", "lorem", "ipsum"]
+        STANDARD = 2;
+
+        // Treats the entire input as a single token
+        // No splitting is performed
+        // Example:
+        //   Text: "Hello World!"
+        //   Tokens: ["Hello World!"]
+        KEYWORD = 3;
+    }
+
+    // Text analyzer settings: the tokenizer, the language and the token filters
+    message Analyzers {
+        // See Tokenizer enum
+        Tokenizer tokenizer = 1;
+
+        // Language used for language-sensitive operations like stopword filtering
+        // Example: language = "english"
+        // Not specified by default, in which case no language-specific logic is applied
+        string language = 2;
+
+        // Whether to convert tokens to lowercase
+        // Example:
+        //   Token: "Quick"
+        //   Output: "quick"
+        bool use_filter_lowercase = 100;
+
+        // Whether to remove common stopwords like "the", "a", "is"
+        // Example: language = "english"
+        //   Tokens: ["the", "quick", "brown"]
+        //   Output: ["quick", "brown"]
+        bool use_filter_stopwords = 110;
+
+        // Whether to apply character n-gram indexing to each token
+        // Must be used with filter_ngram_min_length and filter_ngram_max_length
+        // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
+        //   Token: "search"
+        //   Output: ["sea", "ear", "arc", "rch", "sear", "earc", "arch"]
+        bool use_filter_ngram = 120;
+
+        // Whether to apply edge n-gram indexing (prefix-based) to each token
+        // Used with filter_ngram_min_length and filter_ngram_max_length
+        // Example: filter_ngram_min_length = 3, filter_ngram_max_length = 4
+        //   Token: "search"
+        //   Output: ["sea", "sear"]
+        bool use_filter_edge_ngram = 121;
+
+        // Minimum length of n-grams to generate (inclusive)
+        // Must be used with use_filter_ngram or use_filter_edge_ngram
+        // Default value is 3
+        int32 filter_ngram_min_length = 122 [(Ydb.value) = ">= 0"];
+
+        // Maximum length of n-grams to generate (inclusive)
+        // Must be used with use_filter_ngram or use_filter_edge_ngram
+        // Default value is 4
+        int32 filter_ngram_max_length = 123 [(Ydb.value) = ">= 0"];
+
+        // Whether to filter tokens by their length
+        // Must be used with filter_length_min or filter_length_max
+        // Example: filter_length_min = 4, filter_length_max = 6
+        //   Tokens: ["foo", "fooba", "foobar", "foobarbaz"]
+        //   Output: ["fooba", "foobar"]
+        bool use_filter_length = 130;
+
+        // Minimum token length to keep (inclusive)
+        // Must be used with use_filter_length
+        int32 filter_length_min = 131 [(Ydb.value) = ">= 0"];
+
+        // Maximum token length to keep (inclusive)
+        // Must be used with use_filter_length
+        int32 filter_length_max = 132 [(Ydb.value) = ">= 0"];
+    }
+
+    // Represents text analyzers settings for a specific column
+    message ColumnAnalyzers {
+        // Name of the column to be indexed
+        string column = 1;
+
+        // Analyzer settings specific to this column
+        Analyzers analyzers = 2;
+    }
+
     // See Layout enum
     Layout layout = 1;
 
     // List of columns and their fulltext settings
     // Currently, this list should contain a single entry
     // And provided column should be the only one in the TableIndex.index_columns list
-    repeated FulltextIndexColumnSettings columns = 2;
+    repeated ColumnAnalyzers columns = 2;
 }
 
 message GlobalFulltextIndex {

From ac8a824230f6724efc4adb43e96a5c08a8591e0c Mon Sep 17 00:00:00 2001
From: kungasc <kungasc@ydb.tech>
Date: Tue, 9 Sep 2025 11:59:43 +0300
Subject: [PATCH 3/4] fix typo

---
 ydb/public/api/protos/ydb_table.proto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto
index 59dc37224a0f..5237514c8b3e 100644
--- a/ydb/public/api/protos/ydb_table.proto
+++ b/ydb/public/api/protos/ydb_table.proto
@@ -139,7 +139,7 @@ message FulltextIndexSettings {
         //     │ "The"        │ 1  │
         //     │ "The"        │ 2  │
         //     └──────────────┴────┘
-        // Supports single column only
+        // Supports a single column only
         FLAT = 1;
     }
 

From fb8f9d8fac7bb2f8f3fdb0a070b472bc9f676ef7 Mon Sep 17 00:00:00 2001
From: kungasc <kungasc@ydb.tech>
Date: Tue, 9 Sep 2025 14:40:30 +0300
Subject: [PATCH 4/4] cr: add multiple columns tests

---
 .../ut_index/ut_fulltext_index.cpp            | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
index 4cf0e1cacc6f..26de835d2c67 100644
--- a/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
+++ b/ydb/core/tx/schemeshard/ut_index/ut_fulltext_index.cpp
@@ -124,6 +124,59 @@ Y_UNIT_TEST_SUITE(TFulltextIndexTests) {
         });
     }
 
+    Y_UNIT_TEST(CreateTableMultipleColumns) { // two key columns with two column settings: not supported for now, maybe later
+        TTestBasicRuntime runtime;
+        TTestEnv env(runtime);
+        ui64 txId = 100;
+
+        TString fulltextSettings = R"(
+            layout: FLAT
+            columns: {
+                column: "text1"
+                analyzers: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+            columns: {
+                column: "text2"
+                analyzers: {
+                    tokenizer: STANDARD
+                    use_filter_ngram: true
+                    filter_ngram_max_length: 42
+                }
+            }
+        )";
+        TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
+            TableDescription {
+                Name: "texts"
+                Columns { Name: "id" Type: "Uint64" }
+                Columns { Name: "text1" Type: "String" }
+                Columns { Name: "text2" Type: "String" }
+                Columns { Name: "covered" Type: "String" }
+                Columns { Name: "another" Type: "Uint64" }
+                KeyColumnNames: ["id"]
+            }
+            IndexDescription {
+                Name: "idx_fulltext"
+                KeyColumnNames: ["text1", "text2"]
+                DataColumnNames: ["covered"]
+                Type: EIndexTypeGlobalFulltext
+                FulltextIndexDescription: { 
+                    Settings: { 
+                        %s
+                    }
+                }
+            }
+        )", fulltextSettings.c_str()), {NKikimrScheme::StatusInvalidParameter}); // the request is expected to be rejected
+        env.TestWaitNotification(runtime, txId);
+
+        TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/texts/idx_fulltext"),{ // the index path must not exist afterwards
+            NLs::PathNotExist,
+        });
+    }
+
     Y_UNIT_TEST(CreateTableNotText) {
         TTestBasicRuntime runtime;
         TTestEnv env(runtime);