diff --git a/src/common/plugin/fulltext/elasticsearch/ESAdapter.cpp b/src/common/plugin/fulltext/elasticsearch/ESAdapter.cpp index d582b43e90f..693282d6926 100644 --- a/src/common/plugin/fulltext/elasticsearch/ESAdapter.cpp +++ b/src/common/plugin/fulltext/elasticsearch/ESAdapter.cpp @@ -22,7 +22,7 @@ void ESBulk::put(const std::string& indexName, const std::string& src, const std::string& dst, int64_t rank, - const std::string& text) { + std::map data) { folly::dynamic action = folly::dynamic::object(); folly::dynamic metadata = folly::dynamic::object(); folly::dynamic body = folly::dynamic::object(); @@ -35,7 +35,9 @@ void ESBulk::put(const std::string& indexName, body["src"] = src; body["dst"] = dst; body["rank"] = rank; - body["text"] = text; + for (auto& [key, value] : data) { + body[key] = std::move(value); + } documents_[indexName].emplace_back(std::move(action)); documents_[indexName].emplace_back(std::move(body)); } diff --git a/src/common/plugin/fulltext/elasticsearch/ESAdapter.h b/src/common/plugin/fulltext/elasticsearch/ESAdapter.h index a4488b9060d..9d70882e87f 100644 --- a/src/common/plugin/fulltext/elasticsearch/ESAdapter.h +++ b/src/common/plugin/fulltext/elasticsearch/ESAdapter.h @@ -44,7 +44,7 @@ class ESBulk { const std::string& src, const std::string& dst, int64_t rank, - const std::string& text); + std::map data); void delete_(const std::string& indexName, const std::string& vid, const std::string& src, diff --git a/src/common/plugin/fulltext/test/ElasticsearchTest.cpp b/src/common/plugin/fulltext/test/ElasticsearchTest.cpp index 4ec9dc6405b..c7c5c059a66 100644 --- a/src/common/plugin/fulltext/test/ElasticsearchTest.cpp +++ b/src/common/plugin/fulltext/test/ElasticsearchTest.cpp @@ -324,7 +324,8 @@ TEST_F(ESTest, bulk) { plugin::ESClient client(mockHttpClient, "http", "127.0.0.1:9200", "", ""); plugin::ESAdapter adapter(std::vector({client})); plugin::ESBulk bulk; - bulk.put("nebula_index_1", "1", "", "", 0, "vertex text"); + std::map data{{"text", "vretex text"}}; + bulk.put("nebula_index_1", "1", "", "", 0, data); bulk.delete_("nebula_index_2", "", "a", "b", 10); { auto result = adapter.bulk(bulk, true); diff --git a/src/graph/executor/query/FulltextIndexScanExecutor.cpp b/src/graph/executor/query/FulltextIndexScanExecutor.cpp index 8c2627c2e21..ef8b3a16b05 100644 --- a/src/graph/executor/query/FulltextIndexScanExecutor.cpp +++ b/src/graph/executor/query/FulltextIndexScanExecutor.cpp @@ -7,6 +7,7 @@ #include "common/datatypes/DataSet.h" #include "common/datatypes/Edge.h" #include "graph/planner/plan/Query.h" +#include "graph/util/Constants.h" #include "graph/util/FTIndexUtils.h" using nebula::storage::StorageClient; @@ -31,7 +32,7 @@ folly::Future FulltextIndexScanExecutor::execute() { const auto& space = qctx()->rctx()->session()->space(); if (!isIntVidType(space)) { if (ftIndexScan->isEdge()) { - DataSet edges({"edge"}); + DataSet edges({"id", kScore}); for (auto& item : esResultValue.items) { Edge edge; edge.src = item.src; @@ -42,7 +43,7 @@ folly::Future FulltextIndexScanExecutor::execute() { } finish(ResultBuilder().value(Value(std::move(edges))).iter(Iterator::Kind::kProp).build()); } else { - DataSet vertices({kVid}); + DataSet vertices({"id", kScore}); for (auto& item : esResultValue.items) { vertices.emplace_back(Row({item.vid, item.score})); } @@ -50,17 +51,18 @@ folly::Future FulltextIndexScanExecutor::execute() { } } else { if (ftIndexScan->isEdge()) { - DataSet edges({kSrc, kRank, kDst}); + DataSet edges({"id", kScore}); for (auto& item : esResultValue.items) { - std::string srcStr = item.src; - std::string dstStr = item.dst; - int64_t src = *reinterpret_cast(srcStr.data()); - int64_t dst = *reinterpret_cast(dstStr.data()); - edges.emplace_back(Row({src, item.rank, dst})); + Edge edge; + edge.src = item.src; + edge.dst = item.dst; + edge.ranking = item.rank; + edge.type = ftIndexScan->schemaId(); + edges.emplace_back(Row({std::move(edge), item.score})); } finish(ResultBuilder().value(Value(std::move(edges))).iter(Iterator::Kind::kProp).build()); } else { - DataSet vertices({kVid}); + DataSet vertices({"id", kScore}); for (auto& item : esResultValue.items) { std::string vidStr = item.vid; int64_t vid = *reinterpret_cast(vidStr.data()); @@ -83,7 +85,9 @@ StatusOr FulltextIndexScanExecutor::accessFulltextIndex( auto index = arg->index(); auto query = arg->query(); int64_t offset = ftIndexScan->offset(); - int64_t count = ftIndexScan->limit(); + int64_t count = ftIndexScan->limit() > std::numeric_limits::max() + ? std::numeric_limits::max() + : ftIndexScan->limit(); execFunc = [=, &esAdapter]() { return esAdapter.queryString(index, query, offset, count); }; break; } diff --git a/src/graph/planner/ngql/LookupPlanner.cpp b/src/graph/planner/ngql/LookupPlanner.cpp index 9d3637c505c..ca0c8ffc83b 100644 --- a/src/graph/planner/ngql/LookupPlanner.cpp +++ b/src/graph/planner/ngql/LookupPlanner.cpp @@ -69,7 +69,7 @@ StatusOr LookupPlanner::transform(AstContext* astCtx) { GetEdges::make(qctx, plan.root, spaceId, src, type, rank, dst, std::move(edgeProps)); hashKeys = {ColumnExpression::make(pool, 0)}; - probeKeys = {ColumnExpression::make(pool, 0)}; + probeKeys = {EdgeExpression::make(pool)}; } else { storage::cpp2::VertexProp vertexProp; vertexProp.tag_ref() = lookupCtx->schemaId; @@ -80,7 +80,7 @@ StatusOr LookupPlanner::transform(AstContext* astCtx) { qctx, plan.root, spaceId, ColumnExpression::make(pool, 0), std::move(vertexProps)); hashKeys = {ColumnExpression::make(pool, 0)}; - probeKeys = {FunctionCallExpression::make(pool, "id", {ColumnExpression::make(pool, 0)})}; + probeKeys = {FunctionCallExpression::make(pool, "id", {VertexExpression::make(pool)})}; } if (lookupCtx->hasScore) { diff --git a/src/kvstore/listener/elasticsearch/ESListener.cpp b/src/kvstore/listener/elasticsearch/ESListener.cpp index 1f40cba4a22..288a89bc99a 100644 --- a/src/kvstore/listener/elasticsearch/ESListener.cpp +++ b/src/kvstore/listener/elasticsearch/ESListener.cpp @@ -41,9 +41,9 @@ bool ESListener::apply(const BatchHolder& batch) { const std::string& src, const std::string& dst, int64_t rank, - const std::string& text) { + std::map data) { if (type == BatchLogType::OP_BATCH_PUT) { - bulk.put(index, vid, src, dst, rank, text); + bulk.put(index, vid, src, dst, rank, std::move(data)); } else if (type == BatchLogType::OP_BATCH_REMOVE) { bulk.delete_(index, vid, src, dst, rank); } else { @@ -134,21 +134,24 @@ void ESListener::pickTagAndEdgeData(BatchLogType type, if (index.second.get_fields().size() > 1) { LOG(ERROR) << "Only one field will create fulltext index"; } - std::string text; + std::map data; std::string indexName = index.first; if (type == BatchLogType::OP_BATCH_PUT) { - auto field = index.second.get_fields().front(); - auto v = reader->getValueByName(field); - if (v.type() == Value::Type::NULLVALUE) { - callback(BatchLogType::OP_BATCH_REMOVE, indexName, vid, src, dst, 0, text); - continue; - } - if (v.type() != Value::Type::STRING) { - LOG(ERROR) << "Can't create fulltext index on type " << v.type(); + for (auto& field : index.second.get_fields()) { + auto v = reader->getValueByName(field); + if (v.type() == Value::Type::NULLVALUE) { + data[field] = ""; + continue; + } + if (v.type() != Value::Type::STRING) { + data[field] = ""; + LOG(ERROR) << "Can't create fulltext index on type " << v.type(); + continue; + } + data[field] = std::move(v).getStr(); } - text = std::move(v).getStr(); } - callback(type, indexName, vid, src, dst, rank, text); + callback(type, indexName, vid, src, dst, rank, std::move(data)); } } diff --git a/src/kvstore/listener/elasticsearch/ESListener.h b/src/kvstore/listener/elasticsearch/ESListener.h index 4edbb30437e..5d9adc11426 100644 --- a/src/kvstore/listener/elasticsearch/ESListener.h +++ b/src/kvstore/listener/elasticsearch/ESListener.h @@ -112,7 +112,7 @@ class ESListener : public Listener { const std::string& src, const std::string& dst, int64_t rank, - const std::string& text)>; + std::map data)>; void pickTagAndEdgeData(BatchLogType type, const std::string& key, const std::string& value, diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 5cd6a5c87bf..85ab5dcd8b6 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -572,6 +572,7 @@ unreserved_keyword | KW_DIVIDE { $$ = new std::string("divide"); } | KW_RENAME { $$ = new std::string("rename"); } | KW_CLEAR { $$ = new std::string("clear"); } + | KW_ANALYZER { $$ = new std::string("analyzer"); } ; expression @@ -2618,10 +2619,15 @@ opt_analyzer create_fulltext_index_sentence : KW_CREATE KW_FULLTEXT KW_TAG KW_INDEX name_label KW_ON name_label L_PAREN name_label_list R_PAREN opt_analyzer { - $$ = new CreateFTIndexSentence(false, $5, $7, $9, $11); + auto sentence = new CreateFTIndexSentence(false, $5, $7, $9, $11); + delete $9; + $$ = sentence; + } | KW_CREATE KW_FULLTEXT KW_EDGE KW_INDEX name_label KW_ON name_label L_PAREN name_label_list R_PAREN opt_analyzer { - $$ = new CreateFTIndexSentence(true, $5, $7, $9, $11); + auto sentence = new CreateFTIndexSentence(true, $5, $7, $9, $11); + delete $9; + $$ = sentence; } ; diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index a84f92782c1..01b4fc9b163 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -283,6 +283,7 @@ LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}* "HTTP" { return TokenType::KW_HTTP; } "HTTPS" { return TokenType::KW_HTTPS; } "FULLTEXT" { return TokenType::KW_FULLTEXT; } +"ANALYZER" { return TokenType::KW_ANALYZER; } "AUTO" { return TokenType::KW_AUTO; } "ES_QUERY" { return TokenType::KW_ES_QUERY; } "TEXT" { return TokenType::KW_TEXT; } diff --git a/src/parser/test/ParserTest.cpp b/src/parser/test/ParserTest.cpp index acbb5bf75f7..7572fd09b2d 100644 --- a/src/parser/test/ParserTest.cpp +++ b/src/parser/test/ParserTest.cpp @@ -2928,6 +2928,16 @@ TEST_F(ParserTest, Zone) { } TEST_F(ParserTest, FullText) { + { + std::string query = "CREATE FULLTEXT TAG INDEX i1 on t1(str)"; + auto result = parse(query); + EXPECT_TRUE(result.ok()) << result.status(); + } + { + std::string query = "CREATE FULLTEXT TAG INDEX i1 on t1(str) analyzer=\"standard\""; + auto result = parse(query); + EXPECT_TRUE(result.ok()) << result.status(); + } { std::string query = "LOOKUP ON t1 WHERE ES_QUERY(abc, \"qwerty\")"; auto result = parse(query);