Skip to content

Commit 14bbe8a

Browse files
committed
Added new URL parameters to the Resolution API: max_time_per_query, _seq_no_primary_term, _version, and several parameters for advanced search optimizations.
1 parent 3d325e2 commit 14bbe8a

File tree

4 files changed

+190
-9
lines changed

4 files changed

+190
-9
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<zentity.website>https://zentity.io</zentity.website>
1717
<zentity.version>1.5.1</zentity.version>
1818
<!-- dependency versions -->
19-
<elasticsearch.version>7.6.0</elasticsearch.version>
19+
<elasticsearch.version>7.6.1</elasticsearch.version>
2020
<jackson.core.version>2.9.10</jackson.core.version>
2121
<jackson.databind.version>2.9.10.3</jackson.databind.version>
2222
<jdk.version>1.11</jdk.version>

src/main/java/io/zentity/resolution/Job.java

Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.client.node.NodeClient;
2424
import org.elasticsearch.common.Strings;
2525
import org.elasticsearch.common.settings.Settings;
26+
import org.elasticsearch.common.unit.TimeValue;
2627
import org.elasticsearch.common.xcontent.DeprecationHandler;
2728
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
2829
import org.elasticsearch.common.xcontent.ToXContent;
@@ -61,25 +62,47 @@ public class Job {
6162
public static final boolean DEFAULT_INCLUDE_EXPLANATION = false;
6263
public static final boolean DEFAULT_INCLUDE_HITS = true;
6364
public static final boolean DEFAULT_INCLUDE_QUERIES = false;
65+
public static final boolean DEFAULT_INCLUDE_SEQ_NO_PRIMARY_TERM = false;
6466
public static final boolean DEFAULT_INCLUDE_SOURCE = true;
67+
public static final boolean DEFAULT_INCLUDE_VERSION = false;
6568
public static final int DEFAULT_MAX_DOCS_PER_QUERY = 1000;
6669
public static final int DEFAULT_MAX_HOPS = 100;
70+
public static final String DEFAULT_MAX_TIME_PER_QUERY = "10s";
6771
public static final boolean DEFAULT_PRETTY = false;
6872
public static final boolean DEFAULT_PROFILE = false;
6973

74+
// Constants (optional search parameters)
75+
public static Boolean DEFAULT_SEARCH_ALLOW_PARTIAL_SEARCH_RESULTS = null;
76+
public static Integer DEFAULT_SEARCH_BATCHED_REDUCE_SIZE = null;
77+
public static Integer DEFAULT_SEARCH_MAX_CONCURRENT_SHARD_REQUESTS = null;
78+
public static Integer DEFAULT_SEARCH_PRE_FILTER_SHARD_SIZE = null;
79+
public static String DEFAULT_SEARCH_PREFERENCE = null;
80+
public static Boolean DEFAULT_SEARCH_REQUEST_CACHE = null;
81+
7082
// Job configuration
7183
private Input input;
7284
private boolean includeAttributes = DEFAULT_INCLUDE_ATTRIBUTES;
7385
private boolean includeErrorTrace = DEFAULT_INCLUDE_ERROR_TRACE;
7486
private boolean includeExplanation = DEFAULT_INCLUDE_EXPLANATION;
7587
private boolean includeHits = DEFAULT_INCLUDE_HITS;
7688
private boolean includeQueries = DEFAULT_INCLUDE_QUERIES;
89+
private boolean includeSeqNoPrimaryTerm = DEFAULT_INCLUDE_SEQ_NO_PRIMARY_TERM;
7790
private boolean includeSource = DEFAULT_INCLUDE_SOURCE;
91+
private boolean includeVersion = DEFAULT_INCLUDE_VERSION;
7892
private int maxDocsPerQuery = DEFAULT_MAX_DOCS_PER_QUERY;
7993
private int maxHops = DEFAULT_MAX_HOPS;
94+
private String maxTimePerQuery = DEFAULT_MAX_TIME_PER_QUERY;
8095
private boolean pretty = DEFAULT_PRETTY;
8196
private boolean profile = DEFAULT_PROFILE;
8297

98+
// Job configuration (optional search parameters)
99+
private Boolean searchAllowPartialSearchResults = DEFAULT_SEARCH_ALLOW_PARTIAL_SEARCH_RESULTS;
100+
private Integer searchBatchedReduceSize = DEFAULT_SEARCH_BATCHED_REDUCE_SIZE;
101+
private Integer searchMaxConcurrentShardRequests = DEFAULT_SEARCH_MAX_CONCURRENT_SHARD_REQUESTS;
102+
private Integer searchPreFilterShardSize = DEFAULT_SEARCH_PRE_FILTER_SHARD_SIZE;
103+
private String searchPreference = DEFAULT_SEARCH_PREFERENCE;
104+
private Boolean searchRequestCache = DEFAULT_SEARCH_REQUEST_CACHE;
105+
83106
// Job state
84107
private Map<String, Attribute> attributes = new TreeMap<>();
85108
private NodeClient client;
@@ -581,6 +604,12 @@ public void includeQueries(boolean includeQueries) {
581604
this.includeQueries = includeQueries;
582605
}
583606

607+
public Boolean includeSeqNoPrimaryTerm() {
608+
return this.includeSeqNoPrimaryTerm;
609+
}
610+
611+
public void includeSeqNoPrimaryTerm(Boolean includeSeqNoPrimaryTerm) { this.includeSeqNoPrimaryTerm = includeSeqNoPrimaryTerm; }
612+
584613
public boolean includeSource() {
585614
return this.includeSource;
586615
}
@@ -589,13 +618,9 @@ public void includeSource(boolean includeSource) {
589618
this.includeSource = includeSource;
590619
}
591620

592-
public int maxHops() {
593-
return this.maxHops;
594-
}
621+
/**
 * Returns whether the job adds {@code "version":true} to the queries it builds.
 *
 * @return the current value of the setting
 */
public Boolean includeVersion() {
    return includeVersion;
}
595622

596-
public void maxHops(int maxHops) {
597-
this.maxHops = maxHops;
598-
}
623+
/**
 * Sets whether the job adds {@code "version":true} to the queries it builds.
 *
 * @param includeVersion the new value; {@code null} selects {@link #DEFAULT_INCLUDE_VERSION}
 */
public void includeVersion(Boolean includeVersion) {
    // The backing field is a primitive boolean, so assigning a null Boolean would throw a
    // NullPointerException on auto-unboxing. Fall back to the default instead.
    this.includeVersion = includeVersion != null ? includeVersion : DEFAULT_INCLUDE_VERSION;
}
599624

600625
public int maxDocsPerQuery() {
601626
return this.maxDocsPerQuery;
@@ -605,6 +630,18 @@ public void maxDocsPerQuery(int maxDocsPerQuery) {
605630
this.maxDocsPerQuery = maxDocsPerQuery;
606631
}
607632

633+
public int maxHops() {
634+
return this.maxHops;
635+
}
636+
637+
public void maxHops(int maxHops) {
638+
this.maxHops = maxHops;
639+
}
640+
641+
public String maxTimePerQuery() { return this.maxTimePerQuery; }
642+
643+
public void maxTimePerQuery(String maxTimePerQuery) { this.maxTimePerQuery = maxTimePerQuery; }
644+
608645
public boolean pretty() {
609646
return this.pretty;
610647
}
@@ -621,6 +658,41 @@ public void profile(Boolean profile) {
621658
this.profile = profile;
622659
}
623660

661+
public Boolean searchAllowPartialSearchResults() {
662+
return this.searchAllowPartialSearchResults;
663+
}
664+
665+
public void searchAllowPartialSearchResults(Boolean searchAllowPartialSearchResults) { this.searchAllowPartialSearchResults = searchAllowPartialSearchResults; }
666+
667+
public Integer searchBatchedReduceSize() {
668+
return this.searchBatchedReduceSize;
669+
}
670+
671+
public void searchBatchedReduceSize(Integer searchBatchedReduceSize) { this.searchBatchedReduceSize = searchBatchedReduceSize; }
672+
673+
public Integer searchMaxConcurrentShardRequests() {
674+
return this.searchMaxConcurrentShardRequests;
675+
}
676+
677+
public void searchMaxConcurrentShardRequests(Integer searchMaxConcurrentShardRequests) { this.searchMaxConcurrentShardRequests = searchMaxConcurrentShardRequests; }
678+
679+
public Integer searchPreFilterShardSize() {
680+
return this.searchPreFilterShardSize;
681+
}
682+
683+
public void searchPreFilterShardSize(Integer searchPreFilterShardSize) { this.searchPreFilterShardSize = searchPreFilterShardSize; }
684+
685+
public String searchPreference() {
686+
return this.searchPreference;
687+
}
688+
689+
public void searchPreference(String searchPreference) { this.searchPreference = searchPreference; }
690+
public Boolean searchRequestCache() {
691+
return this.searchRequestCache;
692+
}
693+
694+
public void searchRequestCache(Boolean searchRequestCache) { this.searchRequestCache = searchRequestCache; }
695+
624696
public Input input() {
625697
return this.input;
626698
}
@@ -653,7 +725,22 @@ private SearchResponse search(String indexName, String query) throws IOException
653725
searchSourceBuilder.parseXContent(parser);
654726
}
655727
SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client, SearchAction.INSTANCE);
656-
return searchRequestBuilder.setIndices(indexName).setSource(searchSourceBuilder).execute().actionGet();
728+
searchRequestBuilder.setIndices(indexName).setSource(searchSourceBuilder);
729+
if (this.searchAllowPartialSearchResults != null)
730+
searchRequestBuilder.setAllowPartialSearchResults(this.searchAllowPartialSearchResults);
731+
if (this.searchBatchedReduceSize != null)
732+
searchRequestBuilder.setBatchedReduceSize(this.searchBatchedReduceSize);
733+
if (this.searchMaxConcurrentShardRequests != null)
734+
searchRequestBuilder.setMaxConcurrentShardRequests(this.searchMaxConcurrentShardRequests);
735+
if (this.searchPreFilterShardSize != null)
736+
searchRequestBuilder.setPreFilterShardSize(this.searchPreFilterShardSize);
737+
if (this.searchPreference != null)
738+
searchRequestBuilder.setPreference(this.searchPreference);
739+
if (this.searchRequestCache != null)
740+
searchRequestBuilder.setRequestCache(this.searchRequestCache);
741+
if (this.maxTimePerQuery != null)
742+
searchRequestBuilder.setTimeout(TimeValue.parseTimeValue(this.maxTimePerQuery, "timeout"));
743+
return searchRequestBuilder.execute().actionGet();
657744
}
658745

659746
/**
@@ -1018,6 +1105,10 @@ else if (!resolversClause.isEmpty())
10181105
// Construct the "profile" clause.
10191106
if (this.profile)
10201107
topLevelClauses.add("\"profile\":true");
1108+
if (this.includeSeqNoPrimaryTerm)
1109+
topLevelClauses.add("\"seq_no_primary_term\":true");
1110+
if (this.includeVersion)
1111+
topLevelClauses.add("\"version\":true");
10211112

10221113
// Construct the final query.
10231114
query = "{" + String.join(",", topLevelClauses) + "}";

src/main/java/org/elasticsearch/plugin/zentity/ResolutionAction.java

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,41 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient
4040
Boolean includeExplanation = restRequest.paramAsBoolean("_explanation", Job.DEFAULT_INCLUDE_EXPLANATION);
4141
Boolean includeHits = restRequest.paramAsBoolean("hits", Job.DEFAULT_INCLUDE_HITS);
4242
Boolean includeQueries = restRequest.paramAsBoolean("queries", Job.DEFAULT_INCLUDE_QUERIES);
43+
Boolean includeSeqNoPrimaryTerm = restRequest.paramAsBoolean("_seq_no_primary_term", Job.DEFAULT_INCLUDE_SEQ_NO_PRIMARY_TERM);
4344
Boolean includeSource = restRequest.paramAsBoolean("_source", Job.DEFAULT_INCLUDE_SOURCE);
45+
Boolean includeVersion = restRequest.paramAsBoolean("_version", Job.DEFAULT_INCLUDE_VERSION);
4446
int maxDocsPerQuery = restRequest.paramAsInt("max_docs_per_query", Job.DEFAULT_MAX_DOCS_PER_QUERY);
4547
int maxHops = restRequest.paramAsInt("max_hops", Job.DEFAULT_MAX_HOPS);
48+
String maxTimePerQuery = restRequest.param("max_time_per_query", Job.DEFAULT_MAX_TIME_PER_QUERY);
4649
Boolean pretty = restRequest.paramAsBoolean("pretty", Job.DEFAULT_PRETTY);
4750
Boolean profile = restRequest.paramAsBoolean("profile", Job.DEFAULT_PROFILE);
4851

52+
// Parse any optional search parameters that will be passed to the job configuration.
53+
// Note: org.elasticsearch.rest.RestRequest doesn't allow null values as default values for integer parameters,
54+
// which is why the code below handles the integer parameters differently from the others.
55+
Boolean searchAllowPartialSearchResults = restRequest.paramAsBoolean("search.allow_partial_search_results", Job.DEFAULT_SEARCH_ALLOW_PARTIAL_SEARCH_RESULTS);
56+
Integer searchBatchedReduceSize = Job.DEFAULT_SEARCH_BATCHED_REDUCE_SIZE;
57+
if (restRequest.hasParam("search.batched_reduce_size"))
58+
searchBatchedReduceSize = Integer.parseInt(restRequest.param("search.batched_reduce_size"));
59+
Integer searchMaxConcurrentShardRequests = Job.DEFAULT_SEARCH_MAX_CONCURRENT_SHARD_REQUESTS;
60+
if (restRequest.hasParam("search.max_concurrent_shard_requests"))
61+
searchMaxConcurrentShardRequests = Integer.parseInt(restRequest.param("search.max_concurrent_shard_requests"));
62+
Integer searchPreFilterShardSize = Job.DEFAULT_SEARCH_PRE_FILTER_SHARD_SIZE;
63+
if (restRequest.hasParam("search.pre_filter_shard_size"))
64+
searchPreFilterShardSize = Integer.parseInt(restRequest.param("search.pre_filter_shard_size"));
65+
String searchPreference = restRequest.param("search.preference", Job.DEFAULT_SEARCH_PREFERENCE);
66+
Boolean searchRequestCache = restRequest.paramAsBoolean("search.request_cache", Job.DEFAULT_SEARCH_REQUEST_CACHE);
67+
Integer finalSearchBatchedReduceSize = searchBatchedReduceSize;
68+
Integer finalSearchMaxConcurrentShardRequests = searchMaxConcurrentShardRequests;
69+
Integer finalSearchPreFilterShardSize = searchPreFilterShardSize;
70+
4971
return channel -> {
5072
try {
5173

5274
// Validate the request body.
5375
if (body == null || body.equals(""))
5476
throw new ValidationException("Request body is missing.");
5577

56-
5778
// Parse and validate the job input.
5879
Input input;
5980
if (entityType == null || entityType.equals("")) {
@@ -73,13 +94,24 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient
7394
job.includeExplanation(includeExplanation);
7495
job.includeHits(includeHits);
7596
job.includeQueries(includeQueries);
97+
job.includeSeqNoPrimaryTerm(includeSeqNoPrimaryTerm);
7698
job.includeSource(includeSource);
99+
job.includeVersion(includeVersion);
77100
job.maxDocsPerQuery(maxDocsPerQuery);
78101
job.maxHops(maxHops);
102+
job.maxTimePerQuery(maxTimePerQuery);
79103
job.pretty(pretty);
80104
job.profile(profile);
81105
job.input(input);
82106

107+
// Optional search parameters
108+
job.searchAllowPartialSearchResults(searchAllowPartialSearchResults);
109+
job.searchBatchedReduceSize(finalSearchBatchedReduceSize);
110+
job.searchMaxConcurrentShardRequests(finalSearchMaxConcurrentShardRequests);
111+
job.searchPreFilterShardSize(finalSearchPreFilterShardSize);
112+
job.searchPreference(searchPreference);
113+
job.searchRequestCache(searchRequestCache);
114+
83115
// Run the entity resolution job.
84116
String response = job.run();
85117
if (job.failed())

src/test/java/io/zentity/resolution/JobIT.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,4 +1716,62 @@ public void testJobArrays() throws Exception {
17161716
destroyTestResources(testResourceSet);
17171717
}
17181718
}
1719+
1720+
public void testJobSearchParams() throws Exception {
1721+
int testResourceSet = TEST_RESOURCES_A;
1722+
prepareTestResources(testResourceSet);
1723+
try {
1724+
String endpoint = "_zentity/resolution/zentity_test_entity_a";
1725+
Request postResolution = new Request("POST", endpoint);
1726+
postResolution.setEntity(TEST_PAYLOAD_JOB_ATTRIBUTES);
1727+
postResolution.addParameter("_seq_no_primary_term", "true");
1728+
postResolution.addParameter("_version", "true");
1729+
postResolution.addParameter("max_time_per_query", "5s");
1730+
postResolution.addParameter("search.allow_partial_search_results", "true");
1731+
postResolution.addParameter("search.batched_reduce_size", "5");
1732+
postResolution.addParameter("search.max_concurrent_shard_requests", "5");
1733+
postResolution.addParameter("search.pre_filter_shard_size", "5");
1734+
postResolution.addParameter("search.request_cache", "true");
1735+
Response response = client.performRequest(postResolution);
1736+
JsonNode json = Json.MAPPER.readTree(response.getEntity().getContent());
1737+
assertEquals(json.get("hits").get("total").asInt(), 6);
1738+
Set<String> docsExpected = new TreeSet<>();
1739+
docsExpected.add("a0,0");
1740+
docsExpected.add("b0,0");
1741+
docsExpected.add("c0,1");
1742+
docsExpected.add("a1,2");
1743+
docsExpected.add("b1,3");
1744+
docsExpected.add("c1,4");
1745+
assertEquals(docsExpected, getActual(json));
1746+
for (JsonNode doc : json.get("hits").get("hits")) {
1747+
assertTrue(doc.has("_primary_term"));
1748+
assertTrue(doc.has("_seq_no"));
1749+
assertTrue(doc.has("_version"));
1750+
}
1751+
1752+
String endpoint2 = "_zentity/resolution/zentity_test_entity_a";
1753+
Request postResolution2 = new Request("POST", endpoint2);
1754+
postResolution2.setEntity(TEST_PAYLOAD_JOB_ATTRIBUTES);
1755+
postResolution2.addParameter("_seq_no_primary_term", "false");
1756+
postResolution2.addParameter("_version", "false");
1757+
Response response2 = client.performRequest(postResolution2);
1758+
JsonNode json2 = Json.MAPPER.readTree(response2.getEntity().getContent());
1759+
assertEquals(json2.get("hits").get("total").asInt(), 6);
1760+
Set<String> docsExpected2 = new TreeSet<>();
1761+
docsExpected2.add("a0,0");
1762+
docsExpected2.add("b0,0");
1763+
docsExpected2.add("c0,1");
1764+
docsExpected2.add("a1,2");
1765+
docsExpected2.add("b1,3");
1766+
docsExpected2.add("c1,4");
1767+
assertEquals(docsExpected2, getActual(json2));
1768+
for (JsonNode doc : json2.get("hits").get("hits")) {
1769+
assertFalse(doc.has("_primary_term"));
1770+
assertFalse(doc.has("_seq_no"));
1771+
assertFalse(doc.has("_version"));
1772+
}
1773+
} finally {
1774+
destroyTestResources(testResourceSet);
1775+
}
1776+
}
17191777
}

0 commit comments

Comments
 (0)