Skip to content

Commit 08a2ffc

Browse files
committed
Added an "_explanation" URL parameter to the Resolution API. When enabled, each matching document reports which resolvers and input attribute values caused it to match.
1 parent 0d76da0 commit 08a2ffc

File tree

3 files changed

+79
-21
lines changed

3 files changed

+79
-21
lines changed

src/main/java/io/zentity/resolution/Job.java

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.zentity.resolution;
22

33
import com.fasterxml.jackson.databind.JsonNode;
4+
import com.fasterxml.jackson.databind.node.ArrayNode;
45
import com.fasterxml.jackson.databind.node.ObjectNode;
56
import io.zentity.common.Json;
67
import io.zentity.common.Patterns;
@@ -27,6 +28,7 @@
2728
import java.io.IOException;
2829
import java.util.ArrayList;
2930
import java.util.Arrays;
31+
import java.util.Base64;
3032
import java.util.Collections;
3133
import java.util.List;
3234
import java.util.Map;
@@ -40,6 +42,7 @@ public class Job {
4042

4143
// Constants
4244
public static final boolean DEFAULT_INCLUDE_ATTRIBUTES = true;
45+
public static final boolean DEFAULT_INCLUDE_EXPLANATION = false;
4346
public static final boolean DEFAULT_INCLUDE_HITS = true;
4447
public static final boolean DEFAULT_INCLUDE_QUERIES = false;
4548
public static final boolean DEFAULT_INCLUDE_SOURCE = true;
@@ -51,7 +54,8 @@ public class Job {
5154
// Job configuration
5255
private Input input;
5356
private boolean includeAttributes = DEFAULT_INCLUDE_ATTRIBUTES;
54-
private boolean includeHits = DEFAULT_INCLUDE_QUERIES;
57+
private boolean includeExplanation = DEFAULT_INCLUDE_EXPLANATION;
58+
private boolean includeHits = DEFAULT_INCLUDE_HITS;
5559
private boolean includeQueries = DEFAULT_INCLUDE_QUERIES;
5660
private boolean includeSource = DEFAULT_INCLUDE_SOURCE;
5761
private int maxDocsPerQuery = DEFAULT_MAX_DOCS_PER_QUERY;
@@ -234,7 +238,7 @@ public static String populateMatcherClause(Matcher matcher, String indexFieldNam
234238
* @param combiner Combine clauses with "should" or "filter".
235239
* @return
236240
*/
237-
public static List<String> makeIndexFieldClauses(Model model, String indexName, Map<String, Attribute> attributes, String attributeName, String combiner) throws ValidationException {
241+
public static List<String> makeIndexFieldClauses(Model model, String indexName, Map<String, Attribute> attributes, String attributeName, String combiner, boolean includeExplanation) throws ValidationException {
238242
if (!combiner.equals("should") && !combiner.equals("filter"))
239243
throw new ValidationException("'" + combiner + "' is not a supported clause combiner.");
240244
List<String> indexFieldClauses = new ArrayList<>();
@@ -266,7 +270,16 @@ public static List<String> makeIndexFieldClauses(Model model, String indexName,
266270
continue;
267271

268272
// Populate the {{ field }}, {{ value }}, and {{ param.* }} variables of the matcher template.
269-
valueClauses.add(populateMatcherClause(matcher, indexFieldName, value.serialized(), params));
273+
String valueClause = populateMatcherClause(matcher, indexFieldName, value.serialized(), params);
274+
if (includeExplanation) {
275+
276+
// Name the clause to determine why any matching document matched
277+
String nameBase64 = Base64.getEncoder().encodeToString(attributeName.getBytes());
278+
String valueBase64 = Base64.getEncoder().encodeToString(value.serialized().getBytes());
279+
String _name = nameBase64 + ":" + valueBase64;
280+
valueClause = "{\"bool\":{\"_name\":\"" + _name + "\",\"filter\":" + valueClause + "}}";
281+
}
282+
valueClauses.add(valueClause);
270283
}
271284
if (valueClauses.size() == 0)
272285
continue;
@@ -291,14 +304,14 @@ public static List<String> makeIndexFieldClauses(Model model, String indexName,
291304
* @param combiner Combine clauses with "should" or "filter".
292305
* @return
293306
*/
294-
public static List<String> makeAttributeClauses(Model model, String indexName, Map<String, Attribute> attributes, String combiner) throws ValidationException {
307+
public static List<String> makeAttributeClauses(Model model, String indexName, Map<String, Attribute> attributes, String combiner, boolean includeExplanation) throws ValidationException {
295308
if (!combiner.equals("should") && !combiner.equals("filter"))
296309
throw new ValidationException("'" + combiner + "' is not a supported clause combiner.");
297310
List<String> attributeClauses = new ArrayList<>();
298311
for (String attributeName : attributes.keySet()) {
299312

300313
// Construct a "should" or "filter" clause for each index field mapped to this attribute.
301-
List<String> indexFieldClauses = makeIndexFieldClauses(model, indexName, attributes, attributeName, combiner);
314+
List<String> indexFieldClauses = makeIndexFieldClauses(model, indexName, attributes, attributeName, combiner, includeExplanation);
302315
if (indexFieldClauses.size() == 0)
303316
continue;
304317

@@ -320,14 +333,14 @@ public static List<String> makeAttributeClauses(Model model, String indexName, M
320333
* @param attributes The names and values for the input attributes.
321334
* @return A "bool" clause for all applicable resolvers.
322335
*/
323-
public static String populateResolversFilterTree(Model model, String indexName, TreeMap<String, TreeMap> resolversFilterTree, Map<String, Attribute> attributes) throws ValidationException {
336+
public static String populateResolversFilterTree(Model model, String indexName, TreeMap<String, TreeMap> resolversFilterTree, Map<String, Attribute> attributes, boolean includeExplanation) throws ValidationException {
324337

325338
// Construct a "filter" clause for each attribute at this level of the filter tree.
326339
List<String> attributeClauses = new ArrayList<>();
327340
for (String attributeName : resolversFilterTree.keySet()) {
328341

329342
// Construct a "should" clause for each index field mapped to this attribute.
330-
List<String> indexFieldClauses = makeIndexFieldClauses(model, indexName, attributes, attributeName, "should");
343+
List<String> indexFieldClauses = makeIndexFieldClauses(model, indexName, attributes, attributeName, "should", includeExplanation);
331344
if (indexFieldClauses.size() == 0)
332345
continue;
333346

@@ -337,7 +350,7 @@ public static String populateResolversFilterTree(Model model, String indexName,
337350
indexFieldsClause = "{\"bool\":{\"should\":[" + indexFieldsClause + "]}}";
338351

339352
// Populate any child filters.
340-
String filter = populateResolversFilterTree(model, indexName, resolversFilterTree.get(attributeName), attributes);
353+
String filter = populateResolversFilterTree(model, indexName, resolversFilterTree.get(attributeName), attributes, includeExplanation);
341354
if (!filter.equals("{}"))
342355
attributeClauses.add("{\"bool\":{\"filter\":[" + indexFieldsClause + "," + filter + "]}}");
343356
else
@@ -459,6 +472,14 @@ public void includeAttributes(boolean includeAttributes) {
459472
this.includeAttributes = includeAttributes;
460473
}
461474

475+
public boolean includeExplanation() {
476+
return this.includeExplanation;
477+
}
478+
479+
public void includeExplanation(boolean includeExplanation) {
480+
this.includeExplanation = includeExplanation;
481+
}
482+
462483
public boolean includeHits() {
463484
return this.includeHits;
464485
}
@@ -587,7 +608,7 @@ private void traverse() throws IOException, ValidationException {
587608

588609
// Create "scope.exclude.attributes" clauses. Combine them into a single "should" clause.
589610
if (!this.input.scope().exclude().attributes().isEmpty()) {
590-
List<String> attributeClauses = makeAttributeClauses(this.input.model(), indexName, this.input.scope().exclude().attributes(), "should");
611+
List<String> attributeClauses = makeAttributeClauses(this.input.model(), indexName, this.input.scope().exclude().attributes(), "should", this.includeExplanation);
591612
int size = attributeClauses.size();
592613
if (size > 1)
593614
queryMustNotClauses.add("{\"bool\":{\"should\":[" + String.join(",", attributeClauses) + "]}}");
@@ -601,7 +622,7 @@ else if (size == 1)
601622

602623
// Construct "scope.include.attributes" clauses. Combine them into a single "filter" clause.
603624
if (!this.input.scope().include().attributes().isEmpty()) {
604-
List<String> attributeClauses = makeAttributeClauses(this.input.model(), indexName, this.input.scope().include().attributes(), "filter");
625+
List<String> attributeClauses = makeAttributeClauses(this.input.model(), indexName, this.input.scope().include().attributes(), "filter", this.includeExplanation);
605626
int size = attributeClauses.size();
606627
if (size > 1)
607628
queryFilterClauses.add("{\"bool\":{\"filter\":[" + String.join(",", attributeClauses) + "]}}");
@@ -636,7 +657,7 @@ else if (size == 1)
636657
List<List<String>> resolversSorted = sortResolverAttributes(this.input.model(), resolversGroup, counts);
637658
resolversFilterTree = makeResolversFilterTree(resolversSorted);
638659
resolversFilterTreeGrouped.put(numPriorityLevels - level - 1, resolversFilterTree);
639-
resolversClause = populateResolversFilterTree(this.input.model(), indexName, resolversFilterTree, this.attributes);
660+
resolversClause = populateResolversFilterTree(this.input.model(), indexName, resolversFilterTree, this.attributes, this.includeExplanation);
640661

641662
// If there are multiple levels of priority, then each lower priority group of resolvers must ensure
642663
// that every higher priority resolver either matches or does not exist.
@@ -662,7 +683,7 @@ else if (size == 1)
662683
Map<String, Integer> parentCounts = countAttributesAcrossResolvers(this.input.model(), parentResolverGroup);
663684
List<List<String>> parentResolverSorted = sortResolverAttributes(this.input.model(), parentResolverGroup, parentCounts);
664685
TreeMap<String, TreeMap> parentResolverFilterTree = makeResolversFilterTree(parentResolverSorted);
665-
String parentResolverClause = populateResolversFilterTree(this.input.model(), indexName, parentResolverFilterTree, this.attributes);
686+
String parentResolverClause = populateResolversFilterTree(this.input.model(), indexName, parentResolverFilterTree, this.attributes, this.includeExplanation);
666687

667688
// Construct a "should" clause for the above two clauses.
668689
parentResolverClauses.add("{\"bool\":{\"should\":[" + attributesExistsClause + "," + parentResolverClause + "]}}");
@@ -799,6 +820,27 @@ else if (!idsClause.equals("{}"))
799820
}
800821
}
801822

823+
// Determine why any matching documents matched.
824+
TreeMap<String, TreeSet<String>> explanationAttributes = new TreeMap<>();
825+
TreeSet<String> explanationResolvers = new TreeSet<>();
826+
if (this.includeExplanation && doc.has("matched_queries")) {
827+
JsonNode matchedQueriesNode = doc.get("matched_queries");
828+
if (matchedQueriesNode.size() > 0) {
829+
for (JsonNode mqNode : matchedQueriesNode) {
830+
String[] _name = mqNode.asText().split(":");
831+
String attributeName = new String(Base64.getDecoder().decode(_name[0]));
832+
String attributeValue = new String(Base64.getDecoder().decode(_name[1]));
833+
if (!explanationAttributes.containsKey(attributeName))
834+
explanationAttributes.put(attributeName, new TreeSet<>());
835+
// TODO: Pass serialized Value objects, not strings.
836+
explanationAttributes.get(attributeName).add(attributeValue);
837+
}
838+
}
839+
for (String resolverName : resolvers)
840+
if (explanationAttributes.keySet().containsAll(input.model().resolvers().get(resolverName).attributes()))
841+
explanationResolvers.add(resolverName);
842+
}
843+
802844
// Modify doc metadata.
803845
if (this.includeHits) {
804846
ObjectNode docObjNode = (ObjectNode) doc;
@@ -812,6 +854,20 @@ else if (!idsClause.equals("{}"))
812854
docAttributesObjNode.set(attributeName, values);
813855
}
814856
}
857+
// Determine why any matching documents matched.
858+
if (this.includeExplanation && docObjNode.has("matched_queries")) {
859+
ObjectNode docExplanationObjNode = docObjNode.putObject("_explanation");
860+
ObjectNode docExpAttrsObjNode = docExplanationObjNode.putObject("attributes");
861+
for (String attributeName : explanationAttributes.keySet()) {
862+
ArrayNode docExpAttrsArrNode = docExpAttrsObjNode.putArray(attributeName);
863+
for (String attributeValue : explanationAttributes.get(attributeName))
864+
docExpAttrsArrNode.add(attributeValue);
865+
}
866+
ArrayNode docExpResArrNode = docExplanationObjNode.putArray("resolvers");
867+
for (String resolverName : explanationResolvers)
868+
docExpResArrNode.add(resolverName);
869+
docObjNode.remove("matched_queries");
870+
}
815871
if (!this.includeSource) {
816872
docObjNode.remove("_source");
817873
} else {

src/main/java/org/elasticsearch/plugin/zentity/ResolutionAction.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient
3838
// Parse the request params that will be passed to the job configuration
3939
String entityType = restRequest.param("entity_type");
4040
Boolean includeAttributes = restRequest.paramAsBoolean("_attributes", Job.DEFAULT_INCLUDE_ATTRIBUTES);
41+
Boolean includeExplanation = restRequest.paramAsBoolean("_explanation", Job.DEFAULT_INCLUDE_EXPLANATION);
4142
Boolean includeHits = restRequest.paramAsBoolean("hits", Job.DEFAULT_INCLUDE_HITS);
4243
Boolean includeQueries = restRequest.paramAsBoolean("queries", Job.DEFAULT_INCLUDE_QUERIES);
4344
Boolean includeSource = restRequest.paramAsBoolean("_source", Job.DEFAULT_INCLUDE_SOURCE);
@@ -71,6 +72,7 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient
7172
// Prepare the entity resolution job.
7273
Job job = new Job(client);
7374
job.includeAttributes(includeAttributes);
75+
job.includeExplanation(includeExplanation);
7476
job.includeHits(includeHits);
7577
job.includeQueries(includeQueries);
7678
job.includeSource(includeSource);

0 commit comments

Comments
 (0)