Skip to content

Commit 998c04b

Browse files
committed
Fixed cases in which null values or missing fields from the "_source" field caused jobs to fail with an attribute type validation exception. Null values and missing fields are now skipped. Added integration tests for arrays in "_attributes" and "_source".
1 parent c2c723b commit 998c04b

File tree

6 files changed

+218
-29
lines changed

6 files changed

+218
-29
lines changed

src/main/java/io/zentity/resolution/Job.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1105,10 +1105,14 @@ else if (!resolversClause.isEmpty())
11051105
// Get the attribute value from the "fields" field if it exists there.
11061106
// This would include 'date' attribute types, for example.
11071107
JsonNode valueNode = doc.get("fields").get(indexFieldName);
1108-
if (valueNode.isArray()) {
1108+
if (valueNode.isNull() || valueNode.isMissingNode()) {
1109+
continue;
1110+
} else if (valueNode.isArray()) {
11091111
Iterator<JsonNode> valueNodeIterator = valueNode.elements();
11101112
while (valueNodeIterator.hasNext()) {
11111113
JsonNode vNode = valueNodeIterator.next();
1114+
if (vNode.isNull() || valueNode.isMissingNode())
1115+
continue;
11121116
Value value = Value.create(attributeType, vNode);
11131117
docAttributes.get(attributeName).add(value);
11141118
nextInputAttributes.get(attributeName).values().add(value);
@@ -1140,11 +1144,15 @@ else if (!resolversClause.isEmpty())
11401144
else
11411145
continue;
11421146
}
1147+
if (valueNode.isNull() || valueNode.isMissingNode())
1148+
continue;
11431149
docIndexFields.put(indexFieldName, valueNode);
11441150
if (valueNode.isArray()) {
11451151
Iterator<JsonNode> valueNodeIterator = valueNode.elements();
11461152
while (valueNodeIterator.hasNext()) {
11471153
JsonNode vNode = valueNodeIterator.next();
1154+
if (vNode.isNull() || valueNode.isMissingNode())
1155+
continue;
11481156
Value value = Value.create(attributeType, vNode);
11491157
docAttributes.get(attributeName).add(value);
11501158
nextInputAttributes.get(attributeName).values().add(value);

src/test/java/io/zentity/resolution/JobIT.java

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ public class JobIT extends AbstractITCase {
2121

2222
private final int TEST_RESOURCES_A = 0;
2323
private final int TEST_RESOURCES_B = 1;
24-
private final int TEST_RESOURCES_ELASTICSEARCH_ERROR = 2;
25-
private final int TEST_RESOURCES_ZENTITY_ERROR = 3;
24+
private final int TEST_RESOURCES_ARRAYS = 3;
25+
private final int TEST_RESOURCES_ELASTICSEARCH_ERROR = 4;
26+
private final int TEST_RESOURCES_ZENTITY_ERROR = 5;
2627

2728
private final StringEntity TEST_PAYLOAD_JOB_NO_SCOPE = new StringEntity("{\n" +
2829
" \"attributes\": {\n" +
@@ -493,16 +494,30 @@ public class JobIT extends AbstractITCase {
493494
" }\n" +
494495
"}", ContentType.APPLICATION_JSON);
495496

497+
private final StringEntity TEST_PAYLOAD_JOB_ARRAYS = new StringEntity("{\n" +
498+
" \"attributes\": {\n" +
499+
" \"string\": [ \"abc\" ],\n" +
500+
" \"array\": [ \"222\" ]\n" +
501+
" }\n" +
502+
"}", ContentType.APPLICATION_JSON);
503+
496504
private byte[] readFile(String filename) throws IOException {
497505
InputStream stream = this.getClass().getResourceAsStream("/" + filename);
498506
return IOUtils.toByteArray(stream);
499507
}
500508

501-
private void destroyTestIndices() throws IOException {
502-
client.performRequest(new Request("DELETE", ".zentity_test_index_a"));
503-
client.performRequest(new Request("DELETE", ".zentity_test_index_b"));
504-
client.performRequest(new Request("DELETE", ".zentity_test_index_c"));
505-
client.performRequest(new Request("DELETE", ".zentity_test_index_d"));
509+
private void destroyTestIndices(int testResourceSet) throws IOException {
510+
switch (testResourceSet) {
511+
case TEST_RESOURCES_ARRAYS:
512+
client.performRequest(new Request("DELETE", ".zentity_test_index_arrays"));
513+
break;
514+
default:
515+
client.performRequest(new Request("DELETE", ".zentity_test_index_a"));
516+
client.performRequest(new Request("DELETE", ".zentity_test_index_b"));
517+
client.performRequest(new Request("DELETE", ".zentity_test_index_c"));
518+
client.performRequest(new Request("DELETE", ".zentity_test_index_d"));
519+
break;
520+
}
506521
}
507522

508523
private void destroyTestEntityModelA() throws IOException {
@@ -513,6 +528,10 @@ private void destroyTestEntityModelB() throws IOException {
513528
client.performRequest(new Request("DELETE", "_zentity/models/zentity_test_entity_b"));
514529
}
515530

531+
private void destroyTestEntityModelArrays() throws IOException {
532+
client.performRequest(new Request("DELETE", "_zentity/models/zentity_test_entity_arrays"));
533+
}
534+
516535
private void destroyTestEntityModelElasticsearchError() throws IOException {
517536
client.performRequest(new Request("DELETE", "_zentity/models/zentity_test_entity_elasticsearch_error"));
518537
}
@@ -522,14 +541,17 @@ private void destroyTestEntityModelZentityError() throws IOException {
522541
}
523542

524543
private void destroyTestResources(int testResourceSet) throws IOException {
525-
destroyTestIndices();
544+
destroyTestIndices(testResourceSet);
526545
switch (testResourceSet) {
527546
case TEST_RESOURCES_A:
528547
destroyTestEntityModelA();
529548
break;
530549
case TEST_RESOURCES_B:
531550
destroyTestEntityModelB();
532551
break;
552+
case TEST_RESOURCES_ARRAYS:
553+
destroyTestEntityModelArrays();
554+
break;
533555
case TEST_RESOURCES_ELASTICSEARCH_ERROR:
534556
destroyTestEntityModelElasticsearchError();
535557
break;
@@ -555,6 +577,14 @@ private void prepareTestEntityModelB() throws Exception {
555577
client.performRequest(postModelB);
556578
}
557579

580+
private void prepareTestEntityModelArrays() throws Exception {
581+
ByteArrayEntity testEntityModelArrays;
582+
testEntityModelArrays = new ByteArrayEntity(readFile("TestEntityModelArrays.json"), ContentType.APPLICATION_JSON);
583+
Request postModelArrays = new Request("POST", "_zentity/models/zentity_test_entity_arrays");
584+
postModelArrays.setEntity(testEntityModelArrays);
585+
client.performRequest(postModelArrays);
586+
}
587+
558588
private void prepareTestEntityModelElasticsearchError() throws Exception {
559589
ByteArrayEntity testEntityModelElasticsearchError;
560590
testEntityModelElasticsearchError = new ByteArrayEntity(readFile("TestEntityModelElasticsearchError.json"), ContentType.APPLICATION_JSON);
@@ -571,7 +601,7 @@ private void prepareTestEntityModelZentityError() throws Exception {
571601
client.performRequest(postModelZentityError);
572602
}
573603

574-
private void prepareTestIndices() throws Exception {
604+
private void prepareTestIndices(int testResourceSet) throws Exception {
575605

576606
// Load files
577607
ByteArrayEntity testIndex;
@@ -580,27 +610,50 @@ private void prepareTestIndices() throws Exception {
580610
// Elasticsearch 7.0.0+ removes mapping types
581611
Properties props = new Properties();
582612
props.load(ZentityPlugin.class.getResourceAsStream("/plugin-descriptor.properties"));
583-
if (props.getProperty("elasticsearch.version").compareTo("7.") >= 0) {
584-
testIndex = new ByteArrayEntity(readFile("TestIndex.json"), ContentType.APPLICATION_JSON);
585-
testData = new ByteArrayEntity(readFile("TestData.txt"), ContentType.create("application/x-ndjson"));
586-
} else {
587-
testIndex = new ByteArrayEntity(readFile("TestIndexElasticsearch6.json"), ContentType.APPLICATION_JSON);
588-
testData = new ByteArrayEntity(readFile("TestDataElasticsearch6.txt"), ContentType.create("application/x-ndjson"));
613+
switch (testResourceSet) {
614+
case TEST_RESOURCES_ARRAYS:
615+
if (props.getProperty("elasticsearch.version").compareTo("7.") >= 0) {
616+
testIndex = new ByteArrayEntity(readFile("TestIndexArrays.json"), ContentType.APPLICATION_JSON);
617+
testData = new ByteArrayEntity(readFile("TestDataArrays.txt"), ContentType.create("application/x-ndjson"));
618+
} else {
619+
testIndex = new ByteArrayEntity(readFile("TestIndexArraysElasticsearch6.json"), ContentType.APPLICATION_JSON);
620+
testData = new ByteArrayEntity(readFile("TestDataArraysElasticsearch6.txt"), ContentType.create("application/x-ndjson"));
621+
}
622+
break;
623+
default:
624+
if (props.getProperty("elasticsearch.version").compareTo("7.") >= 0) {
625+
testIndex = new ByteArrayEntity(readFile("TestIndex.json"), ContentType.APPLICATION_JSON);
626+
testData = new ByteArrayEntity(readFile("TestData.txt"), ContentType.create("application/x-ndjson"));
627+
} else {
628+
testIndex = new ByteArrayEntity(readFile("TestIndexElasticsearch6.json"), ContentType.APPLICATION_JSON);
629+
testData = new ByteArrayEntity(readFile("TestDataElasticsearch6.txt"), ContentType.create("application/x-ndjson"));
630+
}
631+
break;
589632
}
590633

591634
// Create indices
592-
Request putTestIndexA = new Request("PUT", ".zentity_test_index_a");
593-
putTestIndexA.setEntity(testIndex);
594-
client.performRequest(putTestIndexA);
595-
Request putTestIndexB = new Request("PUT", ".zentity_test_index_b");
596-
putTestIndexB.setEntity(testIndex);
597-
client.performRequest(putTestIndexB);
598-
Request putTestIndexC = new Request("PUT", ".zentity_test_index_c");
599-
putTestIndexC.setEntity(testIndex);
600-
client.performRequest(putTestIndexC);
601-
Request putTestIndexD = new Request("PUT", ".zentity_test_index_d");
602-
putTestIndexD.setEntity(testIndex);
603-
client.performRequest(putTestIndexD);
635+
switch (testResourceSet) {
636+
case TEST_RESOURCES_ARRAYS:
637+
Request putTestIndexArrays = new Request("PUT", ".zentity_test_index_arrays");
638+
putTestIndexArrays.setEntity(testIndex);
639+
client.performRequest(putTestIndexArrays);
640+
break;
641+
default:
642+
Request putTestIndexA = new Request("PUT", ".zentity_test_index_a");
643+
putTestIndexA.setEntity(testIndex);
644+
client.performRequest(putTestIndexA);
645+
Request putTestIndexB = new Request("PUT", ".zentity_test_index_b");
646+
putTestIndexB.setEntity(testIndex);
647+
client.performRequest(putTestIndexB);
648+
Request putTestIndexC = new Request("PUT", ".zentity_test_index_c");
649+
putTestIndexC.setEntity(testIndex);
650+
client.performRequest(putTestIndexC);
651+
Request putTestIndexD = new Request("PUT", ".zentity_test_index_d");
652+
putTestIndexD.setEntity(testIndex);
653+
client.performRequest(putTestIndexD);
654+
break;
655+
}
656+
604657

605658
// Load data into indices
606659
Request postBulk = new Request("POST", "_bulk");
@@ -610,14 +663,17 @@ private void prepareTestIndices() throws Exception {
610663
}
611664

612665
private void prepareTestResources(int testResourceSet) throws Exception {
613-
prepareTestIndices();
666+
prepareTestIndices(testResourceSet);
614667
switch (testResourceSet) {
615668
case TEST_RESOURCES_A:
616669
prepareTestEntityModelA();
617670
break;
618671
case TEST_RESOURCES_B:
619672
prepareTestEntityModelB();
620673
break;
674+
case TEST_RESOURCES_ARRAYS:
675+
prepareTestEntityModelArrays();
676+
break;
621677
case TEST_RESOURCES_ELASTICSEARCH_ERROR:
622678
prepareTestEntityModelElasticsearchError();
623679
break;
@@ -1612,4 +1668,43 @@ public void testJobZentityError() throws Exception {
16121668
destroyTestResources(testResourceSet);
16131669
}
16141670
}
1671+
1672+
public void testJobArrays() throws Exception {
1673+
int testResourceSet = TEST_RESOURCES_ARRAYS;
1674+
prepareTestResources(testResourceSet);
1675+
try {
1676+
String endpoint = "_zentity/resolution/zentity_test_entity_arrays";
1677+
Set<String> docsExpectedArrays = new TreeSet<>();
1678+
docsExpectedArrays.add("1,0");
1679+
docsExpectedArrays.add("2,1");
1680+
1681+
Request q1 = new Request("POST", endpoint);
1682+
q1.addParameter("_explanation", "true");
1683+
q1.setEntity(TEST_PAYLOAD_JOB_ARRAYS);
1684+
Response r1 = client.performRequest(q1);
1685+
JsonNode j1 = Json.MAPPER.readTree(r1.getEntity().getContent());
1686+
assertEquals(j1.get("hits").get("total").asInt(), 2);
1687+
assertEquals(docsExpectedArrays, getActual(j1));
1688+
1689+
for (JsonNode doc : j1.get("hits").get("hits")) {
1690+
String attributesExpected = "";
1691+
String explanationExpected = "";
1692+
switch (doc.get("_id").asText()) {
1693+
case "1":
1694+
attributesExpected = "{\"array\":[\"111\",\"222\",\"333\",\"444\"],\"string\":[\"abc\"]}";
1695+
explanationExpected = "{\"resolvers\":{\"array\":{\"attributes\":[\"array\"]},\"string\":{\"attributes\":[\"string\"]}},\"matches\":[{\"attribute\":\"array\",\"target_field\":\"array_2\",\"target_value\":[\"222\",\"222\"],\"input_value\":\"222\",\"input_matcher\":\"exact\",\"input_matcher_params\":{}},{\"attribute\":\"array\",\"target_field\":\"array_4\",\"target_value\":[\"222\",\"333\",\"444\"],\"input_value\":\"222\",\"input_matcher\":\"exact\",\"input_matcher_params\":{}},{\"attribute\":\"string\",\"target_field\":\"string\",\"target_value\":\"abc\",\"input_value\":\"abc\",\"input_matcher\":\"exact\",\"input_matcher_params\":{}}]}";
1696+
break;
1697+
case "2":
1698+
attributesExpected = "{\"array\":[\"444\",\"555\"],\"string\":[\"xyz\"]}";
1699+
explanationExpected = "{\"resolvers\":{\"array\":{\"attributes\":[\"array\"]}},\"matches\":[{\"attribute\":\"array\",\"target_field\":\"array_1\",\"target_value\":[\"444\"],\"input_value\":\"444\",\"input_matcher\":\"exact\",\"input_matcher_params\":{}}]}";
1700+
break;
1701+
}
1702+
assertEquals(attributesExpected, Json.MAPPER.writeValueAsString(doc.get("_attributes")));
1703+
assertEquals(explanationExpected, Json.MAPPER.writeValueAsString(doc.get("_explanation")));
1704+
}
1705+
1706+
} finally {
1707+
destroyTestResources(testResourceSet);
1708+
}
1709+
}
16151710
}
src/test/resources/TestDataArrays.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{ "index" : { "_index" : ".zentity_test_index_arrays", "_id" : "1" }}
2+
{ "string" : "abc", "array_1": [ "111" ], "array_2" : ["222","222"], "array_3" : [],"array_4" : [ "222", "333", "444" ]}
3+
{ "index" : { "_index" : ".zentity_test_index_arrays", "_id" : "2" }}
4+
{ "string" : "xyz", "array_1": [ "444" ], "array_2" : null, "array_4" : [ "555" ]}
src/test/resources/TestDataArraysElasticsearch6.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{ "index" : { "_index" : ".zentity_test_index_arrays", "_type": "doc", "_id" : "1" }}
2+
{ "string" : "abc", "array_1": [ "111" ], "array_2" : ["222","222"], "array_3" : [],"array_4" : [ "222", "333", "444" ]}
3+
{ "index" : { "_index" : ".zentity_test_index_arrays", "_type": "doc", "_id" : "2" }}
4+
{ "string" : "xyz", "array_1": [ "444" ], "array_2" : null, "array_4" : [ "555" ]}
src/test/resources/TestEntityModelArrays.json

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"attributes": {
3+
"string": {},
4+
"array": {}
5+
},
6+
"resolvers": {
7+
"string": {
8+
"attributes": [
9+
"string"
10+
]
11+
},
12+
"array": {
13+
"attributes": [
14+
"array"
15+
]
16+
}
17+
},
18+
"matchers": {
19+
"exact": {
20+
"clause": {
21+
"term": {
22+
"{{ field }}": "{{ value }}"
23+
}
24+
}
25+
}
26+
},
27+
"indices": {
28+
".zentity_test_index_arrays": {
29+
"fields": {
30+
"string": {
31+
"attribute": "string",
32+
"matcher": "exact"
33+
},
34+
"array_1": {
35+
"attribute": "array",
36+
"matcher": "exact"
37+
},
38+
"array_2": {
39+
"attribute": "array",
40+
"matcher": "exact"
41+
},
42+
"array_3": {
43+
"attribute": "array",
44+
"matcher": "exact"
45+
},
46+
"array_4": {
47+
"attribute": "array",
48+
"matcher": "exact"
49+
}
50+
}
51+
}
52+
}
53+
}
src/test/resources/TestIndexArrays.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"settings": {
3+
"number_of_shards": 1,
4+
"number_of_replicas": 0
5+
},
6+
"mappings" : {
7+
"properties" : {
8+
"array_1" : {
9+
"type" : "keyword"
10+
},
11+
"array_2" : {
12+
"type" : "keyword"
13+
},
14+
"array_3" : {
15+
"type" : "keyword"
16+
},
17+
"array_4" : {
18+
"type" : "keyword"
19+
},
20+
"string" : {
21+
"type" : "keyword"
22+
}
23+
}
24+
}
25+
}

0 commit comments

Comments
 (0)