|
4 | 4 | import com.fasterxml.jackson.databind.JsonNode; |
5 | 5 | import com.fasterxml.jackson.databind.node.ObjectNode; |
6 | 6 | import io.zentity.common.Json; |
| 7 | +import io.zentity.common.Patterns; |
| 8 | +import io.zentity.model.Index; |
7 | 9 | import io.zentity.model.Matcher; |
8 | 10 | import io.zentity.model.Model; |
9 | 11 | import io.zentity.model.ValidationException; |
@@ -70,6 +72,61 @@ public Job(NodeClient client) { |
70 | 72 | this.client = client; |
71 | 73 | } |
72 | 74 |
|
| 75 | + public static String makeScriptFieldsClause(Input input, String indexName) throws ValidationException { |
| 76 | + List<String> scriptFieldClauses = new ArrayList<>(); |
| 77 | + |
| 78 | + // Find any index fields that need to be included in the "script_fields" clause. |
| 79 | + // Currently this includes any index field that is associated with a "date" attribute, |
| 80 | + // which requires the "_source" value to be reformatted to a normalized format. |
| 81 | + Index index = input.model().indices().get(indexName); |
| 82 | + for (String attributeName : index.attributeIndexFieldsMap().keySet()) { |
| 83 | + switch (input.model().attributes().get(attributeName).type()) { |
| 84 | + case "date": |
| 85 | + |
| 86 | + // Required params |
| 87 | + String format; |
| 88 | + |
| 89 | + // Make a "script" clause for each index field associated with this attribute. |
| 90 | + for (String indexFieldName : index.attributeIndexFieldsMap().get(attributeName).keySet()) { |
| 91 | + // Check if the required params are defined in the input attribute. |
| 92 | + if (input.attributes().containsKey(attributeName) && input.attributes().get(attributeName).params().containsKey("format") && !input.attributes().get(attributeName).params().get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(input.attributes().get(attributeName).params().get("format")).matches()) { |
| 93 | + format = input.attributes().get(attributeName).params().get("format"); |
| 94 | + } else { |
| 95 | + // Otherwise check if the required params are defined in the model attribute. |
| 96 | + Map<String, String> params = input.model().attributes().get(attributeName).params(); |
| 97 | + if (params.containsKey("format") && !params.get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(params.get("format")).matches()) { |
| 98 | + format = params.get("format"); |
| 99 | + } else { |
| 100 | + // Otherwise check if the required params are defined in the matcher associated with the index field. |
| 101 | + String matcherName = index.attributeIndexFieldsMap().get(attributeName).get(indexFieldName).matcher(); |
| 102 | + params = input.model().matchers().get(matcherName).params(); |
| 103 | + if (params.containsKey("format") && !params.get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(params.get("format")).matches()) { |
| 104 | + format = params.get("format"); |
| 105 | + } else { |
| 106 | + // If we've gotten this far, that means that the required params for this attribute type |
| 107 | + // haven't been specified in any valid places. |
| 108 | + throw new ValidationException("'attributes." + attributeName + "' is a 'date' which required a 'format' to be specified in the params."); |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + // Make the "script" clause |
| 114 | + String scriptSource = "doc[params.field].value.toString(params.format)"; |
| 115 | + String scriptParams = "\"field\":\"" + indexFieldName + "\",\"format\":\"" + format + "\""; |
| 116 | + String scriptFieldClause = "\"" + indexFieldName + "\":{\"script\":{\"lang\":\"painless\",\"source\":\"" + scriptSource + "\",\"params\":{" + scriptParams + "}}}"; |
| 117 | + scriptFieldClauses.add(scriptFieldClause); |
| 118 | + } |
| 119 | + break; |
| 120 | + |
| 121 | + default: |
| 122 | + break; |
| 123 | + } |
| 124 | + } |
| 125 | + if (scriptFieldClauses.isEmpty()) |
| 126 | + return null; |
| 127 | + return "\"script_fields\":{" + String.join(",", scriptFieldClauses) + "}"; |
| 128 | + } |
| 129 | + |
73 | 130 | /** |
74 | 131 | * Determine if a field of an index has a matcher associated with that field. |
75 | 132 | * |
@@ -152,14 +209,18 @@ public static String populateMatcherClause(Matcher matcher, String indexFieldNam |
152 | 209 | matcherClause = pattern.matcher(matcherClause).replaceAll(value); |
153 | 210 | break; |
154 | 211 | default: |
155 | | - String paramValue; |
156 | | - if (attribute.params().containsKey(variable)) |
157 | | - paramValue = attribute.params().get(variable); |
158 | | - else if (matcher.params().containsKey(variable)) |
159 | | - paramValue = matcher.params().get(variable); |
160 | | - else |
161 | | - throw new ValidationException("'matchers." + matcher.name() + "' was given no value for '{{ " + variable + " }}'"); |
162 | | - matcherClause = pattern.matcher(matcherClause).replaceAll(paramValue); |
| 212 | + java.util.regex.Matcher m = Patterns.VARIABLE_PARAMS.matcher(variable); |
| 213 | + if (m.find()) { |
| 214 | + String var = m.group(1); |
| 215 | + String paramValue; |
| 216 | + if (attribute.params().containsKey(var)) |
| 217 | + paramValue = attribute.params().get(var); |
| 218 | + else if (matcher.params().containsKey(var)) |
| 219 | + paramValue = matcher.params().get(var); |
| 220 | + else |
| 221 | + throw new ValidationException("'matchers." + matcher.name() + "' was given no value for '{{ " + variable + " }}'"); |
| 222 | + matcherClause = pattern.matcher(matcherClause).replaceAll(paramValue); |
| 223 | + } |
163 | 224 | break; |
164 | 225 | } |
165 | 226 | } |
@@ -502,6 +563,8 @@ private void traverse() throws IOException, ValidationException { |
502 | 563 | List<String> queryClauses = new ArrayList<>(); |
503 | 564 | List<String> queryMustNotClauses = new ArrayList<>(); |
504 | 565 | List<String> queryFilterClauses = new ArrayList<>(); |
| 566 | + List<String> topLevelClauses = new ArrayList<>(); |
| 567 | + topLevelClauses.add("\"_source\":true"); |
505 | 568 |
|
506 | 569 | // Exclude docs by _id |
507 | 570 | Set<String> ids = this.docIds.get(indexName); |
@@ -549,13 +612,24 @@ else if (size == 1) |
549 | 612 |
|
550 | 613 | // Construct the "query" clause. |
551 | 614 | if (!queryClauses.isEmpty()) |
552 | | - queryClause = "{\"bool\":{" + String.join(",", queryClauses) + "}}"; |
| 615 | + queryClause = "\"query\":{\"bool\":{" + String.join(",", queryClauses) + "}}"; |
| 616 | + topLevelClauses.add(queryClause); |
553 | 617 |
|
554 | | - // Construct the final query. |
| 618 | + // Construct the "script_fields" clause. |
| 619 | + String scriptFieldsClause = makeScriptFieldsClause(this.input, indexName); |
| 620 | + if (scriptFieldsClause != null) |
| 621 | + topLevelClauses.add(scriptFieldsClause); |
| 622 | + |
| 623 | + // Construct the "size" clause. |
| 624 | + topLevelClauses.add("\"size\":" + this.maxDocsPerQuery); |
| 625 | + |
| 626 | + // Construct the "profile" clause. |
555 | 627 | if (this.profile) |
556 | | - query = "{\"query\":" + queryClause + ",\"size\": " + this.maxDocsPerQuery + ",\"profile\":true}"; |
557 | | - else |
558 | | - query = "{\"query\":" + queryClause + ",\"size\": " + this.maxDocsPerQuery + "}"; |
| 628 | + topLevelClauses.add("\"profile\":true"); |
| 629 | + |
| 630 | + // Construct the final query. |
| 631 | + query = "{" + String.join(",", topLevelClauses) + "}"; |
| 632 | + System.out.println(query); |
559 | 633 |
|
560 | 634 | // Submit query to Elasticsearch. |
561 | 635 | SearchResponse response = this.search(indexName, query); |
@@ -604,25 +678,50 @@ else if (size == 1) |
604 | 678 | String attributeType = this.input.model().attributes().get(attributeName).type(); |
605 | 679 | if (!nextInputAttributes.containsKey(attributeName)) |
606 | 680 | nextInputAttributes.put(attributeName, new Attribute(attributeName, attributeType)); |
607 | | - // The index field name might not refer to the _source property. |
608 | | - // If it's not in the _source, remove the last part of the index field name from the dot notation. |
609 | | - // Index field names can reference multi-fields, which are not returned in the _source. |
610 | | - String path = this.input.model().indices().get(indexName).fields().get(indexFieldName).path(); |
611 | | - String pathParent = this.input.model().indices().get(indexName).fields().get(indexFieldName).pathParent(); |
612 | | - JsonNode valueNode = doc.get("_source").at(path); |
613 | | - if (valueNode.isMissingNode()) |
614 | | - valueNode = doc.get("_source").at(pathParent); |
615 | | - if (valueNode.isMissingNode()) |
616 | | - continue; |
617 | | - docAttributes.put(attributeName, valueNode); |
618 | | - Value value = Value.create(attributeType, valueNode); |
619 | | - nextInputAttributes.get(attributeName).values().add(value); |
| 681 | + |
| 682 | + // Get the attribute value from the doc. |
| 683 | + if (doc.has("fields") && doc.get("fields").has(indexFieldName)) { |
| 684 | + |
| 685 | + // Get the attribute value from the "fields" field if it exists there. |
| 686 | + // This would include 'date' attribute types, for example. |
| 687 | + JsonNode valueNode = doc.get("fields").get(indexFieldName); |
| 688 | + if (valueNode.size() > 1) { |
| 689 | + docAttributes.put(attributeName, valueNode); // Return multiple values (as an array) in "_attributes" |
| 690 | + for (JsonNode vNode : valueNode) { |
| 691 | + Value value = Value.create(attributeType, vNode); |
| 692 | + nextInputAttributes.get(attributeName).values().add(value); |
| 693 | + } |
| 694 | + } else { |
| 695 | + JsonNode vNode = valueNode.get(0); // Return single value (not as an array) in "_attributes" |
| 696 | + docAttributes.put(attributeName, vNode); |
| 697 | + Value value = Value.create(attributeType, vNode); |
| 698 | + nextInputAttributes.get(attributeName).values().add(value); |
| 699 | + } |
| 700 | + |
| 701 | + } else { |
| 702 | + |
| 703 | + // Get the attribute value from the "_source" field. |
| 704 | + // The index field name might not refer to the _source property. |
| 705 | + // If it's not in the _source, remove the last part of the index field name from the dot notation. |
| 706 | + // Index field names can reference multi-fields, which are not returned in the _source. |
| 707 | + String path = this.input.model().indices().get(indexName).fields().get(indexFieldName).path(); |
| 708 | + String pathParent = this.input.model().indices().get(indexName).fields().get(indexFieldName).pathParent(); |
| 709 | + JsonNode valueNode = doc.get("_source").at(path); |
| 710 | + if (valueNode.isMissingNode()) |
| 711 | + valueNode = doc.get("_source").at(pathParent); |
| 712 | + if (valueNode.isMissingNode()) |
| 713 | + continue; |
| 714 | + docAttributes.put(attributeName, valueNode); |
| 715 | + Value value = Value.create(attributeType, valueNode); |
| 716 | + nextInputAttributes.get(attributeName).values().add(value); |
| 717 | + } |
620 | 718 | } |
621 | 719 |
|
622 | 720 | // Modify doc metadata. |
623 | 721 | if (this.includeHits) { |
624 | 722 | ObjectNode docObjNode = (ObjectNode) doc; |
625 | 723 | docObjNode.remove("_score"); |
| 724 | + docObjNode.remove("fields"); |
626 | 725 | docObjNode.put("_hop", this.hop); |
627 | 726 | if (this.includeAttributes) { |
628 | 727 | ObjectNode docAttributesObjNode = docObjNode.putObject("_attributes"); |
|
0 commit comments