Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: yakaz/elasticsearch-analysis-hashsplitter
base: a80e891aab
...
head fork: yakaz/elasticsearch-analysis-hashsplitter
compare: 90cc965e46
  • 6 commits
  • 16 files changed
  • 0 commit comments
  • 1 contributor
Commits on Apr 17, 2012
@ofavre ofavre Wildcard query support 88696e7
@ofavre ofavre WildcardFilter and its HashSplitter- variant
Centralizing default wildcard values.

Bug in HashSplitterFieldMapper.TypeParser, used Integer.getInteger()
(which reads a system property), instead of Integer.valueOf().
d37e5c0
@ofavre ofavre Wildcard query/filter builders wildcardOne/Any support 934f231
@ofavre ofavre Fix prefix query/filter
They should use the HashSplitterAnalyzer, not the -Search- one,
as they do not use any wildcard.
001db3d
@ofavre ofavre Alternate wildcards test + remove wildcard conf in queries/filters
Wildcard configuration should solely be performed in the mapping.
Passing a custom configuration at query time would require more
parameters to the generic wildcardQuery/Filter() interface,
or a more extensible QueryParserContext.

Real FilterParser logic for HashSplitterWildcardFilterParser
(was too inspired from the QueryParser logic).

Fix bugs caused by using field(String, int) instead of the desired (nonexistent)
field(String, char) overload, which led chars to be serialized as numbers.
Builder now only accepts wildcardOne/Any(char) instead of String;
a String argument is checked to be a single character, otherwise an exception is thrown.
316bdf5
@ofavre ofavre Better code coverage + maven plugin for reporting
Using Cobertura
90cc965
Showing with 1,497 additions and 18 deletions.
  1. +26 −0 pom.xml
  2. +124 −0 src/main/java/org/apache/lucene/search/WildcardFilter.java
  3. +134 −0 src/main/java/org/apache/lucene/search/WildcardQuery.java
  4. +189 −0 src/main/java/org/apache/lucene/search/WildcardTermEnum.java
  5. +51 −0 src/main/java/org/elasticsearch/index/mapper/hashsplitter/CustomWildcardSearchFieldMapper.java
  6. +76 −18 src/main/java/org/elasticsearch/index/mapper/hashsplitter/HashSplitterFieldMapper.java
  7. +94 −0 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardFilterBuilder.java
  8. +138 −0 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardFilterParser.java
  9. +89 −0 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardQueryBuilder.java
  10. +119 −0 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardQueryParser.java
  11. +3 −0  src/main/java/org/elasticsearch/index/query/RegisterHashSplitterQueryParsers.java
  12. +213 −0 src/test/java/org/apache/lucene/search/WildcardTermEnumTests.java
  13. +34 −0 src/test/java/org/elasticsearch/index/mapper/hashsplitter/HashSplitterFieldMapperTests.java
  14. +174 −0 src/test/java/org/elasticsearch/index/query/HashSplitterQueryParsersTests.java
  15. +17 −0 src/test/resources/chunklength4-prefixesLowercasedAlphabet-SqlWildcards-mapping.json
  16. +16 −0 src/test/resources/chunklength4-prefixesLowercasedAlphabet-size16Fixed-mapping.json
View
26 pom.xml
@@ -76,8 +76,23 @@
<build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>cobertura-maven-plugin</artifactId>
+ <version>2.5.1</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
<plugins>
<plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>cobertura-maven-plugin</artifactId>
+ <version>2.5.1</version>
+ </plugin>
+ <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
@@ -130,4 +145,15 @@
</plugin>
</plugins>
</build>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>cobertura-maven-plugin</artifactId>
+ <version>2.5.1</version>
+ </plugin>
+ </plugins>
+ </reporting>
+
</project>
View
124 src/main/java/org/apache/lucene/search/WildcardFilter.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.util.OpenBitSet;
+
+import java.io.IOException;
+
+
+/**
+ * @see org.apache.solr.search.WildcardFilter from Solr 3.6
+ * @author Modified by ofavre
+ * @version $Id: WildcardFilter.java 922957 2010-03-14 20:58:32Z markrmiller $
+ */
+public class WildcardFilter extends Filter {
+ protected final Term term;
+ protected final char wildcardOne;
+ protected final char wildcardAny;
+
+ public static final char DEFAULT_WILDCARD_ONE = WildcardQuery.DEFAULT_WILDCARD_ONE;
+ public static final char DEFAULT_WILDCARD_ANY = WildcardQuery.DEFAULT_WILDCARD_ANY;
+
+ public WildcardFilter(Term wildcardTerm) {
+ this(wildcardTerm, DEFAULT_WILDCARD_ONE, DEFAULT_WILDCARD_ANY);
+ }
+
+ public WildcardFilter(Term wildcardTerm, char wildcardOne, char wildcardAny) {
+ this.term = wildcardTerm;
+ this.wildcardOne = wildcardOne;
+ this.wildcardAny = wildcardAny;
+ }
+
+ public Term getTerm() { return term; }
+
+ @Override
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+ TermEnum enumerator = new WildcardTermEnum(reader, term, wildcardOne, wildcardAny);
+ TermDocs termDocs = reader.termDocs();
+ try {
+ do {
+ Term term = enumerator.term();
+ if (term==null) break;
+ termDocs.seek(term);
+ while (termDocs.next()) {
+ bitSet.set(termDocs.doc());
+ }
+ } while (enumerator.next());
+ } finally {
+ termDocs.close();
+ enumerator.close();
+ }
+ return bitSet;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ WildcardFilter that = (WildcardFilter) o;
+
+ if (wildcardAny != that.wildcardAny) return false;
+ if (wildcardOne != that.wildcardOne) return false;
+ if (term != null ? !term.equals(that.term) : that.term != null) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = term != null ? term.hashCode() : 0;
+ result = 31 * result + (int) wildcardOne;
+ result = 31 * result + (int) wildcardAny;
+ return result;
+ }
+
+ public String toString (String field) {
+ StringBuilder buffer = new StringBuilder();
+ if (!term.field().equals(field)) {
+ buffer.append(term.field());
+ buffer.append(":");
+ }
+ buffer.append(term.text());
+ if (wildcardOne != DEFAULT_WILDCARD_ONE) {
+ buffer.append(',');
+ buffer.append(DEFAULT_WILDCARD_ONE);
+ buffer.append('=');
+ buffer.append(wildcardOne);
+ }
+ if (wildcardAny != DEFAULT_WILDCARD_ANY) {
+ buffer.append(',');
+ buffer.append(DEFAULT_WILDCARD_ANY);
+ buffer.append('=');
+ buffer.append(wildcardAny);
+ }
+ return buffer.toString();
+ }
+
+ @Override
+ public String toString () {
+ return toString("");
+ }
+
+}
View
134 src/main/java/org/apache/lucene/search/WildcardQuery.java
@@ -0,0 +1,134 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+
+/** Implements the wildcard search query. By default, <code>*</code>
+ * matches any character sequence (including the empty one), and <code>?</code>,
+ * matches any single character. Note this query can be slow, as it
+ * needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
+ * a Wildcard term should not start with one of the wildcards <code>*</code> or
+ * <code>?</code>.
+ *
+ * <p>This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
+ *
+ * @see WildcardTermEnum */
+public class WildcardQuery extends MultiTermQuery {
+
+ public static final char DEFAULT_WILDCARD_ONE = '?';
+ public static final char DEFAULT_WILDCARD_ANY = '*';
+
+ private boolean termContainsWildcard;
+ private boolean termIsPrefix;
+ protected Term term;
+ private char wildcardOne;
+ private char wildcardAny;
+
+ public WildcardQuery(Term term) {
+ this(term, DEFAULT_WILDCARD_ONE, DEFAULT_WILDCARD_ANY);
+ }
+
+ public WildcardQuery(Term term, char wildcardOne, char wildcardAny) {
+ this.term = term;
+ this.wildcardOne = wildcardOne;
+ this.wildcardAny = wildcardAny;
+ String text = term.text();
+ this.termContainsWildcard = (text.indexOf(this.wildcardAny) != -1)
+ || (text.indexOf(this.wildcardOne) != -1);
+ this.termIsPrefix = termContainsWildcard
+ && (text.indexOf(this.wildcardOne) == -1)
+ && (text.indexOf(this.wildcardAny) == text.length() - 1);
+ }
+
+ @Override
+ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ if (termIsPrefix) {
+ return new PrefixTermEnum(reader, term.createTerm(term.text()
+ .substring(0, term.text().indexOf(wildcardAny))));
+ } else if (termContainsWildcard) {
+ return new WildcardTermEnum(reader, getTerm(), wildcardOne, wildcardAny);
+ } else {
+ return new SingleTermEnum(reader, getTerm());
+ }
+ }
+
+ /**
+ * Returns the pattern term.
+ */
+ public Term getTerm() {
+ return term;
+ }
+
+ /** Prints a user-readable version of this query. */
+ @Override
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder();
+ if (!term.field().equals(field)) {
+ buffer.append(term.field());
+ buffer.append(":");
+ }
+ buffer.append(term.text());
+ buffer.append(ToStringUtils.boost(getBoost()));
+ if (wildcardOne != DEFAULT_WILDCARD_ONE) {
+ buffer.append(',');
+ buffer.append(DEFAULT_WILDCARD_ONE);
+ buffer.append('=');
+ buffer.append(wildcardOne);
+ }
+ if (wildcardAny != DEFAULT_WILDCARD_ANY) {
+ buffer.append(',');
+ buffer.append(DEFAULT_WILDCARD_ANY);
+ buffer.append('=');
+ buffer.append(wildcardAny);
+ }
+ return buffer.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ WildcardQuery other = (WildcardQuery) obj;
+ if (term == null) {
+ if (other.term != null)
+ return false;
+ } else if (!term.equals(other.term))
+ return false;
+ return true;
+ }
+
+}
View
189 src/main/java/org/apache/lucene/search/WildcardTermEnum.java
@@ -0,0 +1,189 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating all terms that match the
+ * specified wildcard filter term.
+ * <p>
+ * Term enumerations are always ordered by Term.compareTo(). Each term in
+ * the enumeration is greater than all that precede it.
+ */
+public class WildcardTermEnum extends FilteredTermEnum {
+ final Term searchTerm;
+ final char wildcardOne;
+ final char wildcardAny;
+ final String field;
+ final String text;
+ final String pre;
+ final int preLen;
+ boolean endEnum = false;
+
+ /**
+ * Creates a new <code>WildcardTermEnum</code>.
+ * <p>
+ * After calling the constructor the enumeration is already pointing to the first
+ * valid term if such a term exists.
+ */
+ public WildcardTermEnum(IndexReader reader, Term term, char wildcardOne, char wildcardAny) throws IOException {
+ super();
+ searchTerm = term;
+ this.wildcardOne = wildcardOne;
+ this.wildcardAny = wildcardAny;
+ field = searchTerm.field();
+ final String searchTermText = searchTerm.text();
+
+ final int sidx = searchTermText.indexOf(this.wildcardAny);
+ final int cidx = searchTermText.indexOf(this.wildcardOne);
+ int idx = sidx;
+ if (idx == -1) {
+ idx = cidx;
+ }
+ else if (cidx >= 0) {
+ idx = Math.min(idx, cidx);
+ }
+ pre = idx != -1?searchTerm.text().substring(0,idx): "";
+
+ preLen = pre.length();
+ text = searchTermText.substring(preLen);
+ setEnum(reader.terms(new Term(searchTerm.field(), pre)));
+ }
+
+ @Override
+ protected final boolean termCompare(Term term) {
+ if (field == term.field()) {
+ String searchText = term.text();
+ if (searchText.startsWith(pre)) {
+ return wildcardEquals(text, 0, searchText, preLen, wildcardOne, wildcardAny);
+ }
+ }
+ endEnum = true;
+ return false;
+ }
+
+ @Override
+ public float difference() {
+ return 1.0f;
+ }
+
+ @Override
+ public final boolean endEnum() {
+ return endEnum;
+ }
+
+ /**
+ * Determines if a word matches a wildcard pattern.
+ * <small>Work released by Granta Design Ltd after originally being done on
+ * company time.</small>
+ */
+ public static final boolean wildcardEquals(String pattern, int patternIdx,
+ String string, int stringIdx,
+ char wildcardOne, char wildcardAny)
+ {
+ int p = patternIdx;
+
+ for (int s = stringIdx; ; ++p, ++s)
+ {
+ // End of string yet?
+ boolean sEnd = (s >= string.length());
+ // End of pattern yet?
+ boolean pEnd = (p >= pattern.length());
+
+ // If we're looking at the end of the string...
+ if (sEnd)
+ {
+ // Assume the only thing left on the pattern is/are wildcards
+ boolean justWildcardsLeft = true;
+
+ // Current wildcard position
+ int wildcardSearchPos = p;
+ // While we haven't found the end of the pattern,
+ // and haven't encountered any non-wildcard characters
+ while (wildcardSearchPos < pattern.length() && justWildcardsLeft)
+ {
+ // Check the character at the current position
+ char wildchar = pattern.charAt(wildcardSearchPos);
+
+ // If it's not a wildcard character, then there is more
+ // pattern information after this/these wildcards.
+ if (wildchar != wildcardOne && wildchar != wildcardAny)
+ {
+ justWildcardsLeft = false;
+ }
+ else
+ {
+ // to prevent "cat" matches "ca??"
+ if (wildchar == wildcardOne) {
+ return false;
+ }
+
+ // Look at the next character
+ wildcardSearchPos++;
+ }
+ }
+
+ // This was a prefix wildcard search, and we've matched, so
+ // return true.
+ if (justWildcardsLeft)
+ {
+ return true;
+ }
+ }
+
+ // If we've gone past the end of the string, or the pattern,
+ // return false.
+ if (sEnd || pEnd)
+ {
+ break;
+ }
+
+ // Match a single character, so continue.
+ if (pattern.charAt(p) == wildcardOne)
+ {
+ continue;
+ }
+
+ //
+ if (pattern.charAt(p) == wildcardAny)
+ {
+ // Look at the character beyond the '*' characters.
+ while (p < pattern.length() && pattern.charAt(p) == wildcardAny)
+ ++p;
+ // Examine the string, starting at the last character.
+ for (int i = string.length(); i >= s; --i)
+ {
+ if (wildcardEquals(pattern, p, string, i, wildcardOne, wildcardAny))
+ {
+ return true;
+ }
+ }
+ break;
+ }
+ if (pattern.charAt(p) != string.charAt(s))
+ {
+ break;
+ }
+ }
+ return false;
+ }
+}
View
51 ...ain/java/org/elasticsearch/index/mapper/hashsplitter/CustomWildcardSearchFieldMapper.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.hashsplitter;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.index.query.QueryParseContext;
+
+public interface CustomWildcardSearchFieldMapper {
+
+ /**
+ * Return the actual {@link org.apache.lucene.search.Query} to be performed against the current field
+ * for the given value.
+ * @param value Value to be searched, with possible wildcards.
+ * @param method Rewrite method to be used.
+ * @param context The current parser context.
+ * @return A {@link org.apache.lucene.search.WildcardQuery}, or any other {@link org.apache.lucene.search.Query}
+ * to be used to perform the actual query, or null to use the default, fallback WildcardQuery.
+ */
+ public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context);
+
+ /**
+ * Return the actual {@link org.apache.lucene.search.Filter} to be performed against the current field
+ * for the given value.
+ * @param value Value to be searched, with possible wildcards.
+ * @param context The current parser context.
+ * @return A {@link org.apache.lucene.search.WildcardFilter}, or any other {@link org.apache.lucene.search.Filter}
+ * to be used to perform the actual filter, or null to use the default, fallback WildcardFilter.
+ */
+ public Filter wildcardFilter(String value, @Nullable QueryParseContext context);
+
+}
View
94 src/main/java/org/elasticsearch/index/mapper/hashsplitter/HashSplitterFieldMapper.java
@@ -33,6 +33,8 @@
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardFilter;
+import org.apache.lucene.search.WildcardQuery;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.FastStringReader;
@@ -54,7 +56,7 @@
import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
-public class HashSplitterFieldMapper extends StringFieldMapper {
+public class HashSplitterFieldMapper extends StringFieldMapper implements CustomWildcardSearchFieldMapper {
public static final String CONTENT_TYPE = "hashsplitter";
@@ -69,8 +71,8 @@
public static final int CHUNK_LENGTH = 1;
public static final String PREFIX = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789,.";
public static final Integer SIZE = null;
- public static final char WILDCARD_ONE = '?';
- public static final char WILDCARD_ANY = '*';
+ public static final char WILDCARD_ONE = WildcardQuery.DEFAULT_WILDCARD_ONE;
+ public static final char WILDCARD_ANY = WildcardQuery.DEFAULT_WILDCARD_ANY;
}
public static class Builder extends AbstractFieldMapper.Builder<Builder, HashSplitterFieldMapper> {
@@ -141,15 +143,13 @@ public Builder size(Integer size) {
return this;
}
- public Builder wildcardOne(String wildcardOne) {
- if (wildcardOne != null && wildcardOne.length() == 1)
- this.wildcardOne = wildcardOne.charAt(0);
+ public Builder wildcardOne(char wildcardOne) {
+ this.wildcardOne = wildcardOne;
return this;
}
- public Builder wildcardAny(String wildcardAny) {
- if (wildcardAny != null && wildcardAny.length() == 1)
- this.wildcardAny = wildcardAny.charAt(0);
+ public Builder wildcardAny(char wildcardAny) {
+ this.wildcardAny = wildcardAny;
return this;
}
@@ -173,7 +173,7 @@ public HashSplitterFieldMapper build(BuilderContext context) {
* field1 : {
* type : "hashsplitter",
* settings: {
- * chunk_length : 2,
+ * chunk_length : 1,
* size : "variable" | 32,
* wildcard_one : "?",
* wildcard_any : "*"
@@ -204,9 +204,15 @@ public HashSplitterFieldMapper build(BuilderContext context) {
} else if ("size".equals(propName)) {
builder.size(nodeSizeValue(propNode));
} else if ("wildcard_one".equals(propName)) {
- builder.wildcardOne(propNode.toString());
+ String value = propNode.toString();
+ if (value.length() != 1)
+ throw new MapperParsingException("["+HashSplitterFieldMapper.CONTENT_TYPE+"] Field "+name+" only supports 1-character long wildcard_one");
+ builder.wildcardOne(value.charAt(0));
} else if ("wildcard_any".equals(propName)) {
- builder.wildcardAny(propNode.toString());
+ String value = propNode.toString();
+ if (value.length() != 1)
+ throw new MapperParsingException("["+HashSplitterFieldMapper.CONTENT_TYPE+"] Field "+name+" only supports 1-character long wildcard_any");
+ builder.wildcardAny(value.charAt(0));
}
}
}
@@ -220,9 +226,11 @@ public static Integer nodeSizeValue(Object node) {
return (Integer) node;
return ((Number) node).intValue();
}
- if ("variable".equals(node.toString()))
+ try {
+ return Integer.valueOf(node.toString());
+ } catch (NumberFormatException ex) {
return null;
- return Integer.getInteger(node.toString());
+ }
}
}
@@ -355,10 +363,10 @@ protected void doXContentBody(XContentBuilder builder) throws IOException {
builder.value(sizeValue);
}
if (wildcardOne != Defaults.WILDCARD_ONE) {
- builder.field("wildcard_one", wildcardOne);
+ builder.field("wildcard_one", Character.toString(wildcardOne));
}
if (wildcardAny != Defaults.WILDCARD_ANY) {
- builder.field("wildcard_any", wildcardAny);
+ builder.field("wildcard_any", Character.toString(wildcardAny));
}
builder.endObject();
}
@@ -438,7 +446,7 @@ public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod me
// Use HashSplitterSearch* analysis and post-process it to create the real query
TokenStream tok = null;
try {
- tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
+ tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
tok.reset();
} catch (IOException e) {
return null;
@@ -468,7 +476,7 @@ public Filter prefixFilter(String value, @Nullable QueryParseContext context) {
// Use HashSplitterSearch* analysis and post-process it to create the real filter
TokenStream tok = null;
try {
- tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
+ tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
tok.reset();
} catch (IOException e) {
return null;
@@ -527,4 +535,54 @@ public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxEx
return null; // will fallback to an unusable default query
}
+ @Override
+ public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) {
+ // Use HashSplitterSearch* analysis and post-process it to create the real query
+ TokenStream tok = null;
+ try {
+ tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
+ tok.reset();
+ } catch (IOException e) {
+ return null;
+ }
+ CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
+ BooleanQuery q = new BooleanQuery();
+ try {
+ while (tok.incrementToken()) {
+ q.add(new WildcardQuery(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny), BooleanClause.Occur.MUST);
+ }
+ tok.end();
+ tok.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ q = null;
+ }
+ return q;
+ }
+
+ @Override
+ public Filter wildcardFilter(String value, @Nullable QueryParseContext context) {
+ // Use HashSplitterSearch* analysis and post-process it to create the real query
+ TokenStream tok = null;
+ try {
+ tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
+ tok.reset();
+ } catch (IOException e) {
+ return null;
+ }
+ CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
+ BooleanFilter f = new BooleanFilter();
+ try {
+ while (tok.incrementToken()) {
+ f.add(new WildcardFilter(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny), BooleanClause.Occur.MUST);
+ }
+ tok.end();
+ tok.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ f = null;
+ }
+ return f;
+ }
+
}
View
94 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardFilterBuilder.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+
+/**
+ * Implements the wildcard search filter.
+ * Note this filter can be a bit slow, as it needs to iterate over a number of terms.
+ */
+public class HashSplitterWildcardFilterBuilder extends BaseFilterBuilder {
+
+ private final String name;
+
+ private final String wildcard;
+
+ private Boolean cache;
+ private String cacheKey;
+
+ private String filterName;
+
+ public static HashSplitterWildcardFilterBuilder hashSplitterWildcardFilter(String name, String value) {
+ return new HashSplitterWildcardFilterBuilder(name, value);
+ }
+
+ /**
+ * Implements the wildcard search filter.
+ * Note this filter can be a bit slow, as it needs to iterate over a number of terms.
+ *
+ * @param name The field name
+ * @param wildcard The wildcard filter string
+ */
+ public HashSplitterWildcardFilterBuilder(String name, String wildcard) {
+ this.name = name;
+ this.wildcard = wildcard;
+ }
+
+ /**
+ * Sets the filter name for the filter that can be used when searching for matched_filters per hit.
+ */
+ public HashSplitterWildcardFilterBuilder filterName(String filterName) {
+ this.filterName = filterName;
+ return this;
+ }
+
+ /**
+ * Should the filter be cached or not. Defaults to <tt>true</tt>.
+ */
+ public HashSplitterWildcardFilterBuilder cache(boolean cache) {
+ this.cache = cache;
+ return this;
+ }
+
+ public HashSplitterWildcardFilterBuilder cacheKey(String cacheKey) {
+ this.cacheKey = cacheKey;
+ return this;
+ }
+
+ @Override
+ public void doXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(HashSplitterWildcardFilterParser.NAME);
+ builder.field(name, wildcard);
+ if (filterName != null) {
+ builder.field("_name", filterName);
+ }
+ if (cache != null) {
+ builder.field("_cache", cache);
+ }
+ if (cacheKey != null) {
+ builder.field("_cache_key", cacheKey);
+ }
+ builder.endObject();
+ }
+
+}
View
138 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardFilterParser.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.WildcardFilter;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.cache.filter.support.CacheKeyFilter;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.hashsplitter.CustomWildcardSearchFieldMapper;
+import org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper;
+
+import java.io.IOException;
+
+import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameFilter;
+
+/**
+ *
+ */
+public class HashSplitterWildcardFilterParser implements FilterParser {
+
+ public static final String NAME = "hashsplitter_wildcard";
+
+ @Inject
+ public HashSplitterWildcardFilterParser() {
+ }
+
+ @Override
+ public String[] names() {
+ return new String[]{NAME};
+ }
+
+ @Override
+ public Filter parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
+ XContentParser parser = parseContext.parser();
+
+ boolean cache = false;
+ CacheKeyFilter.Key cacheKey = null;
+ String filterName = null;
+ String value = null;
+
+ String fieldName = null;
+ String currentFieldName = null;
+ XContentParser.Token token;
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ currentFieldName = parser.currentName();
+ } else if (token.isValue()) {
+ if ("_name".equals(currentFieldName)) {
+ filterName = parser.text();
+ } else if ("_cache".equals(currentFieldName)) {
+ cache = parser.booleanValue();
+ } else if ("_cache_key".equals(currentFieldName) || "_cacheKey".equals(currentFieldName)) {
+ cacheKey = new CacheKeyFilter.Key(parser.text());
+ } else {
+ fieldName = currentFieldName;
+ value = parser.text();
+ }
+ }
+ }
+
+ if (fieldName == null) {
+ throw new QueryParsingException(parseContext.index(), "No field specified for term filter");
+ }
+
+ if (value == null) {
+ throw new QueryParsingException(parseContext.index(), "No value specified for "+NAME+" query");
+ }
+
+ Filter filter = null;
+ MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
+ if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
+ String[] previousTypes = null;
+ try {
+ if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
+ previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{smartNameFieldMappers.docMapper().type()});
+ }
+ FieldMapper mapper = smartNameFieldMappers.mapper();
+ if (mapper != null && mapper instanceof CustomWildcardSearchFieldMapper) {
+ CustomWildcardSearchFieldMapper hashsplitterMapper = (CustomWildcardSearchFieldMapper) mapper;
+ filter = hashsplitterMapper.wildcardFilter(value, parseContext);
+ if (filter == null) {
+ // No useful wildcardFilter() implementation, try wildcardQuery()
+ Query query = hashsplitterMapper.wildcardQuery(value, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, parseContext);
+ if (query != null)
+ filter = new QueryWrapperFilter(query);
+ }
+ }
+ if (filter == null) { // not associated with a HashSplitterFieldMapper OR wildcardFilter/Query() returned null
+ // Fallback on the same code as org.elasticsearch.index.query.WildcardQueryParser
+ fieldName = smartNameFieldMappers.mapper().names().indexName();
+ value = smartNameFieldMappers.mapper().indexedValue(value);
+ }
+ } finally {
+ if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
+ QueryParseContext.setTypes(previousTypes);
+ }
+ }
+ }
+ if (filter == null) {
+ WildcardFilter f = new WildcardFilter(new Term(fieldName, value));
+ filter = f;
+ }
+
+ if (cache) {
+ filter = parseContext.cacheFilter(filter, cacheKey);
+ }
+ filter = wrapSmartNameFilter(filter, smartNameFieldMappers, parseContext);
+ if (filterName != null) {
+ parseContext.addNamedFilter(filterName, filter);
+ }
+
+ return filter;
+ }
+}
View
89 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardQueryBuilder.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.search.WildcardQuery;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+
+/**
+ * Implements the wildcard search query.
+ * Note this query can be a bit slow, as it needs to iterate over a number of terms.
+ */
+public class HashSplitterWildcardQueryBuilder extends BaseQueryBuilder {
+
+ private final String name;
+
+ private final String wildcard;
+
+ private float boost = -1;
+
+ private String rewrite;
+
+ public static HashSplitterWildcardQueryBuilder hashSplitterWildcardQuery(String name, String value) {
+ return new HashSplitterWildcardQueryBuilder(name, value);
+ }
+
+ /**
+ * Implements the wildcard search query.
+ * Note this query can be a bit slow, as it needs to iterate over a number of terms.
+ *
+ * @param name The field name
+ * @param wildcard The wildcard query string
+ */
+ public HashSplitterWildcardQueryBuilder(String name, String wildcard) {
+ this.name = name;
+ this.wildcard = wildcard;
+ }
+
+ public HashSplitterWildcardQueryBuilder rewrite(String rewrite) {
+ this.rewrite = rewrite;
+ return this;
+ }
+
+ /**
+ * Sets the boost for this query. Documents matching this query will (in addition to the normal
+ * weightings) have their score multiplied by the boost provided.
+ */
+ public HashSplitterWildcardQueryBuilder boost(float boost) {
+ this.boost = boost;
+ return this;
+ }
+
+ @Override
+ public void doXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(HashSplitterWildcardQueryParser.NAME);
+ if (boost == -1 && rewrite == null) {
+ builder.field(name, wildcard);
+ } else {
+ builder.startObject(name);
+ builder.field("wildcard", wildcard);
+ if (boost != -1) {
+ builder.field("boost", boost);
+ }
+ if (rewrite != null) {
+ builder.field("rewrite", rewrite);
+ }
+ builder.endObject();
+ }
+ builder.endObject();
+ }
+}
View
119 src/main/java/org/elasticsearch/index/query/HashSplitterWildcardQueryParser.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.hashsplitter.CustomWildcardSearchFieldMapper;
+import org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper;
+import org.elasticsearch.index.query.support.QueryParsers;
+
+import java.io.IOException;
+
+import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery;
+
/**
 * Parser for the "hashsplitter_wildcard" query.
 * Accepts both the short form {@code {"hashsplitter_wildcard": {"field": "pattern"}}}
 * and the long form with "wildcard"/"value", "boost" and "rewrite" options,
 * mirroring the stock {@code wildcard} query syntax.
 */
public class HashSplitterWildcardQueryParser implements QueryParser {

    public static final String NAME = "hashsplitter_wildcard";

    @Inject
    public HashSplitterWildcardQueryParser() {
    }

    @Override
    public String[] names() {
        return new String[]{NAME};
    }

    @Override
    public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
        XContentParser parser = parseContext.parser();

        // The query object must open with the field name being queried.
        XContentParser.Token token = parser.nextToken();
        if (token != XContentParser.Token.FIELD_NAME) {
            throw new QueryParsingException(parseContext.index(), "["+NAME+"] query malformed, no field");
        }
        String fieldName = parser.currentName();
        String rewriteMethod = null;

        String value = null;
        float boost = 1.0f;
        token = parser.nextToken();
        if (token == XContentParser.Token.START_OBJECT) {
            // Long form: {"field": {"wildcard": "...", "boost": ..., "rewrite": "..."}}
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else {
                    if ("wildcard".equals(currentFieldName)) {
                        value = parser.text();
                    } else if ("value".equals(currentFieldName)) {
                        // "value" is accepted as a synonym of "wildcard".
                        value = parser.text();
                    } else if ("boost".equals(currentFieldName)) {
                        boost = parser.floatValue();
                    } else if ("rewrite".equals(currentFieldName)) {
                        rewriteMethod = parser.textOrNull();
                    } else {
                        throw new QueryParsingException(parseContext.index(), "["+NAME+"] query does not support [" + currentFieldName + "]");
                    }
                }
            }
            // Consume the token closing the outer query object.
            parser.nextToken();
        } else {
            // Short form: {"field": "pattern"}
            value = parser.text();
            parser.nextToken();
        }

        if (value == null) {
            throw new QueryParsingException(parseContext.index(), "No value specified for "+NAME+" query");
        }

        Query query = null;
        MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
        if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
            FieldMapper mapper = smartNameFieldMappers.mapper();
            if (mapper != null && mapper instanceof CustomWildcardSearchFieldMapper) {
                // Let the HashSplitter-aware mapper build a specialized wildcard query;
                // it may return null, in which case we fall through to the generic path below.
                CustomWildcardSearchFieldMapper hashsplitterMapper = (CustomWildcardSearchFieldMapper) mapper;
                query = hashsplitterMapper.wildcardQuery(value, QueryParsers.parseRewriteMethod(rewriteMethod), parseContext);
            }
            if (query == null) { // not associated with a HashSplitterFieldMapper OR wildcardQuery() returned null
                // Fallback on the same code as org.elasticsearch.index.query.WildcardQueryParser
                fieldName = smartNameFieldMappers.mapper().names().indexName();
                value = smartNameFieldMappers.mapper().indexedValue(value);
            }
        }
        if (query == null) {
            // Generic Lucene wildcard query fallback.
            WildcardQuery q = new WildcardQuery(new Term(fieldName, value));
            q.setRewriteMethod(QueryParsers.parseRewriteMethod(rewriteMethod));
            query = q;
        }
        query.setBoost(boost);
        return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
    }
}
View
3  src/main/java/org/elasticsearch/index/query/RegisterHashSplitterQueryParsers.java
@@ -37,6 +37,9 @@ public RegisterHashSplitterQueryParsers(Index index, @IndexSettings Settings ind
indicesQueriesRegistry.addQueryParser(new HashSplitterTermQueryParser());
indicesQueriesRegistry.addFilterParser(new HashSplitterTermFilterParser());
+
+ indicesQueriesRegistry.addQueryParser(new HashSplitterWildcardQueryParser());
+ indicesQueriesRegistry.addFilterParser(new HashSplitterWildcardFilterParser());
}
}
View
213 src/test/java/org/apache/lucene/search/WildcardTermEnumTests.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.not;
+
+@Test
+public class WildcardTermEnumTests {
+
+ private Directory index;
+ private IndexReader reader;
+ private IndexSearcher searcher;
+
+ @BeforeClass
+ public void init() throws Exception {
+ index = new RAMDirectory();
+ IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer());
+ IndexWriter writer = new IndexWriter(index, cfg);
+ writer.addDocument(tokensToDoc("a0000", "b1111", "c2222", "d3333"));
+ writer.close();
+
+ reader = IndexReader.open(index, true);
+ searcher = new IndexSearcher(reader);
+ }
+
+ @AfterClass
+ public void tearDown() throws Exception {
+ searcher.close();
+ reader.close();
+ index.close();
+ }
+
+ private TokenStream tokensToStream(final String... tokens) {
+ return new TokenStream() {
+ private int pos = 0;
+ private CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (pos >= tokens.length)
+ return false;
+ termAttr.setEmpty();
+ termAttr.append(tokens[pos]);
+ pos++;
+ return true;
+ }
+ };
+ }
+
+ private Document tokensToDoc(String... tokens) {
+ Document rtn = new Document();
+ rtn.add(new Field("raw", tokensToStream(tokens)));
+ return rtn;
+ }
+
+ @Test
+ public void testExact() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c2222"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardAnyPrefix() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c2*"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardAnyPrefixAlternate() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c2%"), '_', '%');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardAnySuffix() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "*2"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardAnySuffixAlternate() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "%2"), '_', '%');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardOnePrefix() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c2???"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardOnePrefixAlternate() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c2___"), '_', '%');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardOneSuffix() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "????2"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testWildcardOneSuffixAlternate() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "____2"), '_', '%');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testCombination() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c?2*"), '?', '*');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+ @Test
+ public void testCombinationAlternate() throws Exception {
+ WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("raw", "c_2%"), '_', '%');
+ assertThat(termEnum.term(), not(equalTo(null)));
+ assertThat(termEnum.term().field(), equalTo("raw"));
+ assertThat(termEnum.term().text(), equalTo("c2222"));
+ assertThat(termEnum.next(), equalTo(false));
+ assertThat(termEnum.term(), equalTo(null));
+ termEnum.close();
+ }
+
+}
View
34 src/test/java/org/elasticsearch/index/mapper/hashsplitter/HashSplitterFieldMapperTests.java
@@ -44,6 +44,7 @@
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.fieldQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
@@ -196,6 +197,39 @@ public void testTermQueries() throws Exception {
}
@Test
    // Verifies plain `term` filter behaviour against a hashsplitter-mapped field
    // (chunk_length=2): only values made of complete chunks can match.
    public void testTermFilters() throws Exception {
        String mapping = copyToStringFromClasspath("/chunklength2-mapping.json");

        node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();

        node.client().index(indexRequest("test").type("splitted_hashes")
                .source(jsonBuilder().startObject().field("hash", "0011223344556677").endObject())).actionGet();
        node.client().admin().indices().refresh(refreshRequest()).actionGet();

        CountResponse countResponse;

        // We would like these to work, but it doesn't seem possible...
        // (ie. having a term filter that is *not analyzed*. it instead goes through fieldFilter)
//        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "A00")))).actionGet();
//        assertThat("term filter", countResponse.count(), equalTo(1l));
//
//        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "B22")))).actionGet();
//        assertThat("term filter with unexisting term", countResponse.count(), equalTo(0l));

        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "0011223344556677")))).actionGet();
        assertThat("term filter on exact value", countResponse.count(), equalTo(1l));

        // NOTE(review): presumably matches because the prefix is made of complete
        // chunks, each of which is an indexed term — confirm against the mapper.
        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "00112233445566")))).actionGet();
        assertThat("term filter on a prefix", countResponse.count(), equalTo(1l)); // should match, unfortunately!

        // A trailing incomplete chunk ("...6" with chunk_length=2) never matches an indexed term.
        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "0011223344556")))).actionGet();
        assertThat("term filter on a prefix with incomplete chunk", countResponse.count(), equalTo(0l));

        countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), termFilter("hash", "0011223344556688")))).actionGet();
        assertThat("term filter on different value, same prefix", countResponse.count(), equalTo(0l));
    }
+
+ @Test
public void testTextQueries() throws Exception {
String mapping = copyToStringFromClasspath("/chunklength2-mapping.json");
View
174 src/test/java/org/elasticsearch/index/query/HashSplitterQueryParsersTests.java
@@ -44,6 +44,8 @@
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.HashSplitterTermFilterBuilder.hashSplitterTermFilter;
import static org.elasticsearch.index.query.HashSplitterTermQueryBuilder.hashSplitterTermQuery;
+import static org.elasticsearch.index.query.HashSplitterWildcardFilterBuilder.hashSplitterWildcardFilter;
+import static org.elasticsearch.index.query.HashSplitterWildcardQueryBuilder.hashSplitterWildcardQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
@@ -133,4 +135,176 @@ public void testTermFilter() throws Exception {
assertThat("term filter on inexistent term", countResponse.count(), equalTo(0l));
}
+ @Test
+ public void testWildcardQueryVariableSize() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "????1111*"))).actionGet();
+ assertThat("wildcard query existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard query", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "000*"))).actionGet();
+ assertThat("wildcard query on a prefix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "*3333"))).actionGet();
+ assertThat("wildcard query on a suffix with default variable size", countResponse.count(), equalTo(0l)); // no match because of variable size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "000*3"))).actionGet();
+ assertThat("wildcard query on a prefix and suffix", countResponse.count(), equalTo(0l)); // no match because of variable size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "99*99"))).actionGet();
+ assertThat("wildcard query on inexistent term", countResponse.count(), equalTo(0l));
+ }
+
+ @Test
+ public void testWildcardQueryVariableSizeAlternate() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-SqlWildcards-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "____1111%"))).actionGet();
+ assertThat("wildcard query existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard query with SQL-flavoured wildcards", countResponse.count(), equalTo(1l));
+ }
+
+ @Test
+ public void testWildcardFilterVariableSize() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "????1111*")))).actionGet();
+ assertThat("wildcard filter existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard filter", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "000*")))).actionGet();
+ assertThat("wildcard filter on a prefix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "*3333")))).actionGet();
+ assertThat("wildcard filter on a suffix with default variable size", countResponse.count(), equalTo(0l)); // no match because of variable size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "000*3")))).actionGet();
+ assertThat("wildcard filter on a prefix and suffix", countResponse.count(), equalTo(0l)); // no match because of variable size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "99*99")))).actionGet();
+ assertThat("wildcard filter on inexistent term", countResponse.count(), equalTo(0l));
+ }
+
+ @Test
+ public void testWildcardFilterVariableSizeAlternate() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-SqlWildcards-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "____1111%")))).actionGet();
+ assertThat("wildcard query existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard query with SQL-flavoured wildcards", countResponse.count(), equalTo(1l));
+ }
+
+ @Test
+ public void testWildcardQueryFixedSize() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-size16Fixed-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "????1111*"))).actionGet();
+ assertThat("wildcard query existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard query", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "000*"))).actionGet();
+ assertThat("wildcard query on a prefix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "*3333"))).actionGet();
+ assertThat("wildcard query on a suffix with fixed size", countResponse.count(), equalTo(1l)); // matches because of fixed size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "000*3"))).actionGet();
+ assertThat("wildcard query on a prefix and suffix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterWildcardQuery("hash", "99*99"))).actionGet();
+ assertThat("wildcard query on inexistent term", countResponse.count(), equalTo(0l));
+ }
+
+ @Test
+ public void testWildcardFilterFixedSize() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-size16Fixed-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "????1111*")))).actionGet();
+ assertThat("wildcard filter existence", countResponse.failedShards(), equalTo(0));
+ assertThat("wildcard filter", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "000*")))).actionGet();
+ assertThat("wildcard filter on a prefix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "*3333")))).actionGet();
+ assertThat("wildcard filter on a suffix with fixed size", countResponse.count(), equalTo(1l)); // matches because of fixed size
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "000*3")))).actionGet();
+ assertThat("wildcard filter on a prefix and suffix", countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "99*99")))).actionGet();
+ assertThat("wildcard filter on inexistent term", countResponse.count(), equalTo(0l));
+ }
+
+ @Test
+ public void testCoverage() throws Exception {
+ String mapping = copyToStringFromClasspath("/chunklength4-prefixesLowercasedAlphabet-size16Fixed-mapping.json");
+
+ node.client().admin().indices().putMapping(putMappingRequest("test").type("splitted_hashes").source(mapping)).actionGet();
+
+ node.client().index(indexRequest("test").type("splitted_hashes")
+ .source(jsonBuilder().startObject().field("hash", "0000111122223333").endObject())).actionGet();
+ node.client().admin().indices().refresh(refreshRequest()).actionGet();
+
+ CountResponse countResponse;
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(hashSplitterTermQuery("hash", "a0000").boost(2.0f))).actionGet();
+ assertThat(countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterTermFilter("hash", "a0000").filterName("hash:a0000").cache(true).cacheKey("hash:a0000")))).actionGet();
+ assertThat(countResponse.count(), equalTo(1l));
+
+ countResponse = node.client().count(countRequest("test").types("splitted_hashes").query(filteredQuery(matchAllQuery(), hashSplitterWildcardFilter("hash", "000*").filterName("hash:0000*").cache(true).cacheKey("hash:000*")))).actionGet();
+ assertThat(countResponse.count(), equalTo(1l));
+ }
+
}
View
17 src/test/resources/chunklength4-prefixesLowercasedAlphabet-SqlWildcards-mapping.json
@@ -0,0 +1,17 @@
+{
+ splitted_hashes:{
+ dynamic:false,
+ include_in_all: false,
+ properties:{
+ "hash":{
+ type:"hashsplitter",
+ settings:{
+ chunk_length: 4,
+ prefix: "abcdefghijklmnopqrstuvwxyz",
+ wildcard_one: "_",
+ wildcard_any: "%"
+ }
+ }
+ }
+ }
+}
View
16 src/test/resources/chunklength4-prefixesLowercasedAlphabet-size16Fixed-mapping.json
@@ -0,0 +1,16 @@
+{
+ splitted_hashes:{
+ dynamic:false,
+ include_in_all: false,
+ properties:{
+ "hash":{
+ type:"hashsplitter",
+ settings:{
+ chunk_length: 4,
+ prefix: "abcdefghijklmnopqrstuvwxyz",
+ size: 16
+ }
+ }
+ }
+ }
+}

No commit comments for this range

Something went wrong with that request. Please try again.