From d451d1138457c08a7cd076367aad8942940253cf Mon Sep 17 00:00:00 2001 From: Alex Eng Date: Tue, 2 Jun 2015 10:46:55 +1000 Subject: [PATCH] Use whitespace case insensitive analyser for project name and desc --- docs/release-notes.md | 2 + .../CaseInsensitiveWhitespaceAnalyzer.java | 56 ++++++++++++++ .../main/java/org/zanata/model/HProject.java | 7 +- .../java/org/zanata/action/ProjectSearch.java | 2 +- .../main/java/org/zanata/dao/ProjectDAO.java | 73 ++++++++++--------- 5 files changed, 101 insertions(+), 39 deletions(-) create mode 100644 zanata-model/src/main/java/org/zanata/hibernate/search/CaseInsensitiveWhitespaceAnalyzer.java diff --git a/docs/release-notes.md b/docs/release-notes.md index 26561b2e59..e71d9e85b7 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -41,6 +41,8 @@ Example usage in html file: `aeng@redhat.com + */ +public class CaseInsensitiveWhitespaceAnalyzer extends ReusableAnalyzerBase { + + private final Version matchVersion; + + /** + * Creates a new {@link CaseInsensitiveWhitespaceAnalyzer} + * @param matchVersion Lucene version to match See {@link above} + */ + public CaseInsensitiveWhitespaceAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; + } + + @Override + protected TokenStreamComponents createComponents( + String fieldName, Reader reader) { + + final WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader); + TokenStream tok = new LowerCaseFilter(matchVersion, src); + + return new TokenStreamComponents(src, tok); + } +} diff --git a/zanata-model/src/main/java/org/zanata/model/HProject.java b/zanata-model/src/main/java/org/zanata/model/HProject.java index 026af0cea4..7dcfb673de 100644 --- a/zanata-model/src/main/java/org/zanata/model/HProject.java +++ b/zanata-model/src/main/java/org/zanata/model/HProject.java @@ -51,6 +51,7 @@ import lombok.Setter; import lombok.ToString; +import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.hibernate.annotations.Cache; import 
org.hibernate.annotations.CacheConcurrencyStrategy; import org.hibernate.annotations.Cascade; @@ -58,6 +59,7 @@ import org.hibernate.annotations.TypeDef; import org.hibernate.annotations.TypeDefs; import org.hibernate.annotations.Where; +import org.hibernate.search.annotations.Analyzer; import org.hibernate.search.annotations.Field; import org.hibernate.search.annotations.Indexed; import org.hibernate.validator.constraints.NotEmpty; @@ -66,6 +68,7 @@ import org.zanata.common.EntityStatus; import org.zanata.common.LocaleId; import org.zanata.common.ProjectType; +import org.zanata.hibernate.search.CaseInsensitiveWhitespaceAnalyzer; import org.zanata.model.type.EntityStatusType; import org.zanata.model.type.LocaleIdType; import org.zanata.model.validator.Url; @@ -102,11 +105,11 @@ public class HProject extends SlugEntityBase implements Serializable, @Size(max = 80) @NotEmpty - @Field() + @Field(analyzer = @Analyzer(impl = CaseInsensitiveWhitespaceAnalyzer.class)) private String name; @Size(max = 100) - @Field() + @Field(analyzer = @Analyzer(impl = CaseInsensitiveWhitespaceAnalyzer.class)) private String description; @Type(type = "text") diff --git a/zanata-war/src/main/java/org/zanata/action/ProjectSearch.java b/zanata-war/src/main/java/org/zanata/action/ProjectSearch.java index bff634eecc..b8a139d12e 100644 --- a/zanata-war/src/main/java/org/zanata/action/ProjectSearch.java +++ b/zanata-war/src/main/java/org/zanata/action/ProjectSearch.java @@ -46,7 +46,7 @@ public class ProjectSearch implements Serializable { new QueryProjectPagedListDataModel(DEFAULT_PAGE_SIZE); // Count of result to be return as part of autocomplete - private final static int INITIAL_RESULT_COUNT = 5; + private final static int INITIAL_RESULT_COUNT = 10; public DataModel getProjectPagedListDataModel() { return queryProjectPagedListDataModel; diff --git a/zanata-war/src/main/java/org/zanata/dao/ProjectDAO.java b/zanata-war/src/main/java/org/zanata/dao/ProjectDAO.java index c207be1284..d8e8030eec 
100644 --- a/zanata-war/src/main/java/org/zanata/dao/ProjectDAO.java +++ b/zanata-war/src/main/java/org/zanata/dao/ProjectDAO.java @@ -7,6 +7,8 @@ import javax.annotation.Nullable; import org.apache.commons.lang.StringUtils; +import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; @@ -14,6 +16,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Version; import org.hibernate.Query; import org.hibernate.Session; @@ -25,14 +28,13 @@ import org.jboss.seam.annotations.Name; import org.jboss.seam.annotations.Scope; import org.zanata.common.EntityStatus; +import org.zanata.hibernate.search.CaseInsensitiveWhitespaceAnalyzer; import org.zanata.hibernate.search.IndexFieldLabels; import org.zanata.model.HAccount; import org.zanata.model.HPerson; import org.zanata.model.HProject; import org.zanata.model.HProjectIteration; -import com.google.common.collect.Lists; - @Name("projectDAO") @AutoCreate @Scope(ScopeType.STATELESS) @@ -255,51 +257,24 @@ public int getTotalObsoleteProjectCount() { public List searchProjects(@Nonnull String searchQuery, int maxResult, int firstResult, boolean includeObsolete) throws ParseException { - FullTextQuery query = getTextQuery(searchQuery, includeObsolete); + FullTextQuery query = buildSearchQuery(searchQuery, includeObsolete); return query.setMaxResults(maxResult).setFirstResult(firstResult) .getResultList(); } public int getQueryProjectSize(@Nonnull String searchQuery, boolean includeObsolete) throws ParseException { - FullTextQuery query = getTextQuery(searchQuery, includeObsolete); + FullTextQuery query = buildSearchQuery(searchQuery, includeObsolete); return query.getResultSize(); } 
- private org.apache.lucene.search.Query constructQuery(String field, String searchQuery) - throws ParseException { - QueryParser parser = - new QueryParser(Version.LUCENE_29, field, - new StandardAnalyzer(Version.LUCENE_29)); - return parser.parse(searchQuery); - } - - /** - * Lucene index for project name and slug replaces hyphen with - * space. This method is to replace hyphen with space when performing search - * - * @param query - * @return - */ - private String parseSlugAndName(String query) { - return query.replace("-", " "); - } - - private FullTextQuery getTextQuery(@Nonnull String searchQuery, - boolean includeObsolete) throws ParseException { - org.apache.lucene.search.Query nameQuery = - constructQuery("name", parseSlugAndName(searchQuery) + "*"); - org.apache.lucene.search.Query slugQuery = - constructQuery("slug", parseSlugAndName(searchQuery) + "*"); - - searchQuery = QueryParser.escape(searchQuery); - org.apache.lucene.search.Query descQuery = - constructQuery("description", searchQuery); + private FullTextQuery buildSearchQuery(@Nonnull String searchQuery, + boolean includeObsolete) throws ParseException { BooleanQuery booleanQuery = new BooleanQuery(); - booleanQuery.add(slugQuery, BooleanClause.Occur.SHOULD); - booleanQuery.add(nameQuery, BooleanClause.Occur.SHOULD); - booleanQuery.add(descQuery, BooleanClause.Occur.SHOULD); + booleanQuery.add(buildSearchFieldQuery(searchQuery, "slug"), BooleanClause.Occur.SHOULD); + booleanQuery.add(buildSearchFieldQuery(searchQuery, "name"), BooleanClause.Occur.SHOULD); + booleanQuery.add(buildSearchFieldQuery(searchQuery, "description"), BooleanClause.Occur.SHOULD); if (!includeObsolete) { TermQuery obsoleteStateQuery = @@ -311,6 +286,32 @@ private FullTextQuery getTextQuery(@Nonnull String searchQuery, return entityManager.createFullTextQuery(booleanQuery, HProject.class); } + /** + * Build BooleanQuery on single lucene field by splitting searchQuery with + * white space. 
+ * + * @param searchQuery + * - query string; special characters are escaped, then it is + * split on whitespace + * @param field + * - lucene field + */ + private BooleanQuery buildSearchFieldQuery(@Nonnull String searchQuery, + @Nonnull String field) throws ParseException { + BooleanQuery query = new BooleanQuery(); + + //escape special character search + searchQuery = QueryParser.escape(searchQuery); + + for(String searchString: searchQuery.split("\\s+")) { + QueryParser parser = new QueryParser(Version.LUCENE_29, field, + new CaseInsensitiveWhitespaceAnalyzer(Version.LUCENE_29)); + + query.add(parser.parse(searchString + "*"), BooleanClause.Occur.MUST); + } + return query; + } + public List findAllTranslatedProjects(HAccount account, int maxResults) { Query q = getSession()