Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Use whitespace case insensitive analyser for project name and desc
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Eng committed Jun 2, 2015
1 parent 3daa552 commit d451d11
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 39 deletions.
2 changes: 2 additions & 0 deletions docs/release-notes.md
Expand Up @@ -41,6 +41,8 @@ Example usage in html file: `<link rel="shortcut icon" href="#{assets['img/logo/
* [1098362](https://bugzilla.redhat.com/show_bug.cgi?id=1098362) - download link in editor doesn't encode properly and result in 404
* [1225689](https://bugzilla.redhat.com/show_bug.cgi?id=1225689) - [Project Version View] Failed to load entries when the doc id contains characters that should be URL encoded
* [981498](https://bugzilla.redhat.com/show_bug.cgi?id=981498) - No underscore sanity checking on creating usernames
* [1147304](https://bugzilla.redhat.com/show_bug.cgi?id=1147304) - Project search fails on special characters
* [1123186](https://bugzilla.redhat.com/show_bug.cgi?id=1123186) - Project search fails for multiple word project names

-----------------------

Expand Down
@@ -0,0 +1,56 @@
/*
* Copyright 2015, Red Hat, Inc. and individual contributors as indicated by the
* @author tags. See the copyright.txt file in the distribution for a full
* listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF
* site: http://www.fsf.org.
*/

package org.zanata.hibernate.search;

import java.io.Reader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

/**
* @author Alex Eng <a href="mailto:aeng@redhat.com">aeng@redhat.com</a>
*/
public class CaseInsensitiveWhitespaceAnalyzer extends ReusableAnalyzerBase {

    /** Lucene compatibility version handed to the tokenizer and the filter. */
    private final Version matchVersion;

    /**
     * Creates an analyzer that splits text on whitespace and lower-cases
     * every resulting token.
     *
     * @param matchVersion
     *            Lucene version whose behaviour the tokenizer and filter
     *            should match
     */
    public CaseInsensitiveWhitespaceAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
    }

    /**
     * Builds the analysis chain: a {@link WhitespaceTokenizer} reading from
     * {@code reader}, wrapped in a {@link LowerCaseFilter}.
     *
     * @param fieldName
     *            name of the field being analysed (not used by this chain)
     * @param reader
     *            source of the text to tokenize
     * @return components pairing the tokenizer with the lower-casing stream
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
            Reader reader) {
        final WhitespaceTokenizer tokenizer =
                new WhitespaceTokenizer(matchVersion, reader);
        return new TokenStreamComponents(tokenizer,
                new LowerCaseFilter(matchVersion, tokenizer));
    }
}
7 changes: 5 additions & 2 deletions zanata-model/src/main/java/org/zanata/model/HProject.java
Expand Up @@ -51,13 +51,15 @@
import lombok.Setter;
import lombok.ToString;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.hibernate.annotations.Cache;
import org.hibernate.annotations.CacheConcurrencyStrategy;
import org.hibernate.annotations.Cascade;
import org.hibernate.annotations.Type;
import org.hibernate.annotations.TypeDef;
import org.hibernate.annotations.TypeDefs;
import org.hibernate.annotations.Where;
import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.validator.constraints.NotEmpty;
Expand All @@ -66,6 +68,7 @@
import org.zanata.common.EntityStatus;
import org.zanata.common.LocaleId;
import org.zanata.common.ProjectType;
import org.zanata.hibernate.search.CaseInsensitiveWhitespaceAnalyzer;
import org.zanata.model.type.EntityStatusType;
import org.zanata.model.type.LocaleIdType;
import org.zanata.model.validator.Url;
Expand Down Expand Up @@ -102,11 +105,11 @@ public class HProject extends SlugEntityBase implements Serializable,

@Size(max = 80)
@NotEmpty
@Field()
@Field(analyzer = @Analyzer(impl = CaseInsensitiveWhitespaceAnalyzer.class))
private String name;

@Size(max = 100)
@Field()
@Field(analyzer = @Analyzer(impl = CaseInsensitiveWhitespaceAnalyzer.class))
private String description;

@Type(type = "text")
Expand Down
Expand Up @@ -46,7 +46,7 @@ public class ProjectSearch implements Serializable {
new QueryProjectPagedListDataModel(DEFAULT_PAGE_SIZE);

// Number of results to return as part of autocomplete
private final static int INITIAL_RESULT_COUNT = 5;
private final static int INITIAL_RESULT_COUNT = 10;

public DataModel getProjectPagedListDataModel() {
return queryProjectPagedListDataModel;
Expand Down
73 changes: 37 additions & 36 deletions zanata-war/src/main/java/org/zanata/dao/ProjectDAO.java
Expand Up @@ -7,13 +7,16 @@
import javax.annotation.Nullable;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.hibernate.Query;
import org.hibernate.Session;
Expand All @@ -25,14 +28,13 @@
import org.jboss.seam.annotations.Name;
import org.jboss.seam.annotations.Scope;
import org.zanata.common.EntityStatus;
import org.zanata.hibernate.search.CaseInsensitiveWhitespaceAnalyzer;
import org.zanata.hibernate.search.IndexFieldLabels;
import org.zanata.model.HAccount;
import org.zanata.model.HPerson;
import org.zanata.model.HProject;
import org.zanata.model.HProjectIteration;

import com.google.common.collect.Lists;

@Name("projectDAO")
@AutoCreate
@Scope(ScopeType.STATELESS)
Expand Down Expand Up @@ -255,51 +257,24 @@ public int getTotalObsoleteProjectCount() {
public List<HProject> searchProjects(@Nonnull String searchQuery,
int maxResult, int firstResult, boolean includeObsolete)
throws ParseException {
FullTextQuery query = getTextQuery(searchQuery, includeObsolete);
FullTextQuery query = buildSearchQuery(searchQuery, includeObsolete);
return query.setMaxResults(maxResult).setFirstResult(firstResult)
.getResultList();
}

public int getQueryProjectSize(@Nonnull String searchQuery,
boolean includeObsolete) throws ParseException {
FullTextQuery query = getTextQuery(searchQuery, includeObsolete);
FullTextQuery query = buildSearchQuery(searchQuery, includeObsolete);
return query.getResultSize();
}

/**
* Parses a query string into a Lucene query against a single field, using a
* {@link StandardAnalyzer} with Lucene 2.9 compatibility.
*
* @param field
*            - default lucene field the parsed terms apply to
* @param searchQuery
*            - query string, passed to the parser as-is (no escaping is
*            done here)
* @return the parsed lucene query
* @throws ParseException
*             if the parser cannot parse searchQuery
*/
private org.apache.lucene.search.Query constructQuery(String field, String searchQuery)
throws ParseException {
QueryParser parser =
new QueryParser(Version.LUCENE_29, field,
new StandardAnalyzer(Version.LUCENE_29));
return parser.parse(searchQuery);
}

/**
* Replaces every hyphen in the query with a space.
*
* Per the original author's note, the Lucene index for project name and
* slug replaces hyphens with spaces, so the search text is normalised the
* same way before searching. (NOTE(review): the indexing side is not
* visible here - confirm.)
*
* @param query
*            - search text, must not be null
* @return the search text with each "-" replaced by " "
*/
private String parseSlugAndName(String query) {
return query.replace("-", " ");
}

private FullTextQuery getTextQuery(@Nonnull String searchQuery,
boolean includeObsolete) throws ParseException {
org.apache.lucene.search.Query nameQuery =
constructQuery("name", parseSlugAndName(searchQuery) + "*");
org.apache.lucene.search.Query slugQuery =
constructQuery("slug", parseSlugAndName(searchQuery) + "*");

searchQuery = QueryParser.escape(searchQuery);
org.apache.lucene.search.Query descQuery =
constructQuery("description", searchQuery);
private FullTextQuery buildSearchQuery(@Nonnull String searchQuery,
boolean includeObsolete) throws ParseException {

BooleanQuery booleanQuery = new BooleanQuery();
booleanQuery.add(slugQuery, BooleanClause.Occur.SHOULD);
booleanQuery.add(nameQuery, BooleanClause.Occur.SHOULD);
booleanQuery.add(descQuery, BooleanClause.Occur.SHOULD);
booleanQuery.add(buildSearchFieldQuery(searchQuery, "slug"), BooleanClause.Occur.SHOULD);
booleanQuery.add(buildSearchFieldQuery(searchQuery, "name"), BooleanClause.Occur.SHOULD);
booleanQuery.add(buildSearchFieldQuery(searchQuery, "description"), BooleanClause.Occur.SHOULD);

if (!includeObsolete) {
TermQuery obsoleteStateQuery =
Expand All @@ -311,6 +286,32 @@ private FullTextQuery getTextQuery(@Nonnull String searchQuery,
return entityManager.createFullTextQuery(booleanQuery, HProject.class);
}

/**
 * Build BooleanQuery on single lucene field by splitting searchQuery on
 * white space. Every non-empty word gets a trailing wildcard and must
 * match (all words are combined with MUST).
 *
 * @param searchQuery
 *            - query string; lucene query-syntax characters are escaped
 *            before the string is split
 * @param field
 *            - lucene field
 * @return query with one MUST clause per non-empty word; it has no clauses
 *         when searchQuery is empty or contains only white space
 * @throws ParseException
 *             if the query parser rejects one of the words
 */
private BooleanQuery buildSearchFieldQuery(@Nonnull String searchQuery,
        @Nonnull String field) throws ParseException {
    BooleanQuery query = new BooleanQuery();

    // The parser does not depend on the word being parsed: build it once
    // instead of once per word.
    QueryParser parser = new QueryParser(Version.LUCENE_29, field,
            new CaseInsensitiveWhitespaceAnalyzer(Version.LUCENE_29));

    // escape special character search
    String escapedQuery = QueryParser.escape(searchQuery);

    for (String searchString : escapedQuery.split("\\s+")) {
        // split() produces an empty first element for an empty query or
        // one with leading white space. Parsing it would mean parsing a
        // bare "*", which the parser is expected to reject as a leading
        // wildcard and so fail the whole search - skip it instead.
        if (searchString.isEmpty()) {
            continue;
        }
        query.add(parser.parse(searchString + "*"),
                BooleanClause.Occur.MUST);
    }
    return query;
}

public List<HProject> findAllTranslatedProjects(HAccount account, int maxResults) {
Query q =
getSession()
Expand Down

0 comments on commit d451d11

Please sign in to comment.