Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Remove case-sensitive indexes from text flow / target.
Browse files Browse the repository at this point in the history
Rename index field names and remove unnecessary elements from code.
Adapt the queries to filter the case-sensitive results in memory.
  • Loading branch information
Carlos Munoz committed Aug 10, 2012
1 parent c34c679 commit a00ece0
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 99 deletions.
Expand Up @@ -17,25 +17,25 @@ public interface IndexFieldLabels
public static final String LOCALE_ID_FIELD = "locale";
public static final String CONTENT_STATE_FIELD = "state";

public static final String CONTENT_CASE_FOLDED = "content-nocase";
public static final String CONTENT_CASE_PRESERVED = "content-case";
public static final String TF_CONTENT = "textFlow.content-nocase";
public static final String CONTENT = "content-nocase";

public static final String CONTENT_FIELDS_CASE_FOLDED[] = {
CONTENT_CASE_FOLDED + 0,
CONTENT_CASE_FOLDED + 1,
CONTENT_CASE_FOLDED + 2,
CONTENT_CASE_FOLDED + 3,
CONTENT_CASE_FOLDED + 4,
CONTENT_CASE_FOLDED + 5
public static final String TF_CONTENT_FIELDS[] = {
TF_CONTENT + 0,
TF_CONTENT + 1,
TF_CONTENT + 2,
TF_CONTENT + 3,
TF_CONTENT + 4,
TF_CONTENT + 5
};

public static final String CONTENT_FIELDS_CASE_PRESERVED[] = {
CONTENT_CASE_PRESERVED + 0,
CONTENT_CASE_PRESERVED + 1,
CONTENT_CASE_PRESERVED + 2,
CONTENT_CASE_PRESERVED + 3,
CONTENT_CASE_PRESERVED + 4,
CONTENT_CASE_PRESERVED + 5
};
public static final String CONTENT_FIELDS[] = {
CONTENT + 0,
CONTENT + 1,
CONTENT + 2,
CONTENT + 3,
CONTENT + 4,
CONTENT + 5
};

}
20 changes: 6 additions & 14 deletions zanata-model/src/main/java/org/zanata/model/HTextContainer.java
Expand Up @@ -30,7 +30,6 @@

import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.FieldBridge;
import org.hibernate.search.annotations.Fields;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Parameter;
import org.zanata.common.HasContents;
Expand All @@ -46,18 +45,11 @@ abstract class HTextContainer implements HasContents, Serializable
private static final long serialVersionUID = 1L;

@SuppressWarnings("unused")
@Fields({
@Field(name=IndexFieldLabels.CONTENT_CASE_FOLDED,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="fold"),
@Parameter(name="ngrams", value="multisize")})),
@Field(name = IndexFieldLabels.CONTENT_CASE_PRESERVED,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="preserve"),
@Parameter(name="ngrams", value="multisize")}))
})
@Field(name=IndexFieldLabels.CONTENT,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="fold"),
@Parameter(name="ngrams", value="multisize")}))
private List<String> getContentsToIndex()
{
return getContents();
Expand All @@ -75,7 +67,7 @@ public String getContent()
}

/**
* As of release 1.6, replaced by {@link #setContents()}
* As of release 1.6, replaced by {@link #setContents(String...)}
* @return
*/
@Deprecated
Expand Down
11 changes: 1 addition & 10 deletions zanata-model/src/main/java/org/zanata/model/HTextFlow.java
Expand Up @@ -25,7 +25,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
Expand All @@ -45,7 +44,6 @@
import javax.persistence.PostUpdate;
import javax.persistence.PreUpdate;

import lombok.AccessLevel;
import org.hibernate.annotations.AccessType;
import org.hibernate.annotations.BatchSize;
import org.hibernate.annotations.Cache;
Expand All @@ -57,8 +55,6 @@
import org.hibernate.annotations.Type;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.FieldBridge;
import org.hibernate.search.annotations.FilterCacheModeType;
import org.hibernate.search.annotations.FullTextFilterDef;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.validator.Length;
Expand All @@ -67,14 +63,13 @@
import org.zanata.common.HasContents;
import org.zanata.common.LocaleId;
import org.zanata.hibernate.search.ContainingWorkspaceBridge;
import org.zanata.hibernate.search.IdFilterFactory;
import org.zanata.model.po.HPotEntryData;
import org.zanata.util.HashUtil;
import org.zanata.util.OkapiUtil;
import org.zanata.util.StringUtil;

import com.google.common.base.Objects;

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
Expand All @@ -91,10 +86,6 @@
@Entity
@Cache(usage = CacheConcurrencyStrategy.READ_WRITE)
@Indexed
@FullTextFilterDef(
name = "textFlowFilter",
impl = IdFilterFactory.class,
cache = FilterCacheModeType.INSTANCE_AND_DOCIDSETRESULTS)
@NamedQueries(@NamedQuery(
name = "HTextFlow.findIdsWithTranslations",
query = "SELECT tft.textFlow.id FROM HTextFlowTarget tft " +
Expand Down
21 changes: 7 additions & 14 deletions zanata-model/src/main/java/org/zanata/model/HTextFlowTarget.java
Expand Up @@ -61,9 +61,9 @@
import org.hibernate.annotations.Type;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.FieldBridge;
import org.hibernate.search.annotations.Fields;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.IndexedEmbedded;
import org.hibernate.search.annotations.Parameter;
import org.hibernate.validator.NotNull;
import org.zanata.common.ContentState;
Expand Down Expand Up @@ -192,6 +192,7 @@ public HPerson getLastModifiedBy()
@JoinColumn(name = "tf_id")
@Field(index = Index.UN_TOKENIZED)
@FieldBridge(impl = ContainingWorkspaceBridge.class)
@IndexedEmbedded
public HTextFlow getTextFlow()
{
return textFlow;
Expand Down Expand Up @@ -230,19 +231,11 @@ public void setContent(String content)
@IndexColumn(name = "pos", nullable = false)
@Column(name = "content", nullable = false)
// TODO extend HTextContainer and remove this
@Fields({
@Field(name=IndexFieldLabels.CONTENT_CASE_FOLDED,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="fold"),
@Parameter(name="ngrams", value="multisize")})),
@Field(name = IndexFieldLabels.CONTENT_CASE_PRESERVED,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="preserve"),
@Parameter(name="ngrams", value="multisize")}))
})

@Field(name=IndexFieldLabels.CONTENT,
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="fold"),
@Parameter(name="ngrams", value="multisize")}))
public List<String> getContents()
{
// Copy lazily loaded relations to the history object as this cannot be
Expand Down
28 changes: 7 additions & 21 deletions zanata-war/src/main/java/org/zanata/dao/TextFlowDAO.java
Expand Up @@ -35,26 +35,23 @@
import org.hibernate.Query;
import org.hibernate.Session;
import org.hibernate.criterion.CriteriaSpecification;
import org.hibernate.criterion.Criterion;
import org.hibernate.criterion.Order;
import org.hibernate.criterion.Restrictions;
import org.hibernate.criterion.SimpleExpression;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.FullTextQuery;
import org.jboss.seam.ScopeType;
import org.jboss.seam.annotations.AutoCreate;
import org.jboss.seam.annotations.In;
import org.jboss.seam.annotations.Logger;
import org.jboss.seam.annotations.Name;
import org.jboss.seam.annotations.Scope;
import org.jboss.seam.log.Log;
import org.zanata.common.ContentState;
import org.zanata.common.LocaleId;
import org.zanata.hibernate.search.CaseInsensitiveNgramAnalyzer;
import org.zanata.hibernate.search.IndexFieldLabels;
import org.zanata.model.HDocument;
import org.zanata.model.HLocale;
import org.zanata.model.HTextFlow;
import org.zanata.model.HTextFlowTarget;
import org.zanata.util.HTextFlowPosComparator;
import org.zanata.webtrans.shared.model.DocumentId;
import org.zanata.webtrans.shared.model.TransMemoryQuery;
Expand Down Expand Up @@ -119,16 +116,6 @@ public HTextFlow getObsoleteById(HDocument document, String id)
return (HTextFlow) cr.uniqueResult();
}

/**
 * Executes the named query {@code HTextFlow.findIdsWithTranslations} with
 * its {@code locale} parameter bound to the given locale and returns the
 * resulting list of ids.
 *
 * @param locale value bound to the query's {@code locale} parameter
 * @return the ids produced by the named query
 */
@SuppressWarnings("unchecked")
public List<Long> findIdsWithTranslations(LocaleId locale)
{
   // Query caching is switched off on purpose: TextFlowFilter does its own
   // caching, so there is no need for double caching.
   return getSession()
         .getNamedQuery("HTextFlow.findIdsWithTranslations")
         .setParameter("locale", locale)
         .setCacheable(false)
         .setComment("TextFlowDAO.findIdsWithTranslations")
         .list();
}

@SuppressWarnings("unchecked")
public List<HTextFlow> getNavigationByDocumentId(Long documentId)
{
Expand All @@ -142,7 +129,7 @@ public List<HTextFlow> getNavigationByDocumentId(Long documentId)
return c.list();
}

public List<Object[]> getSearchResult(TransMemoryQuery query, List<Long> translatedIds, final int maxResult) throws ParseException
public List<Object[]> getSearchResult(TransMemoryQuery query, final int maxResult) throws ParseException
{
String queryText = null;
String[] multiQueryText = null;
Expand Down Expand Up @@ -178,22 +165,21 @@ public List<Object[]> getSearchResult(TransMemoryQuery query, List<Long> transla
if (query.getSearchType() == SearchType.FUZZY_PLURAL)
{
int queriesSize = multiQueryText.length;
if (queriesSize > IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED.length)
if (queriesSize > IndexFieldLabels.TF_CONTENT_FIELDS.length)
{
log.warn("query contains {} fields, but we only index {}", queriesSize, IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED.length);
log.warn("query contains {} fields, but we only index {}", queriesSize, IndexFieldLabels.TF_CONTENT_FIELDS.length);
}
String[] searchFields = new String[queriesSize];
System.arraycopy(IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED, 0, searchFields, 0, queriesSize);
System.arraycopy(IndexFieldLabels.TF_CONTENT_FIELDS, 0, searchFields, 0, queriesSize);

textQuery = MultiFieldQueryParser.parse(LUCENE_VERSION, multiQueryText, searchFields, analyzer);
}
else
{
MultiFieldQueryParser parser = new MultiFieldQueryParser(LUCENE_VERSION, IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED, analyzer);
MultiFieldQueryParser parser = new MultiFieldQueryParser(LUCENE_VERSION, IndexFieldLabels.TF_CONTENT_FIELDS, analyzer);
textQuery = parser.parse(queryText);
}
FullTextQuery ftQuery = entityManager.createFullTextQuery(textQuery, HTextFlow.class);
ftQuery.enableFullTextFilter("textFlowFilter").setParameter("ids", translatedIds);
FullTextQuery ftQuery = entityManager.createFullTextQuery(textQuery, HTextFlowTarget.class);

ftQuery.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
@SuppressWarnings("unchecked")
Expand Down
Expand Up @@ -21,6 +21,7 @@
package org.zanata.service.impl;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

Expand Down Expand Up @@ -138,10 +139,10 @@ private List<HTextFlow> findTextFlowsByDocumentPaths(WorkspaceId workspace, List
}
Analyzer ngramAnalyzer = new ConfigurableNgramAnalyzer(searchLength, !constraints.isCaseSensitive());

String[] searchFields = (constraints.isCaseSensitive() ? IndexFieldLabels.CONTENT_FIELDS_CASE_PRESERVED : IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED);
//String[] searchFields = (constraints.isCaseSensitive() ? IndexFieldLabels.CONTENT_FIELDS_CASE_PRESERVED : IndexFieldLabels.TF_CONTENT_FIELDS);

Query searchPhraseQuery;
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_29, searchFields, ngramAnalyzer);
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_29, IndexFieldLabels.CONTENT_FIELDS, ngramAnalyzer);
try
{
searchPhraseQuery = parser.parse("\"" + QueryParser.escape(constraints.getSearchString()) + "\"");
Expand Down Expand Up @@ -201,7 +202,11 @@ private List<HTextFlow> findTextFlowsByDocumentPaths(WorkspaceId workspace, List
log.info("got {} HTextFLowTarget results", matchedTargets.size());
for (HTextFlowTarget htft : matchedTargets)
{
resultList.add(htft.getTextFlow());
// manually check for case sensitive matches
if( !constraints.isCaseSensitive() || (constraints.isCaseSensitive() && contentIsValid(htft.getContents(), constraints)) )
{
resultList.add(htft.getTextFlow());
}
}
}

Expand All @@ -215,7 +220,11 @@ private List<HTextFlow> findTextFlowsByDocumentPaths(WorkspaceId workspace, List
{
if (!resultList.contains(htf))
{
resultList.add(htf);
// manually check for case sensitive matches
if( !constraints.isCaseSensitive() || (constraints.isCaseSensitive() && contentIsValid(htf.getContents(), constraints)) )
{
resultList.add(htf);
}
}
}
}
Expand Down Expand Up @@ -247,10 +256,10 @@ public List<HTextFlow> findTextFlows(WorkspaceId workspace, DocumentId doc, Filt
}
Analyzer ngramAnalyzer = new ConfigurableNgramAnalyzer(searchLength, !constraints.isCaseSensitive());

String[] searchFields = (constraints.isCaseSensitive() ? IndexFieldLabels.CONTENT_FIELDS_CASE_PRESERVED : IndexFieldLabels.CONTENT_FIELDS_CASE_FOLDED);
//String[] searchFields = (constraints.isCaseSensitive() ? IndexFieldLabels.CONTENT_FIELDS_CASE_PRESERVED : IndexFieldLabels.TF_CONTENT_FIELDS);

Query searchPhraseQuery;
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_29, searchFields, ngramAnalyzer);
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_29, IndexFieldLabels.TF_CONTENT_FIELDS, ngramAnalyzer);
try
{
searchPhraseQuery = parser.parse("\"" + QueryParser.escape(constraints.getSearchString()) + "\"");
Expand Down Expand Up @@ -348,4 +357,35 @@ private static boolean isContentStateValid(HTextFlowTarget hTextFlowTarget, Filt
}
}

/**
 * Checks whether at least one of the given content strings contains the
 * search string with exactly the same case.
 * <p>
 * The index lookup that precedes this check is case-insensitive, so this is
 * used to filter case-sensitive searches in memory. The check is a
 * case-sensitive <em>substring</em> match ({@link String#contains}), not a
 * whole-string equality test.
 * <p>
 * The result is {@code false} when the constraints request neither a source
 * nor a target search, or when the search is not case-sensitive; in those
 * cases the contents are not inspected at all.
 *
 * @param contents the strings to inspect (source or target contents)
 * @param constraints supplies the search string and the search-in-source,
 *           search-in-target and case-sensitive flags
 * @return {@code true} if the search applies and some content contains the
 *         search string verbatim, {@code false} otherwise
 */
private static boolean contentIsValid(Collection<String> contents, FilterConstraints constraints)
{
   // The source and target branches used to run the very same scan over the
   // same collection, so a single pass guarded by either flag is equivalent.
   boolean searchRequested = constraints.isSearchInSource() || constraints.isSearchInTarget();
   if (!searchRequested || !constraints.isCaseSensitive())
   {
      return false;
   }

   // Loop-invariant: fetch the search string once instead of per element.
   String searchString = constraints.getSearchString();
   for (String content : contents)
   {
      if (content.contains(searchString))
      {
         return true;
      }
   }
   return false;
}

}

0 comments on commit a00ece0

Please sign in to comment.