Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Merge branch 'search-replace-indexing' into search-replace
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmason committed Apr 13, 2012
2 parents 32e74e3 + 48326a1 commit 0881607
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 57 deletions.
@@ -0,0 +1,97 @@
package org.zanata.hibernate.search;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.hibernate.search.bridge.FieldBridge;
import org.hibernate.search.bridge.LuceneOptions;
import org.hibernate.search.bridge.ParameterizedBridge;
import org.jboss.seam.annotations.Logger;
import org.jboss.seam.log.Log;
import org.jboss.seam.log.Logging;

/**
* Index a list of strings in multiple fields, appending the string index to the
* field name to produce unique fields.
*
* e.g. For a field labeled 'fieldName' for a list of 3 strings
* <ul>
* <li>First string is indexed as 'fieldName0'</li>
* <li>Second string is indexed as 'fieldName1'</li>
* <li>Third string is indexed as 'fieldName2'</li>
* </ul>
*
* @author David Mason, damason@redhat.com
*
*/
public class StringListBridge implements FieldBridge, ParameterizedBridge
{

@Logger
Log log;

private ConfigurableNgramAnalyzer analyzer;

@Override
public void setParameterValues(@SuppressWarnings("rawtypes") Map parameters)
{
if (parameters.containsKey("case"))
{
String caseBehaviour = (String) parameters.get("case");
if ("fold".equals(caseBehaviour))
{
analyzer = new DefaultNgramAnalyzer();
}
else if ("preserve".equals(caseBehaviour))
{
analyzer = new CaseSensitiveNgramAnalyzer();
}
else
{
log.warn("invalid value for parameter \"case\": \"{0}\", default will be used", caseBehaviour);
analyzer = new DefaultNgramAnalyzer();
}
}
}

@Override
public void set(String name, Object value, Document luceneDocument, LuceneOptions luceneOptions)
{
if (analyzer == null)
{
analyzer = new DefaultNgramAnalyzer();
}

if (!(value instanceof List<?>))
{
throw new IllegalArgumentException("this bridge must be applied to a List");
}
List<String> strings = (List<String>) value;
for (int i = 0; i < strings.size(); i++)
{
addStringField(name + i, strings.get(i), luceneDocument, luceneOptions);
}
}

private void addStringField(String fieldName, String fieldValue, Document luceneDocument, LuceneOptions luceneOptions)
{
Field field = new Field(fieldName, fieldValue, luceneOptions.getStore(), luceneOptions.getIndex(), luceneOptions.getTermVector());
field.setBoost(luceneOptions.getBoost());

// manually apply token stream from analyzer, as hibernate search does not
// apply the specified analyzer properly
try
{
field.setTokenStream(analyzer.reusableTokenStream(fieldName, new StringReader(fieldValue)));
}
catch (IOException e)
{
log.error("Failed to get token stream from analyzer for field \"{0}\" with content \"{1}\"", e, fieldName, fieldValue);
}
luceneDocument.add(field);
}

}
Expand Up @@ -55,20 +55,19 @@
import org.hibernate.annotations.IndexColumn;
import org.hibernate.annotations.NaturalId;
import org.hibernate.annotations.Type;
import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.FieldBridge;
import org.hibernate.search.annotations.Fields;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Parameter;
import org.hibernate.validator.NotNull;
import org.zanata.common.ContentState;
import org.zanata.common.HasContents;
import org.zanata.hibernate.search.CaseSensitiveNgramAnalyzer;
import org.zanata.hibernate.search.ContainingWorkspaceBridge;
import org.zanata.hibernate.search.ContentStateBridge;
import org.zanata.hibernate.search.DefaultNgramAnalyzer;
import org.zanata.hibernate.search.LocaleIdBridge;
import org.zanata.hibernate.search.StringListBridge;

/**
* Represents a flow of translated text that should be processed as a
Expand All @@ -82,7 +81,7 @@
@NamedQueries({
@NamedQuery(name = "HTextFlowTarget.findLatestEquivalentTranslations",
query = "select tft, tfExample, max(tft.lastChanged) " +
"from HTextFlowTarget tft, HTextFlow tfExample " +
"from HTextFlowTarget tft, HTextFlow tfExample " +
"left join fetch tft.textFlow " +
"where " +
"tfExample.resId = tft.textFlow.resId " +
Expand All @@ -109,18 +108,17 @@ public class HTextFlowTarget extends ModelEntityBase implements HasContents, Has
private HPerson lastModifiedBy;

private HSimpleComment comment;

public Map<Integer, HTextFlowTargetHistory> history;

// Only for internal use (persistence transient)
private Integer oldVersionNum;
// Only for internal use (persistence transient)

// Only for internal use (persistence transient)
private HTextFlowTargetHistory initialState;
// Only for internal use (persistence transient)

// Only for internal use (persistence transient)
private boolean lazyRelationsCopied = false;


public HTextFlowTarget()
{
Expand All @@ -133,7 +131,6 @@ public HTextFlowTarget(HTextFlow textFlow, HLocale locale)
this.textFlowRevision = textFlow.getRevision();
}


@Id
@GeneratedValue
public Long getId()
Expand All @@ -146,7 +143,6 @@ protected void setId(Long id)
this.id = id;
}


// TODO PERF @NaturalId(mutable=false) for better criteria caching
@NaturalId
@ManyToOne
Expand Down Expand Up @@ -222,51 +218,55 @@ public void setTextFlow(HTextFlow textFlow)

/**
* As of release 1.6, replaced by {@link #getContents()}
*
* @return
*/
@Override
@Deprecated
@Transient
//FIXME index contents rather than content
//TODO add case sensitive content field to index.
// This will require a different analyzer as DefaultNgramAnalyzer and its
// parent and grandparent class are implicitly case insensitive.
@Fields({
@Field(name="content-nocase", index = Index.TOKENIZED, analyzer = @Analyzer(impl = DefaultNgramAnalyzer.class)),
@Field(name="content-case", index = Index.TOKENIZED, analyzer = @Analyzer(impl = CaseSensitiveNgramAnalyzer.class))
})
public String getContent()
{
if( this.getContents().size() > 0 )
if (this.getContents().size() > 0)
{
return this.getContents().get(0);
}
return null;
}

@Deprecated
@Transient
public void setContent( String content )
public void setContent(String content)
{
this.setContents( Arrays.asList(content) );
this.setContents(Arrays.asList(content));
}

@Override
@Type(type = "text")
@AccessType("field")
@CollectionOfElements(fetch = FetchType.EAGER)
@JoinTable(name = "HTextFlowTargetContent",
joinColumns = @JoinColumn(name = "text_flow_target_id")
joinColumns = @JoinColumn(name = "text_flow_target_id")
)
@IndexColumn(name = "pos", nullable = false)
@Column(name = "content", nullable = false)
@Fields({
@Field(name="content-nocase",
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="fold")})),
@Field(name = "content-case",
index = Index.TOKENIZED,
bridge = @FieldBridge(impl = StringListBridge.class,
params = {@Parameter(name="case", value="preserve")}))
})

public List<String> getContents()
{
// Copy lazily loaded relations to the history object as this cannot be done
// in the entity callbacks
// Copy lazily loaded relations to the history object as this cannot be
// done in the entity callbacks
copyLazyLoadedRelationsToHistory();
if( contents == null )

if (contents == null)
{
contents = new ArrayList<String>();
}
Expand All @@ -275,14 +275,14 @@ public List<String> getContents()

public void setContents(List<String> contents)
{
// Copy lazily loaded relations to the history object as this cannot be done
// in the entity callbacks
// Copy lazily loaded relations to the history object as this cannot be
// done in the entity callbacks
copyLazyLoadedRelationsToHistory();

this.contents = new ArrayList<String>(contents);
}
public void setContents(String ... contents)

public void setContents(String... contents)
{
this.setContents(Arrays.asList(contents));
}
Expand All @@ -300,12 +300,12 @@ public void setComment(HSimpleComment comment)
{
this.comment = comment;
}
@OneToMany(cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, mappedBy = "textFlowTarget")

@OneToMany(cascade = { CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST }, mappedBy = "textFlowTarget")
@MapKey(name = "versionNum")
public Map<Integer, HTextFlowTargetHistory> getHistory()
{
if( this.history == null )
if (this.history == null)
{
this.history = new HashMap<Integer, HTextFlowTargetHistory>();
}
Expand All @@ -316,12 +316,12 @@ public void setHistory(Map<Integer, HTextFlowTargetHistory> history)
{
this.history = history;
}

@PreUpdate
private void preUpdate()
{
// insert history if this has changed from its initial state
if( this.initialState != null && this.initialState.hasChanged(this) )
if (this.initialState != null && this.initialState.hasChanged(this))
{
this.getHistory().put(this.oldVersionNum, this.initialState);
}
Expand All @@ -336,15 +336,15 @@ private void updateInternalHistory()
this.initialState = new HTextFlowTargetHistory(this);
this.lazyRelationsCopied = false;
}

/**
* Copies all lazy loaded relations to the history object.
*/
private void copyLazyLoadedRelationsToHistory()
{
if( this.initialState != null && this.initialState.getContents() == null && !this.lazyRelationsCopied )
if (this.initialState != null && this.initialState.getContents() == null && !this.lazyRelationsCopied)
{
this.initialState.setContents( this.contents );
this.initialState.setContents(this.contents);
this.lazyRelationsCopied = true;
}
}
Expand Down
Expand Up @@ -106,21 +106,21 @@ public List<HTextFlowTarget> findTextFlowTargets(WorkspaceId workspace, FilterCo
throw new ZanataServiceException("Failed to validate locale", e);
}

//TODO add case-sensitive flag to FilterConstraints

String searchField;
Analyzer ngramAnalyzer;
if (constraints.isCaseSensitive())
{
searchField = "content-case";
searchField = "content-case0";
ngramAnalyzer = new CaseSensitiveNgramAnalyzer();
}
else
{
searchField = "content-nocase";
searchField = "content-nocase0";
ngramAnalyzer = new DefaultNgramAnalyzer();
}

//TODO search in all content fields (content 0..5 or more)

Query searchPhraseQuery;
QueryParser parser = new QueryParser(Version.LUCENE_29, searchField, ngramAnalyzer);
try
Expand Down

0 comments on commit 0881607

Please sign in to comment.