Merge branch 'search-replace-indexing' into search-replace

zanata · Apr 13, 2012 · 0881607 · 0881607
2 parents 32e74e3 + 48326a1
commit 0881607
Show file tree

Hide file tree

Showing 4 changed files with 157 additions and 57 deletions.
diff --git a/server/zanata-model/src/main/java/org/zanata/hibernate/search/StringListBridge.java b/server/zanata-model/src/main/java/org/zanata/hibernate/search/StringListBridge.java
@@ -0,0 +1,147 @@
+package org.zanata.hibernate.search;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.hibernate.search.bridge.FieldBridge;
+import org.hibernate.search.bridge.LuceneOptions;
+import org.hibernate.search.bridge.ParameterizedBridge;
+import org.jboss.seam.annotations.Logger;
+import org.jboss.seam.log.Log;
+
+/**
+ * Index a list of strings in multiple fields, appending the string index to the
+ * field name to produce unique fields.
+ * 
+ * e.g. For a field labeled 'fieldName' for a list of 3 strings
+ * <ul>
+ * <li>First string is indexed as 'fieldName0'</li>
+ * <li>Second string is indexed as 'fieldName1'</li>
+ * <li>Third string is indexed as 'fieldName2'</li>
+ * </ul>
+ * 
+ * The optional bridge parameter "case" selects the analyzer: "fold" uses
+ * {@link DefaultNgramAnalyzer}, "preserve" uses
+ * {@link CaseSensitiveNgramAnalyzer}. If the parameter is missing or has any
+ * other value, {@link DefaultNgramAnalyzer} is used.
+ * 
+ * @author David Mason, damason@redhat.com
+ * 
+ */
+public class StringListBridge implements FieldBridge, ParameterizedBridge
+{
+
+   // Looked up statically instead of using Seam's @Logger injection: this
+   // bridge is instantiated by Hibernate Search, not as a Seam component, so
+   // an injected field would never be populated and every log call would
+   // throw a NullPointerException.
+   private static final Log log = org.jboss.seam.log.Logging.getLog(StringListBridge.class);
+
+   // chosen in setParameterValues(), or lazily defaulted in set()
+   private ConfigurableNgramAnalyzer analyzer;
+
+   /**
+    * Selects the analyzer from the "case" bridge parameter.
+    * 
+    * @param parameters bridge parameters; "case" may be "fold" or "preserve".
+    *           Any other value is logged and treated as "fold". If "case" is
+    *           absent, the analyzer is left unset here.
+    */
+   @Override
+   public void setParameterValues(@SuppressWarnings("rawtypes") Map parameters)
+   {
+      if (parameters.containsKey("case"))
+      {
+         String caseBehaviour = (String) parameters.get("case");
+         if ("fold".equals(caseBehaviour))
+         {
+            analyzer = new DefaultNgramAnalyzer();
+         }
+         else if ("preserve".equals(caseBehaviour))
+         {
+            analyzer = new CaseSensitiveNgramAnalyzer();
+         }
+         else
+         {
+            log.warn("invalid value for parameter \"case\": \"{0}\", default will be used", caseBehaviour);
+            analyzer = new DefaultNgramAnalyzer();
+         }
+      }
+   }
+
+   /**
+    * Adds one field per list element to the document, named
+    * <code>name + index</code>.
+    * 
+    * @param name base field name; the element's list position is appended
+    * @param value the list of strings to index; null is ignored, as are null
+    *           elements (their position is still counted)
+    * @param luceneDocument document that receives the fields
+    * @param luceneOptions store, index, term vector and boost settings applied
+    *           to every field
+    * @throws IllegalArgumentException if value is not null and not a List
+    */
+   @Override
+   public void set(String name, Object value, Document luceneDocument, LuceneOptions luceneOptions)
+   {
+      if (value == null)
+      {
+         // a missing list has nothing to index; do not report it as a wrong type
+         return;
+      }
+      if (!(value instanceof List<?>))
+      {
+         throw new IllegalArgumentException("this bridge must be applied to a List");
+      }
+
+      if (analyzer == null)
+      {
+         analyzer = new DefaultNgramAnalyzer();
+      }
+
+      // elements are assumed to be Strings; the element type cannot be checked
+      // here because of erasure
+      @SuppressWarnings("unchecked")
+      List<String> strings = (List<String>) value;
+      for (int i = 0; i < strings.size(); i++)
+      {
+         String string = strings.get(i);
+         if (string != null)
+         {
+            // Field and StringReader both reject null content
+            addStringField(name + i, string, luceneDocument, luceneOptions);
+         }
+      }
+   }
+
+   /**
+    * Builds a single field using the configured analyzer's token stream and
+    * adds it to the document.
+    */
+   private void addStringField(String fieldName, String fieldValue, Document luceneDocument, LuceneOptions luceneOptions)
+   {
+      Field field = new Field(fieldName, fieldValue, luceneOptions.getStore(), luceneOptions.getIndex(), luceneOptions.getTermVector());
+      field.setBoost(luceneOptions.getBoost());
+
+      // manually apply token stream from analyzer, as hibernate search does not
+      // apply the specified analyzer properly
+      // NOTE(review): if the analyzer really reuses its stream, all fields added
+      // for one list may end up sharing a single stream instance - confirm, and
+      // consider tokenStream() instead
+      try
+      {
+         field.setTokenStream(analyzer.reusableTokenStream(fieldName, new StringReader(fieldValue)));
+      }
+      catch (IOException e)
+      {
+         // best effort: the field is still added, just without the custom stream
+         log.error("Failed to get token stream from analyzer for field \"{0}\" with content \"{1}\"", e, fieldName, fieldValue);
+      }
+      luceneDocument.add(field);
+   }
+
+}
diff --git a/server/zanata-model/src/main/java/org/zanata/model/HTextFlowTarget.java b/server/zanata-model/src/main/java/org/zanata/model/HTextFlowTarget.java
@@ -55,20 +55,19 @@
 import org.hibernate.annotations.IndexColumn;
 import org.hibernate.annotations.NaturalId;
 import org.hibernate.annotations.Type;
-import org.hibernate.search.annotations.Analyzer;
 import org.hibernate.search.annotations.Field;
 import org.hibernate.search.annotations.FieldBridge;
 import org.hibernate.search.annotations.Fields;
 import org.hibernate.search.annotations.Index;
 import org.hibernate.search.annotations.Indexed;
+import org.hibernate.search.annotations.Parameter;
 import org.hibernate.validator.NotNull;
 import org.zanata.common.ContentState;
 import org.zanata.common.HasContents;
-import org.zanata.hibernate.search.CaseSensitiveNgramAnalyzer;
 import org.zanata.hibernate.search.ContainingWorkspaceBridge;
 import org.zanata.hibernate.search.ContentStateBridge;
-import org.zanata.hibernate.search.DefaultNgramAnalyzer;
 import org.zanata.hibernate.search.LocaleIdBridge;
+import org.zanata.hibernate.search.StringListBridge;
 
 /**
  * Represents a flow of translated text that should be processed as a
@@ -82,7 +81,7 @@
 @NamedQueries({
    @NamedQuery(name = "HTextFlowTarget.findLatestEquivalentTranslations",
                query = "select tft, tfExample, max(tft.lastChanged) " +
-               		  "from HTextFlowTarget tft, HTextFlow tfExample " +
+                       "from HTextFlowTarget tft, HTextFlow tfExample " +
                        "left join fetch tft.textFlow " +
                        "where " +
                        "tfExample.resId = tft.textFlow.resId " +
@@ -109,18 +108,17 @@ public class HTextFlowTarget extends ModelEntityBase implements HasContents, Has
    private HPerson lastModifiedBy;
 
    private HSimpleComment comment;
-   
+
    public Map<Integer, HTextFlowTargetHistory> history;
-   
+
    // Only for internal use (persistence transient)
    private Integer oldVersionNum;
-   
-   // Only for internal use (persistence transient) 
+
+   // Only for internal use (persistence transient)
    private HTextFlowTargetHistory initialState;
-   
-   // Only for internal use (persistence transient) 
+
+   // Only for internal use (persistence transient)
    private boolean lazyRelationsCopied = false;
-
 
    public HTextFlowTarget()
    {
@@ -133,7 +131,6 @@ public HTextFlowTarget(HTextFlow textFlow, HLocale locale)
       this.textFlowRevision = textFlow.getRevision();
    }
 
-
    @Id
    @GeneratedValue
    public Long getId()
@@ -146,7 +143,6 @@ protected void setId(Long id)
       this.id = id;
    }
 
-
    // TODO PERF @NaturalId(mutable=false) for better criteria caching
    @NaturalId
    @ManyToOne
@@ -222,51 +218,55 @@ public void setTextFlow(HTextFlow textFlow)
 
    /**
     * As of release 1.6, replaced by {@link #getContents()}
+    * 
     * @return
     */
    @Override
    @Deprecated
    @Transient
-   //FIXME index contents rather than content
-   //TODO add case sensitive content field to index.
-   // This will require a different analyzer as DefaultNgramAnalyzer and its
-   // parent and grandparent class are implicitly case insensitive.
-   @Fields({
-      @Field(name="content-nocase", index = Index.TOKENIZED, analyzer = @Analyzer(impl = DefaultNgramAnalyzer.class)),
-      @Field(name="content-case", index = Index.TOKENIZED, analyzer = @Analyzer(impl = CaseSensitiveNgramAnalyzer.class))
-   })
    public String getContent()
    {
-      if( this.getContents().size() > 0 )
+      if (this.getContents().size() > 0)
       {
          return this.getContents().get(0);
       }
       return null;
    }
-   
+
    @Deprecated
    @Transient
-   public void setContent( String content )
+   public void setContent(String content)
    {
-      this.setContents( Arrays.asList(content) );
+      this.setContents(Arrays.asList(content));
    }
-   
+
    @Override
    @Type(type = "text")
    @AccessType("field")
    @CollectionOfElements(fetch = FetchType.EAGER)
    @JoinTable(name = "HTextFlowTargetContent", 
-      joinColumns = @JoinColumn(name = "text_flow_target_id")
+              joinColumns = @JoinColumn(name = "text_flow_target_id")
    )
    @IndexColumn(name = "pos", nullable = false)
    @Column(name = "content", nullable = false)
+   @Fields({
+      @Field(name="content-nocase",
+             index = Index.TOKENIZED,
+             bridge = @FieldBridge(impl = StringListBridge.class,
+                                   params = {@Parameter(name="case", value="fold")})),
+      @Field(name = "content-case",
+             index = Index.TOKENIZED,
+             bridge = @FieldBridge(impl = StringListBridge.class,
+                                   params = {@Parameter(name="case", value="preserve")}))
+   })
+
    public List<String> getContents()
    {
-      // Copy lazily loaded relations to the history object as this cannot be done
-      // in the entity callbacks
+      // Copy lazily loaded relations to the history object as this cannot be
+      // done in the entity callbacks
       copyLazyLoadedRelationsToHistory();
-      
-      if( contents == null )
+
+      if (contents == null)
       {
          contents = new ArrayList<String>();
       }
@@ -275,14 +275,14 @@ public List<String> getContents()
 
    public void setContents(List<String> contents)
    {
-      // Copy lazily loaded relations to the history object as this cannot be done
-      // in the entity callbacks
+      // Copy lazily loaded relations to the history object as this cannot be
+      // done in the entity callbacks
       copyLazyLoadedRelationsToHistory();
-      
+
       this.contents = new ArrayList<String>(contents);
    }
-   
-   public void setContents(String ... contents)
+
+   public void setContents(String... contents)
    {
       this.setContents(Arrays.asList(contents));
    }
@@ -300,12 +300,12 @@ public void setComment(HSimpleComment comment)
    {
       this.comment = comment;
    }
-   
-   @OneToMany(cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, mappedBy = "textFlowTarget")
+
+   @OneToMany(cascade = { CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST }, mappedBy = "textFlowTarget")
    @MapKey(name = "versionNum")
    public Map<Integer, HTextFlowTargetHistory> getHistory()
    {
-      if( this.history == null )
+      if (this.history == null)
       {
          this.history = new HashMap<Integer, HTextFlowTargetHistory>();
       }
@@ -316,12 +316,12 @@ public void setHistory(Map<Integer, HTextFlowTargetHistory> history)
    {
       this.history = history;
    }
-   
+
    @PreUpdate
    private void preUpdate()
    {
       // insert history if this has changed from its initial state
-      if( this.initialState != null && this.initialState.hasChanged(this) )
+      if (this.initialState != null && this.initialState.hasChanged(this))
       {
          this.getHistory().put(this.oldVersionNum, this.initialState);
       }
@@ -336,15 +336,15 @@ private void updateInternalHistory()
       this.initialState = new HTextFlowTargetHistory(this);
       this.lazyRelationsCopied = false;
    }
-   
+
    /**
     * Copies all lazy loaded relations to the history object.
     */
    private void copyLazyLoadedRelationsToHistory()
    {
-      if( this.initialState != null && this.initialState.getContents() == null && !this.lazyRelationsCopied )
+      if (this.initialState != null && this.initialState.getContents() == null && !this.lazyRelationsCopied)
       {
-         this.initialState.setContents( this.contents );
+         this.initialState.setContents(this.contents);
          this.lazyRelationsCopied = true;
       }
    }

diff --git a/server/zanata-war/src/main/java/org/zanata/service/impl/TextFlowSearchServiceImpl.java b/server/zanata-war/src/main/java/org/zanata/service/impl/TextFlowSearchServiceImpl.java
@@ -106,21 +106,21 @@ public List<HTextFlowTarget> findTextFlowTargets(WorkspaceId workspace, FilterCo
          throw new ZanataServiceException("Failed to validate locale", e);
       }
 
-      //TODO add case-sensitive flag to FilterConstraints
-
       String searchField;
       Analyzer ngramAnalyzer;
       if (constraints.isCaseSensitive())
       {
-         searchField = "content-case";
+         searchField = "content-case0";
          ngramAnalyzer = new CaseSensitiveNgramAnalyzer();
       }
       else
       {
-         searchField = "content-nocase";
+         searchField = "content-nocase0";
          ngramAnalyzer = new DefaultNgramAnalyzer();
       }
 
+      //TODO search all indexed content fields (content-case0..N / content-nocase0..N); only index 0 is searched for now
+
       Query searchPhraseQuery;
       QueryParser parser = new QueryParser(Version.LUCENE_29, searchField, ngramAnalyzer);
       try