Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Refactor Indexing Strategies for better performance.
Browse files Browse the repository at this point in the history
Create an indexing strategy that performs fewer database queries and is able to index TextFlow targets faster.
  • Loading branch information
Carlos A. Munoz authored and seanf committed Jul 26, 2013
1 parent 89b50be commit 2975176
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 95 deletions.
14 changes: 10 additions & 4 deletions zanata-war/src/main/java/org/zanata/action/ReindexAsyncBean.java
Expand Up @@ -26,9 +26,9 @@
import org.zanata.model.HTextFlow;
import org.zanata.model.HTextFlowTarget;
import org.zanata.process.RunnableProcess;
import org.zanata.search.AbstractIndexingStrategy;
import org.zanata.search.ClassIndexer;
import org.zanata.search.GenericClassIndexer;
import org.zanata.search.HTextFlowTargetIndexer;
import org.zanata.search.HTextFlowTargetIndexingStrategy;
import org.zanata.search.IndexerProcessHandle;
import org.zanata.service.ProcessManagerService;

Expand Down Expand Up @@ -184,11 +184,17 @@ ClassIndexer getIndexer(Class<?> clazz)
{
if( clazz.equals( HTextFlowTarget.class ) )
{
return new HTextFlowTargetIndexer();
return new ClassIndexer<HTextFlowTarget>() {
@Override
public AbstractIndexingStrategy<HTextFlowTarget> createIndexingStrategy(FullTextSession session, IndexerProcessHandle handle, Class clazz)
{
return new HTextFlowTargetIndexingStrategy(session, handle, clazz);
}
};
}
else
{
return new GenericClassIndexer();
return new ClassIndexer();
}
}

Expand Down
@@ -0,0 +1,85 @@
package org.zanata.search;

import org.hibernate.Query;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextSession;

import lombok.extern.slf4j.Slf4j;

/**
 * Base indexing strategy.
 * <p>
 * {@link #invoke()} scrolls over the results supplied by
 * {@link #getScrollableResults(FullTextSession, Class, int)}, indexes the entity found in
 * the first column of every row, and periodically flushes the pending index work and
 * clears the session. Subclasses decide how the entities are queried and may react after
 * each indexed entity through {@link #onEntityIndexed(int)}.
 *
 * @param <T> The type of object that this indexing strategy handles.
 */
@Slf4j
public abstract class AbstractIndexingStrategy<T>
{
   /** Number of indexed entities between two periodic flush-and-clear cycles. */
   private static final int SESSION_CLEAR_BATCH_SIZE = 1000;

   /** Process handle used for progress reporting and for detecting stop requests. */
   private final IndexerProcessHandle handle;

   // The following fields are intentionally left package-visible and non-final:
   // subclasses in this package read them, and scrollableResults may be replaced
   // by a subclass while invoke() is running.
   FullTextSession session;
   Class<T> clazz;
   ScrollableResults scrollableResults;


   /**
    * @param session Session used to query and index the entities
    * @param handle Handle that receives progress updates and signals when to stop
    * @param clazz The type of entity to index
    */
   public AbstractIndexingStrategy(FullTextSession session, IndexerProcessHandle handle, Class<T> clazz)
   {
      this.session = session;
      this.handle = handle;
      this.clazz = clazz;
   }

   /**
    * Performs the indexing.
    * <p>
    * Iterates until the results are exhausted or the process handle asks to stop. The
    * scrollable results that are current when this method finishes (normally or with an
    * exception) are closed.
    */
   public void invoke()
   {
      int n = 0;
      try
      {
         scrollableResults = getScrollableResults(session, clazz, n);
         // The field is re-read on every iteration on purpose: onEntityIndexed() is
         // allowed to swap in a new ScrollableResults instance.
         while (scrollableResults.next() && !handle.shouldStop())
         {
            n++;
            // The query contract is that column 0 of each row holds the entity to index.
            // The cast cannot be checked at runtime because of erasure.
            @SuppressWarnings("unchecked")
            T entity = (T) scrollableResults.get(0);
            session.index(entity);
            handle.incrementProgress(1);
            if (n % SESSION_CLEAR_BATCH_SIZE == 0)
            {
               log.info("periodic flush and clear for {} (n={})", clazz, n);
               session.flushToIndexes(); // apply changes to indexes
               session.clear(); // clear since the queue is processed
            }
            onEntityIndexed(n);
         }
      }
      finally
      {
         if (scrollableResults != null)
         {
            scrollableResults.close();
         }
      }
   }

   /**
    * Callback method that is called every time an entity is indexed.
    * It runs after the periodic flush-and-clear check for that entity.
    *
    * @param n The entity number that was indexed (1 for the first entity).
    */
   protected abstract void onEntityIndexed(int n);

   /**
    * Returns the Scrollable results to iterate over.
    *
    * @param session Session used to query and index the entities
    * @param clazz The type of entity to be returned by the Scrollable results
    * @param firstResult Number of entities indexed so far ({@code 0} on the initial call
    *           made by {@link #invoke()}); implementations may use it as a row offset
    * @return the results to scroll through; the entity is read from column 0 of each row
    */
   protected abstract ScrollableResults getScrollableResults(FullTextSession session, Class<T> clazz, int firstResult);

   /**
    * Create a query which returns instances of clazz
    *
    * @param session Session used to create the query
    * @param clazz The type of objects being returned by this query.
    * @return the query selecting the entities to index
    */
   protected abstract Query getQuery(FullTextSession session, Class<T> clazz);
}
51 changes: 6 additions & 45 deletions zanata-war/src/main/java/org/zanata/search/ClassIndexer.java
Expand Up @@ -25,9 +25,6 @@

import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.Query;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.criterion.Projections;
import org.hibernate.search.FullTextSession;

Expand All @@ -36,12 +33,13 @@
*
*/
@Slf4j
public abstract class ClassIndexer <T>
public class ClassIndexer<T>
{
//TODO make this configurable
private static final int DEFAULT_BATCH_SIZE = 5000;

private final int batchSize = DEFAULT_BATCH_SIZE;
public AbstractIndexingStrategy<T> createIndexingStrategy(FullTextSession session, IndexerProcessHandle handle, Class<T> clazz)
{
return new SimpleClassIndexingStrategy(session, handle, clazz);
}

public int getEntityCount(FullTextSession session, Class<T> clazz)
{
Expand All @@ -54,26 +52,9 @@ public void index(FullTextSession session, IndexerProcessHandle handle, Class<T>
log.info("Setting manual-flush and ignore-cache for {}", clazz);
session.setFlushMode(FlushMode.MANUAL);
session.setCacheMode(CacheMode.IGNORE);
ScrollableResults results = null;
try
{
int n = 0;
results = getScrollableResults(session, clazz, n);
while (results.next() && !handle.shouldStop())
{
n++;
T entity = (T) results.get(0); // index each element
session.index(entity);
handle.incrementProgress(1);
if (n % batchSize == 0)
{
log.info("periodic flush and clear for {} (n={})", clazz, n);
session.flushToIndexes(); // apply changes to indexes
session.clear(); // clear since the queue is processed
results.close();
results = getScrollableResults(session, clazz, n);
}
}
createIndexingStrategy(session, handle, clazz).invoke();
session.flushToIndexes(); // apply changes to indexes
session.clear(); // clear since the queue is processed
}
Expand All @@ -82,26 +63,6 @@ public void index(FullTextSession session, IndexerProcessHandle handle, Class<T>
log.warn("Unable to index objects of type {}", e, clazz.getName());
handle.setHasError(true);
}
finally
{
if (results != null)
results.close();
}
}

private ScrollableResults getScrollableResults(FullTextSession session, Class<T> clazz, int fromIndex)
{
Query query = getQuery(session, clazz);
// Criteria query = session.createCriteria(clazz);
query.setFirstResult(fromIndex).setMaxResults(batchSize);
return query.scroll(ScrollMode.FORWARD_ONLY);
}

/**
* Create a query which returns all instances of clazz
* @param clazz
* @return
*/
protected abstract Query getQuery(FullTextSession session, Class<T> clazz);

}

This file was deleted.

Expand Up @@ -21,21 +21,47 @@
package org.zanata.search;

import org.hibernate.Query;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextSession;
import org.zanata.model.HTextFlowTarget;

/**
* Indexing strategy specific to HTextFlowTargets.
* This indexing strategy eagerly loads all of HTextFlowTarget's indexable relationships and
* fetches its results in a memory-efficient manner.
*
* @author Carlos Munoz <a href="mailto:camunoz@redhat.com">camunoz@redhat.com</a>
*/
public class HTextFlowTargetIndexer extends ClassIndexer<HTextFlowTarget>
public class HTextFlowTargetIndexingStrategy extends AbstractIndexingStrategy<HTextFlowTarget>
{
public HTextFlowTargetIndexingStrategy(FullTextSession session, IndexerProcessHandle handle, Class clazz)
{
super(session, handle, clazz);
}

@Override
protected void onEntityIndexed(int n)
{
// Nothing to do
}

@Override
protected ScrollableResults getScrollableResults(FullTextSession session, Class clazz, int firstResult)
{
Query query = getQuery(session, clazz);
query.setFetchSize(Integer.MIN_VALUE);
return query.scroll(ScrollMode.FORWARD_ONLY);
}

@Override
protected Query getQuery(FullTextSession session, Class<HTextFlowTarget> clazz)
protected Query getQuery(FullTextSession session, Class clazz)
{
return session.createQuery("from HTextFlowTarget tft " +
"join fetch tft.locale " +
"join fetch tft.textFlow " +
"join fetch tft.textFlow.document " +
"join fetch tft.textFlow.document.locale " +
"join fetch tft.textFlow.document.projectIteration " +
"join fetch tft.textFlow.document.projectIteration.project");
}
Expand Down
@@ -0,0 +1,73 @@
/*
* Copyright 2010, Red Hat, Inc. and individual contributors as indicated by the
* @author tags. See the copyright.txt file in the distribution for a full
* listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF
* site: http://www.fsf.org.
*/
package org.zanata.search;

import org.hibernate.Query;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextSession;

import lombok.extern.slf4j.Slf4j;

/**
 * General-purpose indexing strategy: selects every instance of the given class with a
 * plain {@code from <className>} query and indexes the results.
 * <p>
 * Entities are read in windows of at most {@link #MAX_QUERY_ROWS} rows; once a window has
 * been consumed the query is run again starting at the next offset. The query does not
 * fetch any relationships eagerly, so indexing may be slower when indexed relationships
 * are loaded lazily.
 * <p>
 * NOTE(review): the query has no ORDER BY although it is re-executed with an increasing
 * offset. Row order across separate executions is presumably not guaranteed by the
 * database, so entities could be skipped or indexed twice - confirm, and consider
 * ordering by the identifier.
 *
 * @author Carlos Munoz <a href="mailto:camunoz@redhat.com">camunoz@redhat.com</a>
 */
@Slf4j
public class SimpleClassIndexingStrategy<T> extends AbstractIndexingStrategy<T>
{

   /** Maximum number of rows a single query execution may return. */
   public static final int MAX_QUERY_ROWS = 5000;

   /**
    * @param session Session used to query and index the entities
    * @param handle Process handle passed on to the base strategy
    * @param clazz The type of entity to index
    */
   public SimpleClassIndexingStrategy(FullTextSession session, IndexerProcessHandle handle, Class<T> clazz)
   {
      super(session, handle, clazz);
   }

   /**
    * Opens the next query window once the current one has been fully consumed.
    *
    * @param n The entity number that was just indexed.
    */
   @Override
   protected void onEntityIndexed(int n)
   {
      boolean windowConsumed = (n % MAX_QUERY_ROWS) == 0;
      if (!windowConsumed)
      {
         return;
      }

      SimpleClassIndexingStrategy.log.info("restarting query for {} (n={})", clazz, n);
      // Release the exhausted cursor before replacing it with the next window.
      scrollableResults.close();
      scrollableResults = getScrollableResults(session, clazz, n);
   }

   /**
    * Runs the entity query for one window of rows.
    *
    * @param session Session used to create the query
    * @param clazz The type of entity to be returned
    * @param firstResult Offset of the first row of the window
    * @return a forward-only cursor over at most {@link #MAX_QUERY_ROWS} rows
    */
   @Override
   protected ScrollableResults getScrollableResults(FullTextSession session, Class<T> clazz, int firstResult)
   {
      return getQuery(session, clazz)
            .setFirstResult(firstResult)
            .setMaxResults(MAX_QUERY_ROWS)
            .scroll(ScrollMode.FORWARD_ONLY);
   }

   /**
    * Builds the query that selects all instances of the given class.
    *
    * @param session Session used to create the query
    * @param clazz The type of objects being returned by this query
    * @return the {@code from <className>} query
    */
   @Override
   protected Query getQuery(FullTextSession session, Class<T> clazz)
   {
      String hql = "from "+clazz.getName();
      return session.createQuery(hql);
   }
}

0 comments on commit 2975176

Please sign in to comment.