Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Add tf_id as index
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Eng committed Apr 15, 2014
1 parent 034c81d commit d4bd248
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 82 deletions.
Expand Up @@ -21,6 +21,7 @@ public interface IndexFieldLabels {
public static final String TF_CONTENT = "textFlow.content-nocase";
public static final String CONTENT = "content-nocase";
public static final String TF_RES_ID = "textFlow.resId";
public static final String TF_ID = "textFlow.id";
public static final String TF_CONTENT_HASH = "textFlow.contentHash";

public static final String TF_CONTENT_FIELDS[] = { TF_CONTENT + 0,
Expand Down
1 change: 1 addition & 0 deletions zanata-model/src/main/java/org/zanata/model/HTextFlow.java
Expand Up @@ -150,6 +150,7 @@ public HTextFlow(HDocument document, String resId, String content) {

/**
 * Returns the identifier of this text flow.
 *
 * The {@code @Field(analyze = Analyze.NO)} annotation additionally exposes
 * the id as an un-analyzed search index field, so it can be matched as an
 * exact term.
 * NOTE(review): presumably this is the field addressed as
 * IndexFieldLabels.TF_ID ("textFlow.id") when queried through
 * HTextFlowTarget - confirm against the index mapping.
 *
 * @return the generated id value held by this entity
 */
@Id
@GeneratedValue
@Field(analyze = Analyze.NO)
public Long getId() {
return id;
}
Expand Down
Expand Up @@ -22,34 +22,27 @@

import java.util.List;

import com.google.common.annotations.VisibleForTesting;
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import org.jboss.seam.ScopeType;
import org.jboss.seam.annotations.In;
import org.jboss.seam.annotations.Name;
import org.jboss.seam.annotations.Scope;
import org.zanata.async.AsyncUtils;
import org.zanata.async.tasks.CopyTransTask.CopyTransTaskHandle;
import org.zanata.dao.DocumentDAO;
import org.zanata.dao.ProjectDAO;
import org.zanata.dao.TextFlowTargetDAO;
import org.zanata.model.HCopyTransOptions;
import org.zanata.model.HDocument;
import org.zanata.model.HLocale;
import org.zanata.model.HProjectIteration;
import org.zanata.model.HTextFlow;
import org.zanata.service.CopyTransService;
import org.zanata.service.LocaleService;
import org.zanata.service.TranslationFinder;
import org.zanata.service.ValidationService;
import org.zanata.service.VersionStateCache;
import com.google.common.base.Optional;

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import static org.zanata.model.HCopyTransOptions.ConditionRuleAction.DOWNGRADE_TO_FUZZY;
import static org.zanata.model.HCopyTransOptions.ConditionRuleAction.REJECT;
import com.google.common.base.Optional;
import com.google.common.base.Stopwatch;

@Name("copyTransServiceImpl")
@Scope(ScopeType.STATELESS)
Expand All @@ -71,10 +64,10 @@ public class CopyTransServiceImpl implements CopyTransService {
* Copies previous matching translations for the given locale into a
* document. Translations are matching if their document id, textflow id and
* source content are identical, and their state is approved.
*
*
* The text flow revision for copied targets is set to the current text flow
* revision.
*
*
* @param document
* the document to copy translations into
* @param targetLocale
Expand All @@ -86,12 +79,14 @@ private void copyTransForDocumentLocale(HDocument document,
int numCopied = 0;
int start = 0;

Stopwatch stopwatch = new Stopwatch().start();
while (start < document.getTextFlows().size()) {
numCopied +=
copyTransForBatch(document, start, COPY_TRANS_BATCH_SIZE,
targetLocale, options);
start += COPY_TRANS_BATCH_SIZE;
}
log.info("Copy completed:" + stopwatch);

// Advance the task handler if there is one
Optional<CopyTransTaskHandle> taskHandle =
Expand All @@ -107,9 +102,9 @@ private void copyTransForDocumentLocale(HDocument document,

/**
* Perform copy trans on a batch of text flows for a document.
*
* @param batchStart USE_HIBERNATE_SEARCH
* The text flow position to start copying.
*
* @param batchStart
* The text flow position to start copying.
* @param batchLength
* The number of text flows on which to perform copy trans,
* starting from batchStart.
Expand All @@ -123,17 +118,13 @@ private int copyTransForBatch(HDocument document, final int batchStart,
boolean requireTranslationReview =
document.getProjectIteration()
.getRequireTranslationReview();
List<HTextFlow> docTextFlows =
document.getTextFlows();
List<HTextFlow> docTextFlows = document.getTextFlows();
int batchEnd =
Math.min(batchStart + batchLength,
docTextFlows.size());
Math.min(batchStart + batchLength, docTextFlows.size());
List<HTextFlow> copyTargets =
docTextFlows.subList(batchStart, batchEnd);
return copyTransWorkFactory.createCopyTransWork(targetLocale, options,
document,
requireTranslationReview,
copyTargets)
return copyTransWorkFactory.createCopyTransWork(targetLocale,
options, document, requireTranslationReview, copyTargets)
.workInTransaction();
} catch (Exception e) {
log.warn("exception during copy trans", e);
Expand Down
Expand Up @@ -115,7 +115,7 @@ public class TranslationMemoryServiceImpl implements TranslationMemoryService {
/**
* This is used by CopyTrans, with a ContentHash search in Lucene. Returns
* the first entry of the matches, sorted by HTextFlowTarget.lastChanged DESC.
*
*
* @param textFlow
* @param targetLocaleId
* @param sourceLocaleId
Expand All @@ -131,16 +131,12 @@ public Optional<HTextFlowTarget> searchBestMatchTransMemory(

TransMemoryQuery query =
buildTMQuery(textFlow, HasSearchType.SearchType.CONTENT_HASH,
checkContext, checkDocument, checkProject);
checkContext, checkDocument, checkProject, true);

List<Object[]> matches =
findMatchingTranslation(targetLocaleId, sourceLocaleId, query,
null, HTextFlowTarget.class);

matches =
Lists.newArrayList(filterMatches(matches, true,
textFlow.getId()));

if (matches.isEmpty()) {
return Optional.<HTextFlowTarget> absent();
}
Expand All @@ -150,8 +146,8 @@ public Optional<HTextFlowTarget> searchBestMatchTransMemory(
public int compare(Object[] o1, Object[] o2) {
HTextFlowTarget target1 = (HTextFlowTarget) o1[1];
HTextFlowTarget target2 = (HTextFlowTarget) o2[1];
return target1.getLastChanged().compareTo(
target2.getLastChanged());
return target2.getLastChanged().compareTo(
target1.getLastChanged());
}
});
return Optional.of((HTextFlowTarget) matches.get(0)[1]);
Expand All @@ -160,7 +156,7 @@ public int compare(Object[] o1, Object[] o2) {
/**
* This is used by TMMerge. Returns the first entry of the matches, sorted by
* similarityPercent, sourceContents, and contents size.
*
*
* @param textFlow
* @param targetLocaleId
* @param sourceLocaleId
Expand All @@ -175,13 +171,9 @@ public Optional<TransMemoryResultItem> searchBestMatchTransMemory(
LocaleId sourceLocaleId, boolean checkContext,
boolean checkDocument, boolean checkProject, int thresholdPercent) {

HasSearchType.SearchType searchType =
thresholdPercent == 100 ? HasSearchType.SearchType.CONTENT_HASH
: HasSearchType.SearchType.FUZZY_PLURAL;

TransMemoryQuery query =
buildTMQuery(textFlow, searchType, checkContext, checkDocument,
checkProject);
buildTMQuery(textFlow, HasSearchType.SearchType.FUZZY_PLURAL,
checkContext, checkDocument, checkProject, false);

List<TransMemoryResultItem> tmResults =
searchTransMemory(targetLocaleId, sourceLocaleId, query);
Expand All @@ -207,12 +199,9 @@ public List<TransMemoryResultItem> searchTransMemory(
findMatchingTranslation(targetLocaleId, sourceLocaleId,
transMemoryQuery, SEARCH_MAX_RESULTS);

Collection<Object[]> filteredList = filterMatches(matches, false, null);

Map<TMKey, TransMemoryResultItem> matchesMap =
new LinkedHashMap<TMKey, TransMemoryResultItem>(
filteredList.size());
for (Object[] match : filteredList) {
new LinkedHashMap<TMKey, TransMemoryResultItem>(matches.size());
for (Object[] match : matches) {
processIndexMatch(transMemoryQuery, matchesMap, match,
sourceLocaleId, targetLocaleId);
}
Expand All @@ -224,7 +213,8 @@ public List<TransMemoryResultItem> searchTransMemory(

private TransMemoryQuery buildTMQuery(HTextFlow textFlow,
HasSearchType.SearchType searchType, boolean checkContext,
boolean checkDocument, boolean checkProject) {
boolean checkDocument, boolean checkProject,
boolean excludeOwnTranslation) {
TransMemoryQuery.Condition project =
new TransMemoryQuery.Condition(checkProject, textFlow
.getDocument().getProjectIteration().getProject()
Expand All @@ -236,42 +226,28 @@ private TransMemoryQuery buildTMQuery(HTextFlow textFlow,
new TransMemoryQuery.Condition(checkContext,
textFlow.getResId());

TransMemoryQuery query;
if (searchType.equals(HasSearchType.SearchType.CONTENT_HASH)) {
return new TransMemoryQuery(textFlow.getContentHash(), searchType,
project, document, res);
query =
new TransMemoryQuery(textFlow.getContentHash(), searchType,
project, document, res);
} else {
return new TransMemoryQuery(textFlow.getContents(), searchType,
project, document, res);
query =
new TransMemoryQuery(textFlow.getContents(), searchType,
project, document, res);
}
}

// filter out invalid targets(obsolete project or version) OR own
// translation
private Collection<Object[]> filterMatches(List<Object[]> results,
final boolean filterOwnTranslation, final Long tfId) {
return Collections2.filter(results, new Predicate<Object[]>() {
@Override
public boolean apply(Object[] input) {
Object entity = input[1];
if (entity instanceof HTextFlowTarget) {
HTextFlowTarget target = (HTextFlowTarget) entity;
boolean isValidResult = isValidResult(target);
if (!filterOwnTranslation) {
return isValidResult;
} else if (!isValidResult
|| target.getTextFlow().getId().equals(tfId)) {
return false;
}
}
return true;
}
});
if (excludeOwnTranslation) {
query.setExcludeOwnTranslation(excludeOwnTranslation, textFlow
.getId().toString());
}
return query;
}

/**
* Returns a list of matches where match[0] = (float) score and match[1] =
* entity (HTextFlowTarget or TransMemoryUnit).
*
*
* @param targetLocaleId
* @param sourceLocaleId
* @param transMemoryQuery
Expand All @@ -283,8 +259,13 @@ private List<Object[]> findMatchingTranslation(LocaleId targetLocaleId,
Integer maxResults, Class<?>... entities) {
List<Object[]> results = Lists.newArrayList();
try {
return getSearchResult(transMemoryQuery, sourceLocaleId,
targetLocaleId, maxResults, entities);
List<Object[]> matches =
getSearchResult(transMemoryQuery, sourceLocaleId,
targetLocaleId, maxResults, entities);

//filter out invalid target
return Lists.newArrayList(Collections2.filter(matches,
new ValidTargetFilterPredicate()));

} catch (ParseException e) {
if (transMemoryQuery.getSearchType() == HasSearchType.SearchType.RAW) {
Expand Down Expand Up @@ -399,7 +380,7 @@ private void addOrIncrementResultItem(TransMemoryQuery transMemoryQuery,
item.addSourceId(sourceId);
}

private boolean isValidResult(HTextFlowTarget textFlowTarget) {
private static boolean isValidResult(HTextFlowTarget textFlowTarget) {
if (textFlowTarget == null || !textFlowTarget.getState().isTranslated()) {
return false;
} else {
Expand Down Expand Up @@ -563,7 +544,7 @@ private List<Object[]> getSearchResult(TransMemoryQuery query,
/**
* Generate the query to match all source contents in all the searchable
* indexes. (HTextFlowTarget and TransMemoryUnit)
*
*
* @param query
* @param sourceLocale
* @param targetLocale
Expand Down Expand Up @@ -597,7 +578,7 @@ private Query generateQuery(TransMemoryQuery query, LocaleId sourceLocale,
/**
* Generates the Hibernate Search Query that will search for
* {@link HTextFlowTarget} objects for matches.
*
*
* @param queryParams
* @param sourceLocale
* @param targetLocale
Expand Down Expand Up @@ -625,6 +606,14 @@ private Query generateTextFlowTargetQuery(TransMemoryQuery queryParams,

buildContextQuery(query, queryParams);

if (queryParams.getExcludeOwnTranslation().isCheck()) {
TermQuery tmIdQuery =
new TermQuery(new Term(IndexFieldLabels.TF_ID, queryParams
.getExcludeOwnTranslation().getValue()));

query.add(tmIdQuery, BooleanClause.Occur.MUST_NOT);
}

query.add(buildStateQuery(ContentState.New),
BooleanClause.Occur.MUST_NOT);
query.add(buildStateQuery(ContentState.NeedReview),
Expand All @@ -637,7 +626,7 @@ private Query generateTextFlowTargetQuery(TransMemoryQuery queryParams,

/**
* Build query for project, document and resId context
*
*
* @param queryParams
* @return
*/
Expand Down Expand Up @@ -720,7 +709,7 @@ private static TermQuery buildStateQuery(ContentState state) {
/**
* Generates the Hibernate Search Query that will search for
* {@link org.zanata.model.tm.TransMemoryUnit} objects for matches.
*
*
* @param sourceLocale
* @param targetLocale
* @param queryText
Expand Down Expand Up @@ -751,7 +740,7 @@ private Query generateTransMemoryQuery(LocaleId sourceLocale,
/**
* Joins a given set of queries into a single one with the specified
* occurrence condition.
*
*
* @param condition
* The occurrence condition all the joined queries will have.
* @param queries
Expand Down Expand Up @@ -780,4 +769,18 @@ public boolean apply(TransMemoryResultItem tmResult) {
return (int) tmResult.getSimilarityPercent() >= approvedThreshold;
}
}

/**
 * Predicate over raw search result rows that filters out invalid targets
 * (obsolete project or version).
 *
 * Each row is expected to carry its entity at index 1. Rows whose entity is
 * an {@link HTextFlowTarget} are accepted only when
 * {@code isValidResult} approves the target; rows holding any other kind of
 * entity are always accepted.
 */
private static class ValidTargetFilterPredicate implements
        Predicate<Object[]> {
    @Override
    public boolean apply(Object[] input) {
        Object candidate = input[1];
        // Only text flow targets need validating; let everything else pass.
        if (!(candidate instanceof HTextFlowTarget)) {
            return true;
        }
        return isValidResult((HTextFlowTarget) candidate);
    }
}
}

0 comments on commit d4bd248

Please sign in to comment.