From d4bd2482e3a123aeb87696e34c7bee6b5d14b187 Mon Sep 17 00:00:00 2001 From: Alex Eng Date: Tue, 15 Apr 2014 14:05:56 +1000 Subject: [PATCH] Add tf_id as index --- .../hibernate/search/IndexFieldLabels.java | 1 + .../main/java/org/zanata/model/HTextFlow.java | 1 + .../service/impl/CopyTransServiceImpl.java | 41 +++---- .../impl/TranslationMemoryServiceImpl.java | 115 +++++++++--------- .../shared/model/TransMemoryQuery.java | 11 +- 5 files changed, 87 insertions(+), 82 deletions(-) diff --git a/zanata-model/src/main/java/org/zanata/hibernate/search/IndexFieldLabels.java b/zanata-model/src/main/java/org/zanata/hibernate/search/IndexFieldLabels.java index d365218faa..dd46dadb46 100644 --- a/zanata-model/src/main/java/org/zanata/hibernate/search/IndexFieldLabels.java +++ b/zanata-model/src/main/java/org/zanata/hibernate/search/IndexFieldLabels.java @@ -21,6 +21,7 @@ public interface IndexFieldLabels { public static final String TF_CONTENT = "textFlow.content-nocase"; public static final String CONTENT = "content-nocase"; public static final String TF_RES_ID = "textFlow.resId"; + public static final String TF_ID = "textFlow.id"; public static final String TF_CONTENT_HASH = "textFlow.contentHash"; public static final String TF_CONTENT_FIELDS[] = { TF_CONTENT + 0, diff --git a/zanata-model/src/main/java/org/zanata/model/HTextFlow.java b/zanata-model/src/main/java/org/zanata/model/HTextFlow.java index 08e6da1415..dab1585e3d 100644 --- a/zanata-model/src/main/java/org/zanata/model/HTextFlow.java +++ b/zanata-model/src/main/java/org/zanata/model/HTextFlow.java @@ -150,6 +150,7 @@ public HTextFlow(HDocument document, String resId, String content) { @Id @GeneratedValue + @Field(analyze = Analyze.NO) public Long getId() { return id; } diff --git a/zanata-war/src/main/java/org/zanata/service/impl/CopyTransServiceImpl.java b/zanata-war/src/main/java/org/zanata/service/impl/CopyTransServiceImpl.java index 30d8490ac4..a17aa22cbc 100644 --- a/zanata-war/src/main/java/org/zanata/service/impl/CopyTransServiceImpl.java +++ b/zanata-war/src/main/java/org/zanata/service/impl/CopyTransServiceImpl.java @@ -22,17 +22,17 @@ import java.util.List; -import com.google.common.annotations.VisibleForTesting; +import lombok.AllArgsConstructor; import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + import org.jboss.seam.ScopeType; import org.jboss.seam.annotations.In; import org.jboss.seam.annotations.Name; import org.jboss.seam.annotations.Scope; import org.zanata.async.AsyncUtils; import org.zanata.async.tasks.CopyTransTask.CopyTransTaskHandle; -import org.zanata.dao.DocumentDAO; import org.zanata.dao.ProjectDAO; -import org.zanata.dao.TextFlowTargetDAO; import org.zanata.model.HCopyTransOptions; import org.zanata.model.HDocument; import org.zanata.model.HLocale; @@ -40,16 +40,9 @@ import org.zanata.model.HTextFlow; import org.zanata.service.CopyTransService; import org.zanata.service.LocaleService; -import org.zanata.service.TranslationFinder; -import org.zanata.service.ValidationService; -import org.zanata.service.VersionStateCache; -import com.google.common.base.Optional; -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; - -import static org.zanata.model.HCopyTransOptions.ConditionRuleAction.DOWNGRADE_TO_FUZZY; -import static org.zanata.model.HCopyTransOptions.ConditionRuleAction.REJECT; +import com.google.common.base.Optional; +import com.google.common.base.Stopwatch; @Name("copyTransServiceImpl") @Scope(ScopeType.STATELESS) @@ -71,10 +64,10 @@ public class CopyTransServiceImpl implements CopyTransService { * Copies previous matching translations for the given locale into a * document. Translations are matching if their document id, textflow id and * source content are identical, and their state is approved. - * + * * The text flow revision for copied targets is set to the current text flow * revision. - * + * * @param document * the document to copy translations into * @param targetLocale @@ -86,12 +79,14 @@ private void copyTransForDocumentLocale(HDocument document, int numCopied = 0; int start = 0; + Stopwatch stopwatch = new Stopwatch().start(); while (start < document.getTextFlows().size()) { numCopied += copyTransForBatch(document, start, COPY_TRANS_BATCH_SIZE, targetLocale, options); start += COPY_TRANS_BATCH_SIZE; } + log.info("Copy completed:" + stopwatch); // Advance the task handler if there is one Optional taskHandle = @@ -107,9 +102,9 @@ private void copyTransForDocumentLocale(HDocument document, /** * Perform copy trans on a batch of text flows for a document. - * - * @param batchStart USE_HIBERNATE_SEARCH - * The text flow position to start copying. + * + * @param batchStart + * USE_HIBERNATE_SEARCH The text flow position to start copying. * @param batchLength * The number of text flows on which to perform copy trans, * starting from batchStart. @@ -123,17 +118,13 @@ private int copyTransForBatch(HDocument document, final int batchStart, boolean requireTranslationReview = document.getProjectIteration() .getRequireTranslationReview(); - List docTextFlows = - document.getTextFlows(); + List docTextFlows = document.getTextFlows(); int batchEnd = - Math.min(batchStart + batchLength, - docTextFlows.size()); + Math.min(batchStart + batchLength, docTextFlows.size()); List copyTargets = docTextFlows.subList(batchStart, batchEnd); - return copyTransWorkFactory.createCopyTransWork(targetLocale, options, - document, - requireTranslationReview, - copyTargets) + return copyTransWorkFactory.createCopyTransWork(targetLocale, + options, document, requireTranslationReview, copyTargets) .workInTransaction(); } catch (Exception e) { log.warn("exception during copy trans", e); diff --git a/zanata-war/src/main/java/org/zanata/service/impl/TranslationMemoryServiceImpl.java b/zanata-war/src/main/java/org/zanata/service/impl/TranslationMemoryServiceImpl.java index 5e9200f847..17b04629e3 100644 --- a/zanata-war/src/main/java/org/zanata/service/impl/TranslationMemoryServiceImpl.java +++ b/zanata-war/src/main/java/org/zanata/service/impl/TranslationMemoryServiceImpl.java @@ -115,7 +115,7 @@ public class TranslationMemoryServiceImpl implements TranslationMemoryService { /** * This is used by CopyTrans, with ContentHash search in lucene. Returns * first entry of the matches which sort by HTextFlowTarget.lastChanged DESC - * + * * @param textFlow * @param targetLocaleId * @param sourceLocaleId @@ -131,16 +131,12 @@ public Optional searchBestMatchTransMemory( TransMemoryQuery query = buildTMQuery(textFlow, HasSearchType.SearchType.CONTENT_HASH, - checkContext, checkDocument, checkProject); + checkContext, checkDocument, checkProject, true); List matches = findMatchingTranslation(targetLocaleId, sourceLocaleId, query, null, HTextFlowTarget.class); - matches = - Lists.newArrayList(filterMatches(matches, true, - textFlow.getId())); - if (matches.isEmpty()) { return Optional. absent(); } @@ -150,8 +146,8 @@ public Optional searchBestMatchTransMemory( public int compare(Object[] o1, Object[] o2) { HTextFlowTarget target1 = (HTextFlowTarget) o1[1]; HTextFlowTarget target2 = (HTextFlowTarget) o2[1]; - return target1.getLastChanged().compareTo( - target2.getLastChanged()); + return target2.getLastChanged().compareTo( + target1.getLastChanged()); } }); return Optional.of((HTextFlowTarget) matches.get(0)[1]); @@ -160,7 +156,7 @@ public int compare(Object[] o1, Object[] o2) { /** * This is used by TMMerge. Returns first entry of the matches which sort by * similarityPercent, sourceContents, and contents size. - * + * * @param textFlow * @param targetLocaleId * @param sourceLocaleId @@ -175,13 +171,9 @@ public Optional searchBestMatchTransMemory( LocaleId sourceLocaleId, boolean checkContext, boolean checkDocument, boolean checkProject, int thresholdPercent) { - HasSearchType.SearchType searchType = - thresholdPercent == 100 ? HasSearchType.SearchType.CONTENT_HASH - : HasSearchType.SearchType.FUZZY_PLURAL; - TransMemoryQuery query = - buildTMQuery(textFlow, searchType, checkContext, checkDocument, - checkProject); + buildTMQuery(textFlow, HasSearchType.SearchType.FUZZY_PLURAL, + checkContext, checkDocument, checkProject, false); List tmResults = searchTransMemory(targetLocaleId, sourceLocaleId, query); @@ -207,12 +199,9 @@ public List searchTransMemory( findMatchingTranslation(targetLocaleId, sourceLocaleId, transMemoryQuery, SEARCH_MAX_RESULTS); - Collection filteredList = filterMatches(matches, false, null); - Map matchesMap = - new LinkedHashMap( - filteredList.size()); - for (Object[] match : filteredList) { + new LinkedHashMap(matches.size()); + for (Object[] match : matches) { processIndexMatch(transMemoryQuery, matchesMap, match, sourceLocaleId, targetLocaleId); } @@ -224,7 +213,8 @@ public List searchTransMemory( private TransMemoryQuery buildTMQuery(HTextFlow textFlow, HasSearchType.SearchType searchType, boolean checkContext, - boolean checkDocument, boolean checkProject) { + boolean checkDocument, boolean checkProject, + boolean excludeOwnTranslation) { TransMemoryQuery.Condition project = new TransMemoryQuery.Condition(checkProject, textFlow .getDocument().getProjectIteration().getProject() @@ -236,42 +226,28 @@ private TransMemoryQuery buildTMQuery(HTextFlow textFlow, new TransMemoryQuery.Condition(checkContext, textFlow.getResId()); + TransMemoryQuery query; if (searchType.equals(HasSearchType.SearchType.CONTENT_HASH)) { - return new TransMemoryQuery(textFlow.getContentHash(), searchType, - project, document, res); + query = + new TransMemoryQuery(textFlow.getContentHash(), searchType, + project, document, res); } else { - return new TransMemoryQuery(textFlow.getContents(), searchType, - project, document, res); + query = + new TransMemoryQuery(textFlow.getContents(), searchType, + project, document, res); } - } - // filter out invalid targets(obsolete project or version) OR own - // translation - private Collection filterMatches(List results, - final boolean filterOwnTranslation, final Long tfId) { - return Collections2.filter(results, new Predicate() { - @Override - public boolean apply(Object[] input) { - Object entity = input[1]; - if (entity instanceof HTextFlowTarget) { - HTextFlowTarget target = (HTextFlowTarget) entity; - boolean isValidResult = isValidResult(target); - if (!filterOwnTranslation) { - return isValidResult; - } else if (!isValidResult - || target.getTextFlow().getId().equals(tfId)) { - return false; - } - } - return true; - } - }); + if (excludeOwnTranslation) { + query.setExcludeOwnTranslation(excludeOwnTranslation, textFlow + .getId().toString()); + } + return query; } /** * return match[0] = (float)score, match[1] = entity(HTextFlowTarget or * TransMemoryUnit) - * + * * @param targetLocaleId * @param sourceLocaleId * @param transMemoryQuery @@ -283,8 +259,13 @@ private List findMatchingTranslation(LocaleId targetLocaleId, Integer maxResults, Class... entities) { List results = Lists.newArrayList(); try { - return getSearchResult(transMemoryQuery, sourceLocaleId, - targetLocaleId, maxResults, entities); + List matches = + getSearchResult(transMemoryQuery, sourceLocaleId, + targetLocaleId, maxResults, entities); + + //filter out invalid target + return Lists.newArrayList(Collections2.filter(matches, + new ValidTargetFilterPredicate())); } catch (ParseException e) { if (transMemoryQuery.getSearchType() == HasSearchType.SearchType.RAW) { @@ -399,7 +380,7 @@ private void addOrIncrementResultItem(TransMemoryQuery transMemoryQuery, item.addSourceId(sourceId); } - private boolean isValidResult(HTextFlowTarget textFlowTarget) { + private static boolean isValidResult(HTextFlowTarget textFlowTarget) { if (textFlowTarget == null || !textFlowTarget.getState().isTranslated()) { return false; } else { @@ -563,7 +544,7 @@ private List getSearchResult(TransMemoryQuery query, /** * Generate the query to match all source contents in all the searchable * indexes. (HTextFlowTarget and TransMemoryUnit) - * + * * @param query * @param sourceLocale * @param targetLocale @@ -597,7 +578,7 @@ private Query generateQuery(TransMemoryQuery query, LocaleId sourceLocale, /** * Generates the Hibernate Search Query that will search for * {@link HTextFlowTarget} objects for matches. - * + * * @param queryParams * @param sourceLocale * @param targetLocale @@ -625,6 +606,14 @@ private Query generateTextFlowTargetQuery(TransMemoryQuery queryParams, buildContextQuery(query, queryParams); + if (queryParams.getExcludeOwnTranslation().isCheck()) { + TermQuery tmIdQuery = + new TermQuery(new Term(IndexFieldLabels.TF_ID, queryParams + .getExcludeOwnTranslation().getValue())); + + query.add(tmIdQuery, BooleanClause.Occur.MUST_NOT); + } + query.add(buildStateQuery(ContentState.New), BooleanClause.Occur.MUST_NOT); query.add(buildStateQuery(ContentState.NeedReview), @@ -637,7 +626,7 @@ private Query generateTextFlowTargetQuery(TransMemoryQuery queryParams, /** * Build query for project, document and resId context - * + * * @param queryParams * @return */ @@ -720,7 +709,7 @@ private static TermQuery buildStateQuery(ContentState state) { /** * Generates the Hibernate Search Query that will search for * {@link org.zanata.model.tm.TransMemoryUnit} objects for matches. - * + * * @param sourceLocale * @param targetLocale * @param queryText @@ -751,7 +740,7 @@ private Query generateTransMemoryQuery(LocaleId sourceLocale, /** * Joins a given set of queries into a single one with the specified * occurrence condition. - * + * * @param condition * The occurrence condition all the joined queries will have. * @param queries @@ -780,4 +769,18 @@ public boolean apply(TransMemoryResultItem tmResult) { return (int) tmResult.getSimilarityPercent() >= approvedThreshold; } } + + // filter out invalid targets(obsolete project or version) + private static class ValidTargetFilterPredicate implements + Predicate { + @Override + public boolean apply(Object[] input) { + Object entity = input[1]; + if (entity instanceof HTextFlowTarget) { + HTextFlowTarget target = (HTextFlowTarget) entity; + return isValidResult(target); + } + return true; + } + } } diff --git a/zanata-war/src/main/java/org/zanata/webtrans/shared/model/TransMemoryQuery.java b/zanata-war/src/main/java/org/zanata/webtrans/shared/model/TransMemoryQuery.java index e1a7a7bc27..2364fb6f09 100644 --- a/zanata-war/src/main/java/org/zanata/webtrans/shared/model/TransMemoryQuery.java +++ b/zanata-war/src/main/java/org/zanata/webtrans/shared/model/TransMemoryQuery.java @@ -31,7 +31,7 @@ /** * @author Sean Flanigan sflaniga@redhat.com - * + * */ public class TransMemoryQuery implements IsSerializable { private SearchType searchType; @@ -39,6 +39,7 @@ public class TransMemoryQuery implements IsSerializable { private Condition project; private Condition document; private Condition res; + private Condition excludeOwnTranslation = new Condition(false, null); @SuppressWarnings("unused") private TransMemoryQuery() { @@ -79,6 +80,10 @@ public TransMemoryQuery(String query, SearchType searchType, this.res = res; } + public void setExcludeOwnTranslation(boolean isExclude, String tfId) { + this.excludeOwnTranslation = new Condition(isExclude, tfId); + } + public Condition getProject() { return project; } @@ -91,6 +96,10 @@ public Condition getRes() { return res; } + public Condition getExcludeOwnTranslation() { + return excludeOwnTranslation; + } + public List getQueries() { return queries; }