From ee4c865d71dfbdf4126b204e3a5bf32bc2b6d3ef Mon Sep 17 00:00:00 2001 From: David Mason Date: Tue, 21 Aug 2012 17:54:54 +1000 Subject: [PATCH] add DTD adapter for upload and download of DTD files --- zanata-war/pom.xml | 9 ++ .../java/org/zanata/adapter/DTDAdapter.java | 37 ++++++++ .../org/zanata/adapter/FileFormatAdapter.java | 28 +++++- .../adapter/GenericOkapiFilterAdapter.java | 92 ++++++++++++++++--- .../org/zanata/adapter/PlainTextAdapter.java | 2 +- .../impl/TranslationFileServiceImpl.java | 13 ++- 6 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 zanata-war/src/main/java/org/zanata/adapter/DTDAdapter.java diff --git a/zanata-war/pom.xml b/zanata-war/pom.xml index 8fce7e1c2f..569312ad97 100644 --- a/zanata-war/pom.xml +++ b/zanata-war/pom.xml @@ -924,6 +924,15 @@ 2.5.1 + + + + net.sf.okapi.filters + okapi-filter-dtd + 0.17 + + + diff --git a/zanata-war/src/main/java/org/zanata/adapter/DTDAdapter.java b/zanata-war/src/main/java/org/zanata/adapter/DTDAdapter.java new file mode 100644 index 0000000000..82a636c95c --- /dev/null +++ b/zanata-war/src/main/java/org/zanata/adapter/DTDAdapter.java @@ -0,0 +1,37 @@ +/* + * Copyright 2012, Red Hat, Inc. and individual contributors + * as indicated by the @author tags. See the copyright.txt file in the + * distribution for a full listing of individual contributors. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this software; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA, or see the FSF site: http://www.fsf.org. + */ +package org.zanata.adapter; + +import net.sf.okapi.filters.dtd.DTDFilter; + +/** + * Adapter for DTD, including Mozilla DTD. + * + * @author David Mason, damason@redhat.com + * @see DTDFilter + */ +public class DTDAdapter extends GenericOkapiFilterAdapter +{ + public DTDAdapter() + { + super(new DTDFilter(), IdSource.textUnitName); + } +} diff --git a/zanata-war/src/main/java/org/zanata/adapter/FileFormatAdapter.java b/zanata-war/src/main/java/org/zanata/adapter/FileFormatAdapter.java index 3da0841772..15b586fecd 100644 --- a/zanata-war/src/main/java/org/zanata/adapter/FileFormatAdapter.java +++ b/zanata-war/src/main/java/org/zanata/adapter/FileFormatAdapter.java @@ -28,6 +28,7 @@ import org.zanata.common.LocaleId; import org.zanata.rest.dto.resource.Resource; import org.zanata.rest.dto.resource.TextFlowTarget; +import org.zanata.rest.dto.resource.TranslationsResource; /** * Common interface for classes wrapping Okapi filters. @@ -38,16 +39,33 @@ public interface FileFormatAdapter { + /** + * Extract source strings from the given document content. + * + * @param documentContent + * @param sourceLocale + * @return representation of the strings in the document + */ + // TODO may want to use a string locale id so it can be used both for Zanata and Okapi locale classes Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLocale); /** - * TODO could pass a TranslationsResource or a List + * Extract translation strings from the given translation document. + * + * @param translatedDocumentContent translated document to parse + * @return representation of the translations in the document + */ + TranslationsResource parseTranslationFile(InputStream translatedDocumentContent); + + /** + * Write translated file to the given output, using the given list of translations. * - * @param output - * @param original - * @param translations - * @param locale + * @param output stream to write translated document + * @param original source document + * @param translations to use in generating translated file + * @param locale to use for translated document * @throws IOException */ void writeTranslatedFile(OutputStream output, InputStream original, List translations, String locale) throws IOException; + } diff --git a/zanata-war/src/main/java/org/zanata/adapter/GenericOkapiFilterAdapter.java b/zanata-war/src/main/java/org/zanata/adapter/GenericOkapiFilterAdapter.java index 1209ab1790..9989033147 100644 --- a/zanata-war/src/main/java/org/zanata/adapter/GenericOkapiFilterAdapter.java +++ b/zanata-war/src/main/java/org/zanata/adapter/GenericOkapiFilterAdapter.java @@ -33,14 +33,17 @@ import net.sf.okapi.common.resource.TextFragment; import net.sf.okapi.common.resource.TextUnit; +import org.zanata.common.ContentState; import org.zanata.common.ContentType; import org.zanata.common.LocaleId; import org.zanata.rest.dto.resource.Resource; import org.zanata.rest.dto.resource.TextFlow; import org.zanata.rest.dto.resource.TextFlowTarget; +import org.zanata.rest.dto.resource.TranslationsResource; import org.zanata.util.HashUtil; /** + * An adapter that uses a provided {@link IFilter} implementation to parse documents. * * @author David Mason, damason@redhat.com * @@ -48,24 +51,44 @@ public class GenericOkapiFilterAdapter implements FileFormatAdapter { + /** + * Determines how TextFlow ids are assigned for Okapi TextUnits + */ + public enum IdSource { + textUnitId, + textUnitName, + contentHash, + }; + private final IFilter filter; - private final boolean useContentHashId; + private final IdSource idSource; + /** + * Create an adapter that will use filter-provided id as TextFlow id. + * + * @param filter {@link IFilter} used to parse the document + */ public GenericOkapiFilterAdapter(IFilter filter) { - this(filter, false); + this.filter = filter; + this.idSource = IdSource.textUnitId; } - public GenericOkapiFilterAdapter(IFilter filter, boolean useContentHashId) + /** + * Create an adapter that will use the specified {@link IdSource} as TextFlow id. + * + * @param filter {@link IFilter} used to parse the document + * @param idSource determines how ids are assigned to TextFlows + */ + public GenericOkapiFilterAdapter(IFilter filter, IdSource idSource) { this.filter = filter; - this.useContentHashId = useContentHashId; + this.idSource = idSource; } @Override public Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLocale) { - // TODO may want to use a string locale id so it can be used both for Zanata and Okapi locale classes Resource document = new Resource(); document.setLang(sourceLocale); document.setContentType(ContentType.TextPlain); @@ -94,6 +117,37 @@ public Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLo return document; } + @Override + public TranslationsResource parseTranslationFile(InputStream fileContents) + { + TranslationsResource transRes = new TranslationsResource(); + List translations = transRes.getTextFlowTargets(); + + // TODO look at passing the appropriate locale in to this if en is not appropriate. + // or make sure it is processed later. + RawDocument rawDoc = new RawDocument(fileContents, "UTF-8", net.sf.okapi.common.LocaleId.fromString("en")); + filter.open(rawDoc); + + while (filter.hasNext()) { + Event event = filter.next(); + if (event.getEventType() == EventType.TEXT_UNIT) + { + TextUnit tu = (TextUnit) event.getResource(); + if (tu.isTranslatable()) + { + TextFlowTarget tft = new TextFlowTarget(getIdFor(tu)); + tft.setContents(tu.getSource().toString()); + tft.setState(ContentState.Approved); + translations.add(tft); + } + } + } + filter.close(); + + return transRes; + } + + @Override public void writeTranslatedFile(OutputStream output, InputStream original, List translations, String locale) throws IOException { @@ -125,6 +179,14 @@ public void writeTranslatedFile(OutputStream output, InputStream original, List< writer.close(); } + /** + * Attempt to locate a matching translation for the given id in the given list. + * + * @param idToFind + * @param translationsToSearchIn + * @return the matching translation, or null if no translation matches the id + */ + // TODO make targets a map against id private TextFlowTarget findTextFlowTarget(String idToFind, List translationsToSearchIn) { for (TextFlowTarget target : translationsToSearchIn) @@ -137,16 +199,24 @@ private TextFlowTarget findTextFlowTarget(String idToFind, List return null; } - private String getIdFor(TextUnit tu) + /** + * Return the id for a TextUnit based on id assignment rules. + * This method can be overridden for more complex id assignment. + * + * @param tu for which to get id + * @return the id for the given tu + */ + protected String getIdFor(TextUnit tu) { - if (useContentHashId) + switch (idSource) { + case contentHash: return HashUtil.generateHash(tu.getSource().toString()); - } - else - { + case textUnitName: + return tu.getName(); + case textUnitId: + default: return tu.getId(); } } - } diff --git a/zanata-war/src/main/java/org/zanata/adapter/PlainTextAdapter.java b/zanata-war/src/main/java/org/zanata/adapter/PlainTextAdapter.java index 3f6e9d3b9f..934ff9bedf 100644 --- a/zanata-war/src/main/java/org/zanata/adapter/PlainTextAdapter.java +++ b/zanata-war/src/main/java/org/zanata/adapter/PlainTextAdapter.java @@ -31,6 +31,6 @@ public class PlainTextAdapter extends GenericOkapiFilterAdapter { public PlainTextAdapter() { - super(new PlainTextFilter(), true); + super(new PlainTextFilter(), IdSource.contentHash); } } diff --git a/zanata-war/src/main/java/org/zanata/service/impl/TranslationFileServiceImpl.java b/zanata-war/src/main/java/org/zanata/service/impl/TranslationFileServiceImpl.java index 58c96f28ac..dbfcac93d2 100644 --- a/zanata-war/src/main/java/org/zanata/service/impl/TranslationFileServiceImpl.java +++ b/zanata-war/src/main/java/org/zanata/service/impl/TranslationFileServiceImpl.java @@ -25,7 +25,9 @@ import org.jboss.seam.annotations.Name; import org.jboss.seam.annotations.Scope; import org.xml.sax.InputSource; +import org.zanata.adapter.DTDAdapter; import org.zanata.adapter.FileFormatAdapter; +import org.zanata.adapter.OpenOfficeAdapter; import org.zanata.adapter.PlainTextAdapter; import org.zanata.adapter.po.PoReader2; import org.zanata.common.LocaleId; @@ -68,6 +70,11 @@ public TranslationsResource parseTranslationFile(InputStream fileContents, Strin throw new ZanataServiceException("Invalid PO file contents on file: " + fileName); } } + else if (hasAdapterFor(fileName)) + { + // TODO handle exceptions + return getAdapterFor(fileName).parseTranslationFile(fileContents); + } else { throw new ZanataServiceException("Unsupported Translation file: " + fileName); @@ -154,7 +161,7 @@ public boolean hasAdapterFor(String fileNameOrExtension) else { // TODO add real mapping - return extension.equals("txt"); + return extension.equals("txt") || extension.equals("dtd"); } } @@ -173,6 +180,10 @@ public FileFormatAdapter getAdapterFor(String fileNameOrExtension) { return new PlainTextAdapter(); } + else if (extension.equals("dtd")) + { + return new DTDAdapter(); + } else { return null;