Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
add DTD adapter for upload and download of DTD files
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmason committed Aug 21, 2012
1 parent 3826751 commit ee4c865
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 18 deletions.
9 changes: 9 additions & 0 deletions zanata-war/pom.xml
Expand Up @@ -924,6 +924,15 @@
<version>2.5.1</version>
</dependency>

<!-- Okapi Filters -->

<dependency>
<groupId>net.sf.okapi.filters</groupId>
<artifactId>okapi-filter-dtd</artifactId>
<version>0.17</version>
</dependency>


<!-- Other -->

<dependency>
Expand Down
37 changes: 37 additions & 0 deletions zanata-war/src/main/java/org/zanata/adapter/DTDAdapter.java
@@ -0,0 +1,37 @@
/*
* Copyright 2012, Red Hat, Inc. and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.zanata.adapter;

import net.sf.okapi.filters.dtd.DTDFilter;

/**
* Adapter for DTD, including Mozilla DTD.
* <p>
* This class only selects the Okapi filter and the id source; it declares no
* other members, so all behaviour comes from {@link GenericOkapiFilterAdapter}.
*
* @author David Mason, <a href="mailto:damason@redhat.com">damason@redhat.com</a>
* @see DTDFilter
*/
public class DTDAdapter extends GenericOkapiFilterAdapter
{
/**
* Create an adapter backed by a new {@link DTDFilter}, passing
* {@link IdSource#textUnitName} as the id source to the superclass.
*/
public DTDAdapter()
{
super(new DTDFilter(), IdSource.textUnitName);
}
}
28 changes: 23 additions & 5 deletions zanata-war/src/main/java/org/zanata/adapter/FileFormatAdapter.java
Expand Up @@ -28,6 +28,7 @@
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.resource.Resource;
import org.zanata.rest.dto.resource.TextFlowTarget;
import org.zanata.rest.dto.resource.TranslationsResource;

/**
* Common interface for classes wrapping Okapi filters.
Expand All @@ -38,16 +39,33 @@
public interface FileFormatAdapter
{

/**
* Extract source strings from the given document content.
*
* @param documentContent content of the source document to parse
* @param sourceLocale locale of the source document
* @return representation of the strings in the document
*/
// TODO may want to use a string locale id so it can be used both for Zanata and Okapi locale classes
Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLocale);

/**
* Extract translation strings from the given translation document.
*
* @param translatedDocumentContent translated document to parse
* @return representation of the translations in the document
*/
TranslationsResource parseTranslationFile(InputStream translatedDocumentContent);

/**
* Write translated file to the given output, using the given list of translations.
*
* @param output stream to write translated document
* @param original source document
* @param translations to use in generating translated file
* @param locale to use for translated document
* @throws IOException if an I/O error occurs
*/
// TODO could pass a TranslationsResource or a List<TextFlowTarget>
void writeTranslatedFile(OutputStream output, InputStream original, List<TextFlowTarget> translations, String locale) throws IOException;

}
Expand Up @@ -33,39 +33,62 @@
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextUnit;

import org.zanata.common.ContentState;
import org.zanata.common.ContentType;
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.resource.Resource;
import org.zanata.rest.dto.resource.TextFlow;
import org.zanata.rest.dto.resource.TextFlowTarget;
import org.zanata.rest.dto.resource.TranslationsResource;
import org.zanata.util.HashUtil;

/**
* An adapter that uses a provided {@link IFilter} implementation to parse documents.
*
* @author David Mason, <a href="mailto:damason@redhat.com">damason@redhat.com</a>
*
*/
public class GenericOkapiFilterAdapter implements FileFormatAdapter
{

/**
* Determines how TextFlow ids are assigned for Okapi TextUnits
*/
public enum IdSource {
/** Use the value of {@code TextUnit.getId()} as the id. */
textUnitId,
/** Use the value of {@code TextUnit.getName()} as the id. */
textUnitName,
/** Use a hash generated from the string form of the TextUnit's source content as the id. */
contentHash,
};

/** Okapi filter used to parse and write documents. */
private final IFilter filter;

/** Rule used to derive TextFlow ids from Okapi TextUnits. */
private final IdSource idSource;

/**
 * Create an adapter that will use filter-provided id as TextFlow id.
 *
 * @param filter {@link IFilter} used to parse the document
 */
public GenericOkapiFilterAdapter(IFilter filter)
{
   // Delegate so that field initialisation lives in a single constructor.
   this(filter, IdSource.textUnitId);
}

/**
 * Create an adapter that will use the specified {@link IdSource} as TextFlow id.
 *
 * @param filter {@link IFilter} used to parse the document
 * @param idSource determines how ids are assigned to TextFlows
 */
public GenericOkapiFilterAdapter(IFilter filter, IdSource idSource)
{
   this.filter = filter;
   this.idSource = idSource;
}

@Override
public Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLocale)
{
// TODO may want to use a string locale id so it can be used both for Zanata and Okapi locale classes
Resource document = new Resource();
document.setLang(sourceLocale);
document.setContentType(ContentType.TextPlain);
Expand Down Expand Up @@ -94,6 +117,37 @@ public Resource parseDocumentFile(InputStream documentContent, LocaleId sourceLo
return document;
}

/**
 * Extract translations from the given translated document using this adapter's filter.
 * <p>
 * Each translatable text unit produces one {@link TextFlowTarget}: its id comes from
 * {@link #getIdFor(TextUnit)}, its content is the string form of the unit's source,
 * and its state is set to {@link ContentState#Approved}.
 *
 * @param fileContents translated document to parse; passed to the filter with encoding "UTF-8"
 * @return resource whose target list holds one entry per translatable text unit
 */
@Override
public TranslationsResource parseTranslationFile(InputStream fileContents)
{
   TranslationsResource transRes = new TranslationsResource();
   List<TextFlowTarget> translations = transRes.getTextFlowTargets();

   // TODO look at passing the appropriate locale in to this if en is not appropriate.
   // or make sure it is processed later.
   RawDocument rawDoc = new RawDocument(fileContents, "UTF-8", net.sf.okapi.common.LocaleId.fromString("en"));
   try
   {
      filter.open(rawDoc);
      while (filter.hasNext())
      {
         Event event = filter.next();
         if (event.getEventType() == EventType.TEXT_UNIT)
         {
            TextUnit tu = (TextUnit) event.getResource();
            if (tu.isTranslatable())
            {
               TextFlowTarget tft = new TextFlowTarget(getIdFor(tu));
               tft.setContents(tu.getSource().toString());
               tft.setState(ContentState.Approved);
               translations.add(tft);
            }
         }
      }
   }
   finally
   {
      // Always close the filter, even when opening or parsing fails part-way through.
      filter.close();
   }

   return transRes;
}


@Override
public void writeTranslatedFile(OutputStream output, InputStream original, List<TextFlowTarget> translations, String locale) throws IOException
{
Expand Down Expand Up @@ -125,6 +179,14 @@ public void writeTranslatedFile(OutputStream output, InputStream original, List<
writer.close();
}

/**
* Attempt to locate a matching translation for the given id in the given list.
*
* @param idToFind
* @param translationsToSearchIn
* @return the matching translation, or null if no translation matches the id
*/
// TODO make targets a map against id
private TextFlowTarget findTextFlowTarget(String idToFind, List<TextFlowTarget> translationsToSearchIn)
{
for (TextFlowTarget target : translationsToSearchIn)
Expand All @@ -137,16 +199,24 @@ private TextFlowTarget findTextFlowTarget(String idToFind, List<TextFlowTarget>
return null;
}

private String getIdFor(TextUnit tu)
/**
* Return the id for a TextUnit based on id assignment rules.
* This method can be overridden for more complex id assignment.
*
* @param tu for which to get id
* @return the id for the given tu
*/
protected String getIdFor(TextUnit tu)
{
if (useContentHashId)
switch (idSource)
{
case contentHash:
return HashUtil.generateHash(tu.getSource().toString());
}
else
{
case textUnitName:
return tu.getName();
case textUnitId:
default:
return tu.getId();
}
}

}
Expand Up @@ -31,6 +31,6 @@ public class PlainTextAdapter extends GenericOkapiFilterAdapter
{
/**
 * Create an adapter backed by a new {@link PlainTextFilter}, passing
 * {@link IdSource#contentHash} as the id source to the superclass.
 */
public PlainTextAdapter()
{
   super(new PlainTextFilter(), IdSource.contentHash);
}
}
Expand Up @@ -25,7 +25,9 @@
import org.jboss.seam.annotations.Name;
import org.jboss.seam.annotations.Scope;
import org.xml.sax.InputSource;
import org.zanata.adapter.DTDAdapter;
import org.zanata.adapter.FileFormatAdapter;
import org.zanata.adapter.OpenOfficeAdapter;
import org.zanata.adapter.PlainTextAdapter;
import org.zanata.adapter.po.PoReader2;
import org.zanata.common.LocaleId;
Expand Down Expand Up @@ -68,6 +70,11 @@ public TranslationsResource parseTranslationFile(InputStream fileContents, Strin
throw new ZanataServiceException("Invalid PO file contents on file: " + fileName);
}
}
else if (hasAdapterFor(fileName))
{
// TODO handle exceptions
return getAdapterFor(fileName).parseTranslationFile(fileContents);
}
else
{
throw new ZanataServiceException("Unsupported Translation file: " + fileName);
Expand Down Expand Up @@ -154,7 +161,7 @@ public boolean hasAdapterFor(String fileNameOrExtension)
else
{
// TODO add real mapping
return extension.equals("txt");
return extension.equals("txt") || extension.equals("dtd");
}
}

Expand All @@ -173,6 +180,10 @@ public FileFormatAdapter getAdapterFor(String fileNameOrExtension)
{
return new PlainTextAdapter();
}
else if (extension.equals("dtd"))
{
return new DTDAdapter();
}
else
{
return null;
Expand Down

0 comments on commit ee4c865

Please sign in to comment.