Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Disallow elements inside source/target elements; removed escaping for…
Browse files Browse the repository at this point in the history
… '&' chars
  • Loading branch information
seanf committed Oct 31, 2012
1 parent 15f6d60 commit 1229a40
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 147 deletions.
@@ -1,7 +1,7 @@
package org.zanata.adapter.xliff;

import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.TreeSet;

public abstract class XliffCommon
{
Expand All @@ -15,7 +15,7 @@ public abstract class XliffCommon
protected static final String ELE_TARGET = "target";
protected static final String ELE_BODY = "body";

private static List<String> contentEle = new ArrayList<String>();
private static Collection<String> contentEle = new TreeSet<String>();

protected static final String ATTRI_SOURCE_LANGUAGE = "source-language";
protected static final String ATTRI_TARGET_LANGUAGE = "target-language";
Expand All @@ -25,37 +25,25 @@ public abstract class XliffCommon
protected static final String ATTRI_DATATYPE = "datatype";
protected static final String ATTRI_ORIGINAL = "original";

protected static List<String> getContentElementList()
/**
 * Tells whether the given element name is present in the collection
 * returned by {@code getContentElementList()}.
 *
 * @param localName element local name to look up
 * @return {@code true} if the content-element collection contains
 *         {@code localName}, {@code false} otherwise
 */
public static boolean legalInsideContent(String localName)
{
    boolean listed = getContentElementList().contains(localName);
    return listed;
}


protected static Collection<String> getContentElementList()
{
if (contentEle.isEmpty())
{
contentEle.add("<g>");
contentEle.add("</g>");
contentEle.add("<g/>");
contentEle.add("<x>");
contentEle.add("</x>");
contentEle.add("<x/>");
contentEle.add("<bx>");
contentEle.add("</bx>");
contentEle.add("<bx/>");
contentEle.add("<ex>");
contentEle.add("</ex>");
contentEle.add("<ex/>");
contentEle.add("<bpt>");
contentEle.add("</bpt>");
contentEle.add("<bpt/>");
contentEle.add("<ept>");
contentEle.add("</ept>");
contentEle.add("<ept/>");
contentEle.add("<ph>");
contentEle.add("</ph>");
contentEle.add("<ph/>");
contentEle.add("<it>");
contentEle.add("</it>");
contentEle.add("<it/>");
contentEle.add("<mrk>");
contentEle.add("</mrk>");
contentEle.add("<mrk/>");
contentEle.add("g");
contentEle.add("x");
contentEle.add("bx");
contentEle.add("ex");
contentEle.add("bpt");
contentEle.add("ept");
contentEle.add("ph");
contentEle.add("it");
contentEle.add("mrk");
}
return contentEle;

Expand Down
Expand Up @@ -2,9 +2,8 @@

import static java.util.Arrays.asList;

import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
Expand Down Expand Up @@ -103,6 +102,7 @@ else if (xmlr.isStartElement() && xmlr.getLocalName().equals(ELE_TRANS_UNIT))
else if (xmlr.isEndElement() && xmlr.getLocalName().equals(ELE_FILE))
{
// this is to ensure only 1 <file> element in each xliff document
// FIXME it only ensures that we silently ignore extra file elements!
break;
}
}
Expand All @@ -113,32 +113,6 @@ else if (xmlr.isEndElement() && xmlr.getLocalName().equals(ELE_FILE))
}
}

// Text, plus zero, one or more of the following elements, in any order:
// <g>, <x/>, <bx/>, <ex/>, <bpt>, <ept>, <ph>, <it>, <mrk>.

// Previous pattern (commented out):
// private final static String xmlTagRegex = "(<.[^(><.)]+>)";
// Matches an attribute-less tag: '<', an optional '/', one or more lowercase
// letters, optional digits, an optional '/', then '>'.
private final static String xmlTagRegex = "<[/]?[a-z]+[0-9]*[/]?>";
// Compiled once here so the pattern is not rebuilt on each use.
private final static Pattern xmlTagPattern = Pattern.compile(xmlTagRegex);

/**
 * Reads an element's value via {@code getElementValue} and checks every
 * tag-like token found in it (as matched by {@code xmlTagPattern}) against
 * the content-element list.
 *
 * @param xmlr stream reader passed on to {@code getElementValue}
 * @param endElement element name passed on to {@code getElementValue}
 * @param id identifier reported in the error message
 * @return the value obtained from {@code getElementValue}, unmodified
 * @throws XMLStreamException declared for the underlying stream read
 * @throws RuntimeException if a matched token is not contained in
 *         {@code getContentElementList()}
 */
private String extractAndValidateContent(XMLStreamReader xmlr, String endElement, String id) throws XMLStreamException
{
    final String value = getElementValue(xmlr, endElement);
    if (StringUtils.isEmpty(value))
    {
        // nothing to scan for tags
        return value;
    }

    for (Matcher tags = xmlTagPattern.matcher(value); tags.find();)
    {
        if (getContentElementList().contains(tags.group()))
        {
            continue;
        }
        throw new RuntimeException("Invalid XLIFF file format: unknown element in -id:" + id + " -content:" + value + " -element:" + tags.group());
    }
    return value;
}

private TextFlow extractTransUnit(XMLStreamReader xmlr) throws XMLStreamException
{
TextFlow textFlow = new TextFlow();
Expand All @@ -161,7 +135,7 @@ private TextFlow extractTransUnit(XMLStreamReader xmlr) throws XMLStreamExceptio
boolean startElement = xmlr.isStartElement();
if (startElement && localName.equals(ELE_SOURCE))
{
String content = extractAndValidateContent(xmlr, ELE_SOURCE, id);
String content = getElementValue(xmlr, ELE_SOURCE, getContentElementList());
textFlow.setContents(content);
}
else if (startElement && localName.equals(ELE_CONTEXT_GROUP))
Expand Down Expand Up @@ -195,7 +169,7 @@ private TextFlowTarget extractTransUnitTarget(XMLStreamReader xmlr) throws XMLSt
{
if (xmlr.isStartElement() && localName.equals(ELE_TARGET))
{
String content = extractAndValidateContent(xmlr, ELE_TARGET, textFlowTarget.getResId());
String content = getElementValue(xmlr, ELE_TARGET, getContentElementList());
textFlowTarget.setContents(asList(content));
}
else if (xmlr.isStartElement() && localName.equals(ELE_CONTEXT_GROUP))
Expand Down Expand Up @@ -237,62 +211,61 @@ private ExtensionSet<SimpleComment> extractContextList(XMLStreamReader xmlr) thr
sb.append(DELIMITER);
sb.append(getAttributeValue(xmlr, ATTRI_CONTEXT_TYPE));// context-type
sb.append(DELIMITER);
sb.append(getElementValue(xmlr, ELE_CONTEXT));// value
sb.append(getElementValue(xmlr, ELE_CONTEXT, null));// value
contextList.add(new SimpleComment(sb.toString()));
}
}
}
return contextList;
}

/**
 * Escapes the characters {@code &}, {@code <} and {@code >} as the entities
 * {@code &amp;}, {@code &lt;} and {@code &gt;}.
 *
 * Uses literal {@link String#replace(CharSequence, CharSequence)} rather
 * than regex-based {@code replaceAll}: the search strings are plain
 * characters, so no pattern needs to be compiled and the replacement text
 * is never interpreted for {@code $} or {@code \} sequences.
 *
 * @param text text to escape; must not be {@code null}
 * @return {@code text} with the three characters replaced by entities
 */
private String escapeHTML(String text)
{
    // '&' must be handled first, otherwise the ampersands introduced by
    // "&lt;" and "&gt;" would themselves be escaped again.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
}
/**
* Extract given element's value
*
* @param currentCursor
* @param reader
* @return
* @throws XMLStreamException
*/
private String getElementValue(XMLStreamReader currentCursor, String endElement) throws XMLStreamException
private String getElementValue(XMLStreamReader reader, String elementName, Collection<String> legalElements) throws XMLStreamException
{
boolean loop = true;
boolean keepReading = true;
StringBuilder contents = new StringBuilder();

currentCursor.next();
reader.next();

if ((currentCursor.isEndElement() || currentCursor.isStartElement()) && currentCursor.getLocalName().equals(endElement))
String localName = reader.getLocalName();
if ((reader.isEndElement() || reader.isStartElement()) && localName.equals(elementName))
{
loop = false;
keepReading = false;
}

while (loop)
while (keepReading)
{
if (currentCursor.hasText()) // if the value in element is text
if (reader.hasText()) // if the value in element is text
{
// make sure all the values are properly xml encoded/escaped
contents.append(escapeHTML(currentCursor.getText()));
contents.append(reader.getText());
}
else
{
// if value in element is a xml element; invalid text
if (currentCursor.isStartElement())
{
contents.append("<" + currentCursor.getLocalName() + ">");
}
else if (currentCursor.isEndElement())
if (reader.isStartElement() || reader.isEndElement())
{
contents.append("</" + currentCursor.getLocalName() + ">");
if (legalElements == null || legalElements.contains(localName))
{
throw new RuntimeException("Sorry, Zanata does not support elements inside " + elementName + ": " + localName);
}
else
{
throw new RuntimeException("Invalid XLIFF: " + localName + " is not legal inside " + elementName);
}
}
}
currentCursor.next();
reader.next();
localName = reader.getLocalName();

if ((currentCursor.isEndElement() || currentCursor.isStartElement()) && currentCursor.getLocalName().equals(endElement))
if ((reader.isEndElement() || reader.isStartElement()) && localName.equals(elementName))
{
loop = false;
keepReading = false;
}
}
return contents.toString();
Expand Down
Expand Up @@ -38,7 +38,7 @@ public void extractTemplateSizeTest() throws FileNotFoundException
Resource doc = getTemplateDoc();

assertThat(doc.getName(), equalTo(DOC_NAME));
assertThat(doc.getTextFlows().size(), is(6));
assertThat(doc.getTextFlows().size(), is(7));
}

@Test
Expand All @@ -50,7 +50,7 @@ public void templateFirstAndSecondLastTextFlowTest() throws FileNotFoundExceptio
TextFlow lastTextFlow = doc.getTextFlows().get(doc.getTextFlows().size() - 2);

assertThat(firstTextFlow.getContents(), equalTo(asList("Translation Unit 1")));
assertThat(lastTextFlow.getContents(), equalTo(asList("Translation Unit 4 (4 &lt; 5 &amp; 4 &gt; 3)")));
assertThat(lastTextFlow.getContents(), equalTo(asList("Translation Unit 4 (4 < 5 & 4 > 3)")));
}

@Test
Expand All @@ -74,7 +74,7 @@ public void targetFirstAndLastTextFlowTest() throws FileNotFoundException
TextFlowTarget lastTextFlow = tr.getTextFlowTargets().get(tr.getTextFlowTargets().size() - 2);

assertThat(firstTextFlow.getContents(), equalTo(asList("Translation 1")));
assertThat(lastTextFlow.getContents(), equalTo(asList("Translation 4 (4 &lt; 5 &amp; 4 &gt; 3)")));
assertThat(lastTextFlow.getContents(), equalTo(asList("Translation 4 (4 < 5 & 4 > 3)")));
}

@Test
Expand All @@ -99,27 +99,37 @@ public void leadingEndingWhiteSpaceSourceTest() throws FileNotFoundException
Resource resource = reader.extractTemplate(inputSource, LocaleId.EN_US, null);

TextFlow tf = resource.getTextFlows().get(resource.getTextFlows().size() - 1);
assertThat(tf.getContents(), equalTo(asList(" Translation Unit 5 (4 &lt; 5 &amp; 4 &gt; 3) ")));
assertThat(tf.getContents(), not(equalTo(asList("Translation Unit 5 (4 &lt; 5 &amp; 4 &gt; 3)"))));
assertThat(tf.getContents(), not(equalTo(asList(" Translation Unit 5 (4 &lt; 5 &amp; 4 &gt; 3)"))));
assertThat(tf.getContents(), not(equalTo(asList("Translation Unit 5 (4 &lt; 5 &amp; 4 &gt; 3) "))));
assertThat(tf.getContents(), equalTo(asList(" Translation Unit 5 (4 < 5 & 4 > 3) ")));
assertThat(tf.getContents(), not(equalTo(asList("Translation Unit 5 (4 < 5 & 4 > 3)"))));
assertThat(tf.getContents(), not(equalTo(asList(" Translation Unit 5 (4 < 5 & 4 > 3)"))));
assertThat(tf.getContents(), not(equalTo(asList("Translation Unit 5 (4 < 5 & 4 > 3) "))));
}

@Test(expectedExceptions = RuntimeException.class)
@Test (expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*br is not legal.*")
public void invalidSourceContentElementTest() throws FileNotFoundException
{
// expect RuntimeException with tu:transunit2 - source

File fileTarget = new File(TEST_DIR, "/StringResource_de2.xml");
File fileTarget = new File(TEST_DIR, "/StringResource_source_invalid.xml");
InputSource inputSource = new InputSource(new FileInputStream(fileTarget));
Resource resource = reader.extractTemplate(inputSource, LocaleId.EN_US, null);
}

@Test(expectedExceptions = RuntimeException.class)
@Test (expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*does not support elements.*: g.*")
public void unsupportedSourceContentElementTest() throws FileNotFoundException
{
// expect RuntimeException with tu:transunit2 - source

File fileTarget = new File(TEST_DIR, "/StringResource_source_unsupported.xml");
InputSource inputSource = new InputSource(new FileInputStream(fileTarget));
Resource resource = reader.extractTemplate(inputSource, LocaleId.EN_US, null);
}

@Test (expectedExceptions = RuntimeException.class, expectedExceptionsMessageRegExp = ".*test is not legal.*")
public void invalidTargetContentElementTest() throws FileNotFoundException
{
// expect RuntimeException with tu:transunit1 - target
File fileTarget = new File(TEST_DIR, "/StringResource_de2.xml");
File fileTarget = new File(TEST_DIR, "/StringResource_target_invalid.xml");
InputSource inputSource = new InputSource(new FileInputStream(fileTarget));
TranslationsResource tr = reader.extractTarget(inputSource);
}
Expand Down
Expand Up @@ -39,8 +39,8 @@ public void checkTransUnit() throws FileNotFoundException
TextFlow lastTextFlow = doc.getTextFlows().get(doc.getTextFlows().size() - 1);

assertThat(firstTextFlow.getContents(), equalTo(asList("Translation Unit 1")));
assertThat(secondTextFlow.getContents(), equalTo(asList("Translation Unit 4 (4 &amp;lt; 5 &amp;amp; 4 &amp;gt; 3)")));
assertThat(lastTextFlow.getContents(), equalTo(asList(" Translation Unit 5 (4 &amp;lt; 5 &amp;amp; 4 &amp;gt; 3) ")));
assertThat(secondTextFlow.getContents(), equalTo(asList("Translation Unit 4 (4 < 5 & 4 > 3)")));
assertThat(lastTextFlow.getContents(), equalTo(asList(" Translation Unit 5 (4 < 5 & 4 > 3) ")));
}

@Test
Expand All @@ -52,7 +52,7 @@ public void extractSizeTest() throws FileNotFoundException
InputSource inputSource = new InputSource(new FileInputStream(generatedFile));
Resource doc = reader.extractTemplate(inputSource, LocaleId.EN_US, generatedDocName);

assertThat(doc.getTextFlows().size(), is(6));
assertThat(doc.getTextFlows().size(), is(7));
}

private void prepareTemplateDoc() throws FileNotFoundException
Expand Down
47 changes: 0 additions & 47 deletions zanata-adapter-xliff/src/test/resources/StringResource_de2.xml

This file was deleted.

Expand Up @@ -28,6 +28,9 @@ http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd"
<trans-unit id="3a">
<source>system&#8217;s channels</source>
</trans-unit>
<trans-unit id="transunit3a">
<source> Translation Unit 6 (4 &lt;br/4 &gt; 3) </source>
</trans-unit>
<trans-unit id="transunit4">
<source>Translation Unit 4 (4 &lt; 5 &amp; 4 &gt; 3)</source>
</trans-unit>
Expand Down

0 comments on commit 1229a40

Please sign in to comment.