Permalink
Browse files

Also handle text content when parsing XML within limits.

  • Loading branch information...
luccioman committed Aug 14, 2017
1 parent f38fb7f commit acab6a6defb3307d27fb97004f99e0be4be8f55f
@@ -193,11 +193,17 @@ public boolean isParseWithLimitsSupported() {
} catch(StreamLimitException e) {
limitExceeded = true;
}
if (writer.isOverflow()) {
throw new Parser.Failure("Not enough Memory available for generic the XML parser : "
+ Formatter.bytesToString(availableMemory), location);
}
/* create the parsed document with empty text content */
/* Create the parsed document, possibly with only part of the text and links when the limits were exceeded */
final byte[] contentBytes = UTF8.getBytes(writer.toString());
Document[] docs = new Document[] { new Document(location, mimeType, detectedCharset, this, null, null, null, null, "",
null, null, 0.0d, 0.0d, new byte[0], detectedURLs, null, null, false, new Date()) };
null, null, 0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) };
docs[0].setPartiallyParsed(limitExceeded);
return docs;
} catch (final Exception e) {
@@ -390,6 +390,8 @@ public void testParseWithLimits() throws Exception {
assertEquals(1, documents.length);
assertFalse(documents[0].isPartiallyParsed());
assertTrue(documents[0].getTextString().contains("And this is a relative link"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(5, detectedAnchors.size());
@@ -410,6 +412,9 @@ public void testParseWithLimits() throws Exception {
assertEquals(1, documents.length);
assertTrue(documents[0].isPartiallyParsed());
assertTrue(documents[0].getTextString().contains("Home page"));
assertFalse(documents[0].getTextString().contains("And this is a relative link"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(2, detectedAnchors.size());
@@ -447,6 +452,9 @@ public void testParseWithLimits() throws Exception {
assertEquals(1, documents.length);
assertTrue(documents[0].isPartiallyParsed());
assertTrue(documents[0].getTextString().contains("and this is a mention to a relative URL"));
assertFalse(documents[0].getTextString().contains("And this is a relative link to another"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(3, detectedAnchors.size());

0 comments on commit acab6a6

Please sign in to comment.