Permalink
Browse files

Improved stream-oriented parsing entering conditions.

  • Loading branch information...
luccioman committed Jun 17, 2017
1 parent 32288a8 commit d2a4a27f52792986c9da1244234d229fb82590ff
Showing with 28 additions and 4 deletions.
  1. +28 −4 source/net/yacy/document/TextParser.java
@@ -238,11 +238,35 @@ private static void initParser(final Parser parser) {
}
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);
// if we do not have more than one parser or the content size is over MaxInt
boolean canStream = false;
Parser streamParser = idioms.iterator().next();
if(idioms.size() == 1) {
canStream = true;
} else if(idioms.size() == 2) {
/* When there are only 2 available parsers, stream-oriented parsing can still be applied when one of the 2 parsers is the generic one */
for(Parser idiom : idioms) {
if(idiom instanceof genericParser) {
canStream = true;
} else {
/* stream oriented parsing will be performed by the non generic parser */
streamParser = idiom;
}
}
} else if(idioms.size() > 2) {
/* Prefer the first available non generic parser */
for(Parser idiom : idioms) {
if(!(idiom instanceof genericParser)) {
streamParser = idiom;
break;
}
}
}
// If we do not have more than one non-generic parser, or the content size exceeds Integer.MAX_VALUE (2GB) or the currently available memory,
// then we use only one stream-oriented parser.
if (idioms.size() == 1 || contentLength > Integer.MAX_VALUE) {
if (canStream || contentLength > Integer.MAX_VALUE || contentLength > MemoryControl.available()) {
// use a specific stream-oriented parser
return parseSource(location, mimeType, idioms.iterator().next(), charset, scraper, timezoneOffset, sourceStream);
return parseSource(location, mimeType, streamParser, charset, scraper, timezoneOffset, sourceStream);
}
// in case that we know more parsers we first transform the content into a byte[] and use that as base
@@ -347,7 +371,7 @@ private static void initParser(final Parser parser) {
assert textStream != null : "mimeType = " + mimeType;
try {
if(textStream != null) {
/* this textStream can wrap a FileInputStream : as it won't be used anymore, we must close it to ensure the system resource is released */
/* textStream can be a FileInputStream: we must close it to ensure the system resource is released */
textStream.close();
}
} catch (IOException e) {

0 comments on commit d2a4a27

Please sign in to comment.