Permalink
Browse files

Fixed regression introduced by commit 9ad4d16

On MediaWiki dump imports, the SurrogateReader tried to unread more bytes
than its push-back buffer could hold, and failed with the following exception:
"java.io.IOException: Push back buffer is full".
  • Loading branch information...
luccioman committed May 2, 2017
1 parent 7678fd6 commit 79fdf14b0abe85e74ae35cae44e0406b38f26559
Showing with 7 additions and 4 deletions.
  1. +7 −4 source/net/yacy/document/content/SurrogateReader.java
@@ -71,6 +71,9 @@
public final static String SURROGATES_MAIN_ELEMENT_CLOSE =
"</" + SURROGATES_MAIN_ELEMENT_NAME + ">";
public final static SolrInputDocument POISON_DOCUMENT = new SolrInputDocument();
/** Maximum number of bytes that can be unread on the underlying input stream */
private static final int PUSHBACK_SIZE = 1024;
// class variables
private final StringBuilder buffer;
@@ -100,7 +103,7 @@ private static SAXParser getParser() throws SAXException {
}
public SurrogateReader(final InputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) throws IOException {
this(new PushbackInputStream(stream, 200), queueSize, crawlStacker, configuration, concurrency);
this(new PushbackInputStream(stream, PUSHBACK_SIZE), queueSize, crawlStacker, configuration, concurrency);
}
public SurrogateReader(final PushbackInputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) throws IOException {
@@ -181,14 +184,14 @@ public void run() {
/**
* Check for format string in responseHeader "yacy.index.export.solr.xml"
* (introduced v1.92/9188 2017-04-30) or guess format by existing "<respons>"
* and "<result>" or "<doc>" tag in the first 1024 characters.
* (introduced v1.92/9188 2017-04-30) or guess format by existing "<response>"
* and "<result>" or "<doc>" tag in the first {@value #PUSHBACK_SIZE} characters.
*
* @return true when inputStream is likely to contain a rich and full-text Solr xml data dump (see IndexExport_p.html)
*/
private boolean isSolrDump() {
boolean res = false;
byte[] b = new byte[1024];
byte[] b = new byte[PUSHBACK_SIZE];
int nbRead = -1;
try {
nbRead = this.inputStream.read(b);

0 comments on commit 79fdf14

Please sign in to comment.