From 41c36ffd751dadb84376ca4d8c1556c09d6375d9 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 26 Jun 2016 06:46:26 +0200 Subject: [PATCH] exclude rejected results from result count (by using the resultcontainer.size instead of input docList.size) skip waiting for write-search-result-to-local-index (by removing the Thread.join - which will bring a small performance increase) --- source/net/yacy/peers/Protocol.java | 111 +++++++++++++--------------- 1 file changed, 51 insertions(+), 60 deletions(-) diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index bb5ea5954a..1acfef42a2 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1147,25 +1147,25 @@ public void run() { return 0; } - List container = new ArrayList(); + List resultContainer = new ArrayList(); Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); int term = count; Collection docs; if (event.addResultsToLocalIndex) { // only needed to store remote results docs = new ArrayList(docList[0].size()); } else docs = null; - for (final SolrDocument doc: docList[0]) { + for (final SolrDocument tmpdoc: docList[0]) { //System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku"))); if ( term-- <= 0 ) { break; // do not process more that requested (in case that evil peers fill us up with rubbish) } // get one single search result - if ( doc == null ) { + if ( tmpdoc == null ) { continue; } URIMetadataNode urlEntry; try { - urlEntry = new URIMetadataNode(doc); + urlEntry = new URIMetadataNode(tmpdoc); } catch (MalformedURLException ex) { continue; } @@ -1198,73 +1198,61 @@ public void run() { // put the remote documents to the local index. We must convert the solr document to a solr input document: if (event.addResultsToLocalIndex) { - /* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */ - if(checkDocumentSize(doc, event.getRemoteDocStoredMaxSize() * 1024)) { - final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc); - - // the input document stays untouched because it contains top-level cloned objects - docs.add(sid); - // will be stored to index, and is a full solr document, can be added to firstseen - event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); - } else { - Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring()); - } + /* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */ + if (checkDocumentSize(tmpdoc, event.getRemoteDocStoredMaxSize() * 1024)) { + final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(tmpdoc); + + // the input document stays untouched because it contains top-level cloned objects + docs.add(sid); + // will be stored to index, and is a full solr document, can be added to firstseen + event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); + } else { + Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring()); + } } // after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document // because that goes into a search cache and would take a lot of memory in the search cache //doc.removeFields(CollectionSchema.text_t.getSolrFieldName()); - doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); - + tmpdoc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); + ResultURLs.stack( - ASCII.String(urlEntry.url().hash()), - urlEntry.url().getHost(), - event.peers.mySeed().hash.getBytes(), - UTF8.getBytes(target.hash), - EventOrigin.QUERIES); + ASCII.String(urlEntry.url().hash()), + urlEntry.url().getHost(), + event.peers.mySeed().hash.getBytes(), + UTF8.getBytes(target.hash), + EventOrigin.QUERIES); } - // add the url entry to the word indexes - container.add(urlEntry); + // add the url entry to the checked results + resultContainer.add(urlEntry); } - final int dls = docList[0].size(); final int numFound = (int) docList[0].getNumFound(); docList[0].clear(); docList[0] = null; if (localsearch) { - event.addNodes(container, facets, snippets, true, "localpeer", numFound); + event.addNodes(resultContainer, facets, snippets, true, "localpeer", numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); - Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references"); + Network.log.info("local search (solr): localpeer sent " + resultContainer.size() + "/" + numFound + " references"); } else { if (event.addResultsToLocalIndex) { - /* - * Current thread might be interrupted by SearchEvent.cleanup() - */ - if (Thread.interrupted()) { - throw new InterruptedException("solrQuery interrupted"); - } - WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(), - docs); - writeToLocalIndexThread.start(); - try { - writeToLocalIndexThread.join(); - } catch (InterruptedException e) { - /* - * Current thread interruption might happen while waiting - * for writeToLocalIndexThread. - */ - writeToLocalIndexThread.stopWriting(); - throw new InterruptedException("solrQuery interrupted"); - } - docs.clear(); + /* + * Current thread might be interrupted by SearchEvent.cleanup() + */ + if (Thread.interrupted()) { + throw new InterruptedException("solrQuery interrupted"); + } + WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(), + docs); // will clear docs on return + writeToLocalIndexThread.start(); } - event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound); + event.addNodes(resultContainer, facets, snippets, false, target.getName() + "/" + target.hash, numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); - Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references"); + Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (resultContainer.size()) + "/" + numFound + " references"); } - return dls; + return resultContainer.size(); } /** @@ -1285,6 +1273,7 @@ private static class WriteToLocalIndexThread extends Thread { /** * Parameters must be not null. + * After writing the collection is cleared * @param segment solr segment to write * @param docs solr documents collection to put to segment */ @@ -1300,17 +1289,19 @@ public void stopWriting() { this.stop.set(true); } - @Override - public void run() { - for (SolrInputDocument doc: docs) { - if(stop.get()) { - Network.log.info("Writing documents collection to Solr segment was stopped."); - return; - } - segment.putDocument(doc); + @Override + public void run() { + for (SolrInputDocument doc : docs) { + if (stop.get()) { + docs.clear(); + Network.log.info("Writing documents collection to Solr segment was stopped."); + return; + } + segment.putDocument(doc); } - } - } + docs.clear(); + } + } /** * Only when maxSize is greater than zero, check that doc size is lower. To