fix for preparation of search result pages with offset > 10:
- fewer pages are fetched in advance
- just-in-time fetch of next required pages
- fix for missing hand-over of offset to fetch threads

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6279 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter committed Aug 30, 2009
1 parent 39a311d commit ead48c4
Showing 5 changed files with 44 additions and 51 deletions.
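Taken together, the changes replace eager prefetching of many result pages with a just-in-time scheme: worker threads are told how many results the requested page actually needs (offset + page size) and stop once that target is reached, and workers are redeployed on demand when a later page is requested. A minimal sketch of the idea, with hypothetical names (the real classes are QueryParams, ResultFetcher and its Worker threads):

import java.util.ArrayList;
import java.util.List;

class PagedFetcher {
    private final List<String> results = new ArrayList<String>();

    // A page starting at 'offset' with 'itemsPerPage' entries needs
    // offset + itemsPerPage results to be computed, no more.
    int neededResults(int offset, int itemsPerPage) {
        return offset + itemsPerPage;
    }

    String oneResult(int item, int offset, int itemsPerPage) {
        int needed = neededResults(offset, itemsPerPage);
        // just-in-time fetch: only when the requested page is not covered yet
        if (results.size() < needed) fetchUpTo(needed);
        return (item < results.size()) ? results.get(item) : null;
    }

    private void fetchUpTo(int needed) {
        // placeholder: deploy worker threads until results.size() >= needed
        while (results.size() < needed) results.add("result-" + results.size());
    }
}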
4 changes: 2 additions & 2 deletions htroot/AccessTracker_p.java
@@ -169,12 +169,12 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes));
}
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage);
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage);
prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
- qcountSum += searchProfile.linesPerPage;
+ qcountSum += searchProfile.itemsPerPage;
rcountSum += searchProfile.resultcount;
utimeSum += searchProfile.urlretrievaltime;
stimeSum += searchProfile.snippetcomputationtime;
14 changes: 7 additions & 7 deletions source/de/anomic/search/QueryParams.java
@@ -64,7 +64,7 @@ public static enum FetchMode {

public String queryString;
public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes;
- public int linesPerPage, offset;
+ public int itemsPerPage, offset;
public String prefer;
public int contentdom;
public String urlMask;
@@ -90,7 +90,7 @@ public static enum FetchMode {
public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options

public QueryParams(final String queryString,
- final int lines,
+ final int itemsPerPage,
final RankingProfile ranking,
final Bitfield constraint) {
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
@@ -110,7 +110,7 @@ public QueryParams(final String queryString,
this.maxDistance = Integer.MAX_VALUE;
this.prefer = "";
this.contentdom = CONTENTDOM_ALL;
- this.linesPerPage = lines;
+ this.itemsPerPage = itemsPerPage;
this.offset = 0;
this.urlMask = ".*";
this.targetlang = "en";
@@ -139,7 +139,7 @@ public QueryParams(
final String language,
final String navigators,
final boolean onlineSnippetFetch,
final int lines, final int offset, final String urlMask,
final int itemsPerPage, final int offset, final String urlMask,
final int domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String site,
@@ -156,7 +156,7 @@ public QueryParams(
this.maxDistance = maxDistance;
this.prefer = prefer;
this.contentdom = contentdom;
- this.linesPerPage = Math.min((specialRights) ? 1000 : 50, lines);
+ this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage);
this.offset = Math.min((specialRights) ? 10000 : 100, offset);
this.urlMask = urlMask;
assert language != null;
@@ -178,12 +178,12 @@ public QueryParams(

public int neededResults() {
// the number of result lines that must be computed
- return this.offset + this.linesPerPage;
+ return this.offset + this.itemsPerPage;
}

public int displayResults() {
// the number of result lines that are displayed at once (size of result page)
- return this.linesPerPage;
+ return this.itemsPerPage;
}

public void setOffset(final int newOffset) {
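As a worked example of the paging arithmetic above: requesting the third page of ten results means offset = 20, so neededResults() must deliver 30 entries while displayResults() stays at 10; without special rights, itemsPerPage is capped at 50 and offset at 100. Values only, since the real constructor takes many more arguments:

// Worked example of the QueryParams paging arithmetic (illustration,
// not a real constructor call).
int requestedItems = 10, requestedOffset = 20;   // third page of ten results
int itemsPerPage = Math.min(50, requestedItems); // capped at 50 without special rights
int offset = Math.min(100, requestedOffset);     // capped at 100 without special rights
int needed = offset + itemsPerPage;              // neededResults() == 30
int displayed = itemsPerPage;                    // displayResults() == 10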
42 changes: 27 additions & 15 deletions source/de/anomic/search/ResultFetcher.java
@@ -50,7 +50,7 @@ public class ResultFetcher {

// input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
- final QueryParams query;
+ QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers;

@@ -91,23 +91,18 @@ public ResultFetcher(
}

// start worker threads to fetch urls and snippets
- this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
- for (int i = 0; i < this.workerThreads.length; i++) {
- this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0);
- this.workerThreads[i].start();
- }
+ this.workerThreads = null;
+ deployWorker(10);
serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false);

}

- public void restartWorker() {
+ public void deployWorker(int neededResults) {
if (anyWorkerAlive()) return;
- this.workerThreads = new Worker[workerThreadCount];
- Worker worker;
- for (int i = 0; i < workerThreads.length; i++) {
- worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0);
- worker.start();
- workerThreads[i] = worker;
+ this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
+ for (int i = 0; i < workerThreads.length; i++) {
+ this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0, neededResults);
+ this.workerThreads[i].start();
}
}

@@ -136,12 +131,14 @@ protected class Worker extends Thread {
private long lastLifeSign; // when the last time the run()-loop was executed
private final int id;
private int snippetMode;
+ private int neededResults;

- public Worker(final int id, final long maxlifetime, int snippetMode) {
+ public Worker(final int id, final long maxlifetime, int snippetMode, int neededResults) {
this.id = id;
this.snippetMode = snippetMode;
this.lastLifeSign = System.currentTimeMillis();
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
+ this.neededResults = neededResults;
}

public void run() {
@@ -152,6 +149,7 @@ public void run() {
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try {
while (System.currentTimeMillis() < this.timeout) {
+ if (result.size() >= neededResults) break;
this.lastLifeSign = System.currentTimeMillis();

// check if we have enough
@@ -285,10 +283,24 @@ public ResultEntry oneResult(final int item) {
return this.result.element(item).element;
}

System.out.println("rankedCache.size() = " + this.rankedCache.size());
System.out.println("result.size() = " + this.result.size());
System.out.println("query.neededResults() = " + query.neededResults());

if ((!anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) ||
(this.result.size() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.size())
) {
// start worker threads to fetch urls and snippets
deployWorker(query.neededResults());
}

// finally wait until enough results are there produced from the
// snippet fetch process
while ((anyWorkerAlive()) && (result.size() <= item)) {
- try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
+ try {Thread.sleep((item % query.itemsPerPage) * 50L);} catch (final InterruptedException e) {}
}

// finally, if there is something, return the result
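In essence, oneResult(item) now performs the just-in-time fetch itself: when the requested item lies beyond the fetched results but the ranked cache still holds candidates and no worker is running, it redeploys workers sized to query.neededResults(), and each Worker exits as soon as that many results exist instead of running until its timeout. A simplified reading of that control flow, condensed from the diff above (error handling and the image content-dom special case are omitted):

// Condensed control flow of ResultFetcher.oneResult() after this commit;
// not the verbatim YaCy code.
ResultEntry oneResult(final int item) {
    if (item < this.result.size()) {
        return this.result.element(item).element;   // already fetched
    }
    // restart workers just in time, targeting only offset + itemsPerPage results
    if (!anyWorkerAlive()
            && this.result.size() < query.neededResults()
            && this.rankedCache.size() > this.result.size()) {
        deployWorker(query.neededResults());
    }
    // wait until the snippet-fetch workers have produced enough entries
    while (anyWorkerAlive() && this.result.size() <= item) {
        try { Thread.sleep((item % query.itemsPerPage) * 50L); }
        catch (final InterruptedException e) { /* keep waiting */ }
    }
    return (item < this.result.size()) ? this.result.element(item).element : null;
}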
21 changes: 7 additions & 14 deletions source/de/anomic/search/SearchEvent.java
@@ -66,7 +66,7 @@ public final class SearchEvent {
private final Segment indexSegment;
private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
- private ResultFetcher snippets;
+ private ResultFetcher results;

// class variables for search abstracts
private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
@@ -176,7 +176,7 @@ public final class SearchEvent {
}

// start worker threads to fetch urls and snippets
- this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers);
+ this.results = new ResultFetcher(rankedCache, query, indexSegment, peers);

// clean up events
SearchEventCache.cleanupEvents(false);
@@ -201,19 +201,20 @@ public QueryParams getQuery() {

public void setQuery(QueryParams query) {
this.query = query;
+ this.results.query = query;
}

public void cleanup() {
// execute deletion of failed words
- int rw = this.snippets.failedURLs.size();
+ int rw = this.results.failedURLs.size();
if (rw > 0) {
final TreeSet<byte[]> removeWords = query.queryHashes;
removeWords.addAll(query.excludeHashes);
try {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
- this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet());
+ this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet());
}
} catch (IOException e) {
e.printStackTrace();
@@ -311,16 +312,8 @@ public ResultEntry oneResult(final int item) {
// remote search requests, wait that the local process terminates first
try {localSearchThread.join();} catch (InterruptedException e) {}
}
- // now wait until as many remote worker threads have finished, as we
- // want to display results
- while (this.primarySearchThreads != null &&
- this.primarySearchThreads.length > item &&
- this.snippets.anyWorkerAlive() &&
- (this.snippets.resultCount() <= item || countFinishedRemoteSearch() <= item)) {
- try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
- }
}
- return this.snippets.oneResult(item);
+ return this.results.oneResult(item);
}

boolean secondarySearchStartet = false;
@@ -401,7 +394,7 @@ public void remove(final String urlhash) {
}

public ResultFetcher result() {
- return this.snippets;
+ return this.results;
}

}
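The added line in setQuery() is the "missing hand-over of offset" from the commit message: cached SearchEvent objects are reused across page requests, so the QueryParams carrying the new offset must reach the ResultFetcher as well, or its workers keep aiming at the old neededResults() target. A hypothetical caller to illustrate (how the event and query are obtained is omitted):

// Hypothetical reuse of a cached event for a later result page.
query.setOffset(20);                     // user requested page 3 of 10 items
event.setQuery(query);                   // now also updates results.query
ResultEntry entry = event.oneResult(25); // triggers a just-in-time fetch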
14 changes: 1 addition & 13 deletions source/de/anomic/search/SearchEventCache.java
@@ -90,20 +90,8 @@ public static SearchEvent getEvent(
}
}
if (event == null) {
- // generate a new event
+ // start a new event
event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
- } else {
- // if worker threads had been alive, but did not succeed, start them again to fetch missing links
- if ((!event.result().anyWorkerAlive()) &&
- (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
- (event.result().result.size() < query.neededResults() + 10)) &&
- //(event.query.onlineSnippetFetch) &&
- (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
- // set new timeout
- event.resetEventTime();
- // start worker threads to fetch urls and snippets
- event.result().restartWorker();
- }
}

return event;
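With the restart logic moved out of the cache and into ResultFetcher.oneResult(), SearchEventCache only decides between reusing and creating an event; a consumer can then simply iterate over the requested page and the fetcher pulls missing entries on demand. An illustrative paging loop (hypothetical surrounding code; render() is an assumed page-rendering helper):

// Hypothetical rendering loop for one result page. oneResult() now handles
// worker redeployment whenever the page lies beyond the fetched range.
final int offset = query.offset;
final int items = query.itemsPerPage;
for (int i = offset; i < offset + items; i++) {
    final ResultEntry entry = event.result().oneResult(i);
    if (entry == null) break;   // no further results available
    render(entry);              // assumed helper, not part of YaCy
}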
