fix for preparation of search result pages with offset > 10:
- fewer pages are fetched in advance
- just-in-time fetch of next required pages
- fix for missing hand-over of offset to fetch threads

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6279 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter committed Aug 30, 2009
1 parent 39a311d commit ead48c4
Showing 5 changed files with 44 additions and 51 deletions.
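Taken together, the changes replace eager prefetching of many result pages with a just-in-time scheme: worker threads are told how many results the requested page actually needs (offset + page size) and stop once that target is reached, and workers are redeployed on demand when a later page is requested. A minimal sketch of the idea, with hypothetical names (the real classes are QueryParams, ResultFetcher and its Worker threads):

import java.util.ArrayList;
import java.util.List;

class PagedFetcher {
    private final List<String> results = new ArrayList<String>();

    // A page starting at 'offset' with 'itemsPerPage' entries needs
    // offset + itemsPerPage results to be computed, no more.
    int neededResults(int offset, int itemsPerPage) {
        return offset + itemsPerPage;
    }

    String oneResult(int item, int offset, int itemsPerPage) {
        int needed = neededResults(offset, itemsPerPage);
        // just-in-time fetch: only when the requested page is not covered yet
        if (results.size() < needed) fetchUpTo(needed);
        return (item < results.size()) ? results.get(item) : null;
    }

    private void fetchUpTo(int needed) {
        // placeholder: deploy worker threads until results.size() >= needed
        while (results.size() < needed) results.add("result-" + results.size());
    }
}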
4 changes: 2 additions & 2 deletions htroot/AccessTracker_p.java
@@ -169,12 +169,12 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes));
}
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage);
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage);
prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
- qcountSum += searchProfile.linesPerPage;
+ qcountSum += searchProfile.itemsPerPage;
rcountSum += searchProfile.resultcount;
utimeSum += searchProfile.urlretrievaltime;
stimeSum += searchProfile.snippetcomputationtime;
14 changes: 7 additions & 7 deletions source/de/anomic/search/QueryParams.java
@@ -64,7 +64,7 @@ public static enum FetchMode {

public String queryString;
public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes;
- public int linesPerPage, offset;
+ public int itemsPerPage, offset;
public String prefer;
public int contentdom;
public String urlMask;
@@ -90,7 +90,7 @@ public static enum FetchMode {
public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options

public QueryParams(final String queryString,
- final int lines,
+ final int itemsPerPage,
final RankingProfile ranking,
final Bitfield constraint) {
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
@@ -110,7 +110,7 @@ public QueryParams(final String queryString,
this.maxDistance = Integer.MAX_VALUE;
this.prefer = "";
this.contentdom = CONTENTDOM_ALL;
- this.linesPerPage = lines;
+ this.itemsPerPage = itemsPerPage;
this.offset = 0;
this.urlMask = ".*";
this.targetlang = "en";
@@ -139,7 +139,7 @@ public QueryParams(
final String language,
final String navigators,
final boolean onlineSnippetFetch,
final int lines, final int offset, final String urlMask,
final int itemsPerPage, final int offset, final String urlMask,
final int domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String site,
@@ -156,7 +156,7 @@ public QueryParams(
this.maxDistance = maxDistance;
this.prefer = prefer;
this.contentdom = contentdom;
- this.linesPerPage = Math.min((specialRights) ? 1000 : 50, lines);
+ this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage);
this.offset = Math.min((specialRights) ? 10000 : 100, offset);
this.urlMask = urlMask;
assert language != null;
@@ -178,12 +178,12 @@ public QueryParams(

public int neededResults() {
// the number of result lines that must be computed
- return this.offset + this.linesPerPage;
+ return this.offset + this.itemsPerPage;
}

public int displayResults() {
// the number of result lines that are displayed at once (size of result page)
- return this.linesPerPage;
+ return this.itemsPerPage;
}

public void setOffset(final int newOffset) {
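As a worked example of the paging arithmetic above: requesting the third page of ten results means offset = 20, so neededResults() must deliver 30 entries while displayResults() stays at 10; without special rights, itemsPerPage is capped at 50 and offset at 100. Values only, since the real constructor takes many more arguments:

// Worked example of the QueryParams paging arithmetic (illustration,
// not a real constructor call).
int requestedItems = 10, requestedOffset = 20;   // third page of ten results
int itemsPerPage = Math.min(50, requestedItems); // capped at 50 without special rights
int offset = Math.min(100, requestedOffset);     // capped at 100 without special rights
int needed = offset + itemsPerPage;              // neededResults() == 30
int displayed = itemsPerPage;                    // displayResults() == 10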
42 changes: 27 additions & 15 deletions source/de/anomic/search/ResultFetcher.java
@@ -50,7 +50,7 @@ public class ResultFetcher {

// input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
- final QueryParams query;
+ QueryParams query;
private final Segment indexSegment;
private final yacySeedDB peers;

@@ -91,23 +91,18 @@ public ResultFetcher(
}

// start worker threads to fetch urls and snippets
- this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
- for (int i = 0; i < this.workerThreads.length; i++) {
- this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0);
- this.workerThreads[i].start();
- }
+ this.workerThreads = null;
+ deployWorker(10);
serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false);

}

- public void restartWorker() {
+ public void deployWorker(int neededResults) {
if (anyWorkerAlive()) return;
- this.workerThreads = new Worker[workerThreadCount];
- Worker worker;
- for (int i = 0; i < workerThreads.length; i++) {
- worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0);
- worker.start();
- workerThreads[i] = worker;
+ this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
+ for (int i = 0; i < workerThreads.length; i++) {
+ this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0, neededResults);
+ this.workerThreads[i].start();
}
}

@@ -136,12 +131,14 @@ protected class Worker extends Thread {
private long lastLifeSign; // when the last time the run()-loop was executed
private final int id;
private int snippetMode;
+ private int neededResults;

- public Worker(final int id, final long maxlifetime, int snippetMode) {
+ public Worker(final int id, final long maxlifetime, int snippetMode, int neededResults) {
this.id = id;
this.snippetMode = snippetMode;
this.lastLifeSign = System.currentTimeMillis();
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
+ this.neededResults = neededResults;
}

public void run() {
@@ -152,6 +149,7 @@ public void run() {
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try {
while (System.currentTimeMillis() < this.timeout) {
+ if (result.size() >= neededResults) break;
this.lastLifeSign = System.currentTimeMillis();

// check if we have enough
@@ -285,10 +283,24 @@ public ResultEntry oneResult(final int item) {
return this.result.element(item).element;
}

System.out.println("rankedCache.size() = " + this.rankedCache.size());
System.out.println("result.size() = " + this.result.size());
System.out.println("query.neededResults() = " + query.neededResults());

if ((!anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) ||
(this.result.size() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.size())
) {
// start worker threads to fetch urls and snippets
deployWorker(query.neededResults());
}

// finally wait until enough results are there produced from the
// snippet fetch process
while ((anyWorkerAlive()) && (result.size() <= item)) {
- try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
+ try {Thread.sleep((item % query.itemsPerPage) * 50L);} catch (final InterruptedException e) {}
}

// finally, if there is something, return the result
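In essence, oneResult(item) now performs the just-in-time fetch itself: when the requested item lies beyond the fetched results but the ranked cache still holds candidates and no worker is running, it redeploys workers sized to query.neededResults(), and each Worker exits as soon as that many results exist instead of running until its timeout. A simplified reading of that control flow, condensed from the diff above (error handling and the image content-dom special case are omitted):

// Condensed control flow of ResultFetcher.oneResult() after this commit;
// not the verbatim YaCy code.
ResultEntry oneResult(final int item) {
    if (item < this.result.size()) {
        return this.result.element(item).element;   // already fetched
    }
    // restart workers just in time, targeting only offset + itemsPerPage results
    if (!anyWorkerAlive()
            && this.result.size() < query.neededResults()
            && this.rankedCache.size() > this.result.size()) {
        deployWorker(query.neededResults());
    }
    // wait until the snippet-fetch workers have produced enough entries
    while (anyWorkerAlive() && this.result.size() <= item) {
        try { Thread.sleep((item % query.itemsPerPage) * 50L); }
        catch (final InterruptedException e) { /* keep waiting */ }
    }
    return (item < this.result.size()) ? this.result.element(item).element : null;
}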
21 changes: 7 additions & 14 deletions source/de/anomic/search/SearchEvent.java
@@ -66,7 +66,7 @@ public final class SearchEvent {
private final Segment indexSegment;
private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
- private ResultFetcher snippets;
+ private ResultFetcher results;

// class variables for search abstracts
private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
@@ -176,7 +176,7 @@ public final class SearchEvent {
}

// start worker threads to fetch urls and snippets
- this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers);
+ this.results = new ResultFetcher(rankedCache, query, indexSegment, peers);

// clean up events
SearchEventCache.cleanupEvents(false);
@@ -201,19 +201,20 @@ public QueryParams getQuery() {

public void setQuery(QueryParams query) {
this.query = query;
+ this.results.query = query;
}

public void cleanup() {
// execute deletion of failed words
- int rw = this.snippets.failedURLs.size();
+ int rw = this.results.failedURLs.size();
if (rw > 0) {
final TreeSet<byte[]> removeWords = query.queryHashes;
removeWords.addAll(query.excludeHashes);
try {
final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words
while (j.hasNext()) {
- this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet());
+ this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet());
}
} catch (IOException e) {
e.printStackTrace();
@@ -311,16 +312,8 @@ public ResultEntry oneResult(final int item) {
// remote search requests, wait that the local process terminates first
try {localSearchThread.join();} catch (InterruptedException e) {}
}
- // now wait until as many remote worker threads have finished, as we
- // want to display results
- while (this.primarySearchThreads != null &&
- this.primarySearchThreads.length > item &&
- this.snippets.anyWorkerAlive() &&
- (this.snippets.resultCount() <= item || countFinishedRemoteSearch() <= item)) {
- try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
- }
}
- return this.snippets.oneResult(item);
+ return this.results.oneResult(item);
}

boolean secondarySearchStartet = false;
@@ -401,7 +394,7 @@ public void remove(final String urlhash) {
}

public ResultFetcher result() {
- return this.snippets;
+ return this.results;
}

}
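The added line in setQuery() is the "missing hand-over of offset" from the commit message: cached SearchEvent objects are reused across page requests, so the QueryParams carrying the new offset must reach the ResultFetcher as well, or its workers keep aiming at the old neededResults() target. A hypothetical caller to illustrate (how the event and query are obtained is omitted):

// Hypothetical reuse of a cached event for a later result page.
query.setOffset(20);                     // user requested page 3 of 10 items
event.setQuery(query);                   // now also updates results.query
ResultEntry entry = event.oneResult(25); // triggers a just-in-time fetch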
14 changes: 1 addition & 13 deletions source/de/anomic/search/SearchEventCache.java
@@ -90,20 +90,8 @@ public static SearchEvent getEvent(
}
}
if (event == null) {
- // generate a new event
+ // start a new event
event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
- } else {
- // if worker threads had been alive, but did not succeed, start them again to fetch missing links
- if ((!event.result().anyWorkerAlive()) &&
- (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
- (event.result().result.size() < query.neededResults() + 10)) &&
- //(event.query.onlineSnippetFetch) &&
- (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
- // set new timeout
- event.resetEventTime();
- // start worker threads to fetch urls and snippets
- event.result().restartWorker();
- }
}

return event;
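With the restart logic moved out of the cache and into ResultFetcher.oneResult(), SearchEventCache only decides between reusing and creating an event; a consumer can then simply iterate over the requested page and the fetcher pulls missing entries on demand. An illustrative paging loop (hypothetical surrounding code; render() is an assumed page-rendering helper):

// Hypothetical rendering loop for one result page. oneResult() now handles
// worker redeployment whenever the page lies beyond the fetched range.
final int offset = query.offset;
final int items = query.itemsPerPage;
for (int i = offset; i < offset + items; i++) {
    final ResultEntry entry = event.result().oneResult(i);
    if (entry == null) break;   // no further results available
    render(entry);              // assumed helper, not part of YaCy
}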
