Skip to content

Commit

Permalink
- fixed bad behaviour of search event worker processes
Browse files Browse the repository at this point in the history
- fixed export of url lists in xml

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4229 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Nov 23, 2007
1 parent 445c0b5 commit 2fcd18a
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 46 deletions.
2 changes: 1 addition & 1 deletion build.properties
Expand Up @@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.553
releaseVersion=0.554
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy
Expand Down
20 changes: 10 additions & 10 deletions htroot/yacy/search.java
Expand Up @@ -128,7 +128,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
int indexabstractContainercount = 0;
int joincount = 0;
plasmaSearchQuery theQuery = null;
serverProfiling localProcess = null;
serverProfiling localProfiling = null;
ArrayList accu = null;
long urlRetrievalAllTime = 0, snippetComputationAllTime = 0;
if ((query.length() == 0) && (abstractSet != null)) {
Expand All @@ -138,12 +138,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");

// prepare a search profile
localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults());
localProfiling = new serverProfiling();

//theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, sb.wordIndex, null);
localProcess.startTimer();
localProfiling.startTimer();
Map[] containers = sb.wordIndex.localSearchContainers(theQuery, plasmaSearchQuery.hashes2Set(urls));
localProcess.yield(plasmaSearchEvent.COLLECTION, containers[0].size());
localProfiling.yield(plasmaSearchEvent.COLLECTION, containers[0].size());
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Map.Entry entry;
Expand All @@ -170,8 +170,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults());
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProcess, sb.wordIndex, null, true, abstractSet);
localProfiling = new serverProfiling();
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProfiling, sb.wordIndex, null, true, abstractSet);
urlRetrievalAllTime = theSearch.getURLRetrievalTime();
snippetComputationAllTime = theSearch.getSnippetComputationTime();

Expand Down Expand Up @@ -231,15 +231,15 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (partitions > 0) sb.requestedQueries = sb.requestedQueries + 1d / partitions; // increase query counter

// prepare reference hints
localProcess.startTimer();
localProfiling.startTimer();
Set ws = theSearch.references(10);
StringBuffer refstr = new StringBuffer();
Iterator j = ws.iterator();
while (j.hasNext()) {
refstr.append(",").append((String) j.next());
}
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
localProcess.yield("reference collection", ws.size());
localProfiling.yield("reference collection", ws.size());
}
prop.put("indexabstract", indexabstract.toString());

Expand All @@ -253,7 +253,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

} else {
// result is a List of urlEntry elements
localProcess.startTimer();
localProfiling.startTimer();
StringBuffer links = new StringBuffer();
String resource = null;
plasmaSearchEvent.ResultEntry entry;
Expand All @@ -266,7 +266,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
}
prop.put("links", links.toString());
prop.put("linkcount", accu.size());
localProcess.yield("result list preparation", accu.size());
localProfiling.yield("result list preparation", accu.size());
}

// add information about forward peers
Expand Down
4 changes: 2 additions & 2 deletions htroot/yacysearch.java
Expand Up @@ -268,7 +268,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
20,
constraint,
true);
serverProfiling localTiming = new serverProfiling(4 * theQuery.maximumTime / 10, theQuery.displayResults());
serverProfiling localProfiling = new serverProfiling();

String client = (String) header.get("CLIENTIP"); // the search client who initiated the search

Expand All @@ -287,7 +287,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
}
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null);
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localProfiling, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null);

// generate result object
serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacysearchitem.html
@@ -1,7 +1,7 @@
#(content)#::
<div class="searchresults">
<h4 class="linktitle">
<img src="ViewImage.png?width=16&height=16&code=#[faviconCode]#" id="f#[urlhash]#" class="favicon" width="16" height="16" alt="favicon of #[url]#" />
<img src="ViewImage.png?width=16&height=16&code=#[faviconCode]#" id="f#[urlhash]#" class="favicon" width="16" height="16" alt="" />
<a href="#[url]#" target="_parent">#[description]#</a></h4>
#(authorized)#::
<div class="urlactions">
Expand Down
9 changes: 5 additions & 4 deletions source/de/anomic/plasma/plasmaCrawlLURL.java
Expand Up @@ -62,6 +62,7 @@
import java.util.Iterator;
import java.util.LinkedList;

import de.anomic.data.htmlTools;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
import de.anomic.index.indexRWIEntry;
Expand Down Expand Up @@ -628,14 +629,14 @@ public void run() {
pw.println(url);
}
if (format == 1) {
pw.println("<a href=\"" + url + "\">" + comp.title() + "</a><br>");
pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</a><br>");
}
if (format == 2) {
pw.println("<item>");
pw.println("<title>" + comp.title() + "</title>");
pw.println("<title>" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</title>");
pw.println("<link>" + yacyURL.escape(url) + "</link>");
if (comp.author().length() > 0) pw.println("<author>" + comp.author() + "</author>");
if (comp.tags().length() > 0) pw.println("<description>" + comp.tags() + "</description>");
if (comp.author().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.author(), true, true) + "</author>");
if (comp.tags().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.tags(), true, true) + "</description>");
pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>");
pw.println("</item>");
Expand Down
28 changes: 20 additions & 8 deletions source/de/anomic/plasma/plasmaSearchEvent.java
Expand Up @@ -57,7 +57,7 @@ public final class plasmaSearchEvent {
public static final String URLFETCH = "urlfetch";
public static final String NORMALIZING = "normalizing";

public static int workerThreadCount = 3;
public static int workerThreadCount = 8;
public static String lastEventID = "";
private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes
Expand Down Expand Up @@ -200,7 +200,7 @@ private plasmaSearchEvent(plasmaSearchQuery query,
// start worker threads to fetch urls and snippets
this.workerThreads = new resultWorker[workerThreadCount];
for (int i = 0; i < workerThreadCount; i++) {
this.workerThreads[i] = new resultWorker(i, process.getTargetTime() * 3);
this.workerThreads[i] = new resultWorker(i, query.maximumTime * 3);
this.workerThreads[i].start();
}
} else {
Expand Down Expand Up @@ -480,7 +480,7 @@ public static plasmaSearchEvent getEvent(plasmaSearchQuery query,
// start worker threads to fetch urls and snippets
event.workerThreads = new resultWorker[workerThreadCount];
for (int i = 0; i < workerThreadCount; i++) {
event.workerThreads[i] = event.deployWorker(i, 3 * event.process.getTargetTime());
event.workerThreads[i] = event.deployWorker(i, 3 * query.maximumTime);
}
}

Expand Down Expand Up @@ -514,9 +514,15 @@ public void run() {

// start fetching urls and snippets
indexURLEntry page;
while ((resultList.size() < query.neededResults() + query.displayResults()) &&
(System.currentTimeMillis() < this.timeout) &&
((page = rankedCache.bestURL(true)) != null)) {
while (System.currentTimeMillis() < this.timeout) {

// get next entry
page = rankedCache.bestURL(true);
if (page == null) {
// if we did not get another entry, sleep some time and try again
try {Thread.sleep(100);} catch (InterruptedException e1) {}
continue;
}
if (anyResultWith(page.hash())) continue;
if (anyFailureWith(page.hash())) continue;

Expand All @@ -527,6 +533,7 @@ public void run() {
if (resultEntry == null) continue; // the entry had some problems, cannot be used
urlRetrievalAllTime += resultEntry.dbRetrievalTime;
snippetComputationAllTime += resultEntry.snippetComputationTime;
//System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring());

// place the result to the result vector
synchronized (resultList) {
Expand All @@ -537,8 +544,9 @@ public void run() {
synchronized (rankedCache) {
rankedCache.addReferences(resultEntry);
}

System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());
//System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());

if (resultList.size() >= query.neededResults() + query.displayResults()) break; // we have enough
}
serverLog.logInfo("SEARCH", "resultWorker thread " + id + " terminated");
}
Expand All @@ -565,15 +573,18 @@ public ResultEntry oneResult(int item) {
long sleeptime = this.eventTime + (this.query.maximumTime / this.query.displayResults() * ((item % this.query.displayResults()) + 1)) - System.currentTimeMillis();
if ((anyWorkerAlive()) && (sleeptime > 0)) {
try {Thread.sleep(sleeptime);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-oneResult+++ (1) sleeping " + sleeptime);
}

// if there are less than 10 more results available, sleep some extra time to get a chance that the "common sense" ranking algorithm can work
if ((this.resultList.size() <= item + 10) && (anyWorkerAlive())) {
try {Thread.sleep(300);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-oneResult+++ (2) sleeping " + 300);
}
// then sleep until any result is available (that should not happen)
while ((this.resultList.size() <= item) && (anyWorkerAlive())) {
try {Thread.sleep(100);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-oneResult+++ (3) sleeping " + 100);
}

// finally, if there is something, return the result
Expand Down Expand Up @@ -602,6 +613,7 @@ public ArrayList completeResults(long waitingtime) {
long timeout = System.currentTimeMillis() + waitingtime;
while ((this.resultList.size() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) {
try {Thread.sleep(200);} catch (InterruptedException e) {}
//System.out.println("+++DEBUG-completeResults+++ sleeping " + 200);
}
return this.resultList;
}
Expand Down
21 changes: 1 addition & 20 deletions source/de/anomic/server/serverProfiling.java
Expand Up @@ -31,25 +31,14 @@

public class serverProfiling implements Cloneable {

private static final long minimumTargetTime = 100;
private long targetTime;
private int targetCount;
private ArrayList yield;
private long timer;

private serverProfiling() {
targetTime = minimumTargetTime;
targetCount = 10;
public serverProfiling() {
yield = new ArrayList();
timer = 0;
}

// Two-argument constructor: delegates to the no-arg constructor, then
// records a profiling target time (ms) and target result count.
// NOTE(review): this is the *removed* side of the diff — the commit deletes
// this constructor along with the targetTime/targetCount fields.
public serverProfiling(long time, int count) {
this();
this.targetTime = time;
this.targetCount = count;
}

public static class Entry {
public String process;
public int count;
Expand All @@ -62,14 +51,6 @@ public Entry(String process, int count, long time) {
}
}

// Accessor for the profiling target result count.
// NOTE(review): removed by this commit together with the targetCount field.
public int getTargetCount() {
return this.targetCount;
}

// Accessor for the profiling target time in milliseconds.
// NOTE(review): removed by this commit together with the targetTime field;
// callers in plasmaSearchEvent now use query.maximumTime instead.
public long getTargetTime() {
return this.targetTime;
}

// Marks the start of a timed profiling interval by storing the current
// wall-clock time; a later yield(...) presumably measures against it.
public void startTimer() {
this.timer = System.currentTimeMillis();
}
Expand Down

0 comments on commit 2fcd18a

Please sign in to comment.