possible fix for lost crawl profile handles: clean-up job did wrong measurement to see if crawl is still running.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6465 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter committed Nov 6, 2009
1 parent cd6745b commit 4c99d46
Showing 9 changed files with 24 additions and 22 deletions.
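
The substance of the fix, before the per-file diffs: Switchboard.cleanProfiles() decided whether a crawl was still running by calling crawlQueues.size(), which counts only the loader worker threads busy fetching at that instant, not the URLs still waiting on the core, limit, and remote-triggered crawl stacks. A crawl whose URLs were queued but not yet being fetched therefore looked finished, and its profile handle could be cleaned away under it. Below is a minimal sketch of that failure mode; the class and fields are hypothetical stand-ins, not YaCy code, and only the old-versus-new condition mirrors this commit.

// Hypothetical stand-in for CrawlQueues, reduced to the two quantities
// that matter here: busy loader threads versus queued-but-unfetched URLs.
final class CrawlActivitySketch {
    int activeWorkers; // loader threads fetching right now (what size() counted)
    int queuedUrls;    // URLs waiting on the crawl stacks (what size() ignored)

    // Old measurement: only busy workers mean "crawl still running".
    boolean crawlRunningOld() {
        return activeWorkers > 0;
    }

    // Fixed measurement: queued work also keeps the crawl alive, so the
    // profile clean-up job must leave its handle alone.
    boolean crawlRunningNew() {
        return activeWorkers > 0 || queuedUrls > 0;
    }

    public static void main(String[] args) {
        final CrawlActivitySketch q = new CrawlActivitySketch();
        q.activeWorkers = 0;  // momentarily no worker is busy...
        q.queuedUrls = 500;   // ...but the crawl is far from done
        System.out.println("old: running=" + q.crawlRunningOld()); // false -> profile handle lost
        System.out.println("new: running=" + q.crawlRunningNew()); // true  -> profile handle kept
    }
}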
2 changes: 1 addition & 1 deletion htroot/IndexCreateLoaderQueue_p.java
@@ -42,7 +42,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
final serverObjects prop = new serverObjects();


- if (sb.crawlQueues.size() == 0) {
+ if (sb.crawlQueues.workerSize() == 0) {
prop.put("loader-set", "0");
} else {
prop.put("loader-set", "1");
2 changes: 1 addition & 1 deletion htroot/PerformanceQueues_p.java
@@ -318,7 +318,7 @@ else if (threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER)
// table thread pool settings
prop.put("pool_0_name","Crawler Pool");
prop.put("pool_0_maxActive", sb.getConfigLong("crawler.MaxActiveThreads", 0));
prop.put("pool_0_numActive",sb.crawlQueues.size());
prop.put("pool_0_numActive",sb.crawlQueues.workerSize());

final WorkflowThread httpd = sb.getThread("10_httpd");
prop.put("pool_1_name", "httpd Session Pool");
2 changes: 1 addition & 1 deletion htroot/Status.java
@@ -286,7 +286,7 @@ else if (jobType.equals("remoteTriggeredCrawl"))
prop.putNum("connectionsMax", httpd.getMaxSessionCount());

// Queue information
- final int loaderJobCount = sb.crawlQueues.size();
+ final int loaderJobCount = sb.crawlQueues.workerSize();
final int loaderMaxCount = Integer.parseInt(sb.getConfig(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, "10"));
final int loaderPercent = (loaderMaxCount==0)?0:loaderJobCount*100/loaderMaxCount;
prop.putNum("loaderQueueSize", loaderJobCount);
2 changes: 1 addition & 1 deletion htroot/WatchCrawler_p.java
@@ -108,7 +108,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
if ((post.containsKey("autoforward")) &&
(sb.crawlQueues.coreCrawlJobSize() == 0) &&
(sb.crawlQueues.remoteTriggeredCrawlJobSize() == 0) &&
- (sb.getActiveQueueSize() < 30)) {
+ (sb.getIndexingProcessorsQueueSize() < 30)) {
prop.put("forwardToCrawlStart", "1");
}

4 changes: 2 additions & 2 deletions htroot/api/queues_p.java
@@ -48,9 +48,9 @@ public static serverObjects respond(final RequestHeader header, final serverObje
prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());

// loader queue
prop.put("loaderSize", Integer.toString(sb.crawlQueues.size()));
prop.put("loaderSize", Integer.toString(sb.crawlQueues.workerSize()));
prop.put("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
- if (sb.crawlQueues.size() == 0) {
+ if (sb.crawlQueues.workerSize() == 0) {
prop.put("list-loader", "0");
} else {
final Request[] w = sb.crawlQueues.activeWorkerEntries();
1 change: 0 additions & 1 deletion htroot/imagetest.java
@@ -24,7 +24,6 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

- import net.yacy.kelondro.logging.Log;
import net.yacy.visualization.PrintTool;
import net.yacy.visualization.RasterPlotter;
import de.anomic.http.server.RequestHeader;
9 changes: 5 additions & 4 deletions source/de/anomic/crawler/CrawlQueues.java
@@ -352,13 +352,13 @@ public boolean remoteCrawlLoaderJob() {
return false;
}

- if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
+ if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
// try a cleanup
cleanup();
}
// check again
- if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
- if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + "), httpClients = " + Client.connectionCount());
+ if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
+ if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + Client.connectionCount());
return false;
}

@@ -514,7 +514,8 @@ public boolean remoteTriggeredCrawlJob() {
}
}

- public int size() {
+
+ public int workerSize() {
return workers.size();
}

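
Two things happen in this file: size() becomes workerSize(), so a class that owns both worker threads and URL stacks no longer has an ambiguous "size", and the capacity test in remoteCrawlLoaderJob() keeps its check, clean up, re-check shape. A reduced sketch of that pattern follows; mayStartLoader() and reap() are hypothetical stand-ins for the surrounding method and for cleanup().

import java.util.Map;

// Check-cleanup-recheck skeleton, as in remoteCrawlLoaderJob() above.
final class LoaderCapacitySketch {
    boolean mayStartLoader(Map<String, Thread> workers, long maxThreads) {
        if (workers.size() >= maxThreads) {
            reap(workers);                  // try to free slots held by finished threads
        }
        if (workers.size() >= maxThreads) { // still at capacity after reaping?
            return false;                   // dismiss this job; retry on the next cycle
        }
        return true;                        // room for one more loader worker
    }

    void reap(Map<String, Thread> workers) {
        workers.values().removeIf(t -> !t.isAlive()); // keep only live threads
    }
}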
2 changes: 0 additions & 2 deletions source/de/anomic/net/whois.java
@@ -26,8 +26,6 @@
import java.io.InputStreamReader;
import java.util.Properties;

- import net.yacy.kelondro.logging.Log;
-
public class whois {

public static Properties Whois(final String dom) {
22 changes: 13 additions & 9 deletions source/de/anomic/search/Switchboard.java
@@ -661,7 +661,7 @@ public Switchboard(final File rootPath, final String initPath, final String conf
log.logConfig("Finished Switchboard Initialization");
}

- public int getActiveQueueSize() {
+ public int getIndexingProcessorsQueueSize() {
return
this.indexingDocumentProcessor.queueSize() +
this.indexingCondensementProcessor.queueSize() +
@@ -1066,16 +1066,16 @@ private static String ppRamString(long bytes) {
}

/**
- * {@link CrawlProfile Crawl Profiles} are saved independantly from the queues themselves
+ * {@link CrawlProfile Crawl Profiles} are saved independently from the queues themselves
* and therefore have to be cleaned up from time to time. This method only performs the clean-up
* if - and only if - the {@link IndexingStack switchboard},
* {@link LoaderDispatcher loader} and {@link plasmaCrawlNURL local crawl} queues are all empty.
* <p>
* Then it iterates through all existing {@link CrawlProfile crawl profiles} and removes
- all profiles which are not hardcoded.
+ all profiles which are not hard-coded.
* </p>
* <p>
* <i>If this method encounters DB-failures, the profile DB will be resetted and</i>
* <i>If this method encounters DB-failures, the profile DB will be reseted and</i>
* <code>true</code><i> will be returned</i>
* </p>
* @see #CRAWL_PROFILE_PROXY hardcoded
@@ -1088,9 +1088,13 @@ private static String ppRamString(long bytes) {
* shutdown procedure
*/
public boolean cleanProfiles() throws InterruptedException {
- if ((getActiveQueueSize() > 0) || (crawlQueues.size() > 0) ||
+ if (getIndexingProcessorsQueueSize() > 0 ||
+ crawlQueues.workerSize() > 0 ||
+ crawlQueues.coreCrawlJobSize() > 0 ||
+ crawlQueues.limitCrawlJobSize() > 0 ||
+ crawlQueues.remoteTriggeredCrawlJobSize() > 0 ||
(crawlStacker != null && crawlStacker.size() > 0) ||
- (crawlQueues.noticeURL.notEmpty()))
+ crawlQueues.noticeURL.notEmpty())
return false;
return this.crawler.cleanProfiles();
}
@@ -1916,10 +1920,10 @@ public String dhtShallTransfer(String segment) {
return "no DHT distribution: not enough words - wordIndex.size() = " + indexSegment.termIndex().sizesMax();
}
if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, "false").equalsIgnoreCase("false")) && (crawlQueues.noticeURL.notEmptyLocal())) {
return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
}
- if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getActiveQueueSize() > 1)) {
- return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
+ if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getIndexingProcessorsQueueSize() > 1)) {
+ return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
}
return null; // this means; yes, please do dht transfer
}
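
Read as one unit, the widened guard refuses to clean profiles while any place a crawl can still live holds work: the indexing processors, the loader workers, the three crawl job stacks, the crawl stacker, and the notice-URL store. For reference, the new method assembled from the hunk above, with explanatory comments added:

public boolean cleanProfiles() throws InterruptedException {
    if (getIndexingProcessorsQueueSize() > 0 ||          // documents still being indexed
        crawlQueues.workerSize() > 0 ||                  // loader threads still fetching
        crawlQueues.coreCrawlJobSize() > 0 ||            // local crawl stack not empty
        crawlQueues.limitCrawlJobSize() > 0 ||           // limit crawl stack not empty
        crawlQueues.remoteTriggeredCrawlJobSize() > 0 || // remote-triggered stack not empty
        (crawlStacker != null && crawlStacker.size() > 0) || // URLs still being stacked
        crawlQueues.noticeURL.notEmpty())                // notice-URL store still holds entries
        return false;                                    // some crawl may still be running
    return this.crawler.cleanProfiles();                 // everything idle: safe to clean
}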
