Skip to content

Commit

Permalink
When profiles are cleaned, there shall first be a callback showing which
Browse files Browse the repository at this point in the history
profiles are cleaned. This shall enable profile-termination-driven
postprocessing. To do this, index writes must carry the profile key,
which will be implemented in the next step.
  • Loading branch information
Orbiter committed Sep 25, 2013
1 parent 0013d0d commit 14442ef
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
18 changes: 10 additions & 8 deletions source/net/yacy/crawler/CrawlSwitchboard.java
Expand Up @@ -534,7 +534,7 @@ public boolean clear() throws InterruptedException {
return hasDoneSomething;
}

public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
public Set<String> getFinishesProfiles(CrawlQueues crawlQueues) {
// clear the counter cache
this.profilesActiveCrawlsCounter.clear();

Expand All @@ -547,7 +547,7 @@ public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
deletionCandidate.add(ASCII.String(handle));
}
}
if (deletionCandidate.size() == 0) return 0;
if (deletionCandidate.size() == 0) return new HashSet<String>(0);

// iterate through all the queues and see if one of these handles appear there
// this is a time-consuming process, set a time-out
Expand All @@ -564,15 +564,18 @@ public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
if (us == null) {us = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.profilesActiveCrawlsCounter.put(handle, us);}
if (us.size() < 100) us.put(r.url().hash()); // store the hash, but not too many
deletionCandidate.remove(handle);
if (deletionCandidate.size() == 0) return 0;
if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large
if (deletionCandidate.size() == 0) return new HashSet<String>(0);
if (System.currentTimeMillis() > timeout) return new HashSet<String>(0); // give up; this is too large
}
if (deletionCandidate.size() == 0) return 0;
if (deletionCandidate.size() == 0) return new HashSet<String>(0);
}
} catch (final Throwable e) {
return 0;
return new HashSet<String>(0);
}

return deletionCandidate;
}

public void cleanProfiles(Set<String> deletionCandidate) {
// all entries that are left are candidates for deletion; do that now
for (String h: deletionCandidate) {
byte[] handle = ASCII.getBytes(h);
Expand All @@ -582,7 +585,6 @@ public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
this.removeActive(handle);
}
}
return deletionCandidate.size();
}

public synchronized void close() {
Expand Down
12 changes: 9 additions & 3 deletions source/net/yacy/search/Switchboard.java
Expand Up @@ -2130,9 +2130,15 @@ && getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").isEmpty() ) {

// clean up profiles
checkInterruption();
//cleanProfiles();
int cleanup = this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? 0 : this.crawler.cleanFinishesProfiles(this.crawlQueues);
if (cleanup > 0) log.info("cleanup removed " + cleanup + " crawl profiles");

if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
Set<String> deletionCandidates = this.crawler.getFinishesProfiles(this.crawlQueues);
int cleanup = deletionCandidates.size();
if (cleanup > 0) {
this.crawler.cleanProfiles(deletionCandidates);
log.info("cleanup removed " + cleanup + " crawl profiles");
}
}

// clean up news
checkInterruption();
Expand Down

0 comments on commit 14442ef

Please sign in to comment.