
Fixed removal of URLs from the delegatedURL remote crawl stack

URLs were removed from the stack using their hash as a byte array, whereas the hash is stored in the stack as a String instance, so the remove calls never matched any entry.
luccioman committed Jul 5, 2018
1 parent 2bdd71d commit c726154a59994137053d3c557a73e18315dcbbfc
Showing with 8 additions and 4 deletions.
  1. +2 −2 htroot/yacy/crawlReceipt.java
  2. +6 −2 source/net/yacy/crawler/data/CrawlQueues.java
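
To illustrate the bug being fixed: delegatedURL is declared as Map<String, DigestURL>, keyed by the String form of the 12-character URL hash, but the old code passed the raw byte[] hash to Map.remove(Object). Since Map.remove accepts any Object, the call compiled, yet a byte array never equals a String key, so nothing was ever removed. The following standalone sketch (hypothetical demo code, not part of YaCy; it uses plain String values instead of DigestURL and new String(hash, US_ASCII) in place of ASCII.String(hash)) demonstrates the mismatch and the fix:

import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical standalone demo: shows why Map.remove(byte[]) is a silent no-op
// when the map is keyed by String.
public class DelegatedUrlRemoveDemo {
    public static void main(String[] args) {
        // Like delegatedURL, keyed by the String form of the URL hash
        // (values are plain Strings here to keep the sketch self-contained).
        Map<String, String> delegatedURL = new ConcurrentHashMap<String, String>();

        byte[] hash = "AAAAAAAAAAAA".getBytes(StandardCharsets.US_ASCII); // made-up 12-byte hash
        delegatedURL.put(new String(hash, StandardCharsets.US_ASCII), "http://example.org/");

        // Old behaviour: Map.remove(Object) accepts any type, so this compiles,
        // but a byte[] never equals a String key and nothing is removed.
        delegatedURL.remove(hash);
        System.out.println(delegatedURL.size()); // prints 1

        // Fixed behaviour: convert the hash to the key type first
        // (YaCy uses ASCII.String(hash) for this).
        delegatedURL.remove(new String(hash, StandardCharsets.US_ASCII));
        System.out.println(delegatedURL.size()); // prints 0
    }
}

Unlike put, which is type-checked against the key parameter, remove(Object) cannot be flagged by the compiler, which is why this kind of mismatch is easy to miss.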
htroot/yacy/crawlReceipt.java
@@ -147,7 +147,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea
             // put new entry into database
             sb.index.fulltext().putMetadata(entry);
             ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
-            sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
+            sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash())); // the delegated work has been done
             if (log.isInfo()) log.info("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false));

             // ready for more
@@ -160,7 +160,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea
         }

         if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case
-            sb.crawlQueues.delegatedURL.remove(entry.hash());
+            sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash()));
             sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
         }
         //switchboard.noticeURL.remove(receivedUrlhash);
source/net/yacy/crawler/data/CrawlQueues.java
@@ -82,6 +82,8 @@

     public NoticedURL noticeURL;
     public ErrorCache errorURL;
+
+    /** URLs pulled by remote peers in order to crawl them for us */
     public Map<String, DigestURL> delegatedURL;

     public CrawlQueues(final Switchboard sb, final File queuePath) {
@@ -107,7 +109,7 @@ public void initRemoteCrawlQueues () {
         if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList<String>();
         if (this.delegatedURL == null) {
             this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
-            log.config("Finishted Startup of Crawling Management");
+            log.config("Finished Startup of Crawling Management");
         }
     }
     /**
@@ -205,7 +207,9 @@ public int hostcount(final String host) {
     public void removeURL(final byte[] hash) {
         assert hash != null && hash.length == 12;
         this.noticeURL.removeByURLHash(hash);
-        if (this.delegatedURL != null) this.delegatedURL.remove(hash);
+        if (this.delegatedURL != null) {
+            this.delegatedURL.remove(ASCII.String(hash));
+        }
     }

     public int removeHosts(final Set<String> hosthashes) {
