Skip to content

Commit

Permalink
- Cache known URLs during indexReceive to avoid getting blocked during loadedURL.exists() whenever possible
Browse files Browse the repository at this point in the history

- Small logging updates



git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2359 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
hermens committed Aug 7, 2006
1 parent c09f734 commit d56f064
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 21 deletions.
45 changes: 25 additions & 20 deletions htroot/yacy/transferRWI.java
Expand Up @@ -136,8 +136,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
indexEntry iEntry;
int wordhashesSize = v.size();
final HashSet unknownURL = new HashSet();
final HashSet knownURL = new HashSet();
String[] wordhashes = new String[v.size()];
int received = 0;
int blocked = 0;
int receivedURL = 0;
for (int i = 0; i < wordhashesSize; i++) {
serverCore.checkInterruption();

Expand All @@ -147,29 +150,31 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
wordHash = estring.substring(0, p);
wordhashes[received] = wordHash;
iEntry = new indexURLEntry(estring.substring(p));
sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true);
serverCore.checkInterruption();

urlHash = iEntry.urlHash();
try {
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
//TODO: set to logFine if it works.
}
else {
unknownURL.add(urlHash);
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
//int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs");
blocked++;
} else {
sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true);
serverCore.checkInterruption();

if (!(knownURL.contains(urlHash)||unknownURL.contains(urlHash))) {
try {
if (sb.urlPool.loadedURL.exists(urlHash)) {
knownURL.add(urlHash);
} else {
unknownURL.add(urlHash);
}
} catch (Exception ex) {
sb.getLog().logWarning(
"transferRWI: DB-Error while trying to determine if URL with hash '" +
urlHash + "' is known.", ex);
}
receivedURL++;
}
} catch (Exception ex) {
sb.getLog().logWarning(
"transferRWI: DB-Error while trying to determine if URL with hash '" +
urlHash + "' is known.", ex);
unknownURL.add(urlHash);
received++;
}
received++;
}
}
yacyCore.seedDB.mySeed.incRI(received);
Expand All @@ -185,7 +190,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs");
} else {
final double avdist = (yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[0]) + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[received - 1])) / 2.0;
sb.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs");
sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs");
}
result = "ok";

Expand Down
4 changes: 3 additions & 1 deletion htroot/yacy/transferURL.java
Expand Up @@ -83,6 +83,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

if (granted) {
int received = 0;
int blocked = 0;
final int sizeBefore = sb.urlPool.loadedURL.size();
// read the urls from the other properties and store
String urls;
Expand All @@ -100,6 +101,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;
blocked++;
} else {
lEntry.store();
sb.urlPool.loadedURL.stackEntry(lEntry, iam, iam, 3);
Expand All @@ -121,7 +123,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
// return rewrite properties
final int more = sb.urlPool.loadedURL.size() - sizeBefore;
doublevalues = Integer.toString(received - more);
sb.getLog().logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms.");
sb.getLog().logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, Blocked " + blocked + " URLs");
if ((received - more) > 0) sb.getLog().logSevere("Received " + doublevalues + " double URLs from peer " + otherPeerName);
result = "ok";
} else {
Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/plasma/plasmaDHTTransfer.java
Expand Up @@ -160,7 +160,9 @@ public void uploadIndex() throws InterruptedException {
this.payloadSize = ((Integer)result.get("payloadSize")).intValue();

this.log.logInfo("Index transfer of " + this.dhtChunk.indexCount() +
" entries " + this.dhtChunk.containerSize() +
" words [" + this.dhtChunk.firstContainer().getWordHash() + " .. " + this.dhtChunk.lastContainer().getWordHash() + "]" +
" and " + this.dhtChunk.urlCacheMap().size() + " URLs" +
" to peer " + this.seed.getName() + ":" + this.seed.hash +
" in " + (this.transferTime / 1000) +
" seconds successful (" + (1000 * this.dhtChunk.indexCount() / (this.transferTime + 1)) +
Expand Down

0 comments on commit d56f064

Please sign in to comment.