Skip to content

Commit

Permalink
- refactoring of CrawlStacker (to prepare it for new multi-Threading to remove DNS lookup bottleneck)
Browse files (browse the repository at this point in the history)

- fix of shallBeOwnWord target computation heuristic


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5392 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Dec 15, 2008
1 parent b1e211b commit 2802138
Show file tree
Hide file tree
Showing 12 changed files with 208 additions and 276 deletions.
2 changes: 1 addition & 1 deletion htroot/ConfigAccounts_p.java
Expand Up @@ -62,7 +62,7 @@ public static serverObjects respond(final httpRequestHeader header, final server
}

if (localhostAccess) {
- if (sb.acceptLocalURLs) {
+ if (sb.crawlStacker.acceptLocalURLs()) {
// in this case it is not allowed to use a localhostAccess option
prop.put("commitIntranetWarning", 1);
localhostAccess = false;
Expand Down
6 changes: 2 additions & 4 deletions htroot/IndexTransfer_p.java
Expand Up @@ -105,14 +105,12 @@ public static serverObjects respond(final httpRequestHeader header, final server
} else {
if (!prop.containsKey("running_status")) prop.put("running_status","Not running");
}




//List known hosts
yacySeed seed;
int hc = 0;
if ((sb.webIndex.seedDB != null) && (sb.webIndex.seedDB.sizeConnected() > 0)) {
- final Iterator<yacySeed> e = yacyPeerSelection.getAcceptRemoteIndexSeeds(sb.webIndex.seedDB, "AAAAAAAAAAAA", sb.webIndex.seedDB.sizeConnected());
+ final Iterator<yacySeed> e = yacyPeerSelection.getAcceptRemoteIndexSeeds(sb.webIndex.seedDB, null, sb.webIndex.seedDB.sizeConnected(), false);
final TreeMap<String, String> hostList = new TreeMap<String, String>();
while (e.hasNext()) {
seed = e.next();
Expand Down
2 changes: 1 addition & 1 deletion htroot/rct_p.java
Expand Up @@ -72,7 +72,7 @@ public static serverObjects respond(final httpRequestHeader header, final server
loaddate = new Date();
}
final yacyURL referrer = null; // referrer needed!
- final String urlRejectReason = sb.acceptURL(url);
+ final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(url);
if (urlRejectReason == null) {
// stack url
if (sb.getLog().isFinest()) sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/crawlReceipt.java
Expand Up @@ -127,7 +127,7 @@ public static serverObjects respond(final httpRequestHeader header, final server
}

// check if the entry is in our network domain
- final String urlRejectReason = sb.acceptURL(comp.url());
+ final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(comp.url());
if (urlRejectReason != null) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "9999");
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/transferURL.java
Expand Up @@ -128,7 +128,7 @@ public static serverObjects respond(final httpRequestHeader header, final server
}

// check if the entry is in our network domain
- final String urlRejectReason = sb.acceptURL(comp.url());
+ final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(comp.url());
if (urlRejectReason != null) {
if (yacyCore.log.isFine()) yacyCore.log.logFine("transferURL: blocked URL '" + comp.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
lEntry = null;
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/crawler/CrawlQueues.java
Expand Up @@ -393,7 +393,7 @@ public boolean remoteCrawlLoaderJob() {
} catch (final ParseException e) {
loaddate = new Date();
}
- final String urlRejectReason = sb.acceptURL(url);
+ final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(url);
if (urlRejectReason == null) {
// stack url
if (sb.getLog().isFinest()) sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
Expand Down

0 comments on commit 2802138

Please sign in to comment.