Skip to content

Commit

Permalink
enhanced parallelization of local/global/remote crawling
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@197 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed May 29, 2005
1 parent a05d738 commit 3d8a2ff
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 79 deletions.
21 changes: 13 additions & 8 deletions htroot/IndexCreate_p.java
Expand Up @@ -169,12 +169,17 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (post.containsKey("clearcrawlqueue")) {
String urlHash;
int c = 0;
while (switchboard.noticeURL.localStackSize() > 0) {
urlHash = switchboard.noticeURL.localPop().hash();
if (urlHash != null) {
switchboard.noticeURL.remove(urlHash);
c++;
}
while (switchboard.noticeURL.coreStackSize() > 0) {
urlHash = switchboard.noticeURL.corePop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
while (switchboard.noticeURL.limitStackSize() > 0) {
urlHash = switchboard.noticeURL.limitPop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
while (switchboard.noticeURL.remoteStackSize() > 0) {
urlHash = switchboard.noticeURL.remotePop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
prop.put("info", 3);//crawling queue cleared
prop.put("info_numEntries", c);
Expand Down Expand Up @@ -375,12 +380,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("loader-set_list", i );
}

int localStackSize = switchboard.noticeURL.localStackSize();
int localStackSize = switchboard.noticeURL.coreStackSize();
if (localStackSize == 0) {
prop.put("crawler-queue", 0);
} else {
prop.put("crawler-queue", 1);
plasmaCrawlNURL.entry[] crawlerList = switchboard.noticeURL.localTop(20);
plasmaCrawlNURL.entry[] crawlerList = switchboard.noticeURL.coreTop(20);
prop.put("crawler-queue_num", localStackSize);//num Entries
prop.put("crawler-queue_show-num", crawlerList.length); // showing the show-num most recent entries
plasmaCrawlNURL.entry urle;
Expand Down
68 changes: 50 additions & 18 deletions source/de/anomic/plasma/plasmaCrawlNURL.java
Expand Up @@ -61,8 +61,13 @@

public class plasmaCrawlNURL extends plasmaURL {

// Stack type selectors. The numeric values line up with the stackMode codes that
// newEntry compares against (1 = core, 2 = limit, 3 = overhang, 4 = remote).
// NOTE(review): the visible part of newEntry adds the hash to stackIndex for every
// stackMode, including 0 -- confirm that this is intended for STACK_TYPE_NULL.
public static final int STACK_TYPE_NULL = 0; // do not push onto any stack
public static final int STACK_TYPE_CORE = 1; // put on the core (local) stack
public static final int STACK_TYPE_LIMIT = 2; // put on the limit (global) stack
public static final int STACK_TYPE_OVERHANG = 3; // put on overhang stack; links that are known but not crawled
public static final int STACK_TYPE_REMOTE = 4; // put on remote-triggered stack

private kelondroStack localStack; // links found by crawling to depth-1
private kelondroStack coreStack; // links found by crawling to depth-1
private kelondroStack limitStack; // links found by crawling at target depth
private kelondroStack overhangStack; // links found by crawling at depth+1
private kelondroStack remoteStack; // links from remote crawl orders
Expand Down Expand Up @@ -101,9 +106,21 @@ public plasmaCrawlNURL(File cacheStacksPath, int bufferkb) throws IOException {

File localCrawlStack = new File(cacheStacksPath, "urlNoticeLocal0.stack");
if (localCrawlStack.exists()) {
localStack = new kelondroStack(localCrawlStack, 0);
coreStack = new kelondroStack(localCrawlStack, 0);
} else {
localStack = new kelondroStack(localCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
coreStack = new kelondroStack(localCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File limitCrawlStack = new File(cacheStacksPath, "urlNoticeLimit0.stack");
if (limitCrawlStack.exists()) {
limitStack = new kelondroStack(limitCrawlStack, 0);
} else {
limitStack = new kelondroStack(limitCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File overhangCrawlStack = new File(cacheStacksPath, "urlNoticeOverhang0.stack");
if (overhangCrawlStack.exists()) {
overhangStack = new kelondroStack(overhangCrawlStack, 0);
} else {
overhangStack = new kelondroStack(overhangCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File globalCrawlStack = new File(cacheStacksPath, "urlNoticeRemote0.stack");
if (globalCrawlStack.exists()) {
Expand All @@ -114,7 +131,7 @@ public plasmaCrawlNURL(File cacheStacksPath, int bufferkb) throws IOException {

// init stack Index
stackIndex = new HashSet();
Iterator i = localStack.iterator();
Iterator i = coreStack.iterator();
while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
i = remoteStack.iterator();
while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
Expand All @@ -134,11 +151,22 @@ private static String normalizeHandle(int h) {
}

public int stackSize() {
return localStack.size() + remoteStack.size();
// this does not count the overhang stack size
return coreStack.size() + limitStack.size() + remoteStack.size();
}
public int localStackSize() {
return localStack.size();

// Reports the current size of the core stack (links found by crawling to depth-1).
public int coreStackSize() {
    final int size = this.coreStack.size();
    return size;
}

// Reports the current size of the limit stack (links found by crawling at target depth).
public int limitStackSize() {
    final int size = this.limitStack.size();
    return size;
}

// Reports the current size of the overhang stack (links found by crawling at depth+1).
// This value is not part of the total returned by stackSize().
public int overhangStackSize() {
    final int size = this.overhangStack.size();
    return size;
}

// Reports the current size of the remote stack (links from remote crawl orders).
public int remoteStackSize() {
    final int size = this.remoteStack.size();
    return size;
}
Expand All @@ -159,21 +187,24 @@ public synchronized entry newEntry(String initiator, URL url, Date loaddate, Str
// 3 = on overhang stack
// 4 = on remote stack
try {
if (stackMode == 1) {
localStack.push(new byte[][] {e.hash.getBytes()});
stackIndex.add(new String(e.hash.getBytes()));
}
if (stackMode == 4) {
remoteStack.push(new byte[][] {e.hash.getBytes()});
stackIndex.add(new String(e.hash.getBytes()));
}
if (stackMode == 1) coreStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 2) limitStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 3) overhangStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 4) remoteStack.push(new byte[][] {e.hash.getBytes()});
stackIndex.add(new String(e.hash.getBytes()));
} catch (IOException er) {
}
return e;
}

public entry localPop() { return pop(localStack); }
public entry[] localTop(int count) { return top(localStack, count); }
// Core stack access: both methods hand the core stack to the shared pop/top helpers.
// NOTE(review): presumably pop removes the returned entry from the stack and top only
// reads up to count entries -- confirm against the pop/top helper implementations.
public entry corePop() {
    return pop(this.coreStack);
}

public entry[] coreTop(int count) {
    return top(this.coreStack, count);
}

// Limit stack access: both methods hand the limit stack to the shared pop/top helpers.
// NOTE(review): presumably pop removes the returned entry from the stack and top only
// reads up to count entries -- confirm against the pop/top helper implementations.
public entry limitPop() {
    return pop(this.limitStack);
}

public entry[] limitTop(int count) {
    return top(this.limitStack, count);
}

// Overhang stack access: both methods hand the overhang stack to the shared pop/top helpers.
// NOTE(review): presumably pop removes the returned entry from the stack and top only
// reads up to count entries -- confirm against the pop/top helper implementations.
public entry overhangPop() {
    return pop(this.overhangStack);
}

public entry[] overhangTop(int count) {
    return top(this.overhangStack, count);
}

// Remote stack access: both methods hand the remote stack to the shared pop/top helpers.
// NOTE(review): presumably pop removes the returned entry from the stack and top only
// reads up to count entries -- confirm against the pop/top helper implementations.
public entry remotePop() {
    return pop(this.remoteStack);
}

public entry[] remoteTop(int count) {
    return top(this.remoteStack, count);
}
Expand Down Expand Up @@ -344,6 +375,7 @@ public String profileHandle() {
}
}

/*
public class kenum implements Enumeration {
// enumerates entry elements
kelondroTree.rowIterator i;
Expand All @@ -362,5 +394,5 @@ public Enumeration elements(boolean up, boolean rotating) throws IOException {
// enumerates entry elements
return new kenum(up, rotating);
}

*/
}

0 comments on commit 3d8a2ff

Please sign in to comment.