Skip to content

Commit

Permalink
code clean-up
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@401 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jul 12, 2005
1 parent 00f63ea commit 19dbed7
Show file tree
Hide file tree
Showing 16 changed files with 439 additions and 428 deletions.
1 change: 1 addition & 0 deletions htroot/IndexControl_p.java
Expand Up @@ -108,6 +108,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (post.containsKey("setIndexDistribute")) {
boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on");
switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false");
if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable();
}

if (post.containsKey("setIndexReceive")) {
Expand Down
4 changes: 2 additions & 2 deletions htroot/IndexCreateIndexingQueue_p.java
Expand Up @@ -123,11 +123,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
}
dark = true;
String url, initiatorHash, executorHash;
plasmaCrawlEURL.entry entry;
plasmaCrawlEURL.Entry entry;
yacySeed initiatorSeed, executorSeed;
int j=0;
for (i = switchboard.urlPool.errorURL.stackSize() - 1; i >= (switchboard.urlPool.errorURL.stackSize() - showRejectedCount); i--) {
entry = (plasmaCrawlEURL.entry) switchboard.urlPool.errorURL.getStack(i);
entry = (plasmaCrawlEURL.Entry) switchboard.urlPool.errorURL.getStack(i);
initiatorHash = entry.initiator();
executorHash = entry.executor();
url = entry.url().toString();
Expand Down
4 changes: 2 additions & 2 deletions htroot/IndexCreateWWWGlobalQueue_p.java
Expand Up @@ -85,10 +85,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("crawler-queue", 0);
} else {
prop.put("crawler-queue", 1);
plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_LIMIT, 100);
plasmaCrawlNURL.Entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_LIMIT, 100);
prop.put("crawler-queue_num", stackSize);//num Entries
prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent
plasmaCrawlNURL.entry urle;
plasmaCrawlNURL.Entry urle;
boolean dark = true;
yacySeed initiator;
int i;
Expand Down
4 changes: 2 additions & 2 deletions htroot/IndexCreateWWWLocalQueue_p.java
Expand Up @@ -85,10 +85,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("crawler-queue", 0);
} else {
prop.put("crawler-queue", 1);
plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100);
plasmaCrawlNURL.Entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100);
prop.put("crawler-queue_num", stackSize);//num Entries
prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent
plasmaCrawlNURL.entry urle;
plasmaCrawlNURL.Entry urle;
boolean dark = true;
yacySeed initiator;
int i;
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/crawlReceipt.java
Expand Up @@ -125,7 +125,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
// ready for more
prop.put("delay", "10");
} else {
plasmaCrawlNURL.entry en = switchboard.urlPool.noticeURL.getEntry(urlhash);
plasmaCrawlNURL.Entry en = switchboard.urlPool.noticeURL.getEntry(urlhash);
if (en != null) {
switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(plasmaURL.urlFlagLength), false);
switchboard.urlPool.noticeURL.remove(urlhash);
Expand Down
47 changes: 47 additions & 0 deletions source/de/anomic/kelondro/kelondroMSetTools.java
Expand Up @@ -40,6 +40,11 @@

package de.anomic.kelondro;

import java.io.File;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.FileInputStream;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
Expand Down Expand Up @@ -351,6 +356,48 @@ public boolean equals(Object obj) {

// ------------------------------------------------------------------------------------------------

public static TreeMap loadMap(String mapname, String filename, String sep) {
    // Reads key/value pairs from a text file into a TreeMap.
    // Each non-empty line that does not start with '#' is split at the first
    // occurrence of 'sep'; the key part is trimmed and lower-cased, the value
    // part is trimmed. Lines without a separator (or starting with it) are skipped.
    // NOTE(review): 'mapname' is currently unused -- kept for caller compatibility.
    // I/O errors are deliberately swallowed: the map parsed so far is returned.
    final TreeMap result = new TreeMap();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
        String record;
        while ((record = reader.readLine()) != null) {
            record = record.trim();
            if (record.length() == 0) continue;   // blank line
            if (record.startsWith("#")) continue; // comment line
            final int sepPos = record.indexOf(sep);
            if (sepPos <= 0) continue;            // no separator, or key would be empty
            result.put(record.substring(0, sepPos).trim().toLowerCase(),
                       record.substring(sepPos + sep.length()).trim());
        }
    } catch (IOException e) {
        // best effort: return whatever has been parsed so far
    } finally {
        if (reader != null) try { reader.close(); } catch (Exception e) { /* ignored */ }
    }
    return result;
}

public static TreeSet loadList(File file) {
    // Reads a word list from a text file into a TreeSet ordered by
    // kelondroMSetTools.fastStringComparator. Every non-empty line that does
    // not start with '#' is added in trimmed, lower-cased form.
    // A missing file yields an empty set; I/O errors are deliberately
    // swallowed and the set parsed so far is returned.
    final TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator);
    if (!(file.exists())) return list;

    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            // line is already trimmed here; the original called trim() twice
            if ((line.length() > 0) && (!(line.startsWith("#")))) list.add(line.toLowerCase());
        }
        // no explicit close here: the finally block closes exactly once,
        // on both the normal and the exception path (the original closed twice)
    } catch (IOException e) {
        // best effort: return whatever has been parsed so far
    } finally {
        if (br != null) try { br.close(); } catch (Exception e) { /* ignored */ }
    }
    return list;
}

// ------------------------------------------------------------------------------------------------


public static void main(String[] args) {
TreeMap m = new TreeMap();
TreeSet s = new TreeSet();
Expand Down
20 changes: 10 additions & 10 deletions source/de/anomic/plasma/plasmaCrawlEURL.java
Expand Up @@ -84,7 +84,7 @@ public plasmaCrawlEURL(File cachePath, int bufferkb) throws IOException {
}
}

public synchronized entry newEntry(URL url, String referrer, String initiator, String executor,
public synchronized Entry newEntry(URL url, String referrer, String initiator, String executor,
String name, String failreason, bitfield flags, boolean retry) {
if ((referrer == null) || (referrer.length() < urlHashLength)) referrer = dummyHash;
if ((initiator == null) || (initiator.length() < urlHashLength)) initiator = dummyHash;
Expand All @@ -101,15 +101,15 @@ public synchronized entry newEntry(URL url, String referrer, String initiator, S
map.put("failreason", failreason);
map.put("flags", flags);
rejectedStack.add(map);
entry e = new entry(url, referrer, initiator, executor, name, failreason, flags);
Entry e = new Entry(url, referrer, initiator, executor, name, failreason, flags);

// put in table
if (retry) e.store();
return e;
}

public synchronized entry getEntry(String hash) {
return new entry(hash);
public synchronized Entry getEntry(String hash) {
return new Entry(hash);
}

public void clearStack() {
Expand All @@ -120,13 +120,13 @@ public int stackSize() {
return rejectedStack.size();
}

public entry getStack(int pos) {
public Entry getStack(int pos) {
HashMap m = (HashMap) rejectedStack.get(pos);
return new entry((URL) m.get("url"), (String) m.get("referrer"), (String) m.get("initiator"), (String) m.get("executor"),
return new Entry((URL) m.get("url"), (String) m.get("referrer"), (String) m.get("initiator"), (String) m.get("executor"),
(String) m.get("name"), (String) m.get("failreason"), (bitfield) m.get("flags"));
}

public class entry {
public class Entry {

private String hash; // the url's hash
private String referrer; // the url's referrer hash
Expand All @@ -140,7 +140,7 @@ public class entry {
private String failreason; // string describing reason for load fail
private bitfield flags; // extra space

public entry(URL url, String referrer, String initiator, String executor, String name, String failreason, bitfield flags) {
public Entry(URL url, String referrer, String initiator, String executor, String name, String failreason, bitfield flags) {
// create new entry and store it into database
this.hash = urlHash(url);
this.referrer = (referrer == null) ? dummyHash : referrer;
Expand All @@ -156,7 +156,7 @@ public entry(URL url, String referrer, String initiator, String executor, String

}

public entry(String hash) {
public Entry(String hash) {
// generates an plasmaEURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
Expand Down Expand Up @@ -265,7 +265,7 @@ public boolean hasMoreElements() {
return i.hasNext();
}
public Object nextElement() {
return new entry(new String(((byte[][]) i.next())[0]));
return new Entry(new String(((byte[][]) i.next())[0]));
}
}

Expand Down
28 changes: 14 additions & 14 deletions source/de/anomic/plasma/plasmaCrawlNURL.java
Expand Up @@ -187,9 +187,9 @@ public boolean existsInStack(String urlhash) {
return stackIndex.contains(urlhash);
}

public synchronized entry newEntry(String initiator, URL url, Date loaddate, String referrer, String name,
public synchronized Entry newEntry(String initiator, URL url, Date loaddate, String referrer, String name,
String profile, int depth, int anchors, int forkfactor, int stackMode) {
entry e = new entry(initiator, url, referrer, name, loaddate, profile,
Entry e = new Entry(initiator, url, referrer, name, loaddate, profile,
depth, anchors, forkfactor);
try {
switch (stackMode) {
Expand All @@ -208,7 +208,7 @@ public synchronized entry newEntry(String initiator, URL url, Date loaddate, Str
return e;
}

public entry[] top(int stackType, int count) {
public Entry[] top(int stackType, int count) {
switch (stackType) {
case STACK_TYPE_CORE: return top(coreStack, count);
case STACK_TYPE_LIMIT: return top(limitStack, count);
Expand All @@ -221,7 +221,7 @@ public entry[] top(int stackType, int count) {
}
}

public entry pop(int stackType) {
public Entry pop(int stackType) {
switch (stackType) {
case STACK_TYPE_CORE: return pop(coreStack);
case STACK_TYPE_LIMIT: return pop(limitStack);
Expand All @@ -234,11 +234,11 @@ public entry pop(int stackType) {
}
}

private entry pop(kelondroStack stack) {
private Entry pop(kelondroStack stack) {
// this is a filo - pop
try {
if (stack.size() > 0) {
entry e = new entry(new String(stack.pop()[0]));
Entry e = new Entry(new String(stack.pop()[0]));
stackIndex.remove(e.hash);
return e;
} else {
Expand All @@ -249,22 +249,22 @@ private entry pop(kelondroStack stack) {
}
}

private entry[] top(kelondroStack stack, int count) {
private Entry[] top(kelondroStack stack, int count) {
// this is a filo - top
if (count > stack.size()) count = stack.size();
entry[] list = new entry[count];
Entry[] list = new Entry[count];
try {
for (int i = 0; i < count; i++) {
list[i] = new entry(new String(stack.top(i)[0]));
list[i] = new Entry(new String(stack.top(i)[0]));
}
return list;
} catch (IOException e) {
return null;
}
}

public synchronized entry getEntry(String hash) {
return new entry(hash);
public synchronized Entry getEntry(String hash) {
return new Entry(hash);
}

public synchronized void remove(String hash) {
Expand All @@ -273,7 +273,7 @@ public synchronized void remove(String hash) {
} catch (IOException e) {}
}

public class entry {
public class Entry {

private String initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash in entered
Expand All @@ -289,7 +289,7 @@ public class entry {
private bitfield flags;
private int handle;

public entry(String initiator, URL url, String referrer, String name, Date loaddate, String profileHandle,
public Entry(String initiator, URL url, String referrer, String name, Date loaddate, String profileHandle,
int depth, int anchors, int forkfactor) {
// create new entry and store it into database
this.hash = urlHash(url);
Expand All @@ -307,7 +307,7 @@ public entry(String initiator, URL url, String referrer, String name, Date loadd
store();
}

public entry(String hash) {
public Entry(String hash) {
// generates an plasmaNURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
Expand Down
37 changes: 37 additions & 0 deletions source/de/anomic/plasma/plasmaSnippetCache.java
Expand Up @@ -54,6 +54,7 @@
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySearch;
import de.anomic.htmlFilter.htmlFilterContentScraper;

public class plasmaSnippetCache {

Expand Down Expand Up @@ -368,4 +369,40 @@ private void loadResourceFromWeb(URL url, int socketTimeout) throws IOException
log);
}

public void fetch(plasmaSearch.result acc, Set queryhashes, String urlmask, int fetchcount) {
    // Asynchronously fetches snippets for up to 'fetchcount' search results.
    // Results on virtual yacy hosts ('.yacyh') and results whose snippet is
    // already cached are skipped; each remaining URL whose normal form matches
    // 'urlmask' (a regex) is fetched in its own Fetcher thread.
    // NOTE(review): threads are fire-and-forget; nothing joins or limits them
    // beyond 'fetchcount' -- confirm that is intended.
    int started = 0;
    plasmaCrawlLURL.Entry urlentry;
    String urlstring;
    while ((acc.hasMoreElements()) && (started < fetchcount)) {
        urlentry = acc.nextElement();
        // guard against URLs without a host part: the original called
        // getHost().endsWith(...) unconditionally, which NPEs on host-less URLs
        final String host = urlentry.url().getHost();
        if ((host == null) || (host.endsWith(".yacyh"))) continue;
        urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());
        if ((urlstring.matches(urlmask)) &&
            (!(existsInCache(urlentry.url(), queryhashes)))) {
            new Fetcher(urlentry.url(), queryhashes).start();
            started++;
        }
    }
    // removed: unused local 'plasmaSnippetCache.result snippet'
}

public class Fetcher extends Thread {
    // Background worker that retrieves and logs the snippet for a single URL.
    URL url;
    Set queryhashes;

    public Fetcher(URL url, Set queryhashes) {
        // Virtual yacy hosts ('.yacyh') are never fetched: the early return
        // leaves both fields null and run() becomes a no-op (see guard below).
        if (url.getHost().endsWith(".yacyh")) return;
        this.url = url;
        this.queryhashes = queryhashes;
    }

    public void run() {
        // Guard against a half-initialized instance (constructor early-return):
        // without this, start()ing a Fetcher built with a '.yacyh' URL would
        // throw a NullPointerException inside retrieve().
        if (url == null) return;
        log.logDebug("snippetFetcher: try to get URL " + url);
        plasmaSnippetCache.result snippet = retrieve(url, queryhashes, true, 260);
        if (snippet.line == null)
            log.logDebug("snippetFetcher: cannot get URL " + url + ". error(" + snippet.source + "): " + snippet.error);
        else
            log.logDebug("snippetFetcher: got URL " + url + ", the snippet is '" + snippet.line + "', source=" + snippet.source);
    }
}

}

0 comments on commit 19dbed7

Please sign in to comment.