Skip to content

Commit

Permalink
bug fixes for word ordering and dht index selection
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@521 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Aug 12, 2005
1 parent 41cd5e9 commit 5716f85
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 15 deletions.
6 changes: 4 additions & 2 deletions htroot/Network.java
Expand Up @@ -193,7 +193,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

prop.put("table_comment",0);
}
}else {
} else {
// generate table
int page = Integer.parseInt(post.get("page", "1"));
int conCount = 0;
Expand Down Expand Up @@ -226,7 +226,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
try {
for (int c = availableNews - 1; c >= 0; c--) {
record = yacyCore.newsPool.get(yacyNewsPool.INCOMING_DB, c);
if (record.category().equals("prfleupd")) {
if (record == null) {
break;
} else if (record.category().equals("prfleupd")) {
updatedProfile.add(record.originator());
} else if (record.category().equals("wiki_upd")) {
updatedWiki.put(record.originator(), record.attributes().get("page"));
Expand Down
47 changes: 46 additions & 1 deletion source/de/anomic/kelondro/kelondroTree.java
Expand Up @@ -241,6 +241,7 @@ private void searchproc() throws IOException {
}
}
// we reached a node where we must insert the new value
// the parent of this new value can be obtained by getParent()
// all values are set, just return
}

Expand Down Expand Up @@ -737,14 +738,58 @@ public synchronized Iterator nodeIterator(boolean up, boolean rotating, byte[] f
if (nn == null) {
return (new HashSet()).iterator(); // an empty iterator
} else {
return new nodeIterator(up, rotating, nn);
// the node nn may be greater or smaller than the firstKey
// depending on the ordering direction,
// we must find the next smaller or greater node
return new correctedNodeIterator(up, rotating, nn, firstKey);
}
}
} catch (IOException e) {
throw new RuntimeException("error creating an iteration: " + e.getMessage());
}
}

/**
 * Iterator over Node objects that wraps a nodeIterator and corrects its
 * starting position relative to a requested first key.
 *
 * The start node handed to the underlying nodeIterator may be smaller or
 * greater than the requested first key. If the first node delivered lies on
 * the wrong side of firstKey for the iteration direction (smaller when
 * iterating up, greater when iterating down), that single node is skipped.
 *
 * The iterator keeps a one-element look-ahead so that hasNext() is a plain
 * null check.
 */
private class correctedNodeIterator implements Iterator {

    // the underlying, uncorrected iterator
    private final Iterator ii;
    // look-ahead: the node the next call to next() returns, or null when exhausted
    private Node nextNode;

    /**
     * @param up       true to iterate in ascending key order, false for descending
     * @param rotating passed through unchanged to the underlying nodeIterator
     * @param start    node at which the underlying nodeIterator starts
     * @param firstKey key the iteration was requested to start from
     * @throws IOException declared by the original constructor; node access may fail
     */
    public correctedNodeIterator(boolean up, boolean rotating, Node start, byte[] firstKey) throws IOException {
        ii = new nodeIterator(up, rotating, start);
        nextNode = fetch();
        if (nextNode != null) {
            int c = compare(firstKey, nextNode.getKey());
            // c > 0 means firstKey > first node, c < 0 means firstKey < first node;
            // in either wrong-side case step over exactly one node
            boolean wrongSide = ((c > 0) && (up)) || ((c < 0) && (!(up)));
            if (wrongSide) {
                nextNode = fetch();
            }
        }
    }

    // pulls the next node from the underlying iterator, or null if it has none
    private Node fetch() {
        return (ii.hasNext()) ? (Node) ii.next() : null;
    }

    public boolean hasNext() {
        return nextNode != null;
    }

    /**
     * @return the next Node of the iteration
     * @throws java.util.NoSuchElementException if the iteration has no more nodes
     */
    public Object next() {
        if (nextNode == null) {
            throw new java.util.NoSuchElementException("kelondroTree: node iteration is exhausted");
        }
        Node r = nextNode;
        nextNode = fetch();
        return r;
    }

    public void remove() {
        throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported");
    }
}

private class nodeIterator implements Iterator {
// we implement an iteration! (not a recursive function as the structure would suggest...)
// the iterator iterates Node objects
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -1298,8 +1298,8 @@ public serverObjects searchFromLocal(Set querywords, String order1, String order
// do global fetching
int globalresults = 0;
if (global) {
int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search
int fetchpeers = ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds
int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search
int fetchpeers = 10 + ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds
long fetchtime = time * 7 / 10; // time to waste
if (fetchcount > count) fetchcount = count;
globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime);
Expand Down
50 changes: 49 additions & 1 deletion source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -116,10 +116,58 @@ public void deleteIndex(String wordHash) {
}

/**
 * Returns an iterator over word hashes beginning at startHash.
 *
 * The iterator from ramCache.wordHashes(startHash, up) is not handed out
 * directly; it is wrapped in a correctedWordIterator, which skips leading
 * entries that lie on the wrong side of startHash. This wrapper is a
 * workaround that stays in place until the underlying ordering bug is found.
 *
 * @param startHash word hash the iteration should start from
 * @param up        iteration direction; true means ascending
 * @param rot       rotation flag, handed to the correcting wrapper
 * @return an Iterator delivering word hashes
 */
public Iterator wordHashes(String startHash, boolean up, boolean rot) {
    final Iterator correctedHashes = new correctedWordIterator(up, rot, startHash);
    return correctedHashes;
}

/**
 * Iterator over word hash Strings that wraps ramCache.wordHashes(...) and
 * corrects its starting position relative to a requested first word.
 *
 * Leading entries that lie on the wrong side of firstWord for the iteration
 * direction (smaller when iterating up, greater when iterating down) are
 * skipped. The number of skipped entries is bounded to avoid looping forever
 * on a rotating source.
 *
 * The iterator keeps a one-element look-ahead so that hasNext() is a plain
 * null check.
 */
private class correctedWordIterator implements Iterator {

    // the underlying, uncorrected iterator
    private final Iterator ii;
    // look-ahead: the word the next call to next() returns, or null when exhausted
    private String nextWord;

    /**
     * @param up        true to iterate in ascending order, false for descending
     * @param rotating  accepted for interface symmetry; not used by this class
     * @param firstWord word hash the iteration was requested to start from
     */
    public correctedWordIterator(boolean up, boolean rotating, String firstWord) {
        final int maxCorrections = 50; // upper bound on skipped entries, to avoid rotation loops
        ii = ramCache.wordHashes(firstWord, up);
        nextWord = fetch();
        int skipped = 0;
        while ((nextWord != null) && (skipped < maxCorrections)) {
            int c = firstWord.compareTo(nextWord);
            // c > 0 means firstWord > nextWord, c < 0 means firstWord < nextWord
            boolean wrongSide = ((c > 0) && (up)) || ((c < 0) && (!(up)));
            if (!wrongSide) {
                break;
            }
            nextWord = fetch();
            skipped++;
        }
    }

    // pulls the next word from the underlying iterator, or null if it has none
    private String fetch() {
        return (ii.hasNext()) ? (String) ii.next() : null;
    }

    public boolean hasNext() {
        return nextWord != null;
    }

    /**
     * @return the next word hash of the iteration
     * @throws java.util.NoSuchElementException if the iteration has no more words
     */
    public Object next() {
        if (nextWord == null) {
            throw new java.util.NoSuchElementException("plasmaWordIndex: word hash iteration is exhausted");
        }
        String r = nextWord;
        nextWord = fetch();
        return r;
    }

    public void remove() {
        throw new java.lang.UnsupportedOperationException("plasmaWordIndex: remove in word hash iterator not supported");
    }
}

/**
 * Creates an iterateFiles iterator from the given arguments.
 *
 * @param startHash   hash passed to iterateFiles as the starting point
 * @param up          direction flag passed to iterateFiles
 * @param deleteEmpty flag passed to iterateFiles (presumably controls removal
 *                    of empty entries during iteration; confirm in iterateFiles)
 * @return the newly created iterator
 */
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
    final Iterator files = new iterateFiles(startHash, up, deleteEmpty);
    return files;
}
Expand Down
3 changes: 2 additions & 1 deletion source/de/anomic/plasma/plasmaWordIndexCache.java
Expand Up @@ -47,6 +47,7 @@
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Enumeration;

import de.anomic.kelondro.kelondroException;
Expand Down Expand Up @@ -329,7 +330,7 @@ public Iterator wordHashes(String startWordHash, boolean up) {
if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
return new kelondroMergeIterator(
new kelondroMergeIterator(
cache.keySet().iterator(),
cache.tailMap(startWordHash).keySet().iterator(),
assortmentCluster.hashConjunction(startWordHash, true),
true),
backend.wordHashes(startWordHash, true),
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/plasma/plasmaWordIndexDistribution.java
Expand Up @@ -131,8 +131,8 @@ public int performTransferIndex(int indexCount, int peerCount, boolean delete) {
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;

// collect index
//String startPointHash = yacyCore.seedCache.mySeed.hash;
String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
String startPointHash = yacyCore.seedDB.mySeed.hash;
//String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
if ((indexEntities == null) || (indexEntities.length == 0)) {
log.logDebug("No index available for index transfer, hash start-point " + startPointHash);
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/yacy/yacyClient.java
Expand Up @@ -380,7 +380,7 @@ public static int search(String wordhashes, int count, boolean global,
} catch (NumberFormatException e) {
searchtime = totalrequesttime;
}
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score " + targetPeer.selectscore + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/yacy/yacyNewsAction.java
Expand Up @@ -58,7 +58,7 @@ public void processPeerArrival(yacySeed peer, boolean direct) {
if ((recordString == null) || (recordString.length() == 0)) return;
String decodedString = de.anomic.tools.crypt.simpleDecode(recordString, "");
yacyNewsRecord record = new yacyNewsRecord(decodedString);
System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString());
//System.out.println("### news arrival from peer " + peer.getName() + ", decoded=" + decodedString + ", record=" + recordString + ", news=" + record.toString());
String cre1 = (String) serverCodings.string2map(decodedString).get("cre");
String cre2 = (String) serverCodings.string2map(record.toString()).get("cre");
if ((cre1 == null) || (cre2 == null) || (!(cre1.equals(cre2)))) {
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/yacy/yacyNewsPool.java
Expand Up @@ -169,6 +169,7 @@ private boolean automaticProcessP(yacyNewsRecord record) {
if ((record.category().equals("crwlstrt")) &&
((yacyCore.universalTime() - record.created().getTime()) > (1000 * 60 * 60 * 24) /* 1 Day */)) {
yacySeed seed = yacyCore.seedDB.get(record.originator());
if (seed == null) return false;
try {
return (Integer.parseInt(seed.get("ISpeed", "-")) < 10);
} catch (NumberFormatException ee) {
Expand Down
18 changes: 14 additions & 4 deletions source/de/anomic/yacy/yacySearch.java
Expand Up @@ -43,6 +43,7 @@
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;

import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
Expand Down Expand Up @@ -106,23 +107,32 @@ private static yacySeed[] selectPeers(Set wordhashes, int seedcount) {
if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected();

kelondroMScoreCluster ranking = new kelondroMScoreCluster();
HashMap seeds = new HashMap();
yacySeed seed;
Enumeration dhtEnum;
Iterator i = wordhashes.iterator();
int c;
String wordhash;
while (i.hasNext()) {
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next());
wordhash = (String) i.next();
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, wordhash);
c = 0;
while ((dhtEnum.hasMoreElements()) && (c < seedcount)) {
seed = (yacySeed) dhtEnum.nextElement();
//System.out.println("Selected peer " + seed.hash + " for wordhash " + wordhash + ", score " + c);
ranking.addScore(seed.hash, c++);
seeds.put(seed.hash, seed);
}
}
if (ranking.size() < seedcount) seedcount = ranking.size();
yacySeed[] result = new yacySeed[seedcount];
Iterator e = ranking.scores(true); // lower are better
c = 0;
while ((e.hasNext()) && (c < result.length)) result[c++] = yacyCore.seedDB.getConnected((String) e.next());
while ((e.hasNext()) && (c < result.length)) {
seed = (yacySeed) seeds.get((String) e.next());
seed.selectscore = c;
result[c++] = seed;
}

//System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug
return result;
Expand Down Expand Up @@ -162,8 +172,6 @@ public static int searchHashes(Set wordhashes, plasmaCrawlLURL urlManager, plasm
// wait until wanted delay passed or wanted result appeared
boolean anyIdle = true;
while ((anyIdle) && ((System.currentTimeMillis() - start) < waitingtime)) {
// wait..
try {Thread.currentThread().sleep(200);} catch (InterruptedException e) {}
// check if all threads have been finished or results so far are enough
c = 0;
anyIdle = false;
Expand All @@ -175,6 +183,8 @@ public static int searchHashes(Set wordhashes, plasmaCrawlLURL urlManager, plasm
break; // we have enough
}
if (c >= count * 5) break;
// wait a little time ..
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
}

// collect results
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/yacy/yacySeed.java
Expand Up @@ -89,6 +89,7 @@ public class yacySeed {
public String hash;
private Map dna;
public int available;
public int selectscore = -1; // only for debugging

public yacySeed(String hash, Map dna) {
// create a seed with a pre-defined hash map
Expand Down

0 comments on commit 5716f85

Please sign in to comment.