Skip to content

Commit

Permalink
changed peer selection for search targets:
Browse files Browse the repository at this point in the history
- fewer DHT targets are selected
- more other peers are selected: all robinson peers with more than one million URLs are added as search targets

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4962 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jun 30, 2008
1 parent 69aac0d commit d37fd06
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 20 deletions.
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaSearchEvent.java
Expand Up @@ -131,7 +131,7 @@ private plasmaSearchEvent(plasmaSearchQuery query,
// do a global search
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, max_results_preparation, 16);

int fetchpeers = 30;
int fetchpeers = 10;

// the result of the fetch is then in the rcGlobal
long timer = System.currentTimeMillis();
Expand Down
53 changes: 34 additions & 19 deletions source/de/anomic/yacy/yacySearch.java
Expand Up @@ -176,7 +176,8 @@ private static yacySeed[] selectSearchTargets(yacySeedDB seedDB, yacyPeerActions

// put in seeds according to dht
final kelondroMScoreCluster<String> ranking = new kelondroMScoreCluster<String>();
final HashMap<String, yacySeed> seeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> robinsonSeeds = new HashMap<String, yacySeed>();
yacySeed seed;
Iterator<yacySeed> dhtEnum;
int c;
Expand All @@ -195,23 +196,22 @@ private static yacySeed[] selectSearchTargets(yacySeedDB seedDB, yacyPeerActions
if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
serverLog.logFine("PLASMA", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/" + distance + " for wordhash " + wordhash + ", score " + c);
ranking.addScore(seed.hash, c--);
seeds.put(seed.hash, seed);
regularSeeds.put(seed.hash, seed);
}
}

// put in seeds according to size of peer
dhtEnum = seedDB.seedsSortedConnected(false, yacySeed.ICOUNT);
c = seedcount;
c = Math.min(seedDB.sizeConnected(), seedcount);
int score;
if (c > seedDB.sizeConnected()) { c = seedDB.sizeConnected(); }
while (dhtEnum.hasNext() && c > 0) {
seed = dhtEnum.next();
if (seed == null) continue;
if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
score = (int) Math.round(Math.random() * ((c / 3) + 3));
serverLog.logFine("PLASMA", "selectPeers/RWIcount: " + seed.hash + ":" + seed.getName() + ", RWIcount=" + seed.get(yacySeed.ICOUNT,"") + ", score " + score);
ranking.addScore(seed.hash, score);
seeds.put(seed.hash, seed);
regularSeeds.put(seed.hash, seed);
c--;
}

Expand All @@ -221,29 +221,44 @@ private static yacySeed[] selectSearchTargets(yacySeedDB seedDB, yacyPeerActions
while (dhtEnum.hasNext()) {
seed = dhtEnum.next();
if (seed == null) continue;
if (seed.matchPeerTags(wordhashes)) { // access robinson peers with matching tag
serverLog.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + seed.getPeerTags().toString());
ranking.addScore(seed.hash, seedcount);
seeds.put(seed.hash, seed);
}
if (seed.getAge() < 1) { // the 'workshop feature'
serverLog.logInfo("PLASMA", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
ranking.addScore(seed.hash, seedcount);
seeds.put(seed.hash, seed);
if (seed.getFlagAcceptRemoteIndex()) {
// enhance ranking for regular peers
if (seed.matchPeerTags(wordhashes)) { // access robinson peers with matching tag
serverLog.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + seed.getPeerTags().toString());
ranking.addScore(seed.hash, seedcount);
regularSeeds.put(seed.hash, seed);
}
if (seed.getAge() < 1) { // the 'workshop feature'
serverLog.logInfo("PLASMA", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
ranking.addScore(seed.hash, seedcount);
regularSeeds.put(seed.hash, seed);
}
} else {
// this is a robinson peer
// in case the peer has more than a million urls, take it as search target
if (seed.getLinkCount() > 1000000) {
regularSeeds.remove(seed.hash);
ranking.deleteScore(seed.hash);
robinsonSeeds.put(seed.hash, seed);
}
}
}

// evaluate the ranking score and select seeds
if (ranking.size() < seedcount) { seedcount = ranking.size(); }
yacySeed[] result = new yacySeed[seedcount];
seedcount = Math.min(ranking.size(), seedcount);
yacySeed[] result = new yacySeed[seedcount + robinsonSeeds.size()];
c = 0;
iter = ranking.scores(false); // higher are better
while (iter.hasNext() && c < result.length) {
seed = seeds.get(iter.next());
while (iter.hasNext() && c < seedcount) {
seed = regularSeeds.get(iter.next());
seed.selectscore = c;
serverLog.logFine("PLASMA", "selectPeers/_lineup_: " + seed.hash + ":" + seed.getName() + " is choice " + c);
serverLog.logInfo("PLASMA", "selectPeers/_lineup_: " + seed.hash + ":" + seed.getName() + " is choice " + c);
result[c++] = seed;
}
for (yacySeed s: robinsonSeeds.values()) {
serverLog.logInfo("PLASMA", "selectPeers/_robinson_: " + s.hash + ":" + s.getName() + " is choice " + c);
result[c++] = s;
}

// System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug
return result;
Expand Down

0 comments on commit d37fd06

Please sign in to comment.