Skip to content

Commit

Permalink
robinson cluster: added client-side protocol implementation
Browse files Browse the repository at this point in the history
- the network configuration page shows a new option: robinson clusters
- when a global search is made, all robinson peers are excluded, but:
- robinson peers/clusters that provide peer tags are included in global search
  when the search words match such tags. Therefore, robinson peers/clusters
  support the global yacy network with their indexes, without doing DHT-exchange


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3598 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Apr 26, 2007
1 parent 794eefe commit f8de19f
Show file tree
Hide file tree
Showing 15 changed files with 165 additions and 23 deletions.
2 changes: 1 addition & 1 deletion build.properties
Expand Up @@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.514
releaseVersion=0.515
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
Expand Down
5 changes: 3 additions & 2 deletions htroot/ConfigNetwork_p.html
Expand Up @@ -71,14 +71,15 @@ <h2>Network Configuration</h2>
List of ip:port - addresses of the cluster: (comma-separated)<br>
<input type="text" name="cluster.peers.ipport" value="#[cluster.peers.ipport]#" size="80" maxlength="800" />
</dd>
-->
<dt>Public Cluster<input type="radio" value="publiccluster" name="cluster.mode" #(publicclusterChecked)#::checked="checked" #(/publicclusterChecked)#/></dt>
<dd>Your peer is part of a public cluster within the YaCy network.<br>
Index data is not distributed, but remote crawl requests are distributed and accepted<br>
Search requests are spread over all peers of the cluster, and answered from all peers of the cluster.<br>
List of .yacy or .yacyh - domains of the cluster: (comma-separated)<br>
<input type="text" name="cluster.peers.yacydomain" value="#[cluster.peers.yacydomain]#" size="80" maxlength="800" />
<input type="text" name="cluster.peers.yacydomain" value="#[cluster.peers.yacydomain]#" size="80" maxlength="800" /><br>
#[cluster.peers.yacydomain.hashes]#
</dd>
-->
<dt>Public Peer<input type="radio" value="publicpeer" name="cluster.mode" #(publicpeerChecked)#::checked="checked" #(/publicpeerChecked)#/></dt>
<dd>You are visible to other peers and contact them to distribute your presence.<br>
Your peer does not accept any outside index data, but responds on all remote search requests.
Expand Down
5 changes: 4 additions & 1 deletion htroot/ConfigNetwork_p.java
Expand Up @@ -120,6 +120,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", "")));
sb.setConfig("cluster.peers.yacydomain", checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));

// update the cluster hash set
sb.clusterhashes = yacyCore.seedDB.clusterHashes(sb.getConfig("cluster.peers.yacydomain", ""));

}

// write answer code
Expand Down Expand Up @@ -150,7 +153,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("robinson.checked", (indexDistribute || indexReceive) ? 0 : 1);
prop.put("cluster.peers.ipport", sb.getConfig("cluster.peers.ipport", ""));
prop.put("cluster.peers.yacydomain", sb.getConfig("cluster.peers.yacydomain", ""));

prop.put("cluster.peers.yacydomain.hashes", (sb.clusterhashes.size() == 0) ? "" : sb.clusterhashes.toString());
// set p2p mode flags
prop.put("privatepeerChecked", (sb.getConfig("cluster.mode", "").equals("privatepeer")) ? 1 : 0);
prop.put("privateclusterChecked", (sb.getConfig("cluster.mode", "").equals("privatecluster")) ? 1 : 0);
Expand Down
4 changes: 2 additions & 2 deletions htroot/yacy/search.java
Expand Up @@ -160,7 +160,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(squery.maximumTime, squery.wantedResults);
plasmaSearchTimingProfile remoteTiming = null;

theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL, sb.snippetCache);
theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL, sb.snippetCache, null);
Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Expand Down Expand Up @@ -190,7 +190,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
theSearch = new plasmaSearchEvent(squery,
rankingProfile, localTiming, remoteTiming, true,
yacyCore.log, sb.wordIndex, sb.wordIndex.loadedURL,
sb.snippetCache);
sb.snippetCache, null);
Map[] containers = theSearch.localSearchContainers(plasmaSearchQuery.hashes2Set(urls));

// set statistic details of search result and find best result index set
Expand Down
7 changes: 6 additions & 1 deletion htroot/yacysearch.java
Expand Up @@ -183,7 +183,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
final boolean indexDistributeGranted = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");
final boolean indexReceiveGranted = sb.getConfig("allowReceiveIndex", "true").equals("true");
final boolean offline = yacyCore.seedDB.mySeed.isVirgin();
// a cluster search is only done by robinson peers that are configured as a
// private or public cluster; the mode is stored under the config key
// "cluster.mode" (the same key the network configuration form writes)
final boolean clustersearch = sb.isRobinsonMode() &&
(sb.getConfig("cluster.mode", "").equals("privatecluster") ||
sb.getConfig("cluster.mode", "").equals("publiccluster"));
if (offline || !indexDistributeGranted || !indexReceiveGranted) { global = false; }
if (clustersearch) global = true; // switches search on, but search target is limited to cluster nodes

// find search domain
int contentdomCode = plasmaSearchQuery.CONTENTDOM_TEXT;
Expand Down Expand Up @@ -268,7 +272,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
count,
searchtime,
urlmask,
(globalsearch) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL,
(clustersearch && globalsearch) ? plasmaSearchQuery.SEARCHDOM_CLUSTERALL :
((globalsearch) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL),
"",
20,
constraint);
Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/kelondro/kelondroRowSet.java
Expand Up @@ -240,6 +240,8 @@ public Object clone(Object second) {
}

public boolean hasNext() {
if (p < 0) return false;
if (p >= size()) return false;
if (up) {
return p < bound;
} else {
Expand Down
12 changes: 9 additions & 3 deletions source/de/anomic/plasma/plasmaSearchEvent.java
Expand Up @@ -48,6 +48,7 @@
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
Expand Down Expand Up @@ -78,6 +79,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private yacySearch[] primarySearchThreads, secondarySearchThreads;
private long searchtime;
private int searchcount;
private TreeSet preselectedPeerHashes;

public plasmaSearchEvent(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
Expand All @@ -87,7 +89,8 @@ public plasmaSearchEvent(plasmaSearchQuery query,
serverLog log,
plasmaWordIndex wordIndex,
plasmaCrawlLURL urlStore,
plasmaSnippetCache snippetCache) {
plasmaSnippetCache snippetCache,
TreeSet preselectedPeerHashes) {
this.log = log;
this.wordIndex = wordIndex;
this.query = query;
Expand All @@ -104,6 +107,7 @@ public plasmaSearchEvent(plasmaSearchQuery query,
this.secondarySearchThreads = null;
this.searchtime = -1;
this.searchcount = -1;
this.preselectedPeerHashes = preselectedPeerHashes;
}

public plasmaSearchQuery getQuery() {
Expand Down Expand Up @@ -141,7 +145,8 @@ public plasmaSearchPostOrder search() {
synchronized (flushThreads) {
long start = System.currentTimeMillis();
plasmaSearchPostOrder result;
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
int fetchpeers = (int) (query.maximumTime / 500L); // number of target peers; means 10 peers in 10 seconds
if (fetchpeers > 50) fetchpeers = 50;
if (fetchpeers < 30) fetchpeers = 30;
Expand All @@ -153,7 +158,8 @@ public plasmaSearchPostOrder search() {
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), plasmaSearchQuery.hashSet2hashString(query.excludeHashes), "",
query.prefer, query.urlMask, query.maxDistance, urlStore, wordIndex, rcContainers, rcAbstracts,
fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking, query.constraint);
fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking, query.constraint,
(query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);

// meanwhile do a local search
Map[] searchContainerMaps = localSearchContainers(null);
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/plasma/plasmaSearchQuery.java
Expand Up @@ -57,8 +57,8 @@
public final class plasmaSearchQuery {

public static final int SEARCHDOM_LOCAL = 0;
public static final int SEARCHDOM_GROUPDHT = 1;
public static final int SEARCHDOM_GROUPALL = 2;
public static final int SEARCHDOM_CLUSTERDHT = 1;
public static final int SEARCHDOM_CLUSTERALL = 2;
public static final int SEARCHDOM_GLOBALDHT = 3;
public static final int SEARCHDOM_GLOBALALL = 4;

Expand Down
20 changes: 18 additions & 2 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -251,6 +251,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public double lastrequestedQueries = 0d;
public int totalPPM = 0;
public double totalQPM = 0d;
public TreeSet clusterhashes;

/*
* Remote Proxy configuration
Expand Down Expand Up @@ -1272,6 +1273,9 @@ public plasmaSwitchboard(String rootPath, String initPath, String configPath) {

this.dbImportManager = new dbImportManager(this);

// init robinson cluster
this.clusterhashes = yacyCore.seedDB.clusterHashes(getConfig("cluster.peers.yacydomain", ""));

sb=this;
log.logConfig("Finished Switchboard Initialization");
}
Expand Down Expand Up @@ -1881,6 +1885,9 @@ public boolean cleanupJob() {
kelondroRecords.setCacheGrowStati(memprereq + (memprereq / 8) + 2 * 1024 * 1024, memprereq);
kelondroCache.setCacheGrowStati(memprereq + (memprereq / 8) + 2 * 1024 * 1024, memprereq);

// update the cluster set
this.clusterhashes = yacyCore.seedDB.clusterHashes(getConfig("cluster.peers.yacydomain", ""));

return hasDoneSomething;
} catch (InterruptedException e) {
this.log.logInfo("cleanupJob: Shutdown detected");
Expand Down Expand Up @@ -2009,6 +2016,13 @@ public boolean limitCrawlTriggerJob() {
return false;
}

if ((isRobinsonMode()) &&
((getConfig("cluster.mode", "").equals("publicpeer")) ||
(getConfig("cluster.mode", "").equals("privatepeer")))){
// not-clustered robinson peers do not do remote crawling
return false;
}

if ((coreCrawlJobSize() <= 20) && (limitCrawlTriggerJobSize() > 100)) {
// it is not efficient if the core crawl job is empty and we have too much to do
// move some tasks to the core crawl job
Expand Down Expand Up @@ -2672,7 +2686,9 @@ private boolean processRemoteCrawlTrigger(plasmaCrawlEntry urlEntry) {
}

// check if peer for remote crawl is available
yacySeed remoteSeed = yacyCore.dhtAgent.getCrawlSeed(urlEntry.urlhash());
yacySeed remoteSeed = ((this.isOpenRobinsonCluster()) && (getConfig("cluster.mode", "").equals("publiccluster"))) ?
yacyCore.dhtAgent.getPublicClusterCrawlSeed(urlEntry.urlhash(), this.clusterhashes) :
yacyCore.dhtAgent.getGlobalCrawlSeed(urlEntry.urlhash());
if (remoteSeed == null) {
log.logFine("plasmaSwitchboard.processRemoteCrawlTrigger: no remote crawl seed available");
return false;
Expand Down Expand Up @@ -2788,7 +2804,7 @@ public plasmaSearchResults searchFromLocal(plasmaSearchQuery query,
//}

// create a new search event
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, postsort, log, wordIndex, wordIndex.loadedURL, snippetCache);
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, ranking, localTiming, remoteTiming, postsort, log, wordIndex, wordIndex.loadedURL, snippetCache, (isRobinsonMode()) ? this.clusterhashes : null);
plasmaSearchPostOrder acc = theSearch.search();

// fetch snippets
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/soap/services/ShareService.java
Expand Up @@ -78,7 +78,7 @@ public class ShareService extends AbstractService {
private static final int FILEINFO_COMMENT = 1;

private static final int GENMD5_MD5_ARRAY = 0;
private static final int GENMD5_MD5_STRING = 1;
//private static final int GENMD5_MD5_STRING = 1;

/* =====================================================================
* Used XML Templates
Expand Down
8 changes: 8 additions & 0 deletions source/de/anomic/yacy/yacyCore.java
Expand Up @@ -326,6 +326,14 @@ public void publishSeedList() {

public void peerPing() {
if (!online()) return;
if ((switchboard.isRobinsonMode()) && (switchboard.getConfig("cluster.mode", "").equals("privatepeer"))) {
// in case this peer is a private peer we omit the peer ping
// all other robinson peer types do a peer ping:
// the privatecluster does the ping to the other cluster members
// the publiccluster does the ping to all peers, but prefers the own peer
// the publicpeer does the ping to all peers
return;
}

// before publishing, update some seed data
peerActions.updateMySeed();
Expand Down
14 changes: 13 additions & 1 deletion source/de/anomic/yacy/yacyDHTAction.java
Expand Up @@ -46,10 +46,14 @@
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.TreeSet;

import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroCloneableSetIterator;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroRotateIterator;
import de.anomic.server.logging.serverLog;

public class yacyDHTAction implements yacyPeerAction {
Expand Down Expand Up @@ -205,13 +209,21 @@ public Object nextElement() {

}

public synchronized yacySeed getCrawlSeed(String urlHash) {
/**
 * Picks a remote-crawl target for the given URL hash.
 * Asks getAcceptRemoteCrawlSeeds for the candidate enumeration and
 * takes its first element.
 *
 * @param urlHash hash of the URL that shall be crawled remotely
 * @return the first candidate seed, or null if the enumeration is empty
 */
public synchronized yacySeed getGlobalCrawlSeed(String urlHash) {
    final Enumeration candidates = getAcceptRemoteCrawlSeeds(urlHash, true);
    // no candidate at all: signal this to the caller with null
    if (!candidates.hasMoreElements()) return null;
    return (yacySeed) candidates.nextElement();
}

/**
 * Picks a remote-crawl target for the given URL hash from the members
 * of a public cluster.
 * A rotating iterator is built over the cluster hash set, constructed
 * with urlHash (presumably the start position of the iteration - verify
 * against kelondroCloneableSetIterator). The first hash it delivers is
 * looked up with seedDB.getConnected.
 *
 * @param urlHash       hash of the URL that shall be crawled remotely
 * @param clusterhashes peer hashes of the cluster members; may be null or empty
 * @return the result of seedDB.getConnected for the selected hash, or null
 *         if there is no cluster member to select from
 */
public synchronized yacySeed getPublicClusterCrawlSeed(String urlHash, TreeSet clusterhashes) {
    // without cluster members there is no candidate; return early so that an
    // empty set is never handed to the rotating iterator, whose behaviour on
    // an empty source is not visible from here
    if ((clusterhashes == null) || (clusterhashes.isEmpty())) return null;
    kelondroCloneableIterator i = new kelondroRotateIterator(new kelondroCloneableSetIterator(clusterhashes, urlHash), null);
    if (i.hasNext()) {
        return seedDB.getConnected((String) i.next());
    }
    return null;
}

public void setCrawlTime(String seedHash, int newYacyTime) {
if (newYacyTime < yacyCore.yacyTime()) newYacyTime = yacyCore.yacyTime();
Expand Down

0 comments on commit f8de19f

Please sign in to comment.