Skip to content

Commit

Permalink
indexing of local (intranet) urls enabled
Browse files Browse the repository at this point in the history
To do this, one must create a separate YaCy network that has a local URL domain.
A description of how to do this is available here: http://www.yacy-websuche.de/wiki/index.php/De:Netzdefinition

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4001 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jul 24, 2007
1 parent c48c973 commit 9ca46a8
Show file tree
Hide file tree
Showing 35 changed files with 502 additions and 415 deletions.
2 changes: 1 addition & 1 deletion build.properties
Expand Up @@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.537
releaseVersion=0.538
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy
Expand Down
3 changes: 2 additions & 1 deletion htroot/ConfigBasic.java
Expand Up @@ -59,6 +59,7 @@
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverInstantThread;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down Expand Up @@ -181,7 +182,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
int idx = host.indexOf(":");
if (idx != -1) host = host.substring(0,idx);
} else {
host = serverCore.publicLocalIP().getHostAddress();
host = serverDomains.myPublicLocalIP().getHostAddress();
}

prop.put("reconnect", 1);
Expand Down
6 changes: 3 additions & 3 deletions htroot/PerformanceMemory_p.java
Expand Up @@ -49,11 +49,11 @@
import java.util.Map;

import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverMemory;
import de.anomic.server.serverObjects;
Expand Down Expand Up @@ -211,9 +211,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("Xms", Xms.substring(0, Xms.length() - 1));

// other caching structures
long amount = httpc.nameCacheHitSize();
long amount = serverDomains.nameCacheHitSize();
prop.put("namecache.hit",Long.toString(amount));
amount = httpc.nameCacheNoCachingListSize();
amount = serverDomains.nameCacheNoCachingListSize();
prop.put("namecache.noCache",Long.toString(amount));
amount = plasmaSwitchboard.urlBlacklist.blacklistCacheSize();
prop.put("blacklistcache.size",Long.toString(amount));
Expand Down
10 changes: 3 additions & 7 deletions htroot/PerformanceQueues_p.html
Expand Up @@ -120,11 +120,8 @@ <h2>Performance Settings of Queues and Processes</h2>
</tr>
<tr valign="top" class="TableCellDark">
<td>Maximum number of words in cache:</td>
<td>
<input name="wordOutCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordOutCacheMaxCount]#" />
</td>
<td>
<input name="wordInCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordInCacheMaxCount]#" />
<td colspan="2">
<input name="wordCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordCacheMaxCount]#" />
</td>
<td>
This is the number of word indexes that shall be held in the
Expand All @@ -134,10 +131,9 @@ <h2>Performance Settings of Queues and Processes</h2>
</tr>
<tr valign="top" class="TableCellDark">
<td>Initial space of words in cache:</td>
<td>
<td colspan="2">
<input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" />
</td>
<td>-</td>
<td>
This is the initial size of the space reserved for words in the cache.
</td>
Expand Down
19 changes: 7 additions & 12 deletions htroot/PerformanceQueues_p.java
Expand Up @@ -182,16 +182,12 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("table", c);

if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
int wordOutCacheMaxCount = post.getInt("wordOutCacheMaxCount", 20000);
switchboard.setConfig("wordCacheMaxCount", Integer.toString(wordOutCacheMaxCount));
switchboard.wordIndex.setMaxWordCount(wordOutCacheMaxCount);

int wordInCacheMaxCount = post.getInt("wordInCacheMaxCount", 1000);
switchboard.setConfig("indexDistribution.dhtReceiptLimit", Integer.toString(wordInCacheMaxCount));
switchboard.wordIndex.setInMaxWordCount(wordInCacheMaxCount);
int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 20000);
switchboard.setConfig(plasmaSwitchboard.WORDCACHE_MAX_COUNT, Integer.toString(wordCacheMaxCount));
switchboard.wordIndex.setMaxWordCount(wordCacheMaxCount);

int wordCacheInitCount = post.getInt("wordCacheInitCount", 30000);
switchboard.setConfig("wordCacheInitCount", Integer.toString(wordCacheInitCount));
int wordCacheInitCount = post.getInt(plasmaSwitchboard.WORDCACHE_INIT_COUNT, 30000);
switchboard.setConfig(plasmaSwitchboard.WORDCACHE_INIT_COUNT, Integer.toString(wordCacheInitCount));

int flushsize = post.getInt("wordFlushSize", 2000);
switchboard.setConfig("wordFlushSize", Integer.toString(flushsize));
Expand Down Expand Up @@ -282,9 +278,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfDHTOutCache() / 1000 / 60)); // minutes
prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfDHTInCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordOutCacheMaxCount", switchboard.getConfigLong("wordCacheMaxCount", 20000));
prop.put("wordInCacheMaxCount", switchboard.getConfigLong("indexDistribution.dhtReceiptLimit", 1000));
prop.put("wordCacheInitCount", switchboard.getConfigLong("wordCacheInitCount", 30000));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboard.WORDCACHE_MAX_COUNT, 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboard.WORDCACHE_INIT_COUNT, 30000));
prop.put("wordFlushSize", switchboard.getConfigLong("wordFlushSize", 2000));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);
Expand Down
4 changes: 2 additions & 2 deletions htroot/Statistics.java
Expand Up @@ -50,7 +50,7 @@
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

Expand Down Expand Up @@ -81,7 +81,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (count >= maxCount) break;
urlString = (String) map.get("key");
try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (serverCore.isNotLocal(url))) {
if ((url != null) && (!serverDomains.isLocal(url))) {
prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! dark;
prop.put("page_backlinks_list_" + count + "_url", urlString);
prop.put("page_backlinks_list_" + count + "_date", map.get("date"));
Expand Down
3 changes: 2 additions & 1 deletion htroot/Status.java
Expand Up @@ -57,6 +57,7 @@
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverDate;
import de.anomic.server.serverMemory;
import de.anomic.server.serverObjects;
Expand Down Expand Up @@ -180,7 +181,7 @@ else if (jobType.equals("globalCrawlTrigger"))
} else {
prop.put("extPortFormat",0);
}
prop.put("host", serverCore.publicLocalIP().getHostAddress());
prop.put("host", serverDomains.myPublicLocalIP().getHostAddress());

// ssl support
prop.put("sslSupport",sb.getConfig("keyStore", "").length() == 0 ? 0:1);
Expand Down
5 changes: 3 additions & 2 deletions htroot/htdocsdefault/dir.java
Expand Up @@ -69,6 +69,7 @@
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverMemory;
import de.anomic.server.serverObjects;
Expand Down Expand Up @@ -106,9 +107,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("peername", env.getConfig("peerName", "<nameless>"));
prop.put("peerdomain", env.getConfig("peerName", "<nameless>").toLowerCase());
prop.put("peeraddress", yacyCore.seedDB.mySeed.getPublicAddress());
prop.put("hostname", serverCore.publicIP());
prop.put("hostname", serverDomains.myPublicIP());
try{
prop.put("hostip", InetAddress.getByName(serverCore.publicIP()).getHostAddress());
prop.put("hostip", InetAddress.getByName(serverDomains.myPublicIP()));
}catch(UnknownHostException e){
prop.put("hostip", "Unknown Host Exception");
}
Expand Down
4 changes: 2 additions & 2 deletions htroot/index.java
Expand Up @@ -36,7 +36,7 @@
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down Expand Up @@ -86,7 +86,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
} catch (MalformedURLException e) {
url = null;
}
if ((url != null) && (serverCore.isNotLocal(url))) {
if ((url != null) && (!serverDomains.isLocal(url))) {
final HashMap referrerprop = new HashMap();
referrerprop.put("count", "1");
referrerprop.put("clientip", header.get(httpHeader.CONNECTION_PROP_CLIENTIP));
Expand Down
5 changes: 3 additions & 2 deletions htroot/www/welcome.java
Expand Up @@ -52,6 +52,7 @@

import de.anomic.http.httpHeader;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
Expand All @@ -69,9 +70,9 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("peername", env.getConfig("peerName", "<nameless>"));
prop.put("peerdomain", env.getConfig("peerName", "<nameless>").toLowerCase());
prop.put("peeraddress", yacyCore.seedDB.mySeed.getPublicAddress());
prop.put("hostname", serverCore.publicIP());
prop.put("hostname", serverDomains.myPublicIP());
try{
prop.put("hostip", InetAddress.getByName(serverCore.publicIP()).getHostAddress());
prop.put("hostip", InetAddress.getByName(serverDomains.myPublicIP()).getHostAddress());
}catch(UnknownHostException e){
prop.put("hostip", "Unknown Host Exception");
}
Expand Down
2 changes: 1 addition & 1 deletion htroot/xml/status_p.java
Expand Up @@ -63,7 +63,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("wordCacheSize", switchboard.wordIndex.dhtOutCacheSize() + switchboard.wordIndex.dhtInCacheSize());
prop.put("wordCacheWSize", switchboard.wordIndex.dhtOutCacheSize());
prop.put("wordCacheKSize", switchboard.wordIndex.dhtInCacheSize());
prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000"));
prop.put("wordCacheMaxCount", switchboard.getConfig(plasmaSwitchboard.WORDCACHE_MAX_COUNT, "10000"));

//
// memory usage and system attributes
Expand Down
5 changes: 5 additions & 0 deletions htroot/yacy/crawlOrder.java
Expand Up @@ -191,6 +191,11 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
}

if (!switchboard.acceptURL(new URL(newURL))) {
env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + newURL);
return null;
}

// adding URL to noticeURL Queue
env.getLog().logFinest("crawlOrder: a: url='" + newURL + "'");

Expand Down
7 changes: 7 additions & 0 deletions htroot/yacy/crawlReceipt.java
Expand Up @@ -146,6 +146,13 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
return prop;
}

// check if the entry is in our network domain
if (!switchboard.acceptURL(comp.url())) {
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url outside of our domain) for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.putASIS("delay", "9999");
return prop;
}

if (result.equals("fill")) try {
// put new entry into database
switchboard.wordIndex.loadedURL.store(entry);
Expand Down
12 changes: 8 additions & 4 deletions htroot/yacy/hello.java
Expand Up @@ -46,13 +46,15 @@
// javac -classpath .:../../classes hello.java
// if the shell's current path is HTROOT

import java.net.InetAddress;
import java.util.Iterator;
import java.util.Map;

import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyClient;
Expand Down Expand Up @@ -95,6 +97,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

// we easily know the caller's IP:
final String clientip = (String) header.get("CLIENTIP", "<unknown>"); // read an artificial header addendum
InetAddress ias = serverDomains.dnsResolve(clientip);
if (ias == null) return null;
final String userAgent = (String) header.get(httpHeader.USER_AGENT, "<unknown>");
final String reportedip = remoteSeed.get(yacySeed.IP, "");
final String reportedPeerType = remoteSeed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_JUNIOR);
Expand Down Expand Up @@ -126,10 +130,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
boolean isNotLocal = true;

// we are only allowed to connect to the client IP address if it's not our own address
if(serverCore.portForwardingEnabled || serverCore.useStaticIP)
isNotLocal = serverCore.isNotLocal(clientip);

if(isNotLocal) {
if (serverCore.portForwardingEnabled || serverCore.useStaticIP) {
isNotLocal = !ias.isSiteLocalAddress();
}
if (isNotLocal) {
serverCore.checkInterruption();

prop.putASIS("yourip", clientip);
Expand Down
2 changes: 1 addition & 1 deletion htroot/yacy/transferRWI.java
Expand Up @@ -107,7 +107,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 60000;
} /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
} /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong(plasmaSwitchboard.WORDCACHE_MAX_COUNT, 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
// we are too busy flushing the ramCache to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush
Expand Down
12 changes: 12 additions & 0 deletions htroot/yacy/transferURL.java
Expand Up @@ -109,26 +109,30 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
urls = (String) post.get("url" + i);
if (urls == null) {
yacyCore.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
blocked++;
continue;
}

// parse new lurl-entry
lEntry = sb.wordIndex.loadedURL.newEntry(urls);
if (lEntry == null) {
yacyCore.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}

// check if entry is well-formed
indexURLEntry.Components comp = lEntry.comp();
if (comp.url() == null) {
yacyCore.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}

// check whether entry is too old
if (lEntry.freshdate().getTime() <= freshdate) {
yacyCore.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate());
blocked++;
continue;
}

Expand All @@ -141,6 +145,14 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
continue;
}

// check if the entry is in our network domain
if (!sb.acceptURL(comp.url())) {
yacyCore.log.logFine("transferURL: blocked URL outside of our domain '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}

// write entry to database
try {
sb.wordIndex.loadedURL.store(lEntry);
Expand Down
3 changes: 2 additions & 1 deletion htroot/yacysearch.java
Expand Up @@ -74,6 +74,7 @@
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaSearchResults;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
Expand Down Expand Up @@ -111,7 +112,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
if (referer != null) {
URL url;
try { url = new URL(referer); } catch (MalformedURLException e) { url = null; }
if ((url != null) && (serverCore.isNotLocal(url))) {
if ((url != null) && (!serverDomains.isLocal(url))) {
final HashMap referrerprop = new HashMap();
referrerprop.put("count", "1");
referrerprop.put("clientip", header.get("CLIENTIP"));
Expand Down

0 comments on commit 9ca46a8

Please sign in to comment.