Skip to content

Commit

Permalink
- added flag to urlhash that shows that domain is a local domain
Browse files Browse the repository at this point in the history
- enhanced local domain detection
- bugfixing for memory assignment in kelondroFlexSplit
- automatic memory assignment to caches according to available RAM
- bugfixes for details during search process

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2924 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Nov 6, 2006
1 parent eafb5ec commit 114a76a
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 86 deletions.
35 changes: 6 additions & 29 deletions htroot/yacy/hello.java
Expand Up @@ -46,11 +46,9 @@
// javac -classpath .:../../classes hello.java
// if the shell's current path is HTROOT

import java.net.InetAddress;
import java.util.Date;

import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
Expand Down Expand Up @@ -109,35 +107,14 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

// if the previous attempt (using the reported ip address) was not successful, try the ip where
// the request came from
if (urls < 0) {
boolean isLocalIP = false;
if (serverCore.portForwardingEnabled || serverCore.useStaticIP) {
try {
final InetAddress clientAddress = httpc.dnsResolve(clientip);
if (clientAddress != null) {
if (clientAddress.isAnyLocalAddress() || clientAddress.isLoopbackAddress()) {
isLocalIP = true;
} else {
final InetAddress[] localAddress = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName());
for (i = 0; i < localAddress.length; i++) {
if (localAddress[i].equals(clientAddress)) {
isLocalIP = true;
break;
}
}
}
}
} catch (Exception e) {}
}

if ((urls < 0) && (serverCore.portForwardingEnabled || serverCore.useStaticIP) && (serverCore.isNotLocal(clientip))) {
// we are only allowed to connect to the client IP address if it's not our own address
if (!isLocalIP) {
serverCore.checkInterruption();

serverCore.checkInterruption();

prop.put(yacySeed.YOURIP, clientip);
remoteSeed.put(yacySeed.IP, clientip);
urls = yacyClient.queryUrlCount(remoteSeed);
}
prop.put(yacySeed.YOURIP, clientip);
remoteSeed.put(yacySeed.IP, clientip);
urls = yacyClient.queryUrlCount(remoteSeed);
}

// System.out.println("YACYHELLO: YOUR IP=" + clientip);
Expand Down
9 changes: 5 additions & 4 deletions source/de/anomic/http/httpc.java
Expand Up @@ -423,11 +423,8 @@ public boolean isClosed() {
*/
public static InetAddress dnsResolve(String host) {
if ((host == null)||(host.length() == 0)) return null;
host = host.toLowerCase().trim();
host = host.toLowerCase().trim();

// flushing old entries before accessing
flushNameCacheHit();

// trying to resolve host by doing a name cache lookup
InetAddress ip = (InetAddress) nameCacheHit.get(host);
if (ip != null) return ip;
Expand Down Expand Up @@ -456,6 +453,10 @@ public static InetAddress dnsResolve(String host) {
}

if (doCaching) {
// remove old entries
flushNameCacheHit();

// add new entries
synchronized (nameCacheHit) {
nameCacheHit.put(ip.getHostName(), ip);
nameCacheAges.setScore(ip.getHostName(), intTime(System.currentTimeMillis()));
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/index/indexContainer.java
Expand Up @@ -285,7 +285,7 @@ public static indexContainer joinConstructive(indexContainer i1, indexContainer

private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY TEST");
assert small.rowdef.equals(large);
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
Iterator se = small.entries();
indexEntry ie0, ie1;
Expand All @@ -304,7 +304,7 @@ private static indexContainer joinConstructiveByTest(indexContainer small, index

private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
assert i1.rowdef.equals(i2);
assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString();
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal
Expand Down
53 changes: 35 additions & 18 deletions source/de/anomic/index/indexURL.java
Expand Up @@ -40,6 +40,7 @@
import de.anomic.net.URL;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
import de.anomic.yacy.yacySeedDB;

public class indexURL {
Expand Down Expand Up @@ -133,6 +134,7 @@ public class indexURL {
// and culturally close to europe
"AD=Andorra",
"AL=Albania",
"AQ=Antarctica",
"AT=Austria",
"BA=Bosnia and Herzegovina",
"BE=Belgium",
Expand Down Expand Up @@ -190,6 +192,7 @@ public class indexURL {
"IQ=Iraq",
"IR=Iran",
"PK=Pakistan",
"TR=Turkey",
"YE=Yemen"
};
private static final String[] TLD_SouthEastAsia = {
Expand Down Expand Up @@ -270,10 +273,7 @@ public class indexURL {
"PRO=",
"ARPA=",
"INT=International",
"ARPA=Arpanet"
};
private static final String[] TLD_Unassigned = {
"AQ=Antarctica",
"ARPA=Arpanet",
"NT=Neutral Zone"
};

Expand Down Expand Up @@ -349,7 +349,6 @@ public class indexURL {
"TM=Turkmenistan",
"TO=Tonga",
"TP=East Timor",
"TR=Turkey",
"TT=Trinidad and Tobago",
"TV=Tuvalu",
"TW=Taiwan",
Expand Down Expand Up @@ -406,7 +405,7 @@ private static void insertTLDProps(String[] TLDList, int id) {
insertTLDProps(TLD_NorthAmericaOceania, 4);
insertTLDProps(TLD_Africa, 5);
insertTLDProps(TLD_Generic, 6);
insertTLDProps(TLD_Unassigned, 7);
// the id=7 is used to flag local addresses
}


Expand Down Expand Up @@ -479,8 +478,8 @@ public static final String urlHash(URL url) {
tld = host.substring(p + 1);
dom = host.substring(0, p);
}
Integer ID = (Integer) TLDID.get(tld);
int id = (ID == null) ? 7 : ID.intValue();
Integer ID = (serverCore.isNotLocal(tld)) ? (Integer) TLDID.get(tld) : null; // identify local addresses
int id = (ID == null) ? 7 : ID.intValue(); // local addresses are flagged with id=7
boolean isHTTP = url.getProtocol().equals("http");
p = dom.lastIndexOf('.'); // locate subdomain
String subdom = "";
Expand All @@ -506,18 +505,23 @@ public static final String urlHash(URL url) {
if (p > 0) {
rootpath = path.substring(0, p);
}

// we collected enough information to compute the fragments that are basis for hashes
int l = dom.length();
int domlengthKey = (l <= 8) ? 0 : (l <= 12) ? 1 : (l <= 16) ? 2 : 3;
byte flagbyte = (byte) (((isHTTP) ? 0 : 32) | (id << 2) | domlengthKey);

// combine the attributes
StringBuffer hash = new StringBuffer(12);
// form the 'local' part of the hash
String hash3 = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, 5);
char hash2 = subdomPortPath(subdom, port, rootpath);
hash.append(kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(url.toNormalform())).substring(0, 5)); // 5 chars
hash.append(subdomPortPath(subdom, port, rootpath)); // 1 char
// form the 'global' part of the hash
String hash1 = protocolHostPort(url.getProtocol(), host, port);
char hash0 = kelondroBase64Order.enhancedCoder.encodeByte(flagbyte);
// combine the hashes
return hash3 + hash2 + hash1 + hash0;
hash.append(protocolHostPort(url.getProtocol(), host, port)); // 5 chars
hash.append(kelondroBase64Order.enhancedCoder.encodeByte(flagbyte)); // 1 char

// return result hash
return new String(hash);
}

private static char subdomPortPath(String subdom, int port, String rootpath) {
Expand Down Expand Up @@ -557,6 +561,8 @@ public static final boolean isWordRootURL(String givenURLHash, String word) {

public static final int domLengthEstimation(String urlHash) {
// generates an estimation of the original domain length
assert (urlHash != null);
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11));
int domLengthKey = flagbyte & 3;
switch (domLengthKey) {
Expand All @@ -568,9 +574,21 @@ public static final int domLengthEstimation(String urlHash) {
return 20;
}

/**
 * Scales the domain length estimate of a URL hash by the factor 255/30.
 * The computation uses integer arithmetic: the estimate is multiplied by 255
 * first and the product is then divided by 30 (truncating).
 *
 * @param urlHash the URL hash handed through to domLengthEstimation
 * @return the scaled domain length estimate
 */
public static int domLengthNormalized(String urlHash) {
    final int estimatedLength = domLengthEstimation(urlHash);
    return (255 * estimatedLength) / 30;
}
/**
 * Scales the domain length estimate of a URL hash by the factor 255/30.
 * The computation uses integer arithmetic: the estimate is multiplied by 255
 * first and the product is then divided by 30 (truncating).
 *
 * @param urlHash the URL hash handed through to domLengthEstimation
 * @return the scaled domain length estimate
 */
public static int domLengthNormalized(String urlHash) {
    final int estimatedLength = domLengthEstimation(urlHash);
    return (255 * estimatedLength) / 30;
}

/**
 * Extracts the top-level-domain group ID from the flag character of a URL hash.
 *
 * The flag character is the last (12th) character of the hash. When the hash is
 * built, the flag byte is composed as
 * <code>(isHTTP ? 0 : 32) | (id &lt;&lt; 2) | domlengthKey</code>, so the ID
 * occupies the three bits 2..4 (mask 28) and can take the values 0..7.
 * The value 7 is used to flag local addresses.
 *
 * @param urlHash a URL hash of exactly 12 characters, must not be null
 * @return the domain group ID in the range 0..7
 */
public static final int domDomain(String urlHash) {
    assert (urlHash != null);
    assert (urlHash.length() == 12) : "urlhash = " + urlHash;
    int flagbyte = kelondroBase64Order.enhancedCoder.decodeByte(urlHash.charAt(11));
    // the ID is three bits wide; masking with 12 (two bits) would cut off the
    // highest bit and make the IDs 4..7 indistinguishable from 0..3
    return (flagbyte & 28) >> 2;
}

/**
 * Tells whether a URL hash belongs to a global (non-local) domain.
 * Local addresses are flagged with the domain ID 7 inside the hash; every
 * other ID is treated as global.
 *
 * @param urlhash the URL hash to inspect, handed through to domDomain
 * @return true if the domain ID stored in the hash is not 7
 */
public static boolean isGlobalDomain(String urlhash) {
    final int domainID = domDomain(urlhash);
    return !(domainID == 7);
}

public static final String oldurlHash(URL url) {
if (url == null) return null;
Expand All @@ -583,7 +601,6 @@ public static final String oldurlHash(String url) throws MalformedURLException {
String hash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(new URL(url).toNormalform())).substring(0, urlHashLength);
return hash;
}


public static final serverByteBuffer compressIndex(indexContainer inputContainer, indexContainer excludeContainer, long maxtime) {
// collect references according to domains
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/kelondro/kelondroBase64Order.java
Expand Up @@ -298,9 +298,9 @@ public final int compares(byte[] a, int aoffset, int alength, byte[] b, int boff
bc = b[boffset + i];
assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + serverLog.arrayList(b, boffset, len);
acc = ahpla[ac];
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + serverLog.table(a, 16, aoffset);
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + serverLog.table(a, 16, aoffset);
bcc = ahpla[bc];
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + serverLog.table(b, 16, boffset);
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + serverLog.table(b, 16, boffset);
if (acc > bcc) return 1;
if (acc < bcc) return -1;
// else the bytes are equal and it may go on yet undecided
Expand Down
2 changes: 2 additions & 0 deletions source/de/anomic/kelondro/kelondroCache.java
Expand Up @@ -353,6 +353,7 @@ public synchronized Entry put(Entry row) throws IOException {
// remove entry from miss- and hit-cache
if (readMissCache != null) {
if (readMissCache.remove(key) != null) {
this.hasnotHit++;
// the entry does not exist before
if (writeBufferUnique != null) {
// since we know that the entry does not exist, we know that new
Expand Down Expand Up @@ -547,6 +548,7 @@ public synchronized Entry remove(byte[] key) throws IOException {
if (dummy == null) {
this.hasnotUnique++;
} else {
this.hasnotHit++;
this.hasnotDouble++;
return null;
}
Expand Down
53 changes: 38 additions & 15 deletions source/de/anomic/kelondro/kelondroFlexSplitTable.java
Expand Up @@ -60,31 +60,54 @@ public kelondroFlexSplitTable(File path, String tablename, long buffersize, long

// first pass: find tables
HashMap t = new HashMap(); // file/Integer(size) relation
int size, sum = 0;
long ram, sum = 0;
for (int i = 0; i < dir.length; i++) {
if ((dir[i].startsWith(tablename)) &&
(dir[i].charAt(tablename.length()) == '.') &&
(dir[i].length() == tablename.length() + 7)) {
size = kelondroFlexTable.staticSize(path, dir[i]);
if (size > 0) {
t.put(dir[i], new Integer(size));
sum += size;
ram = kelondroFlexTable.staticRAMIndexNeed(path, dir[i], rowdef);
if (ram > 0) {
t.put(dir[i], new Long(ram));
sum += ram;
}
}
}

// second pass: open tables
Iterator i = t.entrySet().iterator();
Iterator i;
Map.Entry entry;
String f;
long bs;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
f = (String) entry.getKey();
size = ((Integer) entry.getValue()).intValue();
date = f.substring(tablename.length() + 1);
bs = buffersize * size / sum;
tables.put(date, new kelondroCache(new kelondroFlexTable(path, f, bs / 2, preloadTime, rowdef, objectOrder), bs / 2, true, false));
String f, maxf;
long maxram;
kelondroIndex table;
while (t.size() > 0) {
// find maximum table
maxram = 0;
maxf = null;
i = t.entrySet().iterator();
while (i.hasNext()) {
entry = (Map.Entry) i.next();
f = (String) entry.getKey();
ram = ((Long) entry.getValue()).longValue();
if (ram > maxram) {
maxf = f;
maxram = ram;
}
}

// open next biggest table
t.remove(maxf);
date = maxf.substring(tablename.length() + 1);
if (maxram <= sum) {
// this will cause usage of a complete RAM index
table = new kelondroCache(new kelondroFlexTable(path, maxf, maxram, preloadTime, rowdef, objectOrder), maxram / 10, true, false);
sum -= maxram;
sum -= maxram / 10;
} else {
// this will cause a generation of a file index
table = new kelondroFlexTable(path, maxf, sum / (t.size() + 1), preloadTime, rowdef, objectOrder);
sum -= sum / (t.size() + 1);
}
tables.put(date, table);
}
}

Expand Down
14 changes: 9 additions & 5 deletions source/de/anomic/kelondro/kelondroFlexTable.java
Expand Up @@ -42,7 +42,7 @@ public kelondroFlexTable(File path, String tablename, long buffersize, long prel
// if the ram is not sufficient, a tree file is generated
// if, and only if a tree file exists, the preload time is applied
super(path, tablename, rowdef);
long neededRAM = (super.row().column(0).cellwidth() + 4) * 12 / 10 * super.size();
long neededRAM = (super.row().column(0).cellwidth() + 4) * super.size();

File newpath = new File(path, tablename);
File indexfile = new File(newpath, "col.000.index");
Expand Down Expand Up @@ -78,10 +78,10 @@ public kelondroFlexTable(File path, String tablename, long buffersize, long prel
System.out.println("*** Using File index " + indexfile);
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 3 * 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 3, true, false);
RAMIndex = false;
} else if ((preloadTime >= 0) && (stt > preloadTime)) {
} else {
// generate new index file
System.out.print("*** Generating File index for " + size() + " entries from " + indexfile);
System.out.print("*** Cause: too less RAM configured. Assign at least " + neededRAM + " bytes buffersize to enable a RAM index.");
System.out.println("*** Generating File index for " + size() + " entries from " + indexfile);
System.out.println("*** Cause: too less RAM (" + (buffersize / 1024 / 1024) + " MB) configured. Assign at least " + (neededRAM / 1024 / 1024) + " MB buffersize to enable a RAM index.");
ki = initializeTreeIndex(indexfile, buffersize, preloadTime, objectOrder);

System.out.println(" -done-");
Expand All @@ -99,6 +99,10 @@ public static int staticSize(File path, String tablename) {
return kelondroFlexWidthArray.staticsize(path, tablename);
}

/**
 * Computes the number of bytes needed for an index of the given table:
 * (width of the first column + 4) multiplied by the static size of the table.
 *
 * The product is computed with 64-bit arithmetic and saturated to the int
 * range. A plain int multiplication can overflow for large tables and yield a
 * negative number, which callers that test for a positive result would
 * misinterpret as "no table".
 *
 * @param path      the directory that contains the table
 * @param tablename the name of the table
 * @param rowdef    the row definition; the cell width of column 0 is used
 * @return the computed byte count, clamped to the range of an int
 */
public static int staticRAMIndexNeed(File path, String tablename, kelondroRow rowdef) {
    final long need = (long) (rowdef.column(0).cellwidth() + 4) * staticSize(path, tablename);
    if (need > Integer.MAX_VALUE) return Integer.MAX_VALUE;
    if (need < Integer.MIN_VALUE) return Integer.MIN_VALUE;
    return (int) need;
}

/**
 * Reports the value of the RAMIndex flag of this table.
 * The flag is set to false when a file index is used instead of a RAM index.
 *
 * @return the current value of the RAMIndex flag
 */
public boolean hasRAMIndex() {
    final boolean usesRAMIndex = this.RAMIndex;
    return usesRAMIndex;
}
Expand Down Expand Up @@ -157,7 +161,7 @@ private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long
treeindex.addUnique(indexentry);
c++;
if (System.currentTimeMillis() - last > 30000) {
System.out.println(".. generated " + c+ " entries, " + ((System.currentTimeMillis() - start) / c * (all - c) / 60000) + " minutes remaining");
System.out.println(".. generated " + c + "/" + all + " entries, " + ((System.currentTimeMillis() - start) / c * (all - c) / 60000) + " minutes remaining");
System.out.flush();
last = System.currentTimeMillis();
}
Expand Down

0 comments on commit 114a76a

Please sign in to comment.