Skip to content

Commit

Permalink
enhanced indexing-caching
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@107 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed May 11, 2005
1 parent 0cfe94b commit ea478f3
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 33 deletions.
10 changes: 10 additions & 0 deletions htroot/Performance_p.html
Expand Up @@ -81,6 +81,13 @@ <h2>Performance</h2>
If this is a big number, it shows that the caching works efficiently.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Singletons Cache Size:</td>
<td class=small>#[singletonsSize]#</td>
<td class=small>
The Singletons Cache is a database that holds words that occurred only once.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches:</td>
<td class=small><input name="wordCacheMax" type="text" size="20" maxlength="100" value="#[wordCacheMax]#"></td>
Expand All @@ -90,6 +97,7 @@ <h2>Performance</h2>
flushed to disc; this may last some minutes.
</td>
</tr>
<!--
<tr valign="top" class="TableCellDark">
<td class=small>Maximum waitingtime to flush word cache<br>during shut-down (seconds):</td>
<td class=small><input name="maxWaitingWordFlush" type="text" size="6" maxlength="6" value="#[maxWaitingWordFlush]#"></td>
Expand All @@ -99,11 +107,13 @@ <h2>Performance</h2>
time are lost.
</td>
</tr>
-->
<tr valign="top" class="TableCellLight">
<td class=small colspan="3"><input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size">
Changes take effect immediately</td>
</td>
</tr>

</form>
</table>
</p>
Expand Down
1 change: 1 addition & 0 deletions htroot/Performance_p.java
Expand Up @@ -146,6 +146,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000"));
prop.put("singletonsSize", switchboard.wordIndex.singletonsSize());

// return rewrite values for templates
return prop;
Expand Down
36 changes: 31 additions & 5 deletions source/de/anomic/kelondro/kelondroMScoreCluster.java
Expand Up @@ -44,6 +44,7 @@
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.Map;

public class kelondroMScoreCluster {

Expand Down Expand Up @@ -243,21 +244,22 @@ public Object[] getScores(int maxCount, boolean up, int minScore, int maxScore)
}

public Iterator scores(boolean up) {
return scores(up, Integer.MIN_VALUE, Integer.MAX_VALUE);
if (up) return new simpleScoreIterator();
else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE);
}

public Iterator scores(boolean up, int minScore, int maxScore) {
return new scoreIterator(up, minScore, maxScore);
return new komplexScoreIterator(up, minScore, maxScore);
}

private class scoreIterator implements Iterator {
private class komplexScoreIterator implements Iterator {

boolean up;
TreeMap keyrefDBcopy;
Object n;
int min, max;

public scoreIterator(boolean up, int minScore, int maxScore) {
public komplexScoreIterator(boolean up, int minScore, int maxScore) {
this.up = up;
this.min = minScore;
this.max = maxScore;
Expand Down Expand Up @@ -299,7 +301,31 @@ public void remove() {

}

public static void main(String[] args) {
/**
 * Iterator that hands out the values of keyrefDB in the order in which
 * keyrefDB.entrySet() delivers its entries. No copy of the map is made;
 * hasNext() and remove() are delegated to the entry-set iterator.
 */
private class simpleScoreIterator implements Iterator {

    // iterator over the entry set of the enclosing cluster's keyrefDB
    private final Iterator entries;

    public simpleScoreIterator() {
        this.entries = keyrefDB.entrySet().iterator();
    }

    public boolean hasNext() {
        return this.entries.hasNext();
    }

    public Object next() {
        // each element is a Map.Entry; callers only get its value
        final Map.Entry current = (Map.Entry) this.entries.next();
        return current.getValue();
    }

    public void remove() {
        this.entries.remove();
    }

}

public static void main(String[] args) {
System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster();
int c = 0;
Expand Down
6 changes: 5 additions & 1 deletion source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -65,7 +65,7 @@ public class plasmaWordIndex {
public plasmaWordIndex(File databaseRoot, int bufferkb, serverLog log) throws IOException {
this.databaseRoot = databaseRoot;
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(databaseRoot, log);
this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, 1000000, log);
this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, bufferkb, log);
}

public int maxURLinWordCache() {
Expand All @@ -76,6 +76,10 @@ public int wordCacheRAMSize() {
return ramCache.wordCacheRAMSize();
}

/**
 * Reports the size of the singletons cache.
 *
 * @return the value returned by ramCache.singletonsSize()
 */
public int singletonsSize() {
// pure delegation to the RAM cache layer
return ramCache.singletonsSize();
}

/**
 * Passes a new maximum word count on to the RAM cache.
 *
 * @param maxWords the limit handed to ramCache.setMaxWords
 */
public void setMaxWords(int maxWords) {
// pure delegation to the RAM cache layer
ramCache.setMaxWords(maxWords);
}
Expand Down
69 changes: 48 additions & 21 deletions source/de/anomic/plasma/plasmaWordIndexCache.java
Expand Up @@ -82,11 +82,11 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}

public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, long singletonBufferSize, serverLog log) {
public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int singletonbufferkb, serverLog log) {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot;
this.singletonBufferSize = singletonBufferSize;
this.singletonBufferSize = singletonbufferkb * 1024;
this.cache = new TreeMap();
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new HashMap();
Expand Down Expand Up @@ -132,7 +132,7 @@ private void dump(int waitingSeconds) throws IOException {
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
synchronized (cache) {
//Iterator i = cache.entrySet().iterator();
Iterator i = hashScore.scores(false);
Iterator i = hashScore.scores(true);
//Map.Entry entry;
String wordHash;
plasmaWordIndexEntryContainer container;
Expand Down Expand Up @@ -318,6 +318,10 @@ public int wordCacheRAMSize() {
return cache.size();
}

/**
 * Reports the current size of the singletons store.
 *
 * @return the value returned by singletons.size()
 */
public int singletonsSize() {
return singletons.size();
}

/**
 * Stores a new value for the maxWords limit of this cache.
 *
 * @param maxWords the new limit; it is only stored here
 */
public void setMaxWords(int maxWords) {
this.maxWords = maxWords;
}
Expand All @@ -341,7 +345,14 @@ public Iterator wordHashes(String startWordHash, boolean up) {
true);
}

private int flushFromMem(String key) {
private int flushFromMem(String key, boolean reintegrate) {
// this method flushes indexes out from the ram to the disc.
// at first we check the singleton database and act accordingly
// if we are to flush an index, but also see an entry in the singletons, we
// decide upon the 'reintegrate' flag:
// true: do not flush to disc, but re-Integrate the singleton to the RAM
// false: flush the singleton together with container to disc

plasmaWordIndexEntryContainer container = null;
long time;
synchronized (cache) {
Expand All @@ -358,12 +369,13 @@ private int flushFromMem(String key) {
// now decide where to flush that container
Object[] singleton = readSingleton(key);
if (singleton == null) {
// not found in singletons
if (container.size() == 1) {
// store to singleton
// it is a singleton: store to singleton
storeSingleton(key, container.getOne(), time);
return 1;
} else {
// store to back-end
// store to back-end; this should be a rare case
return backend.addEntries(container, time);
}
} else {
Expand All @@ -376,17 +388,28 @@ private int flushFromMem(String key) {
// it is superfluous to flush this, simply do nothing
return 0;
} else {
// we flush to the backend, but remove the entry from the singletons
// we flush to the backend, and remove the entry from the singletons
removeSingleton(key);
return backend.addEntries(container, java.lang.Math.max(time, oldTime));
}
} else {
// now we have more than one entry,
// now we have more than one entry
// we must remove the key from the singleton database
removeSingleton(key);
// add this to the backend
// .. and put it to the container
container.add(oldEntry);
return backend.addEntries(container, java.lang.Math.max(time, oldTime));
if (reintegrate) {
// put singleton together with container back to ram
synchronized (cache) {
cache.put(key, container);
hashScore.setScore(key, container.size());
hashDate.put(key, new Long(time));
}
return -1;
} else {
// add this to the backend
return backend.addEntries(container, java.lang.Math.max(time, oldTime));
}
}
}
}
Expand Down Expand Up @@ -441,31 +464,35 @@ private int flushFromMemToLimit() {
break;
}
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
total += flushFromMem(key);
total += flushFromMem(key, false);
}

// flush singletons
while ((total < 200) && (hashScore.size() >= maxWords)) {
key = (String) hashScore.getMinObject();
Iterator i = hashScore.scores(true);
ArrayList al = new ArrayList();
while ((i.hasNext()) && (total < 200)) {
key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count > 1) {
//log.logDebug("flush of singleton-key " + key + ": count too high (count=" + count + ")");
break;
}
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 9000)) {
//log.logDebug("singleton-key " + key + " is too fresh, interruptiong flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
break;
if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 90000)) {
//log.logDebug("singleton-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
continue;
}
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
total += flushFromMem(key);
al.add(key);
total++;
}
for (int k = 0; k < al.size(); k++) flushFromMem((String) al.get(k), true);
}
return total;
}

public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
flushFromMem(wordHash);
flushFromMem(wordHash, false);
flushFromSingleton(wordHash);
return backend.getIndex(wordHash, deleteIfEmpty);
}
Expand All @@ -486,13 +513,13 @@ public void deleteIndex(String wordHash) {
backend.deleteIndex(wordHash);
}

public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
flushFromMem(wordHash);
public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
flushFromMem(wordHash, false);
flushFromSingleton(wordHash);
return backend.removeEntries(wordHash, urlHashes, deleteComplete);
}

public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
flushFromMemToLimit();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
Expand Down
9 changes: 4 additions & 5 deletions source/de/anomic/yacy/yacySearch.java
Expand Up @@ -101,16 +101,15 @@ private static yacySeed[] selectPeers(Set wordhashes, int seedcount) {
int c;
while (i.hasNext()) {
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next());
c = seedcount;
while ((dhtEnum.hasMoreElements()) && (c > 0)) {
c = 0;
while ((dhtEnum.hasMoreElements()) && (c < seedcount)) {
seed = (yacySeed) dhtEnum.nextElement();
ranking.addScore(seed.hash, c);
c--;
ranking.addScore(seed.hash, c++);
}
}
if (ranking.size() < seedcount) seedcount = ranking.size();
yacySeed[] result = new yacySeed[seedcount];
Iterator e = ranking.scores(false);
Iterator e = ranking.scores(true);
c = 0;
while ((e.hasNext()) && (c < result.length))
result[c++] = yacyCore.seedDB.getConnected((String) e.next());
Expand Down
2 changes: 1 addition & 1 deletion yacy.parser
@@ -1,2 +1,2 @@
#plasmaParser configuration file
#Wed May 11 17:48:25 CEST 2005
#Thu May 12 01:40:28 CEST 2005

0 comments on commit ea478f3

Please sign in to comment.