From 2a9d868f6dd0eaaf7294edabb78c13c9e5327e6f Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 24 Oct 2006 13:48:16 +0000 Subject: [PATCH] - removed object cache from kelondroTree - generalized object caching and added new object caching class - added object caching wherever kelondroTree was used - added object caching also to usage of kelondroFlex - added object buffering (a write cache) to NURLs - added many assert statements; fixed bugs here and there - added missing close methods to latest added classes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2858 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceMemory_p.java | 4 +- htroot/PerformanceQueues_p.html | 4 +- source/dbtest.java | 20 ++- .../anomic/kelondro/kelondroBase64Order.java | 4 +- .../kelondro/kelondroBufferedIndex.java | 27 +++- .../anomic/kelondro/kelondroBytesIntMap.java | 14 ++ .../anomic/kelondro/kelondroCachedIndex.java | 151 ++++++++++++++++++ .../kelondro/kelondroCollectionIndex.java | 2 +- source/de/anomic/kelondro/kelondroDyn.java | 125 ++++++--------- .../de/anomic/kelondro/kelondroDynTree.java | 8 +- .../kelondro/kelondroFlexSplitTable.java | 20 +++ .../de/anomic/kelondro/kelondroFlexTable.java | 35 +++- source/de/anomic/kelondro/kelondroIndex.java | 8 + source/de/anomic/kelondro/kelondroMap.java | 11 +- .../de/anomic/kelondro/kelondroMapTable.java | 18 +-- .../anomic/kelondro/kelondroObjectCache.java | 36 +---- .../de/anomic/kelondro/kelondroRAMIndex.java | 19 +++ .../de/anomic/kelondro/kelondroRecords.java | 34 +++- .../kelondro/kelondroRowCollection.java | 7 + source/de/anomic/kelondro/kelondroRowSet.java | 25 +++ .../anomic/kelondro/kelondroSplittedTree.java | 22 ++- source/de/anomic/kelondro/kelondroTree.java | 106 ++++-------- .../plasma/dbImport/AssortmentImporter.java | 8 +- .../plasma/dbImport/plasmaDbImporter.java | 8 +- source/de/anomic/plasma/plasmaCrawlEURL.java | 2 +- source/de/anomic/plasma/plasmaCrawlLURL.java | 7 +- source/de/anomic/plasma/plasmaCrawlNURL.java | 7 +- .../de/anomic/plasma/plasmaCrawlStacker.java | 10 +- .../de/anomic/plasma/plasmaSwitchboard.java | 7 +- source/de/anomic/plasma/plasmaWordIndex.java | 4 +- .../plasma/plasmaWordIndexAssortment.java | 37 +++-- .../plasmaWordIndexAssortmentCluster.java | 26 ++- .../de/anomic/plasma/plasmaWordIndexFile.java | 4 +- .../de/anomic/server/logging/serverLog.java | 23 +++ source/de/anomic/server/serverObjects.java | 1 - .../de/anomic/server/servletProperties.java | 3 + source/de/anomic/yacy/yacyCore.java | 7 +- source/de/anomic/yacy/yacyNewsDB.java | 16 +- source/de/anomic/yacy/yacyNewsPool.java | 9 +- source/yacy.java | 12 +- 40 files changed, 619 insertions(+), 272 deletions(-) create mode 100644 source/de/anomic/kelondro/kelondroCachedIndex.java diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index b880900a80..f5983e3ff0 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -49,7 +49,7 @@ import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.kelondro.kelondroTree; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; @@ -338,7 +338,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve private static void putprop(serverObjects prop, serverSwitch env, String wdb, String db, String set) { if ((slt == null) || (ost == null)) return; - usd = chk * slt[1] + obj * ost[2] /*hit*/ + kelondroTree.cacheObjectMissSize * ost[3] /*miss*/; + usd = chk * slt[1] + obj * ost[2] /*hit*/ + kelondroCachedIndex.cacheObjectMissSize * ost[3] /*miss*/; bst = (((((long) chk) * ((long) req)) >> 10) + 1) << 10; if (set.equals("setBest")) env.setConfig("ramCache" + db, bst); prop.put(wdb + ((wdb.length() > 0) ? ("_") : ("")) + "nodsz" + db, chk); diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index ea34bf6bce..a0cfd69e2c 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -76,10 +76,10 @@

Performance Settings of Queues and Processes

Description - URLs in RAM cache: + URLs in RAM buffer: #[urlCacheSize]# - This is the size of the URL cache. Its purpose is to buffer incoming URLs + This is the size of the URL write buffer. Its purpose is to buffer incoming URLs in case of search result transmission and during DHT transfer. diff --git a/source/dbtest.java b/source/dbtest.java index debcb37e8c..24f41ab001 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -13,6 +13,7 @@ import java.util.Random; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; @@ -174,7 +175,7 @@ public static void main(String[] args) { kelondroRow testRow = new kelondroRow("byte[] key-" + keylength + ", byte[] dummy-" + keylength + ", value-" + valuelength); if (dbe.equals("kelondroTree")) { File tablefile = new File(tablename + ".kelondro.db"); - table = new kelondroTree(tablefile, buffer, preload, kelondroTree.defaultObjectCachePercent, testRow); + table = new kelondroCachedIndex(new kelondroTree(tablefile, buffer / 2, preload, testRow), buffer / 2); } if (dbe.equals("kelondroSplittedTree")) { File tablepath = new File(tablename).getParentFile(); @@ -616,6 +617,23 @@ public kelondroProfile profile() { return new kelondroProfile(); } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + return -1; + } + + public final int[] cacheNodeStatus() { + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } } diff --git a/source/de/anomic/kelondro/kelondroBase64Order.java b/source/de/anomic/kelondro/kelondroBase64Order.java index 2d7d8c35b7..4224f261fe 100644 --- a/source/de/anomic/kelondro/kelondroBase64Order.java +++ b/source/de/anomic/kelondro/kelondroBase64Order.java @@ -298,9 +298,9 @@ public final int compares(byte[] a, int aoffset, int alength, byte[] b, int boff bc = b[boffset + i]; assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + serverLog.arrayList(b, boffset, len); acc = ahpla[ac]; - assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len); + assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + ", aoffset = " + aoffset + serverLog.table(a, aoffset); bcc = ahpla[bc]; - assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len); + assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + ", boffset = " + boffset + serverLog.table(b, boffset); if (acc > bcc) return 1; if (acc < bcc) return -1; // else the bytes are equal and it may go on yet undecided diff --git a/source/de/anomic/kelondro/kelondroBufferedIndex.java b/source/de/anomic/kelondro/kelondroBufferedIndex.java index bdc44ce83d..11e87e6d1e 100644 --- a/source/de/anomic/kelondro/kelondroBufferedIndex.java +++ b/source/de/anomic/kelondro/kelondroBufferedIndex.java @@ -33,6 +33,7 @@ import java.util.TreeMap; import de.anomic.server.serverMemory; +import de.anomic.server.logging.serverLog; public class kelondroBufferedIndex implements kelondroIndex { @@ -50,7 +51,7 @@ public kelondroBufferedIndex(kelondroIndex theIndex) { } public synchronized void flush() throws IOException { - if (buffer.size() == 0) return; + if ((buffer == null) || (buffer.size() == 0)) return; Iterator i = buffer.entrySet().iterator(); Map.Entry entry; while (i.hasNext()) { @@ -103,6 +104,9 @@ public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOExcept } public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { + assert (row != null); + assert (row.getColBytes(index.primarykey()) != null); + assert (!(serverLog.allZero(row.getColBytes(index.primarykey())))); long handle = (index instanceof kelondroFlexSplitTable) ? -1 : index.profile().startWrite(); byte[] key = row.getColBytes(index.primarykey()); kelondroRow.Entry oldentry = null; @@ -211,4 +215,25 @@ public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) public static kelondroBufferedIndex getRAMIndex(kelondroRow rowdef, int initSize) { return new kelondroBufferedIndex(new kelondroRowSet(rowdef, kelondroNaturalOrder.naturalOrder, 0, initSize)); } + + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return index.cacheNodeChunkSize(); + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return index.cacheNodeStatus(); + } + } diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 7e920d0e36..5da8a1853a 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -27,6 +27,8 @@ import java.io.IOException; import java.util.Iterator; +import de.anomic.server.logging.serverLog; + public class kelondroBytesIntMap { private kelondroIndex ki; @@ -38,12 +40,16 @@ public kelondroBytesIntMap(kelondroIndex ki) throws IOException { } public synchronized int geti(byte[] key) throws IOException { + assert (key != null); + assert (!(serverLog.allZero(key))); kelondroRow.Entry indexentry = ki.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } public synchronized int puti(byte[] key, int i) throws IOException { + assert (key != null); + assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); @@ -53,6 +59,8 @@ public synchronized int puti(byte[] key, int i) throws IOException { } public synchronized void addi(byte[] key, int i) throws IOException { + assert (key != null); + assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); @@ -60,6 +68,8 @@ public synchronized void addi(byte[] key, int i) throws IOException { } public synchronized int removei(byte[] key) throws IOException { + assert (key != null); + assert (!(serverLog.allZero(key))); // returns the integer index of the key, if the key can be found and was removed // and -1 if the key was not found. if (ki.size() == 0) return -1; @@ -94,4 +104,8 @@ public kelondroProfile profile() { return ki.profile(); } + public synchronized void close() throws IOException { + ki.close(); + } + } diff --git a/source/de/anomic/kelondro/kelondroCachedIndex.java b/source/de/anomic/kelondro/kelondroCachedIndex.java new file mode 100644 index 0000000000..08722cdcca --- /dev/null +++ b/source/de/anomic/kelondro/kelondroCachedIndex.java @@ -0,0 +1,151 @@ +// kelondroCachedIndex +// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany +// first published 23.10.2006 on http://www.anomic.de +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.IOException; +import java.util.Date; +import java.util.Iterator; + +import de.anomic.kelondro.kelondroRow.Entry; +import de.anomic.server.logging.serverLog; + +public class kelondroCachedIndex implements kelondroIndex { + + public final static int cacheObjectMissSize = 120; + public final static int defaultObjectCachePercent = 10; + + private kelondroObjectCache objectCache; + private kelondroIndex theIndex; + + public kelondroCachedIndex(kelondroIndex superIndex, long objectbuffersize) throws IOException { + this.theIndex = superIndex; + long objecthitcachesize = objectbuffersize * 4 / 5 / cacheObjectChunkSize(); + long objectmisscachesize = objectbuffersize / 5 / cacheObjectMissSize; + this.objectCache = new kelondroObjectCache("generic", (int) objecthitcachesize, (int) objectmisscachesize, objecthitcachesize * 3000 , 4*1024*1024); + } + + public final int cacheObjectChunkSize() { + try { + return this.theIndex.row().objectsize() + /* overhead */ 16 * this.theIndex.row().columns(); + } catch (IOException e) { + return 0; + } + } + + public long[] cacheObjectStatus() { + if (this.objectCache == null) return null; + return this.objectCache.status(); + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return theIndex.cacheNodeChunkSize(); + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return theIndex.cacheNodeStatus(); + } + + public void addUnique(Entry row) throws IOException { + // the use case for add implies that usually the objects are not needed in the cache + // therefore omit an object cache write here + this.theIndex.addUnique(row); + } + + public void addUnique(Entry row, Date entryDate) throws IOException { + this.theIndex.addUnique(row, entryDate); + } + + public void close() throws IOException { + this.objectCache = null; + this.theIndex.close(); + + } + + public Entry get(byte[] key) throws IOException { + // get result from cache + kelondroRow.Entry result = (objectCache == null) ? null : (kelondroRow.Entry) objectCache.get(key); + if (result != null) return result; + // check if we have an entry in the miss cache + if ((objectCache != null) && (objectCache.has(key) == -1)) return null; + // finally: get it from the index + result = this.theIndex.get(key); + if (result == null) objectCache.hasnot(key); else objectCache.put(key, result); + return result; + } + + public kelondroOrder order() { + return this.theIndex.order(); + } + + public int primarykey() { + return this.theIndex.primarykey(); + } + + public kelondroProfile profile() { + return this.theIndex.profile(); + } + + public Entry put(Entry row) throws IOException { + assert (row != null); + assert (row.columns() == row().columns()); + assert (!(serverLog.allZero(row.getColBytes(theIndex.primarykey())))); + objectCache.put(row.getColBytes(theIndex.primarykey()), row); + return this.theIndex.put(row); + } + + public Entry put(Entry row, Date entryDate) throws IOException { + assert (row.columns() == row().columns()); + objectCache.put(row.getColBytes(theIndex.primarykey()), row); + return this.theIndex.put(row, entryDate); + } + + public Entry remove(byte[] key) throws IOException { + if (objectCache.has(key) == -1) return null; + objectCache.remove(key); + return this.theIndex.remove(key); + } + + public Entry removeOne() throws IOException { + Entry entry = this.theIndex.removeOne(); + if (entry == null) return null; + this.objectCache.remove(entry.getColBytes(this.theIndex.primarykey())); + return entry; + } + + public kelondroRow row() throws IOException { + return this.theIndex.row(); + } + + public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return this.theIndex.rows(up, rotating, firstKey); + } + + public int size() throws IOException { + return this.theIndex.size(); + } + +} diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 3cac11a2e5..b46ff71553 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -177,7 +177,7 @@ private kelondroIndex openIndexFile(File path, String filenameStub, kelondroOrde long buffersize, long preloadTime, int loadfactor, kelondroRow rowdef) throws IOException { // open/create index table - kelondroFlexTable theindex = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(), indexOrder); + kelondroIndex theindex = new kelondroCachedIndex(new kelondroFlexTable(path, filenameStub + ".index", buffersize / 2, preloadTime, indexRow(), indexOrder), buffersize / 2); // save/check property file for this array File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize()); diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index 54faf4a8db..10ee19471b 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -58,15 +58,19 @@ import java.io.IOException; import java.util.Iterator; -public class kelondroDyn extends kelondroTree { +import de.anomic.server.logging.serverLog; + +public class kelondroDyn { private static final int counterlen = 8; protected int keylen; private int reclen; - private int segmentCount; + //private int segmentCount; private char fillChar; + private kelondroIndex index; private kelondroObjectBuffer buffer; + private kelondroRow rowdef; public kelondroDyn(File file, long buffersize /*bytes*/, long preloadTime, int key, int nodesize, char fillChar) throws IOException { this(file, buffersize, preloadTime, key, nodesize, fillChar, new kelondroNaturalOrder(true)); @@ -75,12 +79,14 @@ public kelondroDyn(File file, long buffersize /*bytes*/, long preloadTime, int k public kelondroDyn(File file, long buffersize /* bytes */, long preloadTime, int key, int nodesize, char fillChar, kelondroOrder objectOrder) throws IOException { // creates or opens a dynamic tree - super(file, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize), objectOrder, 1, 8); - this.keylen = row().width(0) - counterlen; - this.reclen = row().width(1); + rowdef = new kelondroRow("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize); + kelondroTree tree = new kelondroTree(file, buffersize / 2, preloadTime, rowdef, objectOrder, 1, 8); + this.index = new kelondroCachedIndex(tree, buffersize / 2); + this.keylen = index.row().width(0) - counterlen; + this.reclen = index.row().width(1); this.fillChar = fillChar; - this.segmentCount = 0; - if (!(super.fileExisted)) writeSegmentCount(); + //this.segmentCount = 0; + //if (!(tree.fileExisted)) writeSegmentCount(); buffer = new kelondroObjectBuffer(file.toString()); } @@ -99,7 +105,7 @@ public static final kelondroDyn open(File file, long buffersize /* bytes */, lon try { return new kelondroDyn(file, buffersize, preloadTime, key, nodesize, fillChar, objectOrder); } catch (IOException ee) { - log.severe("cannot open or create file " + file.toString()); + serverLog.logSevere("kelondroDyn", "cannot open or create file " + file.toString()); e.printStackTrace(); ee.printStackTrace(); return null; @@ -107,6 +113,7 @@ public static final kelondroDyn open(File file, long buffersize /* bytes */, lon } } + /* private void writeSegmentCount() { try { setText(0, kelondroBase64Order.enhancedCoder.encodeLong(segmentCount, 8).getBytes()); @@ -114,12 +121,33 @@ private void writeSegmentCount() { } } + */ + + public kelondroRow row() { + return this.rowdef; + } + + public int cacheNodeChunkSize() { + return index.cacheNodeChunkSize(); + } + + public int cacheObjectChunkSize() { + return index.cacheObjectChunkSize(); + } + + public int[] cacheNodeStatus() { + return index.cacheNodeStatus(); + } + + public long[] cacheObjectStatus() { + return index.cacheObjectStatus(); + } - public synchronized int sizeDyn() { + public synchronized int sizeDyn() throws IOException { //this.segmentCount = 0; //Iterator i = keys(true); while (i.hasNext()) segmentCount++; //return segmentCount; - return super.size(); + return index.size(); } private static String counter(int c) { @@ -174,7 +202,7 @@ private String n() { kelondroRow.Entry nt; while (ri.hasNext()) { nt = (kelondroRow.Entry) ri.next(); - if (nt == null) throw new kelondroException(filename, "no more elements available"); + if (nt == null) throw new kelondroException("no more elements available"); g = nt.getColBytes(0); if (g == null) return null; k = new String(g, 0, keylen); @@ -193,11 +221,11 @@ private String n() { public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating) throws IOException { // iterates only the keys of the Nodes // enumerated objects are of type String - return new dynKeyIterator(super.rows(up, rotating, null)); + return new dynKeyIterator(index.rows(up, rotating, null)); } public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) throws IOException { - return new dynKeyIterator(super.rows(up, rotating, firstKey)); + return new dynKeyIterator(index.rows(up, rotating, firstKey)); } private byte[] getValueCached(byte[] key) throws IOException { @@ -207,7 +235,7 @@ private byte[] getValueCached(byte[] key) throws IOException { if (buffered != null) return buffered; // read from db - kelondroRow.Entry result = get(key); + kelondroRow.Entry result = index.get(key); if (result == null) return null; // return result @@ -217,7 +245,7 @@ private byte[] getValueCached(byte[] key) throws IOException { private synchronized void setValueCached(byte[] key, byte[] value) throws IOException { // update storage synchronized (this) { - put(key, value); + index.put(rowdef.newEntry(new byte[][]{key, value})); buffer.put(key, value); } } @@ -318,8 +346,8 @@ public synchronized void remove(String key) throws IOException { if (key == null) return; int recpos = 0; byte[] k; - while (super.get(k = dynKey(key, recpos)) != null) { - super.remove(k); + while (index.get(k = dynKey(key, recpos)) != null) { + index.remove(k); buffer.remove(k); recpos++; } @@ -442,6 +470,10 @@ public synchronized void readFile(String key, File f) throws IOException { } } } + + public synchronized void close() throws IOException { + index.close(); + } public static void main(String[] args) { // test app for DB functions @@ -449,13 +481,11 @@ public static void main(String[] args) { // arguments: // {-f2db/-db2f} - if (args.length == 0) { - randomtest(20); - } else if (args.length == 1) { + if (args.length == 1) { // open a db and list keys try { kelondroDyn kd = new kelondroDyn(new File(args[0]), 0x100000, 0, 4 ,100, '_'); - System.out.println(kd.size() + " elements in DB"); + System.out.println(kd.sizeDyn() + " elements in DB"); Iterator i = kd.dynKeys(true, false); while (i.hasNext()) System.out.println((String) i.next()); @@ -482,59 +512,6 @@ public static void main(String[] args) { } } - public static void randomtest(int elements) { - System.out.println("random " + elements + ":"); - String s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".substring(0, elements); - String t, d; - char c; - kelondroDyn tt; - File testFile = new File("test.db"); - byte[] b; - byte[] cont; - try { - int steps = 0; - while (true) { - if (testFile.exists()) testFile.delete(); - tt = new kelondroDyn(testFile, 0, 0, 4 ,100, '_'); - steps = ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); - t = s; - d = ""; - System.out.println("NEW SESSION"); - for (int i = 0; i < steps; i++) { - if ((d.length() < 3) || ((t.length() > 0) && (((int) System.currentTimeMillis() % 7) < 3))) { - // add one - c = t.charAt((int) (System.currentTimeMillis() % t.length())); - b = testWord(c); - cont = new byte[(int) (System.currentTimeMillis() % 777L)]; - tt.putDyn(new String(b), 0, cont, 0, cont.length); - d = d + c; - t = t.substring(0, t.indexOf(c)) + t.substring(t.indexOf(c) + 1); - System.out.println("added " + new String(b) + ", " + cont.length + " bytes"); - } else { - // delete one - c = d.charAt((int) (System.currentTimeMillis() % d.length())); - b = testWord(c); - tt.remove(new String(b)); - d = d.substring(0, d.indexOf(c)) + d.substring(d.indexOf(c) + 1); - t = t + c; - System.out.println("removed " + new String(b)); - } - if (countElementsDyn(tt) != tt.sizeDyn()) { - System.out.println("wrong size: count=" + countElementsDyn(tt) + ", size=" + tt.sizeDyn() + "; Tree:"); - //tt.print(); - //break; - } - } - //tt.print(); - tt.close(); - } - - } catch (Exception e) { - e.printStackTrace(); - System.out.println("TERMINATED"); - } - } - public static int countElementsDyn(kelondroDyn t) { int count = 0; try { diff --git a/source/de/anomic/kelondro/kelondroDynTree.java b/source/de/anomic/kelondro/kelondroDynTree.java index 8b10e5b76e..393f40f642 100644 --- a/source/de/anomic/kelondro/kelondroDynTree.java +++ b/source/de/anomic/kelondro/kelondroDynTree.java @@ -86,7 +86,7 @@ public kelondroDynTree(File file, long buffersize, long preloadTime, int keyleng public void close() throws IOException { Enumeration e = treeRAHandles.keys(); while (e.hasMoreElements()) closeTree((String) e.nextElement()); - int size = table.size(); + int size = table.sizeDyn(); table.close(); if (size == 0) this.file.delete(); } @@ -107,7 +107,7 @@ protected kelondroTree newTree(String key) throws IOException { kelondroRA ra = table.getRA(key); // works always, even with no-existing entry treeRAHandles.put(key, ra); try { - return new kelondroTree(ra, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, false); + return new kelondroTree(ra, buffersize, preloadTime, rowdef, false); } catch (RuntimeException e) { throw new IOException(e.getMessage()); } @@ -117,7 +117,7 @@ protected kelondroTree getTree(String key) throws IOException { if (table.existsDyn(key)) { kelondroRA ra = table.getRA(key); treeRAHandles.put(key, ra); - return new kelondroTree(ra, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent); + return new kelondroTree(ra, buffersize, preloadTime); } return null; } @@ -299,7 +299,7 @@ public static void main(String[] args) { File file = new File("D:\\bin\\testDyn.db"); if (file.exists()) { kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow("byte[] a-10, byte[] b-20, byte[] c-30"), '_'); - System.out.println("opened: table keylength=" + dt.table.row().width(0) + ", sectorsize=" + dt.table.row().width(1) + ", " + dt.table.size() + " entries."); + System.out.println("opened: table keylength=" + dt.table.row().width(0) + ", sectorsize=" + dt.table.row().width(1) + ", " + dt.table.sizeDyn() + " entries."); } else { kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow("byte[] a-10, byte[] b-20, byte[] c-30"), '_'); String name; diff --git a/source/de/anomic/kelondro/kelondroFlexSplitTable.java b/source/de/anomic/kelondro/kelondroFlexSplitTable.java index aa2f16d5ed..d88d59341b 100644 --- a/source/de/anomic/kelondro/kelondroFlexSplitTable.java +++ b/source/de/anomic/kelondro/kelondroFlexSplitTable.java @@ -243,6 +243,26 @@ public void remove() { } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return -1; + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } + public synchronized void close() throws IOException { Iterator i = tables.values().iterator(); while (i.hasNext()) ((kelondroFlexTable) i.next()).close(); diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index dba7623187..35fde208d9 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -30,6 +30,8 @@ import java.util.Date; import java.util.Iterator; +import de.anomic.server.logging.serverLog; + public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { protected kelondroBytesIntMap index; @@ -53,7 +55,7 @@ public kelondroFlexTable(File path, String tablename, long buffersize, long prel if (indexfile.exists()) { // use existing index file System.out.println("*** Using File index " + indexfile); - ki = kelondroTree.open(indexfile, buffersize, preloadTime, 10, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80); + ki = new kelondroCachedIndex(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2); } else if ((preloadTime >= 0) && (stt > preloadTime)) { // generate new index file System.out.print("*** Generating File index for " + size() + " entries from " + indexfile); @@ -105,8 +107,8 @@ private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOExc return ri; } - private kelondroTree initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException { - kelondroTree treeindex = new kelondroTree(indexfile, buffersize, preloadTime, 10, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80); + private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException { + kelondroIndex treeindex = new kelondroCachedIndex(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2); Iterator content = super.col[0].contentNodes(-1); kelondroRecords.Node node; kelondroRow.Entry indexentry; @@ -144,6 +146,8 @@ public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) } public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { + assert (row != null); + assert (!(serverLog.allZero(row.getColBytes(0)))); int i = index.geti(row.getColBytes(0)); if (i < 0) { index.puti(row.getColBytes(0), super.add(row)); @@ -223,4 +227,29 @@ public kelondroProfile profile() { return index.profile(); } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return -1; + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } + + public synchronized void close() throws IOException { + index.close(); + super.close(); + } + } diff --git a/source/de/anomic/kelondro/kelondroIndex.java b/source/de/anomic/kelondro/kelondroIndex.java index bc77629a8c..754f5a61d0 100644 --- a/source/de/anomic/kelondro/kelondroIndex.java +++ b/source/de/anomic/kelondro/kelondroIndex.java @@ -70,4 +70,12 @@ public interface kelondroIndex { public kelondroRow.Entry removeOne() throws IOException; public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException; public void close() throws IOException; + + // statistics for node caches + public int cacheNodeChunkSize(); // the size that the node cache uses for a single entry + public int[] cacheNodeStatus(); // a collection of different node cache status values + + // statistics for object caches + public int cacheObjectChunkSize(); // the size of an object entry + public long[] cacheObjectStatus(); // a collection of different object cache status values } diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java index ce3cac429a..df7f1da1b6 100644 --- a/source/de/anomic/kelondro/kelondroMap.java +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -150,6 +150,9 @@ public long[] cacheObjectStatus() { } public synchronized void set(String key, Map newMap) throws IOException { + assert (key != null); + assert (key.length() > 0); + assert (newMap != null); // update elementCount if ((sortfields != null) || (accfields != null)) { final Map oldMap = get(key, false); @@ -330,7 +333,11 @@ public synchronized long getAcc(final String field) { public synchronized int size() { if ((sortfields != null) || (accfields != null)) return elementCount; - return dyn.size(); + try { + return dyn.sizeDyn(); + } catch (IOException e) { + return 0; + } } public void close() throws IOException { @@ -373,7 +380,7 @@ public Object next() { } try { final Map map = get(nextKey); - if (map == null) throw new kelondroException(dyn.filename, "no more elements available"); + if (map == null) throw new kelondroException("no more elements available"); map.put("key", nextKey); return map; } catch (IOException e) { diff --git a/source/de/anomic/kelondro/kelondroMapTable.java b/source/de/anomic/kelondro/kelondroMapTable.java index 9f32bacacd..a735d5ee5b 100644 --- a/source/de/anomic/kelondro/kelondroMapTable.java +++ b/source/de/anomic/kelondro/kelondroMapTable.java @@ -87,11 +87,11 @@ public void declareMaps( mTables.put(tablename, map); } - public void declareTree(String tablename, kelondroRow rowdef, long buffersize /*bytes*/, long preloadTime) { + public void declareTree(String tablename, kelondroRow rowdef, long buffersize /*bytes*/, long preloadTime) throws IOException { if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared already in other context."); if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared twice."); File tablefile = new File(tablesPath, "table." + tablename + ".tdb"); - kelondroTree Tree = kelondroTree.open(tablefile, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); + kelondroIndex Tree = new kelondroCachedIndex(kelondroTree.open(tablefile, buffersize / 2, preloadTime, rowdef), buffersize / 2); tTables.put(tablename, Tree); } @@ -104,7 +104,7 @@ public synchronized void update(String tablename, String key, Map map) throws IO } public synchronized void update(String tablename, kelondroRow.Entry row /* first element is the unique key = index */) throws IOException { - kelondroTree tree = (kelondroTree) tTables.get(tablename); + kelondroIndex tree = (kelondroIndex) tTables.get(tablename); if (tree == null) throw new RuntimeException("kelondroTables.update: tree table '" + tablename + "' does not exist."); tree.put(row); tTables.put(tablename, tree); @@ -118,7 +118,7 @@ public synchronized Map selectMap(String tablename, String key) throws IOExcepti } public synchronized kelondroRow.Entry selectByte(String tablename, String key) throws IOException { - kelondroTree tree = (kelondroTree) tTables.get(tablename); + kelondroIndex tree = (kelondroIndex) tTables.get(tablename); if (tree == null) throw new RuntimeException("kelondroTables.selectByte: tree table '" + tablename + "' does not exist."); return tree.get(key.getBytes()); } @@ -142,7 +142,7 @@ public synchronized kelondroRow.Entry selectByte(String tablename, String key) t } public synchronized Iterator /* of kelondroRow.Entry-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException { - kelondroTree tree = (kelondroTree) tTables.get(tablename); + kelondroIndex tree = (kelondroIndex) tTables.get(tablename); if (tree == null) throw new RuntimeException("kelondroTables.bytes: tree table '" + tablename + "' does not exist."); return tree.rows(up, rotating, firstKey); } @@ -154,7 +154,7 @@ public synchronized void delete(String tablename, String key) throws IOException if (key.length() > table.keySize()) key = key.substring(0, table.keySize()); if (table != null) {table.remove(key); mTables.put(tablename, table); return;} - kelondroTree Tree = (kelondroTree) tTables.get(tablename); + kelondroIndex Tree = (kelondroIndex) tTables.get(tablename); if (Tree != null) {Tree.remove(key.getBytes()); tTables.put(tablename, Tree); return;} throw new RuntimeException("kelondroTables.delete: table '" + tablename + "' does not exist."); @@ -170,8 +170,8 @@ public synchronized int size(String tablename) { kelondroMap table = (kelondroMap) mTables.get(tablename); if (table != null) return table.size(); - kelondroTree Tree = (kelondroTree) tTables.get(tablename); - if (Tree != null) return Tree.size(); + kelondroIndex Tree = (kelondroIndex) tTables.get(tablename); + if (Tree != null) try { return Tree.size(); } catch (IOException e) {return 0;} throw new RuntimeException("kelondroTables.accumulator: table '" + tablename + "' does not exist."); } @@ -182,7 +182,7 @@ public void close() throws IOException { mTables = null; Iterator TreeIt = tTables.values().iterator(); - while (TreeIt.hasNext()) ((kelondroTree) TreeIt.next()).close(); + while (TreeIt.hasNext()) ((kelondroIndex) TreeIt.next()).close(); tTables = null; } diff --git a/source/de/anomic/kelondro/kelondroObjectCache.java b/source/de/anomic/kelondro/kelondroObjectCache.java index c3983526ae..69946ba469 100644 --- a/source/de/anomic/kelondro/kelondroObjectCache.java +++ b/source/de/anomic/kelondro/kelondroObjectCache.java @@ -1,16 +1,11 @@ // kelondroObjectCache.java -// ------------------------ -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2006 -// -// This is a part of the kelondro database, which is a part of YaCy +// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany +// first published 2006 on http://www.anomic.de // // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedRevision: 1986 $ // $LastChangedBy: orbiter $ // -// // LICENSE // // This program is free software; you can redistribute it and/or modify @@ -26,33 +21,6 @@ // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// -// A NOTE FROM THE AUTHOR TO THE USERS: -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// -// A NOTE FROM THE AUTHOR TO DEVELOPERS: -// -// Contributions and changes to the program code should be marked as such: -// Please enter your own (C) notice below; they must be compatible with the GPL. -// Please mark also all changes in the code; if you don't mark them then they -// can't be identified; thus all unmarked code belong to the copyright holder -// as mentioned above. A good documentation of code authorities will also help -// to maintain the code and the project. -// A re-distribution must contain the intact and unchanged copyright statement. - package de.anomic.kelondro; diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index ab090c1365..1011491715 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -104,4 +104,23 @@ public kelondroProfile profile() { return profile; } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return -1; + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } } diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 6eef999dd6..c7ef965418 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -451,11 +451,23 @@ public File file() { return new File(filename); } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry return this.headchunksize + element_in_cache; } public final int[] cacheNodeStatus() { + // a collection of different node cache status values if (cacheHeaders == null) return new int[]{0,0,0,0,0,0,0,0,0,0}; return new int[]{ cacheSize, @@ -1097,6 +1109,7 @@ protected final class contentNodeIterator implements Iterator { private int bulksize; private int bulkstart; // the offset of the bulk array to the node position private boolean fullyMarked; + private Node next; public contentNodeIterator(long maxInitTime) throws IOException, kelondroException { // initialize markedDeleted set of deleted Handles @@ -1111,16 +1124,27 @@ public contentNodeIterator(long maxInitTime) throws IOException, kelondroExcepti bulksize = Math.min(65536 / recordsize, USAGE.allCount()); bulkstart = -bulksize; bulk = new byte[bulksize * recordsize]; + next = (hasNext0()) ? next0() : null; } + public Object next() { + Node n = next; + next = next0(); + return n; + } + public boolean hasNext() { - return pos.index < USAGE.allCount(); + return next != null; } - public Object next() { + public boolean hasNext0() { + return pos.index < USAGE.allCount(); + } + + public Node next0() { // read Objects until a non-deleted Node appears - while (hasNext()) { - Node nn = next0(); + while (hasNext0()) { + Node nn = next00(); byte[] key = nn.getKey(); if ((key == null) || ((key.length > 1) && ((key[0] == 0) && (key[1] == 0))) || @@ -1134,7 +1158,7 @@ public Object next() { return null; } - public Node next0() { + public Node next00() { try { // see if the next record is in the bulk, and if not re-fill the bulk if ((pos.index - bulkstart) >= bulksize) { diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 6a33f65936..b0fd203365 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -28,6 +28,8 @@ import java.util.Iterator; import java.util.Set; +import de.anomic.server.logging.serverLog; + public class kelondroRowCollection { protected byte[] chunkcache; @@ -214,6 +216,11 @@ public void add(byte[] a) { } private final void add(byte[] a, int astart, int alength) { + assert (a != null); + assert (astart >= 0) && (astart < a.length) : " astart = " + a; + assert (!(serverLog.allZero(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength); + assert (alength > 0); + assert (astart + alength <= a.length); int l = Math.min(rowdef.objectsize(), Math.min(alength, a.length - astart)); synchronized (chunkcache) { ensureSize(chunkcount + 1); diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 18fa70b885..6cab1a433e 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -30,6 +30,8 @@ import java.util.Random; import java.util.TreeSet; +import de.anomic.server.logging.serverLog; + public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex { private static final int collectionReSortLimit = 90; @@ -88,6 +90,9 @@ public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOExc } public kelondroRow.Entry put(kelondroRow.Entry entry) { + assert (entry != null); + assert (entry.getColBytes(super.sortColumn) != null); + assert (!(serverLog.allZero(entry.getColBytes(super.sortColumn)))); long handle = profile.startWrite(); int index = -1; kelondroRow.Entry oldentry = null; @@ -361,6 +366,26 @@ public void close() { // just for compatibility with kelondroIndex interface; do nothing } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return -1; + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } + public static void main(String[] args) { /* String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; diff --git a/source/de/anomic/kelondro/kelondroSplittedTree.java b/source/de/anomic/kelondro/kelondroSplittedTree.java index 02eebdaa27..70fa6737ba 100644 --- a/source/de/anomic/kelondro/kelondroSplittedTree.java +++ b/source/de/anomic/kelondro/kelondroSplittedTree.java @@ -82,8 +82,7 @@ public kelondroSplittedTree(File pathToFiles, String filenameStub, kelondroOrder File f; for (int i = 0; i < forkfactor; i++) { f = dbFile(pathToFiles, filenameStub, forkfactor, rowdef.columns(), i); - ktfs[i] = kelondroTree.open(f, buffersize/forkfactor, preloadTime / forkfactor, kelondroTree.defaultObjectCachePercent, - rowdef, objectOrder, txtProps, txtPropsWidth); + ktfs[i] = kelondroTree.open(f, buffersize/forkfactor, preloadTime / forkfactor, rowdef, objectOrder, txtProps, txtPropsWidth); } this.order = objectOrder; ff = forkfactor; @@ -238,4 +237,23 @@ public kelondroProfile profile() { return kelondroProfile.consolidate(profiles); } + public final int cacheObjectChunkSize() { + // dummy method + return -1; + } + + public long[] cacheObjectStatus() { + // dummy method + return null; + } + + public final int cacheNodeChunkSize() { + // returns the size that the node cache uses for a single entry + return -1; + } + + public final int[] cacheNodeStatus() { + // a collection of different node cache status values + return new int[]{0,0,0,0,0,0,0,0,0,0}; + } } diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index bcaedd7e5f..7eb6b043e9 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -61,6 +61,8 @@ import java.util.Vector; import java.util.logging.Logger; +import de.anomic.server.logging.serverLog; + public class kelondroTree extends kelondroRecords implements kelondroIndex { // logging (This probably needs someone to initialize the java.util.logging.* facilities); @@ -80,9 +82,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { protected static final int rightchild = 2; // pointer for OHHandle-array: handle()-Value of right child Node protected static final int root = 0; // pointer for FHandles-array: pointer to root node - - // calibration of cache - public static final int defaultObjectCachePercent = 10; // class variables private final Search writeSearchObj = new Search(); @@ -90,17 +89,15 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { protected kelondroOrder loopDetectionOrder = new kelondroNaturalOrder(true); protected int readAheadChunkSize = 100; protected long lastIteratorCount = readAheadChunkSize; - private kelondroObjectCache objectCache; - public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef) throws IOException { - this(file, buffersize, preloadTime, objectCachePercent, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */); + public kelondroTree(File file, long buffersize, long preloadTime, kelondroRow rowdef) throws IOException { + this(file, buffersize, preloadTime, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */); } - public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, + public kelondroTree(File file, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder, int txtProps, int txtPropsWidth) throws IOException { // opens an existing tree file or creates a new tree file - super(file, - (100 - objectCachePercent) * buffersize / 100, preloadTime, + super(file, buffersize, preloadTime, thisOHBytes, thisOHHandles, rowdef, thisFHandles, txtProps, txtPropsWidth); @@ -115,23 +112,22 @@ public kelondroTree(File file, long buffersize, long preloadTime, int objectCach this.objectOrder = objectOrder; writeOrderType(); } - initObjectCache(buffersize, objectCachePercent); } - public static final kelondroTree open(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef) { - return open(file, buffersize, preloadTime, objectCachePercent, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */); + public static final kelondroTree open(File file, long buffersize, long preloadTime, kelondroRow rowdef) { + return open(file, buffersize, preloadTime, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */); } - public static final kelondroTree open(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, + public static final kelondroTree open(File file, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder, int txtProps, int txtPropsWidth) { // opens new or existing file; in case that any error occur the file is deleted again and it is tried to create the file again // if that fails, the method returns null try { - return new kelondroTree(file, buffersize, preloadTime, objectCachePercent, rowdef, objectOrder, txtProps, txtPropsWidth); + return new kelondroTree(file, buffersize, preloadTime, rowdef, objectOrder, txtProps, txtPropsWidth); } catch (IOException e) { file.delete(); try { - return new kelondroTree(file, buffersize, preloadTime, objectCachePercent, rowdef, objectOrder, txtProps, txtPropsWidth); + return new kelondroTree(file, buffersize, preloadTime, rowdef, objectOrder, txtProps, txtPropsWidth); } catch (IOException ee) { log.severe("cannot open or create file " + file.toString()); e.printStackTrace(); @@ -141,15 +137,14 @@ public static final kelondroTree open(File file, long buffersize, long preloadTi } } - public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, boolean exitOnFail) { + public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, kelondroRow rowdef, boolean exitOnFail) { // this creates a new tree within a kelondroRA - this(ra, buffersize, preloadTime, objectCachePercent, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail); + this(ra, buffersize, preloadTime, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail); } - public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, kelondroOrder objectOrder, int txtProps, int txtPropsWidth, boolean exitOnFail) { + public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder, int txtProps, int txtPropsWidth, boolean exitOnFail) { // this creates a new tree within a kelondroRA - super(ra, - (100 - objectCachePercent) * buffersize / 100, preloadTime, + super(ra, buffersize, preloadTime, thisOHBytes, thisOHHandles, rowdef, thisFHandles, txtProps, txtPropsWidth, exitOnFail); try { @@ -162,37 +157,13 @@ public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, int object this.objectOrder = objectOrder; writeOrderType(); super.setLogger(log); - initObjectCache(buffersize, objectCachePercent); } - public kelondroTree(kelondroRA ra, long buffersize, long preloadTime, int objectCachePercent) throws IOException { + public kelondroTree(kelondroRA ra, long buffersize, long preloadTime) throws IOException { // this opens a file with an existing tree in a kelondroRA - super(ra, (100 - objectCachePercent) * buffersize / 100, preloadTime); + super(ra, buffersize, preloadTime); readOrderType(); super.setLogger(log); - initObjectCache(buffersize, objectCachePercent); - } - - private void initObjectCache(long buffersize, int objectCachePercent) { - if (objectCachePercent > 0) { - long objectbuffersize = objectCachePercent * buffersize / 100; - long objecthitcachesize = objectbuffersize * 4 / 5 / cacheObjectChunkSize(); - long objectmisscachesize = objectbuffersize / 5 / cacheObjectMissSize; - this.objectCache = new kelondroObjectCache(this.filename, (int) objecthitcachesize, (int) objectmisscachesize, objecthitcachesize * 3000 , 4*1024*1024); - } else { - this.objectCache = null; - } - } - - public final static int cacheObjectMissSize = 120; - - public final int cacheObjectChunkSize() { - return row().objectsize() + /* overhead */ 16 * super.row().columns(); - } - - public long[] cacheObjectStatus() { - if (this.objectCache == null) return null; - return this.objectCache.status(); } private void writeOrderType() { @@ -243,21 +214,13 @@ private void commitNode(Node n) throws IOException { // Returns the value to which this map maps the specified key. public kelondroRow.Entry get(byte[] key) throws IOException { - // System.out.println("kelondroTree.get " + new String(key) + " in " + filename); - kelondroRow.Entry result = (objectCache == null) ? null : (kelondroRow.Entry) objectCache.get(key); - if (result != null) { - //System.out.println("cache hit in objectCache, db:" + super.filename); - return result; - } - if ((objectCache != null) && (objectCache.has(key) == -1)) return null; + kelondroRow.Entry result; synchronized (writeSearchObj) { writeSearchObj.process(key); if (writeSearchObj.found()) { result = row().newEntry(writeSearchObj.getMatcher().getValueRow()); - if (objectCache != null) objectCache.put(key, result); } else { result = null; - if (objectCache != null) objectCache.hasnot(key); } } return result; @@ -413,13 +376,14 @@ public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOExc } public kelondroRow.Entry put(kelondroRow.Entry newrow) throws IOException { + assert (newrow != null); + assert (newrow.columns() == row().columns()); + assert (!(serverLog.allZero(newrow.getColBytes(primarykey())))); // Associates the specified value with the specified key in this map kelondroRow.Entry result = null; //writeLock.stay(2000, 1000); - if (newrow.columns() != row().columns()) throw new IllegalArgumentException("put: wrong row length " + newrow.columns() + "; must be " + row().columns()); // first try to find the key element in the database synchronized(writeSearchObj) { - if (objectCache != null) objectCache.put(newrow.getColBytes(0), newrow); writeSearchObj.process(newrow.getColBytes(0)); if (writeSearchObj.found()) { // a node with this key exist. simply overwrite the content and return old content @@ -686,14 +650,8 @@ public byte[] put(byte[] key, byte[] value) throws IOException { // Removes the mapping for this key from this map if present (optional operation). public kelondroRow.Entry remove(byte[] key) throws IOException { - - // check with miss cache of object cache - // if we know that the object does not exist, then we don't need to lookup in the file - if ((objectCache != null) && (objectCache.has(key) == -1)) return null; - // delete from database synchronized(writeSearchObj) { - if (objectCache != null) objectCache.remove(key); writeSearchObj.process(key); if (writeSearchObj.found()) { Node result = writeSearchObj.getMatcher(); @@ -1299,7 +1257,7 @@ public static void cmd(String[] args) { // test script File testFile = new File("test.db"); while (testFile.exists()) testFile.delete(); - kelondroTree fm = new kelondroTree(testFile, 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(testFile, 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); byte[] dummy = "".getBytes(); fm.put("abc0".getBytes(), dummy); fm.put("bcd0".getBytes(), dummy); fm.put("def0".getBytes(), dummy); fm.put("bab0".getBytes(), dummy); @@ -1321,7 +1279,7 @@ public static void cmd(String[] args) { ret = null; } } else if (args.length == 2) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); if (args[0].equals("-v")) { fm.print(); ret = null; @@ -1329,11 +1287,11 @@ public static void cmd(String[] args) { fm.close(); } else if (args.length == 3) { if (args[0].equals("-d")) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); fm.remove(args[2].getBytes()); fm.close(); } else if (args[0].equals("-i")) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); int i = fm.imp(new File(args[1]),";"); fm.close(); ret = (i + " records imported").getBytes(); @@ -1356,12 +1314,12 @@ public static void cmd(String[] args) { if (f != null) try {f.close();}catch(Exception e){} } } else if (args[0].equals("-g")) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); kelondroRow.Entry ret2 = fm.get(args[2].getBytes()); ret = ((ret2 == null) ? null : ret2.getColBytes(1)); fm.close(); } else if (args[0].equals("-n")) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); //byte[][] keys = fm.getSequentialKeys(args[2].getBytes(), 500, true); Iterator rowIt = fm.rows(true, false, (args[2].length() == 0) ? null : args[2].getBytes()); Vector v = new Vector(); @@ -1375,10 +1333,10 @@ public static void cmd(String[] args) { File f = new File(args[3]); if (f.exists()) f.delete(); kelondroRow lens = new kelondroRow("byte[] key-" + Integer.parseInt(args[1]) + ", byte[] value-" + Integer.parseInt(args[2])); - kelondroTree fm = new kelondroTree(f, 0x100000, 0, 10, lens); + kelondroTree fm = new kelondroTree(f, 0x100000, 10, lens); fm.close(); } else if (args[0].equals("-u")) { - kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); ret = fm.put(args[1].getBytes(), args[2].getBytes()); fm.close(); } @@ -1437,7 +1395,7 @@ public static void randomtest(int elements) { int steps = 0; while (true) { if (testFile.exists()) testFile.delete(); - tt = new kelondroTree(testFile, 200, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + tt = new kelondroTree(testFile, 200, 10, new kelondroRow("byte[] a-4, byte[] b-4")); steps = 10 + ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); t = s; d = ""; @@ -1503,7 +1461,7 @@ public static void smalltest() { File f = new File("test.db"); if (f.exists()) f.delete(); try { - kelondroTree tt = new kelondroTree(f, 1000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree tt = new kelondroTree(f, 1000, 10, new kelondroRow("byte[] a-4, byte[] b-4")); byte[] b; b = testWord('B'); tt.put(b, b); //tt.print(); b = testWord('C'); tt.put(b, b); //tt.print(); @@ -1568,7 +1526,7 @@ public static void iterationtest() { public static kelondroTree testTree(File f, String testentities) throws IOException { if (f.exists()) f.delete(); - kelondroTree tt = new kelondroTree(f, 0, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); + kelondroTree tt = new kelondroTree(f, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4")); byte[] b; for (int i = 0; i < testentities.length(); i++) { b = testWord(testentities.charAt(i)); diff --git a/source/de/anomic/plasma/dbImport/AssortmentImporter.java b/source/de/anomic/plasma/dbImport/AssortmentImporter.java index 1969b5e464..8ffe978a1f 100644 --- a/source/de/anomic/plasma/dbImport/AssortmentImporter.java +++ b/source/de/anomic/plasma/dbImport/AssortmentImporter.java @@ -1,6 +1,7 @@ package de.anomic.plasma.dbImport; import java.io.File; +import java.io.IOException; import java.util.Iterator; import de.anomic.index.indexContainer; @@ -60,7 +61,12 @@ else if (!this.importAssortmentFile.canWrite()) // initializing the import assortment db this.log.logInfo("Initializing source assortment file"); - this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr, this.cacheSize/1024, preloadTime, this.log); + try { + this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr, this.cacheSize/1024, preloadTime, this.log); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } this.importStartSize = this.assortmentFile.size(); } diff --git a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java index ec36684593..6528a8bd94 100644 --- a/source/de/anomic/plasma/dbImport/plasmaDbImporter.java +++ b/source/de/anomic/plasma/dbImport/plasmaDbImporter.java @@ -1,6 +1,7 @@ package de.anomic.plasma.dbImport; import java.io.File; +import java.io.IOException; import java.util.HashSet; import java.util.Iterator; import java.util.TreeSet; @@ -75,7 +76,12 @@ public void init(File theImportPath, File theIndexPath, int theCacheSize, long p } this.log.logFine("Initializing source word index db."); - this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, true, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false)); + try { + this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, true, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false)); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } this.log.logFine("Initializing import URL db."); this.importUrlDB = new plasmaCrawlLURL(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, false); this.importStartSize = this.importWordIndex.size(); diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index 838f2452e4..74dc6c6f85 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -158,7 +158,7 @@ public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime, boolean n } else { File oldCacheFile = new File(cachePath, "urlErr0.db"); oldCacheFile.getParentFile().mkdirs(); - urlIndexFile = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); + urlIndexFile = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, rowdef); } } diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index a6ec43a8c0..2666af033b 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -67,6 +67,7 @@ import de.anomic.index.indexEntry; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBufferedIndex; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroRow; @@ -101,13 +102,11 @@ public plasmaCrawlLURL(File plasmaPath, File indexPath, int bufferkb, long prelo try { if (newdb) { - urlIndexFile = new kelondroBufferedIndex( - new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb * 0x400, preloadTime, plasmaCrawlLURLNewEntry.rowdef, kelondroBase64Order.enhancedCoder)); + urlIndexFile = new kelondroBufferedIndex(new kelondroCachedIndex(new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLNewEntry.rowdef, kelondroBase64Order.enhancedCoder), bufferkb / 2 * 0x400)); } else { File oldLURLDB = new File(plasmaPath, "urlHash.db"); oldLURLDB.getParentFile().mkdirs(); - urlIndexFile = new kelondroBufferedIndex( - new kelondroTree(oldLURLDB, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef)); + urlIndexFile = new kelondroBufferedIndex(new kelondroCachedIndex(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLOldEntry.rowdef), bufferkb / 2 * 0x400)); } } catch (IOException e) { e.printStackTrace(); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 9f13f8af03..f8bc9d5bf2 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -52,6 +52,7 @@ import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroBufferedIndex; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroRecords; @@ -98,7 +99,7 @@ public class plasmaCrawlNURL extends indexURL { private kelondroStack movieStack; // links pointing to movie resources private kelondroStack musicStack; // links pointing to music resources - private final HashSet stackIndex; // to find out if a specific link is already on any stack + private final HashSet stackIndex; // to find out if a specific link is already on any stack private File cacheStacksPath; private int bufferkb; private long preloadTime; @@ -156,7 +157,7 @@ private void openHashCache() { String newCacheName = "urlNotice4.table"; cacheStacksPath.mkdirs(); try { - urlIndexFile = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder); + urlIndexFile = new kelondroBufferedIndex(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder)); } catch (IOException e) { e.printStackTrace(); System.exit(-1); @@ -164,7 +165,7 @@ private void openHashCache() { } else { File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db"); oldCacheFile.getParentFile().mkdirs(); - urlIndexFile = kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef); + urlIndexFile = new kelondroBufferedIndex(kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, rowdef)); } } diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index c1230ed699..382551f21c 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -61,6 +61,8 @@ import de.anomic.http.httpc; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroBufferedIndex; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; @@ -681,7 +683,7 @@ private void openDB() { String newCacheName = "urlPreNotice1.table"; cacheStacksPath.mkdirs(); try { - this.urlEntryCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder); + this.urlEntryCache = new kelondroBufferedIndex(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder)); } catch (IOException e) { e.printStackTrace(); System.exit(-1); @@ -690,7 +692,7 @@ private void openDB() { if (this.dbtype == QUEUE_DB_TYPE_TREE) { File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); cacheFile.getParentFile().mkdirs(); - this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef); + this.urlEntryCache = new kelondroBufferedIndex(kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef)); } } @@ -705,12 +707,12 @@ public int[] cacheNodeStatus() { } public int cacheObjectChunkSize() { - if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectChunkSize(); + if (urlEntryCache instanceof kelondroCachedIndex) return ((kelondroCachedIndex) urlEntryCache).cacheObjectChunkSize(); return 0; } public long[] cacheObjectStatus() { - if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheObjectStatus(); + if (urlEntryCache instanceof kelondroCachedIndex) return ((kelondroCachedIndex) urlEntryCache).cacheObjectStatus(); return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 690cde2e6e..2b243237f2 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -423,7 +423,12 @@ ramLURL, getConfigBool("useFlexTableForLURL", false), ramNURL, getConfigBool("useFlexTableForNURL", false), ramEURL, getConfigBool("useFlexTableForEURL", true), ramLURL_time); - wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false)); + try { + wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false)); + } catch (IOException e1) { + e1.printStackTrace(); + System.exit(-1); + } // set a high maximum cache size to current size; this is adopted later automatically int wordCacheMaxCount = Math.max((int) getConfigLong("wordCacheInitCount", 30000), diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 7154942a3c..7feb095846 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -72,7 +72,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { public boolean useCollectionIndex; // flag for usage of new collectionIndex db private int idleDivisor, busyDivisor; - public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) { + public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException { this.oldDatabaseRoot = oldDatabaseRoot; this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log); this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log); @@ -857,8 +857,8 @@ public static void main(String[] args) { // System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis())))); File plasmadb = new File("D:\\dev\\proxy\\DATA\\PLASMADB"); File indexdb = new File("D:\\dev\\proxy\\DATA\\INDEX"); - plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, true, 555, 1000, new serverLog("TESTAPP"), false); try { + plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, true, 555, 1000, new serverLog("TESTAPP"), false); Iterator containerIter = index.wordContainers("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true); while (containerIter.hasNext()) { System.out.println("File: " + (indexContainer) containerIter.next()); diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index d7d95e9b1a..0f530c452e 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -60,6 +60,7 @@ import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURLEntry; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; @@ -81,7 +82,7 @@ public final class plasmaWordIndexAssortment { private File assortmentFile; private int assortmentLength; private serverLog log; - private kelondroTree assortments; + private kelondroCachedIndex assortments; private long bufferSize; private long preloadTime; @@ -107,7 +108,7 @@ private static int assortmentCapacity(int rowsize) { return (rowsize - bufferStructureBasis.width(0) - bufferStructureBasis.width(1) - bufferStructureBasis.width(2)) / (bufferStructureBasis.width(3) + bufferStructureBasis.width(4)); } - public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) { + public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException { if (!(storagePath.exists())) storagePath.mkdirs(); this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db"); this.assortmentLength = assortmentLength; @@ -117,7 +118,7 @@ public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int buf this.log = log; // open assortment tree file long start = System.currentTimeMillis(); - assortments = kelondroTree.open(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength)); + assortments = new kelondroCachedIndex(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2); long stop = System.currentTimeMillis(); if (log != null) log.logConfig("Opened Assortment, " + assortments.size() + " entries, width " + @@ -127,7 +128,7 @@ public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int buf assortments.cacheNodeStatus()[1] + " preloaded"); } - public void store(indexContainer newContainer) { + public void store(indexContainer newContainer) throws IOException { // stores a word index to assortment database // this throws an exception if the word hash already existed //log.logDebug("storeAssortment: wordHash=" + wordHash + ", urlHash=" + entry.getUrlHash() + ", time=" + creationTime); @@ -148,15 +149,15 @@ public void store(indexContainer newContainer) { oldrow = assortments.put(row); } catch (IOException e) { e.printStackTrace(); - log.logSevere("storeAssortment/IO-error: " + e.getMessage() + " - reset assortment-DB " + assortments.file(), e); + log.logSevere("storeAssortment/IO-error: " + e.getMessage() + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); } catch (IndexOutOfBoundsException e) { e.printStackTrace(); - log.logSevere("storeAssortment/IO-error: " + e.getMessage() + " - reset assortment-DB " + assortments.file(), e); + log.logSevere("storeAssortment/IO-error: " + e.getMessage() + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); } catch (kelondroException e) { e.printStackTrace(); - log.logSevere("storeAssortment/kelondro-error: " + e.getMessage() + " - reset assortment-DB " + assortments.file(), e); + log.logSevere("storeAssortment/kelondro-error: " + e.getMessage() + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); } if (oldrow != null) throw new RuntimeException("Store to assortment ambiguous"); @@ -170,12 +171,12 @@ public indexContainer remove(String wordHash) { row = assortments.remove(wordHash.getBytes()); } catch (IOException e) { log.logSevere("removeAssortment/IO-error: " + e.getMessage() - + " - reset assortment-DB " + assortments.file(), e); + + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return null; } catch (kelondroException e) { log.logSevere("removeAssortment/kelondro-error: " + e.getMessage() - + " - reset assortment-DB " + assortments.file(), e); + + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return null; } @@ -193,7 +194,7 @@ public boolean contains(String wordHash) { return false; } catch (kelondroException e) { log.logSevere("removeAssortment/kelondro-error: " + e.getMessage() - + " - reset assortment-DB " + assortments.file(), e); + + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return false; } @@ -207,12 +208,12 @@ public indexContainer get(String wordHash) { row = assortments.get(wordHash.getBytes()); } catch (IOException e) { log.logSevere("removeAssortment/IO-error: " + e.getMessage() - + " - reset assortment-DB " + assortments.file(), e); + + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return null; } catch (kelondroException e) { log.logSevere("removeAssortment/kelondro-error: " + e.getMessage() - + " - reset assortment-DB " + assortments.file(), e); + + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return null; } @@ -246,12 +247,12 @@ private void resetDatabase() { File backupFile = new File(backupPath, assortmentFile.getName() + System.currentTimeMillis()); assortmentFile.renameTo(backupFile); log.logInfo("a back-up of the deleted assortment file is in " + backupFile.toString()); + if (assortmentFile.exists()) assortmentFile.delete(); + assortments = new kelondroCachedIndex(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2); } catch (Exception e) { // if this fails, delete the file if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database"); } - if (assortmentFile.exists()) assortmentFile.delete(); - assortments = kelondroTree.open(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength)); } public Iterator containers(String startWordHash, boolean up, boolean rot) throws IOException { @@ -259,7 +260,7 @@ public Iterator containers(String startWordHash, boolean up, boolean rot) throws try { return new containerIterator(startWordHash, up, rot); } catch (kelondroException e) { - log.logSevere("iterateAssortment/kelondro-error: " + e.getMessage() + " - reset assortment-DB " + assortments.file(), e); + log.logSevere("iterateAssortment/kelondro-error: " + e.getMessage() + " - reset assortment-DB " + assortmentFile, e); resetDatabase(); return null; } @@ -289,7 +290,11 @@ public void remove() { } public int size() { - return assortments.size(); + try { + return assortments.size(); + } catch (IOException e) { + return 0; + } } public int cacheNodeChunkSize() { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index bd83b151e9..4ba46e98c6 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -73,7 +73,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl private plasmaWordIndexAssortment[] assortments; private long completeBufferKB; - public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, int bufferkb, long preloadTime, serverLog log) { + public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, int bufferkb, long preloadTime, serverLog log) throws IOException { // set class variables if (!(assortmentsPath.exists())) assortmentsPath.mkdirs(); this.clusterCount = clusterCount; @@ -112,7 +112,7 @@ public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, } } - private indexContainer storeSingular(indexContainer newContainer) { + private indexContainer storeSingular(indexContainer newContainer) throws IOException { // this tries to store the record. If the record does not fit, or a same hash already // exists and would not fit together with the new record, then the record is deleted from // the assortmen(s) and returned together with the newRecord. @@ -129,14 +129,14 @@ private indexContainer storeSingular(indexContainer newContainer) { return null; } - private void storeForced(indexContainer newContainer) { + private void storeForced(indexContainer newContainer) throws IOException { // this stores the record and overwrites an existing record. // this is safe if we can be shure that the record does not exist before. if ((newContainer == null) || (newContainer.size() == 0) || (newContainer.size() > clusterCount)) return; // it will not fit assortments[newContainer.size() - 1].store(newContainer); } - private void storeStretched(indexContainer newContainer) { + private void storeStretched(indexContainer newContainer) throws IOException { // this stores the record and stretches the storage over // all the assortments that are necessary to fit in the record // IMPORTANT: it must be ensured that the wordHash does not exist in the cluster before @@ -209,18 +209,30 @@ public indexContainer addEntries(indexContainer newContainer, long creationTime, assert (i.hasNext()); c.add((indexEntry) i.next(), newContainer.updated()); } - storeForced(c); + try { + storeForced(c); + } catch (IOException e) { + e.printStackTrace(); + } } return null; } - if (newContainer.size() <= clusterCount) newContainer = storeSingular(newContainer); + if (newContainer.size() <= clusterCount) try { + newContainer = storeSingular(newContainer); + } catch (IOException e) { + e.printStackTrace(); + } if (newContainer == null) return null; // clean up the whole thing and try to insert the container then newContainer.add(deleteContainer(newContainer.getWordHash(), -1), -1); if (newContainer.size() > clusterCapacity) return newContainer; - storeStretched(newContainer); + try { + storeStretched(newContainer); + } catch (IOException e) { + e.printStackTrace(); + } return null; } diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java index 40c2d792c1..286382fd61 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFile.java +++ b/source/de/anomic/plasma/plasmaWordIndexFile.java @@ -1,4 +1,4 @@ -// plasmaWordIndexEntity.java +// plasmaWordIndexFile.java // -------------------------- // part of YACY // (C) by Michael Peter Christen; mc@anomic.de @@ -90,7 +90,7 @@ private kelondroTree indexFile(File databaseRoot, String wordHash) { if (fp != null) fp.mkdirs(); long cacheSize = theLocation.length(); if (cacheSize > 1048576) cacheSize = 1048576; - return kelondroTree.open(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, + return kelondroTree.open(theLocation, cacheSize, 0, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + indexURLEntry.encodedByteArrayFormLength(false))); } diff --git a/source/de/anomic/server/logging/serverLog.java b/source/de/anomic/server/logging/serverLog.java index 8bf535260f..035d77eca5 100644 --- a/source/de/anomic/server/logging/serverLog.java +++ b/source/de/anomic/server/logging/serverLog.java @@ -225,4 +225,27 @@ public static final String arrayList(byte[] b, int start, int length) { sb.append(']'); return sb.toString(); } + + public static final String table(byte[] b, int marker) { + StringBuffer sb = new StringBuffer(b.length * 4); + for (int i = 0; i < b.length; i++) { + if (i % 16 == 0) + sb.append('\n').append("# ").append(Integer.toHexString(i)); + else + sb.append(','); + sb.append(' ').append(Integer.toString((int) b[i])); + } + sb.append('\n'); + return sb.toString(); + } + + public static final boolean allZero(byte[] a) { + return allZero(a, 0, a.length); + } + + public static final boolean allZero(byte[] a, int astart, int alength) { + for (int i = 0; i < alength; i++) if (a[astart + i] != 0) return false; + return true; + } + } diff --git a/source/de/anomic/server/serverObjects.java b/source/de/anomic/server/serverObjects.java index 44a54d4c4c..684b7dbe77 100644 --- a/source/de/anomic/server/serverObjects.java +++ b/source/de/anomic/server/serverObjects.java @@ -67,7 +67,6 @@ This shall speed up usage when a slow internet connection is used (dial-up) import java.util.Map; import de.anomic.data.wikiCode; -import de.anomic.http.httpHeader; public class serverObjects extends Hashtable implements Cloneable { diff --git a/source/de/anomic/server/servletProperties.java b/source/de/anomic/server/servletProperties.java index 6194f334dd..5471809641 100644 --- a/source/de/anomic/server/servletProperties.java +++ b/source/de/anomic/server/servletProperties.java @@ -24,6 +24,9 @@ import de.anomic.http.httpHeader; public class servletProperties extends serverObjects { + + private static final long serialVersionUID = 1L; + private String prefix=""; private httpHeader outgoingHeader; diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index b56a8edaae..f63d9eb3f7 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -209,7 +209,12 @@ public yacyCore(plasmaSwitchboard sb) { int memNews = Integer.parseInt(switchboard.getConfig("ramCacheNews", "1024")) / 1024; long memNews_time = Long.parseLong(switchboard.getConfig("ramCacheNews_time", "1000")); log.logConfig("News Cache memory = " + memNews + " KB"); - newsPool = new yacyNewsPool(yacyDBPath, memNews, memNews_time); + try { + newsPool = new yacyNewsPool(yacyDBPath, memNews, memNews_time); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } loadSeedUploadMethods(); diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index c8ab83fd0a..25c6a2264e 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -50,7 +50,9 @@ import java.util.Iterator; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.kelondro.kelondroException; +import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.server.serverCodings; @@ -61,19 +63,19 @@ public class yacyNewsDB { private File path; private int bufferkb; private long preloadTime; - protected kelondroTree news; + protected kelondroIndex news; - public yacyNewsDB(File path, int bufferkb, long preloadTime) { + public yacyNewsDB(File path, int bufferkb, long preloadTime) throws IOException { this.path = path; this.bufferkb = bufferkb; this.preloadTime = preloadTime; - this.news = kelondroTree.open(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, yacyNewsRecord.rowdef); + this.news = new kelondroCachedIndex(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400); } - private void resetDB() { + private void resetDB() throws IOException { try {close();} catch (Exception e) {} if (path.exists()) path.delete(); - this.news = kelondroTree.open(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, yacyNewsRecord.rowdef); + this.news = new kelondroCachedIndex(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400); } public int cacheNodeChunkSize() { @@ -101,7 +103,7 @@ public void finalize() { close(); } - public int size() { + public int size() throws IOException { return news.size(); } @@ -164,7 +166,7 @@ protected final static yacyNewsRecord b2r(kelondroRow.Entry b) { ); } - protected final kelondroRow.Entry r2b(yacyNewsRecord r) { + protected final kelondroRow.Entry r2b(yacyNewsRecord r) throws IOException { try { if (r == null) return null; String attributes = r.attributes().toString(); diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 986cbd55a9..c8e88376f1 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -91,7 +91,7 @@ public class yacyNewsPool { private int maxDistribution; - public yacyNewsPool(File yacyDBPath, int bufferkb, long preloadTime) { + public yacyNewsPool(File yacyDBPath, int bufferkb, long preloadTime) throws IOException { newsDB = new yacyNewsDB(new File(yacyDBPath, "news1.db"), bufferkb, preloadTime); outgoingNews = new yacyNewsQueue(new File(yacyDBPath, "newsOut1.stack"), newsDB); publishedNews = new yacyNewsQueue(new File(yacyDBPath, "newsPublished1.stack"), newsDB); @@ -101,7 +101,12 @@ public yacyNewsPool(File yacyDBPath, int bufferkb, long preloadTime) { } public int dbSize() { - return newsDB.size(); + try { + return newsDB.size(); + } catch (IOException e) { + e.printStackTrace(); + return 0; + } } public int cacheNodeChunkSize() { diff --git a/source/yacy.java b/source/yacy.java index f03e188967..277b31b857 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -656,7 +656,13 @@ public static void migrateWords(String homePath) { serverLog log = new serverLog("WORDMIGRATION"); log.logInfo("STARTING MIGRATION"); boolean useCollectionIndex = sps.getConfigBool("useCollectionIndex", false); - plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, true, 20000, 10000, log, useCollectionIndex); + plasmaWordIndex wordIndexCache = null; + try { + wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, true, 20000, 10000, log, useCollectionIndex); + } catch (IOException e1) { + e1.printStackTrace(); + System.exit(-1); + } enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true); String wordhash; File wordfile; @@ -1125,7 +1131,7 @@ private static void migratelurls(File root, File urlHash) { plasmaURLPool pool = new plasmaURLPool(new File(root, "DATA/PLASMADB"), new File(root, "DATA/INDEX"), 16000, true, 1000, true, 1000, true, 10000); kelondroTree oldindex = null; try { - oldindex = new kelondroTree(urlHash, 1000, -1, kelondroTree.defaultObjectCachePercent, plasmaCrawlLURLOldEntry.rowdef); + oldindex = new kelondroTree(urlHash, 1000, -1, plasmaCrawlLURLOldEntry.rowdef); } catch (IOException e) { System.out.println("ERROR: CANNOT OPEN OLD INDEX: " + e.getMessage()); } @@ -1144,7 +1150,7 @@ private static void migratelurls(File root, File urlHash) { oldrow = (kelondroRow.Entry) eiter.next(); } catch (Exception e) { // an IOException may occur here - e.printStackTrace(); + //e.printStackTrace(); oldrow = null; } if (oldrow != null) try {