Skip to content

Commit

Permalink
- fixed re-search bug: after a search with several words, a second se…
Browse files Browse the repository at this point in the history
…arch could not

  find the same words as before. This was caused by indexContainer storing the url references
  with a hashtable. A tree was needed to work with the index conjunction-by-enumeration
- added permanent ram cache flush (again)
- removed direct flush of ram cache after a large container is added.
  this happens especially during DHT transmission and therefore this fix should
  speed up DHT transmission on server side.
- removed unused and out-dated methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1765 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Feb 25, 2006
1 parent 88c0e1d commit 3703f76
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 182 deletions.
2 changes: 0 additions & 2 deletions htroot/xml/snippet.java
Expand Up @@ -13,8 +13,6 @@
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;

public class snippet {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException {
Expand Down
7 changes: 6 additions & 1 deletion source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -862,6 +862,10 @@ public boolean deQueue() {
return false;
}

// flush some entries from the RAM cache
// (new permanent cache flushing)
wordIndex.flushCacheSome();

boolean doneSomething = false;

// possibly delete entries from last chunk
Expand All @@ -883,7 +887,6 @@ public boolean deQueue() {
doneSomething = true;
}


synchronized (sbQueue) {

if (sbQueue.size() == 0) {
Expand Down Expand Up @@ -929,6 +932,8 @@ public boolean deQueue() {

processResourceStack(nextentry);
}

// ready & finished
return true;
}

Expand Down
194 changes: 27 additions & 167 deletions source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -118,28 +118,41 @@ public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, bo
int added = ramCache.addEntries(entries, updateTime, highPriority);

// force flush
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}

if (highPriority) {
if (ramCache.size() > ramCache.getMaxWordsHigh()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
flushCache(1);
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
flushCache(1);
}
}
}
return added;
}

private synchronized void flushCacheToBackend(String wordHash) {
// Flushes a size-proportional batch of word caches from the RAM cache
// (roughly one flush per 500 cached words), clamped to the range [5, 50]
// so the permanent cache flushing neither stalls nor does too little work.
public synchronized void flushCacheSome() {
    int batch = ramCache.size() / 500;
    if (batch < 5) {
        batch = 5;
    } else if (batch > 50) {
        batch = 50;
    }
    flushCache(batch);
}

// Flushes up to 'count' word caches from the RAM cache, picking the best
// flush candidate each round; stops early once the RAM cache is empty.
// The short sleep between rounds yields the CPU to other threads.
public synchronized void flushCache(int count) {
    for (int i = 0; i < count; i++) {
        if (ramCache.size() == 0) break;
        flushCache(ramCache.bestFlushWordHash());
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // Do not swallow the interruption: restore the interrupt flag
            // so callers (and subsequent blocking calls) can observe it.
            Thread.currentThread().interrupt();
        }
    }
}

private synchronized void flushCache(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
Expand All @@ -149,15 +162,6 @@ private synchronized void flushCacheToBackend(String wordHash) {
}
}

// Stores a container in the assortment cluster; whatever the cluster
// cannot take (the non-null feedback container) is pushed down to the
// plain backend. Returns the number of entries added.
private int addEntriesBackend(plasmaWordIndexEntryContainer entries) {
    final plasmaWordIndexEntryContainer rejected =
            assortmentCluster.storeTry(entries.wordHash(), entries);
    if (rejected == null) return entries.size();
    return backend.addEntries(rejected, -1, true);
}

private static final int hour = 3600000;
private static final int day = 86400000;

Expand Down Expand Up @@ -259,22 +263,6 @@ public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean delet
return container;
}

// Materializes the index for wordHash as a file-backed entity: first the
// RAM cache is flushed to the backend, then — time permitting — the
// assortment cluster, and finally the backend entity is opened with
// whatever time budget remains. maxTime < 0 flushes the assortments
// without a time limit.
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
    final long start = System.currentTimeMillis();
    flushCacheToBackend(wordHash);
    if (maxTime < 0) {
        flushFromAssortmentCluster(wordHash, -1);
    } else {
        final long budget = maxTime - (System.currentTimeMillis() - start);
        if (budget > 0) flushFromAssortmentCluster(wordHash, budget);
    }
    final long left = maxTime - (System.currentTimeMillis() - start);
    return backend.getEntity(wordHash, deleteIfEmpty, Math.max(0, left));
}

public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {

// retrieve entities that belong to the hashes
Expand Down Expand Up @@ -351,19 +339,6 @@ public synchronized int removeEntries(String wordHash, String[] urlHashes, boole
return removed;
}

// Removes the container for 'key' from every assortment and integrates it
// into the backend. Only called when an assortment entry must be deleted
// or returned inside an index entity. Returns true when at least one
// entry actually reached the backend.
private boolean flushFromAssortmentCluster(String key, long maxTime) {
    // keep 20% of the budget for the backend add that follows
    if (maxTime > 0) maxTime = 8 * maxTime / 10;
    final plasmaWordIndexEntryContainer removed = assortmentCluster.removeFromAll(key, maxTime);
    if (removed == null) return false;
    // non-empty entry container: hand it down to the backend
    return backend.addEntries(removed, removed.updated(), true) > 0;
}

public static final int RL_RAMCACHE = 0;
public static final int RL_FILECACHE = 1;
public static final int RL_ASSORTMENTS = 2;
Expand Down Expand Up @@ -485,121 +460,6 @@ public void remove() {
}
} // class rotatingWordIterator

/*
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
return new iterateFiles(startHash, up, deleteEmpty);
}
public final class iterateFiles implements Iterator {
// Iterator of hash-strings in WORDS path
private final ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File entries
private final Comparator comp; // for string-compare
private String buffer; // the prefetch-buffer
private final boolean delete;
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList();
this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty;
// the we initially fill the hierarchy with the content of the root folder
String path = "WORDS";
TreeSet list = list(new File(databaseRoot, path));
// if we have a start hash then we find the appropriate subdirectory to start
if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
delete(startHash.substring(0, 1), list);
if (list.size() > 0) {
hierarchy.add(list);
String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
int pathc = 0;
while ((pathc < paths.length) &&
(comp.compare((String) list.first(), paths[pathc]) == 0)) {
path = path + "/" + paths[pathc];
list = list(new File(databaseRoot, path));
delete(paths[pathc], list);
if (list.size() == 0) break;
hierarchy.add(list);
pathc++;
}
}
while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
} else {
hierarchy.add(list);
buffer = next0();
}
}
private synchronized void delete(String pattern, TreeSet names) {
String name;
while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
}
private TreeSet list(File path) {
// System.out.println("PATH: " + path);
TreeSet t = new TreeSet(comp);
String[] l = path.list();
if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
// else System.out.println("DEBUG: wrong path " + path);
// System.out.println(t);
return t;
}
private synchronized String next0() {
// the object is a File pointing to the corresponding file
File f;
String n;
TreeSet t;
do {
t = null;
while ((t == null) && (hierarchy.size() > 0)) {
t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
if (t.size() == 0) {
hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
t = null;
}
}
if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
// fetch value
f = new File(n = (String) t.first());
t.remove(n);
// if the value represents another folder, we step into the next hierarchy
if (f.isDirectory()) {
t = list(f);
if (t.size() == 0) {
if (delete) f.delete();
} else {
hierarchy.add(t);
}
f = null;
}
} while (f == null);
// thats it
if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
return null;
} else {
return n.substring(0, yacySeedDB.commonHashLength);
}
}
public boolean hasNext() {
return buffer != null;
}
public Object next() {
String r = buffer;
while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
return r;
}
public void remove() {
}
}
*/


public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaWordIndexClassicDB.java
Expand Up @@ -194,7 +194,7 @@ public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean delet
}
return container;
} else {
return new plasmaWordIndexEntryContainer(wordHash, 0);
return new plasmaWordIndexEntryContainer(wordHash);
}
}

Expand Down
14 changes: 8 additions & 6 deletions source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
Expand Up @@ -52,27 +52,28 @@

package de.anomic.plasma;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;

import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;

public final class plasmaWordIndexEntryContainer implements Comparable {

private String wordHash;
private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
private long updateTime;

// Creates a container for the index entries of one word. NOTE(review):
// this span shows both sides of the diff — the old delegation (HashMap
// with initial capacity 16) and its replacement delegating to the
// ordering-based constructor, needed for conjunction-by-enumeration.
public plasmaWordIndexEntryContainer(String wordHash) {
// old variant (removed by this commit): fixed initial HashMap capacity
this(wordHash,16);
// new variant: natural ascending ordering of the url hashes
this(wordHash, new kelondroNaturalOrder(true));
}

// old signature (removed by this commit): took an initial map capacity
public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
// new signature: takes the ordering used to sort the url-hash keys
public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) {
this.wordHash = wordHash;
// 0 means "never updated yet"
this.updateTime = 0;
// old variant (removed): unordered map broke re-searches with several words
container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
// new variant: sorted map, required for the enumeration-based join
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}

public void setWordHash(String newWordHash) {
Expand Down Expand Up @@ -158,7 +159,7 @@ public Iterator entries() {
}

public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
c.add(entry);
c.updateTime = creationTime;
return c;
Expand Down Expand Up @@ -283,6 +284,7 @@ private static plasmaWordIndexEntryContainer joinConstructiveByEnumeration(plasm
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = ie1.getUrlHash().compareTo(ie2.getUrlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;
} else if (c > 0) {
Expand Down
10 changes: 5 additions & 5 deletions source/yacy.java
Expand Up @@ -956,14 +956,14 @@ public static void minimizeUrlDB(String homePath) {
String wordChunkStartHash = "------------", wordChunkEndHash;

while (wordHashIterator.hasNext()) {
plasmaWordIndexEntity wordIdxEntity = null;
plasmaWordIndexEntryContainer wordIdxContainer = null;
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
wordIdxContainer = wordIndex.getContainer(wordhash, true, -1);

// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);
Iterator wordIdxEntries = wordIdxContainer.entries();
plasmaWordIndexEntry wordIdxEntry;
while (wordIdxEntries.hasNext()) {
wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
Expand All @@ -978,7 +978,7 @@ public static void minimizeUrlDB(String homePath) {
} catch (IOException e) {}
}
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;
wordIdxContainer = null;

if (wordCounter%500 == 0) {
wordChunkEndHash = wordhash;
Expand All @@ -997,7 +997,7 @@ public static void minimizeUrlDB(String homePath) {
} catch (Exception e) {
e.printStackTrace();
} finally {
if (wordIdxEntity != null) try { wordIdxEntity.close(); } catch (Exception e) {}
if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
}
}
currentUrlDB.close();
Expand Down

0 comments on commit 3703f76

Please sign in to comment.