Skip to content

Commit

Permalink
- fixed re-search bug: after a search with several words, a second se…
Browse files Browse the repository at this point in the history
…arch could not

  find the same words as before. This was caused by indexContainer storing the url references
  with a hashtable. A tree was needed to work with the index conjunction-by-enumeration
- added permanent ram cache flush (again)
- removed direct flush of ram cache after a large container is added.
  this happens especially during DHT transmission and therefore this fix should
  speed up DHT transmission on server side.
- removed unused and out-dated methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1765 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Feb 25, 2006
1 parent 88c0e1d commit 3703f76
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 182 deletions.
2 changes: 0 additions & 2 deletions htroot/xml/snippet.java
Expand Up @@ -13,8 +13,6 @@
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;

public class snippet {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException {
Expand Down
7 changes: 6 additions & 1 deletion source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -862,6 +862,10 @@ public boolean deQueue() {
return false;
}

// flush some entries from the RAM cache
// (new permanent cache flushing)
wordIndex.flushCacheSome();

boolean doneSomething = false;

// possibly delete entries from last chunk
Expand All @@ -883,7 +887,6 @@ public boolean deQueue() {
doneSomething = true;
}


synchronized (sbQueue) {

if (sbQueue.size() == 0) {
Expand Down Expand Up @@ -929,6 +932,8 @@ public boolean deQueue() {

processResourceStack(nextentry);
}

// ready & finished
return true;
}

Expand Down
194 changes: 27 additions & 167 deletions source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -118,28 +118,41 @@ public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, bo
int added = ramCache.addEntries(entries, updateTime, highPriority);

// force flush
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}

if (highPriority) {
if (ramCache.size() > ramCache.getMaxWordsHigh()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
flushCache(1);
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
try { Thread.sleep(10); } catch (InterruptedException e) { }
flushCacheToBackend(ramCache.bestFlushWordHash());
}}
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
flushCache(1);
}
}
}
return added;
}

private synchronized void flushCacheToBackend(String wordHash) {
// Flushes a size-proportional batch of word caches from the RAM cache
// (roughly one flush per 500 cached words), clamped to the range [5, 50]
// so the permanent cache flushing neither stalls nor does too little work.
public synchronized void flushCacheSome() {
    int batch = ramCache.size() / 500;
    if (batch < 5) {
        batch = 5;
    } else if (batch > 50) {
        batch = 50;
    }
    flushCache(batch);
}

// Flushes up to 'count' word caches from the RAM cache, picking the best
// flush candidate each round; stops early once the RAM cache is empty.
// The short sleep between rounds yields the CPU to other threads.
public synchronized void flushCache(int count) {
    for (int i = 0; i < count; i++) {
        if (ramCache.size() == 0) break;
        flushCache(ramCache.bestFlushWordHash());
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // Do not swallow the interruption: restore the interrupt flag
            // so callers (and subsequent blocking calls) can observe it.
            Thread.currentThread().interrupt();
        }
    }
}

private synchronized void flushCache(String wordHash) {
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
Expand All @@ -149,15 +162,6 @@ private synchronized void flushCacheToBackend(String wordHash) {
}
}

// Stores a container in the assortment cluster; whatever the cluster
// cannot take (the non-null feedback container) is pushed down to the
// plain backend. Returns the number of entries added.
private int addEntriesBackend(plasmaWordIndexEntryContainer entries) {
    final plasmaWordIndexEntryContainer rejected =
            assortmentCluster.storeTry(entries.wordHash(), entries);
    if (rejected == null) return entries.size();
    return backend.addEntries(rejected, -1, true);
}

private static final int hour = 3600000;
private static final int day = 86400000;

Expand Down Expand Up @@ -259,22 +263,6 @@ public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean delet
return container;
}

// Materializes the index for wordHash as a file-backed entity: first the
// RAM cache is flushed to the backend, then — time permitting — the
// assortment cluster, and finally the backend entity is opened with
// whatever time budget remains. maxTime < 0 flushes the assortments
// without a time limit.
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
    final long start = System.currentTimeMillis();
    flushCacheToBackend(wordHash);
    if (maxTime < 0) {
        flushFromAssortmentCluster(wordHash, -1);
    } else {
        final long budget = maxTime - (System.currentTimeMillis() - start);
        if (budget > 0) flushFromAssortmentCluster(wordHash, budget);
    }
    final long left = maxTime - (System.currentTimeMillis() - start);
    return backend.getEntity(wordHash, deleteIfEmpty, Math.max(0, left));
}

public Set getContainers(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {

// retrieve entities that belong to the hashes
Expand Down Expand Up @@ -351,19 +339,6 @@ public synchronized int removeEntries(String wordHash, String[] urlHashes, boole
return removed;
}

// Removes the container for 'key' from every assortment and integrates it
// into the backend. Only called when an assortment entry must be deleted
// or returned inside an index entity. Returns true when at least one
// entry actually reached the backend.
private boolean flushFromAssortmentCluster(String key, long maxTime) {
    // keep 20% of the budget for the backend add that follows
    if (maxTime > 0) maxTime = 8 * maxTime / 10;
    final plasmaWordIndexEntryContainer removed = assortmentCluster.removeFromAll(key, maxTime);
    if (removed == null) return false;
    // non-empty entry container: hand it down to the backend
    return backend.addEntries(removed, removed.updated(), true) > 0;
}

public static final int RL_RAMCACHE = 0;
public static final int RL_FILECACHE = 1;
public static final int RL_ASSORTMENTS = 2;
Expand Down Expand Up @@ -485,121 +460,6 @@ public void remove() {
}
} // class rotatingWordIterator

/*
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
return new iterateFiles(startHash, up, deleteEmpty);
}
public final class iterateFiles implements Iterator {
// Iterator of hash-strings in WORDS path
private final ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File entries
private final Comparator comp; // for string-compare
private String buffer; // the prefetch-buffer
private final boolean delete;
public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
this.hierarchy = new ArrayList();
this.comp = kelondroNaturalOrder.naturalOrder; // this is the wrong ordering but must be used as long as the assortments use the same ordering
//this.comp = new kelondroBase64Order(up, false);
this.delete = deleteEmpty;
// the we initially fill the hierarchy with the content of the root folder
String path = "WORDS";
TreeSet list = list(new File(databaseRoot, path));
// if we have a start hash then we find the appropriate subdirectory to start
if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
delete(startHash.substring(0, 1), list);
if (list.size() > 0) {
hierarchy.add(list);
String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
int pathc = 0;
while ((pathc < paths.length) &&
(comp.compare((String) list.first(), paths[pathc]) == 0)) {
path = path + "/" + paths[pathc];
list = list(new File(databaseRoot, path));
delete(paths[pathc], list);
if (list.size() == 0) break;
hierarchy.add(list);
pathc++;
}
}
while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
} else {
hierarchy.add(list);
buffer = next0();
}
}
private synchronized void delete(String pattern, TreeSet names) {
String name;
while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
}
private TreeSet list(File path) {
// System.out.println("PATH: " + path);
TreeSet t = new TreeSet(comp);
String[] l = path.list();
if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
// else System.out.println("DEBUG: wrong path " + path);
// System.out.println(t);
return t;
}
private synchronized String next0() {
// the object is a File pointing to the corresponding file
File f;
String n;
TreeSet t;
do {
t = null;
while ((t == null) && (hierarchy.size() > 0)) {
t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
if (t.size() == 0) {
hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
t = null;
}
}
if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
// fetch value
f = new File(n = (String) t.first());
t.remove(n);
// if the value represents another folder, we step into the next hierarchy
if (f.isDirectory()) {
t = list(f);
if (t.size() == 0) {
if (delete) f.delete();
} else {
hierarchy.add(t);
}
f = null;
}
} while (f == null);
// thats it
if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
return null;
} else {
return n.substring(0, yacySeedDB.commonHashLength);
}
}
public boolean hasNext() {
return buffer != null;
}
public Object next() {
String r = buffer;
while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
return r;
}
public void remove() {
}
}
*/


public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaWordIndexClassicDB.java
Expand Up @@ -194,7 +194,7 @@ public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean delet
}
return container;
} else {
return new plasmaWordIndexEntryContainer(wordHash, 0);
return new plasmaWordIndexEntryContainer(wordHash);
}
}

Expand Down
14 changes: 8 additions & 6 deletions source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
Expand Up @@ -52,27 +52,28 @@

package de.anomic.plasma;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;

import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;

public final class plasmaWordIndexEntryContainer implements Comparable {

private String wordHash;
private final HashMap container; // urlHash/plasmaWordIndexEntry - Mapping
private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping
private long updateTime;

// Creates a container for the index entries of one word. NOTE(review):
// this span shows both sides of the diff — the old delegation (HashMap
// with initial capacity 16) and its replacement delegating to the
// ordering-based constructor, needed for conjunction-by-enumeration.
public plasmaWordIndexEntryContainer(String wordHash) {
// old variant (removed by this commit): fixed initial HashMap capacity
this(wordHash,16);
// new variant: natural ascending ordering of the url hashes
this(wordHash, new kelondroNaturalOrder(true));
}

// old signature (removed by this commit): took an initial map capacity
public plasmaWordIndexEntryContainer(String wordHash, int initContainerSize) {
// new signature: takes the ordering used to sort the url-hash keys
public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) {
this.wordHash = wordHash;
// 0 means "never updated yet"
this.updateTime = 0;
// old variant (removed): unordered map broke re-searches with several words
container = new HashMap(initContainerSize); // a urlhash/plasmaWordIndexEntry - relation
// new variant: sorted map, required for the enumeration-based join
container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation
}

public void setWordHash(String newWordHash) {
Expand Down Expand Up @@ -158,7 +159,7 @@ public Iterator entries() {
}

public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) {
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash,1);
plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash);
c.add(entry);
c.updateTime = creationTime;
return c;
Expand Down Expand Up @@ -283,6 +284,7 @@ private static plasmaWordIndexEntryContainer joinConstructiveByEnumeration(plasm
long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = ie1.getUrlHash().compareTo(ie2.getUrlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;
} else if (c > 0) {
Expand Down
10 changes: 5 additions & 5 deletions source/yacy.java
Expand Up @@ -956,14 +956,14 @@ public static void minimizeUrlDB(String homePath) {
String wordChunkStartHash = "------------", wordChunkEndHash;

while (wordHashIterator.hasNext()) {
plasmaWordIndexEntity wordIdxEntity = null;
plasmaWordIndexEntryContainer wordIdxContainer = null;
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
wordIdxContainer = wordIndex.getContainer(wordhash, true, -1);

// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);
Iterator wordIdxEntries = wordIdxContainer.entries();
plasmaWordIndexEntry wordIdxEntry;
while (wordIdxEntries.hasNext()) {
wordIdxEntry = (plasmaWordIndexEntry) wordIdxEntries.next();
Expand All @@ -978,7 +978,7 @@ public static void minimizeUrlDB(String homePath) {
} catch (IOException e) {}
}
// we have read all elements, now we can close it
wordIdxEntity.close(); wordIdxEntity = null;
wordIdxContainer = null;

if (wordCounter%500 == 0) {
wordChunkEndHash = wordhash;
Expand All @@ -997,7 +997,7 @@ public static void minimizeUrlDB(String homePath) {
} catch (Exception e) {
e.printStackTrace();
} finally {
if (wordIdxEntity != null) try { wordIdxEntity.close(); } catch (Exception e) {}
if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
}
}
currentUrlDB.close();
Expand Down

0 comments on commit 3703f76

Please sign in to comment.