Skip to content

Commit

Permalink
* more logging
Browse files Browse the repository at this point in the history
* option in yacy.init to set useCollectionIndex usage

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2374 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Aug 10, 2006
1 parent a52f367 commit 3140214
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 21 deletions.
11 changes: 4 additions & 7 deletions source/de/anomic/index/indexRAMCacheRI.java
Expand Up @@ -38,7 +38,6 @@
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
Expand All @@ -47,12 +46,8 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {

// environment constants
private static final String indexArrayFileName = "indexDump1.array";
public static int wCacheReferenceLimit = 64;
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final long kCacheMaxAge = 1000 * 60 * 2; // milliseconds; 2 minutes
static {
if (plasmaWordIndex.useCollectionIndex) wCacheReferenceLimit = 256;
}

// class variables
private final File databaseRoot;
Expand All @@ -63,16 +58,17 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
private long kCacheInc = 0;
private long startTime;
private int wCacheMaxCount;
public int wCacheReferenceLimit;
private final serverLog log;

// calculated constants
private static String maxKey;
static {
maxKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z';
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}

public indexRAMCacheRI(File databaseRoot, serverLog log) {
public indexRAMCacheRI(File databaseRoot, int wCacheReferenceLimitInit, serverLog log) {

// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
Expand All @@ -84,6 +80,7 @@ public indexRAMCacheRI(File databaseRoot, serverLog log) {
this.kCacheInc = 0;
this.startTime = System.currentTimeMillis();
this.wCacheMaxCount = 10000;
this.wCacheReferenceLimit = wCacheReferenceLimitInit;
this.log = log;

// read in dump of last session
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/dbImport/plasmaDbImporter.java
Expand Up @@ -75,7 +75,7 @@ public void init(File theImportPath, File theIndexPath, int theCacheSize, long p
}

this.log.logFine("Initializing source word index db.");
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log);
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log, sb.getConfigBool("useCollectionIndex", false));
this.log.logFine("Initializing import URL db.");
this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2);
this.importStartSize = this.importWordIndex.size();
Expand Down
16 changes: 11 additions & 5 deletions source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -389,7 +389,7 @@ public plasmaSwitchboard(String rootPath, String initPath, String configPath) {
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time);

wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log);
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
wordIndex.setMaxWordCount(wordCacheMaxCount);

Expand Down Expand Up @@ -926,7 +926,7 @@ public void enQueue(Object job) {
public boolean deQueue() {
// work off fresh entries from the proxy or from the crawler
if (onlineCaution()) {
log.logFiner("deQueue: online caution, omitting resource stack processing");
log.logFine("deQueue: online caution, omitting resource stack processing");
return false;
}

Expand Down Expand Up @@ -959,7 +959,7 @@ public boolean deQueue() {
synchronized (sbQueue) {

if (sbQueue.size() == 0) {
// log.logDebug("DEQUEUE: queue is empty");
log.logFine("deQueue: nothing to do, queue is emtpy");
return doneSomething; // nothing to do
}

Expand All @@ -979,7 +979,10 @@ public boolean deQueue() {
plasmaSwitchboardQueue.Entry nextentry;

// if we were interrupted we should return now
if (Thread.currentThread().isInterrupted()) return false;
if (Thread.currentThread().isInterrupted()) {
log.logFine("deQueue: thread was interrupted");
return false;
}

// do one processing step
log.logFine("DEQUEUE: sbQueueSize=" + sbQueue.size() +
Expand All @@ -989,7 +992,10 @@ public boolean deQueue() {
", remoteStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE));
try {
nextentry = sbQueue.pop();
if (nextentry == null) return false;
if (nextentry == null) {
log.logFine("deQueue: null entry on queue stack");
return false;
}
} catch (IOException e) {
log.logSevere("IOError in plasmaSwitchboard.deQueue: " + e.getMessage(), e);
return doneSomething;
Expand Down
11 changes: 6 additions & 5 deletions source/de/anomic/plasma/plasmaWordIndex.java
Expand Up @@ -79,7 +79,6 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {

private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
public static final boolean useCollectionIndex = false;

private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
Expand All @@ -89,11 +88,12 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
private final plasmaWordIndexFileCluster backend; // old database structure, to be replaced by CollectionRI
public boolean busyCacheFlush; // shows if a cache flush is currently performed
public boolean useCollectionIndex; // flag for usage of new collectionIndex db

public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, long preloadTime, serverLog log) {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) {
this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log);
this.ramCache = new indexRAMCacheRI(oldDatabaseRoot, log);
this.ramCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 256 : 64, log);

// create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
Expand All @@ -109,6 +109,7 @@ public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, lo
collections = null;

busyCacheFlush = false;
this.useCollectionIndex = useCollectionIndex;
}

public File getRoot() {
Expand Down Expand Up @@ -170,7 +171,7 @@ public int getMaxWordCount() {
public void flushControl() {
// check for forced flush
synchronized (this) { ramCache.shiftK2W(); }
flushCache(ramCache.maxURLinWCache() - indexRAMCacheRI.wCacheReferenceLimit);
flushCache(ramCache.maxURLinWCache() - ramCache.wCacheReferenceLimit);
if (ramCache.wSize() > ramCache.getMaxWordCount()) {
flushCache(ramCache.wSize() + 500 - ramCache.getMaxWordCount());
}
Expand Down Expand Up @@ -762,7 +763,7 @@ public static void main(String[] args) {
// System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis()))));
File plasmadb = new File("D:\\dev\\proxy\\DATA\\PLASMADB");
File indexdb = new File("D:\\dev\\proxy\\DATA\\INDEX\\PRIVATE\\TEXT");
plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, 555, 1000, new serverLog("TESTAPP"));
plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, 555, 1000, new serverLog("TESTAPP"), false);
try {
Iterator containerIter = index.wordContainers("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true);
while (containerIter.hasNext()) {
Expand Down
54 changes: 54 additions & 0 deletions source/de/anomic/server/serverPlainSwitch.java
@@ -0,0 +1,54 @@
// serverPlainSwitch.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 10.08.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package de.anomic.server;

/**
 * A switch implementation without a job queue and without actions.
 *
 * All queue operations are no-ops and {@link #action} never produces a
 * result; everything else is taken over unchanged from
 * {@code serverAbstractSwitch}.
 */
public class serverPlainSwitch extends serverAbstractSwitch implements serverSwitch {

    /**
     * Creates the switch by handing all three arguments through to the
     * {@code serverAbstractSwitch} constructor.
     *
     * @param rootPath   passed on to the superclass constructor
     * @param initPath   passed on to the superclass constructor
     * @param configPath passed on to the superclass constructor
     */
    public serverPlainSwitch(final String rootPath, final String initPath, final String configPath) {
        super(rootPath, initPath, configPath);
    }

    /**
     * @return always 0, because this switch does not queue anything
     */
    public int queueSize() {
        return 0;
    }

    /**
     * Discards the given job; this switch has no queue to put it on.
     *
     * @param job ignored
     */
    public void enQueue(final Object job) {
        // intentionally empty: no queueing
    }

    /**
     * @return always false, because there is never a queued job to work off
     */
    public boolean deQueue() {
        return false;
    }

    /**
     * This switch supports no actions.
     *
     * @param actionName  ignored
     * @param actionInput ignored
     * @return always null
     */
    public serverObjects action(final String actionName, final serverObjects actionInput) {
        return null;
    }

}
11 changes: 8 additions & 3 deletions source/yacy.java
Expand Up @@ -90,6 +90,8 @@
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverPlainSwitch;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverSystem;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.enumerateFiles;
Expand Down Expand Up @@ -644,12 +646,13 @@ private static void genWordstat(String homePath) {
*/
public static void migrateWords(String homePath) {
// run with "java -classpath classes yacy -migratewords"
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
serverLog log = new serverLog("WORDMIGRATION");
log.logInfo("STARTING MIGRATION");
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log);
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log, sps.getConfigBool("useCollectionIndex", false));
enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
String wordhash;
File wordfile;
Expand Down Expand Up @@ -685,6 +688,7 @@ else if (migrationCount > 0)
*/
public static void minimizeUrlDB(String homePath, int dbcache) {
// run with "java -classpath classes yacy -minimizeUrlDB"
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
Expand All @@ -704,7 +708,7 @@ public static void minimizeUrlDB(String homePath, int dbcache) {
int cacheMem = (int)((rt.maxMemory()-rt.totalMemory())/1024)-(2*cache + 8*1024);
if (cacheMem < 2048) throw new OutOfMemoryError("Not enough memory available to start clean up.");

plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, indexRoot, cacheMem, 10000, log);
plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, indexRoot, cacheMem, 10000, log, sps.getConfigBool("useCollectionIndex", false));
Iterator indexContainerIterator = wordIndex.wordContainers("------------", plasmaWordIndex.RL_WORDFILES, false);

long urlCounter = 0, wordCounter = 0;
Expand Down Expand Up @@ -1138,6 +1142,7 @@ private static void urldbcleanup(String homePath) {
private static void RWIHashList(String homePath, String targetName, String resource, String format) {
plasmaWordIndex WordIndex = null;
serverLog log = new serverLog("HASHLIST");
final serverSwitch sps = new serverPlainSwitch(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
String wordChunkStartHash = "------------";
Expand All @@ -1147,7 +1152,7 @@ private static void RWIHashList(String homePath, String targetName, String resou
try {
Iterator indexContainerIterator = null;
if (resource.equals("all")) {
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, 8*1024*1024, 3000, log);
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, 8*1024*1024, 3000, log, sps.getConfigBool("useCollectionIndex", false));
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
} else if (resource.equals("assortments")) {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, 3000, log);
Expand Down
4 changes: 4 additions & 0 deletions yacy.init
Expand Up @@ -787,3 +787,7 @@ externalRedirector=
svnRevision=0

currentSkin=

// temporary flag for the new database structure; set it to true only for testing
// ALL DATA CREATED WHILE THIS FLAG IS ON WILL BE VOID IN THE FINAL VERSION
useCollectionIndex=false

0 comments on commit 3140214

Please sign in to comment.