Skip to content

Commit

Permalink
bugfixes and migration attempt toward new kelondroFlex db
Browse files Browse the repository at this point in the history
- more synchronization
- bugfix for remove in collections
- bugfix in kelondroFlex (wrong exception condition!)
- options to use RAM, FLEX and TREE tables for Crawl URL stacker
- default for Crawl URL stacker is now FLEX (!)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2746 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Oct 11, 2006
1 parent 48f81ac commit 6396f59
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 33 deletions.
10 changes: 5 additions & 5 deletions build.properties
Expand Up @@ -3,11 +3,11 @@ javacSource=1.4
javacTarget=1.4

# Release Configuration
releaseVersion=0.48
#releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseVersion=0.481
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
#releaseDir=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}
releaseFileParentDir=yacy
releaseNr=$Revision$

Expand Down
10 changes: 5 additions & 5 deletions source/de/anomic/kelondro/kelondroBytesIntMap.java
Expand Up @@ -37,13 +37,13 @@ public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
this.ki = ki;
}

public int geti(byte[] key) throws IOException {
public synchronized int geti(byte[] key) throws IOException {
kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
}

public int puti(byte[] key, int i) throws IOException {
public synchronized int puti(byte[] key, int i) throws IOException {
kelondroRow.Entry newentry = ki.row().newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
Expand All @@ -52,18 +52,18 @@ public int puti(byte[] key, int i) throws IOException {
return (int) oldentry.getColLong(1);
}

public int removei(byte[] key) throws IOException {
public synchronized int removei(byte[] key) throws IOException {
if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.remove(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
}

public int size() throws IOException {
public synchronized int size() throws IOException {
return ki.size();
}

public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex
// col[0] = key
// col[1] = integer as {b265}
Expand Down
4 changes: 3 additions & 1 deletion source/de/anomic/kelondro/kelondroCollectionIndex.java
Expand Up @@ -408,7 +408,9 @@ public kelondroRowSet delete(byte[] key) throws IOException {
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
kelondroRowSet removedCollection = getdelete(indexrow, true, false);
index.remove(key);
return removedCollection;
}
}

Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/kelondro/kelondroFixedWidthArray.java
Expand Up @@ -128,7 +128,7 @@ public synchronized int add(kelondroRow.Entry rowentry) throws IOException {
}

public synchronized void remove(int index) throws IOException {
if (index >= size()) throw new IOException("remove: index " + index + " out of bounds " + size());
if (index >= super.USAGE.allCount()) throw new IOException("remove: index " + index + " out of bounds " + super.USAGE.allCount());

// get the node at position index
Handle h = new Handle(index);
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/kelondro/kelondroFlexTable.java
Expand Up @@ -155,7 +155,7 @@ public synchronized kelondroRow.Entry remove(byte[] key) throws IOException {
return r;
}

public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
return new rowIterator(up, rotating, firstKey);
}

Expand Down
10 changes: 5 additions & 5 deletions source/de/anomic/kelondro/kelondroRAMIndex.java
Expand Up @@ -47,27 +47,27 @@ public kelondroOrder order() {
return this.order;
}

public int size() {
public synchronized int size() {
return this.index.size();
}

public kelondroRow row() {
return this.rowdef;
}

public Entry get(byte[] key) {
public synchronized Entry get(byte[] key) {
return (kelondroRow.Entry) index.get(key);
}

public Entry put(Entry row) {
public synchronized Entry put(Entry row) {
return (kelondroRow.Entry) index.put(row.getColBytes(0), row);
}

public Entry remove(byte[] key) {
public synchronized Entry remove(byte[] key) {
return (kelondroRow.Entry) index.remove(key);
}

public Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) {
return index.values().iterator();
}

Expand Down
37 changes: 25 additions & 12 deletions source/de/anomic/plasma/plasmaCrawlStacker.java
Expand Up @@ -64,6 +64,7 @@
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRAMIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
Expand All @@ -75,6 +76,11 @@

public final class plasmaCrawlStacker {

// keys for different database types
public static final int QUEUE_DB_TYPE_RAM = 0;
public static final int QUEUE_DB_TYPE_TREE = 1;
public static final int QUEUE_DB_TYPE_FLEX = 2;

final WorkerPool theWorkerPool;
private GenericObjectPool.Config theWorkerPoolConfig = null;
final ThreadGroup theWorkerThreadGroup = new ThreadGroup("stackCrawlThreadGroup");
Expand All @@ -83,10 +89,10 @@ public final class plasmaCrawlStacker {
//private boolean stopped = false;
private stackCrawlQueue queue;

public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, boolean newdb) {
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, int dbCacheSize, long preloadTime, int dbtype) {
this.sb = sb;

this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, newdb);
this.queue = new stackCrawlQueue(dbPath, dbCacheSize, preloadTime, dbtype);
this.log.logInfo(this.queue.size() + " entries in the stackCrawl queue.");
this.log.logInfo("STACKCRAWL thread initialized.");

Expand Down Expand Up @@ -248,7 +254,7 @@ public void checkInterruption() throws InterruptedException {
public String stackCrawl(String nexturlString, String referrerString, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) throws InterruptedException {
// stacks a crawl item. The position can also be remote
// returns null if successful, a reason string if not successful
this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");

long startTime = System.currentTimeMillis();
String reason = null; // failure reason
Expand Down Expand Up @@ -384,8 +390,7 @@ public String stackCrawl(String nexturlString, String referrerString, String ini
boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
if ((dbocc != null) && (!(recrawl))) {
reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
//this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
}

Expand Down Expand Up @@ -597,9 +602,9 @@ final class stackCrawlQueue {
private File cacheStacksPath;
private int bufferkb;
private long preloadTime;
private boolean newdb;
private int dbtype;

public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boolean newdb) {
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, int dbtype) {
// init the read semaphore
this.readSync = new serverSemaphore (0);

Expand All @@ -613,7 +618,7 @@ public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boo
this.cacheStacksPath = cacheStacksPath;
this.bufferkb = bufferkb;
this.preloadTime = preloadTime;
this.newdb = newdb;
this.dbtype = dbtype;

openDB();
try {
Expand Down Expand Up @@ -650,9 +655,13 @@ public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime, boo
}

private void deleteDB() {
if (this.newdb) {
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
// do nothing..
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
kelondroFlexTable.delete(cacheStacksPath, "urlPreNotice1.table");
} else {
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.delete();
}
Expand All @@ -661,7 +670,10 @@ private void deleteDB() {
private void openDB() {
if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path

if (this.newdb) {
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
this.urlEntryCache = new kelondroRAMIndex(kelondroBase64Order.enhancedCoder, plasmaCrawlNURL.rowdef);
}
if (this.dbtype == QUEUE_DB_TYPE_FLEX) {
String newCacheName = "urlPreNotice1.table";
cacheStacksPath.mkdirs();
try {
Expand All @@ -670,7 +682,8 @@ private void openDB() {
e.printStackTrace();
System.exit(-1);
}
} else {
}
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.getParentFile().mkdirs();
this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef);
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaSwitchboard.java
Expand Up @@ -606,7 +606,7 @@ ramEURL, getConfigBool("useFlexTableForEURL", true),
serverInstantThread.oneTimeJob(yc, "loadSeeds", yacyCore.log, 3000);

// initializing the stackCrawlThread
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, getConfigBool("useFlexTableForPreNURL", false));
this.sbStackCrawlThread = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL, ramPreNURL_time, (int) getConfigLong("tableTypeForPreNURL", 0));
//this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL);
//this.sbStackCrawlThread.start();

Expand Down
5 changes: 3 additions & 2 deletions yacy.init
Expand Up @@ -497,7 +497,7 @@ xpstopw=true
80_indexing_busysleep=100
80_indexing_memprereq=2097152
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=10
82_crawlstack_busysleep=0
82_crawlstack_memprereq=1048576
90_cleanup_idlesleep=300000
90_cleanup_busysleep=300000
Expand Down Expand Up @@ -815,10 +815,11 @@ currentSkin=

# temporary flags for the new database structure; set to true only for testing
# ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION
# table types for tableTypeForPreNURL: RAM = 0, TREE = 1, FLEX = 2
useCollectionIndex=false
useFlexTableForNURL=false
useFlexTableForEURL=true
useFlexTableForPreNURL=false
tableTypeForPreNURL=2

# flag to show surftipps on index.html page
showSurftipps = true

0 comments on commit 6396f59

Please sign in to comment.