Skip to content

Commit

Permalink
enhancements in kelondroCollectionIndex:
Browse files Browse the repository at this point in the history
* synchronized array and index objects
* auto-fix function for slightly corrupted index entries
* generalized internal access methods

also extended kelondroIndex interface to support ordering access
which is used in kelondroCollectionIndex for string comparisons

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2366 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Aug 7, 2006
1 parent ec5149f commit 718fbc2
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 99 deletions.
2 changes: 1 addition & 1 deletion htroot/PerformanceMemory_p.java
Expand Up @@ -287,7 +287,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
prop.put("namecache.hit",Long.toString(amount));
amount = httpc.nameCacheNoCachingListSize();
prop.put("namecache.noCache",Long.toString(amount));
amount = sb.urlBlacklist.blacklistCacheSize();
amount = plasmaSwitchboard.urlBlacklist.blacklistCacheSize();
prop.put("blacklistcache.size",Long.toString(amount));
// return rewrite values for templates
return prop;
Expand Down
9 changes: 8 additions & 1 deletion source/dbtest.java
Expand Up @@ -15,6 +15,8 @@
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroProfile;
import de.anomic.kelondro.kelondroSplittedTree;
import de.anomic.kelondro.kelondroTree;
Expand Down Expand Up @@ -375,7 +377,8 @@ final class dbTable implements kelondroIndex {
private final String db_usr_str = "yacy";
private final String db_pwd_str = "yacy";

private Connection theDBConnection = null;
private Connection theDBConnection = null;
private final kelondroOrder order = new kelondroNaturalOrder(true);
private kelondroRow rowdef;

public dbTable(String dbType, kelondroRow rowdef) throws Exception {
Expand Down Expand Up @@ -540,6 +543,10 @@ public int columnSize(int column) {
// TODO Auto-generated method stub
return 0;
}

// kelondroIndex ordering accessor: returns the kelondroOrder object
// stored in this table's 'order' field
public kelondroOrder order() {
return this.order;
}
}


Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/index/indexContainer.java
Expand Up @@ -45,8 +45,8 @@ public interface indexContainer {
public String getWordHash();

public void setOrdering(kelondroOrder newOrder, int newColumn);
public kelondroOrder getOrdering();
public int getOrderColumn();
public kelondroOrder order();
public int orderColumn();

public int add(indexEntry entry);
public int add(indexEntry entry, long updateTime);
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/index/indexRowSetContainer.java
Expand Up @@ -296,8 +296,8 @@ private static indexContainer joinConstructiveByTest(indexContainer small, index
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexContainer conj = new indexRowSetContainer(null); // start with empty search result
if (!((i1.getOrdering().signature().equals(i2.getOrdering().signature())) &&
(i1.getOrderColumn() == i2.getOrderColumn()))) return conj; // ordering must be equal
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.orderColumn() == i2.orderColumn()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Iterator e2 = i2.entries();
int c;
Expand All @@ -309,7 +309,7 @@ private static indexContainer joinConstructiveByEnumeration(indexContainer i1, i

long stamp = System.currentTimeMillis();
while ((System.currentTimeMillis() - stamp) < time) {
c = i1.getOrdering().compare(ie1.urlHash(), ie2.urlHash());
c = i1.order().compare(ie1.urlHash(), ie2.urlHash());
//System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c);
if (c < 0) {
if (e1.hasNext()) ie1 = (indexEntry) e1.next(); else break;
Expand Down
4 changes: 4 additions & 0 deletions source/de/anomic/kelondro/kelondroBytesIntMap.java
Expand Up @@ -70,4 +70,8 @@ public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOExc
return ki.rows(up, rotating, firstKey);
}

// ordering accessor: delegates to the wrapped object 'ki' and returns
// whatever ordering ki.order() reports
public kelondroOrder order() {
return ki.order();
}

}
184 changes: 94 additions & 90 deletions source/de/anomic/kelondro/kelondroCollectionIndex.java
Expand Up @@ -163,109 +163,98 @@ private int putmergeremove(byte[] key, kelondroRowCollection collection, boolean
return 0;
}

// first find an old entry, if one exists
kelondroRow.Entry oldindexrow = index.get(key);
synchronized (index) {
// first find an old entry, if one exists
kelondroRow.Entry oldindexrow = index.get(key);

if (oldindexrow == null) {
if ((collection != null) && (collection.size() > 0)) {
// the collection is new
overwrite(key, collection);
}
return 0;
} else {
// overwrite the old collection
// read old information
int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration
int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount);
int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(oldchunkcount);
int oldSerialNumber = 0;
if (oldindexrow == null) {
if ((collection != null) && (collection.size() > 0)) {
// the collection is new
overwrite(key, collection);
}
return 0;
} else {
// overwrite the old collection
// read old information
int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration
int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount);
int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(oldchunkcount);
int oldSerialNumber = 0;

if (merge) {
// load the old collection and join it with the old
// open array entry
kelondroFixedWidthArray oldarray = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
//System.out.println("joining for key " + new String(key) + ", oldrow=" + oldrownumber + ", oldchunkcount=" + oldchunkcount + ", array file=" + oldarray.filename);
kelondroRow.Entry oldarrayrow = oldarray.get(oldrownumber);
if (oldarrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, oldchunksize, oldPartitionNumber, oldSerialNumber).toString(), "array does not contain expected row");

// read the row and define a collection
kelondroRowSet oldcollection = new kelondroRowSet(this.rowdef, oldarrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()

// join with new collection
oldcollection.addAll(collection);
collection = oldcollection;
}
if (merge) {
// load the old collection and join it with the old
kelondroRowSet oldcollection = getdelete(oldindexrow, false, false);

// join with new collection
oldcollection.addAll(collection);
collection = oldcollection;
}

int removed = 0;
if (removekeys != null) {
// load the old collection and remove keys
// open array entry
kelondroFixedWidthArray oldarray = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
kelondroRow.Entry oldarrayrow = oldarray.get(oldrownumber);
if (oldarrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, oldchunksize, oldPartitionNumber, oldSerialNumber).toString(), "array does not contain expected row");
int removed = 0;
if (removekeys != null) {
// load the old collection and remove keys
kelondroRowSet oldcollection = getdelete(oldindexrow, false, false);

// read the row and define a collection
kelondroRowSet oldcollection = new kelondroRowSet(this.rowdef, oldarrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()

// remove the keys from the set
Iterator i = removekeys.iterator();
Object k;
while (i.hasNext()) {
k = i.next();
if (k instanceof byte[]) {if (oldcollection.remove((byte[]) k) != null) removed++;}
if (k instanceof String) {if (oldcollection.remove(((String) k).getBytes()) != null) removed++;}
// remove the keys from the set
Iterator i = removekeys.iterator();
Object k;
while (i.hasNext()) {
k = i.next();
if (k instanceof byte[]) {if (oldcollection.remove((byte[]) k) != null) removed++;}
if (k instanceof String) {if (oldcollection.remove(((String) k).getBytes()) != null) removed++;}
}
collection = oldcollection;
}
collection = oldcollection;
}

if (collection.size() == 0) {
if (deletecomplete) {
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
array.remove(oldrownumber);
if (collection.size() == 0) {
if (deletecomplete) {
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
array.remove(oldrownumber);
}
return removed;
}
return removed;
}

int newPartitionNumber = arrayIndex(collection.size());
int newSerialNumber = 0;
int newPartitionNumber = arrayIndex(collection.size());
int newSerialNumber = 0;

// see if we need new space or if we can overwrite the old space
if (oldPartitionNumber == newPartitionNumber) {
// we don't need a new slot, just write into the old one
// see if we need new space or if we can overwrite the old space
if (oldPartitionNumber == newPartitionNumber) {
// we don't need a new slot, just write into the old one

// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.rowdef.objectsize());
// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.rowdef.objectsize());

// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
arrayEntry.setCol(0, key);
arrayEntry.setCol(1, collection.exportCollection());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
arrayEntry.setCol(0, key);
arrayEntry.setCol(1, collection.exportCollection());

// overwrite entry in this array
array.set(oldrownumber, arrayEntry);
// overwrite entry in this array
array.set(oldrownumber, arrayEntry);

// update the index entry
oldindexrow.setColLongB256(idx_col_chunkcount, collection.size());
oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(oldindexrow);
} else {
// we need a new slot, that means we must first delete the old entry
// find array file
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
// update the index entry
oldindexrow.setColLongB256(idx_col_chunkcount, collection.size());
oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(oldindexrow);
} else {
// we need a new slot, that means we must first delete the old entry
// find array file
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);

// delete old entry
array.remove(oldrownumber);
// delete old entry
array.remove(oldrownumber);

// write a new entry in the other array
overwrite(key, collection);
// write a new entry in the other array
overwrite(key, collection);
}
return removed;
}
return removed;
}
}

private void overwrite(byte[] key, kelondroRowCollection collection) throws IOException {
// helper method, should not be called directly
// helper method, should not be called directly and only within a synchronized(index) environment
// simply store a collection without check if the collection existed before

// find array file
Expand All @@ -292,19 +281,25 @@ private void overwrite(byte[] key, kelondroRowCollection collection) throws IOEx

public kelondroRowSet get(byte[] key, boolean deleteIfEmpty) throws IOException {
// find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, false, deleteIfEmpty);
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, false, deleteIfEmpty);
}
}

public kelondroRowSet delete(byte[] key) throws IOException {
// find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
synchronized (index) {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null;
return getdelete(indexrow, true, false);
}
}

private kelondroRowSet getdelete(kelondroRow.Entry indexrow, boolean remove, boolean deleteIfEmpty) throws IOException {
// call this only within a synchronized(index) environment

// read values
int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize);
int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount);
Expand All @@ -319,8 +314,17 @@ private kelondroRowSet getdelete(kelondroRow.Entry indexrow, boolean remove, boo

// read the row and define a collection
kelondroRowSet collection = new kelondroRowSet(this.rowdef, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
if (index.order().compare(arrayrow.getColBytes(0), indexrow.getColBytes(idx_col_key)) != 0) {
// check if we got the right row; this row is wrong
throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key) + "'"));
}
int chunkcountInArray = collection.size();
if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray);
if (chunkcountInArray != chunkcount) {
// fix the entry in index
indexrow.setColLong(idx_col_chunkcount, chunkcountInArray);
index.put(indexrow);
array.logFailure("INCONSISTENCY in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed");
}

if ((remove) || ((chunkcountInArray == 0) && (deleteIfEmpty))) array.remove(rownumber);

Expand Down
4 changes: 4 additions & 0 deletions source/de/anomic/kelondro/kelondroFlexTable.java
Expand Up @@ -191,4 +191,8 @@ public void remove() {

}

// ordering accessor: delegates to the 'index' member and returns
// whatever ordering index.order() reports
public kelondroOrder order() {
return index.order();
}

}
1 change: 1 addition & 0 deletions source/de/anomic/kelondro/kelondroIndex.java
Expand Up @@ -55,6 +55,7 @@

public interface kelondroIndex {

public kelondroOrder order();
public int size() throws IOException;
public kelondroRow row() throws IOException;
public kelondroRow.Entry get(byte[] key) throws IOException;
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/kelondro/kelondroRowSet.java
Expand Up @@ -220,11 +220,11 @@ public void setOrdering(kelondroOrder newOrder, int newColumn) {
}
}

public kelondroOrder getOrdering() {
public kelondroOrder order() {
return this.sortOrder;
}

public int getOrderColumn() {
public int orderColumn() {
return this.sortColumn;
}

Expand Down
4 changes: 4 additions & 0 deletions source/de/anomic/kelondro/kelondroSplittedTree.java
Expand Up @@ -231,5 +231,9 @@ public void remove() {

}

// ordering accessor: returns the kelondroOrder object stored in
// this instance's 'order' field
public kelondroOrder order() {
return this.order;
}


}

0 comments on commit 718fbc2

Please sign in to comment.