Skip to content

Commit

Permalink
enhanced count feature for kelondroRowSet. This is about twice as fast as before.
Browse files Browse the repository at this point in the history
Should speed up the collection analysis (half the time!)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5698 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Mar 11, 2009
1 parent 84e3738 commit 3e4c28e
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 25 deletions.
33 changes: 18 additions & 15 deletions source/de/anomic/kelondro/index/IntegerHandleIndex.java
Expand Up @@ -51,7 +51,7 @@
public class IntegerHandleIndex {

private final Row rowdef;
private ObjectIndex index;
private ObjectIndexCache index;

public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0);
Expand Down Expand Up @@ -133,8 +133,19 @@ public synchronized int put(final byte[] key, final int i) throws IOException {
if (oldentry == null) return -1;
return (int) oldentry.getColLong(1);
}

public synchronized int add(final byte[] key, int a) throws IOException {

/**
 * Adds {@code a} to the counter stored under {@code key}.
 *
 * A row holding {@code key} in column 0 and {@code a} in column 1 is built
 * up front and handed to the index together with the increment; the index
 * receives it as the initial row for this key.
 *
 * @param key the key whose counter is incremented; asserted to be non-null
 * @param a   the amount to add; asserted to be positive
 * @return the value returned by the index, narrowed to {@code int}
 * @throws IOException declared in the signature for callers
 */
public synchronized int inc(final byte[] key, int a) throws IOException {
    assert key != null;
    assert a > 0; // adding 0 makes no sense; if it occurs, it is a performance issue

    // initial row passed along to the index: column 0 = key, column 1 = a
    final Row.Entry seed = this.rowdef.newEntry();
    seed.setCol(0, key);
    seed.setCol(1, a);

    return (int) index.inc(key, 1, a, seed);
}
/*
public synchronized int inc(final byte[] key, int a) throws IOException {
assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurs, it is a performance issue
Expand All @@ -146,20 +157,12 @@ public synchronized int add(final byte[] key, int a) throws IOException {
index.addUnique(newentry);
return 1;
} else {
int i = (int) indexentry.getColLong(1) + a;
indexentry.setCol(1, i);
long l = indexentry.incCol(1, a);
index.put(indexentry);
return i;
return (int) l;
}
}

public synchronized int inc(final byte[] key) throws IOException {
return add(key, 1);
}

public synchronized int dec(final byte[] key) throws IOException {
return add(key, -1);
}
*/

public synchronized void putUnique(final byte[] key, final int i) throws IOException {
assert i >= 0 : "i = " + i;
Expand Down Expand Up @@ -325,7 +328,7 @@ public static void main(String[] args) {
long start = System.currentTimeMillis();
try {
for (int i = 0; i < count; i++) {
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes());
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes(), 1);
}
} catch (IOException e) {
e.printStackTrace();
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/kelondro/index/ObjectIndex.java
Expand Up @@ -50,6 +50,7 @@ public interface ObjectIndex {
public void putMultiple(List<Row.Entry> rows) throws IOException; // for R/W head path optimization
public void addUnique(Row.Entry row) throws IOException; // no double-check
public void addUniqueMultiple(List<Row.Entry> rows) throws IOException; // no double-check
//public long inc(final byte[] key, int col, long add, Row.Entry initrow); // replace a column with a recomputed value
public ArrayList<RowCollection> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
public Row.Entry remove(byte[] key) throws IOException;
public Row.Entry removeOne() throws IOException;
Expand Down
28 changes: 26 additions & 2 deletions source/de/anomic/kelondro/index/ObjectIndexCache.java
Expand Up @@ -87,6 +87,7 @@ public synchronized boolean has(final byte[] key) {
return index1.has(key);
}

/*
public synchronized Row.Entry put(final Row.Entry entry) {
assert (entry != null);
finishInitialization();
Expand All @@ -100,8 +101,22 @@ public synchronized Row.Entry put(final Row.Entry entry) {
// else place it in the index1
return index1.put(entry);
}
*/
/**
 * Stores {@code entry} in whichever of the two internal indexes is
 * responsible for its primary key.
 *
 * If {@code index0} already holds the key, the row is written there;
 * otherwise it is written to {@code index1}.
 *
 * @param entry the row to store; asserted to be non-null
 * @return whatever the selected index's {@code put} returns
 *         (presumably the row previously stored under that key, verify against the index implementation)
 */
public synchronized Row.Entry put(final Row.Entry entry) {
    assert (entry != null);
    finishInitialization();
    assert index0.isSorted();

    final byte[] primaryKey = entry.getPrimaryKeyBytes();
    // keys known to index0 are replaced in place; every other key goes to index1
    return index0.has(primaryKey) ? index0.put(entry) : index1.put(entry);
}

public Entry put(final Entry row, final Date entryDate) {
/**
 * Stores {@code row} by delegating to the single-argument {@code put}.
 *
 * @param row       the row to store
 * @param entryDate not evaluated by this implementation
 * @return the result of {@code put(row)}
 */
public Entry put(final Entry row, final Date entryDate) {
    // entryDate is not used here; only the row is forwarded
    final Entry result = put(row);
    return result;
}

Expand All @@ -128,7 +143,16 @@ public void addUniqueMultiple(final List<Entry> rows) {
while (i.hasNext()) addUnique(i.next());
}

public synchronized ArrayList<RowCollection> removeDoubles() {
/**
 * Increments column {@code col} of the row stored under {@code key} by
 * {@code add}, looking in {@code index0} first and {@code index1} second.
 *
 * {@code index0} is asked without an initial row. If it answers with
 * {@link Long#MIN_VALUE}, the request is forwarded to {@code index1}
 * together with {@code initrow}.
 *
 * @param key     the key of the row to update; asserted to be non-null
 * @param col     the column to increment
 * @param add     the amount to add
 * @param initrow the row passed on to {@code index1} when {@code index0} did not handle the key
 * @return the value from {@code index0} unless it is {@link Long#MIN_VALUE},
 *         otherwise the value returned by {@code index1}
 */
public synchronized long inc(final byte[] key, int col, long add, Row.Entry initrow) {
    assert (key != null);
    finishInitialization();
    assert index0.isSorted();

    // first attempt: index0, with no row to fall back on
    final long fromIndex0 = index0.inc(key, col, add, null);
    if (fromIndex0 == Long.MIN_VALUE) {
        // index0 signalled "nothing done"; let index1 handle it
        return index1.inc(key, col, add, initrow);
    }
    return fromIndex0;
}

public synchronized ArrayList<RowCollection> removeDoubles() {
// finish initialization phase explicitly
index0.sort();
if (index1 == null) {
Expand Down
14 changes: 7 additions & 7 deletions source/de/anomic/kelondro/index/Row.java
Expand Up @@ -464,20 +464,20 @@ private final void setCol(final int encoder, final int offset, final int length,
}
}

public final void addCol(final int column, long c) {
public final long incCol(final int column, long c) {
int encoder = row[column].encoder;
int colstrt = colstart[column];
int cellwidth = row[column].cellwidth;
long l;
switch (encoder) {
case Column.encoder_b64e:
l = Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
Base64Order.enhancedCoder.encodeLong(l + c, rowinstance, offset, cellwidth);
return;
l = c + Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
Base64Order.enhancedCoder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
return l;
case Column.encoder_b256:
l = NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
NaturalOrder.encodeLong(l + c, rowinstance, offset, cellwidth);
return;
l = c + NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
NaturalOrder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
return l;
}
throw new kelondroException("ROW", "addCol did not find appropriate encoding");
}
Expand Down
19 changes: 19 additions & 0 deletions source/de/anomic/kelondro/index/RowSet.java
Expand Up @@ -135,6 +135,25 @@ public synchronized Row.Entry put(final Row.Entry entry) {
return oldentry;
}

/**
 * Increments column {@code col} of the row identified by {@code key}.
 *
 * If a row with that key is found, {@code add} is added to the column, the
 * row is stored back at its position and the new column value is returned.
 * If no row is found and {@code initrow} is given, {@code initrow} is added
 * as a new row and its value in {@code col} is returned. If no row is
 * found and {@code initrow} is {@code null}, nothing is changed.
 *
 * @param key     the key to look up
 * @param col     the column to increment
 * @param add     the amount to add to an existing row's column
 * @param initrow the row to insert when the key is absent, or {@code null} to insert nothing
 * @return the resulting column value, or {@link Long#MIN_VALUE} when the
 *         key is absent and {@code initrow} is {@code null}
 */
public synchronized long inc(byte[] key, int col, long add, Row.Entry initrow) {
    final int pos = find(key, 0, key.length);

    if (pos < 0) {
        if (initrow == null) {
            // nothing to update and nothing to insert: signal with Long.MIN_VALUE
            return Long.MIN_VALUE;
        }
        // key not present yet: create the entry from the supplied row
        super.addUnique(initrow);
        return initrow.getColLong(col);
    }

    // the entry existed before; no clone necessary
    final Row.Entry existing = get(pos, false);
    final long updated = existing.incCol(col, add);
    set(pos, existing);
    return updated;
}

private synchronized Row.Entry remove(final byte[] a, final int start, final int length) {
final int index = find(a, start, length);
if (index < 0) return null;
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/kelondro/text/IndexCollection.java
Expand Up @@ -439,7 +439,7 @@ public static IntegerHandleIndex referenceHashes(
final RowSet collection = new RowSet(payloadrow, arrayrow);
final int chunkcountInArray = collection.size();
for (int j = 0; j < chunkcountInArray; j++) {
references.inc(collection.get(j, false).getColBytes(0));
references.inc(collection.get(j, false).getColBytes(0), 1);
}
count++;
// write a log
Expand Down

0 comments on commit 3e4c28e

Please sign in to comment.