Skip to content

Commit

Permalink
final version of collection entry type definition
Browse files Browse the repository at this point in the history
- the test phase of the new collection data structure is finished
- test data generated during the test phase is now invalid and will not be migrated
- the new collection files are located in DATA/INDEX/PUBLIC/TEXT/RICOLLECTION
- the existing index dump is invalid as well and will not be migrated
- the new index dump is in DATA/INDEX/PUBLIC/TEXT/RICACHE

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2983 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Nov 19, 2006
1 parent 58d79a9 commit e3d75f4
Show file tree
Hide file tree
Showing 19 changed files with 290 additions and 229 deletions.
5 changes: 3 additions & 2 deletions htroot/IndexControl_p.java
Expand Up @@ -304,7 +304,8 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
}
i++;
}
prop.put("keyhashsimilar_rows", rows);
prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
prop.put("keyhashsimilar_rows", rows + 1);
prop.put("result", "");
} catch (IOException e) {
prop.put("result", "unknown keys: " + e.getMessage());
Expand Down Expand Up @@ -439,7 +440,7 @@ public static serverObjects genUrlList(plasmaSwitchboard switchboard, String key

prop.put("genUrlList_keyHash", keyhash);

if (index.size() == 0) {
if ((index == null) || (index.size() == 0)) {
prop.put("genUrlList", 1);
} else {
final Iterator en = index.entries();
Expand Down
1 change: 0 additions & 1 deletion source/de/anomic/http/httpc.java
Expand Up @@ -83,7 +83,6 @@
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
Expand Down
11 changes: 3 additions & 8 deletions source/de/anomic/index/indexCachedRI.java
Expand Up @@ -86,10 +86,6 @@ public long getUpdateTime(String wordHash) {
return entries.updated();
}

public indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, payloadrow);
}

public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean intern) {
// add the entry
if (intern) {
Expand Down Expand Up @@ -219,10 +215,9 @@ public void close(int waitingBoundSeconds) {
}

public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, payloadrow);
c.add(riIntern.deleteContainer(wordHash), -1);
c.add(riExtern.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1);
indexContainer c = riIntern.deleteContainer(wordHash);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1);
if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1);
return c;
}

Expand Down
8 changes: 4 additions & 4 deletions source/de/anomic/index/indexCollectionRI.java
Expand Up @@ -104,7 +104,7 @@ public Object next() {
byte[] key = (byte[]) oo[0];
kelondroRowSet collection = (kelondroRowSet) oo[1];
if (collection == null) return null;
return new indexContainer(new String(key), collection);
return new indexContainer(new String(key), collection, true);
}

public void remove() {
Expand All @@ -118,7 +118,7 @@ public synchronized indexContainer getContainer(String wordHash, Set urlselectio
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection != null) collection.select(urlselection);
if ((collection == null) || (collection.size() == 0)) return null;
return new indexContainer(wordHash, collection);
return new indexContainer(wordHash, collection, true);
} catch (IOException e) {
return null;
}
Expand All @@ -128,7 +128,7 @@ public synchronized indexContainer deleteContainer(String wordHash) {
try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null;
return new indexContainer(wordHash, collection);
return new indexContainer(wordHash, collection, true);
} catch (IOException e) {
return null;
}
Expand All @@ -153,7 +153,7 @@ public synchronized int removeEntries(String wordHash, Set urlHashes, boolean de
}

public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow(), true);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
Expand Down
41 changes: 29 additions & 12 deletions source/de/anomic/index/indexContainer.java
Expand Up @@ -41,30 +41,34 @@
public class indexContainer extends kelondroRowSet {

private String wordHash;
private boolean newRWI;

public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache) {
public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache, boolean newRWI) {
super(rowdef, objectCount, cache, kelondroBase64Order.enhancedCoder, 0, 0);
this.wordHash = wordHash;
this.newRWI = newRWI;
}

public indexContainer(String wordHash, kelondroRow rowdef) {
this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0);
public indexContainer(String wordHash, kelondroRow rowdef, boolean newRWI) {
this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0, newRWI);
}

public indexContainer(String wordHash, kelondroRowSet collection) {
public indexContainer(String wordHash, kelondroRowSet collection, boolean newRWI) {
super(collection);
this.wordHash = wordHash;
this.newRWI = newRWI;
}

public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column) {
public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column, boolean newRWI) {
super(rowdef);
this.wordHash = wordHash;
this.lastTimeWrote = 0;
this.setOrdering(ordering, column);
this.newRWI = newRWI;
}

public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn);
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn, this.newRWI);
newContainer.add(this, -1);
return newContainer;
}
Expand Down Expand Up @@ -123,7 +127,11 @@ private boolean addi(indexRWIEntry entry) {
if (oldEntryRow == null) {
return true;
} else {
indexRWIEntry oldEntry = new indexRWIEntryOld(oldEntryRow); // FIXME: see if cloning is necessary
indexRWIEntry oldEntry;
if (entry instanceof indexRWIEntryNew)
oldEntry = new indexRWIEntryNew(oldEntryRow);
else
oldEntry = new indexRWIEntryOld(oldEntryRow); // FIXME: see if cloning is necessary
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
Expand All @@ -136,13 +144,19 @@ private boolean addi(indexRWIEntry entry) {
public indexRWIEntry get(String urlHash) {
kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null;
return new indexRWIEntryOld(entry);
if (this.newRWI)
return new indexRWIEntryNew(entry);
else
return new indexRWIEntryOld(entry);
}

public indexRWIEntry remove(String urlHash) {
kelondroRow.Entry entry = this.remove(urlHash.getBytes());
if (entry == null) return null;
return new indexRWIEntryOld(entry);
if (this.newRWI)
return new indexRWIEntryNew(entry);
else
return new indexRWIEntryOld(entry);
}

public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
Expand Down Expand Up @@ -178,7 +192,10 @@ public boolean hasNext() {
public Object next() {
kelondroRow.Entry rentry = (kelondroRow.Entry) rowEntryIterator.next();
if (rentry == null) return null;
return new indexRWIEntryOld(rentry);
if (newRWI)
return new indexRWIEntryNew(rentry);
else
return new indexRWIEntryOld(rentry);
}

public void remove() {
Expand Down Expand Up @@ -288,7 +305,7 @@ private static indexContainer joinConstructiveByTest(indexContainer small, index
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
int keylength = small.rowdef.width(0);
assert (keylength == large.rowdef.width(0));
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, small.rowdef, small.newRWI); // start with empty search result
Iterator se = small.entries();
indexRWIEntry ie0, ie1;
long stamp = System.currentTimeMillis();
Expand All @@ -311,7 +328,7 @@ private static indexContainer joinConstructiveByEnumeration(indexContainer i1, i
assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString();
int keylength = i1.rowdef.width(0);
assert (keylength == i2.rowdef.width(0));
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, i1.rowdef, i1.newRWI); // start with empty search result
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();
Expand Down
35 changes: 22 additions & 13 deletions source/de/anomic/index/indexRAMRI.java
Expand Up @@ -58,6 +58,7 @@ public final class indexRAMRI implements indexRI {
private String indexArrayFileName;
private kelondroRow payloadrow;
private kelondroRow bufferStructureBasis;
private boolean newRWI;

// calculated constants
private static String maxKey;
Expand All @@ -66,7 +67,7 @@ public final class indexRAMRI implements indexRI {
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}

public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log, boolean newRWI) {

// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
Expand All @@ -78,6 +79,7 @@ public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReference
this.cacheMaxCount = 10000;
this.cacheReferenceLimit = wCacheReferenceLimitInit;
this.log = log;
this.newRWI = newRWI;
this.indexArrayFileName = dumpname;
this.payloadrow = payloadrow;
this.bufferStructureBasis = new kelondroRow(
Expand Down Expand Up @@ -178,7 +180,10 @@ private long restore() throws IOException {
if ((row == null) || (row.empty(0)) || (row.empty(3))) continue;
wordHash = row.getColString(0, "UTF-8");
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new indexRWIEntryOld(row.getColBytes(3));
if (newRWI)
wordEntry = new indexRWIEntryNew(row.getColBytes(3));
else
wordEntry = new indexRWIEntryOld(row.getColBytes(3));
// store to cache
addEntry(wordHash, wordEntry, startTime, false);
urlCount++;
Expand Down Expand Up @@ -421,25 +426,29 @@ public synchronized int tryRemoveURLs(String urlHash) {
public synchronized indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
if ((container == null) || (container.size() == 0)) return null;

// put new words into cache
// put container into wCache
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexContainer(wordHash, container.row());
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) {
entries = container.topLevelClone();
added = entries.size();
} else {
added = entries.add(container, -1);
if (added > 0) {
cache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
}
if (added > 0) {
cache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
return null;
}

public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, newEntry instanceof indexRWIEntryNew);
indexRWIEntry[] entries = new indexRWIEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
Expand Down
6 changes: 3 additions & 3 deletions source/de/anomic/index/indexRWIEntryNew.java
Expand Up @@ -52,7 +52,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
new kelondroColumn("y", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "lother"),
new kelondroColumn("m", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "urlLength"),
new kelondroColumn("n", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "urlComps"),
new kelondroColumn("g", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, 1, "typeofword"),
new kelondroColumn("g", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "typeofword"),
new kelondroColumn("z", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 4, "flags"),
new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "hitcount"),
new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 2, "posintext"),
Expand Down Expand Up @@ -132,7 +132,7 @@ public indexRWIEntryNew(String urlHash,
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
this.entry.setCol(col_urlComps, urlComps);
this.entry.setCol(col_typeofword, 0); // TODO: grammatical classification
this.entry.setCol(col_typeofword, new byte[]{(byte) 0}); // TODO: grammatical classification
this.entry.setCol(col_flags, null); // TODO: generate flags
this.entry.setCol(col_hitcount, hitcount);
this.entry.setCol(col_posintext, posintext);
Expand All @@ -159,7 +159,7 @@ public indexRWIEntryNew(indexRWIEntryOld oldEntry) {
int domlen = plasmaURL.domLengthEstimation(oldEntry.urlHash());
this.entry.setCol(col_urlLength, domlen * 2); // estimated
this.entry.setCol(col_urlComps, domlen / 3); // estimated
this.entry.setCol(col_typeofword, 0);
this.entry.setCol(col_typeofword, new byte[]{(byte) 0});
this.entry.setCol(col_flags, null);
this.entry.setCol(col_hitcount, oldEntry.hitcount());
this.entry.setCol(col_posintext, oldEntry.posintext());
Expand Down
8 changes: 7 additions & 1 deletion source/de/anomic/index/indexURLEntryOld.java
Expand Up @@ -160,7 +160,13 @@ public indexURLEntryOld(Properties prop) {
this.snippet = prop.getProperty("snippet", "");
if (snippet.length() == 0) snippet = null;
else snippet = crypt.simpleDecode(snippet, null);
this.word = (prop.containsKey("word")) ? new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
this.word = null;
if (prop.containsKey("word")) {
this.word = new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", "")));
}
if (prop.containsKey("wi")) {
this.word = new indexRWIEntryNew(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("wi", "")));
}
} catch (Exception e) {
serverLog.logSevere("PLASMA",
"INTERNAL ERROR in plasmaLURL.entry/2:"
Expand Down
4 changes: 2 additions & 2 deletions source/de/anomic/kelondro/kelondroBase64Order.java
Expand Up @@ -298,9 +298,9 @@ public final int compares(byte[] a, int aoffset, int alength, byte[] b, int boff
bc = b[boffset + i];
assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + serverLog.arrayList(b, boffset, len);
acc = ahpla[ac];
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + serverLog.table(a, 16, aoffset);
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + "/" + new String(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + serverLog.table(a, 16, aoffset);
bcc = ahpla[bc];
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + serverLog.table(b, 16, boffset);
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + "/" + new String(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + serverLog.table(b, 16, boffset);
if (acc > bcc) return 1;
if (acc < bcc) return -1;
// else the bytes are equal and it may go on yet undecided
Expand Down
8 changes: 4 additions & 4 deletions source/de/anomic/kelondro/kelondroRowCollection.java
Expand Up @@ -130,7 +130,7 @@ private kelondroRow exportRow(int chunkcachelength) {
}

public static final int exportOverheadSize = 14;

public byte[] exportCollection() {
// returns null if the collection is empty
trim();
Expand All @@ -147,15 +147,15 @@ public byte[] exportCollection() {
entry.setCol(exp_collection, chunkcache);
return entry.bytes();
}

/**
 * Returns the row definition held by this object.
 *
 * @return the current value of the {@code rowdef} field (the same reference, not a copy)
 */
public kelondroRow row() {
return this.rowdef;
}

private final void ensureSize(int elements) {
int needed = elements * rowdef.objectsize();
if (chunkcache.length >= needed) return;
byte[] newChunkcache = new byte[needed * 12 / 10]; // increase space by 20%
byte[] newChunkcache = new byte[needed * 2]; // increase space
System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length);
chunkcache = newChunkcache;
newChunkcache = null;
Expand Down Expand Up @@ -441,7 +441,7 @@ private final void isort(int L, int R) {

protected final int swap(int i, int j, int p) {
if (i == j) return p;
if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) {
if ((this.chunkcount + 1) * this.rowdef.objectsize() < this.chunkcache.length) {
// there is space in the chunkcache that we can use as buffer
System.arraycopy(chunkcache, this.rowdef.objectsize() * i, chunkcache, chunkcache.length - this.rowdef.objectsize(), this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());
Expand Down

0 comments on commit e3d75f4

Please sign in to comment.