Skip to content

Commit

Permalink
- fixed a null pointer exception bug
Browse files Browse the repository at this point in the history
- switched off more write caches
- re-enabled index-abstracts search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2885 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Oct 31, 2006
1 parent 194d42b commit bd4f43c
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 38 deletions.
2 changes: 1 addition & 1 deletion doc/Download.html
Expand Up @@ -54,7 +54,7 @@ <h2>Download</h2>

<p><b>Latest Release:</b>
The latest YaCy release version is 0.48<br>
Nightly builds from compiles out of SVN can be obtained from <a href="http://latest.yacy-forum.de/">http://latest.yacy-forum.de/</a>.<br>
Nightly builds from compiles out of SVN can be obtained from <a href="http://latest.yacy-forum.net">http://latest.yacy-forum.net</a>.<br>

<ul>
<li>Generic release of YaCy (all platforms with J2SE 1.4.2: Linux, Mac OS X, Windows, Solaris):</li>
Expand Down
16 changes: 10 additions & 6 deletions source/de/anomic/kelondro/kelondroFlexTable.java
Expand Up @@ -56,7 +56,7 @@ public kelondroFlexTable(File path, String tablename, long buffersize, long prel
if (indexfile.exists()) {
// use existing index file
System.out.println("*** Using File index " + indexfile);
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
RAMIndex = false;
} else if ((preloadTime >= 0) && (stt > preloadTime)) {
// generate new index file
Expand Down Expand Up @@ -127,21 +127,25 @@ private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOExc
}

private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException {
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true);
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, false);
Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node;
kelondroRow.Entry indexentry;
int i;
int i, c = 0, all = super.col[0].size();
long start = System.currentTimeMillis();
long last = start;
while (content.hasNext()) {
node = (kelondroRecords.Node) content.next();
i = node.handle().hashCode();
indexentry = treeindex.row().newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setCol(1, i);
treeindex.put(indexentry);
if ((i % 10000) == 0) {
System.out.print('.');
treeindex.addUnique(indexentry);
c++;
if (System.currentTimeMillis() - last > 30000) {
System.out.println(".. generated " + c+ " entries, " + ((System.currentTimeMillis() - start) / c * (all - c) / 60000) + " minutes remaining");
System.out.flush();
last = System.currentTimeMillis();
}
}
return treeindex;
Expand Down
3 changes: 3 additions & 0 deletions source/de/anomic/net/URL.java
Expand Up @@ -44,6 +44,8 @@ public URL(String url) throws MalformedURLException {

public void parseURLString(String url) throws MalformedURLException {
// identify protocol
assert (url != null);
url = url.trim();
int p = url.indexOf(':');
if (p < 0) throw new MalformedURLException("protocol is not given in '" + url + "'");
this.protocol = url.substring(0, p).toLowerCase().trim();
Expand Down Expand Up @@ -104,6 +106,7 @@ public URL(File file) throws MalformedURLException {

public URL(URL baseURL, String relPath) throws MalformedURLException {
if (baseURL == null) throw new MalformedURLException("base URL is null");
if (relPath == null) throw new MalformedURLException("relPath is null");
int p = relPath.indexOf(':');
String relprotocol = (p < 0) ? null : relPath.substring(0, p).toLowerCase();
if (relprotocol != null) {
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/plasma/plasmaCrawlLURLOldEntry.java
Expand Up @@ -117,7 +117,7 @@ public plasmaCrawlLURLOldEntry(
public plasmaCrawlLURLOldEntry(kelondroRow.Entry entry, indexEntry searchedWord) throws IOException {
try {
this.urlHash = entry.getColString(0, null);
this.url = new URL(entry.getColString(1, "UTF-8").trim());
this.url = new URL(entry.getColString(1, "UTF-8"));
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
this.moddate = new Date(86400000 * entry.getColLong(3));
this.loaddate = new Date(86400000 * entry.getColLong(4));
Expand Down
28 changes: 16 additions & 12 deletions source/de/anomic/plasma/plasmaSearchEvent.java
Expand Up @@ -93,7 +93,7 @@ public plasmaSearchEvent(plasmaSearchQuery query,
this.snippetCache = snippetCache;
this.rcContainers = new indexContainer(null);
this.rcContainerFlushCount = 0;
this.rcAbstracts = new TreeMap();
this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
this.profileGlobal = remoteTiming;
this.postsort = postsort;
Expand Down Expand Up @@ -134,7 +134,7 @@ public plasmaSearchResult search() {
// do a global search
// the result of the fetch is then in the rcGlobal
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchTimingProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 2;
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts,
Expand All @@ -144,7 +144,8 @@ public plasmaSearchResult search() {
Map searchContainerMap = localSearchContainers(null);

// use the search containers to fill up rcAbstracts locally
if (searchContainerMap != null) {
/*
if ((rcAbstracts != null) && (searchContainerMap != null)) {
Iterator i, ci = searchContainerMap.entrySet().iterator();
Map.Entry entry;
String wordhash;
Expand All @@ -165,20 +166,19 @@ public plasmaSearchResult search() {
}
}
}
*/

// try to pre-fetch some LURLs if there is enough time
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
prefetchLocal(rcLocal, secondaryTimeout);

// evaluate index abstracts and start a secondary search
// this is temporary debugging code to learn that the index abstracts are fetched correctly
/*
while (System.currentTimeMillis() < secondaryTimeout + 10000) {
while (System.currentTimeMillis() < secondaryTimeout) {
if (yacySearch.remainingWaiting(primarySearchThreads) == 0) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}
if (query.size() > 1) prepareSecondarySearch();
*/
// evaluate index abstracts and start a secondary search
if (rcAbstracts != null) prepareSecondarySearch();

// catch up global results:
// wait until primary timeout passed
Expand All @@ -187,6 +187,7 @@ public plasmaSearchResult search() {
((secondarySearchThreads == null) || (yacySearch.remainingWaiting(secondarySearchThreads) == 0))) break; // all threads have finished
try {Thread.sleep(100);} catch (InterruptedException e) {}
}

int globalContributions = rcContainers.size();

// finished searching
Expand Down Expand Up @@ -243,19 +244,22 @@ private void prepareSecondarySearch() {
String url, urls, peer, peers;
String mypeerhash = yacyCore.seedDB.mySeed.hash;
boolean mypeerinvolved = false;
int mypeercount;
while (i1.hasNext()) {
entry1 = (Map.Entry) i1.next();
url = (String) entry1.getKey();
peers = (String) entry1.getValue();
System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
mypeercount = 0;
for (int j = 0; j < peers.length(); j = j + 12) {
peer = peers.substring(j, j + 12);
if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
if ((peer.equals(mypeerhash)) && (mypeercount++ > 1)) continue;
//if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
urls = (String) secondarySearchURLs.get(peer);
urls = (urls == null) ? url : urls + url;
secondarySearchURLs.put(peer, urls);
if (peer.equals(mypeerhash)) mypeerinvolved = true;
}
if (mypeercount == 1) mypeerinvolved = true;
}

// compute words for secondary search and start the secondary searches
Expand All @@ -269,8 +273,8 @@ private void prepareSecondarySearch() {
if (peer.equals(mypeerhash)) continue; // we dont need to ask ourself
urls = (String) entry1.getValue();
words = wordsFromPeer(peer, urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT: peer " + peer + " from words: " + words);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, urlStore, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
profileGlobal, ranking);
Expand Down
38 changes: 20 additions & 18 deletions source/de/anomic/yacy/yacyClient.java
Expand Up @@ -433,7 +433,8 @@ public static String[] search(
obj.put("maxdist", maxDistance);
obj.put("rankingProfile", rankingProfile.toExternalString());
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));

if (abstractCache != null) obj.put("abstracts", "auto");

//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();

Expand Down Expand Up @@ -553,26 +554,27 @@ public static String[] search(
for (int m = 0; m < words; m++) { containerCache.add(container[m], -1); }

// read index abstract
Iterator i = result.entrySet().iterator();
Map.Entry entry;
TreeMap singleAbstract;
String wordhash;
serverByteBuffer ci;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
if (((String) entry.getKey()).startsWith("indexabstract.")) {
wordhash = ((String) entry.getKey()).substring(14);
synchronized (abstractCache) {
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
if (abstractCache != null) {
Iterator i = result.entrySet().iterator();
Map.Entry entry;
TreeMap singleAbstract;
String wordhash;
serverByteBuffer ci;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
if (((String) entry.getKey()).startsWith("indexabstract.")) {
wordhash = ((String) entry.getKey()).substring(14);
synchronized (abstractCache) {
singleAbstract = (TreeMap) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap();
ci = new serverByteBuffer(((String) entry.getValue()).getBytes());
System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexURL.decompressIndex(singleAbstract, ci, targetPeer.hash);
abstractCache.put(wordhash, singleAbstract);
}
}
}
}

// generate statistics
long searchtime;
try {
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/yacy/yacySearch.java
Expand Up @@ -237,6 +237,7 @@ public static int remainingWaiting(yacySearch[] searchThreads) {
if (searchThreads == null) return 0;
int alive = 0;
for (int i = 0; i < searchThreads.length; i++) {
if (searchThreads == null) break; // may occur
if (searchThreads[i].isAlive()) alive++;
}
return alive;
Expand Down

0 comments on commit bd4f43c

Please sign in to comment.