Skip to content

Commit

Permalink
better search computation:
Browse files Browse the repository at this point in the history
- increased sort limit, now 3000 entries, before: 1000
  this should allow more results to be shown in case
  of strongly limiting constraints, like domain navigation
- enhanced the sort process
- check against domain navigator bugs
- fix in sort stack
- now showing all navigation pages at the first search (not only the next page)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6569 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
orbiter committed Jan 12, 2010
1 parent d126d6c commit 1817245
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 38 deletions.
2 changes: 1 addition & 1 deletion htroot/yacysearch.java
Expand Up @@ -591,7 +591,7 @@ public static serverObjects respond(final RequestHeader header, final serverObje
resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation));
resnav.append("\"><img src=\"env/grafics/navdl.gif\" width=\"16\" height=\"16\"></a>&nbsp;");
}
final int numberofpages = Math.min(10, Math.min(thispage + 2, totalcount / theQuery.displayResults()));
final int numberofpages = Math.min(10, Math.max(thispage + 1, totalcount / theQuery.displayResults()));
for (int i = 0; i < numberofpages; i++) {
if (i == thispage) {
resnav.append("<img src=\"env/grafics/navs");
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/search/DocumentIndex.java
Expand Up @@ -201,7 +201,7 @@ public static final ArrayList<URIMetadataRow> findMetadata(

public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {

RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 2);
rankedCache.run();

ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();
Expand Down
66 changes: 34 additions & 32 deletions source/de/anomic/search/RankingProcess.java
Expand Up @@ -67,7 +67,7 @@ public final class RankingProcess extends Thread {

private final QueryParams query;
private final int maxentries;
private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final ConcurrentHashMap<String, Long> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
//private final int[] domZones;
Expand Down Expand Up @@ -99,7 +99,7 @@ public RankingProcess(final QueryParams query, final ReferenceOrder order, final
this.remote_indexCount = 0;
this.remote_resourceSize = 0;
this.local_resourceSize = 0;
this.urlhashes = new ConcurrentHashMap<String, Integer>(0, 0.75f, concurrency);
this.urlhashes = new ConcurrentHashMap<String, Long>(0, 0.75f, concurrency);
this.misses = new TreeSet<String>();
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
Expand Down Expand Up @@ -172,7 +172,8 @@ public void add(final ReferenceContainer<WordReference> index, final boolean loc
String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
WordReferenceVars iEntry;
final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();
Long r;
//final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();

// apply all constraints
try {
Expand Down Expand Up @@ -225,39 +226,39 @@ public void add(final ReferenceContainer<WordReference> index, final boolean loc
}

// accept
filteredEntries.add(iEntry);
//filteredEntries.add(iEntry);

// increase counter for statistics
if (!local) this.remote_indexCount++;
if (!local) this.remote_indexCount++;/*
}
} catch (InterruptedException e) {}
// do the ranking
Long r;
for (WordReferenceVars fEntry: filteredEntries) {

// kick out entries that are too bad according to current findings
r = Long.valueOf(this.order.cardinal(fEntry));
assert maxentries != 0;
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;

// insert
if ((maxentries < 0) || (stack.size() < maxentries)) {
// in case that we don't have enough yet, accept any new entry
if (urlhashes.containsKey(fEntry.metadataHash())) continue;
stack.push(fEntry, r);
} else {
// if we already have enough entries, insert only such that are necessary to get a better result
if (stack.bottom(r.longValue())) {
continue;
}
// double-check
if (urlhashes.containsKey(fEntry.metadataHash())) continue;
stack.push(fEntry, r);
}

}
// do the ranking
for (WordReferenceVars fEntry: filteredEntries) {
*/
// kick out entries that are too bad according to current findings
r = Long.valueOf(this.order.cardinal(iEntry));
assert maxentries != 0;

// double-check
if (urlhashes.containsKey(iEntry.metadataHash())) continue;

// insert
if (maxentries < 0 || stack.size() < maxentries) {
// in case that we don't have enough yet, accept any new entry
stack.push(iEntry, r);
} else {
// if we already have enough entries, insert only such that are necessary to get a better result
if (stack.bottom(r.longValue())) continue;

// take the entry. the stack is automatically reduced
// to the maximum size by deletion of elements at the bottom
stack.push(iEntry, r);
}
urlhashes.put(iEntry.metadataHash(), r);
}

} catch (InterruptedException e) {}

//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
}
Expand Down Expand Up @@ -574,14 +575,15 @@ public ArrayList<NavigatorEntry> getHostNavigator(int count) {
URIMetadataRow mr;
DigestURI url;
String hostname;
for (int i = 0; i < rc; i++) {
loop: for (int i = 0; i < rc; i++) {
mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
if (mr == null) continue;
url = mr.metadata().url();
if (url == null) continue;
hostname = url.getHost();
if (hostname == null) continue;
if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new NavigatorEntry(hostname, hsa[i].count));
}
return result;
Expand Down
2 changes: 1 addition & 1 deletion source/de/anomic/search/SearchEvent.java
Expand Up @@ -59,7 +59,7 @@ public final class SearchEvent {
public static final String NORMALIZING = "normalizing";
public static final String FINALIZATION = "finalization";

private static final int max_results_preparation = 1000;
public static final int max_results_preparation = 3000;

// class variables that may be implemented with an abstract class
private long eventTime;
Expand Down
14 changes: 11 additions & 3 deletions source/net/yacy/kelondro/util/SortStack.java
Expand Up @@ -177,10 +177,18 @@ public boolean bottom(final long weight) {
// returns true if the element with that weight would be on the bottom of the stack after inserting
if (this.onstack.isEmpty()) return true;
Long l;
synchronized (this.onstack) {
l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey();

if (this.upward) {
synchronized (this.onstack) {
l = this.onstack.lastKey();
}
return weight > l.longValue();
} else {
synchronized (this.onstack) {
l = this.onstack.firstKey();
}
return weight < l.longValue();
}
return weight > l.longValue();
}

public class stackElement {
Expand Down

0 comments on commit 1817245

Please sign in to comment.