Skip to content

Commit

Permalink
- added a new solr field references_i which stores the number of
Browse files Browse the repository at this point in the history
INCOMING links to the corresponding web page. This information is taken
from the reverse link index (a 'little sister' of the RWI index).
- this field can be of use to enhance the ranking because a web page
with more incoming links can be more important than others. But
this is not true for typical link pages like menus. Therefore the
number of outgoing links is needed.
- added a new solr attribute 'bf' to solr queries which is a boost
function extension. This field can contain a formula which computes the
boost according to given field values. After some experiments the
following formula is now default:
div(add(1,references_i),pow(add(1,inboundlinkscount_i),1.6))^0.4
This takes the number of references and the inbound links. Further
experiments are needed to enhance that formula.
  • Loading branch information
Orbiter committed Dec 18, 2012
1 parent 7c3de8b commit 1052263
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 5 deletions.
3 changes: 3 additions & 0 deletions defaults/solr.keys.list
Expand Up @@ -65,6 +65,9 @@ httpstatus_i
## redirect url if the error code is 299 < httpstatus_i < 310
#httpstatus_redirect_s

## number of unique http references; used for ranking
references_i

### optional but highly recommended values, part of the index distribution process

## time when resource was loaded
Expand Down
4 changes: 2 additions & 2 deletions htroot/gsa/searchresult.java
Expand Up @@ -119,8 +119,8 @@ public static serverObjects respond(final RequestHeader header, final serverObje
post.put(CommonParams.Q, solrQ.toString());
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b);
post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back
post.put("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
post.put("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
post.put(CommonParams.FL,
YaCySchema.content_type.getSolrFieldName() + ',' +
YaCySchema.id.getSolrFieldName() + ',' +
Expand Down
19 changes: 18 additions & 1 deletion source/net/yacy/cora/federate/solr/Boost.java
Expand Up @@ -46,7 +46,8 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
YaCySchema.description,
YaCySchema.keywords,
YaCySchema.text_t,
YaCySchema.synonyms_sxt
YaCySchema.synonyms_sxt,
YaCySchema.references_i
};

// for minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLen = 3 the quantRate value must be not below 0.5!
Expand Down Expand Up @@ -118,4 +119,20 @@ public int getMinTokenLen() {
return minTokenLen;
}

/**
 * Build the string that is passed to Solr as the 'bq' (boost query) attribute.
 * The result has the form <code>&lt;field&gt;:true^&lt;boost&gt;</code>, where the field is the
 * Solr field name of {@link YaCySchema#fuzzy_signature_unique_b} and the boost is
 * the value stored in this map for that schema entry.
 * @return the boost query string
 */
public String getBoostQuery() {
// resolve the field name first, then the configured boost for that field
final String uniqueFieldName = YaCySchema.fuzzy_signature_unique_b.getSolrFieldName();
final float uniqueBoost = this.get(YaCySchema.fuzzy_signature_unique_b);
final StringBuilder boostQuery = new StringBuilder(uniqueFieldName);
boostQuery.append(":true^");
boostQuery.append(Float.toString(uniqueBoost));
return boostQuery.toString();
}

/**
 * Produce the boost function string (a Solr function-query formula).
 * The formula divides <code>1 + references_i</code> by
 * <code>(1 + inboundlinkscount_i)</code> raised to the power 1.6; the whole
 * function carries the weight suffix <code>^0.4</code>.
 * The formula is currently fixed and does not depend on the values stored in this map.
 * @return the boost function formula
 */
public String getBoostFunction() {
final String numerator = "add(1,references_i)";
final String denominator = "pow(add(1,inboundlinkscount_i),1.6)";
return "div(" + numerator + "," + denominator + ")^0.4";
}

}
1 change: 1 addition & 0 deletions source/net/yacy/cora/federate/solr/YaCySchema.java
Expand Up @@ -47,6 +47,7 @@ public enum YaCySchema implements Schema {
failtype_s(SolrType.string, true, true, false, "fail type if a page was not loaded. This field is either empty, 'excl' or 'fail'"),
httpstatus_i(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_redirect_s(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
references_i(SolrType.num_integer, true, true, false, "number of unique http references; used for ranking"),

// optional but recommended, part of index distribution
load_date_dt(SolrType.date, true, true, false, "time when resource was loaded"),
Expand Down
7 changes: 7 additions & 0 deletions source/net/yacy/search/index/Segment.java
Expand Up @@ -383,6 +383,13 @@ public SolrInputDocument storeDocument(
}
}

// ENRICH DOCUMENT WITH RANKING INFORMATION
if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) {
int references = this.urlCitationIndex.count(url.hash());
if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references);
}


// STORE TO SOLR
String error = null;
tryloop: for (int i = 0; i < 20; i++) {
Expand Down
4 changes: 2 additions & 2 deletions source/net/yacy/search/query/QueryParams.java
Expand Up @@ -428,8 +428,8 @@ public SolrQuery solrQuery() {
// construct query
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b);
params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back
params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
params.setStart(this.offset);
params.setRows(this.itemsPerPage);
params.setFacet(false);
Expand Down

0 comments on commit 1052263

Please sign in to comment.