diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index 1739252553..78c51beb32 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -65,6 +65,9 @@ httpstatus_i ## redirect url if the error code is 299 < httpstatus_i < 310 #httpstatus_redirect_s +## number of unique http references; used for ranking +references_i + ### optional but highly recommended values, part of the index distribution process ## time when resource was loaded diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index 14a3f92786..7797426583 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -119,8 +119,8 @@ public static serverObjects respond(final RequestHeader header, final serverObje post.put(CommonParams.Q, solrQ.toString()); post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100)); - float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b); - post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back + post.put("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back + post.put("bf", Boost.RANKING.getBoostFunction()); // a boost function extension post.put(CommonParams.FL, YaCySchema.content_type.getSolrFieldName() + ',' + YaCySchema.id.getSolrFieldName() + ',' + diff --git a/source/net/yacy/cora/federate/solr/Boost.java b/source/net/yacy/cora/federate/solr/Boost.java index c6a8c3e181..0348e1c3c2 100644 --- a/source/net/yacy/cora/federate/solr/Boost.java +++ b/source/net/yacy/cora/federate/solr/Boost.java @@ -46,7 +46,8 @@ public class Boost extends LinkedHashMap { YaCySchema.description, YaCySchema.keywords, YaCySchema.text_t, - YaCySchema.synonyms_sxt + YaCySchema.synonyms_sxt, + YaCySchema.references_i }; // for minTokenLen = 2 the quantRate value should not be below 0.24; for 
minTokenLen = 3 the quantRate value must be not below 0.5! @@ -118,4 +119,20 @@ public int getMinTokenLen() { return minTokenLen; } + /** + * Produce the string intended for the 'bq' (boost query) request parameter. + * @return the Solr field name of fuzzy_signature_unique_b, followed by ':true^' and the boost value this object holds for that field + */ + public String getBoostQuery() { + return YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(this.get(YaCySchema.fuzzy_signature_unique_b)); + } + + /** + * Produce the boost function string; the expression is a fixed literal and reads no boost value from this object. NOTE(review): references_i and inboundlinkscount_i are hard-coded here, while getBoostQuery resolves its field through getSolrFieldName() - confirm the literals match the active schema. + * @return the constant function expression div(add(1,references_i),pow(add(1,inboundlinkscount_i),1.6))^0.4 + */ + public String getBoostFunction() { + return "div(add(1,references_i),pow(add(1,inboundlinkscount_i),1.6))^0.4"; + } + } diff --git a/source/net/yacy/cora/federate/solr/YaCySchema.java b/source/net/yacy/cora/federate/solr/YaCySchema.java index 857bc3be35..2436f53522 100644 --- a/source/net/yacy/cora/federate/solr/YaCySchema.java +++ b/source/net/yacy/cora/federate/solr/YaCySchema.java @@ -47,6 +47,7 @@ public enum YaCySchema implements Schema { failtype_s(SolrType.string, true, true, false, "fail type if a page was not loaded. This field is either empty, 'excl' or 'fail'"), httpstatus_i(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"), httpstatus_redirect_s(SolrType.num_integer, true, true, false, "html status return code (i.e. 
\"200\" for ok), -1 if not loaded"), + references_i(SolrType.num_integer, true, true, false, "number of unique http references; used for ranking"), // optional but recommended, part of index distribution load_date_dt(SolrType.date, true, true, false, "time when resource was loaded"), diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index f6ef24d750..c7fb42215e 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -383,6 +383,13 @@ public SolrInputDocument storeDocument( } } + // ENRICH DOCUMENT WITH RANKING INFORMATION + if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) { + int references = this.urlCitationIndex.count(url.hash()); + if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references); + } + + // STORE TO SOLR String error = null; tryloop: for (int i = 0; i < 20; i++) { diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 00afbc9f06..1bd921ee9a 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -428,8 +428,8 @@ public SolrQuery solrQuery() { // construct query final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); - float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b); - params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back + params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back + params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension params.setStart(this.offset); params.setRows(this.itemsPerPage); params.setFacet(false);