Permalink
Browse files

Use multiple rescores to apply script scoring

This should spit out almost exactly the same results but has the following
advantages:
1.  Script score is never applied to more than 8192 documents per shard.
2.  Script score isn't calculated twice for the phrase rescore.

This helps searches that have a phrase rescore and find a ton of matches.
Mostly those are searches on large wikis for two common words.

Bug: 60151
Change-Id: I20f312a4b7c76b82e9e79970f5f835df20298763
  • Loading branch information...
Nik Everett
Nik Everett committed Apr 9, 2014
1 parent 65ec1a0 commit f83b8ffedf1b5aa1abf3c200debfd30454210a48
Showing with 21 additions and 33 deletions.
  1. +21 −33 includes/Searcher.php
View
@@ -98,9 +98,9 @@ class Searcher extends ElasticsearchIntermediary {
private $notFilters = array();
private $suggest = null;
/**
- * @var null|array of rescore configuration as used by elasticsearch. The query needs to be an Elastica query.
+ * @var array of rescore configurations as used by elasticsearch. The query needs to be an Elastica query.
*/
- private $rescore = null;
+ private $rescore = array();
/**
* @var float portion of article's score which decays with time. Defaults to 0 meaning don't decay the score
* with time since the last update.
@@ -449,7 +449,7 @@ function ( $matches ) use ( $searcher ) {
!$this->searchContainedSyntax &&
strpos( $queryStringQueryString, '"' ) === false &&
strpos( $queryStringQueryString, ' ' ) !== false ) {
- $this->rescore = array(
+ $this->rescore[] = array(
'window_size' => $wgCirrusSearchPhraseRescoreWindowSize,
'query' => array(
'rescore_query' => $this->buildSearchTextQueryForFields( $fields,
@@ -479,7 +479,7 @@ function ( $matches ) use ( $searcher ) {
'query' => $queryStringQueryString,
'default_operator' => 'AND',
) ) );
- $this->rescore = null; // Not worth trying in this state.
+ $this->rescore = array(); // Not worth trying in this state.
$result = $this->search( 'degraded_full_text', $originalTerm );
// If that doesn't work we're out of luck but it should. There no guarantee it'll work properly
// with the syntax we've built above but it'll do _something_ and we'll still work on fixing all
@@ -727,11 +727,15 @@ private function search( $type, $for ) {
if( $this->limit ) {
$query->setSize( $this->limit );
}
- if ( $this->rescore ) {
+ if ( count( $this->rescore ) ) {
// rescore_query has to be in array form before we send it to Elasticsearch but it is way easier to work
// with if we leave it in query for until now
- $this->rescore[ 'query' ][ 'rescore_query' ] = $this->rescore[ 'query' ][ 'rescore_query' ]->toArray();
- $query->setParam( 'rescore', $this->rescore );
+ $modifiedRescore = array();
+ foreach ( $this->rescore as $rescore ) {
+ $rescore[ 'query' ][ 'rescore_query' ] = $rescore[ 'query' ][ 'rescore_query' ]->toArray();
+ $modifiedRescore[] = $rescore;
+ }
+ $query->setParam( 'rescore', $modifiedRescore );
}
$query->addParam( 'stats', $type );
@@ -1166,32 +1170,16 @@ private function installBoosts() {
return;
}
- // Since Elasticsearch doesn't support multiple rescores we have to pick a strategy here....
- // TODO just use multiple rescores when Elasticsearch supports it (1.x)
-
- // If there isn't already a rescore then we can just add the boosting as a multiply rescore
- if ( !$this->rescore ) {
- $this->rescore = array(
- 'window_size' => $wgCirrusSearchFunctionRescoreWindowSize,
- 'query' => array(
- 'rescore_query' => $functionScore,
- 'query_weight' => 1.0,
- 'rescore_query_weight' => 1.0,
- 'score_mode' => 'multiply',
- )
- );
- return;
- }
-
- // Since there is already a rescore we have to wrap _both_ the rescore and the query in our
- // function score query. Nothing else really spits out the right numbers. The problem
- // with this is that the function score isn't just in the rescore which means that it can
- // be slow if the main query finds lots of results.
- $functionScore->setQuery( $this->query );
- $this->query = new \Elastica\Query\Simple( $functionScore->toArray() );
-
- $functionScore->setQuery( $this->rescore[ 'query' ][ 'rescore_query' ] );
- $this->rescore[ 'query' ][ 'rescore_query' ] = $functionScore;
+ // The function score is done as a rescore on top of everything else
+ $this->rescore[] = array(
+ 'window_size' => $wgCirrusSearchFunctionRescoreWindowSize,
+ 'query' => array(
+ 'rescore_query' => $functionScore,
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ )
+ );
}
private static function getDefaultBoostTemplates() {

0 comments on commit f83b8ff

Please sign in to comment.