Skip to content

Commit

Permalink
LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring …
Browse files Browse the repository at this point in the history
…calculation (apache#444)
  • Loading branch information
zacharymorn committed Nov 19, 2021
1 parent 6bd5c14 commit 07ee3ba
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
}

MultiNormsLeafSimScorer scoringSimScorer =
new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
LeafSimScorer nonScoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
// we use termscorers + disjunction as an impl detail
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Explanation;
Expand Down Expand Up @@ -61,7 +63,13 @@ final class MultiNormsLeafSimScorer {
if (needsScores) {
final List<NumericDocValues> normsList = new ArrayList<>();
final List<Float> weightList = new ArrayList<>();
final Set<String> duplicateCheckingSet = new HashSet<>();
for (FieldAndWeight field : normFields) {
assert duplicateCheckingSet.add(field.field)
: "There is a duplicated field ["
+ field.field
+ "] used to construct MultiNormsLeafSimScorer";

NumericDocValues norms = reader.getNormValues(field.field);
if (norms != null) {
normsList.add(norms);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
*/
package org.apache.lucene.sandbox.search;

import static com.carrotsearch.randomizedtesting.RandomizedTest.atMost;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.Arrays;
Expand Down Expand Up @@ -165,6 +169,80 @@ public void testSameScore() throws IOException {
dir.close();
}

/**
 * Searches randomly generated documents with a two-field, two-term {@link CombinedFieldQuery}
 * where the term "zoo" is indexed in both queried fields ("a" and "b") and "foo" only in "a".
 *
 * <p>No assertion is made on the collected hits: the test passes as long as indexing and
 * searching complete without throwing.
 */
public void testScoringWithMultipleFieldTermsMatch() throws IOException {
  // NOTE: the order of the random draws below is kept stable on purpose so that a given test
  // seed keeps producing the same index and the same query.
  final int docCount = randomIntBetween(100, 500);
  final int topN = atMost(100);
  final float weightA = Math.max(1, random().nextInt(5));
  final float weightB = Math.max(1, random().nextInt(5));

  final Directory dir = newDirectory();
  final Similarity similarity = randomCompatibleSimilarity();

  final IndexWriterConfig config = new IndexWriterConfig();
  config.setSimilarity(similarity);
  final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  // index documents that may match the query through either field
  for (int docIdx = 0; docIdx < docCount; ++docIdx) {
    final Document document = new Document();

    // field "a": the query term "foo", always present
    final int fooCount = random().nextInt(20) + 1;
    for (int remaining = fooCount; remaining > 0; remaining--) {
      document.add(new TextField("a", "foo", Store.NO));
    }

    // field "a": optional non-matching variants "foo0", "foo1", ...
    final int fooVariantDraw = random().nextInt(20) + 1;
    final int fooVariantCount = randomBoolean() ? fooVariantDraw : 0;
    for (int k = 0; k < fooVariantCount; k++) {
      document.add(new TextField("a", "foo" + k, Store.NO));
    }

    // field "a": the query term "zoo", always present
    final int zooInACount = random().nextInt(20) + 1;
    for (int remaining = zooInACount; remaining > 0; remaining--) {
      document.add(new TextField("a", "zoo", Store.NO));
    }

    // field "b": the query term "zoo", always present
    final int zooInBCount = random().nextInt(20) + 1;
    for (int remaining = zooInBCount; remaining > 0; remaining--) {
      document.add(new TextField("b", "zoo", Store.NO));
    }

    // field "b": optional non-matching variants "zoo0", "zoo1", ...
    final int zooVariantDraw = random().nextInt(20) + 1;
    final int zooVariantCount = randomBoolean() ? zooVariantDraw : 0;
    for (int k = 0; k < zooVariantCount; k++) {
      document.add(new TextField("b", "zoo" + k, Store.NO));
    }

    // field "c": not part of the query at all
    final int blaCount = random().nextInt(20) + 1;
    for (int k = 0; k < blaCount; k++) {
      document.add(new TextField("c", "bla" + k, Store.NO));
    }

    writer.addDocument(document);
  }

  final IndexReader reader = writer.getReader();
  final IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(similarity);

  final CombinedFieldQuery query =
      new CombinedFieldQuery.Builder()
          .addField("a", weightA)
          .addField("b", weightB)
          .addTerm(new BytesRef("foo"))
          .addTerm(new BytesRef("zoo"))
          .build();

  final TopScoreDocCollector collector =
      TopScoreDocCollector.create(topN, null, Integer.MAX_VALUE);
  searcher.search(query, collector);

  reader.close();
  writer.close();
  dir.close();
}

public void testNormsDisabled() throws IOException {
Directory dir = newDirectory();
Similarity similarity = randomCompatibleSimilarity();
Expand Down

0 comments on commit 07ee3ba

Please sign in to comment.