Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

a few fixes and additions for ZendSearch #8

Open
wants to merge 8 commits into from

2 participants

@luciole75w

Hello,

I have been using the Lucene search module of the Zend Framework on a website for quite a while, and I'm happy with it. Thank you for the PHP version, by the way :)

Recently I decided to try out the refactored ZF2/ZendSearch version to see whether it had a feature I was missing in the old ZF1 version: support for alternate terms (stems, in my case) in exact phrase queries. The documentation (http://framework.zend.com/manual/1.12/en/zend.search.lucene.query-api.html#zend.search.lucene.queries.phrase-query) mentions synonyms at the same position, but this is not actually implemented (at least in the latest RC).
So I added the feature, and since it is useful to me, I thought it might be useful to others. Before putting it online, I tested the changes against my real site index (about 550 web pages) for both result accuracy and performance, and I did not find any regression.

There are also a few other, unrelated changes:

  • a UTF-8 short-words token filter (the regular ShortWords filter expects single-byte characters)
  • a fix for a regression in SegmentInfo::termFreqs() (which could produce wrong scores and PHP notices)
  • fixes for unit tests giving false positives due to index directory cleanup (at least on my local server: Wamp 2 / PHP 5.4.3)

That's all. I hope you'll find something interesting to merge back into your branch. If you need further information about my changes, feel free to ask.

Cheers,
Thierry

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Oct 15, 2013
  1. @luciole75w
  2. @luciole75w
  3. @luciole75w

    added support for alternate terms in exact phrase queries (API)

    luciole75w authored
    - sloppy mode is not supported
  4. @luciole75w

    added unit tests for exact phrase queries with and without alternate …

    luciole75w authored
    …terms
    
    (also a bit of code factorization)
  5. @luciole75w

    fixed errors in SegmentInfo when used with a doc filter

    luciole75w authored
    > termFreqs() regression introduced between ZF 1.11.12 and ZF2/ZendSearch initial commit
Commits on Feb 4, 2014
  1. @moust
Commits on Feb 19, 2014
  1. @moust
Commits on Mar 3, 2014
  1. @luciole75w
This page is out of date. Refresh to see the latest.
View
61 library/ZendSearch/Lucene/Analysis/TokenFilter/ShortWordsUtf8.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Zend Framework (http://framework.zend.com/)
+ *
+ * @link http://github.com/zendframework/zf2 for the canonical source repository
+ * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ * @package Zend_Search
+ */
+
+namespace ZendSearch\Lucene\Analysis\TokenFilter;
+
+use ZendSearch\Lucene\Analysis\Token;
+use ZendSearch\Lucene\Exception\ExtensionNotLoadedException;
+
+/**
+ * Token filter that removes short words. The minimum allowed word length is configured through the constructor.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ */
+class ShortWordsUtf8 implements TokenFilterInterface
+{
+ /**
+ * Minimum allowed term length
+ * @var integer
+ */
+ private $length;
+
+ /**
+ * Constructs new instance of this filter.
+ *
+ * @param integer $length minimum allowed length of term which passes this filter (default 2)
+ * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
+ */
+ public function __construct($length = 2)
+ {
+ $this->length = $length;
+
+ if (!function_exists('mb_strlen')) {
+ // mbstring extension is disabled
+ throw new ExtensionNotLoadedException('Utf8 compatible short words filter needs mbstring extension to be enabled.');
+ }
+ }
+
+ /**
+ * Normalize Token or remove it (if null is returned)
+ *
+ * @param \ZendSearch\Lucene\Analysis\Token $srcToken
+ * @return \ZendSearch\Lucene\Analysis\Token|null the unchanged token, or null when its text is shorter than the minimum length
+ */
+ public function normalize(Token $srcToken)
+ {
+ if (mb_strlen($srcToken->getTermText(), 'UTF-8') < $this->length) {
+ return null;
+ } else {
+ return $srcToken;
+ }
+ }
+}
View
6 library/ZendSearch/Lucene/Index/SegmentInfo.php
@@ -1077,8 +1077,9 @@ public function termFreqs(Term $term, $shift = 0, DocsFilter $docsFilter = null)
}
} else {
$docId += $docDelta/2;
+ $freq = $frqFile->readVInt();
if (isset($filter[$docId])) {
- $result[$shift + $docId] = $frqFile->readVInt();
+ $result[$shift + $docId] = $freq;
$updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
}
}
@@ -1099,8 +1100,9 @@ public function termFreqs(Term $term, $shift = 0, DocsFilter $docsFilter = null)
}
} else {
$docId += $docDelta/2;
+ $freq = $frqFile->readVInt();
if (isset($filter[$docId])) {
- $result[$shift + $docId] = $frqFile->readVInt();
+ $result[$shift + $docId] = $freq;
$updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
}
}
View
4 library/ZendSearch/Lucene/MultiSearcher.php
@@ -132,7 +132,7 @@ public function count()
$count = 0;
foreach ($this->_indices as $index) {
- $count += $this->_indices->count();
+ $count += $index->count();
}
return $count;
@@ -160,7 +160,7 @@ public function numDocs()
$docs = 0;
foreach ($this->_indices as $index) {
- $docs += $this->_indices->numDocs();
+ $docs += $index->numDocs();
}
return $docs;
View
171 library/ZendSearch/Lucene/Search/Query/Phrase.php
@@ -35,6 +35,11 @@ class Phrase extends AbstractQuery
/**
* Term positions (relative positions of terms within the phrase).
+ *
+ * If several terms have the same offset, they will be considered as alternate
+ * terms for the word at this position, thus making stemming easier (for example).
+ * NOTE: This feature is supported only with exact search (i.e. slop = 0).
+ *
* Array of integers
*
* @var array
@@ -143,6 +148,8 @@ public function getSlop()
* Adds a term to the end of the query phrase.
* The relative position of the term is specified explicitly or the one immediately
* after the last term added.
+ * Duplicate offsets can be used to provide several variations for a word (UNSUPPORTED
+ * YET IN SLOPPY MODE).
*
* @param \ZendSearch\Lucene\Index\Term $term
* @param integer $position
@@ -207,10 +214,59 @@ public function rewrite(Lucene\SearchIndexInterface $index)
*/
public function optimize(Lucene\SearchIndexInterface $index)
{
- // Check, that index contains all phrase terms
- foreach ($this->_terms as $term) {
- if (!$index->hasTerm($term)) {
- return new EmptyResult();
+ // now look for possible alternate terms at one or more positions
+ $nbUniqueOffsets = count(array_flip($this->_offsets));
+
+ if (count($this->_offsets) > $nbUniqueOffsets) // alts found
+ {
+ if ($nbUniqueOffsets == 1)
+ {
+ // several terms but all at same offset (for example several stems of a single input word)
+ $optimizedQuery = new MultiTerm($this->_terms, array_fill(0, count($this->_terms), null));
+ $optimizedQuery->setBoost($this->getBoost());
+ return $optimizedQuery->optimize($index);
+ }
+
+ // first, group the query terms according to their offset
+ $offsetAlts = array_fill_keys($this->_offsets, array());
+ foreach ($this->_offsets as $termId => $offset)
+ $offsetAlts[$offset][] = $termId;
+
+ // then for each offset, check that the index contains at least one alt
+ foreach ($offsetAlts as $alts)
+ {
+ $check = false;
+
+ foreach ($alts as $termId)
+ {
+ if ($index->hasTerm($this->_terms[$termId]))
+ {
+ $check = true;
+
+ // PERFORMANCE NOTE
+ // we could break here to save hasTerm() calls (costly) but "usually" the time lost here is less than
+ // what we gain later if we keep processing the alts to unset them if possible (although it heavily
+ // depends on the index content, stemming efficiency and input queries)
+ //break;
+ }
+ else
+ {
+ unset($this->_terms[$termId]);
+ unset($this->_offsets[$termId]);
+ }
+ }
+
+ if (!$check)
+ return new EmptyResult();
+ }
+ }
+ else // only one term per offset
+ {
+ // Check, that index contains all phrase terms
+ foreach ($this->_terms as $term) {
+ if (!$index->hasTerm($term)) {
+ return new EmptyResult();
+ }
}
}
@@ -276,31 +332,61 @@ public function _exactPhraseFreq($docId)
{
$freq = 0;
- // Term Id with lowest cardinality
- $lowCardTermId = null;
+ // offset with the lowest total (for all alts if any) cardinality
+ $lowCardOffset = 0;
+ $lowCard = PHP_INT_MAX;
- // Calculate $lowCardTermId
- foreach ($this->_terms as $termId => $term) {
- if ($lowCardTermId === null ||
- count($this->_termsPositions[$termId][$docId]) <
- count($this->_termsPositions[$lowCardTermId][$docId]) ) {
- $lowCardTermId = $termId;
- }
- }
+ // group the terms according to their offset, also filtering alts not found in this doc
+ $docAlts = array_fill_keys($this->_offsets, array());
+ foreach ($this->_offsets as $termId => $offset)
+ if (isset($this->_termsPositions[$termId][$docId]))
+ $docAlts[$offset][] = $termId;
- // Walk through positions of the term with lowest cardinality
- foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
- // We expect phrase to be found
- $freq++;
+ // look for the offset where total cardinality is the lowest
+ foreach ($docAlts as $offset => $alts)
+ {
+ $card = 0;
- // Walk through other terms
- foreach ($this->_terms as $termId => $term) {
- if ($termId != $lowCardTermId) {
- $expectedPosition = $lowCardPos +
- ($this->_offsets[$termId] -
- $this->_offsets[$lowCardTermId]);
+ foreach ($alts as $termId)
+ $card += count($this->_termsPositions[$termId][$docId]);
+
+ if ($card < $lowCard)
+ {
+ $lowCardOffset = $offset;
+ $lowCard = $card;
+ }
+ }
- if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
+ // split the term list
+ $lowCardAlts = $docAlts[$lowCardOffset];
+ unset($docAlts[$lowCardOffset]);
+
+ // Walk through positions of all the alts at the offset with lowest cardinality
+ foreach ($lowCardAlts as $lowCardTermId)
+ {
+ foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos)
+ {
+ // We expect phrase to be found
+ $freq++;
+
+ // Walk through other terms
+ foreach ($docAlts as $offset => $alts)
+ {
+ // at least one alt must fulfill each remaining position (other than lowCardPos)
+ $expectedPosition = $lowCardPos + $offset - $lowCardOffset;
+ $match = false;
+
+ foreach ($alts as $termId)
+ {
+ if (in_array($expectedPosition, $this->_termsPositions[$termId][$docId]))
+ {
+ $match = true;
+ break;
+ }
+ }
+
+ if (!$match)
+ {
$freq--; // Phrase wasn't found.
break;
}
@@ -402,16 +488,32 @@ public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
$this->_resVector = array();
}
+ $offsetDocs = array();
+
+ // merge docs ids matching terms at the same offset
+ foreach ($this->_terms as $termId => $term)
+ {
+ $offset = $this->_offsets[$termId];
+
+ if (isset($offsetDocs[$offset]))
+ $offsetDocs[$offset] = array_merge($offsetDocs[$offset], $reader->termDocs($term));
+ else
+ $offsetDocs[$offset] = $reader->termDocs($term);
+
+ $this->_termsPositions[$termId] = $reader->termPositions($term);
+ }
+
$resVectors = array();
$resVectorsSizes = array();
$resVectorsIds = array(); // is used to prevent arrays comparison
- foreach ($this->_terms as $termId => $term) {
- $resVectors[] = array_flip($reader->termDocs($term));
- $resVectorsSizes[] = count(end($resVectors));
- $resVectorsIds[] = $termId;
- $this->_termsPositions[$termId] = $reader->termPositions($term);
+ foreach ($offsetDocs as $offset => $docs)
+ {
+ $resVectors[] = array_flip($docs); // also deal with duplicates
+ $resVectorsSizes[] = count(end($resVectors));
+ $resVectorsIds[] = $offset;
}
+
// sort resvectors in order of subquery cardinality increasing
array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
$resVectorsIds, SORT_ASC, SORT_NUMERIC,
@@ -422,7 +524,6 @@ public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
$this->_resVector = $nextResVector;
} else {
//$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
-
/**
* This code is used as workaround for array_intersect_key() slowness problem.
*/
@@ -433,11 +534,11 @@ public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
}
}
$this->_resVector = $updatedVector;
- }
- if (count($this->_resVector) == 0) {
- // Empty result set, we don't need to check other terms
- break;
+ if (count($this->_resVector) == 0) {
+ // Empty result set, we don't need to check other terms
+ break;
+ }
}
}
View
6 tests/ZendSearch/Lucene/DocumentTest.php
@@ -176,7 +176,13 @@ public function testHtmlInlineTagsIndexing()
$hits = $index->find('ZendFramework');
$this->assertEquals(count($hits), 1);
+ // IMPORTANT : if we want to clean the directory, the instance of Index has to be actually destroyed first,
+ // so that it releases its file locks. In case of additional indirect references, we need a manual cycle
+ // of garbage collection to flush the pending objects.
unset($index);
+ unset($hits); // QueryHit instances hold a reference on their owner Index instance
+ gc_collect_cycles(); // force the destructors to be called right now
+
$this->_clearDirectory(__DIR__ . '/_index/_files');
}
View
30 tests/ZendSearch/Lucene/IndexTest.php
@@ -22,11 +22,6 @@
*/
class IndexTest extends \PHPUnit_Framework_TestCase
{
- public function tearDown()
- {
- $this->_clearDirectory(__DIR__ . '/_index/_files');
- }
-
private function _clearDirectory($dirName)
{
if (!file_exists($dirName) || !is_dir($dirName)) {
@@ -230,8 +225,6 @@ public function testDelete()
mkdir($tempIndexDir);
}
- $this->_clearDirectory($tempIndexDir);
-
$indexDir = opendir($sampleIndexDir);
while (($file = readdir($indexDir)) !== false) {
if (!is_dir($sampleIndexDir . '/' . $file)) {
@@ -254,6 +247,8 @@ public function testDelete()
$index1 = Lucene\Lucene::open($tempIndexDir);
$this->assertTrue($index1->isDeleted(2));
unset($index1);
+
+ $this->_clearDirectory($tempIndexDir);
}
public function testAddDocument()
@@ -296,6 +291,9 @@ public function testAddDocument()
$index1 = Lucene\Lucene::open(__DIR__ . '/_index/_files');
$this->assertTrue($index1 instanceof Lucene\SearchIndexInterface);
+
+ unset($index1);
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
public function testOptimize()
@@ -352,6 +350,12 @@ public function testOptimize()
$hits = $index2->find('submitting');
$this->assertEquals(count($hits), 3);
+
+ // cf. comment about destruction steps @ testHtmlInlineTagsIndexing() in DocumentTest.php
+ unset($index2);
+ unset($hits);
+ gc_collect_cycles();
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
public function testTerms()
@@ -435,6 +439,9 @@ public function testTermsStreamInterfaceSkipToTermsRetrievingZeroTermsCase()
$this->assertTrue($index->currentTerm() === null);
$index->closeTermsStream();
+
+ unset($index);
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
@@ -457,6 +464,9 @@ public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
$this->assertTrue($index->currentTerm() === null);
$index->closeTermsStream();
+
+ unset($index);
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
@@ -479,6 +489,9 @@ public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
$this->assertTrue($index->currentTerm() == new Index\Term('word', 'contents'));
$index->closeTermsStream();
+
+ unset($index);
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
/**
@@ -494,5 +507,8 @@ public function testIsDeletedWithoutExplicitCommit()
$index->addDocument($document);
$this->assertFalse($index->isDeleted(0));
+
+ unset($index);
+ $this->_clearDirectory(__DIR__ . '/_index/_files');
}
}
View
160 tests/ZendSearch/Lucene/Search23Test.php
@@ -23,6 +23,18 @@
*/
class Search23Test extends \PHPUnit_Framework_TestCase
{
+ private function checkResults($hits, $expected)
+ {
+ $this->assertEquals(count($hits), count($expected));
+
+ foreach ($hits as $resId => $hit)
+ {
+ $this->assertEquals($hit->id, $expected[$resId][0]);
+ $this->assertTrue( abs($hit->score - $expected[$resId][1]) < 0.000001 );
+ $this->assertEquals($hit->path, $expected[$resId][2]);
+ }
+ }
+
public function testQueryParser()
{
$wildcardMinPrefix = Query\Wildcard::getMinPrefixLength();
@@ -158,16 +170,11 @@ public function testTermQuery()
$hits = $index->find('submitting');
- $this->assertEquals(count($hits), 3);
$expectedResultset = array(array(2, 0.114555, 'IndexSource/contributing.patches.html'),
array(7, 0.112241, 'IndexSource/contributing.bugs.html'),
array(8, 0.112241, 'IndexSource/contributing.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testMultiTermQuery()
@@ -176,30 +183,23 @@ public function testMultiTermQuery()
$hits = $index->find('submitting AND wishlists');
- $this->assertEquals(count($hits), 1);
+ $expectedResultset = array(array(8, 0.141633, 'IndexSource/contributing.html'));
- $this->assertEquals($hits[0]->id, 8);
- $this->assertTrue( abs($hits[0]->score - 0.141633) < 0.000001 );
- $this->assertEquals($hits[0]->path, 'IndexSource/contributing.html');
+ $this->checkResults($hits, $expectedResultset);
}
- public function testPraseQuery()
+ public function testPhraseQuery()
{
$index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
$hits = $index->find('"reporting bugs"');
- $this->assertEquals(count($hits), 4);
$expectedResultset = array(array(0, 0.247795, 'IndexSource/contributing.documentation.html'),
array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
array(8, 0.212395, 'IndexSource/contributing.html'),
array(2, 0.176996, 'IndexSource/contributing.patches.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testQueryParserKeywordsHandlingPhrase()
@@ -217,14 +217,9 @@ public function testQueryParserKeywordsHandlingPhrase()
$hits = $index->find('"IndexSource/contributing.bugs.html"');
- $this->assertEquals(count($hits), 1);
$expectedResultset = array(array(7, 1, 'IndexSource/contributing.bugs.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testQueryParserKeywordsHandlingTerm()
@@ -242,7 +237,6 @@ public function testQueryParserKeywordsHandlingTerm()
$hits = $index->find('IndexSource\/contributing\.wishlist\.html AND Home');
- $this->assertEquals(count($hits), 9);
$expectedResultset = array(array(1, 1.000000, 'IndexSource/contributing.wishlist.html'),
array(8, 0.167593, 'IndexSource/contributing.html'),
array(0, 0.154047, 'IndexSource/contributing.documentation.html'),
@@ -253,11 +247,7 @@ public function testQueryParserKeywordsHandlingTerm()
array(5, 0.038530, 'IndexSource/authors.html'),
array(4, 0.036261, 'IndexSource/copyright.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testBooleanQuery()
@@ -266,15 +256,10 @@ public function testBooleanQuery()
$hits = $index->find('submitting AND (wishlists OR requirements)');
- $this->assertEquals(count($hits), 2);
$expectedResultset = array(array(7, 0.095697, 'IndexSource/contributing.bugs.html'),
array(8, 0.075573, 'IndexSource/contributing.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testBooleanQueryWithPhraseSubquery()
@@ -283,14 +268,9 @@ public function testBooleanQueryWithPhraseSubquery()
$hits = $index->find('"PEAR developers" AND Home');
- $this->assertEquals(count($hits), 1);
$expectedResultset = array(array(1, 0.168270, 'IndexSource/contributing.wishlist.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testBooleanQueryWithNonExistingPhraseSubquery()
@@ -314,14 +294,9 @@ public function testFilteredTokensQueryParserProcessing()
$hits = $index->find('"PEAR developers" AND Home AND 123456787654321');
- $this->assertEquals(count($hits), 1);
$expectedResultset = array(array(1, 0.168270, 'IndexSource/contributing.wishlist.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testWildcardQuery()
@@ -333,7 +308,6 @@ public function testWildcardQuery()
$hits = $index->find('*cont*');
- $this->assertEquals(count($hits), 9);
$expectedResultset = array(array(8, 0.328087, 'IndexSource/contributing.html'),
array(2, 0.318592, 'IndexSource/contributing.patches.html'),
array(7, 0.260137, 'IndexSource/contributing.bugs.html'),
@@ -344,11 +318,7 @@ public function testWildcardQuery()
array(5, 0.010150, 'IndexSource/authors.html'),
array(9, 0.003504, 'IndexSource/core.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
Query\Wildcard::setMinPrefixLength($wildcardMinPrefix);
}
@@ -362,7 +332,6 @@ public function testFuzzyQuery()
$hits = $index->find('tesd~0.4');
- $this->assertEquals(count($hits), 9);
$expectedResultset = array(array(2, 0.037139, 'IndexSource/contributing.patches.html'),
array(0, 0.008735, 'IndexSource/contributing.documentation.html'),
array(7, 0.002449, 'IndexSource/contributing.bugs.html'),
@@ -373,11 +342,7 @@ public function testFuzzyQuery()
array(8, 0.000414, 'IndexSource/contributing.html'),
array(4, 0.000345, 'IndexSource/copyright.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
Query\Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
}
@@ -388,18 +353,13 @@ public function testInclusiveRangeQuery()
$hits = $index->find('[xml TO zzzzz]');
- $this->assertEquals(count($hits), 5);
$expectedResultset = array(array(4, 0.156366, 'IndexSource/copyright.html'),
array(2, 0.080458, 'IndexSource/contributing.patches.html'),
array(7, 0.060214, 'IndexSource/contributing.bugs.html'),
array(1, 0.009687, 'IndexSource/contributing.wishlist.html'),
array(5, 0.005871, 'IndexSource/authors.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testNonInclusiveRangeQuery()
@@ -408,18 +368,13 @@ public function testNonInclusiveRangeQuery()
$hits = $index->find('{xml TO zzzzz}');
- $this->assertEquals(count($hits), 5);
$expectedResultset = array(array(2, 0.1308671, 'IndexSource/contributing.patches.html'),
array(7, 0.0979391, 'IndexSource/contributing.bugs.html'),
array(4, 0.0633930, 'IndexSource/copyright.html'),
array(1, 0.0157556, 'IndexSource/contributing.wishlist.html'),
array(5, 0.0095493, 'IndexSource/authors.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testDefaultSearchField()
@@ -431,18 +386,13 @@ public function testDefaultSearchField()
Lucene\Lucene::setDefaultSearchField('path');
$hits = $index->find('contributing');
- $this->assertEquals(count($hits), 5);
$expectedResultset = array(array(8, 0.847922, 'IndexSource/contributing.html'),
array(0, 0.678337, 'IndexSource/contributing.documentation.html'),
array(1, 0.678337, 'IndexSource/contributing.wishlist.html'),
array(2, 0.678337, 'IndexSource/contributing.patches.html'),
array(7, 0.678337, 'IndexSource/contributing.bugs.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
Lucene\Lucene::setDefaultSearchField($storedDefaultSearchField);
}
@@ -491,17 +441,12 @@ public function testSortingResult()
$hits = $index->find('"reporting bugs"', 'path');
- $this->assertEquals(count($hits), 4);
$expectedResultset = array(array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
array(0, 0.247795, 'IndexSource/contributing.documentation.html'),
array(8, 0.212395, 'IndexSource/contributing.html'),
array(2, 0.176996, 'IndexSource/contributing.patches.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testSortingResultByScore()
@@ -510,31 +455,21 @@ public function testSortingResultByScore()
$hits = $index->find('"reporting bugs"', 'score', SORT_NUMERIC, SORT_ASC,
'path', SORT_STRING, SORT_ASC);
- $this->assertEquals(count($hits), 4);
$expectedResultset = array(array(2, 0.176996, 'IndexSource/contributing.patches.html'),
array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
array(8, 0.212395, 'IndexSource/contributing.html'),
array(0, 0.247795, 'IndexSource/contributing.documentation.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
$hits = $index->find('"reporting bugs"', 'score', SORT_NUMERIC, SORT_ASC,
'path', SORT_STRING, SORT_DESC);
- $this->assertEquals(count($hits), 4);
$expectedResultset = array(array(2, 0.176996, 'IndexSource/contributing.patches.html'),
array(8, 0.212395, 'IndexSource/contributing.html'),
array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
array(0, 0.247795, 'IndexSource/contributing.documentation.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
}
public function testLimitingResult()
@@ -547,17 +482,40 @@ public function testLimitingResult()
$hits = $index->find('"reporting bugs"', 'path');
- $this->assertEquals(count($hits), 3);
$expectedResultset = array(array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
array(0, 0.247795, 'IndexSource/contributing.documentation.html'),
array(2, 0.176996, 'IndexSource/contributing.patches.html'));
- foreach ($hits as $resId => $hit) {
- $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
- $this->assertTrue( abs($hit->score - $expectedResultset[$resId][1]) < 0.000001 );
- $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
- }
+ $this->checkResults($hits, $expectedResultset);
Lucene\Lucene::setResultSetLimit($storedResultSetLimit);
}
+
+ public function testQueryAPIPhraseExact()
+ {
+ $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
+
+ $query = new Query\Phrase(array('latest', 'sources', 'of', 'the', 'package'));
+
+ $hits = $index->find($query);
+
+ $expectedResultset = array(array(2, 0.333882, 'IndexSource/contributing.patches.html'));
+
+ $this->checkResults($hits, $expectedResultset);
+ }
+
+ public function testQueryAPIPhraseExactWithAlts()
+ {
+ $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
+
+ $query = new Query\Phrase(array('latest', 'version', 'sources', 'of', 'the', 'package'),
+ array( 0, 1, 1, 2, 3, 4));
+
+ $hits = $index->find($query);
+
+ $expectedResultset = array(array(7, 0.626589, 'IndexSource/contributing.bugs.html'),
+ array(2, 0.369221, 'IndexSource/contributing.patches.html'));
+
+ $this->checkResults($hits, $expectedResultset);
+ }
}
Something went wrong with that request. Please try again.