Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Happened at least once that the value dropped out of the forward index,
but the index still contains the object, the unindex broke
- Loading branch information
Adam Groszer
committed
Nov 20, 2006
0 parents
commit ce4eaf6
Showing
2 changed files
with
242 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
Field Indexes | ||
============= | ||
|
||
Field indexes index orderable values. Note that they don't check for | ||
orderability. That is, all of the values added to the index must be | ||
orderable together. It is up to applications to provide only mutually | ||
orderable values. | ||
|
||
>>> from zope.index.field import FieldIndex | ||
|
||
>>> index = FieldIndex() | ||
>>> index.index_doc(0, 6) | ||
>>> index.index_doc(1, 26) | ||
>>> index.index_doc(2, 94) | ||
>>> index.index_doc(3, 68) | ||
>>> index.index_doc(4, 30) | ||
>>> index.index_doc(5, 68) | ||
>>> index.index_doc(6, 82) | ||
>>> index.index_doc(7, 30) | ||
>>> index.index_doc(8, 43) | ||
>>> index.index_doc(9, 15) | ||
|
||
Field indexes are searched with apply.  The argument is a tuple | ||
with a minimum and maximum value: | ||
|
||
>>> index.apply((30, 70)) | ||
IFSet([3, 4, 5, 7, 8]) | ||
|
||
A common mistake is to pass a single value. If anything other than a | ||
two-tuple is passed, a type error is raised: | ||
|
||
>>> index.apply('hi') | ||
Traceback (most recent call last): | ||
... | ||
TypeError: ('two-length tuple expected', 'hi') | ||
|
||
|
||
Open-ended ranges can be specified by passing None as an end point: | ||
|
||
>>> index.apply((30, None)) | ||
IFSet([2, 3, 4, 5, 6, 7, 8]) | ||
|
||
>>> index.apply((None, 70)) | ||
IFSet([0, 1, 3, 4, 5, 7, 8, 9]) | ||
|
||
>>> index.apply((None, None)) | ||
IFSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) | ||
|
||
To do an exact value search, supply equal minimum and maximum values: | ||
|
||
>>> index.apply((30, 30)) | ||
IFSet([4, 7]) | ||
|
||
>>> index.apply((70, 70)) | ||
IFSet([]) | ||
|
||
Field indexes support basic statistics: | ||
|
||
>>> index.documentCount() | ||
10 | ||
>>> index.wordCount() | ||
8 | ||
|
||
Documents can be reindexed: | ||
|
||
>>> index.apply((15, 15)) | ||
IFSet([9]) | ||
>>> index.index_doc(9, 14) | ||
|
||
>>> index.apply((15, 15)) | ||
IFSet([]) | ||
>>> index.apply((14, 14)) | ||
IFSet([9]) | ||
|
||
Documents can be unindexed: | ||
|
||
>>> index.unindex_doc(7) | ||
>>> index.documentCount() | ||
9 | ||
>>> index.wordCount() | ||
8 | ||
>>> index.unindex_doc(8) | ||
>>> index.documentCount() | ||
8 | ||
>>> index.wordCount() | ||
7 | ||
|
||
>>> index.apply((30, 70)) | ||
IFSet([3, 4, 5]) | ||
|
||
Unindexing a document id that isn't present is ignored: | ||
|
||
>>> index.unindex_doc(8) | ||
>>> index.unindex_doc(80) | ||
>>> index.documentCount() | ||
8 | ||
>>> index.wordCount() | ||
7 | ||
|
||
We can also clear the index entirely: | ||
|
||
>>> index.clear() | ||
>>> index.documentCount() | ||
0 | ||
>>> index.wordCount() | ||
0 | ||
|
||
>>> index.apply((30, 70)) | ||
IFSet([]) | ||
|
||
Bugfix testing: | ||
--------------- | ||
It happened at least once that a value dropped out of the forward index | ||
while the index still contained the object, which broke unindexing. | ||
|
||
>>> index.index_doc(0, 6) | ||
>>> index.index_doc(1, 26) | ||
>>> index.index_doc(2, 94) | ||
>>> index.index_doc(3, 68) | ||
>>> index.index_doc(4, 30) | ||
>>> index.index_doc(5, 68) | ||
>>> index.index_doc(6, 82) | ||
>>> index.index_doc(7, 30) | ||
>>> index.index_doc(8, 43) | ||
>>> index.index_doc(9, 15) | ||
|
||
>>> index.apply((None, None)) | ||
IFSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) | ||
|
||
Here is the damage: | ||
|
||
>>> del index._fwd_index[68] | ||
|
||
Unindex should succeed: | ||
|
||
>>> index.unindex_doc(5) | ||
>>> index.unindex_doc(3) | ||
|
||
>>> index.apply((None, None)) | ||
IFSet([0, 1, 2, 4, 6, 7, 8, 9]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
############################################################################## | ||
# | ||
# Copyright (c) 2002 Zope Corporation and Contributors. | ||
# All Rights Reserved. | ||
# | ||
# This software is subject to the provisions of the Zope Public License, | ||
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. | ||
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | ||
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | ||
# FOR A PARTICULAR PURPOSE | ||
# | ||
############################################################################## | ||
"""Field index | ||
$Id$ | ||
""" | ||
import persistent | ||
|
||
from BTrees.IOBTree import IOBTree | ||
from BTrees.OOBTree import OOBTree | ||
from BTrees.IFBTree import IFTreeSet, IFSet, multiunion | ||
from BTrees.Length import Length | ||
|
||
import zope.interface | ||
|
||
from zope.index import interfaces | ||
|
||
class FieldIndex(persistent.Persistent):
    """Index mapping orderable values to the ids of documents having them.

    Two mappings are maintained side by side:

    * ``_fwd_index`` maps each indexed value to a set of document ids;
    * ``_rev_index`` maps each document id to its indexed value.

    The index does not check that the values are mutually orderable; that
    is left to the application.
    """

    zope.interface.implements(
        interfaces.IInjection,
        interfaces.IStatistics,
        interfaces.IIndexSearch,
        )

    def __init__(self):
        self.clear()

    def clear(self):
        """Initialize forward and reverse mappings."""
        # The forward index maps indexed values to a sequence of docids
        self._fwd_index = OOBTree()
        # The reverse index maps a docid to its index value
        self._rev_index = IOBTree()
        self._num_docs = Length(0)

    def documentCount(self):
        """See interface IStatistics

        Return the number of documents currently indexed.
        """
        return self._num_docs()

    def wordCount(self):
        """See interface IStatistics

        Return the number of distinct values in the forward index.
        """
        return len(self._fwd_index)

    def index_doc(self, docid, value):
        """See interface IInjection

        Index ``docid`` under ``value``.  If the document is already
        indexed, it is unindexed first, so this also serves as reindexing.
        """
        rev_index = self._rev_index
        if docid in rev_index:
            # unindex doc if present
            self.unindex_doc(docid)

        # Insert into forward index.
        docids = self._fwd_index.get(value)
        if docids is None:
            docids = IFTreeSet()
            self._fwd_index[value] = docids
        docids.insert(docid)

        # increment doc count
        self._num_docs.change(1)

        # Insert into reverse index.
        rev_index[docid] = value

    def unindex_doc(self, docid):
        """See interface IInjection

        Remove ``docid`` from the index.  Unknown document ids are ignored.
        """
        rev_index = self._rev_index
        # Look the docid up by key rather than comparing the stored value
        # to None: a document indexed with the value None must still be
        # removable, otherwise reindexing it would count it twice.
        try:
            value = rev_index[docid]
        except KeyError:
            return  # not in index

        del rev_index[docid]

        try:
            docids = self._fwd_index[value]
            docids.remove(docid)
        except KeyError:
            # This is fishy, but we don't want to raise an error.
            # We should probably log something.
            # The forward index is out of sync with the reverse index
            # (value or docid missing); there is nothing left to clean up.
            pass
        else:
            if not docids:
                # Last document for this value: drop the empty set so that
                # wordCount() only counts values that are still in use.
                del self._fwd_index[value]

        self._num_docs.change(-1)

    def apply(self, query):
        """See interface IIndexSearch

        ``query`` must be a ``(minimum, maximum)`` tuple; the document ids
        of all values within that range are returned.  ``None`` as either
        end point leaves that end of the range open.

        Raises ``TypeError`` if ``query`` is not a tuple of length two.
        """
        # Check the type first: calling len() on an unsized object (e.g. an
        # int) would raise an unrelated TypeError before we get to report
        # the expected query format.
        if not isinstance(query, tuple) or len(query) != 2:
            raise TypeError("two-length tuple expected", query)
        return multiunion(self._fwd_index.values(*query))