Skip to content

Commit

Permalink
Happened at least once that the value dropped out of the forward index,
Browse files Browse the repository at this point in the history
but the index still contains the object, the unindex broke
  • Loading branch information
Adam Groszer committed Nov 20, 2006
0 parents commit ce4eaf6
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 0 deletions.
140 changes: 140 additions & 0 deletions field/README.txt
@@ -0,0 +1,140 @@
Field Indexes
=============

Field indexes index orderable values. Note that they don't check for
orderability. That is, all of the values added to the index must be
orderable together. It is up to applications to provide only mutually
orderable values.

>>> from zope.index.field import FieldIndex

>>> index = FieldIndex()
>>> index.index_doc(0, 6)
>>> index.index_doc(1, 26)
>>> index.index_doc(2, 94)
>>> index.index_doc(3, 68)
>>> index.index_doc(4, 30)
>>> index.index_doc(5, 68)
>>> index.index_doc(6, 82)
>>> index.index_doc(7, 30)
>>> index.index_doc(8, 43)
>>> index.index_doc(9, 15)

Field indexes are searched with apply. The argument is a tuple
with a minimum and maximum value:

>>> index.apply((30, 70))
IFSet([3, 4, 5, 7, 8])

A common mistake is to pass a single value. If anything other than a
two-tuple is passed, a type error is raised:

>>> index.apply('hi')
Traceback (most recent call last):
...
TypeError: ('two-length tuple expected', 'hi')


Open-ended ranges can be specified by providing None as an end point:

>>> index.apply((30, None))
IFSet([2, 3, 4, 5, 6, 7, 8])

>>> index.apply((None, 70))
IFSet([0, 1, 3, 4, 5, 7, 8, 9])

>>> index.apply((None, None))
IFSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

To do an exact value search, supply equal minimum and maximum values:

>>> index.apply((30, 30))
IFSet([4, 7])

>>> index.apply((70, 70))
IFSet([])

Field indexes support basic statistics:

>>> index.documentCount()
10
>>> index.wordCount()
8

Documents can be reindexed:

>>> index.apply((15, 15))
IFSet([9])
>>> index.index_doc(9, 14)

>>> index.apply((15, 15))
IFSet([])
>>> index.apply((14, 14))
IFSet([9])

Documents can be unindexed:

>>> index.unindex_doc(7)
>>> index.documentCount()
9
>>> index.wordCount()
8
>>> index.unindex_doc(8)
>>> index.documentCount()
8
>>> index.wordCount()
7

>>> index.apply((30, 70))
IFSet([3, 4, 5])

Unindexing a document id that isn't present is ignored:

>>> index.unindex_doc(8)
>>> index.unindex_doc(80)
>>> index.documentCount()
8
>>> index.wordCount()
7

We can also clear the index entirely:

>>> index.clear()
>>> index.documentCount()
0
>>> index.wordCount()
0

>>> index.apply((30, 70))
IFSet([])

Bugfix testing:
---------------
It happened at least once that a value dropped out of the forward index
while the index still contained the document, which broke unindexing.

>>> index.index_doc(0, 6)
>>> index.index_doc(1, 26)
>>> index.index_doc(2, 94)
>>> index.index_doc(3, 68)
>>> index.index_doc(4, 30)
>>> index.index_doc(5, 68)
>>> index.index_doc(6, 82)
>>> index.index_doc(7, 30)
>>> index.index_doc(8, 43)
>>> index.index_doc(9, 15)

>>> index.apply((None, None))
IFSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Here is the damage:

>>> del index._fwd_index[68]

Unindex should succeed:

>>> index.unindex_doc(5)
>>> index.unindex_doc(3)

>>> index.apply((None, None))
IFSet([0, 1, 2, 4, 6, 7, 8, 9])
102 changes: 102 additions & 0 deletions field/index.py
@@ -0,0 +1,102 @@
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Field index
$Id$
"""
import persistent

from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from BTrees.IFBTree import IFTreeSet, IFSet, multiunion
from BTrees.Length import Length

import zope.interface

from zope.index import interfaces

class FieldIndex(persistent.Persistent):
    """Persistent index of orderable values, searchable by value range.

    Two mappings are kept in step: a forward index from each indexed value
    to the set of docids that carry it, and a reverse index from each docid
    to its indexed value.  The index does not check that the values are
    mutually orderable.
    """

    zope.interface.implements(
        interfaces.IInjection,
        interfaces.IStatistics,
        interfaces.IIndexSearch,
        )

    # Sentinel for "docid is not in the reverse index".  ``None`` cannot be
    # used for this because ``None`` may itself be a stored value.
    _missing = object()

    def __init__(self):
        self.clear()

    def clear(self):
        """Initialize forward and reverse mappings."""
        # The forward index maps indexed values to a sequence of docids
        self._fwd_index = OOBTree()
        # The reverse index maps a docid to its index value
        self._rev_index = IOBTree()
        self._num_docs = Length(0)

    def documentCount(self):
        """See interface IStatistics"""
        return self._num_docs()

    def wordCount(self):
        """See interface IStatistics"""
        return len(self._fwd_index)

    def index_doc(self, docid, value):
        """See interface IInjection

        If ``docid`` is already indexed, its previous entry is removed
        first, so the call acts as a re-index.
        """
        rev_index = self._rev_index
        if docid in rev_index:
            # unindex doc if present
            self.unindex_doc(docid)

        # Insert into forward index, creating the docid set on first use.
        docids = self._fwd_index.get(value)
        if docids is None:
            docids = IFTreeSet()
            self._fwd_index[value] = docids
        docids.insert(docid)

        # increment doc count
        self._num_docs.change(1)

        # Insert into reverse index.
        rev_index[docid] = value

    def unindex_doc(self, docid):
        """See interface IInjection

        Unindexing a docid that is not present is silently ignored.  A
        forward index that has lost the entry for this document is
        tolerated: the reverse entry and the document count are still
        updated.
        """
        rev_index = self._rev_index
        value = rev_index.get(docid, self._missing)
        if value is self._missing:
            return  # not in index

        del rev_index[docid]

        docids = self._fwd_index.get(value)
        if docids is not None:
            try:
                docids.remove(docid)
            except KeyError:
                # This is fishy (the forward index lost the docid), but we
                # don't want to raise an error.  We should probably log
                # something.
                pass
            if not docids:
                # Drop values that no document carries any more.
                del self._fwd_index[value]
        # else: the value itself dropped out of the forward index; equally
        # fishy, equally ignored.

        self._num_docs.change(-1)

    def apply(self, query):
        """Return the docids whose value lies in the given range.

        ``query`` must be a ``(minimum, maximum)`` tuple; the two items are
        passed on as the bounds of the forward index's ``values()`` range
        search.  Anything else raises ``TypeError``.
        """
        # Check the type first so that unsized arguments (e.g. an int) get
        # the intended error instead of failing inside ``len()``.
        if not isinstance(query, tuple) or len(query) != 2:
            raise TypeError("two-length tuple expected", query)
        return multiunion(self._fwd_index.values(*query))

0 comments on commit ce4eaf6

Please sign in to comment.