Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Integration with Zope complete. ZCTextIndex is now a bona fide Plug-in…
Browse files Browse the repository at this point in the history
… index.

Some additional plug-in index APIs were added to ZCTextIndex, and supporting APIs were added to Index and Lexicon.

_apply_index does not use NBest since ZCatalog has an incompatible strategy for finding the top results. NBest might be abstracted from this product for general consumption in application code.
  • Loading branch information
caseman committed May 14, 2002
1 parent 2c33691 commit f16cc3b
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 41 deletions.
10 changes: 9 additions & 1 deletion Index.py
Expand Up @@ -23,6 +23,9 @@
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode

import ZODB
from Persistence import Persistent

# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
Expand All @@ -39,7 +42,7 @@ def scaled_int(f, scale=SCALE_FACTOR):
# expensive.
return int(f * scale + 0.5)

class Index:
class Index(Persistent):

__implements__ = IIndex

Expand All @@ -59,6 +62,10 @@ def __init__(self, lexicon):
def length(self):
    """Report how many documents are held in the index.

    The count is the number of entries in self._docwords.
    """
    docwords = self._docwords
    return len(docwords)

def get_words(self, docid):
    """Return the word ids recorded for the document `docid`.

    The entry stored in self._docwords for `docid` is passed through
    WidCode.decode().  A `docid` missing from self._docwords makes the
    subscript lookup fail (the caller in ZCTextIndex catches KeyError).
    """
    encoded = self._docwords[docid]
    return WidCode.decode(encoded)

# Most of the computation for computing a relevance score for the
# document occurs in the search() method. The code currently
Expand Down Expand Up @@ -97,6 +104,7 @@ def index_doc(self, docid, text):
self._add_wordinfo(uniqwids[i], freqs[i], docid)
self._docweight[docid] = docweight
self._add_undoinfo(docid, wids)
return len(wids)

def unindex_doc(self, docid):
for wid in self._get_undoinfo(docid):
Expand Down
4 changes: 4 additions & 0 deletions Lexicon.py
Expand Up @@ -59,6 +59,10 @@ def termToWordIds(self, text):
if wid is not None:
wids.append(wid)
return wids

def get_word(self, wid):
    """Look up and return the word stored under the word id `wid`."""
    words = self.__words
    return words[wid]

def globToWordIds(self, pattern):
if not re.match("^\w+\*$", pattern):
Expand Down
85 changes: 73 additions & 12 deletions ZCTextIndex.py
Expand Up @@ -21,22 +21,30 @@

from Products.PluginIndexes.common.PluggableIndex \
import PluggableIndexInterface
from Products.PluginIndexes.common.util import parseIndexRequest

from Products.ZCTextIndex.Index import Index
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon \
import Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
from Globals import DTMLFile
from Globals import DTMLFile, InitializeClass
from Interface import verify_class_implementation
from AccessControl.SecurityInfo import ClassSecurityInfo

class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex"""

__implements__ = PluggableIndexInterface

meta_type = 'ZCTextIndex'

manage_options= (
{'label': 'Settings', 'action': 'manage_main'},
)

query_options = ['query', 'nbest']

def __init__(self, id, extra, caller):
self.id = id
Expand All @@ -45,23 +53,46 @@ def __init__(self, id, extra, caller):

if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id

verify_class_implementation(ILexicon, lexicon.__class__)


if not ILexicon.isImplementedBy(lexicon):
raise ValueError, \
'Object "%s" does not implement lexicon interface' \
% lexicon.getId()

self.lexicon = lexicon
self.index = Index(self.lexicon)
self.parser = QueryParser()

## Pluggable Index APIs ##

def index_object(self, docid, obj):
self.index.index_doc(docid, self._get_object_text(obj))
def index_object(self, docid, obj, threshold=None):
    """Index the text of `obj` under `docid`.

    The text comes from self._get_object_text(obj) and is handed to
    self.index.index_doc(); whatever index_doc() returns is returned
    to the caller.
    """
    # XXX We currently ignore subtransaction threshold
    text = self._get_object_text(obj)
    indexed = self.index.index_doc(docid, text)
    self._p_changed = 1 # XXX
    return indexed

def unindex_object(self, docid):
    """Drop `docid` from the wrapped index via its unindex_doc()."""
    index = self.index
    index.unindex_doc(docid)
    self._p_changed = 1 # XXX

def _apply_index(self, req):
pass # XXX
def _apply_index(self, request, cid=''):
    """Apply the query specified by `request` to this index.

    `request` is a mapping containing the query.  It is parsed with
    parseIndexRequest() using this index's id and query_options.

    Returns None if the parsed record has no keys, i.e. the request is
    not valid for this index.  Otherwise returns two objects: the
    result of executing the parsed query tree against self.index, and
    a one-element tuple holding the name of the field used
    (self._fieldname).

    `cid` is accepted but is not used in this method.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    # Test identity, not equality: `== None` would be routed through any
    # custom comparison defined by the keys object.
    if record.keys is None:
        return None
    # The individual keys are joined into a single query string.
    query_str = ' '.join(record.keys)
    tree = self.parser.parseQuery(query_str)
    results = tree.executeQuery(self.index)
    return results, (self._fieldname,)


def query(self, query, nbest=10):
# returns a mapping from docids to scores
Expand All @@ -70,7 +101,20 @@ def query(self, query, nbest=10):
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()


def numObjects(self):
    """Return the number of objects indexed.

    This is whatever self.index.length() reports.
    """
    index = self.index
    return index.length()

def getEntryForObject(self, documentId, default=None):
    """Return the list of words indexed for `documentId`.

    The word ids come from self.index.get_words(); each is translated
    with self.lexicon.get_word().  If get_words() raises KeyError,
    `default` is returned instead.
    """
    try:
        wids = self.index.get_words(documentId)
    except KeyError:
        entry = default
    else:
        # Only the get_words() call is guarded; a KeyError raised while
        # translating ids propagates to the caller.
        lookup = self.lexicon.get_word
        entry = []
        for wid in wids:
            entry.append(lookup(wid))
    return entry

def _get_object_text(self, obj):
x = getattr(obj, self._fieldname)
if callable(x):
Expand All @@ -82,6 +126,8 @@ def _get_object_text(self, obj):

manage_main = DTMLFile('dtml/manageZCTextIndex', globals())

InitializeClass(ZCTextIndex)

def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
RESPONSE=None):
"""Add a text index"""
Expand All @@ -93,15 +139,30 @@ def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())

def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
stopword=None, REQUEST=None):
stopwords=None, REQUEST=None):
"""Add ZCTextIndex Lexicon"""
elements = []
if splitter:
elements.append(Lexicon.Splitter())
elements.append(Splitter())
if normalizer:
elements.append(CaseNormalizer())
if stopwords:
elements.append(StopWordRemover())
lexicon = Lexicon(*elements)
lexicon = PLexicon(id, title, *elements)
self._setObject(id, lexicon)
if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1)

class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
    """Persistent Lexicon for ZCTextIndex"""

    meta_type = 'ZCTextIndex Lexicon'

    def __init__(self, id, title='', *pipeline):
        # id and title are coerced with str() before being stored.
        self.id = str(id)
        self.title = str(title)
        # Look up '__init__' through inheritedAttribute() and call it with
        # the remaining positional arguments (the pipeline elements).
        PLexicon.inheritedAttribute('__init__')(self, *pipeline)

InitializeClass(PLexicon)


11 changes: 9 additions & 2 deletions __init__.py
Expand Up @@ -21,8 +21,15 @@ def initialize(context):

context.registerClass(
ZCTextIndex.ZCTextIndex,
permission='Add Pluggable Index',
constructors=(ZCTextIndex.manage_addZCTextIndexForm,
permission = 'Add Pluggable Index',
constructors = (ZCTextIndex.manage_addZCTextIndexForm,
ZCTextIndex.manage_addZCTextIndex),
visibility=None
)

context.registerClass(
ZCTextIndex.PLexicon,
permission = 'Add Vocabularies',
constructors = (ZCTextIndex.manage_addLexiconForm,
ZCTextIndex.manage_addLexicon),
)
6 changes: 3 additions & 3 deletions dtml/addLexicon.dtml
Expand Up @@ -33,7 +33,7 @@
splitter?
</td>
<td align="left" valign="top">
<input type="checkbox" name="splitter" />
<input type="checkbox" name="splitter" checked />
</td>
</tr>

Expand All @@ -43,7 +43,7 @@
case normalizer?
</td>
<td align="left" valign="top">
<input type="checkbox" name="normalizer" />
<input type="checkbox" name="normalizer" checked />
</td>
</tr>

Expand All @@ -53,7 +53,7 @@
remove stop words?
</td>
<td align="left" valign="top">
<input type="checkbox" name="stopword" />
<input type="checkbox" name="stopwords" checked />
</td>
</tr>

Expand Down
36 changes: 13 additions & 23 deletions dtml/addZCTextIndex.dtml
Expand Up @@ -27,23 +27,6 @@ from the most relevant to the least relevant.
</td>
</tr>

<tr>
<td align="left" valign="top">
<div class="form-label">
Vocabulary
</div>
</td>
<td>

<select name="extra.vocabulary:record">
<dtml-in "this().aq_parent.objectItems('Vocabulary')">
<option value="&dtml-sequence-key;">&dtml-sequence-key; (<dtml-var "_['sequence-item'].title">)
</dtml-in>
</select>

</td>
</tr>

<tr>
<td align="left" valign="top">
<div class="form-label">
Expand All @@ -60,12 +43,19 @@ from the most relevant to the least relevant.
Lexicon
</div></td>
<td>
<select name="extra.lexicon_id:record">
<dtml-in "this().aq_parent.objectItems('Lexicon')">
<option value="&dtml-sequence-key;">&dtml-sequence-key; (<dtml-var "_['sequence-item'].title">)
</dtml-in>
</select>

<dtml-in expr="superValues('ZCTextIndex Lexicon')">
<dtml-if sequence-start>
<select name="extra.lexicon_id:record">
</dtml-if>
<option value="&dtml-id;">
&dtml-id; <dtml-var title fmt="(%s)" missing>
</option>
<dtml-if sequence-end>
</select>
</dtml-if>
<dtml-else>
<em>You must create a ZCTextIndex Lexicon first.</em>
</dtml-in>
</td>
</tr>

Expand Down

0 comments on commit f16cc3b

Please sign in to comment.