Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
- converted ILexicon to z3 and bridged it back
Browse files Browse the repository at this point in the history
- ZCTextIndex now accepts lexicons with the z3 interface
  • Loading branch information
Unknown committed Oct 31, 2005
1 parent ab48ff3 commit 76c1837
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 78 deletions.
69 changes: 11 additions & 58 deletions ILexicon.py
Expand Up @@ -8,68 +8,21 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon z2 interfaces.
from Interface import Interface
$Id$
"""

class ILexicon(Interface):
"""Object responsible for converting text to word identifiers."""

def termToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
# create ILexicon
from Interface.bridge import createZope3Bridge
from interfaces import ILexicon as z3ILexicon
import ILexicon

The input text may be either a string or a list of strings.
createZope3Bridge(z3ILexicon, ILexicon, 'ILexicon')

Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""

def sourceToWordIds(text):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""

def globToWordIds(pattern):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""

def length():
"""Return the number of unique term in the lexicon."""

def get_word(wid):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""

def get_wid(word):
"""Return the wird id for the given word.
Return 0 of the word is not in the lexicon.
"""

def parseTerms(text):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""

def isGlob(word):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerm().
"""
del createZope3Bridge
del z3ILexicon
14 changes: 11 additions & 3 deletions Lexicon.py
Expand Up @@ -8,9 +8,13 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon.
$Id$
"""

import re

Expand All @@ -20,15 +24,19 @@

import ZODB
from Persistence import Persistent
from zope.interface import implements

from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict
from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import element_factory
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon


class Lexicon(Persistent):

__implements__ = ILexicon
__implements__ = z2ILexicon
implements(ILexicon)

def __init__(self, *pipeline):
self._wids = OIBTree() # word -> wid
Expand Down
15 changes: 9 additions & 6 deletions ZCTextIndex.py
Expand Up @@ -33,17 +33,18 @@
from Products.PluginIndexes.common import safe_callable
from Products.PluginIndexes.interfaces import IPluggableIndex

from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon import \
Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
from PipelineFactory import element_factory
from CosineIndex import CosineIndex
from ILexicon import ILexicon as z2ILexicon
from interfaces import ILexicon
from interfaces import IZCLexicon
from interfaces import IZCTextIndex
from OkapiIndex import OkapiIndex
from PipelineFactory import element_factory

from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex

index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex}
Expand Down Expand Up @@ -89,7 +90,8 @@ def __init__(self, id, extra=None, caller=None, index_factory=None,
if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id)

if not ILexicon.isImplementedBy(lexicon):
if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise ValueError('Object "%s" does not implement '
'ZCTextIndex Lexicon interface'
% lexicon.getId())
Expand Down Expand Up @@ -134,7 +136,8 @@ def getLexicon(self):
return self._v_lexicon
except AttributeError:
lexicon = getattr(aq_parent(aq_inner(self)), self.lexicon_id)
if not ILexicon.isImplementedBy(lexicon):
if not (ILexicon.providedBy(lexicon) or
z2ILexicon.isImplementedBy(lexicon)):
raise TypeError('Object "%s" is not a ZCTextIndex Lexicon'
% repr(lexicon))
self._v_lexicon = lexicon
Expand Down
64 changes: 64 additions & 0 deletions interfaces.py
Expand Up @@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
"""


class ILexicon(Interface):

    """Object responsible for converting text to word identifiers.
    """

    def termToWordIds(text):
        """Return a sequence of ids of the words parsed from the text.

        The input text may be either a string or a list of strings.

        Parse the text as if it consists of search terms, and skip words
        that aren't in the lexicon.
        """

    def sourceToWordIds(text):
        """Return a sequence of ids of the words parsed from the text.

        The input text may be either a string or a list of strings.

        Parse the text as if it comes from a source document, and
        create new word ids for words that aren't (yet) in the
        lexicon.
        """

    def globToWordIds(pattern):
        """Return a sequence of ids of words matching the pattern.

        The argument should be a single word using globbing syntax,
        e.g. 'foo*' meaning anything starting with 'foo'.

        Return the wids for all words in the lexicon that match the
        pattern.
        """

    def length():
        """Return the number of unique terms in the lexicon.
        """

    def get_word(wid):
        """Return the word for the given word id.

        Raise KeyError if the word id is not in the lexicon.
        """

    def get_wid(word):
        """Return the word id for the given word.

        Return 0 if the word is not in the lexicon.
        """

    def parseTerms(text):
        """Pass the text through the pipeline.

        Return a list of words, normalized by the pipeline
        (e.g. stopwords removed, case normalized etc.).
        """

    def isGlob(word):
        """Return true if the word is a globbing pattern.

        The word should be one of the words returned by parseTerms().
        """


class IZCLexicon(Interface):

"""Lexicon for ZCTextIndex.
Expand Down
35 changes: 27 additions & 8 deletions tests/testLexicon.py
Expand Up @@ -8,12 +8,17 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Lexicon unit tests.
$Id$
"""

import unittest

import os, sys
from unittest import TestCase, TestSuite, main, makeSuite

import ZODB
import transaction
Expand Down Expand Up @@ -64,7 +69,20 @@ def process(self, seq):
return res


class Test(TestCase):
class Test(unittest.TestCase):

def test_z2interfaces(self):
    # Check Lexicon against the z2 (Interface package) ILexicon
    # declaration; verifyClass raises if the class does not conform.
    from Products.ZCTextIndex.ILexicon import ILexicon as z2ILexicon
    from Interface.Verify import verifyClass as verifyZ2Class

    verifyZ2Class(z2ILexicon, Lexicon)

def test_z3interfaces(self):
    # Check Lexicon against the zope.interface ILexicon declaration;
    # verifyClass raises if the class does not conform.
    from zope.interface.verify import verifyClass as verifyZ3Class
    from Products.ZCTextIndex.interfaces import ILexicon as z3ILexicon

    verifyZ3Class(z3ILexicon, Lexicon)

def testSourceToWordIds(self):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('cats and dogs')
Expand Down Expand Up @@ -145,7 +163,7 @@ def testUpgradeLength(self):
lexicon.sourceToWordIds('how now brown cow')
self.assert_(lexicon.length.__class__ is Length)

class TestLexiconConflict(TestCase):
class TestLexiconConflict(unittest.TestCase):

db = None

Expand Down Expand Up @@ -186,11 +204,12 @@ def testAddWordConflict(self):
self.assertEqual(copy.length(), 11)
self.assertEqual(copy.length(), len(copy._words))


def test_suite():
suite = TestSuite()
suite.addTest(makeSuite(Test))
suite.addTest(makeSuite(TestLexiconConflict))
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test))
suite.addTest(unittest.makeSuite(TestLexiconConflict))
return suite

if __name__=='__main__':
main(defaultTest='test_suite')
unittest.main(defaultTest='test_suite')
5 changes: 2 additions & 3 deletions tests/testZCTextIndex.py
Expand Up @@ -17,9 +17,6 @@
"""

import unittest
import Testing
import Zope2
Zope2.startup()

import re

Expand Down Expand Up @@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase):

    def test_z3interfaces(self):
        # PLexicon has to satisfy both zope.interface declarations;
        # they are verified in the same order as before (ILexicon first).
        from zope.interface.verify import verifyClass
        from Products.ZCTextIndex import interfaces

        for iface in (interfaces.ILexicon, interfaces.IZCLexicon):
            verifyClass(iface, PLexicon)


Expand Down

0 comments on commit 76c1837

Please sign in to comment.