From 76c1837fcd238ef88d3367addca90475d73a4786 Mon Sep 17 00:00:00 2001 From: Date: Mon, 31 Oct 2005 20:49:32 +0000 Subject: [PATCH] - converted ILexicon to z3 and bridged it back - ZCTextIndex now accepts lexicons with the z3 interface --- ILexicon.py | 69 +++++++--------------------------------- Lexicon.py | 14 ++++++-- ZCTextIndex.py | 15 +++++---- interfaces.py | 64 +++++++++++++++++++++++++++++++++++++ tests/testLexicon.py | 35 +++++++++++++++----- tests/testZCTextIndex.py | 5 ++- 6 files changed, 124 insertions(+), 78 deletions(-) diff --git a/ILexicon.py b/ILexicon.py index e06079a..aff4068 100644 --- a/ILexicon.py +++ b/ILexicon.py @@ -8,68 +8,21 @@ # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -# FOR A PARTICULAR PURPOSE +# FOR A PARTICULAR PURPOSE. # ############################################################################## +"""Lexicon z2 interfaces. -from Interface import Interface +$Id$ +""" -class ILexicon(Interface): - """Object responsible for converting text to word identifiers.""" - def termToWordIds(text): - """Return a sequence of ids of the words parsed from the text. +# create ILexicon +from Interface.bridge import createZope3Bridge +from interfaces import ILexicon as z3ILexicon +import ILexicon - The input text may be either a string or a list of strings. +createZope3Bridge(z3ILexicon, ILexicon, 'ILexicon') - Parse the text as if they are search terms, and skips words - that aren't in the lexicon. - """ - - def sourceToWordIds(text): - """Return a sequence of ids of the words parsed from the text. - - The input text may be either a string or a list of strings. - - Parse the text as if they come from a source document, and - creates new word ids for words that aren't (yet) in the - lexicon. 
- """ - - def globToWordIds(pattern): - """Return a sequence of ids of words matching the pattern. - - The argument should be a single word using globbing syntax, - e.g. 'foo*' meaning anything starting with 'foo'. - - Return the wids for all words in the lexicon that match the - pattern. - """ - - def length(): - """Return the number of unique term in the lexicon.""" - - def get_word(wid): - """Return the word for the given word id. - - Raise KeyError if the word id is not in the lexicon. - """ - - def get_wid(word): - """Return the wird id for the given word. - - Return 0 of the word is not in the lexicon. - """ - - def parseTerms(text): - """Pass the text through the pipeline. - - Return a list of words, normalized by the pipeline - (e.g. stopwords removed, case normalized etc.). - """ - - def isGlob(word): - """Return true if the word is a globbing pattern. - - The word should be one of the words returned by parseTerm(). - """ +del createZope3Bridge +del z3ILexicon diff --git a/Lexicon.py b/Lexicon.py index d989a78..6d458ba 100644 --- a/Lexicon.py +++ b/Lexicon.py @@ -8,9 +8,13 @@ # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -# FOR A PARTICULAR PURPOSE +# FOR A PARTICULAR PURPOSE. # ############################################################################## +"""Lexicon. 
+ +$Id$ +""" import re @@ -20,15 +24,19 @@ import ZODB from Persistence import Persistent +from zope.interface import implements -from Products.ZCTextIndex.ILexicon import ILexicon from Products.ZCTextIndex.StopDict import get_stopdict from Products.ZCTextIndex.ParseTree import QueryError from Products.ZCTextIndex.PipelineFactory import element_factory +from ILexicon import ILexicon as z2ILexicon +from interfaces import ILexicon + class Lexicon(Persistent): - __implements__ = ILexicon + __implements__ = z2ILexicon + implements(ILexicon) def __init__(self, *pipeline): self._wids = OIBTree() # word -> wid diff --git a/ZCTextIndex.py b/ZCTextIndex.py index 7395e91..978ad78 100644 --- a/ZCTextIndex.py +++ b/ZCTextIndex.py @@ -33,17 +33,18 @@ from Products.PluginIndexes.common import safe_callable from Products.PluginIndexes.interfaces import IPluggableIndex -from Products.ZCTextIndex.ILexicon import ILexicon from Products.ZCTextIndex.Lexicon import \ Lexicon, Splitter, CaseNormalizer, StopWordRemover from Products.ZCTextIndex.NBest import NBest from Products.ZCTextIndex.QueryParser import QueryParser -from PipelineFactory import element_factory +from CosineIndex import CosineIndex +from ILexicon import ILexicon as z2ILexicon +from interfaces import ILexicon from interfaces import IZCLexicon from interfaces import IZCTextIndex +from OkapiIndex import OkapiIndex +from PipelineFactory import element_factory -from Products.ZCTextIndex.CosineIndex import CosineIndex -from Products.ZCTextIndex.OkapiIndex import OkapiIndex index_types = {'Okapi BM25 Rank':OkapiIndex, 'Cosine Measure':CosineIndex} @@ -89,7 +90,8 @@ def __init__(self, id, extra=None, caller=None, index_factory=None, if lexicon is None: raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id) - if not ILexicon.isImplementedBy(lexicon): + if not (ILexicon.providedBy(lexicon) or + z2ILexicon.isImplementedBy(lexicon)): raise ValueError('Object "%s" does not implement ' 'ZCTextIndex Lexicon interface' % 
lexicon.getId()) @@ -134,7 +136,8 @@ def getLexicon(self): return self._v_lexicon except AttributeError: lexicon = getattr(aq_parent(aq_inner(self)), self.lexicon_id) - if not ILexicon.isImplementedBy(lexicon): + if not (ILexicon.providedBy(lexicon) or + z2ILexicon.isImplementedBy(lexicon)): raise TypeError('Object "%s" is not a ZCTextIndex Lexicon' % repr(lexicon)) self._v_lexicon = lexicon diff --git a/interfaces.py b/interfaces.py index c518a11..e6699ed 100644 --- a/interfaces.py +++ b/interfaces.py @@ -24,6 +24,70 @@ class IZCTextIndex(Interface): """ +class ILexicon(Interface): + + """Object responsible for converting text to word identifiers. + """ + + def termToWordIds(text): + """Return a sequence of ids of the words parsed from the text. + + The input text may be either a string or a list of strings. + + Parse the text as if they are search terms, and skips words + that aren't in the lexicon. + """ + + def sourceToWordIds(text): + """Return a sequence of ids of the words parsed from the text. + + The input text may be either a string or a list of strings. + + Parse the text as if they come from a source document, and + creates new word ids for words that aren't (yet) in the + lexicon. + """ + + def globToWordIds(pattern): + """Return a sequence of ids of words matching the pattern. + + The argument should be a single word using globbing syntax, + e.g. 'foo*' meaning anything starting with 'foo'. + + Return the wids for all words in the lexicon that match the + pattern. + """ + + def length(): + """Return the number of unique terms in the lexicon. + """ + + def get_word(wid): + """Return the word for the given word id. + + Raise KeyError if the word id is not in the lexicon. + """ + + def get_wid(word): + """Return the word id for the given word. + + Return 0 if the word is not in the lexicon. + """ + + def parseTerms(text): + """Pass the text through the pipeline. + + Return a list of words, normalized by the pipeline + (e.g. 
stopwords removed, case normalized etc.). + """ + + def isGlob(word): + """Return true if the word is a globbing pattern. + + The word should be one of the words returned by parseTerms(). + """ + + + class IZCLexicon(Interface): """Lexicon for ZCTextIndex. diff --git a/tests/testLexicon.py b/tests/testLexicon.py index bdfb0ee..a6175c2 100644 --- a/tests/testLexicon.py +++ b/tests/testLexicon.py @@ -8,12 +8,17 @@ # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -# FOR A PARTICULAR PURPOSE +# FOR A PARTICULAR PURPOSE. # ############################################################################## +"""Lexicon unit tests. + +$Id$ +""" + +import unittest import os, sys -from unittest import TestCase, TestSuite, main, makeSuite import ZODB import transaction @@ -64,7 +69,20 @@ def process(self, seq): return res -class Test(TestCase): +class Test(unittest.TestCase): + + def test_z2interfaces(self): + from Interface.Verify import verifyClass + from Products.ZCTextIndex.ILexicon import ILexicon + + verifyClass(ILexicon, Lexicon) + + def test_z3interfaces(self): + from Products.ZCTextIndex.interfaces import ILexicon + from zope.interface.verify import verifyClass + + verifyClass(ILexicon, Lexicon) + def testSourceToWordIds(self): lexicon = Lexicon(Splitter()) wids = lexicon.sourceToWordIds('cats and dogs') @@ -145,7 +163,7 @@ def testUpgradeLength(self): lexicon.sourceToWordIds('how now brown cow') self.assert_(lexicon.length.__class__ is Length) -class TestLexiconConflict(TestCase): +class TestLexiconConflict(unittest.TestCase): db = None @@ -186,11 +204,12 @@ def testAddWordConflict(self): self.assertEqual(copy.length(), 11) self.assertEqual(copy.length(), len(copy._words)) + def test_suite(): - suite = TestSuite() - suite.addTest(makeSuite(Test)) - suite.addTest(makeSuite(TestLexiconConflict)) + suite = 
unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test)) + suite.addTest(unittest.makeSuite(TestLexiconConflict)) return suite if __name__=='__main__': - main(defaultTest='test_suite') + unittest.main(defaultTest='test_suite') diff --git a/tests/testZCTextIndex.py b/tests/testZCTextIndex.py index 7dc0c36..eba795f 100644 --- a/tests/testZCTextIndex.py +++ b/tests/testZCTextIndex.py @@ -17,9 +17,6 @@ """ import unittest -import Testing -import Zope2 -Zope2.startup() import re @@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase): class PLexiconTests(unittest.TestCase): def test_z3interfaces(self): + from Products.ZCTextIndex.interfaces import ILexicon from Products.ZCTextIndex.interfaces import IZCLexicon from zope.interface.verify import verifyClass + verifyClass(ILexicon, PLexicon) verifyClass(IZCLexicon, PLexicon)