Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Improved Zope integration
Browse files Browse the repository at this point in the history
  * A pipeline factory registry now allows registration of possible
    pipeline elements for use by Zope lexicons.

  * ZMI constructor form for lexicon uses pipeline registry to generate form
    fields

  * ZMI constructor form for ZCTextIndex allows you to choose between
    Okapi and Cosine relevance algorithms
  • Loading branch information
caseman committed May 22, 2002
1 parent 3c80576 commit 511e918
Show file tree
Hide file tree
Showing 10 changed files with 257 additions and 53 deletions.
3 changes: 3 additions & 0 deletions HTMLSplitter.py
Expand Up @@ -13,6 +13,7 @@
##############################################################################

from Products.ZCTextIndex.ISplitter import ISplitter
from Products.ZCTextIndex.PipelineFactory import splitter_factory

import re

Expand Down Expand Up @@ -43,6 +44,8 @@ def _split(self, text):
rx = re.compile("[A-Za-z]")
return [word for word in text.split()
if len(word) > 1 and rx.search(word)]

# Make this splitter selectable by name wherever splitter_factory's
# registered names are offered (e.g. the lexicon constructor form).
splitter_factory.registerFactory('HTML Word Splitter', HTMLWordSplitter)

if __name__ == "__main__":
import sys
Expand Down
34 changes: 34 additions & 0 deletions IPipelineElementFactory.py
@@ -0,0 +1,34 @@
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

from Interface import Base as Interface

class IPipelineElementFactory(Interface):
    """Registry that creates pipeline elements from registered names."""

    def registerFactory(name, factory):
        """Register factory under name.

        A name may be registered only once; registering the same name a
        second time raises ValueError.
        """

    def getFactoryNames():
        """Return the registered pipeline factory names as a sorted list."""

    def instantiate(name):
        """Create a pipeline element using the factory registered as name.

        Raises KeyError when name has not been registered.
        """
8 changes: 8 additions & 0 deletions Lexicon.py
Expand Up @@ -18,6 +18,7 @@
from BTrees.OIBTree import OIBTree
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict
from PipelineFactory import splitter_factory, element_factory

class Lexicon:

Expand Down Expand Up @@ -140,11 +141,15 @@ def processGlob(self, lst):
for s in lst:
result += self.rxGlob.findall(s)
return result

# Make Splitter selectable by name through the splitter registry.
splitter_factory.registerFactory('Regex Splitter', Splitter)

class CaseNormalizer:
    """Pipeline element that lower-cases every item it is given."""

    def process(self, lst):
        """Return a new list holding the lower-cased form of each item."""
        lowered = []
        for word in lst:
            lowered.append(word.lower())
        return lowered

# Make CaseNormalizer selectable by name through the element registry.
element_factory.registerFactory('Case Normalizer', CaseNormalizer)

class StopWordRemover:

Expand All @@ -161,3 +166,6 @@ def process(self, lst):
else:
def process(self, lst):
return self._process(self.dict, lst)


# Make StopWordRemover selectable by name through the element registry.
element_factory.registerFactory('Stop Word Remover', StopWordRemover)
43 changes: 43 additions & 0 deletions PipelineFactory.py
@@ -0,0 +1,43 @@
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

from Products.ZCTextIndex.IPipelineElementFactory \
import IPipelineElementFactory

class PipelineElementFactory:
    """Registry mapping names to factories for lexicon pipeline elements."""

    __implements__ = IPipelineElementFactory

    def __init__(self):
        # Maps each registered name to its factory callable.
        self._elements = {}

    def registerFactory(self, name, factory):
        """Register factory under name.

        Raises ValueError if name has already been registered.
        """
        if self._elements.has_key(name):
            # The name must be interpolated here; the message is also kept
            # generic because this class backs both the splitter registry
            # and the element registry.
            raise ValueError(
                'ZCTextIndex pipeline element named "%s" already registered'
                % name)

        self._elements[name] = factory

    def getFactoryNames(self):
        """Return a sorted list of the registered names."""
        names = self._elements.keys()
        names.sort()
        return names

    def instantiate(self, name):
        """Call the factory registered as name and return its result.

        Raises KeyError if name is not registered.
        """
        return self._elements[name]()


splitter_factory = PipelineElementFactory()

element_factory = PipelineElementFactory()
53 changes: 40 additions & 13 deletions ZCTextIndex.py
Expand Up @@ -26,12 +26,17 @@
PluggableIndexInterface
from Products.PluginIndexes.common.util import parseIndexRequest

from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.Lexicon import \
Lexicon, Splitter, CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.NBest import NBest
from Products.ZCTextIndex.QueryParser import QueryParser
from PipelineFactory import splitter_factory, element_factory

from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
# Maps the index type names accepted as extra.index_type by the ZCTextIndex
# constructor (and returned by getIndexTypes for the ZMI form) to the index
# classes that implement them.
index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex}

class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex"""
Expand All @@ -50,7 +55,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):

## Constructor ##

def __init__(self, id, extra, caller, index_factory=OkapiIndex):
def __init__(self, id, extra, caller, index_factory=None):
self.id = id
self._fieldname = extra.doc_attr
lexicon = getattr(caller, extra.lexicon_id, None)
Expand All @@ -64,7 +69,15 @@ def __init__(self, id, extra, caller, index_factory=OkapiIndex):
% lexicon.getId()

self.lexicon = lexicon
self._index_factory = index_factory

if index_factory is None:
if extra.index_type not in index_types.keys():
raise ValueError, 'Invalid index type "%s"' % extra.index_type
self._index_factory = index_types[extra.index_type]
self._index_type = extra.index_type
else:
self._index_factory = index_factory

self.clear()

## External methods not in the Pluggable Index API ##
Expand Down Expand Up @@ -144,6 +157,10 @@ def clear(self):
## User Interface Methods ##

manage_main = DTMLFile('dtml/manageZCTextIndex', globals())

def getIndexType(self):
    """Return the index type name, or the index factory's __name__ if unset"""
    # _index_type is only recorded when the type was chosen by name, so the
    # factory's own name serves as the fallback.
    fallback = self._index_factory.__name__
    return getattr(self, '_index_type', fallback)

InitializeClass(ZCTextIndex)

Expand All @@ -157,29 +174,39 @@ def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,

manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())

def manage_addLexicon(self, id, title='', splitter_name=None,
                      element_names=None, REQUEST=None):
    """Add ZCTextIndex Lexicon

    splitter_name, when given, names a factory registered with
    splitter_factory; element_names is a sequence of names registered with
    element_factory.  Unregistered names raise KeyError.
    """
    # element_names stays None when the caller omits it, e.g. a form post
    # with every element checkbox cleared (browsers do not submit unchecked
    # boxes).  Treat that as "no elements" instead of iterating None.
    if element_names is None:
        element_names = ()

    elements = [element_factory.instantiate(name) for name in element_names]

    # The splitter, when chosen, is placed first in the pipeline.
    if splitter_name:
        elements.insert(0, splitter_factory.instantiate(splitter_name))

    lexicon = PLexicon(id, title, *elements)
    self._setObject(id, lexicon)
    if REQUEST is not None:
        return self.manage_main(self, REQUEST, update_menu=1)

class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent Lexcion for ZCTextIndex"""
"""Persistent Lexicon for ZCTextIndex"""

meta_type = 'ZCTextIndex Lexicon'

manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
SimpleItem.manage_options

def __init__(self, id, title='', *pipeline):
    # Store the identifying attributes as plain strings, then hand the
    # pipeline elements on to the inherited constructor.
    self.id = str(id)
    self.title = str(title)
    base_init = PLexicon.inheritedAttribute('__init__')
    base_init(self, *pipeline)

## User Interface Methods ##

def getPipelineNames(self):
    """Return the class name of each pipeline element, in pipeline order"""
    names = []
    for element in self._pipeline:
        names.append(element.__class__.__name__)
    return names

manage_main = DTMLFile('dtml/manageLexicon', globals())

InitializeClass(PLexicon)
22 changes: 19 additions & 3 deletions __init__.py
Expand Up @@ -16,14 +16,17 @@
Experimental plugin text index for ZCatalog.
"""

from PipelineFactory import splitter_factory, element_factory
from Products.ZCTextIndex import ZCTextIndex, HTMLSplitter

def initialize(context):
from Products.ZCTextIndex import ZCTextIndex

context.registerClass(
ZCTextIndex.ZCTextIndex,
permission = 'Add Pluggable Index',
constructors = (ZCTextIndex.manage_addZCTextIndexForm,
ZCTextIndex.manage_addZCTextIndex),
ZCTextIndex.manage_addZCTextIndex,
getIndexTypes),
icon='www/index.gif',
visibility=None
)
Expand All @@ -32,6 +35,19 @@ def initialize(context):
ZCTextIndex.PLexicon,
permission = 'Add Vocabularies',
constructors = (ZCTextIndex.manage_addLexiconForm,
ZCTextIndex.manage_addLexicon),
ZCTextIndex.manage_addLexicon,
getSplitterNames, getElementNames),
icon='www/lexicon.gif'
)

## Functions below are for use in the ZMI constructor forms ##

def getSplitterNames(self):
    # Names registered with splitter_factory, as offered in the lexicon
    # constructor form's splitter menu.
    names = splitter_factory.getFactoryNames()
    return names

def getElementNames(self):
    # Names registered with element_factory, one checkbox each in the
    # lexicon constructor form.
    names = element_factory.getFactoryNames()
    return names

def getIndexTypes(self):
    # Names of the index types known to the ZCTextIndex module.
    known_types = ZCTextIndex.index_types
    return known_types.keys()

49 changes: 25 additions & 24 deletions dtml/addLexicon.dtml
@@ -1,10 +1,15 @@
<dtml-var manage_page_header>

<dtml-var "manage_form_title(this(), _,
form_title='Add Lexicon',
form_title='Add ZCTextIndex Lexicon',
)">

<p class="form-help">
A ZCTextIndex Lexicon processes and stores the words of documents indexed
with a ZCTextIndex. Multiple ZCTextIndexes can share the same lexicon.
</p>

<FORM ACTION="manage_addLexicon" METHOD="POST">
<form action="manage_addLexicon" method="POST">
<table cellspacing="0" cellpadding="2" border="0">
<tr>
<td align="left" valign="top">
Expand All @@ -16,6 +21,7 @@
<input type="text" name="id" size="40" />
</td>
</tr>

<tr>
<td align="left" valign="top">
<div class="form-optional">
Expand All @@ -29,33 +35,28 @@

<tr>
<td align="left" valign="top">
<div class="form-label">
splitter?
</td>
<td align="left" valign="top">
<input type="checkbox" name="splitter" checked />
</td>
</tr>

<tr>
<td align="left" valign="top">
<div class="form-label">
case normalizer?
<div class="form-label">Word Splitter</div>
</td>
<td align="left" valign="top">
<input type="checkbox" name="normalizer" checked />
<select name="splitter_name">
<dtml-in name="getSplitterNames">
<option value="&dtml-sequence-item;">&dtml-sequence-item;</option>
</dtml-in>
</select>
</td>
</tr>

<tr>
<td align="left" valign="top">
<div class="form-label">
remove stop words?
</td>
<td align="left" valign="top">
<input type="checkbox" name="stopwords" checked />
</td>
</tr>
<dtml-in name="getElementNames">
<tr>
<td align="left" valign="top">
<div class="form-label">&dtml-sequence-item;</div>
</td>
<td align="left" valign="top">
<input type="checkbox" name="element_names:list"
value="&dtml-sequence-item;" checked />
</td>
</tr>
</dtml-in>

<tr>
<td align="left" valign="top">
Expand Down

0 comments on commit 511e918

Please sign in to comment.