Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Added management interface to query words in the lexicon and in the p…
Browse files Browse the repository at this point in the history
…rocess uncovered a BTreeItems bug, whee!
  • Loading branch information
caseman committed Jun 5, 2002
1 parent 2bdf2c8 commit ba7c130
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 18 deletions.
50 changes: 47 additions & 3 deletions ZCTextIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,9 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):

meta_type = 'ZCTextIndex Lexicon'

manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
SimpleItem.manage_options
manage_options = ({'label':'Overview', 'action':'manage_main'},
{'label':'Query', 'action':'queryLexicon'},
) + SimpleItem.manage_options

def __init__(self, id, title='', *pipeline):
self.id = str(id)
Expand All @@ -213,7 +214,50 @@ def __init__(self, id, title='', *pipeline):
def getPipelineNames(self):
    """Return the class name of every element in the pipeline, in order"""
    names = []
    for stage in self._pipeline:
        # Report the element's class, not the element instance itself.
        names.append(stage.__class__.__name__)
    return names


# DTML template for the lexicon query page.  It is not called directly as
# a management action; queryLexicon() renders it after computing the
# paging values it passes in as keyword arguments.
_queryLexicon = DTMLFile('dtml/queryLexicon', globals())

def queryLexicon(self, REQUEST, words=None, page=0, rows=20, cols=4):
    """Lexicon browser/query user interface

    Select the words to show, cut out the requested page and lay it out
    in columns, then render the result through self._queryLexicon.

    REQUEST -- passed through unchanged to the template.
    words   -- optional sequence of words or glob patterns.  Each entry
               is expanded with self.globToWordIds() and the matching
               words are shown.  When empty or None, everything returned
               by self.words() is browsed instead.
    page    -- zero-based page number; clamped to the pages available.
    rows    -- number of words per column; clamped to 1..500.
    cols    -- number of columns per page; clamped to 1..12.

    Returns whatever self._queryLexicon returns.  The template receives
    page, rows, cols, start_word (1-based), end_word, word_count,
    page_count and page_columns (a list of lists of words) as keyword
    arguments.
    """
    if words:
        wids = []
        for word in words:
            wids.extend(self.globToWordIds(word))
        words = [self.get_word(wid) for wid in wids]
    else:
        words = self.words()

    word_count = len(words)
    rows = max(min(rows, 500), 1)
    cols = max(min(cols, 12), 1)
    page_size = rows * cols

    # divmod always yields an integer quotient, so page_count stays an
    # int even where "/" performs true division.  A partially filled
    # last page still counts as a page.
    page_count, leftover = divmod(word_count, page_size)
    if leftover:
        page_count += 1

    # With no words page_count is 0 and the upper bound is -1; the outer
    # max() keeps the page at 0 in that case.
    page = max(min(page, page_count - 1), 0)
    start = page_size * page
    end = min(start + page_size, word_count)

    # An empty result is special-cased rather than sliced.
    # NOTE(review): presumably this sidesteps the BTreeItems bug mentioned
    # in the commit message -- confirm before simplifying.
    if word_count:
        words = list(words[start:end])
    else:
        words = []

    # Split the page into consecutive runs of `rows` words, one per column.
    columns = [words[i:i + rows] for i in range(0, len(words), rows)]

    return self._queryLexicon(self, REQUEST,
                              page=page,
                              rows=rows,
                              cols=cols,
                              start_word=start + 1,
                              end_word=end,
                              word_count=word_count,
                              page_count=page_count,
                              page_columns=columns)

# DTML template behind the 'manage_main' action, which manage_options
# lists under the 'Overview' label.
manage_main = DTMLFile('dtml/manageLexicon', globals())

InitializeClass(PLexicon)
4 changes: 2 additions & 2 deletions dtml/manageLexicon.dtml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
<span class="form-label">Input Pipeline Stages</span>
</p>

<p>
<p class="form-help">
Text indexed through this lexicon is processed by the following pipeline
stages
</p>

<ol>
<ol class="form-help">
<dtml-in name="getPipelineNames">
<li>&dtml-sequence-item;</li>
</dtml-in>
Expand Down
10 changes: 7 additions & 3 deletions dtml/manageZCTextIndex.dtml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
<dtml-var manage_tabs>

<p class="form-help">

There is nothing to manage here. Move along.

The ZCTextIndex Lexicon in use by this index is:
<em><dtml-var expr="lexicon.getId()"></em>
</p>
<p class="form-help">
<em>Note:</em> You cannot change the lexicon assigned to a ZCTextIndex.
To use another lexicon, delete this index and create a new one that
uses the desired lexicon.
</p>
<dtml-var manage_page_footer>
71 changes: 71 additions & 0 deletions dtml/queryLexicon.dtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
<dtml-var manage_page_header>
<dtml-var manage_tabs>

<p class="form-help">
Browse the words in the lexicon or enter the word(s) you are interested in
below. Globbing characters (*, ?) are supported.
</p>

<dtml-let words_str="' '.join(REQUEST.get('words',[]))">
<form action="&dtml-URL;">
<p class="form-element">
<span class="form-label">Word(s)</span>
<input name="words:tokens" size="20" value="&dtml-words_str;" />
<input type="submit" value="Query" />

<span class="form-label">&nbsp;Output Columns:</span>
<input name="cols:int" size="2" value="&dtml-cols;" />
<span class="form-label">&nbsp;Rows:</span>
<input name="rows:int" size="2" value="&dtml-rows;" />
</p>
</form>
<hr />
<form action="&dtml-URL;">
<table width="100%" cellpadding="2" cellspacing="0" border="0">

<tr class="section-bar">
<td><span class="form-label">
&dtml-word_count; Words Found<dtml-if word_count>,
Displaying &dtml-start_word;-&dtml-end_word;
</dtml-if>

<dtml-if expr="page_count > 0">
</span></td>
<td align="right"><span class="form-label">
Page:
<select name="page:int" onchange="this.form.submit()">
<dtml-in expr="_.range(page_count)" prefix="page">
<option value="&dtml-page_item;"
<dtml-if expr="page == page_item">
selected
</dtml-if>
>
<dtml-var expr="page_item+1">
</option>
</dtml-in>
</select>
of &dtml-page_count;
<input type="submit" value="Go" />
<input type="hidden" name="cols:int" value="&dtml-cols;" />
<input type="hidden" name="rows:int" value="&dtml-rows;" />
<input type="hidden" name="words:tokens" value="&dtml-words_str;" />
</dtml-if>
</span></td>
</tr>
</table>
</form>
</dtml-let>

<dtml-if name="page_columns">
<table width="100%" cellpadding="0" cellspacing="10" border="0">
<tr>
<dtml-in name="page_columns" prefix="column">
<td align="left" valign="top">
<dtml-var expr="'<br />'.join(column_item)">
</td>
</dtml-in>
</tr>
</table>
</dtml-if>

<dtml-var manage_page_footer>
19 changes: 9 additions & 10 deletions help/Lexicon_Add.stx
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,13 @@ ZCTextIndex Lexicon - Add: Create a new ZCTextIndex Lexicon
tags. The HTML aware splitter gives best results when all of
the incoming content to index is HTML.

- **Stop Words** To conserve space in the vocabulary, and possibly increase
performance, you can select a stop word remover which subtracts
very common or single letter words from the Lexicon. Bear in
mind that you will not be able to search on removed stop words,
and they will also be removed from queries passed to search
ZCTextIndexes using the Lexicon.
- **Stop Words** To conserve space in the vocabulary, and possibly
increase performance, you can select a stop word remover which
subtracts very common or single letter words from the Lexicon.
Bear in mind that you will not be able to search on removed stop
words, and they will also be removed from queries passed to
search ZCTextIndexes using the Lexicon.

- **Case Normalizer** The case normalizer removes case information from the words in
the Lexicon. If case-sensitive searching is desires, then omit
this element from the pipeline.
S
- **Case Normalizer** The case normalizer removes case information
from the words in the Lexicon. If case-sensitive searching is
desired, then omit this element from the pipeline.

0 comments on commit ba7c130

Please sign in to comment.