Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Add full globbing. This implements * and ? like in the shell,
Browse files Browse the repository at this point in the history
but the pattern may not begin with a glob character (else
someone specifying "*" as the pattern can tie up the CPU for
a long time).
  • Loading branch information
gvanrossum committed May 22, 2002
1 parent 511e918 commit 23649fb
Showing 1 changed file with 36 additions and 9 deletions.
45 changes: 36 additions & 9 deletions Lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict
from PipelineFactory import splitter_factory, element_factory
from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import \
splitter_factory, element_factory

class Lexicon:

Expand Down Expand Up @@ -78,7 +81,7 @@ def parseTerms(self, text):
return last

def isGlob(self, word):
return "*" in word
return "*" in word or "?" in word

def get_word(self, wid):
return self._words[wid]
Expand All @@ -87,17 +90,41 @@ def get_wid(self, word):
return self._wids.get(word, 0)

def globToWordIds(self, pattern):
# This currently only knows about trailing *;
# whatever splitter you use should match this
assert pattern.endswith("*")
prefix = pattern[:-1]
assert prefix and not prefix.endswith("*")
# Implement * and ? just as in the shell, except the pattern
# must not start with either of these
prefix = ""
while pattern and pattern[0] not in "*?":
prefix += pattern[0]
pattern = pattern[1:]
if not pattern:
# There were no globbing characters in the pattern
wid = self._wids.get(prefix, 0)
if wid:
return [wid]
else:
return []
if not prefix:
# The pattern starts with a globbing character.
# This is too inefficient, so we raise an exception.
raise QueryError(
"pattern %r shouldn't start with glob character" % pattern)
pat = prefix
for c in pattern:
if c == "*":
pat += ".*"
elif c == "?":
pat += "."
else:
pat += re.escape(c)
pat += "$"
prog = re.compile(pat)
keys = self._wids.keys(prefix) # Keys starting at prefix
wids = []
for key in keys:
if not key.startswith(prefix):
break
wids.append(self._wids[key])
if prog.match(key):
wids.append(self._wids[key])
return wids

def _getWordIdCreate(self, word):
Expand Down Expand Up @@ -128,7 +155,7 @@ class Splitter:

import re
rx = re.compile(r"\w+")
rxGlob = re.compile(r"\w+\*?") # See globToWordIds() above
rxGlob = re.compile(r"\w+[\w*?]*") # See globToWordIds() above

def process(self, lst):
result = []
Expand Down

0 comments on commit 23649fb

Please sign in to comment.