Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Add usage message and -h option.
Browse files Browse the repository at this point in the history
Add -w and -W option to dump the word list (by word and by wid,
respectively).

Except KeyboardInterrupt from unqualified except clauses.
  • Loading branch information
gvanrossum committed May 23, 2002
1 parent 21f3d62 commit 388fd66
Showing 1 changed file with 62 additions and 4 deletions.
66 changes: 62 additions & 4 deletions tests/mhindex.py
@@ -1,6 +1,34 @@
#! /usr/bin/env python2.1

"""MH mail indexer."""
"""MH mail indexer.
To index messages from a single folder (messages defaults to 'all'):
mhindex.py [options] -u +folder [messages ...]
To bulk index all messages from several folders:
mhindex.py [options] -b folder ...
To execute a single query:
mhindex.py [options] query
To enter interactive query mode:
mhindex.py [options]
Common options:
-d FILE -- specify the Data.fs to use (default ~/.Data.fs)
-w -- dump the word list in alphabetical order and exit
-W -- dump the word list ordered by word id and exit
Indexing options:
-O -- do a prescan on the data to compute optimal word id assignments;
this is only useful the first time the Data.fs is used
-t N -- commit a transaction after every N messages (default 20000)
-p N -- pack after every N commits (by default no packing is done)
Querying options:
-m N -- show at most N matching lines from the message (default 3)
-n N -- show the N best matching messages (default 3)
"""

import os
import re
Expand Down Expand Up @@ -36,10 +64,11 @@

def main():
try:
opts, args = getopt.getopt(sys.argv[1:], "bd:m:n:Op:t:u")
opts, args = getopt.getopt(sys.argv[1:], "bd:hm:n:Op:t:uwW")
except getopt.error, msg:
print msg
sys.exit(2)
print "use -h for help"
return 2
update = 0
bulk = 0
optimize = 0
Expand All @@ -48,11 +77,15 @@ def main():
datafs = os.path.expanduser(DATAFS)
pack = 0
trans = 20000
dumpwords = dumpwids = 0
for o, a in opts:
if o == "-b":
bulk = 1
if o == "-d":
datafs = a
if o == "-h":
print __doc__
return
if o == "-m":
maxlines = int(a)
if o == "-n":
Expand All @@ -65,7 +98,17 @@ def main():
trans = int(a)
if o == "-u":
update = 1
if o == "-w":
dumpwords = 1
if o == "-W":
dumpwids = 1
ix = Indexer(datafs, writable=update or bulk, trans=trans, pack=pack)
if dumpwords:
ix.dumpwords()
if dumpwids:
ix.dumpwids()
if dumpwords or dumpwids:
return
if bulk:
if optimize:
ix.optimize(args)
Expand Down Expand Up @@ -127,6 +170,17 @@ def __init__(self, datafs, writable=0, trans=0, pack=0):
self.maxdocid = 0
print len(self.docpaths), "Document ids"
print len(self.path2docid), "Pathnames"
print self.index.lexicon.length(), "Words"

def dumpwids(self):
    """Print every word id in the index's lexicon together with its word.

    Entries are written one per line, in whatever order
    ``lexicon.wids()`` produces them, with the word id right-aligned
    in a 10-character column followed by the word.
    """
    lex = self.index.lexicon
    for word_id in lex.wids():
        word = lex.get_word(word_id)
        # Single parenthesised argument: same output as the bare print
        # statement, one formatted line per entry.
        print("%10d %s" % (word_id, word))

def dumpwords(self):
    """Print every word in the index's lexicon together with its word id.

    Entries are written one per line, in whatever order
    ``lexicon.words()`` produces them, with the word id right-aligned
    in a 10-character column followed by the word.
    """
    lex = self.index.lexicon
    for entry in lex.words():
        word_id = lex.get_wid(entry)
        # Single parenthesised argument: same output as the bare print
        # statement, one formatted line per entry.
        print("%10d %s" % (word_id, entry))

def close(self):
self.root = None
Expand Down Expand Up @@ -162,6 +216,8 @@ def interact(self, nbest=NBEST, maxlines=MAXLINES):
continue
try:
results, n = self.timequery(text, top + nbest)
except KeyboardInterrupt:
raise
except:
reportexc()
text = ""
Expand Down Expand Up @@ -367,6 +423,8 @@ def getmessagetext(self, m, name=None):
self.getheaders(m, L)
try:
self.getmsgparts(m, L, 0)
except KeyboardInterrupt:
raise
except:
print "(getmsgparts failed:)"
reportexc()
Expand Down Expand Up @@ -471,4 +529,4 @@ def reportexc():
traceback.print_exc()

if __name__ == "__main__":
main()
sys.exit(main())

0 comments on commit 388fd66

Please sign in to comment.