Skip to content

Commit

Permalink
[python3] convert search_synonyms example
Browse files Browse the repository at this point in the history
  • Loading branch information
ojwb committed Nov 1, 2016
1 parent da236dc commit 76abddf
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
61 changes: 61 additions & 0 deletions code/python3/search_synonyms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python

import json
import logging
import sys
import xapian
import support

### Start of example code.
def search(dbpath, querystring, offset=0, pagesize=10):
    """Run a synonym-aware search and print one line per match.

    dbpath      - path handed to xapian.WritableDatabase
    querystring - raw query text handed to the QueryParser
    offset      - defines starting point within result set
    pagesize    - defines number of records to retrieve

    Returns nothing: each match is printed, and the collected docids are
    passed to support.log_matches.
    """
    # Open the database we're going to search.  It is opened writable
    # because a synonym is added to it just below.
    db = xapian.WritableDatabase(dbpath)

    # Start of adding synonyms
    db.add_synonym("time", "calendar")
    # End of adding synonyms

    # Set up a QueryParser with a stemmer and suitable prefixes
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    # Start of prefix configuration.
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")
    # End of prefix configuration.

    # Start of set database
    parser.set_database(db)
    # End of set database

    # And parse the query
    parsed_query = parser.parse_query(querystring, parser.FLAG_SYNONYM)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(parsed_query)

    # And print out something about each match
    line_format = u"%(rank)i: #%(docid)3.3i %(title)s"
    docids = []
    for hit in enquire.get_mset(offset, pagesize):
        # The document data is decoded as UTF-8 and parsed as JSON.
        raw = hit.document.get_data()
        fields = json.loads(raw.decode('utf8'))
        print(line_format % {
            'rank': hit.rank + 1,  # printed ranks start at 1
            'docid': hit.docid,
            'title': fields.get('TITLE', u''),
        })
        docids.append(hit.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, docids)
### End of example code.

# Script entry: expects a database path followed by at least one query term.
if not len(sys.argv) >= 3:
    print("Usage: %s DBPATH QUERYTERM..." % sys.argv[0])
    sys.exit(1)

# Configure logging at INFO level before running the search.
logging.basicConfig(level=logging.INFO)
# All remaining arguments are joined with spaces into a single query string.
search(dbpath=sys.argv[1], querystring=" ".join(sys.argv[2:]))
10 changes: 10 additions & 0 deletions code/python3/search_synonyms.py.db_time.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1: #065 Electric time piece with hands but without dial (no pendulum
2: #058 The "Empire" clock, to show the time at various longitudes,
3: #041 Frequency and time measuring instrument type TSA3436 by Venn
4: #056 Single sandglass in 4 pillared wood mount, running time 15 1
5: #043 Loughborough-Hayes automatic timing apparatus. Used by the R
6: #011 "Timetrunk" by Hines and Co., Glasgow (a sandglass for timin
7: #016 Copy of the gearing of the Byzantine sundial-calendar (1983-
8: #045 Master clock of the "Silent Electric" type made by the Magne
9: #018 Solar/Sidereal verge watch with epicyclic maintaining power
INFO:xapian.search:'time'[0:10] = 65 58 41 56 43 11 16 45 18
11 changes: 11 additions & 0 deletions code/python3/search_synonyms.py.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
1: #016 Copy of the gearing of the Byzantine sundial-calendar (1983-
2: #072 German Perpetual Calendar in gilt metal
3: #065 Electric time piece with hands but without dial (no pendulum
4: #068 Ornate brass Perpetual Calendar
5: #058 The "Empire" clock, to show the time at various longitudes,
6: #041 Frequency and time measuring instrument type TSA3436 by Venn
7: #056 Single sandglass in 4 pillared wood mount, running time 15 1
8: #043 Loughborough-Hayes automatic timing apparatus. Used by the R
9: #026 Sundial and compass with perpetual calendar and lunar circles
10: #036 Universal 'Tri-Compax' chronographic wrist watch
INFO:xapian.search:'~time'[0:10] = 16 72 65 68 58 41 56 43 26 36

0 comments on commit 76abddf

Please sign in to comment.