Skip to content

Commit

Permalink
[c++] Implement search_ranges2
Browse files Browse the repository at this point in the history
  • Loading branch information
ojwb committed Nov 15, 2016
1 parent d655313 commit 88da8ec
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 0 deletions.
130 changes: 130 additions & 0 deletions code/c++/search_ranges2.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#include <xapian.h>

#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <string>

#include "support.h"

using namespace std;

/** Run a query against a database and print one page of results.
 *
 *  The query string is parsed with English stemming, "title:" and
 *  "description:" prefixes, and three range processors (population, date and
 *  plain number).  Each match is written to cout; a one-line summary of the
 *  query, the requested window and the matching document ids goes to clog.
 *
 *  @param dbpath	Path of the Xapian database to open.
 *  @param querystring	Query in QueryParser syntax.
 *  @param offset	Defines starting point within result set.
 *  @param pagesize	Defines number of records to retrieve.
 *
 *  Exceptions thrown by Xapian (e.g. on failing to open the database) are
 *  not caught here and propagate to the caller.
 */
static void
search(const string & dbpath, const string & querystring,
       Xapian::doccount offset = 0, Xapian::doccount pagesize = 10)
{
    // Open the database we're going to search.
    Xapian::Database db(dbpath);

    // Set up a QueryParser with a stemmer and suitable prefixes.
    Xapian::QueryParser queryparser;
    queryparser.set_stemmer(Xapian::Stem("en"));
    queryparser.set_stemming_strategy(queryparser.STEM_SOME);
    queryparser.add_prefix("title", "S");
    queryparser.add_prefix("description", "XD");
    // and add in range processors
    // Start of custom RP code
    class PopulationRangeProcessor : public Xapian::NumberRangeProcessor {
	// Accept an empty endpoint (open-ended range) or a plain decimal
	// number lying within [low, high]; reject anything else.
	bool check_range_end(const string& v) const {
	    if (v.empty()) return true;
	    // Cast via unsigned char: isdigit() is undefined for negative
	    // char values such as bytes of a UTF-8 sequence.
	    if (!isdigit(static_cast<unsigned char>(v[0]))) return false;
	    errno = 0;
	    const char * p = v.c_str();
	    char * q;
	    unsigned long u = strtoul(p, &q, 10);
	    // The whole string must have been consumed without overflow.
	    if (errno || q != p + v.size()) return false;
	    // Compare against the int bounds without letting a negative
	    // bound be converted to a huge unsigned value.
	    if (low >= 0 && u < static_cast<unsigned long>(low)) return false;
	    return high >= 0 && u <= static_cast<unsigned long>(high);
	}

	int low, high;

      public:
	PopulationRangeProcessor(Xapian::valueno slot, int low_, int high_)
	    : Xapian::NumberRangeProcessor(slot), low(low_), high(high_) { }

	Xapian::Query operator()(const string& begin, const string& end) {
	    if (!check_range_end(begin))
		return Xapian::Query(Xapian::Query::OP_INVALID);
	    if (!check_range_end(end))
		return Xapian::Query(Xapian::Query::OP_INVALID);
	    return Xapian::NumberRangeProcessor::operator()(begin, end);
	}
    };

    queryparser.add_rangeprocessor(
	(new PopulationRangeProcessor(3, 500000, 50000000))->release());
    // End of custom RP code
    // Start of date example code
    Xapian::DateRangeProcessor date_vrp(2, Xapian::RP_DATE_PREFER_MDY, 1860);
    queryparser.add_rangeprocessor(&date_vrp);
    Xapian::NumberRangeProcessor number_vrp(1);
    queryparser.add_rangeprocessor(&number_vrp);
    // End of date example code

    // And parse the query.
    Xapian::Query query = queryparser.parse_query(querystring);

    // Use an Enquire object on the database to run the query.
    Xapian::Enquire enquire(db);
    enquire.set_query(query);

    // And print out something about each match.
    Xapian::MSet mset = enquire.get_mset(offset, pagesize);

    clog << "'" << querystring << "'[" << offset << ":" << offset + pagesize
	 << "] =";
    for (Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m) {
	// Positions of the fields within the document data.
	const size_t DOC_FIELD_NAME = 0;
	const size_t DOC_FIELD_DESCRIPTION = 1;
	const size_t DOC_FIELD_MOTTO = 2;
	const size_t DOC_FIELD_ADMITTED = 3;
	const size_t DOC_FIELD_POPULATION = 4;
	// Only some of the fields are displayed.
	(void)DOC_FIELD_DESCRIPTION;
	(void)DOC_FIELD_MOTTO;

	Xapian::docid did = *m;

	const string & data = m.get_document().get_data();
	const string & admitted = get_field(data, DOC_FIELD_ADMITTED);

	// The admitted field is read as YYYYMMDD and shown as
	// "Month D, YYYY".  If it is too short or the month is out of range
	// we fall back to showing the raw field rather than throwing from
	// substr() or handing strftime() an invalid tm_mon.
	string date;
	if (admitted.size() >= 8) {
	    int month_index = atoi(admitted.substr(4, 2).c_str()) - 1;
	    if (month_index >= 0 && month_index <= 11) {
		// Zero-initialise so strftime() never sees indeterminate
		// fields; only the month name is actually wanted.
		struct tm tm = {};
		tm.tm_year = 100;
		tm.tm_mon = month_index;
		tm.tm_mday = 1;
		char month[20];
		if (strftime(month, sizeof(month), "%B", &tm) != 0) {
		    char buf[64];
		    snprintf(buf, sizeof(buf), "%s %d, %d", month,
			     atoi(admitted.substr(6, 2).c_str()),
			     atoi(admitted.substr(0, 4).c_str()));
		    date = buf;
		}
	    }
	}
	if (date.empty()) date = admitted;

	// Insert thousands separators, working from the right-hand end.
	string population = get_field(data, DOC_FIELD_POPULATION);
	for (int pos = static_cast<int>(population.size()) - 3;
	     pos > 0; pos -= 3)
	    population.insert(size_t(pos), ",");

	cout << m.get_rank() + 1 << ": #" << setfill('0') << setw(3) << did
	     << " " << get_field(data, DOC_FIELD_NAME) << " "
	     << date << "\n Population "
	     << population << endl;
	// Log the document id.
	clog << ' ' << did;
    }
    clog << endl;
}

int main(int argc, char** argv) {
if (argc < 3) {
cerr << "Usage: " << argv[0] << " DBPATH QUERYTERM..." << endl;
return 1;
}
const char * dbpath = argv[1];

// Join the rest of the arguments with spaces to make the query string.
string querystring;
for (argv += 2; *argv; ++argv) {
if (!querystring.empty()) querystring += ' ';
querystring += *argv;
}

search(dbpath, querystring);
}
15 changes: 15 additions & 0 deletions code/c++/search_ranges2.cc.statesdb_10000000...out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
1: #007 State of California September 9, 1850
Population 37,253,956
2: #019 State of Texas December 29, 1845
Population 25,145,561
3: #027 State of Illinois December 3, 1818
Population 12,830,632
4: #030 State of Ohio March 1, 1803
Population 11,536,504
5: #035 State of Florida March 3, 1845
Population 18,801,310
6: #040 Commonwealth of Pennsylvania December 12, 1787
Population 12,702,379
7: #041 State of New York July 26, 1788
Population 19,378,102
'10000000..'[0:10] = 7 19 27 30 35 40 41
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
1: #001 State of Washington November 11, 1889
Population 6,744,496
2: #004 State of Montana November 8, 1889
Population 989,415
3: #005 Idaho July 3, 1890
Population 1,567,582
4: #010 State of Wyoming July 10, 1890
Population 563,626
'11/08/1889..07/10/1890'[0:10] = 1 4 5 10
5 changes: 5 additions & 0 deletions code/c++/search_ranges2.cc.statesdb_1780..1789_10000000...out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1: #040 Commonwealth of Pennsylvania December 12, 1787
Population 12,702,379
2: #041 State of New York July 26, 1788
Population 19,378,102
'1780..1789 10000000..'[0:10] = 40 41
21 changes: 21 additions & 0 deletions code/c++/search_ranges2.cc.statesdb_1800..1899.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
1: #001 State of Washington November 11, 1889
Population 6,744,496
2: #002 State of Arkansas June 15, 1836
Population 2,915,918
3: #003 State of Oregon February 14, 1859
Population 3,831,074
4: #004 State of Montana November 8, 1889
Population 989,415
5: #005 Idaho July 3, 1890
Population 1,567,582
6: #006 State of Nevada October 31, 1864
Population 2,700,551
7: #007 State of California September 9, 1850
Population 37,253,956
8: #009 State of Utah January 4, 1896
Population 2,763,885
9: #010 State of Wyoming July 10, 1890
Population 563,626
10: #011 State of Colorado August 1, 1876
Population 5,029,196
'1800..1899'[0:10] = 1 2 3 4 5 6 7 9 10 11
5 changes: 5 additions & 0 deletions code/c++/search_ranges2.cc.statesdb_spanish.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1: #004 State of Montana November 8, 1889
Population 989,415
2: #019 State of Texas December 29, 1845
Population 25,145,561
'spanish'[0:10] = 4 19

0 comments on commit 88da8ec

Please sign in to comment.