-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
185 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
#include <xapian.h> | ||
|
||
#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <string>
|
||
#include "support.h" | ||
|
||
using namespace std; | ||
|
||
static void | ||
search(const string & dbpath, const string & querystring, | ||
Xapian::doccount offset = 0, Xapian::doccount pagesize = 10) | ||
{ | ||
// offset - defines starting point within result set. | ||
// pagesize - defines number of records to retrieve. | ||
|
||
// Open the database we're going to search. | ||
Xapian::Database db(dbpath); | ||
|
||
// Set up a QueryParser with a stemmer and suitable prefixes. | ||
Xapian::QueryParser queryparser; | ||
queryparser.set_stemmer(Xapian::Stem("en")); | ||
queryparser.set_stemming_strategy(queryparser.STEM_SOME); | ||
queryparser.add_prefix("title", "S"); | ||
queryparser.add_prefix("description", "XD"); | ||
// and add in range processors | ||
// Start of custom RP code | ||
class PopulationRangeProcessor : public Xapian::NumberRangeProcessor { | ||
bool check_range_end(const string& v) { | ||
if (v.empty()) return true; | ||
if (!isdigit(v[0])) return false; | ||
errno = 0; | ||
const char * p = v.c_str(); | ||
char * q; | ||
unsigned long u = strtoul(p, &q, 10); | ||
return !errno && q - p == v.size() && u >= low && u <= high; | ||
} | ||
|
||
int low, high; | ||
|
||
public: | ||
PopulationRangeProcessor(Xapian::valueno slot, int low_, int high_) | ||
: Xapian::NumberRangeProcessor(slot), low(low_), high(high_) { } | ||
|
||
Xapian::Query operator()(const string& begin, const string& end) { | ||
if (!check_range_end(begin)) | ||
return Xapian::Query(Xapian::Query::OP_INVALID); | ||
if (!check_range_end(end)) | ||
return Xapian::Query(Xapian::Query::OP_INVALID); | ||
return Xapian::NumberRangeProcessor::operator()(begin, end); | ||
} | ||
}; | ||
|
||
queryparser.add_rangeprocessor( | ||
(new PopulationRangeProcessor(3, 500000, 50000000))->release()); | ||
// End of custom RP code | ||
// Start of date example code | ||
Xapian::DateRangeProcessor date_vrp(2, Xapian::RP_DATE_PREFER_MDY, 1860); | ||
queryparser.add_rangeprocessor(&date_vrp); | ||
Xapian::NumberRangeProcessor number_vrp(1); | ||
queryparser.add_rangeprocessor(&number_vrp); | ||
// End of date example code | ||
|
||
// And parse the query. | ||
Xapian::Query query = queryparser.parse_query(querystring); | ||
|
||
// Use an Enquire object on the database to run the query. | ||
Xapian::Enquire enquire(db); | ||
enquire.set_query(query); | ||
|
||
// And print out something about each match. | ||
Xapian::MSet mset = enquire.get_mset(offset, pagesize); | ||
|
||
clog << "'" << querystring << "'[" << offset << ":" << offset + pagesize | ||
<< "] ="; | ||
for (Xapian::MSetIterator m = mset.begin(); m != mset.end(); ++m) { | ||
const size_t DOC_FIELD_NAME = 0; | ||
const size_t DOC_FIELD_DESCRIPTION = 1; | ||
const size_t DOC_FIELD_MOTTO = 2; | ||
const size_t DOC_FIELD_ADMITTED = 3; | ||
const size_t DOC_FIELD_POPULATION = 4; | ||
|
||
Xapian::docid did = *m; | ||
|
||
const string & data = m.get_document().get_data(); | ||
const string & admitted = get_field(data, DOC_FIELD_ADMITTED); | ||
struct tm tm; | ||
tm.tm_year = 100; | ||
tm.tm_mon = atoi(admitted.substr(4, 2).c_str()) - 1; | ||
tm.tm_mday = 1; | ||
char month[20]; | ||
strftime(month, sizeof(month), "%B", &tm); | ||
char date[40]; | ||
sprintf(date, "%s %d, %d", month, | ||
atoi(admitted.substr(6, 2).c_str()), | ||
atoi(admitted.substr(0, 4).c_str())); | ||
string population = get_field(data, DOC_FIELD_POPULATION); | ||
for (int pos = population.size() - 3; pos > 0; pos -= 3) | ||
population.insert(size_t(pos), ","); | ||
cout << m.get_rank() + 1 << ": #" << setfill('0') << setw(3) << did | ||
<< " " << get_field(data, DOC_FIELD_NAME) << " " | ||
<< date << "\n Population " | ||
<< population << endl; | ||
// Log the document id. | ||
clog << ' ' << did; | ||
} | ||
clog << endl; | ||
} | ||
|
||
int main(int argc, char** argv) { | ||
if (argc < 3) { | ||
cerr << "Usage: " << argv[0] << " DBPATH QUERYTERM..." << endl; | ||
return 1; | ||
} | ||
const char * dbpath = argv[1]; | ||
|
||
// Join the rest of the arguments with spaces to make the query string. | ||
string querystring; | ||
for (argv += 2; *argv; ++argv) { | ||
if (!querystring.empty()) querystring += ' '; | ||
querystring += *argv; | ||
} | ||
|
||
search(dbpath, querystring); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
1: #007 State of California September 9, 1850 | ||
Population 37,253,956 | ||
2: #019 State of Texas December 29, 1845 | ||
Population 25,145,561 | ||
3: #027 State of Illinois December 3, 1818 | ||
Population 12,830,632 | ||
4: #030 State of Ohio March 1, 1803 | ||
Population 11,536,504 | ||
5: #035 State of Florida March 3, 1845 | ||
Population 18,801,310 | ||
6: #040 Commonwealth of Pennsylvania December 12, 1787 | ||
Population 12,702,379 | ||
7: #041 State of New York July 26, 1788 | ||
Population 19,378,102 | ||
'10000000..'[0:10] = 7 19 27 30 35 40 41 |
9 changes: 9 additions & 0 deletions
9
code/c++/search_ranges2.cc.statesdb_11=2f08=2f1889..07=2f10=2f1890.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
1: #001 State of Washington November 11, 1889 | ||
Population 6,744,496 | ||
2: #004 State of Montana November 8, 1889 | ||
Population 989,415 | ||
3: #005 Idaho July 3, 1890 | ||
Population 1,567,582 | ||
4: #010 State of Wyoming July 10, 1890 | ||
Population 563,626 | ||
'11/08/1889..07/10/1890'[0:10] = 1 4 5 10 |
5 changes: 5 additions & 0 deletions
5
code/c++/search_ranges2.cc.statesdb_1780..1789_10000000...out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
1: #040 Commonwealth of Pennsylvania December 12, 1787 | ||
Population 12,702,379 | ||
2: #041 State of New York July 26, 1788 | ||
Population 19,378,102 | ||
'1780..1789 10000000..'[0:10] = 40 41 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
1: #001 State of Washington November 11, 1889 | ||
Population 6,744,496 | ||
2: #002 State of Arkansas June 15, 1836 | ||
Population 2,915,918 | ||
3: #003 State of Oregon February 14, 1859 | ||
Population 3,831,074 | ||
4: #004 State of Montana November 8, 1889 | ||
Population 989,415 | ||
5: #005 Idaho July 3, 1890 | ||
Population 1,567,582 | ||
6: #006 State of Nevada October 31, 1864 | ||
Population 2,700,551 | ||
7: #007 State of California September 9, 1850 | ||
Population 37,253,956 | ||
8: #009 State of Utah January 4, 1896 | ||
Population 2,763,885 | ||
9: #010 State of Wyoming July 10, 1890 | ||
Population 563,626 | ||
10: #011 State of Colorado August 1, 1876 | ||
Population 5,029,196 | ||
'1800..1899'[0:10] = 1 2 3 4 5 6 7 9 10 11 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
1: #004 State of Montana November 8, 1889 | ||
Population 989,415 | ||
2: #019 State of Texas December 29, 1845 | ||
Population 25,145,561 | ||
'spanish'[0:10] = 4 19 |