Browse files

use postings header counts for single-term count operations

This significantly speeds up the case where you need a count for
a single term or single label.
  • Loading branch information...
1 parent 050204c commit a525f0b8099ec2a26fff807a31a50390a51dba11 @wmorgan committed Jun 9, 2012
Showing with 73 additions and 12 deletions.
  1. +53 −12 index.c
  2. +17 −0 segment.c
  3. +3 −0 segment.h
View
65 index.c
@@ -149,6 +149,57 @@ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) {
return NO_ERROR;
}
+#define RESULT_BUF_SIZE 1024
+// Counts the matches for a query the hard way: executes it in batches of at
+// most RESULT_BUF_SIZE results and adds each batch size to *num_results.
+// The results themselves are discarded. slow!
+RAISING_STATIC(count_query_by_running_it(wp_index* index, wp_query* query, uint32_t* num_results)) {
+  uint64_t scratch[RESULT_BUF_SIZE]; // receives each batch; never read back
+  uint32_t batch_size;
+
+  *num_results = 0;
+  do {
+    RELAY_ERROR(wp_index_run_query(index, query, RESULT_BUF_SIZE, &batch_size, scratch));
+    *num_results += batch_size;
+  } while(batch_size >= RESULT_BUF_SIZE); // a short batch means we're done
+
+  return NO_ERROR;
+}
+
+// Counts the matches for a single-term or single-label query without running
+// it: for every segment of the index, asks wp_segment_count_term for the
+// count stored for (query->field, query->word) and sums the answers into
+// *num_results.
+//
+// Every lock taken here is released on the error paths too. If both the
+// guarded work and the unlock fail, the error from the guarded work is the
+// one relayed to the caller.
+RAISING_STATIC(count_query_from_posting_list_header(wp_index* index, wp_query* query, uint32_t* num_results)) {
+  wp_error* work_err;
+  wp_error* unlock_err;
+
+  // make sure we know about all segments (one could've been added by a writer)
+  RELAY_ERROR(grab_readlock(index));
+  work_err = ensure_all_segments(index);
+  unlock_err = release_lock(index); // always unlock, even if the call above failed
+  RELAY_ERROR(work_err);
+  RELAY_ERROR(unlock_err);
+
+  *num_results = 0;
+  for(int i = 0; i < index->num_segments; i++) {
+    uint32_t this_num_results = 0;
+
+    DEBUG("counting on segment %d", i);
+    wp_segment* seg = &index->segments[i];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    work_err = wp_segment_reload(seg);
+    if(work_err == NO_ERROR) work_err = wp_segment_count_term(seg, query->field, query->word, &this_num_results);
+    unlock_err = wp_segment_release_lock(seg); // always unlock, even if the work above failed
+    RELAY_ERROR(work_err);
+    RELAY_ERROR(unlock_err);
+
+    *num_results += this_num_results;
+    DEBUG("got %u results from segment %d", this_num_results, i);
+  }
+
+  return NO_ERROR;
+}
+
+// Chooses how to count the matches for a query based on its type, and leaves
+// the total in *num_results.
+RAISING_STATIC(count_query(wp_index* index, wp_query* query, uint32_t* num_results)) {
+  // a lone term or label is counted from the posting list headers
+  if(query->type == WP_QUERY_TERM || query->type == WP_QUERY_LABEL) {
+    RELAY_ERROR(count_query_from_posting_list_header(index, query, num_results));
+    return NO_ERROR;
+  }
+
+  // every other query type, WP_QUERY_EVERY included, is counted by running it
+  // TODO -- special case WP_QUERY_EVERY
+  RELAY_ERROR(count_query_by_running_it(index, query, num_results));
+  return NO_ERROR;
+}
// can be called multiple times to resume
wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) {
*num_results = 0;
@@ -209,20 +260,10 @@ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_
return NO_ERROR;
}
-#define RESULT_BUF_SIZE 1024
-// count the results by just running the query until it stops. slow!
+// just count the results, don't return them
wp_error* wp_index_count_results(wp_index* index, wp_query* query, uint32_t* num_results) {
- uint64_t results[RESULT_BUF_SIZE];
-
- *num_results = 0;
RELAY_ERROR(wp_index_setup_query(index, query));
- while(1) {
- uint32_t this_num_results;
- RELAY_ERROR(wp_index_run_query(index, query, RESULT_BUF_SIZE, &this_num_results, results));
- *num_results += this_num_results;
- if(this_num_results < RESULT_BUF_SIZE) break; // done
- }
-
+ RELAY_ERROR(count_query(index, query, num_results));
RELAY_ERROR(wp_index_teardown_query(index, query));
return NO_ERROR;
View
17 segment.c
@@ -31,6 +31,23 @@ wp_error* wp_segment_release_lock(wp_segment* seg) {
return NO_ERROR;
}
+// Looks up the (field, word) term in this segment's termhash and writes the
+// count stored in its posting list header to *num_results, or 0 when the
+// lookup returns no header. A NULL field means the word is a label.
+// Always returns NO_ERROR.
+wp_error* wp_segment_count_term(wp_segment* seg, const char* field, const char* word, uint32_t* num_results) {
+  stringmap* strings = MMAP_OBJ(seg->stringmap, stringmap);
+  stringpool* pool = MMAP_OBJ(seg->stringpool, stringpool);
+  termhash* terms = MMAP_OBJ(seg->termhash, termhash);
+
+  // translate both strings into their integer ids; 0 is the label sentinel
+  term key;
+  key.field_s = (field == NULL) ? 0 : stringmap_string_to_int(strings, pool, field);
+  key.word_s = stringmap_string_to_int(strings, pool, word);
+
+  posting_list_header* header = termhash_get_val(terms, key);
+  *num_results = (header == NULL) ? 0 : header->count;
+
+  return NO_ERROR;
+}
+
static void postings_region_init(postings_region* pr, uint32_t initial_size, uint32_t postings_type_and_flags) {
pr->postings_type_and_flags = postings_type_and_flags;
pr->num_postings = 0;
View
3 segment.h
@@ -150,4 +150,7 @@ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32
// private: return the size on disk of a position array
wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, pos_t* positions, uint32_t* size) RAISES_ERROR;
+// private: count the number of occurrences of a particular term. a NULL
+// field looks the word up as a label. the count is written to *num_results.
+wp_error* wp_segment_count_term(wp_segment* seg, const char* field, const char* word, uint32_t* num_results) RAISES_ERROR;
+
#endif

0 comments on commit a525f0b

Please sign in to comment.