Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

We’re showing branches in this repository, but you can also compare across forks.

...
  • 3 commits
  • 5 files changed
  • 0 commit comments
  • 1 contributor
2  Makefile
View
@@ -212,7 +212,7 @@ test-integration: batch-run-queries integration-tests/enron1m.index0.pr
ruby integration-tests/eval.rb integration-tests/testset1.txt
bench: benchmark-queries integration-tests/enron1m.index0.pr
- ./benchmark-queries integration-tests/enron1m.index integration-tests/testset1.txt
+ ./benchmark-queries integration-tests/enron1m.index integration-tests/benchset1.txt
debug:
+make DEBUGOUTPUT=-DDEBUGOUTPUT
0  integration-tests/query-corpus1.txt → integration-tests/benchset1.txt
View
File renamed without changes
31 search.c
View
@@ -14,6 +14,7 @@ typedef struct label_search_state {
typedef struct term_search_state {
posting posting;
uint32_t docid_delta;
+ uint32_t posting_offset;
uint32_t next_posting_offset;
uint32_t block_offset;
int started;
@@ -262,11 +263,12 @@ RAISING_STATIC(term_init_search_state(wp_query* q, wp_segment* seg)) {
state->block_offset = offset;
postings_block* block = wp_postings_block_at(pr, offset);
state->docid_delta = block->max_docid;
+ state->posting_offset = block->postings_head;
DEBUG("docid_delta is %u", state->docid_delta);
// blocks are guaranteed to have one posting, so we can go ahead and read
// one without worrying about being done with the block
- RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, block->postings_head, &state->next_posting_offset, &state->posting, 1));
+ RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->posting_offset, &state->next_posting_offset, &state->posting, 1));
}
RELAY_ERROR(init_children(q, seg));
@@ -274,9 +276,16 @@ RAISING_STATIC(term_init_search_state(wp_query* q, wp_segment* seg)) {
return NO_ERROR;
}
+#define FREE_AND_NULL(v) do { \
+ if(v) { \
+ free(v); \
+ v = NULL; \
+ } \
+} while(0)
+
RAISING_STATIC(term_release_search_state(wp_query* q)) {
term_search_state* state = q->search_data;
- if(!state->done) free(state->posting.positions);
+ if(!state->done) FREE_AND_NULL(state->posting.positions);
free(state);
RELAY_ERROR(release_children(q));
return NO_ERROR;
@@ -448,6 +457,7 @@ RAISING_STATIC(term_next_doc(wp_query* q, wp_segment* seg, search_result* result
state->docid_delta = block->max_docid;
}
+ state->posting_offset = state->next_posting_offset;
RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->next_posting_offset, &state->next_posting_offset, &state->posting, 1));
if(state->docid_delta <= state->posting.doc_id) RAISE_ERROR("have docid %u but posting %u", state->docid_delta, state->posting.doc_id);
state->docid_delta -= state->posting.doc_id;
@@ -487,7 +497,7 @@ RAISING_STATIC(term_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id,
state->block_offset = block->prev_block_offset;
if(state->block_offset != OFFSET_NONE) {
block = wp_postings_block_at(pr, state->block_offset);
- state->next_posting_offset = block->postings_head;
+ state->posting_offset = block->postings_head;
state->docid_delta = block->max_docid;
moved = 1;
}
@@ -506,12 +516,11 @@ RAISING_STATIC(term_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id,
postings_block* block = wp_postings_block_at(pr, state->block_offset);
if(moved) { // need to read in the first posting
- free(state->posting.positions);
- RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->next_posting_offset, &state->next_posting_offset, &state->posting, 1));
+ FREE_AND_NULL(state->posting.positions);
+ RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->posting_offset, &state->next_posting_offset, &state->posting, 0));
}
if(doc_id > block->max_docid) { // it's not in here!
- // TODO see if we can actually do this. this might break the advance() contract
DEBUG("docid %u is not in this block (min %u max %u). short-circuiting!", doc_id, block->min_docid, block->max_docid);
*found = 0;
}
@@ -519,8 +528,9 @@ RAISING_STATIC(term_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id,
DEBUG("starting or resuming advance to doc %u at position %u of %u in block of docids (%u, %u)", doc_id, state->next_posting_offset, block->size, block->min_docid, block->max_docid);
while((doc_id < state->docid_delta) && (state->next_posting_offset < block->size)) {
DEBUG("during advance, found docid %u", state->docid_delta);
- free(state->posting.positions);
- RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->next_posting_offset, &state->next_posting_offset, &state->posting, 1));
+ FREE_AND_NULL(state->posting.positions);
+ state->posting_offset = state->next_posting_offset;
+ RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->posting_offset, &state->next_posting_offset, &state->posting, 0));
if(state->docid_delta <= state->posting.doc_id) {
char buf[1024];
wp_query_to_s(q, 1024, buf);
@@ -539,7 +549,10 @@ RAISING_STATIC(term_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id,
}
DEBUG("[%s:'%s'] posting advanced to that of doc %u", q->field, q->word, state->docid_delta);
- if(*found) RELAY_ERROR(search_result_init(result, q->field, q->word, state->docid_delta, state->posting.num_positions, state->posting.positions));
+ if(*found) { // reread, with positions
+ RELAY_ERROR(wp_text_postings_region_read_posting_from_block(pr, block, state->posting_offset, &state->next_posting_offset, &state->posting, 1));
+ RELAY_ERROR(search_result_init(result, q->field, q->word, state->docid_delta, state->posting.num_positions, state->posting.positions));
+ }
return NO_ERROR;
}
2  test-segment.c
View
@@ -118,9 +118,7 @@ TEST(simple_conjunctive_queries) {
query = wp_query_add(query, wp_query_new_term("body", "one"));
query = wp_query_add(query, wp_query_new_term("body", "two"));
- printf("<<<\n");
RUN_QUERY(query);
- printf(">>>\n");
ASSERT_EQUALS_UINT(1, num_results);
ASSERT_EQUALS_UINT(1, results[0].doc_id);
2  text.c
View
@@ -194,7 +194,7 @@ RAISING_STATIC(add_posting_to_block(postings_block* block, posting* po)) {
}
#define MIN_BLOCK_SIZE 32
-#define SOFT_MAX_BLOCK_SIZE 128 // this seems to wrok the best -- but tweak me!
+#define SOFT_MAX_BLOCK_SIZE 512 // this seems to work the best -- but tweak me!
#define HARD_MAX_BLOCK_SIZE ((64 * 1024) - sizeof(postings_block)) // can never be exceeded
RAISING_STATIC(build_new_block(postings_region* pr, uint32_t min_size, uint32_t old_offset, uint32_t* new_offset)) {
DEBUG("going to make a new block to hold %u bytes", min_size);

No commit comments for this range

Something went wrong with that request. Please try again.