Permalink
Browse files

add benchmark-queries binary

  • Loading branch information...
wmorgan authored and William Morgan committed Mar 30, 2012
1 parent ffa5f9c commit bf7c922c4a60532d9f229d55387c3b90c365fed3
Showing with 99 additions and 1 deletion.
  1. +1 −0 .gitignore
  2. +8 −1 Makefile
  3. +73 −0 benchmark-queries.c
  4. +17 −0 integration-tests/query-corpus1.txt
View
@@ -26,3 +26,4 @@ integration-tests/enron1m.index*
ruby/pkg/
ruby/README
ruby/COPYING
+benchmark-queries
View
@@ -36,7 +36,7 @@ DUMPBIN=dump
ADDBIN=add
MBOXADDBIN=addmbox
TESTBIN = $(TESTFILES:.c=)
-ALLBIN=$(QUERYBIN) $(DUMPBIN) $(ADDBIN) $(MBOXADDBIN) batch-run-queries
+ALLBIN=$(QUERYBIN) $(DUMPBIN) $(ADDBIN) $(MBOXADDBIN) batch-run-queries benchmark-queries
all: $(ALLBIN)
@@ -50,6 +50,9 @@ loc: $(CSRCFILES) $(LEXFILES) $(YFILES) $(HEADERFILES)
batch-run-queries.o: batch-run-queries.c whistlepig.h defaults.h index.h \
segment.h stringmap.h stringpool.h error.h termhash.h query.h search.h \
mmap-obj.h entry.h khash.h query-parser.h timer.h
+benchmark-queries.o: benchmark-queries.c whistlepig.h defaults.h index.h \
+ segment.h stringmap.h stringpool.h error.h termhash.h query.h search.h \
+ mmap-obj.h entry.h khash.h query-parser.h timer.h # source and headers whose changes force benchmark-queries.o to be rebuilt
dump.o: dump.c whistlepig.h defaults.h index.h segment.h stringmap.h \
stringpool.h error.h termhash.h query.h search.h mmap-obj.h entry.h \
khash.h query-parser.h
@@ -123,6 +126,10 @@ test-tokenizer.o: test-tokenizer.c test.h tokenizer.lex.h segment.h \
tokenizer.lex.o: tokenizer.lex.c segment.h defaults.h stringmap.h \
stringpool.h error.h termhash.h query.h search.h mmap-obj.h
+benchmark-queries: benchmark-queries.o $(OBJ) # link the benchmark driver object together with the object files listed in OBJ
+ @$(ECHO) LINK $@
+ @$(CC) -o $@ $(CCOPT) $(DEBUG) $+
+
batch-run-queries: batch-run-queries.o $(OBJ)
@$(ECHO) LINK $@
@$(CC) -o $@ $(CCOPT) $(DEBUG) $+
View
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include "whistlepig.h"
+#include "timer.h"
+
+#define MAX_LINE_LENGTH 1024
+#define NUM_RESULTS_PER_QUERY 10
+
+/*
+ * Query benchmark driver.
+ *
+ * Parses every line of the query corpus file (argv[2]) into a query against
+ * the "body" field, loads the index at base path argv[1], and then runs the
+ * whole query set in an endless loop. Whenever the "chunk" timer exceeds
+ * 1000 ms, it prints the per-query and overall throughput. The reported
+ * figures are cumulative since startup: neither num_iters nor the per-query
+ * time totals are reset between reports.
+ *
+ * The program runs until it is interrupted. Returns -1 on a usage error, if
+ * the corpus cannot be opened or read, if it contains no queries, or if
+ * memory allocation fails.
+ */
+int main(int argc, char* argv[]) {
+  wp_index* index;
+
+  if(argc != 3) {
+    fprintf(stderr, "Usage: %s <index basepath> <query corpus>\n", argv[0]);
+    return -1;
+  }
+
+  char buf[MAX_LINE_LENGTH];
+  int num_queries = 0;
+  int array_size = 1;
+  wp_query** queries = malloc(sizeof(wp_query*) * array_size);
+  if(queries == NULL) {
+    fprintf(stderr, "out of memory\n");
+    return -1;
+  }
+
+  FILE* f = fopen(argv[2], "r");
+  if(f == NULL) {
+    /* Without this check a missing corpus file would hand NULL to fgets. */
+    perror(argv[2]);
+    free(queries);
+    return -1;
+  }
+
+  /* Drive the loop off fgets itself rather than feof, which only becomes
+   * true after a read has already failed. */
+  while(fgets(buf, MAX_LINE_LENGTH, f) != NULL) {
+    if(num_queries >= array_size) {
+      /* Double the capacity; keep the old pointer until realloc succeeds. */
+      array_size = array_size * 2;
+      wp_query** grown = realloc(queries, sizeof(wp_query*) * array_size);
+      if(grown == NULL) {
+        fprintf(stderr, "out of memory\n");
+        free(queries);
+        fclose(f);
+        return -1;
+      }
+      queries = grown;
+    }
+
+    DIE_IF_ERROR(wp_query_parse(buf, "body", &queries[num_queries]));
+    num_queries++;
+  }
+
+  if(ferror(f)) {
+    perror(argv[2]);
+    fclose(f);
+    free(queries);
+    return -1;
+  }
+  fclose(f);
+
+  printf("read %d queries\n", num_queries);
+
+  if(num_queries == 0) {
+    /* With nothing to run, the benchmark loop below would spin forever. */
+    fprintf(stderr, "no queries found in %s\n", argv[2]);
+    free(queries);
+    return -1;
+  }
+
+  uint64_t results[NUM_RESULTS_PER_QUERY];
+  uint32_t num_results_found;
+  uint32_t num_iters = 0;
+  /* calloc zero-fills, so the accumulators need no explicit reset loop. */
+  uint64_t* per_query_times = calloc(num_queries, sizeof(uint64_t));
+  if(per_query_times == NULL) {
+    fprintf(stderr, "out of memory\n");
+    free(queries);
+    return -1;
+  }
+
+  DIE_IF_ERROR(wp_index_load(&index, argv[1]));
+
+  START_TIMER(total);
+  START_TIMER(chunk);
+  while(1) {
+    num_iters++;
+    for(int i = 0; i < num_queries; i++) {
+      START_TIMER(query);
+      DIE_IF_ERROR(wp_index_setup_query(index, queries[i]));
+      DIE_IF_ERROR(wp_index_run_query(index, queries[i], NUM_RESULTS_PER_QUERY, &num_results_found, results));
+      DIE_IF_ERROR(wp_index_teardown_query(index, queries[i]));
+      MARK_TIMER(query);
+      /* NOTE(review): if TIMER_MS truncates to whole milliseconds, very fast
+       * queries accumulate 0 here and report "inf" qps below -- confirm
+       * against timer.h. */
+      per_query_times[i] += TIMER_MS(query);
+    }
+
+    MARK_TIMER(total);
+    MARK_TIMER(chunk);
+    if(TIMER_MS(chunk) > 1000) {
+      for(int i = 0; i < num_queries; i++) {
+        wp_query_to_s(queries[i], MAX_LINE_LENGTH, buf);
+        printf("%10.1f qps: %s\n", (float)num_iters / (float)per_query_times[i] * 1000.0, buf);
+      }
+      printf("overall, ran %u queries in %.1fs = %.1f qps\n\n", num_queries * num_iters, (float)TIMER_MS(total) / 1000.0, (float)(num_queries * num_iters) / (float)TIMER_MS(total) * 1000.0);
+      RESET_TIMER(chunk);
+    }
+  }
+
+  /* Not reached: the loop above has no exit, so the process ends only when
+   * it is interrupted. Kept so cleanup is in place if a stop condition is
+   * ever added. */
+  DIE_IF_ERROR(wp_index_unload(index));
+
+  return 0;
+}
+
@@ -0,0 +1,17 @@
+"happy b-day"
+"happy b-day" chicago
+"happy b-day" -phone
+"happy b-day" "email me your phone"
+pate
+pate "college station"
+pate -tx
+"last day with"
+"last day with" enrq
+"last day with" "the risk team"
+"last day with" enrq OR "the risk team"
+"last day with" enrq OR "the risk team" OR trophy
+"last day with" enrq OR "the risk team" OR fdsafdasdfa
+asdlfj
+"lease violation"
+"lease violation" OR "lady gaga"
+"lease violation" -"complaining about"

0 comments on commit bf7c922

Please sign in to comment.