Permalink
Browse files

added n-way mergesort implementation in C

  • Loading branch information...
1 parent fcfc8e5 commit d0318b6c124ddc93a2d1924728aa2ea119193120 @valyala committed Jan 25, 2012
Showing with 374 additions and 177 deletions.
  1. +272 −12 galgorithm.h
  2. +22 −125 perftests.c
  3. +11 −11 perftests.cpp
  4. +68 −28 tests.c
  5. +1 −1 tests.cpp
View
@@ -36,7 +36,7 @@ static inline void galgorithm_partial_sort(const struct gheap_ctx *ctx,
/*
* Vtable for input iterators, which is passed to galgorithm_nway_merge().
*/
-struct galgorithm_nway_input_vtable
+struct galgorithm_nway_merge_input_vtable
{
/*
* Must advance the iterator to the next item.
@@ -57,9 +57,9 @@ struct galgorithm_nway_input_vtable
/*
* A collection of input iterators, which is passed to galgorithm_nway_merge().
*/
-struct galgorithm_nway_input
+struct galgorithm_nway_merge_input
{
- const struct galgorithm_nway_input_vtable *vtable;
+ const struct galgorithm_nway_merge_input_vtable *vtable;
/*
* An array of opaque contexts, which are passed to vtable functions.
@@ -84,7 +84,7 @@ struct galgorithm_nway_input
/*
* Vtable for output iterator, which is passed to galgorithm_nway_merge().
*/
-struct galgorithm_nway_output_vtable
+struct galgorithm_nway_merge_output_vtable
{
/*
* Must put data into the output and advance the iterator
@@ -96,9 +96,9 @@ struct galgorithm_nway_output_vtable
/*
* Output iterator, which is passed to galgorithm_nway_merge().
*/
-struct galgorithm_nway_output
+struct galgorithm_nway_merge_output
{
- const struct galgorithm_nway_output_vtable *vtable;
+ const struct galgorithm_nway_merge_output_vtable *vtable;
/*
* An opaque context, which is passed to vtable functions.
@@ -116,8 +116,34 @@ struct galgorithm_nway_output
* As a side effect the function shuffles input contexts.
*/
static inline void galgorithm_nway_merge(const struct gheap_ctx *ctx,
- const struct galgorithm_nway_input *input,
- const struct galgorithm_nway_output *output);
+ const struct galgorithm_nway_merge_input *input,
+ const struct galgorithm_nway_merge_output *output);
+
+/*
+ * Callback type for the small range sorter used by
+ * galgorithm_nway_mergesort().
+ *
+ * The callback must sort the range [base[0] ... base[n-1]].
+ *
+ * ctx is the small_range_sorter_ctx passed to galgorithm_nway_mergesort.
+ * base points to the first item of the range to sort.
+ * n is the number of items in the range.
+ */
+typedef void (*galgorithm_nway_mergesort_small_range_sorter_t)(
+ const void *ctx, void *base, size_t n);
+
+/*
+ * Performs n-way mergesort for [base[0] ... base[range_size-1]] items.
+ *
+ * Uses small_range_sorter for sorting ranges containing no more
+ * than small_range_size items.
+ *
+ * Splits the input range into subranges of small_range_size items each
+ * (the last subrange may contain fewer items), sorts them using
+ * small_range_sorter and then merges them back using n-way merge
+ * with n = subranges_count. Merge passes are repeated, with the subrange
+ * size multiplied by subranges_count after each pass, until the whole
+ * range has been merged.
+ *
+ * ctx is the gheap context; its item_mover is used for moving items
+ * and the context is passed to galgorithm_nway_merge().
+ * small_range_sorter_ctx is passed as the first argument
+ * to small_range_sorter.
+ * small_range_size must be greater than 0.
+ * subranges_count must be greater than 1.
+ *
+ * items_tmp_buf must point to uninitialized memory, which can hold
+ * up to range_size items. All the items are moved into this buffer
+ * before sorting; the sorted result is stored back into base.
+ */
+static inline void galgorithm_nway_mergesort(const struct gheap_ctx *ctx,
+ void *base, size_t range_size,
+ galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
+ const void *small_range_sorter_ctx,
+ size_t small_range_size, size_t subranges_count, void *items_tmp_buf);
/*******************************************************************************
@@ -198,7 +224,7 @@ struct _galgorithm_nway_merge_less_comparer_ctx
{
gheap_less_comparer_t less_comparer;
const void *less_comparer_ctx;
- const struct galgorithm_nway_input_vtable *vtable;
+ const struct galgorithm_nway_merge_input_vtable *vtable;
};
int _galgorithm_nway_merge_less_comparer(const void *const ctx,
@@ -207,14 +233,14 @@ int _galgorithm_nway_merge_less_comparer(const void *const ctx,
const struct _galgorithm_nway_merge_less_comparer_ctx *const c = ctx;
const gheap_less_comparer_t less_comparer = c->less_comparer;
const void *const less_comparer_ctx = c->less_comparer_ctx;
- const struct galgorithm_nway_input_vtable *const vtable = c->vtable;
+ const struct galgorithm_nway_merge_input_vtable *const vtable = c->vtable;
return less_comparer(less_comparer_ctx, vtable->get(b), vtable->get(a));
}
static inline void galgorithm_nway_merge(const struct gheap_ctx *const ctx,
- const struct galgorithm_nway_input *const input,
- const struct galgorithm_nway_output *const output)
+ const struct galgorithm_nway_merge_input *const input,
+ const struct galgorithm_nway_merge_output *const output)
{
void *const top_input = input->ctxs;
size_t inputs_count = input->ctxs_count;
@@ -251,4 +277,238 @@ static inline void galgorithm_nway_merge(const struct gheap_ctx *const ctx,
}
}
+static inline void _galgorithm_move_items(const struct gheap_ctx *const ctx,
+ void *const src, const size_t n, void *const dst)
+{ /*
+ * Moves n items from the src array to the dst array, one item at a time
+ * in ascending index order, using ctx->item_mover. Item addresses are
+ * obtained with _galgorithm_get_item_ptr(). Does nothing if n is 0.
+ */
+ const gheap_item_mover_t item_mover = ctx->item_mover;
+
+ for (size_t i = 0; i < n; ++i) {
+ item_mover(
+ _galgorithm_get_item_ptr(ctx, dst, i),
+ _galgorithm_get_item_ptr(ctx, src, i));
+ }
+}
+
+static inline void _galgorithm_sort_subranges(const struct gheap_ctx *const ctx,
+ void *const base, const size_t range_size,
+ const galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
+ const void *const small_range_sorter_ctx,
+ const size_t small_range_size)
+{ /*
+ * Splits [base[0] ... base[range_size-1]] into consecutive subranges of
+ * small_range_size items and calls small_range_sorter on each of them.
+ * A shorter tail subrange, if any, is sorted by a separate call.
+ * small_range_sorter_ctx is passed as the first argument of every call.
+ */
+ assert(small_range_size > 0); /* It is the divisor of the modulo below. */
+
+ const size_t last_full_range = range_size - range_size % small_range_size; /*
+ * last_full_range is the largest multiple of small_range_size not exceeding
+ * range_size, i.e. the index just past the last full subrange.
+ */
+ for (size_t i = 0; i != last_full_range; i += small_range_size) {
+ small_range_sorter(small_range_sorter_ctx,
+ _galgorithm_get_item_ptr(ctx, base, i), small_range_size);
+ }
+
+ /* Sort the last subrange, which contains fewer than small_range_size items. */
+ if (last_full_range < range_size) {
+ small_range_sorter(small_range_sorter_ctx,
+ _galgorithm_get_item_ptr(ctx, base, last_full_range),
+ range_size - last_full_range);
+ }
+}
+
+struct _galgorithm_nway_mergesort_input_ctx
+{ /*
+ * Input iterator state for one subrange merged by
+ * galgorithm_nway_mergesort(). The iterator covers the items
+ * in the half-open range [next, last).
+ */
+ const struct gheap_ctx *ctx; /* Used for computing item addresses. */
+ const void *next; /* The current item of the subrange. */
+ const void *last; /* Points just past the final item of the subrange. */
+};
+
+static inline int _galgorithm_nway_mergesort_input_next(void *ctx)
+{ /*
+ * Advances the input iterator to the next item.
+ *
+ * ctx must point to struct _galgorithm_nway_mergesort_input_ctx, which
+ * isn't exhausted yet (next < last).
+ *
+ * Returns non-zero if the iterator still points to an item after
+ * the advance, zero if the end of the subrange has been reached.
+ */
+ struct _galgorithm_nway_mergesort_input_ctx *const c = ctx;
+
+ assert(c->next < c->last);
+ c->next = _galgorithm_get_item_ptr(c->ctx, c->next, 1); /* Step by one item. */
+ assert(c->next <= c->last);
+ return (c->next < c->last);
+}
+
+static inline const void *_galgorithm_nway_mergesort_input_get(const void *ctx)
+{ /*
+ * Returns a pointer to the current item of the input iterator without
+ * advancing it.
+ *
+ * ctx must point to struct _galgorithm_nway_mergesort_input_ctx, which
+ * isn't exhausted yet (next < last).
+ */
+ const struct _galgorithm_nway_mergesort_input_ctx *const c = ctx;
+
+ assert(c->next < c->last);
+ return c->next;
+}
+
+static const struct galgorithm_nway_merge_input_vtable
+ _galgorithm_nway_mergesort_input_vtable = { /*
+ * Input vtable used by galgorithm_nway_mergesort(): binds the next and get
+ * callbacks operating on struct _galgorithm_nway_mergesort_input_ctx.
+ */
+ .next = &_galgorithm_nway_mergesort_input_next,
+ .get = &_galgorithm_nway_mergesort_input_get,
+};
+
+struct _galgorithm_nway_mergesort_output_ctx
+{ /*
+ * Output iterator state used by galgorithm_nway_mergesort().
+ */
+ const struct gheap_ctx *ctx; /* Provides item_mover and item addressing. */
+ void *next; /* The location where the next item will be put. */
+};
+
+static inline void _galgorithm_nway_mergesort_output_put(void *ctx,
+ const void *data)
+{ /*
+ * Moves the item pointed to by data into the current output location
+ * using the item_mover of the gheap context and then advances the output
+ * location by one item.
+ *
+ * ctx must point to struct _galgorithm_nway_mergesort_output_ctx.
+ */
+ struct _galgorithm_nway_mergesort_output_ctx *const c = ctx;
+ const gheap_item_mover_t item_mover = c->ctx->item_mover;
+
+ item_mover(c->next, data);
+ c->next = _galgorithm_get_item_ptr(c->ctx, c->next, 1);
+}
+
+static const struct galgorithm_nway_merge_output_vtable
+ _galgorithm_nway_mergesort_output_vtable = { /*
+ * Output vtable used by galgorithm_nway_mergesort(): binds the put callback
+ * operating on struct _galgorithm_nway_mergesort_output_ctx.
+ */
+ .put = &_galgorithm_nway_mergesort_output_put,
+};
+
+static inline void _galgorithm_merge_subrange_tuples(
+ const struct gheap_ctx *const ctx, void *const base, const size_t range_size,
+ struct galgorithm_nway_merge_input *const input,
+ const struct galgorithm_nway_merge_output *const output,
+ const size_t subranges_count, const size_t subrange_size)
+{ /*
+ * Performs one merge pass over the source array
+ * [base[0] ... base[range_size-1]].
+ *
+ * The source array is treated as a sequence of consecutive subranges of
+ * subrange_size items each (the last one may be shorter). Subranges are
+ * grouped into tuples of subranges_count subranges, and each tuple is
+ * passed to a separate galgorithm_nway_merge() call writing to output.
+ *
+ * input->ctxs must point to an array of at least subranges_count
+ * struct _galgorithm_nway_mergesort_input_ctx items. Their next and last
+ * fields, as well as input->ctxs_count, are overwritten by this function.
+ *
+ * subranges_count must be greater than 1 and subrange_size must be
+ * greater than 0.
+ */
+ assert(subranges_count > 1);
+ assert(subrange_size > 0);
+
+ struct _galgorithm_nway_mergesort_input_ctx *const input_ctxs = input->ctxs;
+ input->ctxs_count = subranges_count;
+
+ size_t i = 0; /* Index of the first source item, which isn't merged yet. */
+
+ /*
+ * Merge full subrange tuples. Each full subrange tuple contains
+ * subranges_count full subranges. Each full subrange contains
+ * subrange_size items.
+ *
+ * The condition below is true only if at least one full tuple fits
+ * into the range. It is written as a division, so tuple_size is computed
+ * only when the product doesn't exceed range_size.
+ */
+ if (subrange_size <= range_size / subranges_count) {
+ const size_t tuple_size = subrange_size * subranges_count;
+ const size_t last_full_tuple = range_size - range_size % tuple_size;
+
+ while (i != last_full_tuple) {
+ for (size_t j = 0; j < subranges_count; ++j) { /*
+ * Point each input context to its own subrange [i, i + subrange_size).
+ * This is done for every tuple, since every merge consumes
+ * the input contexts.
+ */
+ input_ctxs[j].next = _galgorithm_get_item_ptr(ctx, base, i);
+ i += subrange_size;
+ input_ctxs[j].last = _galgorithm_get_item_ptr(ctx, base, i);
+ }
+
+ galgorithm_nway_merge(ctx, input, output);
+ }
+ }
+
+ /*
+ * Merge tail subrange tuple. Tail subrange tuple contains less than
+ * subranges_count full subranges. It also may contain tail subrange
+ * with less than subrange_size items.
+ */
+ const size_t tail_tuple_size = range_size - i;
+ if (tail_tuple_size > 0) {
+ const size_t full_subranges_count = tail_tuple_size / subrange_size;
+ assert(full_subranges_count < subranges_count);
+ size_t tail_subranges_count = full_subranges_count;
+
+ for (size_t j = 0; j < full_subranges_count; ++j) {
+ input_ctxs[j].next = _galgorithm_get_item_ptr(ctx, base, i);
+ i += subrange_size;
+ input_ctxs[j].last = _galgorithm_get_item_ptr(ctx, base, i);
+ }
+
+ if (i < range_size) { /* There is a short subrange at the very end. */
+ input_ctxs[full_subranges_count].next =
+ _galgorithm_get_item_ptr(ctx, base, i);
+ input_ctxs[full_subranges_count].last =
+ _galgorithm_get_item_ptr(ctx, base, range_size);
+ ++tail_subranges_count;
+ }
+
+ input->ctxs_count = tail_subranges_count; /* Only the inputs set above. */
+ galgorithm_nway_merge(ctx, input, output);
+ }
+}
+
+static inline void _galgorithm_nway_mergesort_input_ctx_mover(void *dst,
+ const void *src)
+{ /*
+ * Copies struct _galgorithm_nway_mergesort_input_ctx from src to dst
+ * via structure assignment. Used as ctx_mover of the merge input
+ * in galgorithm_nway_mergesort().
+ */
+ *(struct _galgorithm_nway_mergesort_input_ctx *)dst =
+ *(struct _galgorithm_nway_mergesort_input_ctx *)src;
+}
+
+static inline void galgorithm_nway_mergesort(const struct gheap_ctx *const ctx,
+ void *const base, const size_t range_size,
+ const galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
+ const void *const small_range_sorter_ctx,
+ const size_t small_range_size, const size_t subranges_count,
+ void *const items_tmp_buf)
+{ /*
+ * Sorts [base[0] ... base[range_size-1]] using n-way mergesort.
+ *
+ * Items are moved to items_tmp_buf, split into subranges of
+ * small_range_size items, which are sorted with small_range_sorter,
+ * and then merged in passes alternating between items_tmp_buf and base.
+ * Each pass merges up to subranges_count subranges at a time.
+ * The sorted result always ends up in base.
+ *
+ * small_range_size must be greater than 0, subranges_count must be
+ * greater than 1 and items_tmp_buf must have room for range_size items.
+ */
+ assert(small_range_size > 0);
+ assert(subranges_count > 1);
+
+ /* Preparation: Move items to a temporary buffer. */
+ _galgorithm_move_items(ctx, base, range_size, items_tmp_buf);
+
+ /*
+ * Step 1: split the range into subranges with small_range_size size each
+ * (except the last subrange, which may contain less than small_range_size
+ * items) and sort each of these subranges using small_range_sorter.
+ */
+ _galgorithm_sort_subranges(ctx, items_tmp_buf, range_size,
+ small_range_sorter, small_range_sorter_ctx, small_range_size);
+
+ /* Step 2: Merge subranges sorted at the previous step using n-way merge. */
+ struct _galgorithm_nway_mergesort_input_ctx input_ctxs[subranges_count]; /*
+ * Variable-length array with one input context per merged subrange.
+ * Only the ctx field is set here; next and last are set for every tuple
+ * by _galgorithm_merge_subrange_tuples().
+ */
+ for (size_t i = 0; i < subranges_count; ++i) {
+ input_ctxs[i].ctx = ctx;
+ }
+
+ struct galgorithm_nway_merge_input input = {
+ .vtable = &_galgorithm_nway_mergesort_input_vtable,
+ .ctxs = &input_ctxs,
+ .ctxs_count = subranges_count,
+ .ctx_size = sizeof(input_ctxs[0]),
+ .ctx_mover = &_galgorithm_nway_mergesort_input_ctx_mover,
+ };
+
+ struct _galgorithm_nway_mergesort_output_ctx output_ctx;
+ output_ctx.ctx = ctx; /* output_ctx.next is set before each merge pass. */
+
+ const struct galgorithm_nway_merge_output output = {
+ .vtable = &_galgorithm_nway_mergesort_output_vtable,
+ .ctx = &output_ctx,
+ };
+
+ size_t subrange_size = small_range_size;
+ for (;;) {
+ /*
+ * First pass: merge items from the temporary buffer
+ * to the original location.
+ */
+ output_ctx.next = base;
+ _galgorithm_merge_subrange_tuples(ctx, items_tmp_buf, range_size,
+ &input, &output, subranges_count, subrange_size);
+
+ if (subrange_size > range_size / subranges_count) { /*
+ * The pass above handled the whole range as a single tuple,
+ * so the merged items are already in base.
+ */
+ break;
+ }
+ subrange_size *= subranges_count; /* Doesn't exceed range_size here. */
+
+ /*
+ * Second pass: merge items from the original location
+ * to the temporary buffer.
+ */
+ output_ctx.next = items_tmp_buf;
+ _galgorithm_merge_subrange_tuples(ctx, base, range_size,
+ &input, &output, subranges_count, subrange_size);
+
+ if (subrange_size > range_size / subranges_count) {
+ /* Move items from the temporary buffer to the original location. */
+ _galgorithm_move_items(ctx, items_tmp_buf, range_size, base);
+ break;
+ }
+ subrange_size *= subranges_count; /* Doesn't exceed range_size here. */
+ }
+}
+
+
+
#endif
Oops, something went wrong.

0 comments on commit d0318b6

Please sign in to comment.