Skip to content

Commit

Permalink
Add top n collection and refactor raw query logic
Browse files Browse the repository at this point in the history
  • Loading branch information
xichen2020 committed Jan 29, 2019
1 parent 3978d57 commit 2614f8b
Show file tree
Hide file tree
Showing 22 changed files with 1,188 additions and 401 deletions.
1 change: 1 addition & 0 deletions .excludemetalint
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ values/template/
integration/
_mock.go
_gen.go
.gen.go
8 changes: 4 additions & 4 deletions document/field/field_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

func TestNewValuesLessThan(t *testing.T) {
v1 := []ValueUnion{
v1 := Values{
{
Type: StringType,
StringVal: "foo",
Expand All @@ -22,7 +22,7 @@ func TestNewValuesLessThan(t *testing.T) {
},
}

v2 := []ValueUnion{
v2 := Values{
{
Type: StringType,
StringVal: "foo",
Expand All @@ -47,7 +47,7 @@ func TestNewValuesLessThan(t *testing.T) {
}

func TestFilterValues(t *testing.T) {
v := []ValueUnion{
v := Values{
{
Type: StringType,
StringVal: "foo",
Expand All @@ -71,5 +71,5 @@ func TestFilterValues(t *testing.T) {
}
toExcludeIndices := []int{1, 4}
filteredValues := FilterValues(v, toExcludeIndices)
require.Equal(t, filteredValues, []ValueUnion{v[0], v[2], v[3]})
require.Equal(t, filteredValues, Values{v[0], v[2], v[3]})
}
8 changes: 4 additions & 4 deletions document/field/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,15 @@ func MustReverseCompareValue(v1, v2 ValueUnion) int {
}

// ValuesLessThanFn compares two value unions and returns true if `v1` is less than `v2`.
type ValuesLessThanFn func(v1, v2 []ValueUnion) bool
type ValuesLessThanFn func(v1, v2 Values) bool

// NewValuesLessThanFn creates a less than fn from a set of field value comparison functions.
// The logic is such that the function returned performs a prioritized ordering of results,
// where values at smaller indices of the array have higher priority and values at higher
// indices are only consulted if those at smaller indices are equal.
// Precondition: len(v1) == len(compareFns) && len(v2) == len(compareFns).
func NewValuesLessThanFn(compareFns []ValueCompareFn) ValuesLessThanFn {
return func(v1, v2 []ValueUnion) bool {
return func(v1, v2 Values) bool {
for idx, fn := range compareFns {
res := fn(v1[idx], v2[idx])
if res < 0 {
Expand All @@ -325,7 +325,7 @@ func NewValuesLessThanFn(compareFns []ValueCompareFn) ValuesLessThanFn {
// Precondition: Elements in `toExcludeIndices` are unique, monotonically increasing,
// and within range [0, len(values)).
// Postcondition: `values` is unmodified.
func FilterValues(values []ValueUnion, toExcludeIndices []int) []ValueUnion {
func FilterValues(values Values, toExcludeIndices []int) Values {
if len(values) == 0 || len(toExcludeIndices) == 0 {
return values
}
Expand All @@ -335,7 +335,7 @@ func FilterValues(values []ValueUnion, toExcludeIndices []int) []ValueUnion {
var (
valueIdx = 0
toExcludeIdx = 0
res = make([]ValueUnion, 0, len(values)-len(toExcludeIndices))
res = make(Values, 0, len(values)-len(toExcludeIndices))
)

for valueIdx < len(values) && toExcludeIdx < len(toExcludeIndices) {
Expand Down
15 changes: 8 additions & 7 deletions generated/generics/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,13 @@
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/vendor/github.com/m3db/m3x/generics/hashmap/map.go | awk '/^package/{i++}i' | genny -pkg=query -imp \"github.com/xichen2020/eventdb/document/field\" -imp \"github.com/xichen2020/eventdb/calculation\" gen \"KeyType=Values:field.Values ValueType=ResultArray:calculation.ResultArray Map=ValuesResultArrayHash mapKey=valuesResultArrayHashKey mapOptions=valuesResultArrayHashOptions mapAlloc=valuesResultArrayHashAlloc\" | perl -p -e 's/ValuesResultArrayHashHash/ValuesResultArrayHashHash/gi' | perl -p -e 's/ValuesResultArrayHashEntry/ValuesResultArrayHashEntry/gi' > $GOPATH/src/$PACKAGE/query/values_result_array_map.gen.go"

// Heap related template instantiations.
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/raw_result_heap.gen.go -pkg=query gen \"GenericValue=RawResult ValueHeap=RawResultHeap NewHeap=NewRawResultHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/multi_key_result_group_heap.gen.go -pkg=query gen \"GenericValue=multiKeyResultGroup ValueHeap=multiKeyResultGroupHeap NewHeap=newMultiKeyResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/bool_result_group_heap.gen.go -pkg=query gen \"GenericValue=boolResultGroup ValueHeap=boolResultGroupHeap NewHeap=newBoolResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/int_result_group_heap.gen.go -pkg=query gen \"GenericValue=intResultGroup ValueHeap=intResultGroupHeap NewHeap=newIntResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/double_result_group_heap.gen.go -pkg=query gen \"GenericValue=doubleResultGroup ValueHeap=doubleResultGroupHeap NewHeap=newDoubleResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/string_result_group_heap.gen.go -pkg=query gen \"GenericValue=stringResultGroup ValueHeap=stringResultGroupHeap NewHeap=newStringResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/time_result_group_heap.gen.go -pkg=query gen \"GenericValue=timeResultGroup ValueHeap=timeResultGroupHeap NewHeap=newTimeResultGroupHeap\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/raw_result_heap.gen.go -pkg=query gen \"GenericValue=RawResult ValueHeap=RawResultHeap NewHeap=NewRawResultHeap TopNValues=TopNRawResults NewTopValues=NewTopNRawResults ValueAddOptions=RawResultAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/multi_key_result_group_heap.gen.go -pkg=query gen \"GenericValue=multiKeyResultGroup ValueHeap=multiKeyResultGroupHeap NewHeap=newMultiKeyResultGroupHeap TopNValues=topNMultiKeyResultGroup NewTopValues=newTopNMultiKeyResultGroup ValueAddOptions=multiKeyResultGroupAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/bool_result_group_heap.gen.go -pkg=query gen \"GenericValue=boolResultGroup ValueHeap=boolResultGroupHeap NewHeap=newBoolResultGroupHeap TopNValues=topNBools NewTopValues=newTopNBools ValueAddOptions=boolAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/int_result_group_heap.gen.go -pkg=query gen \"GenericValue=intResultGroup ValueHeap=intResultGroupHeap NewHeap=newIntResultGroupHeap TopNValues=topNInts NewTopValues=newTopNInts ValueAddOptions=intAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/double_result_group_heap.gen.go -pkg=query gen \"GenericValue=doubleResultGroup ValueHeap=doubleResultGroupHeap NewHeap=newDoubleResultGroupHeap TopNValues=topNDoubles NewTopValues=newTopNDoubles ValueAddOptions=doubleAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/string_result_group_heap.gen.go -pkg=query gen \"GenericValue=stringResultGroup ValueHeap=stringResultGroupHeap NewHeap=newStringResultGroupHeap TopNValues=topNStrings NewTopValues=newTopNStrings ValueAddOptions=stringAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/query/time_result_group_heap.gen.go -pkg=query gen \"GenericValue=timeResultGroup ValueHeap=timeResultGroupHeap NewHeap=newTimeResultGroupHeap TopNValues=topNTimes NewTopValues=newTopNTimes ValueAddOptions=timeAddOptions\""
//go:generate sh -c "cat $GOPATH/src/$PACKAGE/x/heap/generic.go | awk '/^package/{i++}i' | genny -out=$GOPATH/src/$PACKAGE/storage/doc_id_values_heap.gen.go -pkg=storage gen \"GenericValue=docIDValues ValueHeap=docIDValuesHeap NewHeap=newDocIDValuesHeap TopNValues=topNDocIDValues NewTopValues=newTopNDocIDValues ValueAddOptions=docIDValuesAddOptions\""

package generics
75 changes: 75 additions & 0 deletions query/bool_result_group_heap.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,78 @@ func (h boolResultGroupHeap) heapify(i, n int) {
i = smallest
}
}

// topNBools keeps track of the top n values in a value sequence for the
// order defined by the `lessThanFn`. In particular if `lessThanFn` defines
// an increasing order (returning true if `v1` < `v2`), the collection stores
// the top N largest values, and vice versa.
type topNBools struct {
// n is the retention limit: Add pushes a value unconditionally only while
// the backing heap holds fewer than n values.
n int
// lessThanFn defines the ordering. Once n values are held, Add uses it to
// decide whether a new value should displace the current minimum.
lessThanFn func(v1, v2 boolResultGroup) bool
// h is the heap backing the collection. SortInPlace sets it to nil, after
// which the collection must not be used.
h *boolResultGroupHeap
}

// newTopNBools builds a top n value collection that retains at most n values
// ordered by lessThanFn. Both arguments are also handed to
// newBoolResultGroupHeap to construct the backing heap.
func newTopNBools(
	n int,
	lessThanFn func(v1, v2 boolResultGroup) bool,
) *topNBools {
	backing := newBoolResultGroupHeap(n, lessThanFn)

	top := new(topNBools)
	top.n = n
	top.lessThanFn = lessThanFn
	top.h = backing
	return top
}

// boolAddOptions provides the options for adding a value.
type boolAddOptions struct {
// CopyOnAdd, when true, makes Add store a copy of the value instead of the
// value it was given.
CopyOnAdd bool
// CopyFn returns a copy of v. Add calls it when CopyOnAdd is set and the
// collection still has room for the value.
CopyFn func(v boolResultGroup) boolResultGroup
// CopyToFn copies src into target. Add calls it when CopyOnAdd is set and the
// value replaces the current minimum, passing the popped item as the target.
CopyToFn func(src boolResultGroup, target *boolResultGroup)
}

// Len reports how many items the backing heap currently holds.
func (v topNBools) Len() int {
	return v.h.Len()
}

// Cap reports the capacity of the backing heap.
func (v topNBools) Cap() int {
	return v.h.Cap()
}

// RawData exposes the array backing the heap; the items are in no particular order.
func (v topNBools) RawData() []boolResultGroup {
	return v.h.RawData()
}

// Min returns the "smallest" value held, as reported by the backing heap.
func (v topNBools) Min() boolResultGroup {
	return v.h.Min()
}

// Reset resets the internal array backing the heap.
func (v *topNBools) Reset() {
	v.h.Reset()
}

// Add offers a value to the collection.
//
// While fewer than n values are held, val is always accepted; if
// opts.CopyOnAdd is set it is first copied with opts.CopyFn. Once n values are
// held, val is accepted only when the current minimum is less than val
// according to lessThanFn, in which case it replaces that minimum. With
// opts.CopyOnAdd set, the replacement is written into the popped item through
// opts.CopyToFn so the popped item is reused rather than a new copy made.
//
// A collection created with n <= 0 retains nothing, so Add is a no-op for it.
// opts.CopyFn and opts.CopyToFn are called unconditionally on their respective
// paths and therefore must be non-nil whenever opts.CopyOnAdd is set.
func (v *topNBools) Add(val boolResultGroup, opts boolAddOptions) {
	// Without this guard a non-positive n skips the "room left" branch below
	// and goes on to call Min and Pop on a heap that holds no values.
	if v.n <= 0 {
		return
	}
	if v.h.Len() < v.n {
		if opts.CopyOnAdd {
			val = opts.CopyFn(val)
		}
		v.h.Push(val)
		return
	}
	// Named `smallest` rather than `min` so the builtin is not shadowed.
	if smallest := v.h.Min(); !v.lessThanFn(smallest, val) {
		return
	}
	evicted := v.h.Pop()
	if !opts.CopyOnAdd {
		v.h.Push(val)
		return
	}
	// Reuse the popped item from the heap as the destination of the copy.
	opts.CopyToFn(val, &evicted)
	v.h.Push(evicted)
}

// SortInPlace has the backing heap sort its data in place and hands back the
// sorted result.
// NB: The collection drops its heap and comparison function afterwards, so it
// must not be used again once this has been called.
func (v *topNBools) SortInPlace() []boolResultGroup {
	sorted := v.h.SortInPlace()
	v.h, v.lessThanFn = nil, nil
	return sorted
}
75 changes: 75 additions & 0 deletions query/double_result_group_heap.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,78 @@ func (h doubleResultGroupHeap) heapify(i, n int) {
i = smallest
}
}

// topNDoubles keeps track of the top n values in a value sequence for the
// order defined by the `lessThanFn`. In particular if `lessThanFn` defines
// an increasing order (returning true if `v1` < `v2`), the collection stores
// the top N largest values, and vice versa.
type topNDoubles struct {
// n is the retention limit: Add pushes a value unconditionally only while
// the backing heap holds fewer than n values.
n int
// lessThanFn defines the ordering. Once n values are held, Add uses it to
// decide whether a new value should displace the current minimum.
lessThanFn func(v1, v2 doubleResultGroup) bool
// h is the heap backing the collection. SortInPlace sets it to nil, after
// which the collection must not be used.
h *doubleResultGroupHeap
}

// newTopNDoubles builds a top n value collection that retains at most n values
// ordered by lessThanFn. Both arguments are also handed to
// newDoubleResultGroupHeap to construct the backing heap.
func newTopNDoubles(
	n int,
	lessThanFn func(v1, v2 doubleResultGroup) bool,
) *topNDoubles {
	backing := newDoubleResultGroupHeap(n, lessThanFn)

	top := new(topNDoubles)
	top.n = n
	top.lessThanFn = lessThanFn
	top.h = backing
	return top
}

// doubleAddOptions provides the options for adding a value.
type doubleAddOptions struct {
// CopyOnAdd, when true, makes Add store a copy of the value instead of the
// value it was given.
CopyOnAdd bool
// CopyFn returns a copy of v. Add calls it when CopyOnAdd is set and the
// collection still has room for the value.
CopyFn func(v doubleResultGroup) doubleResultGroup
// CopyToFn copies src into target. Add calls it when CopyOnAdd is set and the
// value replaces the current minimum, passing the popped item as the target.
CopyToFn func(src doubleResultGroup, target *doubleResultGroup)
}

// Len reports how many items the backing heap currently holds.
func (v topNDoubles) Len() int {
	return v.h.Len()
}

// Cap reports the capacity of the backing heap.
func (v topNDoubles) Cap() int {
	return v.h.Cap()
}

// RawData exposes the array backing the heap; the items are in no particular order.
func (v topNDoubles) RawData() []doubleResultGroup {
	return v.h.RawData()
}

// Min returns the "smallest" value held, as reported by the backing heap.
func (v topNDoubles) Min() doubleResultGroup {
	return v.h.Min()
}

// Reset resets the internal array backing the heap.
func (v *topNDoubles) Reset() {
	v.h.Reset()
}

// Add offers a value to the collection.
//
// While fewer than n values are held, val is always accepted; if
// opts.CopyOnAdd is set it is first copied with opts.CopyFn. Once n values are
// held, val is accepted only when the current minimum is less than val
// according to lessThanFn, in which case it replaces that minimum. With
// opts.CopyOnAdd set, the replacement is written into the popped item through
// opts.CopyToFn so the popped item is reused rather than a new copy made.
//
// A collection created with n <= 0 retains nothing, so Add is a no-op for it.
// opts.CopyFn and opts.CopyToFn are called unconditionally on their respective
// paths and therefore must be non-nil whenever opts.CopyOnAdd is set.
func (v *topNDoubles) Add(val doubleResultGroup, opts doubleAddOptions) {
	// Without this guard a non-positive n skips the "room left" branch below
	// and goes on to call Min and Pop on a heap that holds no values.
	if v.n <= 0 {
		return
	}
	if v.h.Len() < v.n {
		if opts.CopyOnAdd {
			val = opts.CopyFn(val)
		}
		v.h.Push(val)
		return
	}
	// Named `smallest` rather than `min` so the builtin is not shadowed.
	if smallest := v.h.Min(); !v.lessThanFn(smallest, val) {
		return
	}
	evicted := v.h.Pop()
	if !opts.CopyOnAdd {
		v.h.Push(val)
		return
	}
	// Reuse the popped item from the heap as the destination of the copy.
	opts.CopyToFn(val, &evicted)
	v.h.Push(evicted)
}

// SortInPlace has the backing heap sort its data in place and hands back the
// sorted result.
// NB: The collection drops its heap and comparison function afterwards, so it
// must not be used again once this has been called.
func (v *topNDoubles) SortInPlace() []doubleResultGroup {
	sorted := v.h.SortInPlace()
	v.h, v.lessThanFn = nil, nil
	return sorted
}
16 changes: 12 additions & 4 deletions query/grouped_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const (
)

// GroupedResults is a collection of result groups.
// TODO(wjang): Add JSON marshaling / unmarshaling.
// TODO(xichen): Add JSON marshaling / unmarshaling.
type GroupedResults struct {
// GroupBy contains a list of field paths to group results by.
GroupBy [][]string
Expand Down Expand Up @@ -75,6 +75,9 @@ func (r *GroupedResults) Len() int {
return r.MultiKeyGroups.Len()
}

// IsEmpty reports whether the result collection holds nothing, i.e. whether
// Len returns zero.
func (r *GroupedResults) IsEmpty() bool {
	size := r.Len()
	return size == 0
}

// IsOrdered reports whether the grouped results are kept in order, which is
// the case exactly when OrderBy has at least one entry.
func (r *GroupedResults) IsOrdered() bool {
	return len(r.OrderBy) != 0
}

Expand Down Expand Up @@ -114,13 +117,13 @@ func (r *GroupedResults) NumGroupsLimit() int {

// MinOrderByValues returns the orderBy field values for the smallest result in
// the result collection.
func (r *GroupedResults) MinOrderByValues() []field.ValueUnion {
func (r *GroupedResults) MinOrderByValues() field.Values {
panic("not implemented")
}

// MaxOrderByValues returns the orderBy field values for the largest result in
// the result collection.
func (r *GroupedResults) MaxOrderByValues() []field.ValueUnion {
func (r *GroupedResults) MaxOrderByValues() field.Values {
panic("not implemented")
}

Expand All @@ -146,7 +149,12 @@ func (r *GroupedResults) Clear() {
// Precondition: The current grouped results and the other grouped results are generated from
// the same query.
func (r *GroupedResults) MergeInPlace(other *GroupedResults) error {
if other == nil {
if other == nil || other.IsEmpty() {
return nil
}
if r.IsEmpty() {
*r = *other
other.Clear()
return nil
}
// NB: This also compares the number of group by fields.
Expand Down
75 changes: 75 additions & 0 deletions query/int_result_group_heap.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,78 @@ func (h intResultGroupHeap) heapify(i, n int) {
i = smallest
}
}

// topNInts keeps track of the top n values in a value sequence for the
// order defined by the `lessThanFn`. In particular if `lessThanFn` defines
// an increasing order (returning true if `v1` < `v2`), the collection stores
// the top N largest values, and vice versa.
type topNInts struct {
// n is the retention limit: Add pushes a value unconditionally only while
// the backing heap holds fewer than n values.
n int
// lessThanFn defines the ordering. Once n values are held, Add uses it to
// decide whether a new value should displace the current minimum.
lessThanFn func(v1, v2 intResultGroup) bool
// h is the heap backing the collection. SortInPlace sets it to nil, after
// which the collection must not be used.
h *intResultGroupHeap
}

// newTopNInts builds a top n value collection that retains at most n values
// ordered by lessThanFn. Both arguments are also handed to
// newIntResultGroupHeap to construct the backing heap.
func newTopNInts(
	n int,
	lessThanFn func(v1, v2 intResultGroup) bool,
) *topNInts {
	backing := newIntResultGroupHeap(n, lessThanFn)

	top := new(topNInts)
	top.n = n
	top.lessThanFn = lessThanFn
	top.h = backing
	return top
}

// intAddOptions provides the options for adding a value.
type intAddOptions struct {
// CopyOnAdd, when true, makes Add store a copy of the value instead of the
// value it was given.
CopyOnAdd bool
// CopyFn returns a copy of v. Add calls it when CopyOnAdd is set and the
// collection still has room for the value.
CopyFn func(v intResultGroup) intResultGroup
// CopyToFn copies src into target. Add calls it when CopyOnAdd is set and the
// value replaces the current minimum, passing the popped item as the target.
CopyToFn func(src intResultGroup, target *intResultGroup)
}

// Len reports how many items the backing heap currently holds.
func (v topNInts) Len() int {
	return v.h.Len()
}

// Cap reports the capacity of the backing heap.
func (v topNInts) Cap() int {
	return v.h.Cap()
}

// RawData exposes the array backing the heap; the items are in no particular order.
func (v topNInts) RawData() []intResultGroup {
	return v.h.RawData()
}

// Min returns the "smallest" value held, as reported by the backing heap.
func (v topNInts) Min() intResultGroup {
	return v.h.Min()
}

// Reset resets the internal array backing the heap.
func (v *topNInts) Reset() {
	v.h.Reset()
}

// Add offers a value to the collection.
//
// While fewer than n values are held, val is always accepted; if
// opts.CopyOnAdd is set it is first copied with opts.CopyFn. Once n values are
// held, val is accepted only when the current minimum is less than val
// according to lessThanFn, in which case it replaces that minimum. With
// opts.CopyOnAdd set, the replacement is written into the popped item through
// opts.CopyToFn so the popped item is reused rather than a new copy made.
//
// A collection created with n <= 0 retains nothing, so Add is a no-op for it.
// opts.CopyFn and opts.CopyToFn are called unconditionally on their respective
// paths and therefore must be non-nil whenever opts.CopyOnAdd is set.
func (v *topNInts) Add(val intResultGroup, opts intAddOptions) {
	// Without this guard a non-positive n skips the "room left" branch below
	// and goes on to call Min and Pop on a heap that holds no values.
	if v.n <= 0 {
		return
	}
	if v.h.Len() < v.n {
		if opts.CopyOnAdd {
			val = opts.CopyFn(val)
		}
		v.h.Push(val)
		return
	}
	// Named `smallest` rather than `min` so the builtin is not shadowed.
	if smallest := v.h.Min(); !v.lessThanFn(smallest, val) {
		return
	}
	evicted := v.h.Pop()
	if !opts.CopyOnAdd {
		v.h.Push(val)
		return
	}
	// Reuse the popped item from the heap as the destination of the copy.
	opts.CopyToFn(val, &evicted)
	v.h.Push(evicted)
}

// SortInPlace has the backing heap sort its data in place and hands back the
// sorted result.
// NB: The collection drops its heap and comparison function afterwards, so it
// must not be used again once this has been called.
func (v *topNInts) SortInPlace() []intResultGroup {
	sorted := v.h.SortInPlace()
	v.h, v.lessThanFn = nil, nil
	return sorted
}
Loading

0 comments on commit 2614f8b

Please sign in to comment.