-
Notifications
You must be signed in to change notification settings - Fork 0
/
top_values_stat.go
102 lines (93 loc) · 2.49 KB
/
top_values_stat.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// SPDX-License-Identifier: Apache-2.0
// Copyright © 2022 Wrangle Ltd
package diffprof
import (
"math"
"github.com/wrgl/wrgl/pkg/objects"
)
// ValueCountDiff describes how often a single value occurs on the old and the
// new side of a comparison, both as an absolute count and as a percentage of
// the respective row count.
type ValueCountDiff struct {
	// Value is the value whose occurrences are being compared.
	Value string `json:"value"`
	// OldCount is the number of occurrences on the old side; it stays zero
	// when the value only appears on the new side.
	OldCount uint32 `json:"oldCount"`
	// NewCount is the number of occurrences on the new side; it stays zero
	// when the value only appears on the old side.
	NewCount uint32 `json:"newCount"`
	// OldPct is OldCount as a rounded percentage of the old row count.
	OldPct byte `json:"oldPct"`
	// NewPct is NewCount as a rounded percentage of the new row count.
	NewPct byte `json:"newPct"`
}
// compareValueCounts merges the value counts of a new and an old profile into
// a single list of per-value differences.
//
// newRowsCount and oldRowsCount are the denominators used for the NewPct and
// OldPct percentages respectively.
//
// The result lists every value of oldVC first, in oldVC order, with the new
// side filled in whenever newVC contains the same value. Values that occur
// only in newVC follow, in newVC order. The returned slice is never nil.
func compareValueCounts(newRowsCount, oldRowsCount uint32, newVC, oldVC objects.ValueCounts) []ValueCountDiff {
	newCounts := make(map[string]uint32, len(newVC))
	for _, vc := range newVC {
		newCounts[vc.Value] = vc.Count
	}

	// Allocated with make so the slice stays non-nil even when both inputs
	// are empty, exactly like the literal it replaces.
	result := make([]ValueCountDiff, 0, len(oldVC)+len(newVC))

	// Only membership matters for the old side, so a set is enough.
	seenOld := make(map[string]struct{}, len(oldVC))
	for _, vc := range oldVC {
		seenOld[vc.Value] = struct{}{}
		diff := ValueCountDiff{
			Value:    vc.Value,
			OldCount: vc.Count,
			OldPct:   valueCountPct(vc.Count, oldRowsCount),
		}
		if c, ok := newCounts[vc.Value]; ok {
			diff.NewCount = c
			diff.NewPct = valueCountPct(c, newRowsCount)
		}
		result = append(result, diff)
	}

	for _, vc := range newVC {
		if _, ok := seenOld[vc.Value]; ok {
			// Already emitted while walking the old side.
			continue
		}
		result = append(result, ValueCountDiff{
			Value:    vc.Value,
			NewCount: vc.Count,
			NewPct:   valueCountPct(vc.Count, newRowsCount),
		})
	}
	return result
}

// valueCountPct returns count as a rounded percentage of total. A zero total
// yields 0 and results above 255 saturate at 255, because converting NaN, Inf
// or an out-of-range float to byte is implementation-dependent in Go.
func valueCountPct(count, total uint32) byte {
	if total == 0 {
		return 0
	}
	pct := math.Round(float64(count) / float64(total) * 100)
	if pct > math.MaxUint8 {
		return math.MaxUint8
	}
	return byte(pct)
}
// Unchanged reports whether the value occurs equally often on both sides,
// measured both by absolute count and by percentage.
func (s *ValueCountDiff) Unchanged() bool {
	if s.OldCount != s.NewCount {
		return false
	}
	return s.OldPct == s.NewPct
}
// TopValuesStat is the diff of one top-values statistic of a column, as
// produced by the factories that topValuesStatFactory returns.
type TopValuesStat struct {
	// Name is the name given to the factory that produced this stat.
	Name string `json:"name"`
	// ShortName is the short name given to the factory that produced this stat.
	ShortName string `json:"shortName"`
	// NewAddition is set when only the new side has values.
	NewAddition bool `json:"newAddition,omitempty"`
	// Removed is set when only the old side has values.
	Removed bool `json:"removed,omitempty"`
	// Values holds the per-value comparison between the old and new side.
	Values []ValueCountDiff `json:"values"`
}
// topValuesStatFactory builds a statDiffFactory that compares one top-values
// statistic of a column between an old and a new profile.
//
// name and sname become the Name and ShortName of every TopValuesStat the
// factory produces. getField extracts the value counts of interest from a
// column profile and is only ever called with a non-nil profile.
//
// The returned factory yields nil when neither side has any values. It marks
// the stat as Removed when only the old side has values and as NewAddition
// when only the new side has them. A nil column profile counts as having no
// values, and a nil table profile counts as having zero rows.
func topValuesStatFactory(name, sname string, getField func(col *objects.ColumnProfile) objects.ValueCounts) statDiffFactory {
	return func(newTblProf, oldTblProf *objects.TableProfile, newColProf, oldColProf *objects.ColumnProfile) interface{} {
		var ov, nv objects.ValueCounts
		if oldColProf != nil {
			ov = getField(oldColProf)
		}
		if newColProf != nil {
			nv = getField(newColProf)
		}

		sd := &TopValuesStat{
			Name:      name,
			ShortName: sname,
		}
		switch {
		case nv.IsEmpty() && ov.IsEmpty():
			// Nothing to compare on either side.
			return nil
		case nv.IsEmpty():
			sd.Removed = true
		case ov.IsEmpty():
			sd.NewAddition = true
		}

		// Column profiles may be nil, so guard the table profiles the same
		// way instead of dereferencing them unconditionally.
		var newRows, oldRows uint32
		if newTblProf != nil {
			newRows = newTblProf.RowsCount
		}
		if oldTblProf != nil {
			oldRows = oldTblProf.RowsCount
		}

		sd.Values = compareValueCounts(newRows, oldRows, nv, ov)
		return sd
	}
}
// Unchanged reports whether the statistic is identical on both sides: it was
// neither added nor removed, and every listed value is itself unchanged.
func (s *TopValuesStat) Unchanged() bool {
	addedOrRemoved := s.NewAddition || s.Removed
	if addedOrRemoved {
		return false
	}
	for i := range s.Values {
		if unchanged := s.Values[i].Unchanged(); !unchanged {
			return false
		}
	}
	return true
}