1
+ package Tests ;
2
+
3
+ import java .util .Collection ;
4
+ import java .util .HashMap ;
5
+ import java .util .function .Function ;
6
+
7
+ import org .junit .Assert ;
8
+ import org .junit .Test ;
9
+
10
+ import YASL .CEstimationFor ;
11
+ import YASL .CEstimator ;
12
+ import YASL .CExactCounter ;
13
+ import YASL .IEstimator ;
14
+ import YASL .IItemsCounter ;
15
+ import YASL .Collectors .KTopCollector ;
16
+ import YASL .Counters .CCMCounter ;
17
+ import YASL .Counters .CCountMinParams ;
18
+ import YASL .Hashing .ChgBasic ;
19
+
20
+ public class Test_CountingStrings {
21
+ private static String [] generateTestset (int N ) {
22
+ final String [] res = new String [N ];
23
+ for (int i = 0 ; i < N ; i ++) {
24
+ res [i ] = "String #" + i ;
25
+ }
26
+ return res ;
27
+ }
28
+
29
+ private IEstimator <String > makeEstimator (IItemsCounter <String > counter ) {
30
+ return new CEstimator <>(counter , new KTopCollector <>(Long .MAX_VALUE ));
31
+ }
32
+
33
+ private Collection <CMergedEstimations > merge ( //
34
+ IEstimator <String > first , //
35
+ IEstimator <String > second //
36
+ ) {
37
+ HashMap <String , CMergedEstimations > res = new HashMap <>();
38
+ Function <String , CMergedEstimations > For = x -> {
39
+ CMergedEstimations me = res .get (x );
40
+ if (null == me ) {
41
+ me = new CMergedEstimations (x );
42
+ res .put (x , me );
43
+ }
44
+ return me ;
45
+ };
46
+
47
+ for (CEstimationFor <String > item : first .estimate ()) {
48
+ For .apply (item .Item ).first = item .Count ;
49
+ }
50
+ for (CEstimationFor <String > item : second .estimate ()) {
51
+ For .apply (item .Item ).second = item .Count ;
52
+ }
53
+ return res .values ();
54
+ }
55
+
56
+ @ Test
57
+ public void estimatedErrorInNormalRange () {
58
+ IEstimator <String > exact = makeEstimator (new CExactCounter <>());
59
+ IEstimator <String > CM = makeEstimator (new CCMCounter <String >( //
60
+ new CCountMinParams (0.01 , 0.9 ), //
61
+ new ChgBasic <String >() //
62
+ ));
63
+
64
+ int total = 0 ;
65
+ String [] testset = generateTestset (1_000 );
66
+ for (int ittr = 0 ; ittr < 1 ; ittr ++) {
67
+ for (int i = 0 ; i < testset .length ; i ++) {
68
+ exact .add (testset [i ], 1 + i );
69
+ CM .add (testset [i ], 1 + i );
70
+ total += 1 + i ;
71
+ }
72
+ }
73
+
74
+ int percent = total / 100 ;
75
+ Collection <CMergedEstimations > estimations = merge (exact , CM );
76
+ Assert .assertEquals (testset .length , estimations .size ());
77
+ for (CMergedEstimations res : estimations ) {
78
+ Assert .assertTrue ( //
79
+ "Estimated value below exact." , //
80
+ res .first <= res .second //
81
+ );
82
+ Assert .assertTrue ( //
83
+ "Estimated error more than 1%." , //
84
+ res .second <= res .first + percent //
85
+ );
86
+ }
87
+ }
88
+ }
89
+
90
/**
 * Mutable holder pairing two counts recorded for the same string.
 *
 * <p>Both counts are {@code -1} right after construction and stay that way
 * until a caller assigns them.
 */
class CMergedEstimations {
    /** The string both counts refer to. */
    public String Item;

    /** Count taken from the first source; {@code -1} until assigned. */
    public long first;

    /** Count taken from the second source; {@code -1} until assigned. */
    public long second;

    /**
     * Creates a holder for {@code item} with both counts set to {@code -1}.
     *
     * @param item the string the counts will describe
     */
    public CMergedEstimations(String item) {
        Item = item;
        first = -1L;
        second = -1L;
    }
}
0 commit comments