Skip to content

Commit 76db68f

Browse files
wip
1 parent 158b212 commit 76db68f

File tree

4 files changed

+119
-5
lines changed

4 files changed

+119
-5
lines changed

src/Tests/Test_CountingStrings.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package Tests;
2+
3+
import java.util.Collection;
4+
import java.util.HashMap;
5+
import java.util.function.Function;
6+
7+
import org.junit.Assert;
8+
import org.junit.Test;
9+
10+
import YASL.CEstimationFor;
11+
import YASL.CEstimator;
12+
import YASL.CExactCounter;
13+
import YASL.IEstimator;
14+
import YASL.IItemsCounter;
15+
import YASL.Collectors.KTopCollector;
16+
import YASL.Counters.CCMCounter;
17+
import YASL.Counters.CCountMinParams;
18+
import YASL.Hashing.ChgBasic;
19+
20+
public class Test_CountingStrings {
21+
private static String[] generateTestset(int N) {
22+
final String[] res = new String[N];
23+
for (int i = 0; i < N; i++) {
24+
res[i] = "String #" + i;
25+
}
26+
return res;
27+
}
28+
29+
private IEstimator<String> makeEstimator(IItemsCounter<String> counter) {
30+
return new CEstimator<>(counter, new KTopCollector<>(Long.MAX_VALUE));
31+
}
32+
33+
private Collection<CMergedEstimations> merge( //
34+
IEstimator<String> first, //
35+
IEstimator<String> second //
36+
) {
37+
HashMap<String, CMergedEstimations> res = new HashMap<>();
38+
Function<String, CMergedEstimations> For = x -> {
39+
CMergedEstimations me = res.get(x);
40+
if (null == me) {
41+
me = new CMergedEstimations(x);
42+
res.put(x, me);
43+
}
44+
return me;
45+
};
46+
47+
for (CEstimationFor<String> item : first.estimate()) {
48+
For.apply(item.Item).first = item.Count;
49+
}
50+
for (CEstimationFor<String> item : second.estimate()) {
51+
For.apply(item.Item).second = item.Count;
52+
}
53+
return res.values();
54+
}
55+
56+
@Test
57+
public void estimatedErrorInNormalRange() {
58+
IEstimator<String> exact = makeEstimator(new CExactCounter<>());
59+
IEstimator<String> CM = makeEstimator(new CCMCounter<String>( //
60+
new CCountMinParams(0.01, 0.9), //
61+
new ChgBasic<String>() //
62+
));
63+
64+
int total = 0;
65+
String[] testset = generateTestset(1_000);
66+
for (int ittr = 0; ittr < 1; ittr++) {
67+
for (int i = 0; i < testset.length; i++) {
68+
exact.add(testset[i], 1 + i);
69+
CM.add(testset[i], 1 + i);
70+
total += 1 + i;
71+
}
72+
}
73+
74+
int percent = total / 100;
75+
Collection<CMergedEstimations> estimations = merge(exact, CM);
76+
Assert.assertEquals(testset.length, estimations.size());
77+
for (CMergedEstimations res : estimations) {
78+
Assert.assertTrue( //
79+
"Estimated value below exact.", //
80+
res.first <= res.second //
81+
);
82+
Assert.assertTrue( //
83+
"Estimated error more than 1%.", //
84+
res.second <= res.first + percent //
85+
);
86+
}
87+
}
88+
}
89+
90+
/**
 * Mutable pair of counts recorded for one item by two different estimators.
 * Both counts start at -1 until a value is assigned.
 */
class CMergedEstimations {
    /** The item both counts refer to. */
    public String Item;

    /** Count from the first estimator; -1 until assigned. */
    public long first = -1L;

    /** Count from the second estimator; -1 until assigned. */
    public long second = -1L;

    /**
     * Creates an entry for the given item with both counts left at -1.
     *
     * @param item the item this entry describes
     */
    public CMergedEstimations(String item) {
        Item = item;
    }
}

src/YASL/CEstimatedItems.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
package YASL;
22

3-
import java.util.Collection;
3+
import java.util.Iterator;
4+
import java.util.List;
45

5-
public class CEstimatedItems<T> {
6-
private final Collection<CEstimationFor<T>> _estimation;
6+
public class CEstimatedItems<T> implements Iterable<CEstimationFor<T>> {
7+
private final List<CEstimationFor<T>> _estimation;
78

8-
public CEstimatedItems(Collection<CEstimationFor<T>> estimation) {
9+
/**
 * Wraps the given estimations. The list is stored by reference, not copied.
 *
 * @param estimation estimations exposed through this object
 */
public CEstimatedItems(List<CEstimationFor<T>> estimation) {
    _estimation = estimation;
}
1112

@@ -20,4 +21,13 @@ public String toString() {
2021
}
2122
return res.toString();
2223
}
24+
25+
public CEstimationFor<T> get(int i) {
26+
return _estimation.get(i);
27+
}
28+
29+
@Override
30+
public Iterator<CEstimationFor<T>> iterator() {
31+
return _estimation.iterator();
32+
}
2333
}

src/YASL/Counters/CCMCounter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import YASL.Hashing.IHashingGenerator;
66

77
public class CCMCounter<T> implements IItemsCounter<T> {
8-
private final IHasher<T> _bucketsProvider;
8+
private final IHasher<T> _bucketsProvider;
99
protected final long[][] _sketch;
1010

1111
public CCMCounter( //

src/YASL/Counters/CCountMinParams.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,9 @@ public CCountMinParams(double err, double confidence) {
1212
// width = (int) Math.ceil(2 * Math.E / err);
1313
// depth = (int) Math.ceil(-Math.log(1 - confidence) / Math.log(2));
1414
}
15+
16+
/**
 * Creates parameters from explicit dimensions instead of deriving them from
 * an error/confidence pair.
 *
 * NOTE(review): with this overload present, a call that passes two int
 * literals binds here rather than to the (double err, double confidence)
 * constructor - confirm existing callers pass doubles where intended.
 *
 * @param width value stored in {@code width} (presumably the sketch row length - confirm)
 * @param depth value stored in {@code depth} (presumably the number of sketch rows - confirm)
 */
public CCountMinParams(int width, int depth) {
    this.depth = depth;
    this.width = width;
}
1520
}

0 commit comments

Comments
 (0)