Skip to content

Commit f477d52

Browse files
Bog and messy(
1 parent 57201c3 commit f477d52

14 files changed

+308
-4
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package Tests;
2+
3+
import java.util.HashSet;
4+
import java.util.Set;
5+
6+
import org.junit.Assert;
7+
import org.junit.Test;
8+
9+
import YASL.Hashing.ChgCombined;
10+
import YASL.Hashing.IHasher;
11+
12+
public class Test_BasicHashingGenerator {
13+
@Test
14+
public void uniqueSequence() {
15+
ChgCombined<Integer> gen = new ChgCombined<Integer>( //
16+
(x, r) -> ~x * 23 * r, //
17+
x -> x, x -> x >> 16, x -> ~x //
18+
);
19+
final IHasher<Integer> hasher = gen.generate(5000, 3);
20+
long notUnique = 0;
21+
for (int i = 0; i < 1_000_000; i++) {
22+
final int[] res = hasher.apply(i);
23+
final Set<Integer> values = new HashSet<>();
24+
for (int j = 0; j < res.length; j++) {
25+
if (!values.add(res[j])) {
26+
notUnique++;
27+
break;
28+
}
29+
}
30+
}
31+
Assert.assertEquals(1, notUnique);
32+
33+
}
34+
}

src/Tests/Test_CountMinCounter.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package Tests;
2+
3+
import java.util.Random;
4+
5+
import org.junit.Assert;
6+
import org.junit.Test;
7+
8+
import YASL.CExactCounter;
9+
import YASL.IItemsCounter;
10+
import YASL.Counters.CCMCounter;
11+
import YASL.Counters.CCountMinParams;
12+
import YASL.Hashing.ChgBasic;
13+
14+
public class Test_CountMinCounter {
15+
@Test
16+
public void notLessThanExact() {
17+
final int variations = 10000;
18+
final Random r = new Random(0);
19+
final IItemsCounter<Long> exact = new CExactCounter<>();
20+
final IItemsCounter<Long> MC = new CCMCounter<Long>( //
21+
new CCountMinParams(0.01, 0.01), //
22+
new ChgBasic<>() //
23+
);
24+
25+
for (long i = 0; i < 100_000; i++) {
26+
final long value = r.nextInt(variations);
27+
final int cnt = 1 + r.nextInt(10);
28+
final long exactCount = exact.put(value, cnt);
29+
final long minCount = MC.put(value, cnt);
30+
Assert.assertTrue(exactCount <= minCount);
31+
}
32+
}
33+
}

src/Tests/Test_Estimator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
import org.junit.Test;
55

66
import YASL.CEstimator;
7-
import YASL.CExactCollector;
87
import YASL.CExactCounter;
98
import YASL.IEstimator;
9+
import YASL.Collectors.CExactCollector;
1010

1111
public class Test_Estimator {
1212
@Test

src/Tests/Test_KTopCollector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import org.junit.Test;
55

66
import YASL.IEstimationCollector;
7-
import YASL.KTopCollector;
7+
import YASL.Collectors.KTopCollector;
88

99
public class Test_KTopCollector {
1010

src/Tests/Test_hgCombined.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package Tests;
2+
3+
import org.junit.Assert;
4+
import org.junit.Test;
5+
6+
import YASL.Hashing.ChgCombined;
7+
import YASL.Hashing.IHasher;
8+
9+
public class Test_hgCombined {
10+
@Test
11+
public void returnHashesFirst() {
12+
IHasher<Integer> gen = new ChgCombined<Integer>( //
13+
x -> 1, x -> 2 //
14+
).generate(Integer.MAX_VALUE, 2);
15+
16+
Assert.assertArrayEquals( //
17+
new int[] { 1, 2 }, //
18+
gen.apply(1) //
19+
);
20+
}
21+
22+
@Test
23+
public void respectRange() {
24+
IHasher<Integer> gen = new ChgCombined<Integer>( //
25+
x -> 21, x -> -24//
26+
).generate(5, 2);
27+
28+
Assert.assertArrayEquals( //
29+
new int[] { 1, 4 }, //
30+
gen.apply(1) //
31+
);
32+
}
33+
34+
@Test
35+
public void useOnlyWhatNeed() {
36+
IHasher<Integer> gen = new ChgCombined<Integer>( //
37+
x -> 1, //
38+
x -> {
39+
throw new RuntimeException("Don't touch me!");
40+
}//
41+
).generate(5, 1);
42+
43+
Assert.assertArrayEquals( //
44+
new int[] { 1 }, //
45+
gen.apply(1) //
46+
);
47+
}
48+
49+
@Test
50+
public void applySalting() {
51+
IHasher<Integer> gen = new ChgCombined<Integer>( //
52+
(x, r) -> x * r * 4, //
53+
x -> x, x -> x + 1 //
54+
).generate(Integer.MAX_VALUE, 5);
55+
56+
Assert.assertArrayEquals( //
57+
new int[] { 1, 2, 4, 5, 8 }, //
58+
gen.apply(1) //
59+
);
60+
}
61+
}

src/YASL/CExactCollector.java renamed to src/YASL/Collectors/CExactCollector.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
package YASL;
1+
package YASL.Collectors;
22

33
import java.util.HashMap;
44
import java.util.Map;
55
import java.util.stream.Collectors;
66

7+
import YASL.CEstimatedItems;
8+
import YASL.CEstimationFor;
9+
import YASL.IEstimationCollector;
10+
711
public class CExactCollector<T> implements IEstimationCollector<T> {
812

913
private Map<T, Long> _storage = new HashMap<>();

src/YASL/KTopCollector.java renamed to src/YASL/Collectors/KTopCollector.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
package YASL;
1+
package YASL.Collectors;
22

33
import java.util.Comparator;
44
import java.util.PriorityQueue;
55
import java.util.stream.Collectors;
66

7+
import YASL.CEstimatedItems;
8+
import YASL.CEstimationFor;
9+
import YASL.IEstimationCollector;
10+
711
public class KTopCollector<T> implements IEstimationCollector<T> {
812
private final long _K;
913
private final PriorityQueue<CEstimationFor<T>> _queue;

src/YASL/Counters/CCMCounter.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package YASL.Counters;
2+
3+
import YASL.IItemsCounter;
4+
import YASL.Hashing.IHasher;
5+
import YASL.Hashing.IHashingGenerator;
6+
7+
public class CCMCounter<T> implements IItemsCounter<T> {
8+
private final IHasher<T> _bucketsProvider;
9+
protected final long[][] _sketch;
10+
11+
public CCMCounter( //
12+
int width, int buckets, //
13+
IHasher<T> bucketsProvider//
14+
) {
15+
_bucketsProvider = bucketsProvider;
16+
_sketch = new long[width][buckets];
17+
}
18+
19+
public CCMCounter( //
20+
CCountMinParams params, //
21+
IHashingGenerator<T> hGen //
22+
) {
23+
this( //
24+
params.width, params.depth, //
25+
hGen.generate(params.width, params.depth) //
26+
);
27+
}
28+
29+
@Override
30+
public long put(T item, long count) {
31+
final int[] positions = _bucketsProvider.apply(item);
32+
count += count(positions);
33+
update(positions, count);
34+
return count;
35+
}
36+
37+
public void update(T item, long count) {
38+
update(_bucketsProvider.apply(item), count);
39+
}
40+
41+
protected void update(int[] positions, long count) {
42+
for (int i = 0; i < positions.length; i++) {
43+
final int pos = positions[i];
44+
if (_sketch[pos][i] < count)
45+
_sketch[pos][i] = count;
46+
}
47+
}
48+
49+
public long count(T item) {
50+
return count(_bucketsProvider.apply(item));
51+
}
52+
53+
private long count(int[] positions) {
54+
long res = Long.MAX_VALUE;
55+
for (int i = 0; i < positions.length; i++) {
56+
final long cnt = _sketch[positions[i]][i];
57+
res = (cnt < res) ? cnt : res;
58+
}
59+
return res;
60+
}
61+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package YASL.Counters;
2+
3+
/**
 * Table dimensions derived from an error bound and a confidence level.
 * <p>
 * Formulas follow http://dimacs.rutgers.edu/~graham/pubs/papers/cmencyc.pdf:
 * {@code width = ceil(e / err)} and
 * {@code depth = ceil(ln(1 / (1 - confidence)))}.
 */
public class CCountMinParams {
	/** Number of cells per hash level; always at least 1. */
	public final int width;
	/** Number of hash levels; always at least 1. */
	public final int depth;

	/**
	 * @param err        error bound; must be a finite number greater than 0
	 * @param confidence confidence level; must lie strictly between 0 and 1
	 * @throws IllegalArgumentException if either argument is outside its valid
	 *                                  range (including NaN)
	 */
	public CCountMinParams(double err, double confidence) {
		// Negated comparisons so that NaN is rejected as well.
		if (!(err > 0.0) || Double.isInfinite(err)) {
			throw new IllegalArgumentException("err must be a finite number > 0, got " + err);
		}
		if (!(confidence > 0.0 && confidence < 1.0)) {
			throw new IllegalArgumentException("confidence must be in (0, 1), got " + confidence);
		}

		// Clamp to 1: a very large err or a confidence so small that
		// (1 - confidence) rounds to 1.0 would otherwise give a zero dimension.
		width = Math.max(1, (int) Math.ceil(Math.E / err));
		depth = Math.max(1, (int) Math.ceil(Math.log(1 / (1 - confidence))));

		// Alternative sizing (not used):
		// width = ceil(2 * e / err)
		// depth = ceil(-ln(1 - confidence) / ln(2))
	}
}

src/YASL/Hashing/ChgBasic.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package YASL.Hashing;
2+
3+
import java.util.Random;
4+
5+
public class ChgBasic<T> implements IHashingGenerator<T> {
6+
7+
@Override
8+
public IHasher<T> generate(int range, int levels) {
9+
return x -> {
10+
long hash = x.hashCode();
11+
final Random r = new Random(hash);
12+
final int[] res = new int[levels];
13+
for (int i = 0; i < levels; i++) {
14+
hash *= r.nextInt(range);
15+
hash += r.nextLong();
16+
17+
res[i] = (int) (hash % range);
18+
res[i] = (res[i] < 0) ? -res[i] : res[i];
19+
}
20+
return res;
21+
};
22+
}
23+
}

src/YASL/Hashing/ChgCombined.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package YASL.Hashing;
2+
3+
public class ChgCombined<T> implements IHashingGenerator<T> {
4+
public interface ISalter<T> {
5+
public T apply(T value, int round);
6+
}
7+
8+
private final IHashingAlgorithm<T>[] _hashes;
9+
private final ISalter<T> _salting;
10+
11+
@SafeVarargs
12+
public ChgCombined(ISalter<T> salter, IHashingAlgorithm<T>... hashes) {
13+
_hashes = hashes;
14+
_salting = salter;
15+
}
16+
17+
@SafeVarargs
18+
public ChgCombined(IHashingAlgorithm<T>... hashes) {
19+
this( //
20+
(x, r) -> {
21+
throw new RuntimeException("No salter.");
22+
}, //
23+
hashes //
24+
);
25+
}
26+
27+
@Override
28+
public IHasher<T> generate(int range, int levels) {
29+
return x -> {
30+
final int[] res = new int[levels];
31+
int pos = populate(x, res, 0);
32+
33+
int round = 0;
34+
while (pos < levels) {
35+
round++;
36+
pos = populate(_salting.apply(x, round), res, pos);
37+
}
38+
39+
for (int i = 0; i < res.length; i++)
40+
res[i] = Math.abs(res[i] % range);
41+
return res;
42+
};
43+
}
44+
45+
private int populate(T x, int[] res, int pos) {
46+
final int N = Math.min(_hashes.length, res.length - pos);
47+
for (int i = 0; i < N; i++) {
48+
res[pos] = _hashes[i].hash(x);
49+
pos++;
50+
}
51+
return pos;
52+
}
53+
}

src/YASL/Hashing/IHasher.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package YASL.Hashing;
2+
3+
import java.util.function.Function;
4+
5+
/**
 * Maps an item to an array of {@code int} values.
 * <p>
 * Implementations in this package are written as lambdas, so the interface is
 * declared as a functional interface.
 *
 * @param <T> type of the hashed items
 */
@FunctionalInterface
public interface IHasher<T> extends Function<T, int[]> {
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package YASL.Hashing;
2+
3+
/**
 * A single hash function from an item to an {@code int}.
 *
 * @param <T> type of the hashed items
 */
@FunctionalInterface
public interface IHashingAlgorithm<T> {
	/**
	 * @param value item to hash
	 * @return the hash of {@code value}; may be any {@code int}, including negative values
	 */
	int hash(T value);
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package YASL.Hashing;
2+
3+
public interface IHashingGenerator<T> {
4+
public IHasher<T> generate(int range, int levels);
5+
}

0 commit comments

Comments
 (0)