Skip to content

Commit 0319dd5

Browse files
Implemented Augmented Sketch
1 parent 08af0be commit 0319dd5

25 files changed

+839
-236
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package YASL.AugmentedSketch;
2+
3+
import java.io.IOException;
4+
import java.util.List;
5+
import java.util.stream.Collectors;
6+
7+
import YASL.CEstimatedItems;
8+
import YASL.CEstimationFor;
9+
import YASL.IItemsCounter;
10+
import YASL.MinHeap.CBasicMinHeap;
11+
import YASL.MinHeap.IMinHeap;
12+
import YASL.Streams.TypedOutputStream;
13+
14+
public class CAugmentedSketch<T> implements IAugmentedSketch<T> {
15+
private final IMinHeap<CasItem<T>, T> _heap;
16+
private final IItemsCounter<T> _counter;
17+
18+
public CAugmentedSketch(IMinHeap<CasItem<T>, T> heap, IItemsCounter<T> counter) {
19+
this._heap = heap;
20+
this._counter = counter;
21+
}
22+
23+
/**
24+
* @param sz
25+
* count of estimated items.
26+
* @param counter
27+
* counter for low-frequency items.
28+
*/
29+
public CAugmentedSketch(int sz, IItemsCounter<T> counter) {
30+
this( //
31+
new CBasicMinHeap<>(sz), //
32+
counter //
33+
);
34+
}
35+
36+
private long putToCounter(T item, long count) {
37+
if (!_heap.isFull()) {
38+
_heap.add(new CasItem<T>(item, count, 0));
39+
return count;
40+
}
41+
42+
final long cnt = _counter.put(item, count);
43+
if (_heap.lowest().Priority() < cnt) {
44+
final CasItem<T> oldMin = _heap.poll();
45+
if (0 < oldMin.delta())
46+
_counter.put(oldMin.Value(), oldMin.delta());
47+
_heap.add(new CasItem<T>(item, cnt));
48+
}
49+
return cnt;
50+
}
51+
52+
@Override
53+
public long putIfFrequent(T item, long count) {
54+
final CasItem<T> ratted = _heap.get(item);
55+
if (null != ratted) {
56+
ratted.inc(count);
57+
_heap.update(item);
58+
return ratted.Priority();
59+
}
60+
return 0;
61+
}
62+
63+
@Override
64+
public long put(T item, long count) {
65+
final long heapCount = putIfFrequent(item, count);
66+
if (0 < heapCount)
67+
return heapCount;
68+
69+
return putToCounter(item, count);
70+
}
71+
72+
@Override
73+
public long count(T item) {
74+
final CasItem<T> ratted = _heap.get(item);
75+
if (null == ratted) {
76+
return _counter.count(item);
77+
} else {
78+
return ratted.Priority();
79+
}
80+
}
81+
82+
@Override
83+
public void add(T item, long count) {
84+
put(item, count);
85+
}
86+
87+
@Override
88+
public CEstimatedItems<T> estimate() {
89+
List<CEstimationFor<T>> est = _heap.items() //
90+
.stream() //
91+
.map(x -> new CEstimationFor<>(x.value, x.Priority())) //
92+
.collect(Collectors.toList());
93+
return new CEstimatedItems<>(est);
94+
}
95+
96+
@Override
97+
public void store(TypedOutputStream<T> stream) throws IOException {
98+
throw new UnsupportedOperationException();
99+
}
100+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package YASL.AugmentedSketch;
2+
3+
import YASL.MinHeap.CpiSimple;
4+
5+
public class CasItem<T> extends CpiSimple<T> {
6+
public final long startPriority;
7+
8+
public CasItem(T value, long priority, long startPriority) {
9+
super(value, priority);
10+
this.startPriority = startPriority;
11+
}
12+
13+
public CasItem(T item, long count) {
14+
this(item, count, count);
15+
}
16+
17+
public void update(long cnt) {
18+
_priority = cnt;
19+
}
20+
21+
public long delta() {
22+
return _priority - startPriority;
23+
}
24+
25+
public void inc(long count) {
26+
_priority += count;
27+
}
28+
29+
@Override
30+
public String toString() {
31+
return String.format( //
32+
"CasItem(p:%d, d:%d, v:%s)", //
33+
Priority(), delta(), String.valueOf(Value()) //
34+
);
35+
}
36+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package YASL.AugmentedSketch;
2+
3+
import YASL.IEstimator;
4+
import YASL.IItemsCounter;
5+
6+
/**
7+
* Based on
8+
* <a href="http://www.ntu.edu.sg/home/arijit.khan/Papers/asketch.pdf">this
9+
* article</a>. This is a hybrid of items counter and estimator. High-frequency
10+
* items are put into separated, more precised, counters, which is, mainly,
11+
* reduce overestimation of low-frequency items.
12+
*/
13+
public interface IAugmentedSketch<T> extends IEstimator<T>, IItemsCounter<T> {
14+
/**
15+
* Workaround for increasing only high-frequency items.
16+
*
17+
* @return new estimation for item. Return 0, if item aren't frequent.
18+
*/
19+
public long putIfFrequent(T item, long count);
20+
}

YASL-Core/src/YASL/Collectors/Heap/CprSimple.java

Lines changed: 0 additions & 30 deletions
This file was deleted.

YASL-Core/src/YASL/Collectors/KTopCollector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
import YASL.CEstimatedItems;
88
import YASL.CEstimationFor;
99
import YASL.IEstimationCollector;
10-
import YASL.Collectors.Heap.CFixedMinHeap;
11-
import YASL.Collectors.Heap.CprSimple;
10+
import YASL.MinHeap.CFixedMinHeap;
11+
import YASL.MinHeap.CpiSimple;
1212
import YASL.Streams.TypedOutputStream;
1313

1414
public class KTopCollector<T> implements IEstimationCollector<T> {
15-
private static class CTopItem<T> extends CprSimple<T> {
15+
private static class CTopItem<T> extends CpiSimple<T> {
1616
public CTopItem(T value, long priority) {
1717
super(value, (int) priority);
1818
}

YASL-Core/src/YASL/Collectors/Heap/CFixedMinHeap.java renamed to YASL-Core/src/YASL/MinHeap/CBasicMinHeap.java

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
1-
package YASL.Collectors.Heap;
1+
package YASL.MinHeap;
22

33
import java.lang.reflect.Array;
44
import java.util.ArrayList;
55
import java.util.HashMap;
66
import java.util.List;
77
import java.util.Map;
88

9-
public class CFixedMinHeap<T extends IPrioritizedItem<TKey>, TKey> {
9+
public class CBasicMinHeap<T extends IPrioritizedItem<TKey>, TKey> implements IMinHeap<T, TKey> {
1010
private final CbhItem<T>[] _items;
1111
private final Map<TKey, CbhItem<T>> _byValue;
1212
private int _count = 0;
1313

1414
@SuppressWarnings("unchecked")
15-
public CFixedMinHeap(int size) {
15+
public CBasicMinHeap(int size) {
1616
_items = (CbhItem[]) Array.newInstance(CbhItem.class, size);
1717
_byValue = new HashMap<>();
1818
}
1919

20+
@Override
2021
public T poll() {
2122
final T res = lowest();
2223
_count--;
@@ -28,35 +29,34 @@ public T poll() {
2829
return res;
2930
}
3031

32+
@Override
3133
public T lowest() {
3234
return _items[0].Item;
3335
}
3436

37+
@Override
3538
public void update(TKey key) {
3639
final CbhItem<T> item = _byValue.get(key);
3740
bubbleUp(item);
3841
bubbleDown(item);
3942
}
4043

44+
@Override
4145
public void add(T item) {
42-
if (_count == _items.length) {
43-
if (item.Priority() < lowest().Priority())
44-
return;
45-
poll();
46-
}
47-
4846
final CbhItem<T> node = new CbhItem<T>(item, _count);
4947
_items[_count] = node;
5048
_byValue.put(item.Value(), node);
5149
bubbleUp(node);
5250
_count++;
5351
}
5452

53+
@Override
5554
public T get(TKey key) {
5655
final CbhItem<T> item = _byValue.get(key);
5756
return (null == item) ? null : item.Item;
5857
}
5958

59+
@Override
6060
public String print() {
6161
final StringBuilder res = new StringBuilder();
6262
String sepp = "";
@@ -108,10 +108,11 @@ private void bubbleUp(CbhItem<T> node) {
108108
}
109109

110110
private boolean isBigger(CbhItem<T> first, CbhItem<T> second) {
111-
final int diff = first.Item.Priority() - second.Item.Priority();
111+
final long diff = first.Item.Priority() - second.Item.Priority();
112112
return 0 < diff;
113113
}
114114

115+
@Override
115116
public List<T> items() {
116117
List<T> res = new ArrayList<>(_count);
117118
for (int i = 0; i < _count; i++) {
@@ -121,14 +122,8 @@ public List<T> items() {
121122
return res;
122123
}
123124

124-
125-
private static class CbhItem<T> {
126-
public final T Item;
127-
public int Index;
128-
129-
public CbhItem(T item, int index) {
130-
Item = item;
131-
Index = index;
132-
}
125+
@Override
126+
public boolean isFull() {
127+
return _count == _items.length;
133128
}
134129
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package YASL.MinHeap;
2+
3+
public class CFixedMinHeap<T extends IPrioritizedItem<TKey>, TKey> //
4+
extends CBasicMinHeap<T, TKey> //
5+
{
6+
public CFixedMinHeap(int size) {
7+
super(size);
8+
}
9+
10+
@Override
11+
public void add(T item) {
12+
if (isFull()) {
13+
if (item.Priority() < lowest().Priority())
14+
return;
15+
poll();
16+
}
17+
super.add(item);
18+
}
19+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package YASL.MinHeap;
2+
3+
class CbhItem<T> {
4+
public final T Item;
5+
public int Index;
6+
7+
public CbhItem(T item, int index) {
8+
Item = item;
9+
Index = index;
10+
}
11+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package YASL.MinHeap;
2+
3+
public class CpiSimple<T> implements IPrioritizedItem<T>, Comparable<CpiSimple<T>> {
4+
public final T value;
5+
protected long _priority;
6+
7+
public CpiSimple(T value, long priority) {
8+
this.value = value;
9+
_priority = priority;
10+
}
11+
12+
@Override
13+
public long Priority() {
14+
return _priority;
15+
}
16+
17+
@Override
18+
public T Value() {
19+
return value;
20+
}
21+
22+
@SuppressWarnings("unchecked")
23+
@Override
24+
public int compareTo(CpiSimple<T> o) {
25+
final long diff = o._priority - _priority;
26+
if (0 != diff)
27+
return Long.signum(diff);
28+
return ((Comparable<T>) o.value).compareTo(value);
29+
}
30+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package YASL.MinHeap;
2+
3+
import java.util.List;
4+
5+
public interface IMinHeap<T extends IPrioritizedItem<TKey>, TKey> {
6+
public T poll();
7+
8+
public T lowest();
9+
10+
public void update(TKey key);
11+
12+
public void add(T item);
13+
14+
public T get(TKey key);
15+
16+
public String print();
17+
18+
public List<T> items();
19+
20+
public boolean isFull();
21+
22+
}

0 commit comments

Comments
 (0)