In [9]:
import sys
sys.path.append('../algorithms')  # Add path to import module

from misra_gries import MisraGries
from collections import Counter
import random

# Build a synthetic stream with a skewed frequency distribution
# (105 items in total: a=40, b=30, c=20, d=10, e=5).
stream = ['a'] * 40 + ['b'] * 30 + ['c'] * 20 + ['d'] * 10 + ['e'] * 5

# Fix the RNG seed before shuffling so the arrival order -- and therefore the
# summary printed below -- is identical on every Restart & Run All.
# NOTE(review): the seed value is arbitrary; output saved from an earlier
# unseeded run will not match until this cell is re-executed.
random.seed(42)
random.shuffle(stream)

# Initialize with k = 3
mg = MisraGries(k=3)

# Feed the stream to the summary one item at a time
for item in stream:
    mg.update(item)

print("Misra-Gries estimated counts:")
print(mg.get_counts())

# Exact frequencies, for comparison with the estimates above
true_counts = Counter(stream)
print("\nTrue counts:")
print(true_counts)


Misra-Gries estimated counts:
{'a': 8, 'd': 1}

True counts:
Counter({'a': 40, 'b': 30, 'c': 20, 'd': 10, 'e': 5})


In [10]:
from count_min_sketch import CountMinSketch

# Rebuild the skewed stream (a=40, b=30, c=20, d=10, e=5) and shuffle it
stream = [
    symbol
    for symbol, copies in (('a', 40), ('b', 30), ('c', 20), ('d', 10), ('e', 5))
    for _ in range(copies)
]
random.shuffle(stream)

# Sketch dimensions: width=50, depth=5
cms = CountMinSketch(width=50, depth=5)

# Push every stream element into the sketch
for item in stream:
    cms.update(item)

# Query the sketch once per distinct element
cms_estimates = {symbol: cms.estimate(symbol) for symbol in set(stream)}

# Show the estimates next to the exact frequencies
print("Count-Min Sketch estimates:", cms_estimates, sep="\n")
print("\nTrue counts:", dict(Counter(stream)), sep="\n")


Count-Min Sketch estimates:
{'c': 20, 'd': 10, 'b': 30, 'e': 5, 'a': 40}

True counts:
{'a': 40, 'b': 30, 'e': 5, 'c': 20, 'd': 10}


In [11]:
# Integer-keyed variant of the skewed stream (1=40, 2=30, 3=20, 4=10, 5=5)
stream1 = [
    value
    for value, copies in ((1, 40), (2, 30), (3, 20), (4, 10), (5, 5))
    for _ in range(copies)
]
random.shuffle(stream1)

# Sketch dimensions: width=50, depth=5
cms1 = CountMinSketch(width=50, depth=5)

# Push every stream element into the sketch
for item in stream1:
    cms1.update(item)

# Query the sketch once per distinct element
cms_estimates1 = {value: cms1.estimate(value) for value in set(stream1)}

# Show the estimates next to the exact frequencies
print("Count-Min Sketch estimates:", cms_estimates1, sep="\n")
print("\nTrue counts:", dict(Counter(stream1)), sep="\n")

Count-Min Sketch estimates:
{1: 40, 2: 30, 3: 20, 4: 10, 5: 5}

True counts:
{1: 40, 2: 30, 3: 20, 5: 5, 4: 10}


In [None]:
from count_sketch import BasicCountSketch

# Build the skewed stream (a=40, b=30, c=20, d=10, e=5) and shuffle it
stream = [
    symbol
    for symbol, copies in (('a', 40), ('b', 30), ('c', 20), ('d', 10), ('e', 5))
    for _ in range(copies)
]
random.shuffle(stream)

# Basic Count Sketch with width=50
bcs = BasicCountSketch(width=50)

# Push every stream element into the sketch
for item in stream:
    bcs.update(item)

# Query the sketch once per distinct element
bcs_estimates = {symbol: bcs.estimate(symbol) for symbol in set(stream)}

# Show the estimates next to the exact frequencies
print("Basic Count Sketch estimates:", bcs_estimates, sep="\n")
print("\nTrue counts:", dict(Counter(stream)), sep="\n")


Basic Count Sketch estimates:
{'c': 20, 'd': 10, 'b': 30, 'e': 5, 'a': 40}

True counts:
{'b': 30, 'a': 40, 'e': 5, 'c': 20, 'd': 10}


In [13]:
from count_median import CountSketchMedian

# Rebuild the skewed stream (a=40, b=30, c=20, d=10, e=5) and shuffle it
stream = [
    symbol
    for symbol, copies in (('a', 40), ('b', 30), ('c', 20), ('d', 10), ('e', 5))
    for _ in range(copies)
]
random.shuffle(stream)

# Sketch dimensions: width=50, depth=5
cs = CountSketchMedian(width=50, depth=5)

# Push every stream element into the sketch
for item in stream:
    cs.update(item)

# Query the sketch once per distinct element
cs_estimates = {symbol: cs.estimate(symbol) for symbol in set(stream)}

# Show the estimates next to the exact frequencies
print("Count Sketch estimates:", cs_estimates, sep="\n")
print("\nTrue counts:", dict(Counter(stream)), sep="\n")


Count Sketch estimates:
{'c': 20, 'd': 10, 'b': 30, 'e': 5, 'a': 40}

True counts:
{'b': 30, 'a': 40, 'e': 5, 'c': 20, 'd': 10}
