In [3]:
import time
from collections import Counter
from random import randint, seed

from multiset import Multiset

In [4]:
def timeit(f, name):
    """Run ``f()`` once, print how long it took, and return its result.

    Args:
        f: Zero-argument callable to time.
        name: Label used at the start of the printed timing line.

    Returns:
        Whatever ``f()`` returned.
    """
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    res = f()
    elapsed = time.perf_counter() - start
    print(f'{name} took {elapsed:.2f} seconds.')
    return res

`Multiset` is comparable to `collections.Counter` for initialization and updating, but MUCH faster for finding the most common element:

In [5]:
# Seed the RNG so every Restart & Run All benchmarks the same workload.
seed(42)

# One million draws from 100,001 possible values, so values repeat often.
data = [randint(0, 100_000) for _ in range(1_000_000)]

# Build both containers from the same data, timing each constructor.
counter = timeit(lambda: Counter(data), 'Counter init')
multiset = timeit(lambda: Multiset(data), 'Multiset init')

def check():
    """Assert that ``multiset`` reports the same count as ``counter`` for every key.

    Only keys present in ``counter`` are examined. Raises ``AssertionError`` on
    the first mismatch.
    """
    for element, expected in counter.items():
        actual = multiset.count(element)
        assert actual == expected

# Confirm the freshly built Multiset agrees with the Counter on every count.
check()

Counter init took 0.11 seconds.
Multiset init took 0.11 seconds.


In [6]:
# Random element -> multiplicity mapping used by the update benchmarks.
# At most 20 entries: a repeated key simply overwrites the earlier one.
to_add = {}
for _ in range(20):
    # Draw the multiplicity first, then the key, matching the order in which
    # a subscript assignment evaluates its right-hand side and its key.
    multiplicity = randint(1, 10)
    to_add[randint(0, 100_000)] = multiplicity

def update_counter(n=10_000):
    """Apply and then undo ``to_add`` on the global ``counter``, ``n`` times.

    Each round calls ``Counter.update`` followed by ``Counter.subtract`` with
    the same mapping, so every count ends where it started. Keys of ``to_add``
    that were absent beforehand remain in ``counter`` with a count of zero.
    """
    for _round in range(n):
        counter.update(to_add)
        counter.subtract(to_add)

def update_multiset(n=10_000):
    """Add and then remove every ``to_add`` entry on the global ``multiset``, ``n`` times.

    Within a round, all additions happen before any removal, one call per
    (element, multiplicity) pair.
    """
    for _round in range(n):
        for element, multiplicity in to_add.items():
            multiset.add(element, multiplicity)
        for element, multiplicity in to_add.items():
            multiset.remove(element, multiplicity)

# Time the add-then-undo round trips on each container (default n = 10,000).
timeit(update_counter, 'Counter update')
timeit(update_multiset, 'Multiset update')

# Re-verify that both containers still report matching counts afterwards.
check()

Counter update took 0.08 seconds.
Multiset update took 0.03 seconds.


In [7]:
def counter_most_common(n=10_000):
    """Ask the global ``counter`` for its single most common element ``n`` times.

    The results are discarded; the function exists only to be timed.
    """
    for _round in range(n):
        counter.most_common(1)

def multiset_most_common(n=10_000):
    """Read ``multiset.most_common`` ``n`` times, discarding the result.

    NOTE(review): ``most_common`` is accessed as an attribute, not called. That
    measures a real lookup only if ``Multiset.most_common`` is a property; if
    it is a method, this loop merely fetches the bound method -- confirm
    against the ``multiset`` package.
    """
    for _round in range(n):
        multiset.most_common

# Time repeated most-common lookups on each container (default n = 10,000).
timeit(counter_most_common, 'Counter most_common')
timeit(multiset_most_common, 'Multiset most_common')

Counter most_common took 26.46 seconds.
Multiset most_common took 0.00 seconds.


In [10]:
# When interleaving adversarial updates and most-common searches, we're still MUCH faster:

def counter_update_and_find_most_common(n=10_000):
    """Alternate a top-changing update with a most-common query on ``counter``.

    Each of the ``n`` rounds raises the count of ``'new top'`` by one million,
    queries the single most common element, and then lowers the count again,
    so the most common element changes before and after every query.
    """
    for _round in range(n):
        counter['new top'] += 1_000_000
        counter.most_common(1)
        counter['new top'] -= 1_000_000

def multiset_update_and_find_most_common(n=10_000):
    """Alternate a top-changing update with a most-common read on ``multiset``.

    Each of the ``n`` rounds adds ``'new top'`` with multiplicity one million,
    reads ``multiset.most_common``, and then removes the same multiplicity.

    NOTE(review): ``most_common`` is read as an attribute, not called; this is
    a real lookup only if it is a property on ``Multiset`` -- confirm.
    """
    for _round in range(n):
        multiset.add('new top', 1_000_000)
        multiset.most_common
        multiset.remove('new top', 1_000_000)


# Time the interleaved update + most-common workload on each container (default n = 10,000).
timeit(counter_update_and_find_most_common, 'counter_update_and_find_most_common')
timeit(multiset_update_and_find_most_common, 'multiset_update_and_find_most_common')

counter_update_and_find_most_common took 25.76 seconds.
multiset_update_and_find_most_common took 0.02 seconds.
