In [1]:
import random
import uuid
from collections import Counter

from typed_lists import TypedList, _initialize_global_thread_pool

## Examples

In [2]:
# Optionally set the number of threads used for parallel operations.
# This is a global setting and can only be set once:
# calling this function again will raise an error.

_initialize_global_thread_pool(4)

In [3]:
# Examples of creating typed lists and combining them element-wise.
# The concrete list type follows the data (ints, strings, floats); the last
# example passes "bool" as a second argument alongside the boolean data.
print(
    TypedList(range(10)) * TypedList(range(10)),
    TypedList(["a", "b", "c", "d", "e"]) + TypedList(["f", "g", "h", "i", "j"]),
    TypedList([1.0, 2.0, 3.0, 4.0, 5.0]) / TypedList([1.0, 2.0, 3.0, 4.0, 5.0]),
    TypedList([bool(i % 2) for i in range(10)], "bool"),
    sep="\n",  # one result per line, same as four separate print() calls
)

IntTypedList([0, 1, 4, 9, 16]...)
StringTypedList (["af", "bg", "ch", "di", "ej"])
FloatTypedList ([1.0, 1.0, 1.0, 1.0, 1.0])
BoolTypedList([false, true, false, true, false]...)


In [4]:
# parallelized sorting: build one million random UUID strings, then sort them
uuids = TypedList([str(uuid.uuid4()) for _ in range(1_000_000)])
# sort() is called for its effect on `uuids`; its return value is not used
uuids.sort()
# bare last expression so the notebook displays the (now sorted) list
uuids

StringTypedList(["00000ee9-a0bd-4bf2-96f6-635258d91f4a", "00001791-10d5-4b60-b2cd-0da67f440e3e", "00001af4-6d91-466c-90d5-992a1a7e1033", "0000202e-c5a3-4d8e-be5a-1a65a78b5baa", "00002d5d-97e1-45a5-8ad5-31558f4c3e08"]...)

In [5]:
# parallelized counting: one million random ints drawn from 0..4 (inclusive)
# no seed is set in the visible cells, so the exact counts differ between runs
ids = TypedList([random.randint(0, 4) for _ in range(1_000_000)])
# count() with no arguments tallies every distinct value (see the dict output)
ids.count()

{1: 199539, 0: 200291, 4: 199998, 3: 199703, 2: 200469}

In [6]:
# broadcasting and parallelized operations
a = TypedList(range(1_000_000))
b = TypedList(range(1_000_000))
# the scalar 3 is broadcast across b; normal operator precedence applies,
# so this is a + (b * 3), i.e. 4 * i for each index i
a + b * 3

IntTypedList([0, 4, 8, 12, 16]...)

In [7]:
# slice access
print(TypedList(range(10))[1:5])

# slice assignment: overwrite the first four elements with 4, 3, 2, 1
my_typed_list = TypedList(range(10))
my_typed_list[:4] = TypedList(range(4, 0, -1))
print(my_typed_list)

IntTypedList ([1, 2, 3, 4])
IntTypedList([4, 3, 2, 1, 4]...)


In [8]:
# for bool typed lists you can use the &, |, and ^ operators,
# and they're parallelized and broadcasted
# Note: __bool__ still corresponds to the length of the list

# fizzbuzz example: fizz / buzz flag the multiples of 3 / 5 among 1..999,999,
# and fizz & buzz flags the numbers that are multiples of both
fizz = TypedList(range(1, 1_000_000)) % 3 == 0
buzz = TypedList(range(1, 1_000_000)) % 5 == 0
print(f"fizz buzz: {(fizz & buzz).count()}")

fizz buzz: {True: 66666, False: 933333}


In [9]:
# comparing typed lists always generates a BoolTypedList,
# whether the other operand is a scalar or another typed list
print(TypedList(range(10)) == 2)
print(TypedList(range(10)) == TypedList(range(10)))

# BoolTypedLists can be collapsed to a single bool with any or all
print((TypedList(range(10)) == 2).any())

# BoolTypedLists can be used as a mask: only the positions where the mask
# is true are kept (here the odd indices)
print(
    TypedList(range(10))[TypedList([bool(i%2) for i in range(10)])]
)

BoolTypedList([false, false, true, false, false]...)
BoolTypedList([true, true, true, true, true]...)
True
IntTypedList ([1, 3, 5, 7, 9])


## Performance Example

In [10]:
# count "random" strings: 100 random numeric strings ("1".."100"), with that
# block of 100 repeated 10,000 times for 1,000,000 elements in total
rust_strs = TypedList([str(random.randint(1, 100)) for _ in range(100)] * 10_000)
# NOTE(review): .data presumably exposes the same elements as a plain Python
# sequence for the Counter baseline — confirm against typed_lists
py_strs = rust_strs.data

In [11]:
%%timeit
# time TypedList.count() on the string list; the result is discarded
_ = rust_strs.count()

14.4 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [12]:
%%timeit
# baseline: tally py_strs with collections.Counter; the result is discarded
_ = Counter(py_strs)

34.1 ms ± 258 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
# fizzbuzz example
# note: creating the TypedList is a bit slower,
# but the operations are faster
candidates = range(1, 10_000_000)
# build both containers from the same range so the two timed cells
# work on identical values
typed_list = TypedList(candidates)
py_list = list(candidates)

In [14]:
%%timeit
# boolean masks for multiples of 3 and of 5, built with element-wise % and ==
fizz = typed_list % 3 == 0
buzz = typed_list % 5 == 0
# keep only the values where both masks are true; result discarded for timing
_ = typed_list[fizz & buzz]

22.9 ms ± 912 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
%%timeit
# pure-Python baseline: keep the values divisible by both 3 and 5
fizzbuzz = [i for i in py_list if  i % 3 == 0 and i % 5 == 0]

333 ms ± 4.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
