In [1]:
import random
import uuid
from collections import Counter

from typed_lists import TypedList, _initialize_global_thread_pool

## Examples

In [2]:
# Set the number of threads used for parallel operations.
# This is a global setting and can only be set once:
# calling this function again will raise an error,
# so re-running this cell in the same kernel is expected to fail.

_initialize_global_thread_pool(4)

In [3]:
# building typed lists from plain Python values; the printed name shows
# which concrete list type was chosen for the elements
left_ints, right_ints = TypedList(range(10)), TypedList(range(10))
print(left_ints * right_ints)

left_strs = TypedList(["a", "b", "c", "d", "e"])
right_strs = TypedList(["f", "g", "h", "i", "j"])
print(left_strs + right_strs)

numerators = TypedList([1.0, 2.0, 3.0, 4.0, 5.0])
denominators = TypedList([1.0, 2.0, 3.0, 4.0, 5.0])
print(numerators / denominators)

odd_flags = [i % 2 == 1 for i in range(10)]
print(TypedList(odd_flags))

IntTypedList([0, 1, 4, 9, 16]...)
StringTypedList (["af", "bg", "ch", "di", "ej"])
FloatTypedList ([1.0, 1.0, 1.0, 1.0, 1.0])
BoolTypedList([false, true, false, true, false]...)


In [4]:
# parallelized sorting
uuids = TypedList([str(uuid.uuid4()) for _ in range(1_000_000)])
# NOTE(review): the recorded output below is not in ascending order
# ("ed14..." precedes "df30...") - re-run and confirm that sort() sorts in place
uuids.sort()
uuids

StringTypedList(["01bd44d2-9549-4919-b508-04511d271189", "ed14e793-eb7b-4ff9-b44d-5386fa8eabae", "df30b527-9d9b-4a26-9c0b-b35f94a9c7b0", "54a79251-d5e8-444f-8854-558e1e3781ae", "b1c9c7ca-618c-4272-ae78-77ce1aea8be0"]...)

In [5]:
# parallelized counting: count() reports how many times each distinct
# value occurs (see the value -> occurrences mapping in the output)
ids = TypedList([random.randint(0, 4) for _ in range(1_000_000)])
ids.count()

{0: 199099, 4: 200329, 3: 200150, 1: 200440, 2: 199982}

In [6]:
# broadcasting and parallelized operations:
# the scalar 3 is applied to every element of b, and the result is
# added to a element by element (so element i of the output is i + 3 * i)
a = TypedList(range(1_000_000))
b = TypedList(range(1_000_000))
a + b * 3

IntTypedList([0, 4, 8, 12, 16]...)

In [7]:
# slice access
print(TypedList(range(10))[1:5])

# slice assignment: the first four elements are overwritten in place
my_typed_list = TypedList(range(10))
my_typed_list[:4] = TypedList(range(4, 0, -1))
print(my_typed_list)

IntTypedList ([1, 2, 3, 4])
IntTypedList([4, 3, 2, 1, 4]...)


In [8]:
# BoolTypedLists support the &, | and ^ operators,
# and those operators are parallelized and broadcasted too
# Note: __bool__ still corresponds to the length of the list

# fizzbuzz example
naturals = TypedList(range(1, 1_000_000))
fizz = (naturals % 3) == 0
buzz = (naturals % 5) == 0
fizz_and_buzz = fizz & buzz
print(f"fizz buzz: {fizz_and_buzz.count()}")

fizz buzz: {False: 933333, True: 66666}


In [9]:
# any comparison on a typed list yields a BoolTypedList,
# whether the other operand is a scalar or another typed list
equals_two = TypedList(range(10)) == 2
print(equals_two)
same_values = TypedList(range(10)) == TypedList(range(10))
print(same_values)

# any() / all() collapse a BoolTypedList to a single bool
print(equals_two.any())

# a BoolTypedList can also be used as a mask when indexing
odd_mask = TypedList([i % 2 == 1 for i in range(10)])
print(TypedList(range(10))[odd_mask])

BoolTypedList([false, false, true, false, false]...)
BoolTypedList([true, true, true, true, true]...)
True
IntTypedList ([1, 3, 5, 7, 9])


In [10]:
# find the point closest to the origin:
# x1 / x2 hold the two coordinates of 1_000 random points,
# (y1, y2) is the reference point they are measured against
x1 = TypedList(random.random() for _ in range(1_000))
x2 = TypedList(random.random() for _ in range(1_000))
y1 = y2 = 0

# Euclidean distance of every point to (y1, y2)
squared_dists = (x1 - y1) ** 2 + (x2 - y2) ** 2
dists = squared_dists ** 0.5

# index of the smallest distance, then the coordinates stored there
closest = dists.argmin()
x1[closest], x2[closest]

(0.028905274293394068, 0.0008829249669073391)

## Performance Example

In [11]:
# count "random" Strs: 100 random numeric strings, repeated 10_000 times
base_strs = [str(random.randint(1, 100)) for _ in range(100)]
rust_strs = TypedList(base_strs * 10_000)
# same values exposed through .data for the Counter baseline
# (presumably a plain Python list - confirm against the library)
py_strs = rust_strs.data

In [12]:
%%timeit
# typed-list side of the comparison: tally the values with TypedList.count()
_ = rust_strs.count()

11.4 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [13]:
%%timeit
# pure-Python baseline: tally the same values with collections.Counter
_ = Counter(py_strs)

37.5 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
# fizzbuzz example
# note: creating the TypedList is a bit slower,
# but the operations are faster
candidates = range(1, 10_000_000)
typed_list = TypedList(candidates)  # input for the typed-list timing
py_list = list(candidates)  # input for the pure-Python timing

In [15]:
%%timeit
# typed-list version: build two boolean masks, combine them with &,
# and use the combined mask to select the matching elements
fizz = typed_list % 3 == 0
buzz = typed_list % 5 == 0
_ = typed_list[fizz & buzz]

22.7 ms ± 847 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
%%timeit
# pure-Python baseline: keep the values divisible by both 3 and 5
fizzbuzz = [i for i in py_list if  i % 3 == 0 and i % 5 == 0]

341 ms ± 2.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# closest-point benchmark setup: one million random points, kept both as
# plain Python lists (x1, x2) and as typed lists (typed_x1, typed_x2)
x1 = [random.random() for _ in range(1_000_000)]
x2 = [random.random() for _ in range(1_000_000)]
typed_x1 = TypedList(x1)
typed_x2 = TypedList(x2)

# reference point the distances are measured from
y1 = y2 = 0

In [18]:
%%timeit
# squared distances are enough here: taking the square root would not
# change which index holds the minimum
dists = ((typed_x1 - y1) ** 2 + (typed_x2 - y2) ** 2)
min_ix = dists.argmin()
# look the winning point up in the plain Python lists
_ = x1[min_ix], x2[min_ix]

8.25 ms ± 217 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
%%timeit
# pure-Python baseline: same squared-distance criterion, evaluated per index
indices = range(len(x1))
min_ix = min(indices, key = lambda i: (x1[i] - y1) ** 2 + (x2[i] - y2) ** 2)
_ = x1[min_ix], x2[min_ix]

170 ms ± 1.33 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
