### Performance of Python data structures

<font size = "4">

Create the list of integers $0, 1, 2, \dots, 999$ in each of 4 different ways. Which way is fastest?

In [None]:
def test1():
    """Build the list 0..999 by repeated list concatenation.

    Every pass through the loop evaluates ``items + [k]`` and rebinds
    ``items`` to the result.  Nothing is returned; the function exists
    only so that its running time can be measured.
    """
    items = []
    for k in range(1000):
        items = items + [k]


def test2():
    """Build the list 0..999 by appending one element at a time.

    Nothing is returned; the function exists only so that its running
    time can be measured.
    """
    items = []
    for k in range(1000):
        items.append(k)


def test3():
    """Build the list 0..999 with a list comprehension.

    Nothing is returned; the function exists only so that its running
    time can be measured.
    """
    items = [k for k in range(1000)]


def test4():
    """Build the list 0..999 by passing a range to the list constructor.

    Nothing is returned; the function exists only so that its running
    time can be measured.
    """
    items = list(range(1000))

In [None]:
from timeit import timeit, Timer

num_repeats = 1000

# Time every list-building function the same way.  timeit() returns the
# total number of seconds taken by `num_repeats` calls; the print converts
# that total to the average number of milliseconds per call.
candidates = (
    ("test1", test1),
    ("test2", test2),
    ("test3", test3),
    ("test4", test4),
)

totals = []
for label, func in candidates:
    elapsed = timeit(stmt="f()", number=num_repeats, globals={"f": func})
    totals.append(elapsed)
    print(f"For {label}, the average time was {1000*elapsed/num_repeats} milliseconds")

# Keep each total (in seconds) available under its own name.
total_time1, total_time2, total_time3, total_time4 = totals

<font size = "4">

- We can also time the code using instances of the `Timer` class.

- The `timeit` function actually uses an instance of `Timer` internally.

In [None]:
# Time each list-building function with an explicit Timer instance.
#
# Timer.timeit() returns the TOTAL time, in seconds, taken by `number`
# executions of the statement.  The printed figure is the AVERAGE time per
# call in milliseconds, so the conversion is written out explicitly rather
# than relying on the number of executions happening to be 1000.
num_calls = 1000

t1 = Timer("test1()", "from __main__ import test1")
print(f"concatenation: {1000 * t1.timeit(number=num_calls) / num_calls:15.4f} milliseconds")
t2 = Timer("test2()", "from __main__ import test2")
print(f"appending: {1000 * t2.timeit(number=num_calls) / num_calls:19.4f} milliseconds")
t3 = Timer("test3()", "from __main__ import test3")
print(f"list comprehension: {1000 * t3.timeit(number=num_calls) / num_calls:10.4f} milliseconds")
t4 = Timer("test4()", "from __main__ import test4")
print(f"list range: {1000 * t4.timeit(number=num_calls) / num_calls:18.4f} milliseconds")

<font size = "4">

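Using `Timer` is convenient when the code you are testing changes a mutable object. Such changes persist from one timed execution to the next (for example, every call to `x.pop()` makes `x` one element shorter), so the object must be large enough to survive all of the repetitions.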

In [None]:
# Demonstration of the ".pop()" method: called with no argument it removes
# and returns the last element; called with index 0 it removes and returns
# the first element.

for position in (None, 0):
    # Start from a fresh list each time so the two cases are independent.
    x = [1, 2, 3, 4, 5]
    val = x.pop() if position is None else x.pop(position)
    print("popped value:", val)
    print("x =", x)
    if position is None:
        # Blank line separating the two demonstrations.
        print()


In [None]:
# This cell is expected to fail.  Every timed execution of x.pop() removes
# an element from the same list, so once the 200 elements are used up the
# next pop() raises an error (num_repeats was set to 1000 earlier).
x = [*range(200)]
total_time = timeit("x.pop()", globals={"x": x}, number=num_repeats)
print(f"x.pop(), the average time was {1000*total_time/num_repeats} milliseconds")

In [None]:
# Compare removing the first element, x.pop(0), with removing the last
# element, x.pop().
#
# Timer.timeit() returns the TOTAL time in seconds for `number` executions,
# so it is converted explicitly to the average time per pop in milliseconds
# instead of relying on the number of executions happening to be 1000.
num_pops = 1000

# The setup string is only executed when timeit() is called, so `x` just has
# to be defined before the two timeit() calls below.
pop_zero = Timer("x.pop(0)", "from __main__ import x")
pop_end = Timer("x.pop()", "from __main__ import x")

# Both timers pop from this same list, so it must hold at least
# 2 * num_pops elements.
x = list(range(2000000))
time1 = 1000 * pop_zero.timeit(number=num_pops) / num_pops
time2 = 1000 * pop_end.timeit(number=num_pops) / num_pops


print(f"pop(0): {time1:10.8f} milliseconds")
print(f"pop(): {time2:11.8f} milliseconds")


### Test: the `in` operator with lists and dictionaries

In [None]:
import random

# Small demonstration of the `in` operator on a list and on a dictionary
# that hold the same n integers (as elements and as keys, respectively).
n = 8
x = [*range(n)]
y = dict.fromkeys(range(n))  # every key maps to None
print("x:", x)
print("y:", y, '\n')

# Pick a random integer from 0..n-1 and look it up in both containers.
t = random.randrange(n)
found_in_list = t in x
found_in_dict = t in y
print("t:", t)
print("t in x:", found_in_list)
print("t in y:", found_in_dict)

In [None]:
n_vals = [10_000, 100_000, 1_000_000, 10_000_000]

num_repeats = 100

# For each size n, time the membership test "f(n) in <container>", where f is
# random.randrange, so a fresh random target is drawn on every execution.
# Note that the measured time therefore includes the call to f(n) itself.

print("List test:")
for n in n_vals:
    x = [*range(n)]
    namespace = {"f": random.randrange, "n": n, "x": x}
    total_time = timeit(stmt="f(n) in x", number=num_repeats, globals=namespace)
    print(f"For n = {n}, the average time was {1000*total_time/num_repeats} milliseconds")

print()

print("Dict test:")
for n in n_vals:
    y = dict.fromkeys(range(n))  # keys 0..n-1, every value None
    namespace = {"f": random.randrange, "n": n, "y": y}
    total_time = timeit(stmt="f(n) in y", number=num_repeats, globals=namespace)
    print(f"For n = {n}, the average time was {1000*total_time/num_repeats} milliseconds")

<font size = "4">

- The following reference summarizes the computational cost of common operations on the standard built-in Python data structures: [Time Complexity Wiki](https://wiki.python.org/moin/TimeComplexity)

- An exhaustive look at complexity in Python can be found at [pythoncomplexity.com](https://pythoncomplexity.com/)