In [1]:
%%timeit l = list(range(10))
# Timed statement: a single index lookup in the 10-element list that the
# setup code on the %%timeit line builds.
l[5]

64.3 ns ± 4.14 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [2]:
%%timeit l = list(range(10_000_000))
# Timed statement: a single index lookup in the 10,000,000-element list that
# the setup code on the %%timeit line builds.
l[100_000]

70.1 ns ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [3]:
import timeit

def linear_search(needle, array):
    """Return the position of the first element of ``array`` equal to ``needle``.

    The elements are compared with ``==`` in iteration order and the scan
    stops at the first match.  ``-1`` is returned when nothing matches.
    """
    matches = (
        position for position, value in enumerate(array) if value == needle
    )
    # next() pulls only the first match (lazy scan); -1 is the "not found" default.
    return next(matches, -1)


if __name__ == "__main__":
    # The timed statement refers to linear_search, haystack and needle by
    # name; the setup string imports them from this module, so haystack and
    # needle must stay module-level names.
    setup = "from __main__ import (linear_search, haystack, needle)"
    iterations = 1000
    needles = (1, 6000, 9000, 1000000)

    for haystack_size in (10000, 100000, 1000000):
        haystack = range(haystack_size)
        for needle in needles:
            # One untimed call to learn which index the search reports.
            index = linear_search(needle, haystack)
            total_seconds = timeit.timeit(
                "linear_search(needle, haystack)",
                setup,
                number=iterations,
            )
            # timeit returns the total for all iterations; report the mean.
            seconds_per_call = total_seconds / iterations
            report = (
                f"Value {needle: <8} found in haystack of "
                f"size {len(haystack): <8} at index "
                f"{index: <8} in {seconds_per_call:.5e} seconds"
            )
            print(report)

Value 1        found in haystack of size 10000    at index 1        in 5.55400e-07 seconds
Value 6000     found in haystack of size 10000    at index 6000     in 9.80884e-04 seconds
Value 9000     found in haystack of size 10000    at index 9000     in 1.63080e-03 seconds
Value 1000000  found in haystack of size 10000    at index -1       in 1.67250e-03 seconds
Value 1        found in haystack of size 100000   at index 1        in 8.71400e-07 seconds
Value 6000     found in haystack of size 100000   at index 6000     in 1.00930e-03 seconds
Value 9000     found in haystack of size 100000   at index 9000     in 1.46334e-03 seconds
Value 1000000  found in haystack of size 100000   at index -1       in 1.61621e-02 seconds
Value 1        found in haystack of size 1000000  at index 1        in 5.55200e-07 seconds
Value 6000     found in haystack of size 1000000  at index 6000     in 1.01926e-03 seconds
Value 9000     found in haystack of size 1000000  at index 9000     in 1.54333e-03 seconds

In [1]:
from itertools import islice


def index_sequence(key, mask=0b111, PERTURB_SHIFT=5):
    """Yield the endless sequence of slot indexes probed for ``key``.

    The first index is the low bits of the hash selected by ``mask``.  Every
    later index is ``(i * 5 + perturb + 1) & mask``, where ``perturb`` starts
    as the hash and is shifted right by ``PERTURB_SHIFT`` bits before each
    step.

    Args:
        key: Any hashable object; only ``hash(key)`` is used.
        mask: Bit mask applied to every index (``0b111`` gives 8 slots).
        PERTURB_SHIFT: Number of bits ``perturb`` is shifted right per step.

    Yields:
        int: The next index to probe, always in ``0 .. mask``.
    """
    import sys  # local import keeps this definition self-contained

    # hash() returns a signed integer, whereas the actual C code in CPython
    # works on an unsigned one.  Left signed, a negative hash would shift
    # towards -1 instead of 0 and the recurrence would collapse to
    # ``i = 5 * i & mask``, which does not reach every slot.  Reinterpreting
    # the hash as an unsigned integer of the platform's hash width fixes that
    # and leaves non-negative hashes untouched.
    perturb = hash(key) % (1 << sys.hash_info.width)
    i = perturb & mask
    yield i
    while True:
        perturb >>= PERTURB_SHIFT
        i = (i * 5 + perturb + 1) & mask
        yield i


class ForceHash:
    """Helper object whose ``__hash__`` returns a caller-chosen integer."""

    def __init__(self, force_hash):
        """Store ``force_hash``, the value ``__hash__`` will return."""
        self.force_hash = force_hash

    def __hash__(self):
        """Return the stored ``force_hash`` value unchanged."""
        return self.force_hash

    def __repr__(self):
        """Show the stored hash in binary, zero-padded to at least 8 digits."""
        bits = format(self.force_hash, "08b")
        return f"<ForceHash 0b{bits}>"


def sample_probe(force_hash, num_samples=10):
    """Print the first ``num_samples`` indexes that ``index_sequence`` yields.

    ``force_hash`` is passed to ``index_sequence`` as the key and is also
    interpolated into the printed message.
    """
    # index_sequence never terminates, so islice bounds how much is consumed.
    samples = list(islice(index_sequence(force_hash), num_samples))
    print(f"First {num_samples} samples for hash {force_hash}: {samples}")


if __name__ == "__main__":
    # Print the probe sequence for each of five hand-picked 8-bit hash values.
    for forced_value in (
        0b00000111,
        0b11100111,
        0b01110111,
        0b01110001,
        0b01110000,
    ):
        sample_probe(ForceHash(forced_value))


First 10 samples for hash <ForceHash 0b00000111>: [7, 4, 5, 2, 3, 0, 1, 6, 7, 4]
First 10 samples for hash <ForceHash 0b11100111>: [7, 3, 0, 1, 6, 7, 4, 5, 2, 3]
First 10 samples for hash <ForceHash 0b01110111>: [7, 7, 4, 5, 2, 3, 0, 1, 6, 7]
First 10 samples for hash <ForceHash 0b01110001>: [1, 1, 6, 7, 4, 5, 2, 3, 0, 1]
First 10 samples for hash <ForceHash 0b01110000>: [0, 4, 5, 2, 3, 0, 1, 6, 7, 4]
