# Binary search

Implement binary search. It should work the first time, without fiddling with the indices!

In [None]:
def binary_search(arr, target) -> bool:
    """Return True if ``target`` occurs in the sorted sequence ``arr``.

    Searches the half-open index interval ``[l, r)`` and returns as soon as
    an element equal to ``target`` is found.

    Args:
        arr: Sequence sorted in ascending order.
        target: Value to look for.

    Returns:
        True if some element of ``arr`` equals ``target``, otherwise False.
    """
    l, r = 0, len(arr)

    while l < r:
        m = l + (r - l) // 2
        if target == arr[m]:
            return True
        if target < arr[m]:
            r = m
        else:
            # Plain ``else`` rather than ``elif target > arr[m]``: the interval
            # must shrink on every iteration, even for values such as NaN that
            # compare False in both directions, or the loop never terminates.
            l = m + 1

    return False

In [None]:
# Sorted fixture 1..10, with (target, expected) pairs that cover both ends,
# the middle, and values outside the range on either side.
array = list(range(1, 11))
cases = (
    (1, True),
    (5, True),
    (10, True),
    (11, False),
    (0, False),
    (-1, False),
)

for target, result in cases:
    assert result == binary_search(array, target)

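Really, binary search is an application of a more general algorithm: partition. Partition takes a boolean predicate and a sequence in which every element satisfying the predicate comes before every element that does not, and finds the index at which the predicate changes from true to false.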

In [None]:
def partition(arr, pred) -> int:
    """Return the index at which ``pred`` switches from true to false.

    ``arr`` is expected to be arranged so that every element satisfying
    ``pred`` comes before every element that does not.

    Args:
        arr: Sequence to search.
        pred: One-argument callable applied to elements of ``arr``.

    Returns:
        The index of the first element for which ``pred`` is false, or
        ``len(arr)`` if there is no such element.
    """
    lo = 0
    hi = len(arr)

    while lo < hi:
        mid = (lo + hi) // 2
        if not pred(arr[mid]):
            # The boundary is at mid or somewhere to its left.
            hi = mid
            continue
        # arr[mid] still satisfies pred, so the boundary lies strictly after it.
        lo = mid + 1

    # The interval is empty here, so lo and hi are the same index.
    return lo

In [None]:
import bisect

# A strict "<" predicate gives the leftmost insertion point (bisect_left);
# a non-strict "<=" predicate gives the rightmost one (bisect_right).
assert bisect.bisect_left(array, 5) == partition(array, lambda v: v < 5)
assert bisect.bisect_right(array, 5) == partition(array, lambda v: v <= 5)

In [None]:
def binary_search2(arr, target) -> bool:
    """Return True if ``target`` occurs in the sorted sequence ``arr``.

    Uses ``partition`` to find the first index whose element is not less
    than ``target``, then checks whether that element is ``target``.
    """
    pos = partition(arr, lambda item: item < target)
    if pos >= len(arr):
        # Every element is smaller than target, so it cannot be present.
        return False
    return arr[pos] == target

Is it faster to avoid the early return? The early return lets the search stop as soon as the target is found, but it adds an extra comparison and branch to every step of the loop.

In [None]:
def binary_search3(arr, target) -> bool:
    """Return True if ``target`` occurs in the sorted sequence ``arr``.

    Unlike the early-return variant, the loop never tests for equality: it
    narrows ``[l, r)`` down to a single candidate index and compares once at
    the end.

    Args:
        arr: Sequence sorted in ascending order; may be empty.
        target: Value to look for.

    Returns:
        True if some element of ``arr`` equals ``target``, otherwise False.
    """
    # With no elements there is no candidate to compare against; without this
    # guard the final ``arr[l]`` lookup would raise IndexError.
    if len(arr) == 0:
        return False

    l, r = 0, len(arr)

    while r - l > 1:
        m = l + (r - l) // 2
        if target < arr[m]:
            r = m
        else:
            # Plain ``else`` rather than ``elif target >= arr[m]``: m > l here,
            # so the interval always shrinks, even for values such as NaN
            # that compare False in both directions.
            l = m

    return arr[l] == target

In [None]:
import timeit
import random

def test(search, seed=0):
    """Check ``search`` against 1000 pseudo-random targets.

    The array holds the even numbers 0..18 and the targets are drawn from
    1..10, so a target should be found exactly when it is even.

    Args:
        search: Callable ``search(arr, target)`` under test.
        seed: Seed for a private random generator. The fixed default means
            every implementation is given the same targets, which keeps the
            timings below comparable.

    Raises:
        AssertionError: If ``search`` gives the wrong answer for any target.
    """
    rng = random.Random(seed)
    array = [2 * x for x in range(10)]
    targets = [rng.randint(1, 10) for _ in range(1000)]

    for target in targets:
        assert search(array, target) == (target % 2 == 0)


def _best_time(search, repeats=5):
    """Return the fastest of ``repeats`` timed runs of ``test(search)``, in seconds."""
    # A single run is easily skewed by whatever else the machine is doing;
    # the minimum over several runs is the least disturbed measurement.
    return min(timeit.repeat(lambda: test(search), number=1, repeat=repeats))


early_return = _best_time(binary_search)
partition_search = _best_time(binary_search2)
no_early_return = _best_time(binary_search3)

print(f"early return: {1000 * early_return:.3f} milliseconds")
print(f"partition search: {1000 * partition_search:.3f} milliseconds")
print(f"no early return: {1000 * no_early_return:.3f} milliseconds")

It turns out that the extra branching does make a difference: it's more efficient to just search all the way to the end and then return. Also, the partition-based version adds some overhead, since it makes an extra function call to the predicate on every step.