# CS460 Algorithms and Their Analysis 
## Programming Assignment 3: Implement heap sort algorithm

**Author:** Yang Xu, Assistant Professor of Computer Science, San Diego State University

**Total points: 15 + 2(bonus)**

In [54]:
import numpy as np
import timeit
import heapq

## Task 1: Implement the function of testing max-heap property
**Points: 3**

Implement the `is_maxheap()` function, which tests whether a given list satisfies the max-heap property.

This algorithm is not covered in the lecture slides, but we can use the same strategy as in `build_maxheap()`, i.e., we go through the first half of the elements in the array, and if any of them breaks the maxheap property, then return `False`; if none of them breaks, then return `True`.

*Hint*: Since the index starts with 0 in Python, the way to calculate node `i`'s left child and right child's indices should be slightly different from the formulas given in the textbook. 

In [55]:
def is_maxheap(A, heap_size:int = None) -> bool:
    """Check whether the first ``heap_size`` elements of ``A`` form a max-heap.

    The heap uses the 0-indexed array layout: the children of node ``i``
    are stored at ``2 * i + 1`` and ``2 * i + 2``.

    Args:
        A: Indexable sequence holding the candidate heap.
        heap_size: Number of leading elements of ``A`` that belong to the
            heap. Defaults to ``len(A)`` when omitted (``None``).

    Returns:
        ``True`` if no parent is smaller than either of its children,
        ``False`` otherwise. A heap of size 0 is trivially a max-heap.

    Raises:
        AssertionError: If an explicit ``heap_size`` exceeds ``len(A)``.
    """
    # Compare against None explicitly: an explicit heap_size of 0 is a valid
    # request and must not silently fall back to len(A).
    if heap_size is None:
        heap_size = len(A)
    else:
        assert heap_size <= len(A)

    flag = True # Initialize the boolean variable to be returned

    ### START YOUR CODE ###
    # Only the first half of the heap can have children; every index in
    # range(heap_size // 2) is guaranteed to have at least a left child.
    for i in range(heap_size // 2):
        left_child = 2 * i + 1
        right_child = 2 * i + 2
        if A[i] < A[left_child]:
            flag = False
            break
        # The last parent may have no right child, so guard the index.
        if (right_child < heap_size) and (A[i] < A[right_child]):
            flag = False
            break
    ### END YOUR CODE ###

    return flag

In [56]:
# Do NOT change the test code here.

# Every parent is >= both of its children, so this is a valid max-heap.
arr1 = [15, 8, 10, 5, 7, 9, 6]
print(is_maxheap(arr1, len(arr1)))

# Index 3 holds 9, which is larger than its parent 8 at index 1.
arr2 = [15, 8, 10, 9, 7, 9, 6]
print(is_maxheap(arr2, len(arr2)))

# Index 4 holds 9, which is larger than its parent 8 at index 1.
arr3 = [15, 8, 10, 5, 9, 9, 6]
print(is_maxheap(arr3, len(arr3)))

# Index 5 holds 11, which is larger than its parent 10 at index 2.
arr4 = [15, 8, 10, 5, 7, 11, 6]
print(is_maxheap(arr4, len(arr4)))

True
False
False
False


**Expected output:** \
True \
False \
False \
False


---
## Task 2: Implement max_heapify()
**Points: 2**

Implement `max_heapify()` following the pseudo-code in lecture slides/textbook. Note that there is a slight difference in the additional argument `heap_size`.

In [57]:
def max_heapify(A, idx, heap_size):
    """Sift the element at ``idx`` down within the first ``heap_size`` items of ``A``.

    The element is repeatedly swapped with its strictly larger child (the
    larger of the two when both exceed it) until neither child inside the
    heap is larger. ``A`` is modified in place; nothing is returned.

    Args:
        A: Mutable sequence holding the heap in 0-indexed array layout.
        idx: Index of the node to sift down.
        heap_size: Number of leading elements of ``A`` treated as the heap;
            children at or beyond this index are ignored.
    """
    ### START YOUR CODE ###
    parent = idx
    while True:
        left_child = 2 * parent + 1
        right_child = left_child + 1

        # Pick the position holding the biggest value among the parent and
        # whichever of its children lie inside the heap.
        biggest = parent
        if left_child < heap_size and A[left_child] > A[parent]:
            biggest = left_child
        if right_child < heap_size and A[right_child] > A[biggest]:
            biggest = right_child

        if biggest == parent:
            # Neither child is larger: nothing left to fix below this node.
            return

        A[parent], A[biggest] = A[biggest], A[parent]
        # Continue sifting from the child slot the old parent moved into.
        parent = biggest
    ### END YOUR CODE ###

In [58]:
# Do NOT change the test code here.

# Index 1 holds 4, which is smaller than both of its children (14 at index 3
# and 7 at index 4), so the list does not start out as a max-heap.
arr = [16, 4, 10, 14, 7, 9, 3, 2, 8, 1]
print('Before:', arr)
print(is_maxheap(arr))
print()

# Sift the offending element at index 1 down, using the whole list as the heap.
max_heapify(arr, 1, heap_size=len(arr))
print('After:', arr)
print(is_maxheap(arr))

Before: [16, 4, 10, 14, 7, 9, 3, 2, 8, 1]
False

After: [16, 14, 10, 8, 7, 9, 3, 2, 4, 1]
True


**Expected output:** \
Before: [16, 4, 10, 14, 7, 9, 3, 2, 8, 1]\
False

After: [16, 14, 10, 8, 7, 9, 3, 2, 4, 1]\
True

---
## Task 3: Implement build_maxheap()
**Points: 2**

Implement `build_maxheap()` by following the pseudo-code in lecture slides/textbook. 

*Hint*: You can use a negative step in `range()` to implement a reversed for loop.

In [59]:
def build_maxheap(A):
    """Rearrange ``A`` in place into a max-heap.

    Calls ``max_heapify`` on every index from ``len(A) // 2 - 1`` down to 0,
    always using the full length of ``A`` as the heap size.

    Args:
        A: Mutable sequence to be turned into a max-heap.
    """
    n = len(A)
    # Indices n // 2 and above have no children, so start just below them
    # and work back towards the root.
    for parent in reversed(range(n // 2)):
        max_heapify(A, parent, n)

In [60]:
# Do NOT change the test code here.

arr = [1, 2, 8, 3, 9, 7, 14, 4, 10, 16]

# Report the heap status and contents before the list is rearranged.
print('Before:', is_maxheap(arr))
print(arr)
print()

# build_maxheap() works in place, so the same list is checked again afterwards.
build_maxheap(arr)
print('After:', is_maxheap(arr))
print(arr)

Before: False
[1, 2, 8, 3, 9, 7, 14, 4, 10, 16]

After: True
[16, 10, 14, 4, 9, 7, 8, 1, 3, 2]


**Expected output:** \
Before: False\
[1, 2, 8, 3, 9, 7, 14, 4, 10, 16]

After: True\
[16, 10, 14, 4, 9, 7, 8, 1, 3, 2]

---
## Task 4: Implement heapsort()
**Points: 3**

Implement `heapsort()` by following the pseudo-code in lecture slides/textbook. 

*Hint*: Specify the `heap_size` argument correctly when calling the `max_heapify()` function.

In [61]:
def heapsort(A):
    """Sort ``A`` in place into ascending order with heap sort.

    The list is first turned into a max-heap. The root (current maximum) is
    then repeatedly swapped with the last element of the shrinking heap, and
    the new root is sifted down within the remaining prefix.

    Args:
        A: Mutable sequence to sort. Nothing is returned.
    """
    ### START YOUR CODE ###
    build_maxheap(A)
    # ``last`` is both the slot receiving the current maximum and the size of
    # the heap that remains once that slot has been filled.
    for last in reversed(range(1, len(A))):
        A[last], A[0] = A[0], A[last]
        max_heapify(A, 0, last)
    ### END YOUR CODE ###

In [62]:
# Do NOT change the test code here.
# Fixed seed so the random input is the same on every run.
np.random.seed(0)

arr = np.random.randint(100, size=20).tolist()
print('Unsorted:', arr)

# heapsort() sorts in place, so run it on a copy and keep ``arr`` unsorted.
arr1 = arr.copy()
heapsort(arr1)
print('Sorted by heapsort():', arr1)

# Reference result from the built-in sorted() for comparison.
arr2 = sorted(arr)
print('Sorted by sorted():  ', arr2)

Unsorted: [44, 47, 64, 67, 67, 9, 83, 21, 36, 87, 70, 88, 88, 12, 58, 65, 39, 87, 46, 88]
Sorted by heapsort(): [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]
Sorted by sorted():   [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]


**Expected output:** 

Unsorted: [44, 47, 64, 67, 67, 9, 83, 21, 36, 87, 70, 88, 88, 12, 58, 65, 39, 87, 46, 88]

Sorted by heapsort(): [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]

Sorted by sorted():   [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]

---

## Task 5 (Bonus task): Implement heapsort with `heapq` module
**Points: 2** (bonus)

Implement heapsort_v2() using the built-in `heapq` module of Python, and compare its performance with `heapsort()`.

*Hint:* `heapq` provides an implementation of the heap queue algorithm. In Python, a heap can be viewed as a regular list: you can create a heap by transforming a populated list into a heap via the function `heapify()`, or by calling `heappush()` iteratively.\
Go to https://docs.python.org/3/library/heapq.html for more references.

In [63]:
def heapsort_v2(A):
    """Return the items of ``A`` as a new list in ascending order, using ``heapq``.

    Unlike ``heapsort()``, the input is not modified: the items are copied
    into a separate min-heap, which is then emptied smallest-first.

    Args:
        A: Iterable of mutually comparable items.

    Returns:
        A new list containing the items of ``A`` sorted in ascending order.
    """
    ### START YOUR CODE ###
    # Initialize heap with all the elements in A.
    # Copy first so the caller's data is untouched, then heapify the copy in
    # one linear-time pass instead of pushing the items one at a time.
    heap = list(A)
    heapq.heapify(heap)
    ### END YOUR CODE ###

    ### START YOUR CODE ###
    # Implement heapsort. Each heappop() removes the current minimum, so the
    # popped values come out in ascending order. len(heap) is evaluated once,
    # before any pop, so the loop runs exactly once per item.
    A_sorted = [heapq.heappop(heap) for _ in range(len(heap))]
    ### END YOUR CODE ###

    return A_sorted

In [64]:
# Test heapsort_v2()
# Do NOT change the test code here.
# Fixed seed so the random input is the same on every run.
np.random.seed(0)

arr = np.random.randint(100, size=20).tolist()

# heapsort_v2() returns the sorted result rather than sorting in place.
arr_sorted = heapsort_v2(arr)
print('Sorted by heapsort_v2():', arr_sorted)

Sorted by heapsort_v2(): [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]


**Expected output**

Sorted by heapsort_v2(): [9, 12, 21, 36, 39, 44, 46, 47, 58, 64, 65, 67, 67, 70, 83, 87, 87, 88, 88, 88]

---

Run the cell below to see which version of heapsort is faster. How does it compare to the built-in `sorted()`?

In [65]:
# Do NOT change the test code here.

def test1():
    """Benchmark body: sort a seeded 100-element random list with heapsort()."""
    # Re-seeding on every call makes each run sort the same input.
    np.random.seed(1)
    arr = np.random.randint(1000, size=100).tolist()
    heapsort(arr)

def test2():
    """Benchmark body: sort a seeded 100-element random list with heapsort_v2()."""
    # Re-seeding on every call makes each run sort the same input.
    np.random.seed(1)
    arr = np.random.randint(1000, size=100).tolist()
    heapsort_v2(arr)

def test3():
    """Benchmark body: sort a seeded 100-element random list with built-in sorted()."""
    # Re-seeding on every call makes each run sort the same input.
    np.random.seed(1)
    arr = np.random.randint(1000, size=100).tolist()
    sorted(arr)

# Time 1000 calls of each benchmark body and print the total seconds.
# globals=globals() lets timeit resolve test1/test2/test3 from this namespace.
# Each figure includes the cost of seeding and generating the input list.
print('heapsort()', timeit.timeit('test1()', globals=globals(), number=1000))
print('heapsort_v2()', timeit.timeit('test2()', globals=globals(), number=1000))
print('sorted()', timeit.timeit('test3()', globals=globals(), number=1000))

heapsort() 0.1907060419998743
heapsort_v2() 0.027592207999987295
sorted() 0.009916791000250669
