Heap

key methods: 
    
    heappush(heap, item) pushes an item; heappop(heap) pops and returns the smallest item. Elements are ordered by comparing the items with each other.
    heapify(list) turns a list into a heap, in place and in linear time.

In [6]:
# use heapq for heapsort

from heapq import *

def heapsort(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    Every item is pushed onto a fresh heap one at a time, then the heap is
    drained; each pop yields the smallest remaining item. The input itself
    is not modified. Items must be mutually comparable.
    """
    heap = []
    for item in iterable:
        heappush(heap, item)

    ordered = []
    while heap:
        ordered.append(heappop(heap))
    return ordered

print(heapsort([9, 4, 1, 2, 3, 6, 8, 3, 6 ]))

# print(heapsort([(1, 2), 4, 1, 2, 3, 6, 8, 3, 6 ]))  # raises TypeError: an int and a tuple cannot be compared with '<'


def heapsort2(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    The items are copied into a private list, which is turned into a heap
    with ``heapify`` and then drained; each pop yields the smallest
    remaining item.

    Working on a copy means the caller's data is left untouched and any
    iterable (tuple, generator, ...) is accepted, not only a list.
    Items must be mutually comparable.
    """
    # Copy first: heapify/heappop work in place and would otherwise
    # reorder and then empty the caller's list.
    heap = list(iterable)
    heapify(heap)
    return [heappop(heap) for _ in range(len(heap))]

print(heapsort2([9, 4, 1, 2, 3, 6, 8, 3, 6 ]))

[1, 2, 3, 3, 4, 6, 6, 8, 9]
[1, 2, 3, 3, 4, 6, 6, 8, 9]


In [None]:
## Find the kth largest element in an unsorted array. Note that it is the kth largest 
## element in the sorted order, not the kth distinct element.

class Solution:
    def findKthLargest(self, nums: List[int], k: int) -> int:
        """Return the kth largest value in ``nums`` (duplicates count separately).

        A min-heap is capped at ``k`` entries. Once it is full, each new
        value is pushed and the smallest entry is dropped in a single step.
        Every dropped value has at least ``k`` other values that are greater
        than or equal to it, so it cannot be the answer. What survives are
        the ``k`` largest values, and the heap root is the smallest of
        them, i.e. the kth largest overall.
        """
        largest = []

        for num in nums:
            if len(largest) < k:
                heapq.heappush(largest, num)
            else:
                # Heap already holds k values: push, then evict the minimum.
                heapq.heappushpop(largest, num)

        return heapq.heappop(largest)
        

In [None]:
# top K frequent words
692. Top K Frequent Words
Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words 
have the same frequency, then the word with the lower alphabetical order comes first.


# O(N + k log N) algorithm
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words, most frequent first.

        Words with equal frequency are returned in alphabetical order.
        If ``k`` exceeds the number of distinct words, all distinct words
        are returned.

        Each word is stored as ``(-frequency, word)``. In a min-heap the
        smallest tuple is therefore the most frequent word, and among
        equal frequencies the alphabetically smaller word.
        """
        count = collections.Counter(words)

        h = [(-val, key) for key, val in count.items()]
        # heapify builds the heap in linear time; pushing the entries one
        # by one would cost O(N log N) and break the bound stated above.
        heapq.heapify(h)

        # Each pop is O(log N). Clamp so an oversized k cannot pop from an
        # empty heap.
        return [heapq.heappop(h)[1] for _ in range(min(k, len(h)))]
    
# another solution using nsmallest

class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words using ``heapq.nsmallest``.

        Each word becomes a ``(-frequency, word)`` tuple, so the smallest
        tuples are the most frequent words, with ties in alphabetical order.
        """
        freq_by_word = collections.Counter(words)
        ranked = [(-freq, word) for word, freq in freq_by_word.items()]
        return [word for _, word in heapq.nsmallest(k, ranked)]
    
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words via ``nsmallest`` with a key.

        The Counter's keys (the distinct words) are ranked directly; the key
        function orders them by descending frequency, then alphabetically.
        """
        freq_by_word = collections.Counter(words)

        def rank(word):
            # Negated count: a higher frequency gives a smaller key.
            return (-freq_by_word[word], word)

        return heapq.nsmallest(k, freq_by_word, key=rank)

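## pitfall 1: the O(N log K) approach with a size-k min-heap of plain (count, word) tuples does NOT work. Among words with equal counts the alphabetically larger word is kept and comes out first, and the heap list itself is not in sorted order.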
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """PITFALL (intentionally incorrect): size-k min-heap of (count, word).

        Kept as a counter-example. It goes wrong in two ways:

        * Tuples compare by count and then by word, so among equal counts
          the alphabetically smaller word is the one popped. The larger
          words survive, which is the opposite of the required tie-break.
        * The result is read straight from the heap's backing list and
          reversed. A heap list is only partially ordered, so the output is
          not guaranteed to be sorted by frequency.
        """
        freq_by_word = collections.Counter(words)

        # The push-then-pop sequence is kept exactly as is: the returned
        # order depends on the heap's internal layout.
        kept = []
        for word, freq in freq_by_word.items():
            heapq.heappush(kept, (freq, word))
            if len(kept) > k:
                heapq.heappop(kept)

        return [word for _, word in reversed(kept)]
    
    
## pitfall 2: nlargest on (count, word) tuples does NOT work either, because words with equal counts come out in reverse alphabetical order (larger word first)
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """PITFALL (intentionally incorrect): ``nlargest`` on (count, word).

        Kept as a counter-example. ``nlargest`` ranks the tuples from
        largest to smallest, so words with the same count are returned in
        reverse alphabetical order instead of alphabetical order.
        """
        freq_by_word = collections.Counter(words)
        pairs = [(freq, word) for word, freq in freq_by_word.items()]
        return [word for _, word in heapq.nlargest(k, pairs)]
    
https://leetcode.com/problems/top-k-frequent-words/discuss/108348/Python-3-solution-with-O(nlogk)-and-O(n)/186252/    
class Solution:
    # Time Complexity = O(n + nlogk)
    # Space Complexity = O(n)
    def topKFrequent(self, words, k):
        """Return the ``k`` most frequent words, most frequent first.

        Each distinct word is wrapped in a ``Word`` (defined in this file),
        whose ordering decides which entry the min-heap treats as smallest.
        The heap never holds more than ``k`` entries. The survivors are
        popped smallest-first and the list is reversed before returning.
        """
        freq_by_word = collections.Counter(words)

        kept = []
        for word, freq in freq_by_word.items():
            entry = Word(freq, word)
            if len(kept) < k:
                heapq.heappush(kept, entry)
            else:
                # Heap is full: add the entry, then evict the smallest.
                heapq.heappushpop(kept, entry)

        ascending = [heapq.heappop(kept).word for _ in range(k)]
        ascending.reverse()
        return ascending

class Word:
    """A word paired with its frequency, ordered for use in a min-heap.

    ``a < b`` holds when ``a`` has the lower frequency, or when the
    frequencies are equal and ``a.word`` is alphabetically *larger*.
    In a min-heap, the entry popped first is therefore the least frequent
    word, and among ties the alphabetically largest one.

    Comparing with an object that is not a ``Word`` returns
    ``NotImplemented`` instead of failing on a missing attribute.
    """

    def __init__(self, freq, word):
        self.freq = freq
        self.word = word

    def __lt__(self, other):
        if not isinstance(other, Word):
            return NotImplemented
        if self.freq == other.freq:
            # Reversed on purpose: the alphabetically larger word is "smaller".
            return self.word > other.word
        return self.freq < other.freq

    def __eq__(self, other):
        if not isinstance(other, Word):
            return NotImplemented
        return self.freq == other.freq and self.word == other.word

    # Defining __eq__ already makes instances unhashable; this states it
    # explicitly.
    __hash__ = None

    def __repr__(self):
        return "Word(freq={!r}, word={!r})".format(self.freq, self.word)


## how to solve K smallest or K largest problem via heap

https://stackoverflow.com/questions/23038756/how-does-heapq-nlargest-work

## Two heap method
### 295. Find Median from Data Stream
### 480. Sliding Window Median
### 502. IPO