# Heap
### Implementation of heap
### Heapsort
### Kth largest element using minHeap
### Lintcode545. Top k Largest Numbers II (data stream)
### 692. Top K Frequent Words

In [6]:
class Solution:
    """Array-backed binary min-heap helpers that work in place on a list.

    The children of index ``i`` live at ``2 * i + 1`` and ``2 * i + 2``;
    its parent lives at ``(i - 1) // 2``.
    """

    def heapify(self, A):
        """Rearrange ``A`` in place so that it satisfies the min-heap property.

        @param: A: Given an integer array
        @return: nothing
        """
        # Sift down every candidate parent, deepest first; leaves are no-ops.
        for start in reversed(range(len(A) // 2 + 1)):
            self.siftDown(A, start)

    def siftDown(self, A, idx):
        """Move ``A[idx]`` down until neither of its children is smaller."""
        size = len(A)
        while idx < size:
            smallest = idx
            # Check the left child first, then the right one.
            for child in (2 * idx + 1, 2 * idx + 2):
                if child < size and A[child] < A[smallest]:
                    smallest = child

            # The current node is already the minimum of the three.
            if smallest == idx:
                return

            A[idx], A[smallest] = A[smallest], A[idx]
            idx = smallest

    def insert(self, A, val):
        """Append ``val`` to the heap ``A`` and bubble it up to its place."""
        A.append(val)
        child = len(A) - 1
        # Stop at the root: index 0 has no parent.
        while child > 0:
            parent = (child - 1) // 2
            if not A[parent] > A[child]:
                break
            A[child], A[parent] = A[parent], A[child]
            child = parent

    def pop(self, A):
        """Remove and return the smallest element of the heap ``A``."""
        top = A[0]
        last = A.pop()
        if A:
            # Move the former last leaf to the root, then restore the order.
            A[0] = last
        self.siftDown(A, 0)
        return top

# use heapq for heapsort
Heap

key methods: 
    
    heappush(list, item), heappop(list); the elements are ordered using comparisons between items
    heapify(list), turn list into heap
from heapq import *

def heapsort(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    Every item is pushed onto a temporary min-heap, which is then drained
    from smallest to largest. Items must be mutually comparable.
    """
    heap = []
    for item in iterable:
        heappush(heap, item)

    ordered = []
    while heap:
        ordered.append(heappop(heap))
    return ordered

print(heapsort([9, 4, 1, 2, 3, 6, 8, 3, 6 ]))

# print(heapsort([(1, 2), 4, 1, 2, 3, 6, 8, 3, 6 ])) error: since int and tuple cannot be compared


def heapsort2(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    The items are copied into a fresh list before ``heapify`` runs, so the
    caller's sequence is left untouched and any iterable (not only a list)
    is accepted. Items must be mutually comparable.
    """
    heap = list(iterable)
    heapify(heap)
    # len(heap) is read once, before the pops start shrinking the heap.
    return [heappop(heap) for _ in range(len(heap))]

print(heapsort2([9, 4, 1, 2, 3, 6, 8, 3, 6 ]))

[1, 2, 3, 3, 4, 6, 6, 8, 9]
[1, 2, 3, 3, 4, 6, 6, 8, 9]


In [None]:
## Find the kth largest element in an unsorted array. Note that it is the kth largest 
## element in the sorted order, not the kth distinct element.

class Solution:
    def findKthLargest(self, nums: List[int], k: int) -> int:
        """Return the k-th largest value of ``nums`` (duplicates count).

        A min-heap capped at ``k`` entries is maintained while scanning
        ``nums``. Every value evicted from it has at least ``k`` values that
        are greater than or equal to it, so the ``k`` survivors are the ``k``
        largest values and the heap root is the k-th largest.
        """
        largest = []

        for value in nums:
            if len(largest) < k:
                heapq.heappush(largest, value)
            else:
                # Heap is full: add the value and drop the current minimum.
                heapq.heappushpop(largest, value)

        return largest[0]
        

In [None]:
545. Top k Largest Numbers II
Implement a data structure, provide two interfaces:

add(number). Add a new number in the data structure.
topk(). Return the top k largest numbers in this data structure. k is given when we create the data structure.

import heapq
class Solution:
    """Keep track of the k largest numbers added so far.

    A min-heap capped at ``k`` entries holds the current top-k values, so
    the smallest retained value is always the first one to be evicted.
    """

    def __init__(self, k):
        """
        @param: k: An integer
        """
        self.h = []
        self.k = k

    def add(self, num):
        """
        @param: num: Number to be added
        @return: nothing
        """
        heapq.heappush(self.h, num)

        # Evict the smallest entries until at most k remain.
        while len(self.h) > self.k:
            heapq.heappop(self.h)

    def topk(self):
        """
        @return: Top k element
        """
        # sorted() already returns a new list, so the heap itself is untouched.
        return sorted(self.h, reverse=True)

In [None]:
# top K frequent words
692. Top K Frequent Words
Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words 
have the same frequency, then the word with the lower alphabetical order comes first.


# O(N + k log N) algorithm
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically. If ``k`` is
        larger than the number of distinct words, every distinct word is
        returned.
        """
        count = collections.Counter(words)

        # Negating the count makes heapq's min-heap yield the highest
        # frequency first; the word itself breaks ties alphabetically.
        h = [(-freq, word) for word, freq in count.items()]
        # heapify is O(N); pushing the N entries one by one would be O(N log N).
        heapq.heapify(h)

        # Never pop more entries than the heap holds.
        return [heapq.heappop(h)[1] for _ in range(min(k, len(h)))]
    
# another solution using nsmallest

class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words using ``heapq.nsmallest``.

        Each word is ranked by the tuple ``(-frequency, word)``, so the
        smallest tuples are the most frequent words, with ties broken
        alphabetically.
        """
        count = collections.Counter(words)
        ranked = [(-freq, word) for word, freq in count.items()]
        return [word for _, word in heapq.nsmallest(k, ranked)]
    
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Return the ``k`` most frequent words via a keyed ``nsmallest``.

        The distinct words are ranked directly, with higher counts first and
        alphabetical order deciding ties.
        """
        count = collections.Counter(words)

        def rank(word):
            # Negated count first so that frequent words sort as "smaller".
            return (-count[word], word)

        return heapq.nsmallest(k, count, key=rank)

## pitfall 1: an O(N log K) min-heap of (freq, word) tuples does not work — on equal frequency it evicts the alphabetically smaller word first, and the raw heap list is not in sorted order
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Pitfall demo: size-k min-heap of plain ``(freq, word)`` tuples.

        Kept as a counter-example; its output is not the required ordering:

        * on equal frequency the min-heap evicts the alphabetically smaller
          word first, so the wrong word of a tie can survive;
        * the heap's backing list is only heap-ordered, so reversing it does
          not yield a list sorted by frequency.
        """
        count = collections.Counter(words)

        heap = []
        for word, freq in count.items():
            heapq.heappush(heap, (freq, word))
            # Keep at most k entries by dropping the current minimum.
            if len(heap) > k:
                heapq.heappop(heap)

        # Read the backing list back to front, taking only the word.
        return [word for _, word in reversed(heap)]
    
    
## pitfall 2, we cannot use nlargest, because the output will be sorted from larger letter to small letter
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """Pitfall demo: ``heapq.nlargest`` on ``(freq, word)`` tuples.

        Kept as a counter-example. ``nlargest`` orders ties by the larger
        word first, which is the reverse of the alphabetical tie-break the
        problem asks for.
        """
        count = collections.Counter(words)
        pairs = [(freq, word) for word, freq in count.items()]
        return [word for _, word in heapq.nlargest(k, pairs)]
    
https://leetcode.com/problems/top-k-frequent-words/discuss/108348/Python-3-solution-with-O(nlogk)-and-O(n)/186252/    
class Solution:
    # Time Complexity = O(n + nlogk)
    # Space Complexity = O(n)
    def topKFrequent(self, words, k):
        """Return the ``k`` most frequent words, most frequent first.

        A min-heap of ``Word`` entries is capped at ``k`` items, so the
        entry that ``Word.__lt__`` ranks lowest is evicted first. Draining
        the heap yields the survivors from lowest to highest rank, hence the
        final reversal. If fewer than ``k`` distinct words exist, all of
        them are returned.
        """
        count = collections.Counter(words)
        heap = []
        for key, value in count.items():
            heapq.heappush(heap, Word(value, key))
            if len(heap) > k:  # only keep the k largest elements
                heapq.heappop(heap)

        res = []
        # Drain whatever is left; the heap never holds more than k entries,
        # and it may hold fewer when there are fewer than k distinct words.
        while heap:
            res.append(heapq.heappop(heap).word)
        return res[::-1]

class Word:
    """Heap entry pairing a word with its frequency.

    Ordering is defined so that a min-heap evicts the least wanted entry
    first: a lower frequency compares as smaller, and among equal
    frequencies the alphabetically *larger* word compares as smaller.
    """

    def __init__(self, freq, word):
        self.freq = freq
        self.word = word

    def __repr__(self):
        return "{}(freq={!r}, word={!r})".format(
            type(self).__name__, self.freq, self.word
        )

    def __lt__(self, other):
        if not isinstance(other, Word):
            # Let Python try the reflected operation or raise TypeError.
            return NotImplemented
        if self.freq == other.freq:
            # Reversed on purpose: the alphabetically larger word ranks lower.
            return self.word > other.word
        return self.freq < other.freq

    def __eq__(self, other):
        if not isinstance(other, Word):
            return NotImplemented
        return self.freq == other.freq and self.word == other.word

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable.
        return hash((self.freq, self.word))


## how to solve K smallest or K largest problem via heap

https://stackoverflow.com/questions/23038756/how-does-heapq-nlargest-work


### 378. Kth Smallest Element in a Sorted Matrix
### 373. Find K Pairs with Smallest Sums
### 264. Ugly Number II
## Two heap method
### 295. Find Median from Data Stream
### 480. Sliding Window Median
### 502. IPO

In [None]:
378. Kth Smallest Element in a Sorted Matrix

Given a n x n matrix where each of the rows and columns are sorted in ascending order, find the kth smallest element in the matrix.

Note that it is the kth smallest element in the sorted order, not the kth distinct element.

Example:

matrix = [
   [ 1,  5,  9],
   [10, 11, 13],
   [12, 13, 15]
],
k = 8,

return 13.

# add corner first
# every time add down and right (be cautious on the repeated elements)
class Solution:
    def kthSmallest(self, matrix: List[List[int]], k: int) -> int:
        """Return the k-th smallest entry of a row- and column-sorted matrix.

        The search starts at the top-left corner. After each pop, the cell
        below and the cell to the right are offered to the heap; a ``seen``
        set keeps a cell from being queued twice, since it can be reached
        from two neighbours.
        """
        rows, cols = len(matrix), len(matrix[0])
        frontier = [(matrix[0][0], 0, 0)]
        seen = {(0, 0)}

        for _ in range(k):
            e, r, c = heapq.heappop(frontier)
            # Down neighbour first, then right neighbour.
            for nr, nc in ((r + 1, c), (r, c + 1)):
                if nr < rows and nc < cols and (nr, nc) not in seen:
                    seen.add((nr, nc))
                    heapq.heappush(frontier, (matrix[nr][nc], nr, nc))

        return e

# add the first column
# then add right elements
class Solution:
    def kthSmallest(self, matrix: List[List[int]], k: int) -> int:
        """Return the k-th smallest entry by merging the sorted rows.

        The heap is seeded with the first element of every row. Each pop is
        followed by a push of the next element from the same row, so the
        heap holds at most one candidate per row.
        """
        width = len(matrix[0])

        heads = [(row[0], r, 0) for r, row in enumerate(matrix)]
        heapq.heapify(heads)

        for _ in range(k):
            e, r, c = heapq.heappop(heads)
            nxt = c + 1
            if nxt < width:
                heapq.heappush(heads, (matrix[r][nxt], r, nxt))

        return e

In [None]:
373. Find K Pairs with Smallest Sums

You are given two integer arrays nums1 and nums2 sorted in ascending order and an integer k.

Define a pair (u,v) which consists of one element from the first array and one element from the second array.

Find the k pairs (u1,v1),(u2,v2) ...(uk,vk) with the smallest sums.

Example 1:

Input: nums1 = [1,7,11], nums2 = [2,4,6], k = 3
Output: [[1,2],[1,4],[1,6]] 
Explanation: The first 3 pairs are returned from the sequence: 
             [1,2],[1,4],[1,6],[7,2],[7,4],[11,2],[7,6],[11,4],[11,6]
            
class Solution:
    def kSmallestPairs(self, nums1: List[int], nums2: List[int], k: int) -> List[List[int]]:
        """Return up to ``k`` pairs ``[u, v]`` with the smallest sums.

        ``u`` comes from ``nums1`` and ``v`` from ``nums2``, both sorted in
        ascending order. Fewer than ``k`` pairs are returned when fewer
        exist; an empty list is returned for empty input or ``k <= 0``.
        """
        if not nums1 or not nums2 or k <= 0:
            return []

        h = [(nums1[0] + nums2[0], 0, 0)]
        # An index pair can be reached from two neighbours, so positions are
        # recorded when they are queued to keep them from being queued twice.
        visited = {(0, 0)}
        res = []

        # Stop as soon as k pairs are collected; no need to expand further.
        while h and len(res) < k:
            _, i, j = heapq.heappop(h)
            res.append([nums1[i], nums2[j]])

            for ni, nj in ((i + 1, j), (i, j + 1)):
                if ni < len(nums1) and nj < len(nums2) and (ni, nj) not in visited:
                    visited.add((ni, nj))
                    heapq.heappush(h, (nums1[ni] + nums2[nj], ni, nj))

        return res
    

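# slower variant: duplicates are filtered only after popping, so the same position is pushed many times; filter before pushing (as in the version above) to avoid TLE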
class Solution:
    def kSmallestPairs(self, nums1: List[int], nums2: List[int], k: int) -> List[List[int]]:
        """Collect pairs with the smallest sums, de-duplicating on pop.

        Neighbours are pushed unconditionally, so one index pair can sit in
        the heap several times. A pair is recorded only the first time it is
        popped, but later copies still push their own neighbours again.
        """
        if not nums1 or not nums2:
            return []

        len1, len2 = len(nums1), len(nums2)
        frontier = [(nums1[0] + nums2[0], 0, 0)]
        seen = set()
        pairs = []

        while frontier:
            _, a, b = heapq.heappop(frontier)
            if (a, b) not in seen:
                seen.add((a, b))
                pairs.append([nums1[a], nums2[b]])

            # Always expand, even when this position was a duplicate.
            for na, nb in ((a + 1, b), (a, b + 1)):
                if na < len1 and nb < len2:
                    heapq.heappush(frontier, (nums1[na] + nums2[nb], na, nb))

            if len(pairs) == k:
                break

        return pairs

In [None]:
264. Ugly Number II

Write a program to find the n-th ugly number.

Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. 

# starting from the smallest ugly number, then everytime add candidates to be the next one
class Solution:
    def nthUglyNumber(self, n: int) -> int:
        """Return the n-th ugly number (1-indexed; the first one is 1).

        Ugly numbers are positive integers whose only prime factors are 2,
        3 and 5. Starting from 1, the smallest pending candidate is popped
        and its multiples by 2, 3 and 5 are queued as future candidates.

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be a positive integer")

        h = [1]
        # Track queued values: the same number is reachable through several
        # factor orders (6 = 2 * 3 = 3 * 2), and queuing it once keeps the
        # heap free of duplicates that would otherwise be expanded again.
        seen = {1}

        for _ in range(n):
            num = heapq.heappop(h)
            for factor in (2, 3, 5):
                candidate = num * factor
                if candidate not in seen:
                    seen.add(candidate)
                    heapq.heappush(h, candidate)

        return num
    
# a more general control on the exponent can be realized by
class Solution:
    def nthUglyNumber(self, n: int) -> int:
        """Return the n-th ugly number, tracking the exponents of 2, 3 and 5.

        Each heap entry is ``(value, i, j, k)`` with
        ``value == 2**i * 3**j * 5**k``. The exponents are carried along so
        that a caller-side variant can bound them; the return value here is
        only the number itself.

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be a positive integer")

        h = [(1, 0, 0, 0)]
        # A value determines its exponents, so de-duplicating on the value
        # is enough to keep each entry in the heap at most once.
        seen = {1}

        for _ in range(n):
            num, i, j, k = heapq.heappop(h)
            successors = (
                (num * 2, i + 1, j, k),
                (num * 3, i, j + 1, k),
                (num * 5, i, j, k + 1),
            )
            for entry in successors:
                if entry[0] not in seen:
                    seen.add(entry[0])
                    heapq.heappush(h, entry)

        return num

In [None]:
295. Find Median from Data Stream

Median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, so the median is the mean of the two middle values.

For example,
[2,3,4], the median is 3

[2,3], the median is (2 + 3) / 2 = 2.5

Design a data structure that supports the following two operations:

void addNum(int num) - Add an integer from the data stream to the data structure.
double findMedian() - Return the median of all elements so far.


class MedianFinder:
    """Running median over a stream, kept with two heaps.

    ``minHeap`` stores the upper half as ``(value, value)`` entries and
    ``maxHeap`` stores the lower half as ``(-value, value)`` entries, so
    that heapq's min-heap surfaces the largest value of the lower half.
    After every ``addNum`` call ``minHeap`` holds either as many entries as
    ``maxHeap`` or exactly one more.
    """

    def __init__(self):
        """Start with both halves empty."""
        # Every element in minHeap is >= every element in maxHeap.
        self.maxHeap = []
        self.minHeap = []

    def addNum(self, num: int) -> None:
        """Insert ``num`` while keeping the two halves ordered and balanced."""
        upper, lower = self.minHeap, self.maxHeap

        # Route the new value through the upper half, then hand that half's
        # smallest value down to the lower half.
        heapq.heappush(upper, (num, num))
        smallest_upper = heapq.heappop(upper)[1]
        heapq.heappush(lower, (-smallest_upper, smallest_upper))

        # The upper half must never be the smaller one: move the largest
        # value of the lower half back up when it is.
        if len(lower) > len(upper):
            largest_lower = heapq.heappop(lower)[1]
            heapq.heappush(upper, (largest_lower, largest_lower))

    def findMedian(self) -> float:
        """Return the median of all numbers added so far."""
        upper_top = self.minHeap[0][1]
        if len(self.minHeap) > len(self.maxHeap):
            # Odd count: the extra element sits on top of the upper half.
            return upper_top
        return (upper_top + self.maxHeap[0][1]) * 0.5
        
        


# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()