In [None]:
# Top K Frequent Words 692 Medium
# Top K Frequent Elements 347 Medium

# Top K Frequent Words 692

In [14]:
# Top K Frequent Words 692 Medium
# https://leetcode.com/problems/top-k-frequent-words/

# Given an array of strings words and an integer k, return the k most frequent strings.
# Return the answer sorted by the frequency from highest to lowest. Sort the words with the same frequency by their lexicographical order.

# Constraints:
# 1 <= words.length <= 500
# 1 <= words[i].length <= 10
# words[i] consists of lowercase English letters.
# k is in the range [1, The number of unique words[i]]
# Follow-up: Could you solve it in O(n log(k)) time and O(n) extra space?

import heapq
import random
from collections import Counter

class Solution692:
  """Top K Frequent Words (LeetCode 692) solved with frequency buckets."""

  def topKFrequent(self, words, k):
    """Return the ``k`` most frequent words.

    The result is ordered by descending frequency; words sharing a
    frequency are ordered lexicographically.

    Args:
      words: list of strings (may be empty).
      k: number of words to return.

    Returns:
      A list of at most ``k`` words; ``[]`` when ``k <= 0``.
    """
    if k <= 0:
        return []

    counts = Counter(words)
    # buckets[c] collects every word that occurs exactly c times. A count can
    # be as large as len(words), hence the +1.
    buckets = [[] for _ in range(len(words) + 1)]
    for word, count in counts.items():
        buckets[count].append(word)

    pairs = []
    # Scan from the largest possible count, len(words), down to 1. The top
    # bucket has to be visited: it is the only populated one when every
    # entry of ``words`` is the same word.
    for freq in range(len(words), 0, -1):
        for word in buckets[freq]:
            # Negated frequency makes the ascending sort below yield
            # "highest frequency first, then alphabetical".
            pairs.append((-freq, word))
        # A bucket is always taken whole so ties are resolved by the sort.
        if len(pairs) >= k:
            break
    pairs.sort()
    return [word for _, word in pairs[:k]]


# Demo for Solution692: prints the computed answer next to the expected one
# for two sample inputs.
if __name__ == '__main__':
  # Example 1: "i" and "love" both occur twice, the other words once.
  words = ["i", "love", "leetcode", "i", "love", "coding"]
  k = 2
  out_692 = ['i', 'love']
  # Explanation: "i" and "love" are the two most frequent words.
  # Note that "i" comes before "love" due to a lower alphabetical order.
  print(f'Input: {words}')
  print(f'Solution692: {Solution692().topKFrequent(words, k)}\nExpected: {out_692}\n')

  # Example 2: every word has a different frequency, so no tie-break is needed.
  words_1 = ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"]
  k_1 = 4
  out_692_1 = ["the", "is", "sunny", "day"]
  # Explanation: "the", "is", "sunny" and "day" are the four most frequent words, 
  # with the number of occurrence being 4, 3, 2 and 1 respectively.
  print(f'Input: {words_1}')
  print(f'Solution692: {Solution692().topKFrequent(words_1, k_1)}\nExpected: {out_692_1}\n')

Input: ['i', 'love', 'leetcode', 'i', 'love', 'coding']
Solution692: ['i', 'love']
Expected: ['i', 'love']

Input: ['the', 'day', 'is', 'sunny', 'the', 'the', 'the', 'sunny', 'is', 'is']
Solution692: ['the', 'is', 'sunny', 'day']
Expected: ['the', 'is', 'sunny', 'day']



# Top K Frequent Elements 347

In [16]:
# Top K Frequent Elements 347 Medium
# https://leetcode.com/problems/top-k-frequent-elements/

# Given an integer array nums and an integer k, return the k most frequent elements. You may return the answer in any order.

# Constraints:
# 1 <= nums.length <= 10^5
# k is in the range [1, the number of unique elements in the array].
# It is guaranteed that the answer is unique.
# Follow up: Your algorithm's time complexity must be better than O(n log n), where n is the array's size.
from collections import Counter

class Solution347:
  """Top K Frequent Elements (LeetCode 347) via ``Counter.most_common``."""

  def topKFrequent(self, nums, k):
    """Return the ``k`` values that occur most often in ``nums``.

    The values come back in the order ``Counter.most_common(k)`` yields them.
    """
    frequency_table = Counter(nums)
    top_values = []
    # most_common yields (value, occurrences) pairs; only the value is kept.
    for value, _occurrences in frequency_table.most_common(k):
      top_values.append(value)
    return top_values


# Demo for Solution347: prints the computed answer next to the expected one
# for two sample inputs.
if __name__ == '__main__':
  # Example 1: 1 occurs three times, 2 twice and 3 once.
  nums = [1, 1, 1, 2, 2, 3] 
  k = 2
  out_347 = [1, 2]
  print(f'Input nums: {nums}\nInput k: {k}')
  print(f'Solution: {Solution347().topKFrequent(nums, k)}\nExpected: {out_347}\n')

  # Example 2: single-element input.
  nums_1 = [1]
  k_1 = 1
  out_347_1 = [1]
  print(f'Input nums: {nums_1}\nInput k: {k_1}')
  print(f'Solution: {Solution347().topKFrequent(nums_1, k_1)}\nExpected: {out_347_1}')


Input nums: [1, 1, 1, 2, 2, 3]
Input k: 2
Solution: [1, 2]
Expected: [1, 2]

Input nums: [1]
Input k: 1
Solution: [1]
Expected: [1]


Approach 1: Heap
Let's start with the simple heap approach, which has $\mathcal{O}(N \log k)$ time complexity. To ensure that $\mathcal{O}(N \log k)$ is always less than $\mathcal{O}(N \log N)$, the particular case $k = N$ can be considered separately and solved in $\mathcal{O}(N)$ time.

Algorithm

The first step is to build a hash map element -> its frequency. In Java, we use the data structure HashMap. Python provides the dictionary subclass Counter to initialize the hash map we need directly from the input array.
This step takes $\mathcal{O}(N)$ time, where $N$ is the number of elements in the list.

The second step is to build a heap of size $k$ using $N$ elements. Adding the first $k$ elements takes linear time $\mathcal{O}(k)$ in the average case, and $\mathcal{O}(\log 1 + \log 2 + \dots + \log k) = \mathcal{O}(\log k!) = \mathcal{O}(k \log k)$ in the worst case. It's equivalent to the heapify implementation in Python. After the first $k$ elements we start to push and pop at each step, $N - k$ steps in total. The time complexity of a heap push/pop is $\mathcal{O}(\log k)$ and we do it $N - k$ times, which means $\mathcal{O}((N - k) \log k)$ time complexity. Adding both parts up, we get $\mathcal{O}(N \log k)$ time complexity for the second step.

The third and last step is to convert the heap into an output array. That can be done in $\mathcal{O}(k \log k)$ time.

In Python, the heapq library provides the function nlargest, which combines the last two steps under the hood and has the same $\mathcal{O}(N \log k)$ time complexity.

In [19]:
from collections import Counter
class Solution:
    """Top K Frequent Elements (LeetCode 347) using a bounded heap."""

    def topKFrequent(self, nums, k):
        """Return the ``k`` most frequent values of ``nums``.

        Args:
            nums: list of hashable values.
            k: number of values to return; the problem statement guarantees
                1 <= k <= number of distinct values in ``nums``.

        Returns:
            A new list holding the ``k`` most frequent values. On the heap
            path the values are ordered by descending frequency; on the
            ``k == len(nums)`` shortcut they keep their input order.
        """
        # Shortcut: under the constraint above, k == len(nums) can only hold
        # when every value is distinct, so the whole input is the answer.
        # A copy is returned so the result never aliases the caller's list.
        if k == len(nums):
            return list(nums)

        # 1. Build the hash map: value -> how often it appears.
        # O(N) time
        count = Counter(nums)
        # 2-3. Build a heap of the top k frequent values and convert it
        # into an output list.
        # O(N log k) time
        return heapq.nlargest(k, count.keys(), key=count.get)

# Demo for the heap-based Solution class defined above: prints the computed
# answer next to the expected one for two sample inputs.
if __name__ == '__main__':
  # Example 1: 1 occurs three times, 2 twice and 3 once.
  nums = [1, 1, 1, 2, 2, 3]
  k = 2
  out_347 = [1, 2]
  print(f'Input nums: {nums}\nInput k: {k}')
  # Exercise Solution (this cell's class), not the earlier Solution347.
  print(f'Solution: {Solution().topKFrequent(nums, k)}\nExpected: {out_347}\n')

  # Example 2: single-element input (takes the k == len(nums) shortcut).
  nums_1 = [1]
  k_1 = 1
  out_347_1 = [1]
  print(f'Input nums: {nums_1}\nInput k: {k_1}')
  print(f'Solution: {Solution().topKFrequent(nums_1, k_1)}\nExpected: {out_347_1}')

Input nums: [1, 1, 1, 2, 2, 3]
Input k: 2
Solution: [1, 2]
Expected: [1, 2]

Input nums: [1]
Input k: 1
Solution: [1]
Expected: [1]


Complexity Analysis

Time complexity: $\mathcal{O}(N \log k)$ if $k < N$ and $\mathcal{O}(N)$ in the particular case of $N = k$. That ensures the time complexity is better than $\mathcal{O}(N \log N)$.

Space complexity: $\mathcal{O}(N + k)$ to store the hash map with not more than $N$ elements and a heap with $k$ elements.

In [20]:
# Algorithm
# The algorithm is quite straightforward :
# Build a hash map element -> its frequency and convert its keys into the array unique of unique elements. 
# Note that elements are unique, but their frequencies are not. That means we need a partition algorithm that works fine with duplicates.
# Work with unique array. Use a partition scheme (please check the next section) to place the pivot into its perfect position pivot_index in the sorted array, 
# move less frequent elements to the left of pivot, and more frequent or of the same frequency - to the right.

# Compare pivot_index and N - k.
# If pivot_index == N - k, the pivot is the (N - k)th less frequent element, and all elements on the right are more frequent or of the same frequency. 
# Return these top k frequent elements.

# Otherwise, choose the side of the array to proceed recursively.

from collections import Counter
class Solution:
    """Top K Frequent Elements (LeetCode 347) using quickselect on frequencies."""

    def topKFrequent(self, nums, k):
        """Return the ``k`` most frequent values of ``nums`` in unspecified order.

        Args:
            nums: iterable of hashable values (may be empty).
            k: number of values to return.

        Returns:
            A list of the ``k`` most frequent distinct values. ``[]`` when
            ``k <= 0``; every distinct value when ``k`` is at least the
            number of distinct values.
        """
        count = Counter(nums)
        unique = list(count.keys())
        n = len(unique)

        # Degenerate requests need no selection. These guards also keep
        # random.randint below from ever receiving an empty range.
        if k <= 0:
            return []
        if k >= n:
            return unique

        def partition(left, right, pivot_index) -> int:
            """Partition unique[left..right] around the pivot's frequency.

            Strictly less frequent values end up left of the returned index;
            values of equal or higher frequency end up to its right.
            """
            pivot_frequency = count[unique[pivot_index]]
            # 1. move pivot to end
            unique[pivot_index], unique[right] = unique[right], unique[pivot_index]

            # 2. move all less frequent elements to the left
            store_index = left
            for i in range(left, right):
                if count[unique[i]] < pivot_frequency:
                    unique[store_index], unique[i] = unique[i], unique[store_index]
                    store_index += 1

            # 3. move pivot to its final place
            unique[right], unique[store_index] = unique[store_index], unique[right]

            return store_index

        # The kth most frequent value is the (n - k)th less frequent one.
        # Partially sort until index n - k holds the value it would hold in
        # an ascending-by-frequency order; everything to its right is then
        # at least as frequent.
        target = n - k
        left, right = 0, n - 1
        # Iterative quickselect: `target` always stays within [left, right].
        while left < right:
            pivot_index = partition(left, right, random.randint(left, right))
            if pivot_index == target:
                break
            if target < pivot_index:
                right = pivot_index - 1   # continue in the left part
            else:
                left = pivot_index + 1    # continue in the right part

        # Return top k frequent elements
        return unique[target:]

# Demo for the quickselect-based Solution class defined above: prints the
# computed answer next to the expected one for two sample inputs.
if __name__ == '__main__':
  # Example 1: 1 occurs three times, 2 twice and 3 once.
  nums = [1, 1, 1, 2, 2, 3]
  k = 2
  out_347 = [1, 2]
  print(f'Input nums: {nums}\nInput k: {k}')
  # Exercise Solution (this cell's class), not the earlier Solution347.
  # Quickselect returns the top-k values in no particular order, so the
  # result is sorted to be directly comparable with the expected list.
  print(f'Solution: {sorted(Solution().topKFrequent(nums, k))}\nExpected: {out_347}\n')

  # Example 2: single-element input.
  nums_1 = [1]
  k_1 = 1
  out_347_1 = [1]
  print(f'Input nums: {nums_1}\nInput k: {k_1}')
  print(f'Solution: {sorted(Solution().topKFrequent(nums_1, k_1))}\nExpected: {out_347_1}')

Input nums: [1, 1, 1, 2, 2, 3]
Input k: 2
Solution: [1, 2]
Expected: [1, 2]

Input nums: [1]
Input k: 1
Solution: [1]
Expected: [1]
