## Design a Data Structure

<b> 295.Find Median from Data Stream </b>

Pushing onto a `heapq` heap costs O(log N).  
To find the median, keep two heaps of (nearly) equal size: one holds the smaller half of the numbers, the other holds the larger half.

In [1]:
import heapq
class MedianFinder:
    """Running median over a stream of numbers, backed by two heaps.

    ``left`` is a max-heap (values are stored negated) holding the smaller
    half; ``right`` is a min-heap holding the larger half.  In this variant
    ``right`` has either as many items as ``left`` or exactly one more.
    """

    def __init__(self):
        """Start with both halves empty."""
        self.left = []
        self.right = []

    def addNum(self, num):
        """Insert ``num`` while keeping the two halves balanced."""
        if len(self.left) != len(self.right):
            # right is one ahead: pass num through right and hand right's
            # smallest value over to left (negated for max-heap ordering).
            smallest_upper = heapq.heappushpop(self.right, num)
            heapq.heappush(self.left, -smallest_upper)
            return
        # Equal sizes: pass num through left and move left's largest value
        # to right, so everything in right stays >= everything in left.
        negated_largest_lower = heapq.heappushpop(self.left, -num)
        heapq.heappush(self.right, -negated_largest_lower)

    def findMedian(self):
        """Return the median by peeking at the heap tops.

        Nothing is popped: the values must stay in place for later calls.
        """
        upper_min = self.right[0]
        if len(self.right) != len(self.left):
            return upper_min
        # left holds negated values, so the subtraction adds the real value.
        return (upper_min - self.left[0]) / 2
          

In [2]:
# Demo: add 1 and 2, print the median, then add 3 and print it again.
# The recorded notebook output is 1.5 followed by 2.
obj = MedianFinder()
obj.addNum(1)
obj.addNum(2)
print(obj.findMedian()) 
obj.addNum(3)
print(obj.findMedian()) 

1.5
2


<b>146. LRU Cache </b>  

Least Recently Used cache. get() and put() are both O(1).  
O(1) lookup => use a dictionary as the hash map.  
To track which entry is the least recently used, keep the entries ordered by position => OrderedDict or a doubly linked list.

For OrderedDict in python,  
popitem: by default it treats the dict as a stack, LIFO, if given param last=False => FIFO.   
move_to_end: move to the end of space like the item is newly added. if given param last=False => move to the beginning of the space

In [7]:
from collections import OrderedDict
# class Node:
#     """double linked list"""
#     def __init__(self, key, val):
#         self.key = key
#         self.val = val
#         self.prev = None
#         self.next = None
    
class LRUCache:
    """Fixed-capacity LRU cache built on ``OrderedDict``.

    Entries are kept in usage order: the first item is the least recently
    used one, the last item the most recently used one.
    """

    def __init__(self, capacity):
        """Create an empty cache that holds at most ``capacity`` keys."""
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key):
        """Return the value for ``key`` and mark it as just used; -1 if absent."""
        if key not in self.cache:
            return -1
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key, value):
        """Store ``key`` as the most recent entry, evicting the oldest on overflow."""
        # Dropping an existing key first makes the re-insert land at the end.
        self.cache.pop(key, None)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            # last=False pops from the front, i.e. the least recently used.
            self.cache.popitem(last=False)

In [8]:
# Demo: a capacity-2 cache; the trailing comments give the expected effect
# or return value of each call.
cache = LRUCache( 2 );

cache.put(1, 1);
cache.put(2, 2);
cache.get(1);       # returns 1
cache.put(3, 3);    # evicts key 2
cache.get(2);       # returns -1 (not found)
cache.put(4, 4);    # evicts key 1
cache.get(1);       # returns -1 (not found)
cache.get(3);       # returns 3
cache.get(4);       # returns 4

In [9]:
# Echo the underlying OrderedDict to see which keys are still cached.
cache.cache

OrderedDict([(3, 3), (4, 4)])

Here is how to implement the doubly linked list version by hand.

In [10]:
class Node:
    """One entry of the doubly linked usage list."""

    def __init__(self, k, v):
        self.key, self.val = k, v
        self.prev = None
        self.next = None


class LRUCache:
    """LRU cache: a dict for O(1) lookup plus a doubly linked list for recency.

    Nodes next to ``head`` are the least recently used; nodes next to
    ``tail`` are the most recently used.
    """

    def __init__(self, capacity):
        """Create an empty cache that holds at most ``capacity`` keys."""
        self.capacity = capacity
        self.cache = {}  # key -> Node
        # Sentinel nodes: they are never stored in self.cache, they only
        # mark the two ends of the list so insert/remove need no None checks.
        self.head = Node(0, 0)
        self.tail = Node(0, 0)
        self.head.next = self.tail
        self.tail.prev = self.head

    def _remove(self, node):
        """Unlink ``node`` by connecting its two neighbours to each other."""
        before, after = node.prev, node.next
        before.next = after
        after.prev = before

    def _add(self, node):
        """Link ``node`` in right before ``tail`` (the most recent position)."""
        last = self.tail.prev
        last.next = node
        node.prev, node.next = last, self.tail
        self.tail.prev = node

    def get(self, key):
        """Return the value for ``key`` and mark it as just used; -1 if absent."""
        node = self.cache.get(key)
        if node is None:
            return -1
        # Re-linking at the tail records the access.
        self._remove(node)
        self._add(node)
        return node.val

    def put(self, key, value):
        """Store ``key`` as the most recent entry, evicting the oldest on overflow."""
        stale = self.cache.get(key)
        if stale is not None:
            self._remove(stale)
        fresh = Node(key, value)
        self._add(fresh)
        self.cache[key] = fresh
        if len(self.cache) > self.capacity:
            oldest = self.head.next  # least recently used sits next to head
            self._remove(oldest)
            del self.cache[oldest.key]
        
        
        

In [11]:
# Demo: the same capacity-2 scenario, run against the linked-list LRUCache.
cache = LRUCache( 2 );

cache.put(1, 1);
cache.put(2, 2);
cache.get(1);       # returns 1
cache.put(3, 3);    # evicts key 2
cache.get(2);       # returns -1 (not found)
cache.put(4, 4);    # evicts key 1
cache.get(1);       # returns -1 (not found)
cache.get(3);       # returns 3
cache.get(4);       # returns 4
# Echo the key -> Node dict; the recorded output shows keys 3 and 4.
cache.cache

{3: <__main__.Node at 0x7f88f9a322e0>, 4: <__main__.Node at 0x7f88f9a2f640>}

<b> 155 Min Stack </b>  

How to store the min value is the key of this question. when a new number, which we'll call x, is placed on a Stack, the numbers below it will not change for as long as number x remains on the Stack. whenever number x is the top of the Stack, the minimum will always be the same, as it's simply the minimum out of x and all the numbers below it.

So save the value and min value at that time together

the time complexity for push, pop, top, getmin are all O(1).

In [12]:
class MinStack:
    """Stack that also reports its current minimum in O(1).

    Every entry is stored as a ``(value, min_so_far)`` pair, where
    ``min_so_far`` is the smallest value among that entry and everything
    below it.  The pair on top therefore always carries the stack minimum.
    """

    def __init__(self):
        """Create an empty stack."""
        self.stack = []

    def push(self, x: int) -> None:
        """Push ``x`` together with the minimum of the stack including ``x``."""
        if not self.stack:
            min_val = x
        else:
            # The entries below never change while x is on the stack, so the
            # new minimum is just x versus the previous top's minimum.
            min_val = min(x, self.stack[-1][-1])
        self.stack.append((x, min_val))

    def pop(self) -> int:
        """Remove the top entry and return its value.

        Raises:
            IndexError: if the stack is empty.
        """
        return self.stack.pop()[0]

    def top(self) -> int:
        """Return the top value without removing it.

        Raises:
            IndexError: if the stack is empty.
        """
        return self.stack[-1][0]

    def getMin(self) -> int:
        """Return the smallest value currently on the stack.

        Raises:
            IndexError: if the stack is empty.
        """
        return self.stack[-1][-1]


# Your MinStack object will be instantiated and called as such:
# obj = MinStack()
# obj.push(x)
# obj.pop()
# param_3 = obj.top()
# param_4 = obj.getMin()

In [14]:
# Demo: push 3 then 4 and print the top value and the current minimum.
# The recorded notebook output is 4 followed by 3.
obj = MinStack()
obj.push(3)
obj.push(4)
print(obj.top())
print(obj.getMin())

4
3


<b> 295 find Median from data stream </b>

Both the heaps are balanced (or nearly balanced)  
The max-heap contains all the smaller numbers while the min-heap contains all the larger numbers  

How to keep the two heaps balanced is the key; that is what the heapq.heappushpop() calls are doing.

In [15]:
import heapq
class MedianFinder:
    """Running median backed by two heaps; ``left`` holds the extra item.

    ``left`` is a max-heap (values stored negated) for the smaller half and
    ``right`` is a min-heap for the larger half.
    """

    def __init__(self):
        """Start with both halves empty."""
        self.left = []
        self.right = []

    def addNum(self, num: int) -> None:
        """Insert ``num`` while keeping the two heaps balanced."""
        if len(self.left) == len(self.right):
            # Same size: pass num through right and give right's smallest
            # value to left, so left ends up one item ahead.
            promoted = heapq.heappushpop(self.right, num)
            heapq.heappush(self.left, -promoted)
        else:
            # left is ahead: pass num through left and give left's largest
            # value (stored negated) to right to even the sizes out.
            demoted = heapq.heappushpop(self.left, -num)
            heapq.heappush(self.right, -demoted)

    def findMedian(self) -> float:
        """Return the median by peeking at the heap tops."""
        lower_max = -self.left[0]
        if len(self.left) != len(self.right):
            # Odd count: the extra item on the left is the median.
            return lower_max
        # Even count: average the largest of the lower half and the
        # smallest of the upper half.
        return (lower_max + self.right[0]) / 2

In [17]:
# Demo: add 1 and 2, print the median, then add 3 and print it again.
# The recorded notebook output is 1.5 followed by 2.
obj = MedianFinder()
obj.addNum(1)
obj.addNum(2)
print(obj.findMedian())
obj.addNum(3)
print(obj.findMedian())

1.5
2


<b> 297 Serialize and Deserialize Binary Tree </b>

The neat part is using iter() and next() when rebuilding the tree.  
That way the node values are consumed in exactly the order in which the pre-order traversal wrote them.

In [None]:
# Definition for a binary tree node.
class TreeNode(object):
    """Binary tree node holding a value and two child links."""

    def __init__(self, x):
        self.left = self.right = None
        self.val = x


class Codec:
    """Convert a binary tree to and from a space-separated pre-order string.

    Missing children are written as the marker ``"X"``.
    """

    def serialize(self, root):
        """Encode a tree as a single string.

        :type root: TreeNode
        :rtype: str
        """
        tokens = []

        def visit(node):
            # Pre-order: the node itself, then its left and right subtrees.
            if node:
                tokens.append(str(node.val))  # join() needs strings
                visit(node.left)
                visit(node.right)
            else:
                tokens.append("X")

        visit(root)
        return " ".join(tokens)

    def deserialize(self, data):
        """Decode a string produced by ``serialize`` back into a tree.

        :type data: str
        :rtype: TreeNode
        """
        # One shared iterator: every build() call consumes the next token,
        # so tokens are read in the same pre-order they were written in.
        stream = iter(data.split())

        def build():
            token = next(stream)
            if token == "X":
                return None
            node = TreeNode(int(token))
            node.left = build()
            node.right = build()
            return node

        return build()

<b> 348. design tic-tac-toe </b>

Use two lists of ints to track each row's and each column's count, and two numbers to track the counts on the diagonal and the anti-diagonal.  
Player 1 adds 1 and player 2 adds -1. If the absolute value of any count equals the board size n, that row, column or diagonal is completely filled by one player, and that player must be the one who just made the move.

smart points:  
1. player A use 1, player B use -1, => only need to check abs value == n or not
2. if find the full mark, the winning player must be the player who just made the move

In [18]:
class TicTacToe:
    """n x n tic-tac-toe that detects a win in O(1) per move.

    Player 1 contributes +1 and player 2 contributes -1 to a running total
    per row, per column and per diagonal; a total of +/- n means one player
    owns that whole line.
    """

    def __init__(self, n):
        """Set up the counters for an empty board of size ``n``."""
        self.n = n
        self.rows = [0] * n
        self.cols = [0] * n
        self.diagonal = 0
        self.anti_diagonal = 0

    def move(self, row, col, player):
        """Record that ``player`` marks cell (``row``, ``col``).

        @param row The row of the board.
        @param col The column of the board.
        @param player The player, can be either 1 or 2.
        @return The current winning condition, can be either:
                0: No one wins.
                1: Player 1 wins.
                2: Player 2 wins.
        """
        delta = 1 if player == 1 else -1
        self.rows[row] += delta
        self.cols[col] += delta
        if row == col:
            self.diagonal += delta
        if row + col == self.n - 1:
            self.anti_diagonal += delta

        # Only the lines touched by this move (plus the two diagonals) can
        # have just been completed, and only by the player who moved.
        totals = (
            self.rows[row],
            self.cols[col],
            self.diagonal,
            self.anti_diagonal,
        )
        if any(abs(total) == self.n for total in totals):
            return player
        return 0

In [20]:
# Demo on a 3x3 board with players 1 and 2 alternating moves.
# The recorded output is 0 for the first six moves and 1 for the last one,
# where player 1 places the third mark in row 2.
a= TicTacToe(3)
print(a.move(0,0,1))
print(a.move(0,2,2))
print(a.move(2,2,1))
print(a.move(1,1,2))
print(a.move(2,0,1))
print(a.move(1,0,2))
print(a.move(2,1,1))

0
0
0
0
0
0
1


<b> 895. Maximum Frequency Stack </b>

The stack is composed of three pieces: a Counter that tracks how many times each number is currently in the stack; a defaultdict of lists that maps each frequency to the numbers that reached it; and an integer that tracks the current max frequency.
The key step is the defaultdict of lists {freq: [num]}: if x has frequency n, x appears in freq_stack[1], freq_stack[2], ... freq_stack[n].
The max_f integer tells us which list to pop from, and it is decreased by one whenever that list becomes empty after a pop.


In [21]:
from collections import Counter, defaultdict
class FreqStack:
    """Stack whose pop() returns the most frequent value, newest first on ties."""

    def __init__(self):
        """Create an empty frequency stack."""
        self.freq = Counter()  # {num: how many copies are on the stack}
        self.freq_stack = defaultdict(list)  # {freq: [nums that reached it]}
        self.max_f = 0  # highest frequency currently present

    def push(self, x: int) -> None:
        """Push ``x`` and file it under its new frequency."""
        count = self.freq[x] + 1
        self.freq[x] = count
        if count > self.max_f:
            self.max_f = count
        # A value with frequency n is listed once in each of the buckets
        # 1..n, in push order, so every bucket behaves like its own stack.
        self.freq_stack[count].append(x)

    def pop(self) -> int:
        """Remove and return the most frequent value (most recent on ties)."""
        bucket = self.freq_stack[self.max_f]
        val = bucket.pop()
        if not bucket:
            # Frequencies grow one step at a time, so once the top bucket is
            # empty the next highest frequency is exactly one lower.
            self.max_f -= 1
        self.freq[val] -= 1
        return val
        


# Your FreqStack object will be instantiated and called as such:
# obj = FreqStack()
# obj.push(x)
# param_2 = obj.pop()

In [22]:
# Demo: push six values, then pop four times.
# The recorded notebook output is 5, 7, 5, 4.
stack = FreqStack()
stack.push(5)
stack.push(7)
stack.push(5)
stack.push(7)
stack.push(4)
stack.push(5)  # => [5,7,5,7,4,5]
print(stack.pop())
print(stack.pop())
print(stack.pop())
print(stack.pop())

5
7
5
4


<b> 208. Implement Trie (Prefix Tree) </b>   

Words are stored in a nested hash map structure. Because words with the same prefix share one chain of nodes, this can use less space than storing every word separately in a flat hash map, where the collision rate also tends to rise as the number of words grows.  
The Trie structure is used in many scenarios, like autocomplete, spell checker, IP routing etc.
M is the key length (the word length)  
It supports insert(): O(M)  search(): O(M). Search prefix: O(M)  


In [23]:
class TrieNode:
    """One character position in the trie."""

    def __init__(self):
        self.children = {}  # char -> TrieNode
        self.is_end = False  # True when a stored word ends at this node


class Trie:
    """Prefix tree: words that share a prefix share the same chain of nodes."""

    def __init__(self):
        self.root = TrieNode()

    def _find(self, text):
        """Follow ``text`` from the root; return the last node or None."""
        node = self.root
        for ch in text:
            node = node.children.get(ch)
            if node is None:
                return None
        return node

    def insert(self, word):
        """Add ``word``, creating nodes for any characters not present yet."""
        node = self.root
        for ch in word:
            child = node.children.get(ch)
            if child is None:
                child = node.children[ch] = TrieNode()
            node = child
        node.is_end = True

    def search(self, word):
        """Return True only if ``word`` itself was inserted."""
        node = self._find(word)
        # Reaching a node is not enough: it may only be a prefix of a word.
        return node is not None and node.is_end

    def startsWith(self, prefix):
        """Return True if any inserted word starts with ``prefix``."""
        return self._find(prefix) is not None

In [25]:
# Your Trie object will be instantiated and called as such:
# The recorded notebook output is True, True, False, True: "app" only
# counts as a stored word after it has been inserted itself.
obj = Trie()
obj.insert("apple")
print(obj.search("apple"))
print(obj.startsWith("app"))
print(obj.search("app"))
obj.insert("app")
print(obj.search("app"))

True
True
False
True


<b> 211 Design Add and Search Words Data Structure </b>

Uses the Trie structure.  
In search(), a recursive helper function explores every possible route when it meets a '.' wildcard.

In [28]:
class TrieNode:
    """One character position in the trie."""

    def __init__(self):
        self.children = {}  # char -> TrieNode
        self.is_end = False  # True when a stored word ends at this node


class WordDictionary:
    """Word store whose search treats '.' as a single-character wildcard."""

    def __init__(self):
        # Build the root here rather than via a default argument, so each
        # dictionary gets its own root node.
        self.root = TrieNode()

    def addWord(self, word):
        """
        Adds a word into the data structure.
        """
        node = self.root
        for ch in word:
            child = node.children.get(ch)
            if child is None:
                child = node.children[ch] = TrieNode()
            node = child
        node.is_end = True

    def search(self, word):
        """Return True if a stored word matches ``word``."""

        def match(start, node):
            # Walk the literal characters; on the first character that has
            # no child, either branch out (wildcard) or give up.
            for pos in range(start, len(word)):
                ch = word[pos]
                child = node.children.get(ch)
                if child is not None:
                    node = child
                    continue
                if ch != '.':
                    return False
                # '.' stands for any one character: try every child with
                # the remainder of the pattern.
                return any(
                    match(pos + 1, nxt) for nxt in node.children.values()
                )
            return node.is_end

        return match(0, self.root)

In [29]:
# Your WordDictionary object will be instantiated and called as such:
# The recorded notebook output is False, False, True: a prefix of a stored
# word ("wor") does not count as a match.
obj = WordDictionary()
print(obj.search("a"))
obj.addWord("word")
print(obj.search("wor"))
print(obj.search("word"))

False
False
True


<b> 212. Word Search II </b>

Trie plus backtracking recursion. A very smart solution.  
the list of words needed to be searched is used to construct the trie.  
backtracking run through the whole board in 4 directions to compare with the trie node.

in the Trie structure, is_end can be replaced by other attributes, like here self.word, which stores the entire target word in the words list, marking a word is found, making it easier to know what the word is found

In [32]:
class TrieNode:
    """One character position in the trie of target words."""

    def __init__(self):
        self.children = {}  # char -> TrieNode
        # Holds the complete word at the node where that word ends,
        # False everywhere else.
        self.word = False


class Trie:
    """Prefix tree over the words to look for."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` and store it on the node of its final character."""
        cursor = self.root
        for ch in word:
            nxt = cursor.children.get(ch)
            if nxt is None:
                nxt = cursor.children[ch] = TrieNode()
            cursor = nxt
        # Doubles as the end-of-word flag and tells us which word was hit.
        cursor.word = word


class Solution:
    def findWords(self, board, words):
        """Return the words from ``words`` that can be traced on ``board``.

        The trie is built from the word list (not from the board); every
        cell is then used as a starting point for a backtracking walk.
        """
        if len(board) == 0 or len(board[0]) == 0:
            return []

        trie = Trie()
        for w in words:
            trie.insert(w)

        found = []
        n_rows, n_cols = len(board), len(board[0])
        for i in range(n_rows):
            for j in range(n_cols):
                self.dfs_helper(board, i, j, trie.root, found)
        return found

    def dfs_helper(self, board, i, j, node, res):
        """Backtracking step: try to extend the path at ``node`` with cell (i, j)."""
        # A word ends at the node reached so far: record it once.
        if node.word:
            res.append(node.word)
            node.word = False  # cleared so the same word is not added twice
            # No return: longer words may continue below this node.

        inside = 0 <= i < len(board) and 0 <= j < len(board[0])
        if not inside:
            return

        c = board[i][j]
        child = node.children.get(c)
        if not child:
            # No target word continues with this letter.
            return

        # Mark the cell as used for the current path, explore the four
        # neighbours, then restore it (the backtracking step).
        board[i][j] = '#'
        for di, dj in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            self.dfs_helper(board, i + di, j + dj, child, res)
        board[i][j] = c

In [33]:
# Demo: search a 4x4 letter board for four candidate words.
# The recorded notebook output is ['oath', 'eat'].
board = [['o','a','a','n'],
  ['e','t','a','e'],
  ['i','h','k','r'],
  ['i','f','l','v']]
words = ["oath","pea","eat","rain"]
word_search = Solution()
word_search.findWords(board, words)

['oath', 'eat']

<b> 642. Design Search Autocomplete System </b>

Trie structure, each branch represents a sentence. each node is a character, including empty space as valid char.  
dfs recursion is used to find all possible sentences with same prefix phrase.    
sorted by ranking to give the proper recommendations  


In [59]:
class TrieNode:
    """One character position in the trie of sentences."""

    def __init__(self):
        self.children = {}  # char -> TrieNode
        self.sent = None  # the full sentence, set where a sentence ends
        self.rank = 0  # negated hit count, so a plain sort puts hottest first


class AutocompleteSystem:
    """Sentence autocomplete backed by a trie.

    Every trie branch spells out one sentence character by character;
    a space is an ordinary character.
    """

    def __init__(self, sentences, times):
        """Load the historical ``sentences`` with their hit counts ``times``."""
        self.root = TrieNode()
        self.keyword = ""  # characters typed since the last '#'
        for idx, text in enumerate(sentences):
            self.add_record(text, times[idx])

    def add_record(self, sentence, hot_degree):
        """Insert ``sentence`` or raise its hit count by ``hot_degree``."""
        cursor = self.root
        for ch in sentence:
            nxt = cursor.children.get(ch)
            if nxt is None:
                nxt = cursor.children[ch] = TrieNode()
            cursor = nxt
        # cursor is now the node of the sentence's last character.
        cursor.sent = sentence
        cursor.rank -= hot_degree

    def search(self, phrase):
        """Return ``(rank, sentence)`` pairs for sentences starting with ``phrase``."""
        cursor = self.root
        for ch in phrase:
            cursor = cursor.children.get(ch)
            if cursor is None:
                return []
        # Everything stored at or below this node shares the prefix.
        return self.dfs_helper(cursor)

    def dfs_helper(self, node):
        """Collect ``(rank, sentence)`` pairs from ``node`` and its subtree."""
        if not node:
            return []
        found = [(node.rank, node.sent)] if node.sent else []
        # Keep descending even when this node ends a sentence: longer
        # sentences may continue below it.
        for child in node.children.values():
            found += self.dfs_helper(child)
        return found

    def input(self, c):
        """Handle one typed character and return up to three suggestions.

        '#' ends the sentence: it is recorded with one more hit and an
        empty list is returned.
        """
        if c == '#':
            self.add_record(self.keyword, 1)
            self.keyword = ""
            return []
        self.keyword += c
        matches = self.search(self.keyword)
        # Tuples sort by rank first (most hits first, since ranks are
        # negative), then by sentence text.
        matches.sort()
        return [sent for _, sent in matches[:3]]


In [60]:
# Demo: type "i", " ", "a" and then "#" (end of sentence), printing the
# suggestions returned after each character.
# NOTE(review): the recorded output below also contains lists of
# (rank, sentence) tuples and extra empty lists that the visible code does
# not print; it looks like it came from an earlier run with a debug print.
# Re-run the cell to confirm.
auto = AutocompleteSystem(["i love you","island","iroman","i love leetcode"],[5,3,2,2])
print(auto.input("i"))
print(auto.input(" "))
print(auto.input("a"))
print(auto.input("#"))

[(-5, 'i love you'), (-2, 'i love leetcode'), (-3, 'island'), (-2, 'iroman')]
['i love you', 'island', 'i love leetcode']
[(-5, 'i love you'), (-2, 'i love leetcode')]
['i love you', 'i love leetcode']
[]
[]
[]
[]
