## Find the lowest common ancestor of two nodes in a binary tree

Find the paths to the two nodes from the root. The last common node in the two pathes is lca. For example, if one path is a-b-c and the other is a-b-d, b is the lca. Or the first intersecting element in the two paths from the two nodes to the root.

In [86]:
class Tree(object):
    def __init__(self, key):
        self.key = key
        self.left = None
        self.right = None
        self.parent = None
        
    def __str__(self):
        return str(self.key)
    
    def set_left(self, tree):
        self.left = tree
        tree.parent = self

    def set_right(self, tree):
        self.right = tree
        tree.parent = self

def search_tree(tree, key):
    if not tree:
        return None
    if tree.key == key:
        return tree
    left = search_tree(tree.left, key)
    right = search_tree(tree.right, key)
    return left if left else right

In [87]:
a = Tree(1)
b = Tree(2)
c = Tree(3)
d = Tree(4)
e = Tree(5)
a.set_left(b)
a.set_right(c)
b.set_left(d)
b.set_right(e)
binary_tree = a
print(search_tree(a, 2))
print(search_tree(a, 3))
print(search_tree(a, 6))

2
3
None


In [88]:
def path_from_root(tree, path):
    if tree.parent:
        path_from_root(tree.parent, path)
    path.append(tree)
    
def print_path(path):
    print 'path:',
    for tree in path:
        print tree,
    print

In [89]:
def find_lca(tree, x, y):
    tree_x = search_tree(tree, x)
    path_x = []
    path_from_root(tree_x, path_x)
    tree_y = search_tree(tree, y)
    path_y = []
    path_from_root(tree_y, path_y)
    for tree1, tree2 in zip(path_x, path_y):
        if not tree1 == tree2:
            break
        lca = tree1
    return lca

In [90]:
lca = find_lca(a, 3, 5)
print('lca: {}'.format(lca))
lca = find_lca(a, 4, 5)
print('lca: {}'.format(lca))

lca: 1
lca: 2


In [102]:
# A more efficient, recursive approach, O(n)
def find_lca(tree, x, y):
    if not tree:
        return None
    
    if tree.key == x or tree.key == y:
        return tree
    
    left = find_lca(tree.left, x, y)
    right = find_lca(tree.right, x, y)
    
    if left and right:
        return tree
    
    return left if left else right

In [103]:
lca = find_lca(a, 3, 5)
print(lca)
lca = find_lca(a, 4, 5)
print(lca)

1
2


## Find the lowest common ancestor of two nodes in a binary search tree

The first algorithm for binary trees above works for binary search trees as well. There is a more efficient algorithm based on a property of binary search trees, the keys in the left subtree is always less than those of the right subtree. Recursively check the key of each node. The first node whose key is between x and y or equals to x or y is the lca.

In [93]:
def insert(tree, x, parent=None):
    if not tree:
        tree = Tree(x)
        tree.parent = parent
        if parent.key > x:
            parent.left = tree
        else:
            parent.right = tree
    else:
        if tree.key > x:
            insert(tree.left, x, tree)
        else:
            insert(tree.right, x, tree)
            
def inorder_traverse(tree):
    if tree:
        inorder_traverse(tree.left)
        process_tree(tree)
        inorder_traverse(tree.right)

def process_tree(tree):
    print(tree.key)

In [94]:
tree = Tree(3)
bst = tree
insert(tree, 1)
insert(tree, 2)
insert(tree, 4)
insert(tree, 5)
inorder_traverse(tree) # This is for validating if the tree is a BST.

1
2
3
4
5


In [95]:
def find_lca(tree, x, y):
    if not tree:
        return None
    if tree.key > min(x, y) and tree.key < max(x, y):
        return tree
    if tree.key == x or tree.key == y:
        return tree
    left = find_lca(tree.left, x, y)
    right = find_lca(tree.right, x, y)
    return left if left else right

In [96]:
lca = find_lca(tree, 3, 5)
print(lca)
lca = find_lca(tree, 4, 5)
print(lca)

3
4


## Print a binary search tree level by level

BFS but with two queues.

In [97]:
from Queue import Queue

def print_bst(tree):
    current_level = Queue()
    current_level.put(tree)
    next_level = Queue()
    while not current_level.empty():
        node = current_level.get()
        print node.key,
        if node.left:
            next_level.put(node.left)
        if node.right:
            next_level.put(node.right)
        if current_level.empty():
            current_level = next_level
            next_level = Queue()
            print

In [98]:
print_bst(tree)

3
1 4
2 5


## Validate a binary search tree

Algorithm 1:
Do in-order traverse. The current node should be greater than the previously processed node.
O(n)

Algorithm 2: min-max
The root, a, must be in [-inf, inf].
The left child, b, of the root must be in [-inf, a].
The right child, c, of the root must be in [a, inf].
The left child, d, of b must be in [-inf, b].
The right child, e, of b must be in [b, a].
The left child, f, of c must be in [a, c].
The right child, g, of c must be in [c, inf]
...
O(n)

In [99]:
def process_tree(tree):
    global stack
    stack.append(tree.key)

def validate_bst(tree):
    global stack
    inorder_traverse(tree)
    prev_key = -float('inf')
    for key in stack:
        if key < prev_key:
            return False
        prev_key = key
    return True
stack = []
assert validate_bst(bst) is True
stack = []
assert validate_bst(binary_tree) is False

In [105]:
def validate_bst(tree, min_value, max_value):
    if not tree:
        return True
    if tree.key < min_value or tree.key > max_value:
        return False
    return validate_bst(tree.left, min_value, tree.key) and validate_bst(tree.right, tree.key, max_value)
min_value = -float('inf')
max_value = float('inf')
assert validate_bst(bst, min_value, max_value)
assert not validate_bst(binary_tree, min_value, max_value)

## Two Sum

O(n) using a hash table

In [3]:
def find_twosum(target, data):
    dict_ = {}
    result = []
    for i, _ in enumerate(data):
        if data[i] in dict_:
            result.append((dict_[data[i]], i))
        else:
            dict_[target - data[i]] = i
    return result

data = [1, 7, 3, 8, 9]
target = 10
print find_twosum(target, data)

[(1, 2), (0, 4)]


## Three Sum

```
sort(s)
for i = 0 to n-3
    a = s[i]
    start = i + 1
    end = n - 1
    while start < end
        b = s[start]
        c = s[end]
        if a + b + c = 0 then
            output a, b, c
            end -= 1
        else if a + b + c > 0 then
            end -= 1
        else
            start += 1
```
Worst case O(n^2)

In [11]:
def find_threesum(target, data):
    data = sorted(data)
    result = []
    n = len(data)
    for i in range(n - 2):
        a = data[i]
        start = i + 1
        end = n - 1
        while start < end:
            b = data[start]
            c = data[end]
            if a + b + c == target:
                result.append((a, b, c))
                end -= 1
            elif a + b + c > target:
                end -= 1
            else:
                start += 1
    return result

data = [1, 8, 2, 3, 9, 7, 4, 5]
target = 10
print(find_threesum(target, data))

[(1, 2, 7), (1, 4, 5), (2, 3, 5)]


## Evaluate infix expressions

This is a infix-to-postfix conversion algorithm but the logic is same:

```
Scan the token list from left to right.
If the token is an operand, append it to the end of the output list.
If the token is a left parenthesis, push it on the opstack.
If the token is a right parenthesis, pop the opstack until the corresponding left parenthesis is removed. Append each operator to the end of the output list.
If the token is an operator, *, /, +, or -, push it on the opstack. However, first remove any operators already on the opstack that have higher or equal precedence and append them to the output list.
When the input expression has been completely processed, check the opstack. Any operators still on the stack can be removed and appended to the end of the output list.```

In [44]:
from tokenize import generate_tokens
from StringIO import StringIO

def validate_expr(expr):
    # If expr starts with -, pad with 0.
    # All parenteses are balanced.
    pass

def compute(num2, num1, op):
    print 'Compute {} {} {}'.format(num1, op, num2)
    if op == '+':
        return num1 + num2
    elif op == '-':
        return num1 - num2
    elif op == '*':
        return num1 * num2
    elif op == '/':
        return num1 / num2
    else:
        print('Invalid operator')
        return None
    
def eval_infix(expr):
    num_stack = []
    op_stack = []
    priority = {'(': 0, ')': 0, '+': 1, '-': 1, '*': 2, '/': 2}

    # Handle the leading negative symbol
    if expr.startswith('-'):
        expr = '1*(0-1)*' + expr[1:]
    
    tokens = generate_tokens(StringIO(expr).readline)
    for token_num, token_val, _, _, _ in tokens:
        if not token_val:
            break
        elif token_val.isdigit():
            num_stack.append(float(token_val))
        elif token_val == '(':
            op_stack.append(token_val)
        elif token_val == ')':
            while not op_stack[-1] == '(':
                num_stack.append(compute(num_stack.pop(), num_stack.pop(), op_stack.pop()))
            op_stack.pop()
        else: # token_val is an operator.
            while op_stack and priority[token_val] < priority[op_stack[-1]]:
                num_stack.append(compute(num_stack.pop(), num_stack.pop(), op_stack.pop()))
            op_stack.append(token_val)
    
    while op_stack:
        num_stack.append(compute(num_stack.pop(), num_stack.pop(), op_stack.pop()))
    
    return num_stack[0] if num_stack else 0

expr = '-12+2*(1-9)'
eval_infix(expr)

Compute 0.0 - 1.0
Compute -1.0 * 12.0
Compute 1.0 * -12.0
Compute 1.0 - 9.0
Compute 2.0 * -8.0
Compute -12.0 + -16.0


-28.0

## Max. No of Meeting Rooms

```
Flatten the (start, end) pairs.
Sort the times.
For each time
  If the time is start
    count++
  else
    count--
Return max. count
```

memory O(n)

time O(nlogn) for sorting the list

where n is the no. of meetings

In [12]:
meetings = [(1, 4), (5, 6), (8, 9), (2, 6)]
times = []
for time in meetings:
    times.append((time[0], 0))
    times.append((time[1], 1))

times.sort(key = lambda time: time[0])

count = 0
max_count = 0
for time, type_ in times:
    if type_ == 0:
        count += 1
    else:
        count -= 1
    max_count = max(count, max_count)

print max_count

2


## Check if a query word with a wildcard exists in a given list of words

```
Sort the list of words. O(nlogn)
Build an inverted index using the first alphabet. O(n)
Iterate through the words that have the same first alphabet as the query word. O(nm) worst case if all words start with the same alphabet.
```

In [35]:
def search_word(query, words):
    n = len(words)
    words.sort()

    # Build index
    current_alpha = words[0][0]
    start = 0
    index = {}
    for i in range(1, n):
        if not current_alpha == words[i][0]:
            end = i - 1
            index[current_alpha] = (start, end)
            start = i
            current_alpha = words[i][0]
    end = n - 1
    index[current_alpha] = (start, end)

    # Search through the words that start with the same alphabet
    start, end = index[query[0]]
    for i in range(start, end + 1):
        match = True
        for j, ch in enumerate(query):
            if not ch == '.' and not ch == words[i][j]:
                match = False
                break
        if match:
            return True
    
    return False

words = ['cat', 'bat', 'bot', 'car', 'rat']
query = 'c.r'
print(search_word(query, words))

True


## Find the sorted uninon of two sorted lists

Something similar to the merge part of the merge sort

In [3]:
a1 = [1, 3, 5, 7]
a2 = [2, 3, 4, 5, 6, 7]
n1 = len(a1)
n2 = len(a2)
result = []
i = j = 0
while i < n1 and j < n2:
    if a1[i] < a2[j]:
        result.append(a1[i])
        i += 1
    elif a1[i] > a2[j]:
        result.append(a2[j])
        j += 1
    else:
        result.append(a1[i])
        i += 1
        j += 1

while i < n1:
    result.append(a1[i])
    i += 1
    
while j < n2:
    result.append(a2[j])
    j += 1
    
print(result)

[1, 2, 3, 4, 5, 6, 7]


## Palindrome

In [5]:
def palindrome(x, low, high):
    if low == high:
        return True
    return True if x[low] == x[high] else False

    palindrome(x, low + 1, high - 1)

x = 'madam'
assert palindrome(x, 0, len(x) - 1) == True
x = 'lonely'
assert palindrome(x, 0, len(x) - 1) == False

## Maximum Contiguous Sum

Iterate through the elements and calculate the contiguous sum. Once the sum becomes negative, it doesn't help increase the contiguous sum. It'd better start over. Return the max. of the contiguous sums.

In [4]:
def max_contiguous_sum(x):
    n = len(x)
    s = n*[0]
    s[0] = x[0]
    
    # The following for-loop can be replaced with the following one liner:
    # s[i] = max(s[i - 1] + x[i], x[i])
    for i in range(1, n):
        if s[i - 1] < 0:
            s[i] = x[i]
        else:
            s[i] = s[i - 1] + x[i]
    return max(s)

x = [1, 3, -1, 2, -6, 4]
assert max_contiguous_sum(x) == 5

## Longest Increasing Subsequence

The longest increasing subsequence at position i is the longest increasing subsequnce so far at position j added by 1 only if x[i] > x[j].


In [3]:
def longest_increasing_subsequence(x):
    n = len(x)
    l = n*[0]
    l[0] = 1
    for i in range(1, n):
        max_length = 0
        for j in range(i):
            if x[i] > x[j]:
                max_length = max(max_length, l[j])
        l[i] = max_length + 1
    return max(l)

x = [1, 3, -1, 2, -6, 4]
assert longest_increasing_subsequence(x) == 3

## BFS

From the search tree, one can find the shortest path to each vertex from the starting vertex. Note that find_path(u, v, parent) is valid only when bfs is done starting from u. 

In [2]:
from collections import defaultdict

adj = {}
adj[1] = [2, 5, 6]
adj[2] = [1, 3, 5]
adj[3] = [2, 4]
adj[4] = [3, 5]
adj[5] = [1, 2, 4]
adj[6] = [1]

parent = defaultdict(lambda: None)
def init_search():
    for key in parent.keys():
        parent[key] = None

def bfs(adj, start):
    init_search()
    discovered = defaultdict(lambda: False)
    processed = defaultdict(lambda: False)
    queue = []
    
    discovered[start] = True
    queue.append(start)
    while queue:
        u = queue.pop(0)
        for v in adj[u]:
            if not discovered[v]:
                discovered[v] = True
                parent[v] = u
                queue.append(v)
        process_vertex(u)
        processed[u] = True

def process_vertex(v):
    print v

def find_path(start, end, parent):
    stack = []
    v = end
    while v:
        stack.append(v)
        v = parent[v]
    
    print 'Path from {} to {}:'.format(start, end)
    while stack:
        print stack.pop()

print 'bfs starting from 1'
bfs(adj, 1)
find_path(1, 4, parent)

print 'bfs starting from 2'
bfs(adj, 2)
find_path(2, 5, parent)

bfs starting from 1
1
2
5
6
3
4
Path from 1 to 4:
1
5
4
bfs starting from 2
2
1
3
5
6
4
Path from 2 to 5:
2
5


## DFS

BFS with a stack instead of a queue. This is same as the recursive implmentation with printing each vertex in process_vertex_early(). This implementation cannot be used for topological sorting. In topological sorting each vertex should be pushed to a stach in process_vertex late().

In [3]:
from collections import defaultdict

adj = {}
adj[1] = [2, 5, 6]
adj[2] = [1, 3, 5]
adj[3] = [2, 4]
adj[4] = [3, 5]
adj[5] = [1, 4]
adj[6] = [1]

def dfs(adj, start):
    discovered = defaultdict(lambda: False)
    processed = defaultdict(lambda: False)
    stack = []
    
    discovered[start] = True
    stack.append(start)
    while stack:
        u = stack.pop()
        for v in adj[u]:
            if not discovered[v]:
                discovered[v] = True
                stack.append(v)
        process_vertex(u)
        processed[u] = True

def process_vertex(v):
    print v

dfs(adj, 1)

1
6
5
4
3
2


This dfs algorithm works for topological sorting. All vertices are initially white. A discovered vertex is grey while A process one is black.
1. color all vertices white
2. initialize an empty stack S
3. while there is still a white vertex u
4. color[u] = grey
5. vactive = u
6. do
7. if vactive has a white neighbor v
8. color[v] = grey
9. insert vactive into S
10. vactive = v
11. else
12. color[vactive ] = black
13. pop the top vertex of S, and set it to vactive
14. while vactive 6= ∅

## Find a postion to insert a number to a sorted array using binary search

In [27]:
def find_position(x, low, high, key):
    if high - low == 1:
        print 'high - low = 1. low {}, high {}'.format(low, high)
        if key < x[low]:
            return low
        elif key > x[high]:
            return high + 1
        else:
            return high

    if low == high:
        if x[low] > key:
            return low
        else:
            return low + 1

    if low > high:
        return

    mid = (low + high)/2
    if x[mid] == key:
        return mid
    if x[mid] > key:
        return find_position(x, low, mid - 1, key)
    else:
        return find_position(x, mid + 1, high, key)
        
x = [1, 3, 4, 8, 12]
n = len(x)
print find_position(x, 0, n - 1, 2)
print find_position(x, 0, n - 1, 3)
print find_position(x, 0, n - 1, 7)
print find_position(x, 0, n - 1, 9)
print find_position(x, 0, n - 1, 13)

high - low = 1. low 0, high 1
1
high - low = 1. low 0, high 1
1
high - low = 1. low 3, high 4
3
high - low = 1. low 3, high 4
4
high - low = 1. low 3, high 4
5


## Median of a stream of integers

- Unsorted array

O(1) for insert, O(n) for median

- Sorted array

median = x[n/2]

O(n) for insert, O(1) for median

- Binary search tree

In-order traverse. Return the n/2-th node.

O(h) for insert, O(n) for median. h is n in the worst case.

- Balanced binary search tree

Keep the no. of nodes in the left and right trees balanced. Specificaly, keep the left tree height equal or taller than the right tree by 1. The median is the root if the no. of nodes is even. Otherwise, the median is the avg. of the root and the logical predecessor of the root, i.e., the right most node of the left subtree of the root.

O(h) for insert, O(h) for median. h is n the worst case.
Finding median can be O(1) if update the pointer to the predecessor on each insertion.

- Two binary heaps

Store an incoming integer to either maxheap or minheap. Similarly to the balanced binary search tree, keep the size of maxheap equal to or greater than the size of minheap by 1. The median is the root of the maxheap when the no. of the integers odd. Otherwise, the avg. of the roots of the two heaps.

O(log n) for insertion, O(1) for median.

In [15]:
import heapq

def insert(minheap, maxheap, x):
    if not minheap or x < minheap[0]: 
        print 'Insert {} to maxheap'.format(x)
        heapq.heappush(maxheap, -x)
    else:
        print 'Insert {} to minheap'.format(x)
        heapq.heappush(minheap, x)
        if len(minheap) - len(maxheap) > 0:
            y = heapq.heappop(minheap)
            print 'Move {} to maxheap'.format(y)
            heapq.heappush(maxheap, -y)

    if len(maxheap) - len(minheap) > 1: 
        y = -heapq.heappop(maxheap)
        print 'Move {} from maxheap to minheap'.format(y)
        heapq.heappush(minheap, y)

def find_median(minheap, maxheap):
    n1 = len(minheap)
    n2 = len(maxheap)
    if (n1 + n2) % 2 == 0:
        return 0.5*(minheap[0] - maxheap[0])
    else:
        return maxheap[0]

minheap, maxheap = [], []
x = [1, 3, 5, 9, 7, 5]
for item in x:
    insert(minheap, maxheap, item)
assert find_median(minheap, maxheap) == 5.0

Insert 1 to maxheap
Insert 3 to maxheap
Move 3 from maxheap to minheap
Insert 5 to minheap
Move 3 to maxheap
Insert 9 to minheap
Insert 7 to minheap
Move 5 to maxheap
Insert 5 to maxheap
Move 5 from maxheap to minheap


## Write atoi()

Iterate from the first character to the end and calculate a running sum. At each digit, (running sum) * 10 + toInt(current character). If the string starts with '-', remember that. The iteration above should start from the frist numerical character. Return the running sum appropriately depending on the starting character. 

In [7]:
def atoi(string):
    chToNum = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9}
    isPositive = True
    if string.startswith('-'):
        isPositive = False
        string = string[1:]

    current_sum = 0
    for ch in string:
        if ch not in chToNum:
            print "Not a digig."
            return None
        current_sum = current_sum*10 + chToNum[ch]
    return current_sum if isPositive else -current_sum

atoi('-42')

-42

## Linked List

In [2]:
class Node(object):
    def __init__(self, x):
        self.data = x
        self.next = None
    
    def __str__(self):
        return str(self.data)
    
def traverse_linked_list(l):
    result = []
    while l:
        result.append(l.data)
        l = l.next
    return result

### Unsorted List

In [3]:
# Replace the head with the new node
def insert(l, x):
    node = Node(x)
    node.next = l
    return node

myList = insert(None, 1)
myList = insert(myList, 2)
myList = insert(myList, 3)
assert traverse_linked_list(myList) == [3, 2, 1]

In [6]:
def search(l, x):
    while l:
        if l.data == x:
            return l
        l = l.next
    return None

myList = Node(1)
myList = insert(myList, 2)
myList = insert(myList, 3)
assert search(myList, 1).data == 1
assert search(myList, 2).data == 2
assert search(myList, 3).data == 3
assert search(myList, 4) is None

In finding a predecessor the key is that you iteratively check node.next instead of node.

In [18]:
def predecessor(l, x):
    # Either head or the end of the list
    if not l or not l.next:
        return None

    while l.next:
        if l.next.data == x:
            return l
        l = l.next
    return None

# Recursive implentation
def predecessor(l, x):
    if not l or not l.next:
        return None
    if l.next.data == x:
        return l
    else:
        return predecessor(l.next, x)

myList = Node(1)
myList = insert(myList, 2)
myList = insert(myList, 3)
assert traverse_linked_list(myList) == [3, 2, 1]
assert predecessor(myList, 2).data == 3
assert predecessor(myList, 1).data == 2
assert predecessor(myList, 3) is None

Search the target value and its predecessor. Rewire the predecessor to the next of the search result.
Think about the following cases:
- The given list is empty.
- The given list doesn't contain the target value.
- The head of the given list contains the target value so that the predecessor is None.

In [30]:
def delete(l, x):
    if l:
        head = l
        target = search(l, x)
        # If target != None and pred == None, target is the head.
        # If target != None and pred != None, target is a node between the head and the tail.
        if target:            
            pred = predecessor(l, x)
            if pred:
                pred.next = target.next
            else:
                head = target.next
        return head
    return None

myList = Node(1)
myList = insert(myList, 2)
myList = insert(myList, 3)
myList = delete(myList, 2)
assert traverse_linked_list(myList) == [3, 1]

In [14]:
def split(l, pivot):
    lt, gt = None, None
    while l:
        if l.data < pivot:
            lt = insert(lt, l.data)
        else:
            gt = insert(gt, l.data)
        l = l.next
        
    return lt, gt

my_list = insert(None, 3)
my_list = insert(my_list, 5)
my_list = insert(my_list, 2)
my_list = insert(my_list, 1)
my_list = insert(my_list, 4)
assert traverse_linked_list(my_list) == [4, 1, 2, 5, 3]

lt, gt = split(my_list, 3)
assert traverse_linked_list(lt) == [2, 1]
assert traverse_linked_list(gt) == [3, 5, 4]

In [21]:
# This split implementation is more memory-efficient than the previous one because it re-wires nodes
# instead of creating a copy of each node. 

def insert_node(l, node):
    node.next = l
    return node

def split(l, pivot):
    lt, gt = None, None
    head = l
    while l:
        next_ = l.next
        if l.data < pivot:
            lt = insert_node(lt, l)
        else:
            gt = insert_node(gt, l)
        l = next_
    return lt, gt

my_list = insert(None, 3)
my_list = insert(my_list, 5)
my_list = insert(my_list, 2)
my_list = insert(my_list, 1)
my_list = insert(my_list, 4)
assert traverse_linked_list(my_list) == [4, 1, 2, 5, 3]

lt, gt = split(my_list, 3)
assert traverse_linked_list(lt) == [2, 1]
assert traverse_linked_list(gt) == [3, 5, 4]

### Sorted List

Think about the following cases:
- The given list is empty.
- The head of the given list is greather than the inserting value.
- The inserting value is between the head and the tail.

In [19]:
# Insert to a sorted list
def insert_sorted(l, x):
    new_node = Node(x)
    
    # If the list is empty...
    if not l:
        return new_node
    
    head = l
    # If head is greather than x...
    if l.data >= x:
        new_node.next = l
        head = new_node
        return head

    while l.next:
        if l.next.data < x:
            l = l.next
        else:
            l.next, new_node.next = new_node, l.next
            return head
    
    # If all nodes in the list are less than x...
    if not l.next:
        l.next = new_node
        return head

myList = Node(2)
myList = insert_sorted(myList, 3)
myList = insert_sorted(myList, 1)
assert traverse_linked_list(myList) == [1, 2, 3]

### Cycle in a Linked List
A. Use a hash table. Traverse the list. At each node check if its item is already in the hash table. If yes, the list has a cycle. Otherwise, store the item in the table.
B. Traverse with one-node forward and two-node forward. If two traversing reaches to the same node, there is a cycle in the list.

In [19]:
def find_cycle(l):
    l1 = l
    l2 = l
    while l1 and l2:
        l1 = l1.next        
        if l2.next:
            l2 = l2.next.next
        else:
            break
        if id(l1) == id(l2):
            return True
    return False

my_list = insert(None, 1)
my_list = insert(my_list, 2)
my_list = insert(my_list, 3)
my_list = insert(my_list, 4)
my_list = insert(my_list, 5)
node1 = search(my_list, 1)
node2 = search(my_list, 2)
node5 = search(my_list, 5)
node1.next = node5
assert find_cycle(my_list) == True

### Middle of a Linked List
Traverse the list in two ways: one-step forward and two-step forward. When the two-step forward traversing meets the end of the list, the node where the one-step forward traversing is located is the middle of the list.

In [4]:
def find_middle(l):
    l1 = l
    l2 = l
    while l2:
        if l2.next:
            l2 = l2.next.next
            l1 = l1.next
        else:
            break
    return l1.data

my_list = insert(None, 1)
my_list = insert(my_list, 2)
my_list = insert(my_list, 3)
my_list = insert(my_list, 4)
my_list = insert(my_list, 5)
assert find_middle(my_list) == 3

### Reverse the Individual Words in a String 

In [4]:
def reverse_words(s):
    s = list(s)
    found_start = False
    start = 0
    end = 0
    for i in range(len(s)):
        if not found_start:
            if s[i] == ' ':
                continue
            else:
                start = i
                found_start = True
        else:
            if s[i] == ' ':
                end = i - 1
                reverse(s, start, end)
                found_start = False
            else:
                continue
    s = ''.join(s)
    return s
    
def reverse(l, low, high):
    while low < high:
        l[low], l[high] = l[high], l[low]
        low += 1
        high -= 1

l = list(' hello ')
reverse(l, 1, 5)
s = ''.join(l)
assert s == ' olleh '

s = ' hello  world '
assert reverse_words(s) == ' olleh  dlrow '

### Reverse the order of the words in a String
First, reverse the whole string. Second, reverse each word.

Clarify how to handle spaces.

In [5]:
def reverse_words_order(s):
    s = ''.join(reversed(s))
    return reverse_words(s)

s = ' hello world  '
reverse_words_order(s)

'  world hello '

### Strip Whitespaces in a String
The initial value for the index of the first space is 0. Iterate over the characters. Swap if the character of the current position is not a space. Move the position of the first space to the right. Similar to Skiena's implementation of quicksort. In the end strip off all trailing spaces. O(n)

In [2]:
def strip_spaces(s):
    first_space = 0
    s = list(s)
    for i, char in enumerate(s):
        if not char == ' ':
            swap(s, first_space, i)
            first_space += 1
    s = s[:first_space]
    return ''.join(s)

def swap(l, i, j):
    l[i], l[j] = l[j], l[i]

s = ' hello  world '
assert strip_spaces(s) == 'helloworld'

### Remove duplicate characters in a string
'aa  bbb' -> 'a b'

In [3]:
def remove_duplicates(s):
    result = [s[0]]
    for i in range(1, len(s)):
        if not s[i] == s[i-1]:
            result.append(s[i])
    return ''.join(result)

s = 'aa bbb'
assert remove_duplicates(s) == 'a b'

### Find the first non-repeating character in a string
'abca' -> 'b'

A.

Store the count of each alphabet in a dictionary.

Scan the string. For each char, increment its count.

Scan again. For each char, return if its count is one.

O(len(s)), but have to scan twice.

B.

If the string is long and the size of alphabets is small, algorithm A is inefficient.

Store the (count, the index of the first appearance) tuples in a dictionary.

Scan the string. For each char, increment its count.

Scan the count dictionary instead of the string. Return the char at the min index among the chars whose counts are one.

Still O(len(s)) if len(s) > len(alphabets), but the second scanning is much faster than the first scanning.

In [13]:
from collections import defaultdict

def unique_char(s):
    counts = defaultdict(lambda: (0, 0))
    for i, char in enumerate(s):
        if counts[char][0] == 0:
            counts[char] = (1, i)
        else:
            count = counts[char][0] + 1
            index = counts[char][1]
            counts[char] = (count, index)
        
    min_index = len(s)
    for k, (count, index) in counts.iteritems():
        if count == 1:
            min_index = min(min_index, index)
    
    return s[min_index]
    
assert unique_char('abca') == 'b'

### Longest increasing subsequence
A. Dynamic programming, O(n^2)

In [9]:
def longest_increasing_subsequence(s):
    n = len(s)
    l = n*[0]
    l[0] = 1
    for i in range(n):
        for j in range(i):
            if s[i] > s[j]:                    
                l[i] = max(l[i], l[j] + 1)
    return max(l)

assert longest_increasing_subsequence([1, 3, 2, 7, 8, 5]) == 4

Can you return the longest increasing subsequnce, too?

In [19]:
def longest_increasing_subsequence(s):
    n = len(s)
    l = n*[0]
    l[0] = 1
    subseq = [[] for i in range(n)]
    subseq[0] = [s[0]]

    for i in range(n):
        for j in range(i):
            if s[i] > s[j]:
                length = l[j] + 1
                if l[i] < length:
                    l[i] = length
                    subseq[i] = subseq[j] + [s[i]] 
    
    max_length = max(l)
    longest_subseq = subseq[l.index(max_length)] # index returns the first occurence.
    return max_length, longest_subseq

length, subseq = longest_increasing_subsequence([1, 3, 2, 7, 8, 5])
assert length == 4
assert subseq == [1, 3, 7, 8]

Topological sorting and longest path from each node in DAG. O(n + m) but m ~ O(n^2) so that no efficiency gain.

### Max contiguous sum
Scan the array and calculate the contiguous sum up to the current position. If adding the current item to the contiguous sum up to the previous item doesn't increase the contiguous sum, reset.

In [22]:
def max_countiguous_sum(a):
    n = len(a)
    s = n*[0]
    s[0] = a[0]
    for i in range(1, n):
        s[i] = max(a[i], s[i-1] + a[i])
    return max(s)

assert max_countiguous_sum([1, 3, -5, 1, 2]) == 4