### 1. Check brackets

In [179]:
from collections import namedtuple
import re 
Bracket = namedtuple("Bracket", ["char", "position"])

input_str = '[](()'
def find_mismatch(input_str: str) -> None | int:
    stack = []
    for index, char in enumerate(input_str):
        if char in ['{', '[', '(']:
            stack.append((index,char))
        elif char in ['}', ']', ')']:
            if len(stack) == 0:
                return index+1
            latest_open_bracket_index, latest_open_bracket = stack.pop()
            if latest_open_bracket == '{':
                if char != '}':
                    return index+1
            elif latest_open_bracket == '[':
                if char != ']':
                    return index+1
            elif latest_open_bracket == '(':
                if char != ')':
                    return index+1
        else:
            continue
    
    if len(stack) == 0:
        return "Success"
    return stack[-1][0]+1
                
find_mismatch(input_str)

3

### 2. Compute tree height

In [84]:
# def compute_height(n, parents):
#     ...

from dataclasses import dataclass, field
from typing import Type

@dataclass
class Node:
    index: int
    parent: list[Type['Node']] = field(default_factory=list)
    child: list[Type['Node']] = field(default_factory=list)

input = [-1, 0, 4, 0, 3]

## build tree: O(N)
def build_tree(input):
    all_nodes = [Node(index=index) for index in range(len(input))]
    for index, node in enumerate(all_nodes):
        if input[index] == -1:
            root = node
            node.parent = None
        else:
            node.parent = all_nodes[input[index]]
            node.parent.child.append(all_nodes[index])
    
    return root
    
root = build_tree(input)

## Find height: O(N)
def find_height(root):
    if root.child == []:
        return 1
    max_height = 0
    for child in root.child:
        subtree_root = child
        subtree_height = find_height(subtree_root)
        max_height = max(
            max_height, 
            subtree_height+1
        )
    return max_height

# find_height(root)

In [158]:
class Node:
    def __init__(self, index, parent=[], child=[]):
        self.index = index
        self.parent = parent
        self.child = child
    
    def __repr__(self):
        return f"Node({self.index=}, {self.parent=}, {self.child=})"

def build_tree(parents):
    all_nodes = [Node(index=index, parent=[], child=[]) for index in range(len(parents))]

    for index in range(len(all_nodes)):        
        if parents[index] == -1:
            root = all_nodes[index]
            all_nodes[index].parent = None
        else:
            all_nodes[index].parent = all_nodes[parents[index]]
            all_nodes[index].parent.child.append(all_nodes[index])
                
    return root, all_nodes

def compute_height(root):    
    if root.child == []:
        return 1
    max_height = 0
    for child in root.child:
        subtree_root = child
        subtree_height = compute_height(subtree_root)
        max_height = max(
            max_height, 
            subtree_height+1
        )
    return max_height

# parents = [-1,0,4,0,3]
# # root, all_nodes = build_tree(parents)
# root, all_nodes = build_tree(parents)
# print(compute_height(root))

In [164]:
parents=np.random.randint(0,n_nodes-1,n_nodes)
parents[:5]

array([43284, 51708,  6420, 67470, 29720])

In [172]:
import numpy as np
n_nodes=np.random.randint(1,1e6,1)[0]
parents=np.random.randint(0,n_nodes-1,n_nodes)
parents[0] = -1
print(n_nodes)
display(parents[:5])

root, all_nodes = build_tree(parents)
print(compute_height(root))

169214


array([    -1,  14303, 160376, 109098,  40235])

1


| | with open lineage | without open lineage |
| - | - | - |
| log level debug | succeed | succeed |
| log level warn | fail | ? |

### 3. Network packet processing simulation

In [86]:
from collections import namedtuple

Request = namedtuple("Request", ["arrived_at", "time_to_process"])
Response = namedtuple("Response", ["was_dropped", "started_at"])

In [149]:
from collections import deque
buffer_size = 2
all_requests = [(0,2),(1,4),(5,3)] #None #[(0,0)]
n_requests = len(all_requests)

class Buffer:
    def __init__(self, size):
        self.size = size
        self.finish_time = deque()

    def _process(self, request):
        curr_request_start_time, curr_request_processing_time = request
        curr_request_end_time = curr_request_start_time+curr_request_processing_time

        ## if nothing in queue...
        if not self.finish_time:
            ## add the end of the processing time to finish time
            self.finish_time.append(curr_request_end_time)
            return Response(False, curr_request_start_time)
            
        ## else if something in queue...
        else:
            ## if the new item starts after the previous item ends
            if curr_request_start_time >= self.finish_time[-1]:
                
                ## Reset deque
                self.finish_time = deque()
                self.finish_time.append(curr_request_end_time)
                return Response(False, curr_request_start_time)

            ## if new item starts before previous item ends
            else:
                
                ## Check start time of new item against the earliest finish time of previous items. If it does not exceed, it means the earliest item is still processing when this item arrives. Else, it means the earliest is already completed, so pop from the queue
                while curr_request_start_time >= self.finish_time[0]:
                    self.finish_time.popleft()

                # if the queue is full, the item is dropped
                if len(self.finish_time) >= self.size:
                    return Response(True, -1)
                
                # otherwise, the item is added to the queue, and the time of processing is delayed by the end time of the last processing end time in the queue
                else:
                    last_processing_end_time = self.finish_time[-1]
                    self.finish_time.append(
                        last_processing_end_time + curr_request_processing_time
                    )
                    return Response(False, last_processing_end_time)
        
    def process_requests(self, requests):
        responses = []
        for request in requests:
            # print(self.finish_time)
            response = self._process(request)
            responses.append(response)
        return responses

requests = []
for i in range(n_requests):    
    arrived_at, time_to_process = all_requests[i]
    requests.append(Request(arrived_at, time_to_process))

buffer = Buffer(buffer_size)
responses = buffer.process_requests(requests)

for response in responses:
    print(response.started_at if not response.was_dropped else -1)

deque([])
deque([2])
deque([2, 6])
0
2
6


### 4. Extending stack interface

In [88]:
class StackWithMax():
    '''
    To get a max value in O(1) time, we need to keep a record of the maximum value. There are 2 ways to do this
        - We can do this in O(N) space, by maintaining another stack with max values. With every pop or push operation, we recompute what the maximum value is up to that index, and whenever we are asked, simply read the last index in the max stack
    '''
    def __init__(self):
        self.__stack = []
        self.__maxstack = []

    def Push(self, a):
        self.__stack.append(a)
        
        if self.__maxstack == []:
            self.__maxstack.append(a)
        else:
            if self.__maxstack[-1] >= a:
                self.__maxstack.append(self.__maxstack[-1])
            else:
                self.__maxstack.append(a)

    def Pop(self):
        if len(self.__stack):
            stackval = self.__stack.pop()
            self.__maxstack.pop()
            return stackval
        else:
            return None


    def Max(self):
        if len(self.__stack):
            return self.__maxstack[-1]
        return None
        
stack = StackWithMax()

num_queries = 10
queries = [
    'push 2', 
    'push 3',
    'push 9',
    'push 7',
    'push 2',
    'max',
    'max',
    'max',
    'pop',
    'max',
    'pop',
    'max',
    'pop',
    'max',
    'pop',
    'max',
    'pop',
    'max',
]

for i in range(len(queries)):
    query = queries[i].split()

    if query[0] == "push":
        stack.Push(int(query[1]))
    elif query[0] == "pop":
        stack.Pop()
    elif query[0] == "max":
        print(stack.Max())
    else:
        assert(0)


9
9
9
9
9
3
2
None


### 5. Maximum in Sliding Window

- There are 3 ways to implement this. We'll implement all 3 and discuss their pros and cons

In [89]:
def max_sliding_window_naive(sequence, m):
    maximums = []
    for i in range(len(sequence) - m + 1):
        maximums.append(max(sequence[i:i + m]))

    return maximums

n = 8
input_sequence=[2,7,3,1,5,2,6,2]
assert len(input_sequence) == n
window_size = 4

print(*max_sliding_window_naive(input_sequence, window_size))

7 7 5 6 6


#### 1. Implement a queue using two stacks

- The idea here is that we implement the $m$ size window as a queue with 2 stacks, where the stacks keep track of the max value (as per Q4)
    - One stack holds the $m$ window elements in the order of the actual input 
    - One stack holds the $m$ window elements in reverse order
- With every shift, we enqueue a new value on the right, and dequeue the leftmost value. How?
    - Pop all values from stack A (in order) onto stack B (reverse order)
    - Pop the latest value of stack B
    - Pop values of stack B to stack A
    - Push latest value onto stack A
- At each point, we simply use `max` operation from StackWithMax to find the max value in the window, which is O(1) as we established in Q4
- Overall, sliding the window across the entire array takes O(N), so the overall operation is O(N) time complexity, with an additional O(N) space needed to store max values
    - The dequeue operation is actually O(N), but is considered amortized constant time O(1). Why?
    - Because the actual copying from stack A to stack B only has to happen once. Once stack B is complete, you no longer need to copy it the next time you want to pop something, until stack B is fully empty
    - Hence, for each element, you are pushed once onto stack A, popped once from stack A, and pushed once onto stack B
    - Then you are popped for stack B
    - So each element's `dequeue` operation is really only **amortized** O(1)! That is, O(1) most of the time, except the times when it is O(N)

In [34]:
import math

input_list = [2,7,3,1,5,2,6,2]
window_size = 4

class StackWithMax:
    def __init__(self, input_list):
        self.maxval = []
        self.stackval = input_list
    
    def push(self, val):
        if self.maxval == []:
            self.maxval.append(val)
        else:
            if self.maxval[-1] >= val:
                self.maxval.append(self.maxval[-1])
            else:
                self.maxval.append(val)
        
        self.stackval.append(val)

    def pop(self):
        if self.maxval != []:
            self.maxval.pop()
        
        if self.stackval != []:
            return self.stackval.pop()
        else:
            return None

    def max(self):
        if self.maxval != []:
            return self.maxval[-1]
        return -math.inf

    def is_empty(self):
        return len(self.stackval) == 0

# def window_max(input_list, window_size, verbose=False):
stack = StackWithMax([])
rev_stack = StackWithMax([])
output_max = []
for index in range(len(input_list)):

    ## If we're in the portion of the array with no legit window, just push to stack
    if index < window_size-1:
        stack.push(input_list[index])
    
    ## Otherwise, if we're in the portion of the array with proper window...
    else:
        ## If there is nothing in the reverse stack
        if rev_stack.is_empty():
            # pop all values in the stack and place on reverse array
            while not stack.is_empty():
                rev_stack.push(stack.pop())

        ## If there is something in the reverse stack, pop it. This represents removing the value that drops of the sliding window
        else:
            rev_stack.pop()
            
        ## Then push incoming value onto stack
        stack.push(input_list[index])

        ## The maximum value is max of stack and rev stack
        output_max.append(max(stack.max(), rev_stack.max()))
        # print(stack.maxval)
        # print(stack.max())
        # print(rev_stack.max())

output_max

[7, 7, 5, 6, 6]

#### 2. Preprocess block suffixes and prefixes

- The idea here is quite brilliant. We'll illustrate with an example:
    - Suppose $\text{input} = [1,2,3,4,5,1,2,3,4,5,1,2,3,4,5]$, and $\text{window size} = 4$
    - We partition the array into windows of 4, which can be denoted as $$ [1,2,3,4] \quad [5,1,2,3] \quad [4,5,1,2] \quad [3,4,5] $$
    - For every window, we will compute the prefix and suffix max 
        - The suffix max array in position $i$ is simply the maximum value of the array of the suffix starting from and including $i$ to the end of the array
        - The prefix max array in position $i$ is simply the maximum value of the array of the prefix starting from index 0 and ending at and including $i$ 
        $$\begin{aligned} 
            [1,2,3,4] &\rightarrow \quad \text{Suffix Max: } [4,4,4,4] &\quad \text{Prefix Max: } [1,2,3,4] \\
            [5,1,2,3] &\rightarrow \quad \text{Suffix Max: } [5,3,3,3] &\quad \text{Prefix Max: } [5,5,5,5] \\
            [4,5,1,2] &\rightarrow \quad \text{Suffix Max: } [5,5,2,2] &\quad \text{Prefix Max: } [4,5,5,5] \\
            [3,4,5] &\rightarrow \quad \text{Suffix Max: } [5,5,5] &\quad \text{Prefix Max: } [3,4,5]
            \end{aligned} $$
        - How long does this computation take?
            - For a given window size $k$, there must be $\frac{n}{k}$ arrays
            - For each array, to compute prefix/suffix max, either (i) there is 1 value in the prefix/suffix, and that is the max value, or (ii) we compare the new value with the prevailing max value. This gives us $k$ comparisons per window
            - As such, the creation of this suffix/prefix preprocessing takes $O(\frac{nk}{k}) = O(n)$ time!
    - Let's suppose we take a random window of size 4. Either (i) this window coincides with one of our partitions above, or (ii) it comprises the suffix of one block + the prefix of the next
        - But given a starting position $i$, we know quite which block is needed!
            - If the starting position $i$ coincides with one of the precomputed blocks, the window max is simply the suffix max
            - If it does not, then the window max is simply the maximum of the suffix max of that block from $i$, and the prefix max in the next block from $i+k!$

        - From our example, let's suppose we want the max in [2,3,4,5] i.e. $$[1,2,3,4,5,1] \quad \| 2,3,4,5 \| \quad [1,2,3,4,5]$$   
            - That is, we want the window starting at position $i = 6$
            - We know that $\text{suffix sum}_6 = 3$ 
            - We know that $\text{prefix sum}_{6+4-1=9} = 5$ 
            - So taking $\max(3,5) = 5$
            - The maximum value is 5

        - The final $\max$ computation is $O(1)$, because we are just comparing 2 values!

    - Taken together, the algorithm takes $O(\frac{nk}{k}) + O(1) = O(n)$    

In [11]:
input_list = [2,7,3,1,5,2,6,2]
window_size = 4

def window_max(input_list, window_size, verbose=False):


    n_partitions = (len(input_list) // window_size) + 1 
    suffix_max = [] 
    prefix_max = []

    ## O(N) to build prefix and suffix max 
    ## There are n/k partitions...
    for index in range(n_partitions):
        subarray = input_list[(index*window_size):((index+1)*window_size)]
        
        # ...and each partition runs k iterations of the inner loop
        for i in range(len(subarray)):
            suffix_max.append(max(subarray[i:]))
            prefix_max.append(max(subarray[:i+1]))

    # display(suffix_max)
    # display(prefix_max)
    
    window_max_list = []
    
    ## O(N) to loop across all possible windows
    for window_end in range(window_size-1, len(input_list)):
        window_start = window_end - window_size + 1
        window_max_list.append(max(suffix_max[window_start], prefix_max[window_end]))
    return window_max_list
        
window_max(input_list, window_size)

[7, 7, 5, 6, 6]

#### 3. Store relevant items in a deque

- The idea here is, again, to do some amortized O(N) approach
    - Suppose $\text{input} = [2,7,3,1,5,2,6,2]$, and $\text{window size} = 4$
    - We introduce a deque to hold the **index** of our candidate max values. Moving from left to right...
        1. We check that the incoming value at index $i$ is larger than the value at the leftmost index in the deque. If it is, pop the leftmost index of the deque until this is no longer true 
            - Why do we pop the values that are smaller?
                - The new index in question will always be larger than the existing indices, because we are moving from left to right in the sliding window
                - Since we are moving from left to right, we know that the incoming value will be a component of more windows that the current deque values (i.e. the values in the deque will drop out sooner, because they entered sooner) 
                - We know that values on the deque left (tail) are not larger than the current maximum in the deque
                - If they are also not larger than the incoming value, then there is no circumstance that they will be the max value in any window! 
            - Why do we pop from the left?
                - Because we want to remove all values that are smaller than the incoming, and to do this, we must go from small to large values
        2. Next, check that the rightmost value of the deque is still within the current sliding window. If it is not, pop until the remaining values are in the window
            - Why do we pop values from the right now?
                - Because we only ever append values on the left, and the sliding window goes from left to right
                - So it stands to reason that the rightmost value must also be the the leftmost index in the deque, and it must also be true that, going from right to left, the index values increase
    - After we pass the 2 checks above, the following must be true;
        - If the incoming value is larger than all the existing values, then the deque will be empty from step 1
        - All remaining items in the deque must within the window, from step 2
        - So we do an appendleft for the incoming index. 
            - If it is larger than all values in existing deque, it will be the only value in the deque
            - Else, it will form a potential future max value in the deque

- How do we analyse the time complexity of the 2 while loops in steps 1 and 2 above?
    - At any point, there is at most $k$ items in the deque (since we remove everything outside the window in step 2)
    - So it might look like the 2 while loops will be $O(NK)$
    - But it is not true, because you cannot pop without a push. 
    - So this is actually $O(N)$




In [129]:
from collections import deque

def window_max(input_list, window_size, verbose=False):
    '''
    Time complexity O(N): 
        - O(N) from iterating through each element of the input list. For each element, a fixed number of comparisons are made. 
        - At most k comparisons from both while loops

        - Taken together, it may seem like O(NK)
        - But notice that every element in the deque is added only once and popped only once. So the `while` loop is actually amortized O(N)
        - As such, time complexity is O(N)
    Space complexity O(N):
        - Storing the deque takes O(K) space in the worst case (since we are constantly popping items that are outside the window)
    '''
    maxval_index_deque = deque()
    output_index = 0
    output_list = []

    for i in range(len(input_list)):
        if verbose:
            print('='*50)
            print(f"{i=}, {maxval_index_deque=}, {output_list=}")
            if maxval_index_deque:
                print(f"{[input_list[x] for x in maxval_index_deque]=}")
        
        ## If the deque is not empty, remove all values smaller than the current input, starting from left of deque. 
        ## Because of the 2 while loops, the deque arranged from small to large going from left to right
        ## As such, it is guaranteed that indices i+1... after index i must be larger than index i
        ## So if we stop popping from maxval_index_deque, there is no need to check the subsequent values
        while (maxval_index_deque) and (input_list[maxval_index_deque[0]] < input_list[i]):
            maxval_index_deque.popleft()

        ## If the deque is not empty, remove all values outside the window of interest, starting from the right; that is, going from large to small. 
        ## Why right to left?
            ## At every step of the loop, we remove values outside the relevant window, and we remove all values that are smaller than the latest addition
            ## So it stands to reason that 
                ## (i) either the latest addition is the current max, and there are no other values in the deque
                ## or (ii) the latest value is not the current max, and the current maximum occurred somewhere before the latest added index
                ## As such, we should expect that the deque must be of strictly decreasing order, because there is never a case where we append a larger index to the right
        while (maxval_index_deque) and (maxval_index_deque[-1] <= (i-window_size)):
            maxval_index_deque.pop()

        ## Having done the above preocessing, append the latest index on the left
        maxval_index_deque.appendleft(i)

        ## Starting from the `window_size-1`-th index, every run of the loop is a possible window. Hence, we return the maximum value of the window by taking the rightmost value in the deque
        if i >= (window_size-1):
            output_list.append(input_list[maxval_index_deque[-1]])

        if verbose:
            print(f"{i=}, {maxval_index_deque=}, {output_list=}")
            if maxval_index_deque:
                print(f"{[input_list[x] for x in maxval_index_deque]=}")

    return output_list

input_list = [2,7,3,1,5,2,6,2]
window_size = 4
window_max(input_list=input_list, window_size=window_size, verbose=False)

[7, 7, 5, 6, 6]