# Data Structures - Concrete Examples and Interview Questions

## Arrays

In [1]:
# Define array structure
array = [10, 2, 7, 5]
print(array)

[10, 2, 7, 5]


In [2]:
# Random Indexing: Indexes starts with 0
print(array[0])
print(array[1])
print(array[2])
print(array[0:2])
print(array[1:3])
print(array[:-1])
print(array[:-2])
print(array[:])


10
2
7
[10, 2]
[2, 7]
[10, 2, 7]
[10, 2]
[10, 2, 7, 5]


In [3]:
# Python can mix data types in an array
array1 = [10, 2, "Jacques", 5]
print(array1)

[10, 2, 'Jacques', 5]


In [4]:
# Update array with index
array1[1] = "Aldous"
print(array1)

[10, 'Aldous', 'Jacques', 5]


In [5]:
# Find max number in array  - This is called linear search O(N)
array2 = [10,42,55,2,1,0]
max = array2[0]
for num in array2:
    if num > max:
        max = num
print(max)

55


In [6]:
# Find min number in array
min = array2[0]
for num in array2:
    if num < min:
        min = num
print(min)

0


## Reverse Order of Array

In [15]:
# Interview Question - Reverse order of array
def reverse(nums):

    # pointing to the first item
    start_index = 0
    # index pointing to the last item
    end_index = len(nums)-1

    while end_index > start_index:
        # keep swapping the items
        nums[start_index], nums[end_index] = nums[end_index], nums[start_index]
        # Increment the start_index by 1
        start_index = start_index + 1
        # Decrement the end_index by -1
        end_index = end_index - 1

# Entry point of the Python application
if __name__ == '__main__':

    n = [1,2,3,4]
    reverse(n)
    print(n)

[4, 3, 2, 1]


## Palindrome Problem

In [8]:
# Interview Question - Palindrome Problem
# it has O(s) so basically linear running time complexity as far as the number
# of letters in the string is concerned
def is_palindrome(s):

    original_string = s
    # this is what we have implemented in the previous lecture in O(N)
    reversed_string = reverse(s)

    if original_string == reversed_string:
        return True

    return False


# O(N) linear running time where N is the number of letters in string s N=len(s)
def reverse(data):

    # string into a list of characters
    data = list(data)

    # pointing to the first item
    start_index = 0
    # index pointing to the last item
    end_index = len(data)-1

    while end_index > start_index:
        # keep swapping the items

        data[start_index], data[end_index] = data[end_index], data[start_index]
        start_index = start_index + 1
        end_index = end_index - 1

    # transform the list of letters into a string
    return ''.join(data)


if __name__ == '__main__':
    print(is_palindrome('Kevin'))
    print(is_palindrome('madam'))

False
True


In [9]:
# Palindrome Problem in Python
def palindrome_python(s):

    # We start at the end of the string and decrement by 1 until we arrive at the beginning of the string
    # s[::-1] signifies we are going to consider letter in a reverse order
    if s == s[::-1]:
        return True

    return False

if __name__ == '__main__':
    print(palindrome_python('car'))
    print(palindrome_python('madam'))

False
True


## Integer Reversion Problem

In [10]:
# Interview Question - Integer reversion
def reverse_integer(n):

    reversed_integer = 0
    while n > 0:
        # Divide the original number by 10 and thus use the modulo (%) operator
        remainder = n % 10
        reversed_integer = reversed_integer * 10 + remainder
        n = n // 10

    return reversed_integer


if __name__ == '__main__':
    print(reverse_integer(12345678))
    print(reverse_integer(12340))

87654321
4321


## Anagram Problem

In [11]:
# Interview Question - Anagram Problem

def is_anagram(str1, str2):

    # if the length of the strings differ - they are not anagrams
    if len(str1) != len(str2):
        return False

    # we have to sort the letters of the strings and then we have to compare
    # the letters with the same indexes
    # this is the bottleneck because it has O(NlogN) - linearithmic complexity
    str1 = sorted(str1)
    str2 = sorted(str2)

    # after that we have to check the letters with the same indexes
    # O(N) running time
    for i in range(len(str1)):
        if str1[i] != str2[i]:
            return False

    # overall running time is O(NlogN)+O(N)=O(NlogN) and thus linearithmic complexity

    return True


if __name__ == '__main__':

    s1 = ['f', 'l', 'u', 's', 't', 'e', 'r']
    s2 = ['r', 'e', 's', 't', 'f', 'e', 'l']
    s3 = ['r', 'e', 's', 't', 'f', 'u', 'l']
    print(is_anagram(s1, s2))
    print(is_anagram(s1, s3))


False
True


## Dutch National Flag Problem
The problem is that we want to sort a <b>T[]</b> one-dimensional array of integers in <b>O(N)</b> running time - and without any extra memmory. <b>The array can contain values : 0,1 and 2</b>

In [16]:
def dutch_flag_problem(nums, pivot=1):
    i = 0
    j = 0
    k = len(nums)-1

    while j <= k:
        
        # Current element is 0
        if nums[j] < pivot:
            swap(nums, i, j)
            i= i + 1
            j= j+ 1
        # current element is 2
        elif nums[j] > pivot:
            swap(nums, j, k)
            k = k - 1
        # Current element is 1
        else:
            j = j + 1
        
    return nums

def swap(nums,index1,index2):
    nums[index1], nums[index2] = nums[index2], nums[index1]


if __name__ == '__main__':
    print(dutch_flag_problem([0,1,2,2,1,0,0,2,2,1]))

[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]


## Trapping Rain Water Problem

Given n non-negative integers representing an elevation map where the width of each bar is 1. Compute how much water it can trap after raining.

In [13]:
# Interview Question - Trapping Rain Water Problem - 
# NOTE: Run this program in Python script. Running in Jupyter notebook will return a "TypeError: 'int' object is not callable"
def trapping_water_problem(heights):
    if len(heights) < 3:
        return 0

    # Create list data structures for left_max and right_max
    # '_' is a placeholder for the number of values in heights
    left_max = [0 for _ in range(len(heights))]
    right_max = [0 for _ in range(len(heights))]

    # dealing with the left max values
    for i in range(1, len(heights)):
        left_max[i] = max(left_max[i - 1], heights[i - 1])

    # dealing with the right max values. We start with -2 since we already know that the initial position is 0
    # And because the index is exclusive, we define by -1 and we keep incrementing the index by -1
    for i in range(len(heights) - 2, -1, -1):
        right_max[i] = max(right_max[i + 1], heights[i + 1])

    # consider all the items in O(N) and sum up the trapped rain water units
    trapped = 0

    # We start with index 1 because we know that index 0 cannot trap any water
    for i in range(1, len(heights) - 1):
        if min(left_max[i], right_max[i]) > heights[i]:
            trapped += min(left_max[i], right_max[i]) - heights[i]

    return trapped


if __name__ == '__main__':
    print(trapping_water_problem([1, 0, 2, 1, 3, 1, 2, 0, 3]))

TypeError: 'int' object is not callable

# Linked Lists

In [22]:
# Define Node data structure
class Node:

    def __init__(self, data):
        self.data = data
        self.next_node = None

    def __repr__(self):
        return str(self.data)

# Define linked list data structure
class LinkedList:

    def __init__(self):
        # this is the first node of the linked list
        # WE CAN ACCESS THIS HEAD NODE EXCLUSIVELY !!!
        self.head = None
        self.num_of_nodes = 0

    # O(1) constant running time because we only havw to update the references
    def insert_start(self, data):
        self.num_of_nodes += 1
        new_node = Node(data)

        # the head is NULL (so the data structure is empty)
        if self.head is None:
            self.head = new_node
        # so this is when the linked list is not empty
        else:
            # we have to update the references
            new_node.next_node = self.head
            self.head = new_node

    # O(N) linear running time
    def insert_end(self, data):
        self.num_of_nodes += 1
        new_node = Node(data)

        # check if the linked list is empty
        if self.head is None:
            self.head = new_node
        else:
            # this is when the linked list is not empty
            actual_node = self.head

            # this is why it has O(N) linear running time
            while actual_node.next_node is not None:
                actual_node = actual_node.next_node

            # actual_node is the last node: so we insert the new_node
            # right after the actual_node
            # this is how we dinf the last item of the list
            # O(N) linear running time
            actual_node.next_node = new_node

    # O(1) constant running time
    def size_of_list(self):
        return self.num_of_nodes

    # O(N) linear running time - Traverse through every item in the linked list
    def traverse(self):

        actual_node = self.head

        while actual_node is not None:
            print(actual_node.data)
            actual_node = actual_node.next_node

    # O(N) linear running time
    def remove(self, data):

        # the list is empty
        if self.head is None:
            return

        actual_node = self.head
        # we have to track the previous node for future pointer updates
        # this is why doubly linked lists are better - we can get the previous
        # node (here with linked lists it is impossible)
        previous_node = None

        # search for the item we want to remove (data)
        while actual_node is not None and actual_node.data != data:
            previous_node = actual_node
            actual_node = actual_node.next_node

        # search miss
        if actual_node is None:
            return

        # update the references (so we have the data we want to remove)
        # the head node is the one we want to remove
        if previous_node is None:
            self.head = actual_node.next_node
        else:
            # remove an internal node by updating the pointers
            # NO NEED TO del THE NODE BECAUSE THE GARBAGE COLLECTOR WILL DO THAT
            previous_node.next_node = actual_node.next_node


if __name__ == '__main__':
    linked_list = LinkedList()
    linked_list.insert_end(10)
    linked_list.insert_start(100)
    linked_list.insert_start(1000)
    linked_list.insert_end('Adam')
    linked_list.insert_end(7.5)
    linked_list.traverse()
    print('-------')
    linked_list.remove(1000)
    linked_list.traverse()

1000
100
10
Adam
7.5
-------
100
10
Adam
7.5


# Doubly Linked List

In [26]:
# Assign class
class Node:

    # Initialization block
    def __init__(self, data):
        self.data = data
        self.next = None
        self.previous = None

# Assign class
class DoublyLinkedList:

    # Initialization block
    def __init__(self):
        self.head = None
        self.tail = None

    # this operation inserts items at the end of the linked list
    # so we have to manipulate the tail node in O(1) running time
    def insert(self, data):

        new_node = Node(data)

        # when the list is empty
        if self.head is None:
            self.head = new_node
            self.tail = new_node
        # there is at least 1 item in the data structure
        # we keep inserting items at the end of the linked list
        else:
            new_node.previous = self.tail
            self.tail.next = new_node
            self.tail = new_node

    # we can traverse a doubly linked list in both directions
    def traverse_forward(self):

        actual_node = self.head

        while actual_node is not None:
            print("%d" % actual_node.data)
            actual_node = actual_node.next

    def traverse_backward(self):

        actual_node = self.tail

        while actual_node is not None:
            print("%d" % actual_node.data)
            actual_node = actual_node.previous


if __name__ == '__main__':

    linked_list = DoublyLinkedList()
    linked_list.insert(1)
    linked_list.insert(2)
    linked_list.insert(3)

    # 1 2 3
    linked_list.traverse_forward()

    # 3 2 1
    linked_list.traverse_backward()


1
2
3
3
2
1


# Comparing Running Times of Linked Lists and Arrays

In [37]:
import time

# Define Node data structure
class Node:

    def __init__(self, data):
        self.data = data
        self.next_node = None

    def __repr__(self):
        return str(self.data)

# Define linked list data structure
class LinkedList:

    def __init__(self):
        # this is the first node of the linked list
        # WE CAN ACCESS THIS HEAD NODE EXCLUSIVELY !!!
        self.head = None
        self.num_of_nodes = 0

    # O(1) constant running time because we only havw to update the references
    def insert_start(self, data):
        self.num_of_nodes += 1
        new_node = Node(data)

        # the head is NULL (so the data structure is empty)
        if self.head is None:
            self.head = new_node
        # so this is when the linked list is not empty
        else:
            # we have to update the references
            new_node.next_node = self.head
            self.head = new_node

    # O(N) linear running time
    def insert_end(self, data):
        self.num_of_nodes += 1
        new_node = Node(data)

        # check if the linked list is empty
        if self.head is None:
            self.head = new_node
        else:
            # this is when the linked list is not empty
            actual_node = self.head

            # this is why it has O(N) linear running time
            while actual_node.next_node is not None:
                actual_node = actual_node.next_node

            # actual_node is the last node: so we insert the new_node
            # right after the actual_node
            # this is how we dinf the last item of the list
            # O(N) linear running time
            actual_node.next_node = new_node

    # O(1) constant running time
    def size_of_list(self):
        return self.num_of_nodes

    # O(N) linear running time - Traverse through every item in the linked list
    def traverse(self):

        actual_node = self.head

        while actual_node is not None:
            print(actual_node.data)
            actual_node = actual_node.next_node

    # O(N) linear running time
    def remove(self, data):

        # the list is empty
        if self.head is None:
            return

        actual_node = self.head
        # we have to track the previous node for future pointer updates
        # this is why doubly linked lists are better - we can get the previous
        # node (here with linked lists it is impossible)
        previous_node = None

        # search for the item we want to remove (data)
        while actual_node is not None and actual_node.data != data:
            previous_node = actual_node
            actual_node = actual_node.next_node

        # search miss
        if actual_node is None:
            return

        # update the references (so we have the data we want to remove)
        # the head node is the one we want to remove
        if previous_node is None:
            self.head = actual_node.next_node
        else:
            # remove an internal node by updating the pointers
            # NO NEED TO del THE NODE BECAUSE THE GARBAGE COLLECTOR WILL DO THAT
            previous_node.next_node = actual_node.next_node

if __name__ == '__main__':

    linked_list = LinkedList()

    now = time.time()

    print('Inserting at the start of a Linked List:')
    print('---------------------------------------')

    for i in range(5000):
        linked_list.insert_start(i)

    print('Inserting items into Linked List in %ss' % str(time.time() - now))

    array = []
    now = time. time()

    for i in range(5000):
        array.insert(0,i)

    print('Inserting items into Array in %ss' % str(time.time() - now))
    print('---------------------------------------')
    print('Inserting at the end of a Linked List:')
    print('---------------------------------------')
    for i in range(5000):
        linked_list.insert_end(i)

    print('Inserting items into Linked List in %ss' % str(time.time() - now))

    array = []
    now = time. time()

    for i in range(5000):
        array.insert(0,i)
    
    print('Inserting items into Array in %ss' % str(time.time() - now))

Inserting at the start of a Linked List:
---------------------------------------
Inserting items into Linked List in 0.004602670669555664s
Inserting items into Array in 0.00859379768371582s
---------------------------------------
Inserting at the end of a Linked List:
---------------------------------------
Inserting items into Linked List in 2.914346933364868s
Inserting items into Array in 0.008578062057495117s
