# <a href= "http://interactivepython.org/runestone/static/pythonds/SortSearch/toctree.html"> Chapter 5: Searching and Sorting </a>

<h3> <a href= "
http://interactivepython.org/runestone/static/pythonds/SortSearch/ProgrammingExercises.html    "> 
Programming Exercises¶
</a>
</h3>

Implement the following for the hash table Map ADT implementation.
<ul> 
  <li>  len method (__len__)</li>
  <li> in method (__contains__)</li>
  <li> del method using the following for collision resolution</li>
  <ul>
         <li>  chaining </li>
         <li> open addressing</li>
           <li>quadratic probing</li>
  </ul>
          
</ul>



In [2]:
### Hash implementation in python
def custom_hash(key):
    """
    Return the hash value of the given key. Uses dbj2

    @param key: String or unicode
    """
    result = 5381
    multiplier = 33

    if isinstance(key, int):
        return key
    
    for char in key:
        result = 33 * result + ord(char)
    return result

class Hash(object):
    def __init__(self, size=8, hashfunction=custom_hash):
        # Total block size which can be array or list
        self._size = 8
        # Initial hashtable size
        self.__initial_size = self._size
        # Counter for holding total used slots
        self._used_slots = 0
        # Counter for holding deleted keys
        self._dummy_slots = 0
        # Holds all the keys
        self._keys = [None] * self._size
        # Holds all the values
        self._values = [None] * self._size
        # Alias for custom_hash function
        self.hash = custom_hash
        # threshold is used for increasing hash table
        self._max_threshold = 0.70
    
    def should_expand(self):
        """Returns True or False
        
        If used slots and dummy slots are more than 70% resize the hash table.
        """
        return (float(self._used_slots + self._dummy_slots) / self._size) >= self._max_threshold

    def _probing(self, current_position):
        """Quadratic probing to get new position when collision occurs.

        @param current_position: position at already element is present.
        """
        # Algorithm is copied from CPython http://hg.python.org/cpython/file/52f68c95e025/Objects/dictobject.c#l69
        return ((5 + current_position) + 1) % self._size
    
    def _set_item_at_pos(self, position, key, value):
        self._keys[position] = key
        self._values[position] = value
        
        self._used_slots += 1
        
    def _set_item(self, position, key, value):
        """sets key and value in the given position.
        If position has already value in it, calls _probing to get next position

        @param position: index
        @param key: key
        @param value: value
        """
        existing_key = self._keys[position]
        
        if existing_key is None or existing_key == key:
            # Empty or update
            self._set_item_at_pos(position, key, value)
        else:
            # Collision needs a probing. This needs to be recursive.
            new_position = self._probing(position)
            self._set_item(new_position, key, value)
        
    def _reposition(self, keys, values):
        """Reposition all the keys and values.
        This is called whenever load factor or threshold has crossed the limit.
        """
        for (key, value) in zip(keys, values):
            if key is not None:
                hashvalue = self.hash(key)
                position = self._calculate_position(hashvalue)
                
                self._set_item(position, key, value)
       
    def _resize(self):
        old_keys = self._keys
        old_values = self._values
        
        # New size
        self._size = self._size * 4
        
        # create new block of memory and clean up old keys positions
        self._keys = [None] * self._size
        self._values = [None] * self._size
        self._used_slots = 0
        self._dummy_slots = 0
        
        # Now reposition the keys and values
        
        self._reposition(old_keys, old_values)
        
    def _calculate_position(self, hashvalue):
        return hashvalue % self._size
        
    def raise_if_not_acceptable_key(self, key):
        if not isinstance(key, (basestring, int)):
            raise TypeError("Key should be int or string or unicode")
        
    def put(self, key, value):
        """Given a key and value add to the hashtable.

        Key should be int or string or unicode.
        """
        self.raise_if_not_acceptable_key(key)
        
        if self.should_expand():
            self._resize()
            
        position = self._calculate_position(self.hash(key))
        self._set_item(position, key, value)
        
    def _get_pos_recursively(self, position, key):
        new_position = self._probing(position)
        tmp_key = self._keys[new_position]
        
        if tmp_key == None:
            # At new position the key is empty raise ane exception
            raise KeyError(u"{} key not found".format(key))
        elif tmp_key != key:
            # Again check for next position
            return self._get_pos_recursively(new_position, key)
        else:
            return new_position
        
    def _get_pos(self, key):
        """
        Returns position of the key
        """
        self.raise_if_not_acceptable_key(key)
        position = self._calculate_position(self.hash(key))
        
        tmp_key = self._keys[position]

        if tmp_key == None:
            raise KeyError("{} doesn't exist".format(key))
            
        elif tmp_key != key:
            # Probably collision and get next position using probing
            return self._get_pos_recursively(position, key) 
        else:
            return position
        
    def get(self, key):
        position = self._get_pos(key)

        if position is None:
            return None
        
        return self._values[position]
    
    def _delete_item(self, position, key):
        self._keys[position] = None
        self._values[position] = None
        
        self._dummy_slots += 1
        
    def delete(self, key):
        """Deletes the key if present. KeyError is raised if Key is missing.
        """
        position = self._get_pos(key)
        
        if position is None:
            raise KeyError(key)
            
        self._delete_item(position, key)
          

In [3]:
x = Hash()
x[1] = 1
print (x)

TypeError: 'Hash' object does not support item assignment

Implement the mergeSort function without using the slice operator.

In [None]:
# Iterative Merge sort with Bottom up

def mergeSort(a): 

    current_size = 1

    # Outer loop for traversing Each 
    # sub array of current_size 
    while current_size < len(a) - 1: 

        left = 0
        # Inner loop for merge call 
        # in a sub array 
        # Each complete Iteration sorts 
        # the iterating sub array 
        while left < len(a)-1: 

            # mid index = left index of 
            # sub array + current sub 
            # array size - 1 
            mid = left + current_size - 1

            # (False result,True result) 
            # [Condition] Can use current_size 
            # if 2 * current_size < len(a)-1 
            # else len(a)-1 
            right = ((2 * current_size + left - 1, 
                    len(a) - 1)[2 * current_size 
                        + left - 1 > len(a)-1]) 

            # Merge call for each sub array 
            merge(a, left, mid, right) 
            left = left + current_size*2

        # Increasing sub array size by 
        # multiple of 2 
        current_size = 2 * current_size 

# Merge Function 
def merge(a, l, m, r): 
    n1 = m - l + 1
    n2 = r - m 
    L = [0] * n1 
    R = [0] * n2 
    for i in range(0, n1): 
        L[i] = a[l + i] 
    for i in range(0, n2): 
        R[i] = a[m + i + 1] 

    i, j, k = 0, 0, l 
    while i < n1 and j < n2: 
        if L[i] > R[j]: 
            a[k] = R[j] 
            j += 1
        else: 
            a[k] = L[i] 
            i += 1
        k += 1

    while i < n1: 
        a[k] = L[i] 
        i += 1
        k += 1

    while j < n2: 
        a[k] = R[j] 
        j += 1
        k += 1


a = [12, 11, 13, 5, 6, 7] 
print("Given array is ") 
print(a) 

mergeSort(a) 

print("Sorted array is ") 
print(a) 


Using a random number generator, create a list of 500 integers. Perform a benchmark analysis two sorting algorithmsfrom this chapter. The first one has to be one of the insertion/selection/bubble sorting algorithms and the other is one of the quick/merge sorting algorithm.  What is the difference in execution speed?

In [15]:
import random
import timeit

def bubbleSort(alist):

   #Setting the range for comparison (first round: n, second round: n-1  and so on)
   for i in range(len(alist)-1,0,-1):

      #Comparing within set range
       for j in range(i):

           #Comparing element with its right side neighbor
           if alist[j] > alist[j+1]:

               #swapping
               temp = alist[j]
               alist[j] = alist[j+1]
               alist[j+1] = temp

   return alist


def mergeSort(x):
    if len(x) < 2:
        return x
    result = []
    mid = int(len(x) / 2)
    y = mergeSort(x[:mid])
    z = mergeSort(x[mid:])
    i = 0
    j = 0
    while i < len(y) and j < len(z):
        if y[i] > z[j]:
            result.append(z[j])
            j += 1
        else:
            result.append(y[i])
            i += 1
    result += y[i:]
    result += z[j:]
    return result

    
items = random.sample(range(1,1000,), 500)
start1 = timeit.default_timer()
print("Bubble Sort: ",bubbleSort(items))
print("-" *100)
stop1 = timeit.default_timer()
print("Bubble Sort Run Time analysis: ", stop1-start1, " Sec")
print("-" *100)

start2 = timeit.default_timer()
print("Merge sort: ", mergeSort(items))
stop2 = timeit.default_timer()
print("-" *100)
print("Merge Sort Run Time analysis: ", stop2-start2, " Sec")
print("-" *100)

Bubble Sort:  [3, 6, 8, 9, 13, 18, 22, 23, 26, 27, 30, 31, 32, 34, 38, 40, 46, 48, 50, 52, 54, 56, 58, 61, 62, 69, 71, 72, 73, 75, 76, 78, 79, 81, 82, 84, 86, 91, 92, 94, 95, 97, 98, 99, 100, 103, 104, 105, 106, 108, 111, 114, 116, 117, 119, 121, 122, 123, 124, 126, 130, 131, 132, 133, 134, 135, 136, 139, 140, 141, 142, 144, 145, 146, 148, 149, 151, 155, 156, 158, 159, 162, 163, 164, 166, 167, 169, 171, 172, 176, 178, 179, 180, 181, 182, 184, 185, 186, 190, 196, 197, 198, 200, 201, 205, 206, 207, 209, 211, 212, 213, 215, 218, 219, 220, 221, 226, 229, 230, 232, 233, 244, 245, 246, 247, 249, 255, 258, 260, 262, 265, 268, 269, 272, 273, 276, 277, 279, 283, 285, 287, 289, 290, 294, 297, 300, 301, 305, 307, 308, 309, 310, 311, 313, 314, 315, 317, 319, 323, 329, 330, 331, 332, 333, 334, 338, 339, 340, 341, 342, 343, 346, 347, 348, 351, 352, 353, 354, 355, 356, 358, 360, 361, 363, 368, 369, 370, 371, 375, 377, 379, 383, 386, 389, 390, 391, 394, 397, 398, 399, 400, 403, 406, 408, 409, 410, 412

2.4 Given a list of numbers in random order, write an algorithm that works
      in O(nlog(n)) to find the kth smallest number in the list.

In [16]:

def kthSmallest(arr, n, k): 

    # Sort the given array 
    arr.sort() 


    return arr[k-1] 

if __name__=='__main__': 
    arr = [12, 3, 5, 7, 19] 
    n = len(arr) 
    k = 2
    print("K'th smallest element is", 
        kthSmallest(arr, n, k)) 

K'th smallest element is 5


2.5 Improve the algorithm from the previous problem to be linear? (Hints: the qsort algorithms.) 


In [22]:
def quickselect(items, item_index):

    def select(lst, l, r, index):

        # base case
        if r == l:
            return lst[l]

        # choose random pivot
        pivot_index = random.randint(l, r)

        # move pivot to beginning of list
        lst[l], lst[pivot_index] = lst[pivot_index], lst[l]

        # partition
        i = l
        for j in range(l+1, r+1):
            if lst[j] < lst[l]:
                i += 1
                lst[i], lst[j] = lst[j], lst[i]

        # move pivot to correct location
        lst[i], lst[l] = lst[l], lst[i]

        # recursively partition one side only
        if index == i:
            return lst[i]
        elif index < i:
            return select(lst, l, i-1, index)
        else:
            return select(lst, i+1, r, index)

    if items is None or len(items) < 1:
        return None

    if item_index < 0 or item_index > len(items) - 1:
        raise IndexError()

    return select(items, 0, len(items) - 1, item_index)

a = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
for i in range(0, len(a)):
    print('{0:2} found in position {1}.'.format(i, quickselect(a, i)))


 0 found in position 0.
 1 found in position 1.
 2 found in position 2.
 3 found in position 3.
 4 found in position 4.
 5 found in position 5.
 6 found in position 6.
 7 found in position 7.
 8 found in position 8.
 9 found in position 9.
10 found in position 10.


(Optional) Research perfect hash functions. Using a list of names (classmates, family members, etc.), generate the hash values using the perfect hash algorithm.