# YSC2229 Assignment 2
## A0149963M

Stress testing functions for the maximum subarray problem. The unit test generates a random array to test the divide-and-conquer implementation against the naive implementation, which we are confident works.

In [9]:
import random

def generate_random_array():
    """
    Builds a random list of integers for the maximum subarray tests.

    The list has between 1 and 100 entries, each drawn uniformly
    from the closed interval [-100, 100].
    """
    length = random.randint(1, 100)
    return [random.randint(0, 200) - 100 for _ in range(length)]


def stress_test_subarray(fun1, fun2, trials=2000):
    """
    Stress test comparing two maximum subarray implementations on
    random arrays.

    fun1 - reference (naive) implementation
    fun2 - implementation under test (divide and conquer)
    trials - number of random arrays to try (default 2000)

    Both functions are called as f(arr, low, high) and must return a
    tuple whose third element is the maximum subarray sum.

    The maximum subarray value is unique, but the maximum subarray
    itself could be non-unique. Therefore only the sums (index 2 of
    each result) are compared, never the index bounds.

    Returns True when every trial agrees. On the first disagreement the
    failing input and both outputs are printed and False is returned.
    """
    for _ in range(trials):
        test_arr = generate_random_array()
        last = len(test_arr) - 1
        # Evaluate each implementation exactly once so the values that
        # get printed are the very values that were compared.
        efficient = fun2(test_arr, 0, last)
        naive = fun1(test_arr, 0, last)
        if efficient[2] != naive[2]:
            print("Test failed for: ", test_arr)
            print("efficient algo output: ", efficient)
            print("naive algo output: ", naive)
            return False
    return True

Naive implementation of the maximum subarray

In [10]:
def max_subarray_naive(arr, low, high):
    """
    arr - input array
    low - leftmost index (inclusive)
    high - rightmost index (inclusive)

    Naively calculating the maximum subarray of arr[low..high] in
    O(n^2) by trying every (start, end) pair inside that range.

    Returns (first, last, maxSum): the inclusive bounds of the first
    best subarray encountered and its sum.
    Raises ValueError when the range is empty (low > high).
    """
    if low > high:
        raise ValueError("empty range: low must not exceed high")

    # base case if only 1 point in the range
    if low == high:
        return (low, high, arr[low])

    maxSum = float('-inf')
    first = last = low

    # Only indices inside [low, high] are considered, so the function
    # also gives the right answer when asked about a sub-range of arr.
    for i in range(low, high + 1):
        total = 0
        for j in range(i, high + 1):
            total += arr[j]
            # strict comparison keeps the earliest subarray on ties
            if maxSum < total:
                first = i
                last = j
                maxSum = total

    return (first, last, maxSum)

Efficient way of computing the maximum subarray using the divide-and-conquer approach

In [11]:
def max_crossing_subarray(arr, low, mid, high):
    """
    arr - input array
    low - leftmost index
    mid - midpoint index
    high - rightmost index

    Finds the maximum subarray that crosses the midpoint, i.e. that
    contains both arr[mid] and arr[mid + 1], in O(n).
    Expects low <= mid < high so that both halves are non-empty.

    Returns (maxLeft, maxRight, total) where total is exactly the sum
    of arr[maxLeft..maxRight].
    """

    # best suffix of the left half, growing leftwards from mid
    leftSum = float('-inf')
    maxLeft = mid
    total = 0
    for i in range(mid, low - 1, -1):
        total += arr[i]
        if total > leftSum:
            leftSum = total
            maxLeft = i

    # best prefix of the right half, growing rightwards from mid + 1
    rightSum = float('-inf')
    maxRight = mid + 1
    total = 0
    for j in range(mid + 1, high + 1):
        total += arr[j]
        if total > rightSum:
            rightSum = total
            maxRight = j

    # A crossing subarray must use both parts. Returning only leftSum or
    # only rightSum here would not match the (maxLeft, maxRight) bounds;
    # one-sided candidates are covered by the recursive calls on the
    # two halves instead.
    return (maxLeft, maxRight, leftSum + rightSum)


def max_subarray_eff(arr, low, high):
    """
    arr - input array
    low - leftmost index (inclusive)
    high - rightmost index (inclusive)

    Calculates maximum subarray of arr[low..high] in O(nlogn) time by
    divide and conquer: the answer lies entirely in the left half,
    entirely in the right half, or crosses the midpoint.

    Returns (first, last, maxSum).
    Raises ValueError when the range is empty (low > high); without
    this guard an empty range would recurse without ever terminating.
    """
    if low > high:
        raise ValueError("empty range: low must not exceed high")

    if high == low:
        return (low, high, arr[low])

    mid = (low + high) // 2
    left = max_subarray_eff(arr, low, mid)
    right = max_subarray_eff(arr, mid + 1, high)
    cross = max_crossing_subarray(arr, low, mid, high)

    # max() keeps the first maximal candidate, so ties are resolved in
    # the order left, right, crossing.
    return max((left, right, cross), key=lambda candidate: candidate[2])
        

In [12]:
# Running the stress test: the naive implementation is passed first as
# the reference, the divide-and-conquer implementation second as the
# one under test. The call returns True when every random trial agrees.

stress_test_subarray(max_subarray_naive, max_subarray_eff)

True

Stress testing functions for the nearest pair of points problem. The unit test generates a random array of points to test the divide-and-conquer implementation against the naive implementation, which we are confident works.

In [13]:
# testing functions
import random

def generate_random_point():
    """
    Draws one random point (x, y).
    Each coordinate is random.uniform(0, 30) shifted down by 15,
    with x drawn before y.
    """
    # tuple elements are evaluated left to right, so x is drawn first
    return (random.uniform(0, 30) - 15, random.uniform(0, 30) - 15)

def generate_random_array():
    """
    Builds a random list of points for the closest pair tests.
    The list holds between 2 and 30 points, so a pair always exists.

    NOTE: this definition reuses the name of the integer-array
    generator defined earlier in the file and replaces it once run.
    """
    size = random.randint(2, 30)
    return [generate_random_point() for _ in range(size)]

def stress_test_points(candidate, witness, trials=2000):
    """
    Stress test comparing two closest pair implementations on random
    point sets.

    candidate - implementation under test
    witness - reference implementation
    trials - number of random point sets to try (default 2000)

    Both functions are called with a list of (x, y) points and must
    return (point1, point2, distance). Only the distances are compared,
    because several pairs can share the same minimal distance.

    Returns True when every trial agrees. On the first disagreement the
    input and both results are printed and False is returned.
    """
    for _ in range(trials):
        test = generate_random_array()
        # Each implementation gets its own copy, so one that reorders
        # its argument cannot affect the other or the reported input.
        test_p1, test_p2, dist_test = candidate(list(test))
        actual_p1, actual_p2, dist_actual = witness(list(test))
        if dist_test != dist_actual:
            print("Test failed for ", test)
            print("Test algorithm gave: ", dist_test)
            print("Test algorithm gave the following pairs of points: ")
            print("Point 1: ", test_p1)
            print("Point 2: ", test_p2)
            print("Witness algorithm gave: ", dist_actual)
            print("Witness algorithm gave the following pairs of points: ")
            print("Point 1: ", actual_p1)
            print("Point 2: ", actual_p2)
            return False
    return True


Naive algorithm for finding the closest pair of points by enumerating all possibilities

In [14]:
def dist(p1, p2):
    """
    p1, p2 - points in the coordinate form (x, y)

    Euclidean distance between the two points.
    """
    x1, y1 = p1
    x2, y2 = p2
    squared = (x1 - x2)**2 + (y1 - y2)**2
    return squared**0.5

def closest_naive(P):
    """
    P - list containing points in the coordinate form (x, y),
        at least 2 of them

    Naive algorithm for finding closest points in a list in O(n^2).

    Returns (point1, point2, distance) for the first closest pair
    encountered.
    Raises ValueError when P holds fewer than 2 points, since no pair
    exists in that case.
    """
    n = len(P)
    if n < 2:
        raise ValueError("closest_naive needs at least 2 points")

    # distance set to infinite first and updated later
    min_val = float('inf')
    left, right = 0, 1
    # enumerates all possibilities to find minimum distance
    for i in range(n - 1):
        for j in range(i + 1, n):
            # compute each distance once and reuse it for the update
            current = dist(P[i], P[j])
            if current < min_val:
                min_val = current
                left = i
                right = j
    return P[left], P[right], min_val


Efficient implementation that uses a divide-and-conquer algorithm, designed to run in O(n log n) time

In [15]:
def find_dist(p1, p2):
    """
    p1, p2 - points in the coordinate form (x, y)

    Euclidean distance between the two points.
    """
    x1, y1 = p1
    x2, y2 = p2
    squared = (x1 - x2)**2 + (y1 - y2)**2
    return squared**0.5

def find_strip(X, Y, m, d):
    """
    X - array of points; X[m] supplies the x-coordinate of the strip centre
    Y - array of points to filter
    m - index into X of the centre point
    d - half-width of the strip

    Collects, in the order they appear in Y, the points whose
    x-coordinate is at most d away from the x-coordinate of X[m].
    """
    centre_x = X[m][0]
    return [pt for pt in Y if abs(pt[0] - centre_x) <= d]

def crossing(X,Y,m,d):
    """
    X - array of points sorted in x-coordinates
    Y - array of points sorted in y-coordinates
    m - middle index
    d - minimum distance so far

    Finds the closest pair among the points of Y that lie within d of
    the x-coordinate of X[m] (the strip).

    Returns (m, distance, point1, point2). When the strip holds fewer
    than two points there is no pair: the distance is infinite and a
    single point is returned twice as a placeholder.
    """
    strip = find_strip(X,Y,m,d)
    if not strip:
        # Nothing lies in the strip (e.g. X[m] itself is not in Y, or d
        # is negative); fall back to X[m] as the placeholder point.
        return (m, float("inf"), X[m], X[m])
    if len(strip) == 1:
        return (m, float("inf"), strip[0], strip[0])

    min_dist = float("inf")
    p1 = p2 = strip[0]
    size = len(strip)
    for i in range(size - 1):
        j = i + 1
        # The strip is ordered by y, so once the vertical gap alone
        # exceeds the best distance no later point can improve on it.
        while j < size and strip[j][1] - strip[i][1] <= min_dist:
            gap = find_dist(strip[i], strip[j])
            if gap < min_dist:
                p1, p2 = strip[i], strip[j]
                min_dist = gap
            j += 1
    return (m, min_dist, p1, p2)

def find_closest(X, Y, l, r, p1=None, p2=None):
    """
    X - array of points sorted in x-coordinates
    Y - array of points sorted in y-coordinates; must contain the
        points X[l..r]
    l - leftmost index
    r - rightmost index
    p1 - unused, kept so existing callers keep working
    p2 - unused, kept so existing callers keep working

    Recursive engine of the algorithm: solves X[l..m] and X[m+1..r]
    separately, then checks the pairs that straddle the split.

    Returns (index, distance, point1, point2). A single point has no
    pair, so it yields an infinite distance with the point repeated.
    Raises ValueError when the range is empty (l > r); without this
    guard an empty range would recurse without ever terminating.
    """
    if l > r:
        raise ValueError("empty range: l must not exceed r")
    if l == r:
        return (r, float("inf"), X[r], X[r])

    m = (r+l) // 2

    # Hand each half only the y-sorted points that can belong to it, so
    # deeper levels do not rescan the whole point set. Points whose x
    # equals the boundary value may land in both lists; that only adds
    # real candidates and never invents a pair.
    left_Y = [pt for pt in Y if pt[0] <= X[m][0]]
    right_Y = [pt for pt in Y if pt[0] >= X[m + 1][0]]

    (midl, d1, p1_left, p2_left) = find_closest(X, left_Y, l, m)
    (midr, d2, p1_right, p2_right) = find_closest(X, right_Y, m+1, r)

    # The strip has to be centred on the split point X[m]: a straddling
    # pair closer than min(d1, d2) has both points within that distance
    # of the split, not of whichever index the left half reported.
    (midc, d3, p1_cross, p2_cross) = crossing(X, Y, m, min(d1, d2))

    if d3 < d1 and d3 < d2:
        return (midc, d3, p1_cross, p2_cross)
    elif d1 < d2:
        return (midl, d1, p1_left, p2_left)
    else:
        return (midr, d2, p1_right, p2_right)

def closest_pair(A):
    """
    Main function.
    A - input array of points in the coordinate form (x, y)

    Sorts the points once by x and once by y, then hands both
    orderings to the recursive engine. The input list itself is left
    untouched; sorted copies are used instead.

    Returns (point1, point2, distance) for the closest pair.
    Raises ValueError when A is empty; without this guard an empty
    input would recurse without ever terminating.
    """
    if len(A) == 0:
        raise ValueError("closest_pair needs a non-empty list of points")

    by_x = sorted(A, key=lambda tup: tup[0])
    by_y = sorted(A, key=lambda tup: tup[1])
    (_, closest_distance, p1, p2) = find_closest(by_x, by_y, 0, len(by_x) - 1)
    return p1, p2, closest_distance

In [16]:
# Running the stress test: closest_pair is the candidate under test and
# closest_naive is the trusted witness. The call returns True when every
# random trial gives the same distance.
stress_test_points(closest_pair,closest_naive)

True