In [1]:
import sys
sys.executable

'/usr/local/opt/python/bin/python3.7'

In [2]:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
from scipy.stats import uniform, expon, poisson, describe
import math

import matplotlib.pyplot as plt

from copy import deepcopy
from collections import defaultdict

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import pickle

In [3]:
# this notebook is to fix heuristic 1 and develop heuristic 2

In [18]:
# Directory prefix of the pickled graph-based paths (one file per run index).
filename = 'slurm scripts/data/08-14-20/path_points/'

points = []          # raw path (sequence of points) for every run
distances = []       # full path length, including the final hop
lengths_based = []   # path length excluding the final hop
counts = []          # number of path points minus two, per run

# maps a count value (len(path) - 2) to the run indices that produced it
T_n_indices = defaultdict(list)

for i in range(1,501):
    # In an earlier batch the files with i % 50 == 0 were missing; if that
    # happens again, skip them with:
    #     if i % 50 == 0:
    #         continue

    # The context manager closes every file handle as soon as it is read
    # (previously all 500 handles were left open).
    # NOTE: pickle.load can execute arbitrary code -- only load files that
    # were produced by trusted runs.
    with open(filename+str(i)+'-dim2-n5000000_pathpoints.pkl', "rb") as f:
        pathpoints = pickle.load(f)
    points.append(pathpoints)

    # Sum every segment except the last one (sequential accumulation from 0,
    # so the floating-point result is unchanged).
    s_n = sum(np.linalg.norm(pathpoints[j+1] - pathpoints[j])
              for j in range(len(pathpoints)-2))
    lengths_based.append(s_n)
    # Adding the last segment gives the full path length.
    distances.append(s_n + np.linalg.norm(pathpoints[-1] - pathpoints[-2]))
    counts.append(len(pathpoints)-2)
    T_n_indices[len(pathpoints)-2].append(i)

    # Report runs whose path does not end exactly at (0.9, 0.9).
    if pathpoints[-1][0] != 0.9 or pathpoints[-1][1] != 0.9:
        print(i)


distances = np.array(distances)
lengths_based = np.array(lengths_based)
counts_based = np.array(counts)

In [12]:
# Global parameters read by r(n, D) callers and by the simulation cells.
n = 5_000_000   # sample size
D = 2           # dimension

# Distance between x_init and x_goal.
ell = np.sqrt(2) * 0.8

# Start at the origin, goal at distance ell along the first axis.
x_init, x_goal = np.array([0, 0]), np.array([ell, 0])

In [4]:
# r_n function
def r(n, D):
    """Return the radius r_n = n^(-1/(2D)) / 5.

    The 1/5 scaling was chosen for n = 5 * 10^6.
    """
    exponent = -1 / (2 * D)
    return (n ** exponent) / 5

In [23]:
# Draws n candidate points (a Poisson(n) number if distr='pois'), keeps only those inside a thin ellipse around the diagonal, and adds init and goal
def SampleFree(n, d, x_init, x_goal, distr='unif'):
    """Sample the vertex set: ``x_init``, the accepted random points, ``x_goal``.

    Candidate points are drawn uniformly from the unit hypercube and kept only
    if they satisfy

        (x - 0.5)^2 - 1.997 (x - 0.5)(y - 0.5) + (y - 0.5)^2 <= 1/1800,

    i.e. if they lie inside a thin ellipse centred at (0.5, 0.5) and elongated
    along the diagonal. Only the first two coordinates are tested, so the
    filter is effectively two-dimensional (``d`` must be at least 2).

    Parameters
    ----------
    n : int
        Number of candidate points ('unif'), or the Poisson mean of that
        number ('pois').
    d : int
        Dimension of each candidate point.
    x_init, x_goal : array-like
        Start and goal points; placed first and last in the returned list.
    distr : {'unif', 'pois'}
        Whether the number of candidates is fixed or Poisson distributed.

    Returns
    -------
    list
        ``[x_init, <accepted points...>, x_goal]``. Because of the rejection
        step the list usually holds fewer than ``n + 2`` entries.

    Raises
    ------
    AssertionError
        If ``distr`` is not one of the supported values.
    """
    assert distr in {'unif', 'pois'}, "distr parameter must be one of the following: 'unif', 'pois'"

    # start with init point (index 0)
    V = [x_init]

    # Compare by value: `is` tests object identity and only works for string
    # literals that happen to be interned.
    if distr == 'pois':
        num_points = np.random.poisson(lam=n)
    else:
        num_points = n

    for _ in range(num_points):
        point = np.random.uniform(size=d)
        dx = point[0] - 0.5
        dy = point[1] - 0.5
        # reject candidates that fall outside the ellipse
        if dx**2 - 1.997 * dx * dy + dy**2 > 1/1800:
            continue
        V.append(point)

    # the goal point always comes last (index len(V) - 1)
    V.append(x_goal)

    return V

In [6]:
# Creates adjacency list, no need for also storing their distances
def Near(V, r_n):
    """Build the adjacency list of the radius-``r_n`` neighbourhood graph on ``V``.

    Parameters
    ----------
    V : array-like of shape (num_points, dim)
        Vertex coordinates.
    r_n : float
        Connection radius.

    Returns
    -------
    list of list of int
        ``edges[i]`` holds every index ``j != i`` with
        ``dist(V[i], V[j]) <= r_n``. Distances themselves are not stored.
    """
    # One tree is enough: it can be queried against itself.
    tree = KDTree(data=V)

    # query_ball_tree returns, for every i, all j within distance r_n of
    # V[i] -- including i itself.
    results = tree.query_ball_tree(other=tree, r=r_n)

    # Drop the self-loops and return the adjacency list (the original built
    # it but never returned it).
    return [[j for j in neighbours if j != i]
            for i, neighbours in enumerate(results)]

In [7]:
# Afore-mentioned graph heuristic
def path_algorithm(V, E):
    """Greedy angle heuristic: walk from ``V[0]`` towards ``V[-1]``.

    From the current vertex, move to the neighbour whose direction makes the
    smallest absolute angle with the direction to the goal. The angle formula
    uses only the first two coordinates, so the points are treated as 2-D.

    Parameters
    ----------
    V : sequence of array-like
        Vertex coordinates; ``V[0]`` is the start and ``V[-1]`` the goal.
    E : sequence of sequences of int
        Adjacency list; ``E[i]`` holds the neighbour indices of vertex ``i``.

    Returns
    -------
    (list, float)
        The visited points in order, and the path length. The length is
        ``float('inf')`` when the walk fails, i.e. the current vertex has no
        neighbours or the chosen neighbour was already visited.
    """
    x_goal = V[-1]
    goal_index = len(V) - 1

    # tracks where we've been: ordered list for the output, set for O(1) lookup
    piece = 0
    visited = [0]
    seen = {0}
    distance = 0

    # indicator of something going wrong, e.g. no further point or repeat node
    failure = False

    while (not failure) and piece != goal_index:
        neighbours = E[piece]
        if len(neighbours) == 0:
            # dead end: no further point to move to
            failure = True
            break

        here = V[piece]
        to_goal = x_goal - here
        angles = []
        for j in neighbours:
            step = V[j] - here
            # signed angle between step and to_goal via atan2(cross, dot)
            angle = math.atan2(step[0]*to_goal[1] - step[1]*to_goal[0],
                               step[0]*to_goal[0] + step[1]*to_goal[1])
            angles.append(abs(angle))             # absolute angle in radians

        # first neighbour attaining the minimal angle
        next_piece = neighbours[int(np.argmin(angles))]
        if next_piece in seen:
            failure = True            # means we have already been to this node
        else:
            distance += np.linalg.norm(V[next_piece] - here)
            visited.append(next_piece)
            seen.add(next_piece)
            piece = next_piece

    path_nodes = [V[v] for v in visited]

    if failure:
        return path_nodes, float('inf')
    return path_nodes, distance

In [14]:
def run_simulation_rej(seed, process=1):
    """Simulate one graph-free path from ``x_init`` towards ``x_goal``.

    Each step draws a radius, a sign and an angle, with rejection sampling to
    account for the dependence between the balls of adjacent time steps. The
    walk stops once the current point is within ``r(n, D)`` of the goal or
    after 2000 steps.

    Reads the module-level names ``n``, ``D``, ``x_init``, ``x_goal`` and
    ``r``, and reseeds numpy's global random state with ``seed``.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed``.
    process : {1, 2}
        1: the angle is an exponential scaled by ``pi / lambda`` with
           ``lambda = n * r_n^2 * pi``.
        2: a Poisson(``r_n^2 * pi * n``) count ``n_B`` is drawn first and the
           exponential is scaled by ``pi / n_B``.

    Returns
    -------
    defaultdict
        Always contains ``'seed'``. If the walk ended in fewer than 1000
        steps it also contains ``'T'``, ``'length'``, ``'last_point'``,
        ``'distance_to_goal'``, ``'R'``, ``'Theta'``, ``'S'``, ``'X'`` and
        ``'Y'``. Any missing key reads as ``float('inf')``.

    Raises
    ------
    ValueError
        If ``process`` is neither 1 nor 2.
    """
    # Fail fast: any other value would leave th_sample unassigned inside the
    # sampling loop and surface as an UnboundLocalError.
    if process not in (1, 2):
        raise ValueError("process must be 1 or 2, got {!r}".format(process))

    np.random.seed(seed)

    results = defaultdict(lambda: float('inf'))
    results['seed'] = seed

    X = [x_init[0]]
    Y = [x_init[1]]

    # The radius does not change during a run, so compute it once.
    r_n = r(n, D)
    lmbda = (n * (r_n ** 2) * math.pi)

    # WARNING: note the 0-index entries! these are not actual data and will mess with the statistics
    R = [r_n]
    Theta = [0]
    S = [0]
    Gamma = [0]

    t = 0
    while np.linalg.norm(np.array([X[-1], Y[-1]]) - x_goal) > r_n and t < 2000:
        t += 1 # timestep index

        # three samples for each time step
        # rejection sampling (to account for dependence between balls of adjacent timesteps)
        while True:
            r_sample = r_n * (np.random.uniform() ** 0.5)
            s_sample = np.random.choice(a=[-1, 1])
            if process == 1:
                # uses asymptotics on the # of points in a ball explicitly
                th_sample = (math.pi / lmbda) * np.random.exponential()
            else:
                # simulates the number of points in a ball, leading to a mixture of exponentials
                # NOTE(review): n_B == 0 makes pi / n_B infinite; this is
                # very unlikely for the n used here, but it is not handled.
                n_B = np.random.poisson((r_n ** 2) * math.pi * n)
                th_sample = (math.pi / n_B) * np.random.exponential()

            # reject if (r_next < r(n)-r_now AND theta_next < theta_now)
            if not (r_sample < r_n - R[-1] and th_sample < Theta[-1]):
                break

        R.append(r_sample)
        S.append(s_sample)
        Theta.append(th_sample)

        # now we can determine the rest
        X.append(X[t-1] + R[t] * np.cos(Gamma[t-1] - Theta[t] * S[t]))
        Y.append(Y[t-1] + R[t] * np.sin(Gamma[t-1] - Theta[t] * S[t]))

        g = np.arcsin((R[t] / np.linalg.norm(np.array([X[t], Y[t]]) - x_goal)) * np.sin(Theta[t] * S[t]))
        Gamma.append(Gamma[t-1] + g)

    # NOTE(review): the loop allows up to 2000 steps but results are only
    # recorded for runs shorter than 1000 steps; runs with 1000 <= t < 2000
    # are silently treated as failures. Confirm which limit is intended.
    if t < 1000:
        results['T'] = t
        results['length'] = sum(R[1:])
        results['last_point'] = (X[-1], Y[-1])
        results['distance_to_goal'] = np.linalg.norm(np.array([X[-1], Y[-1]]) - x_goal)
        results['R'] = R
        results['Theta'] = Theta
        results['S'] = S
        results['X'] = X
        results['Y'] = Y

    return results

In [15]:
# Run 1000 simulations of each graph-free process; run i uses seed 100*i for
# process 1 and seed 200*i for process 2.
simulation_outputs_1, simulation_outputs_2 = defaultdict(dict), defaultdict(dict)

for run_id in range(1, 1001):
    for outputs, seed_step, process in ((simulation_outputs_1, 100, 1),
                                        (simulation_outputs_2, 200, 2)):
        outputs[run_id] = run_simulation_rej(run_id * seed_step, process)

In [16]:
# Path lengths of the graph-free runs, keyed by process number.
lengths_free = {
    1: np.array([simulation_outputs_1[k]['length'] for k in simulation_outputs_1.keys()]),
    2: np.array([simulation_outputs_2[k]['length'] for k in simulation_outputs_1.keys()]),
}

# Step counts of the graph-free runs, keyed by process number.
counts_free = {
    1: np.array([simulation_outputs_1[k]['T'] for k in simulation_outputs_1.keys()]),
    2: np.array([simulation_outputs_2[k]['T'] for k in simulation_outputs_1.keys()]),
}

In [21]:
# statistics on length
# Subtract ell - r(n, D) from every length before summarising.
const = ell - r(n, D)

# order: graph based, graph free process 1, graph free process 2
for sample in (lengths_based, lengths_free[1], lengths_free[2]):
    print(describe(sample - const))

DescribeResult(nobs=500, minmax=(0.00017891251733326285, 0.00422558055945399), mean=0.001826468253166701, variance=1.0785253729415668e-06, skewness=0.2915675690835668, kurtosis=-0.7972677266609938)
DescribeResult(nobs=1000, minmax=(0.00015239665612232223, 0.00432112358874881), mean=0.0017220764890354128, variance=1.0789817741103442e-06, skewness=0.4119366401436809, kurtosis=-0.8156486069069042)
DescribeResult(nobs=1000, minmax=(0.00013226630043128296, 0.004252147560128483), mean=0.0017376367930726077, variance=1.0385456389154292e-06, skewness=0.3757084198615512, kurtosis=-0.7640511156893557)


In [19]:
# statistics on T

# order: graph based, graph free process 1, graph free process 2
for sample in (counts_based, counts_free[1], counts_free[2]):
    print(describe(sample))

DescribeResult(nobs=500, minmax=(367, 397), mean=380.966, variance=26.485815631262525, skewness=0.13994369244472263, kurtosis=0.02224633502166906)
DescribeResult(nobs=1000, minmax=(368, 403), mean=384.177, variance=32.099770770770775, skewness=0.05793985668671628, kurtosis=0.02372214346417323)
DescribeResult(nobs=1000, minmax=(368, 403), mean=384.138, variance=29.092048048048053, skewness=0.24501947709587263, kurtosis=0.06816466747428596)


In [22]:
# statistics on L(sqrt T)

# order: graph based, graph free process 1, graph free process 2
for step_counts, path_lengths in ((counts_based, lengths_based),
                                  (counts_free[1], lengths_free[1]),
                                  (counts_free[2], lengths_free[2])):
    print(describe((step_counts ** 0.5) * path_lengths))

DescribeResult(nobs=500, minmax=(21.60927272174488, 22.528879188945318), mean=22.03509885359079, variance=0.02300926832944089, skewness=0.1357297046350247, kurtosis=0.06549345589638111)
DescribeResult(nobs=1000, minmax=(21.643974041947722, 22.669086061470452), mean=22.125619616516676, variance=0.02735575210970632, skewness=0.037950637750920606, kurtosis=0.008051285554572463)
DescribeResult(nobs=1000, minmax=(21.65623214232296, 22.705496413777496), mean=22.124864955671402, variance=0.025111127806937295, skewness=0.23657585351558258, kurtosis=0.06467974529432974)


In [None]:
# Re-run both graph-free processes and print every summary in one cell.
simulation_outputs_1, simulation_outputs_2 = defaultdict(dict), defaultdict(dict)

# run i uses seed 100*i for process 1 and seed 200*i for process 2
for run_id in range(1, 1001):
    simulation_outputs_1[run_id] = run_simulation_rej(run_id * 100, 1)
    simulation_outputs_2[run_id] = run_simulation_rej(run_id * 200, 2)

# path lengths and step counts of the graph-free runs, keyed by process number
lengths_free = {
    1: np.array([simulation_outputs_1[k]['length'] for k in simulation_outputs_1.keys()]),
    2: np.array([simulation_outputs_2[k]['length'] for k in simulation_outputs_1.keys()]),
}
counts_free = {
    1: np.array([simulation_outputs_1[k]['T'] for k in simulation_outputs_1.keys()]),
    2: np.array([simulation_outputs_2[k]['T'] for k in simulation_outputs_1.keys()]),
}

# In every group below the order is:
# graph based, graph free process 1, graph free process 2

# statistics on length
const = ell - r(n, D)
print('L_n')
for sample in (lengths_based, lengths_free[1], lengths_free[2]):
    print(describe(sample - const))

# statistics on T
print('T_n')
for sample in (counts_based, counts_free[1], counts_free[2]):
    print(describe(sample))

# statistics on L(sqrt T)
print('asymp')
for step_counts, path_lengths in ((counts_based, lengths_based),
                                  (counts_free[1], lengths_free[1]),
                                  (counts_free[2], lengths_free[2])):
    print(describe((step_counts ** 0.5) * path_lengths))