In [1]:
# Show which Python interpreter this kernel is running under.
import sys
sys.executable

'/usr/local/opt/python/bin/python3.7'

In [2]:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
from scipy.stats import uniform, expon, poisson, describe
import math

import matplotlib.pyplot as plt

from copy import deepcopy
from collections import defaultdict

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import pickle

In [3]:
# Problem setup: sample count n, dimension D, and the start/goal pair.
n = 5_000_000
D = 2

# Distance between x_init and x_goal (the goal sits on the x-axis at ell).
ell = np.sqrt(2) * 0.8

x_init = np.array([0, 0])
x_goal = np.array([ell, 0])

In [4]:
# r_n function
def r(n, D):
    """Return the radius r_n = n ** (-1 / (2 * D)) / 5.

    Parameters
    ----------
    n : number
        Base of the power (the sample count when called from this notebook).
    D : number
        Dimension; sets the exponent -1 / (2 * D).

    Notes
    -----
    Designed for n = 5 * 10^6.
    """
    exponent = -1 / (2 * D)
    return (n ** exponent) / 5

In [23]:
# Start-to-goal distance ell expressed in units of the radius r(n, D).
ell / r(n ,D)

267.49612199056884

In [5]:
# Load the pickled path points of trials 1..500 and derive per-trial
# statistics: path length with and without the final segment, and the number
# of points excluding the first and last.
filename = 'slurm scripts/data/08-14-20/path_points/'

points = []
distances = []
lengths_based = []
counts = []

# Maps a point count to the list of trial indices that produced it.
T_n_indices = defaultdict(list)

for i in range(1,501):
    # NOTE: in an earlier run the files with i % 50 == 0 did not exist and
    # were skipped; all 500 are expected to be present now.
    # SECURITY: pickle.load can execute arbitrary code -- only load files
    # produced by a trusted pipeline.
    # The context manager closes each file; the loop previously leaked
    # one open handle per trial.
    with open(filename+str(i)+'-dim2-n5000000_pathpoints.pkl', "rb") as f:
        pathpoints = pickle.load(f)
    points.append(pathpoints)

    # Sum of every segment except the last one (the hop into the final point).
    s_n = 0
    for j in range(len(pathpoints)-2):
        s_n += np.linalg.norm(pathpoints[j+1] - pathpoints[j])
    lengths_based.append(s_n)

    # Full path length: add the final segment back in.
    distances.append(s_n + np.linalg.norm(pathpoints[-1] - pathpoints[-2]))

    # Number of points excluding the first and the last.
    counts.append(len(pathpoints)-2)
    T_n_indices[len(pathpoints)-2].append(i)

    # Report any trial whose final point is not exactly (0.9, 0.9).
    if pathpoints[-1][0] != 0.9 or pathpoints[-1][1] != 0.9:
        print(i)


distances = np.array(distances)
lengths_based = np.array(lengths_based)
counts_based = np.array(counts)

In [19]:
# Sanity check: number of trials for which a length was recorded.
len(lengths_based)

500

In [11]:
# n * pi * r(n, D)^2 -- the same expression run_simulation passes to
# np.random.poisson as the mean number of points in a ball.
(r(n,D) ** 2) * math.pi * n

280.9925892416291

In [12]:
def run_simulation(seed, process=1):
    """Simulate one graph-free path from x_init towards x_goal.

    Every step draws a step length R, a sign S and an angle Theta, moves the
    current point, and updates the heading Gamma. Stepping stops once the
    point is within r(n, D) of x_goal, or after 2000 steps.

    Parameters
    ----------
    seed : int
        Passed to np.random.seed, i.e. reseeds NumPy's global RNG.
    process : {1, 2}, optional
        1: Theta = (pi / lmbda) * Exp(1), lmbda = n * r(n, D)**2 * pi.
        2: Theta = (pi / n_B) * Exp(1), n_B ~ Poisson(r(n, D)**2 * pi * n).

    Returns
    -------
    collections.defaultdict
        Always contains 'seed'. If fewer than 1000 steps were taken it also
        contains 'T', 'length', 'last_point', 'distance_to_goal', 'R',
        'Theta', 'S', 'X' and 'Y'. Any other key reads as float('inf').

    Raises
    ------
    ValueError
        If `process` is neither 1 nor 2. (Previously this surfaced as an
        IndexError on Theta[t] inside the loop.)
    """
    if process not in (1, 2):
        raise ValueError(f"process must be 1 or 2, got {process!r}")

    np.random.seed(seed)

    results = defaultdict(lambda: float('inf'))
    results['seed'] = seed

    # r(n, D) does not change during the run; evaluate it once. The two
    # products below keep their original operand order so the floating-point
    # values are unchanged.
    radius = r(n, D)
    lmbda = (n * (radius ** 2) * math.pi)
    poisson_mean = (radius ** 2) * math.pi * n

    X = [x_init[0]]
    Y = [x_init[1]]

    # Index 0 is a placeholder so that index t refers to timestep t.
    R = [None]
    Theta = [None]
    S = [None]
    Gamma = [0]

    t = 0
    while np.linalg.norm(np.array([X[-1], Y[-1]]) - x_goal) > radius and t < 2000:
        t += 1 # timestep index

        # three samples for each time step (draw order fixes the RNG stream)
        R.append(radius * (np.random.uniform() ** 0.5))
        S.append(np.random.choice(a=[-1, 1]))
        if process == 1:
            # uses asymptotics on the # of points in a ball explicitly
            Theta.append((math.pi / lmbda) * np.random.exponential())
        else:
            # simulates the number of points in a ball, leading to a mixture
            # of exponentials
            # NOTE(review): n_B == 0 would make the angle infinite; with the
            # notebook's parameters the Poisson mean is about 281.
            n_B = np.random.poisson(poisson_mean)
            Theta.append((math.pi / n_B) * np.random.exponential())

        # now we can determine the rest
        signed_angle = Theta[t] * S[t]
        X.append(X[t-1] + R[t] * np.cos(Gamma[t-1] - signed_angle))
        Y.append(Y[t-1] + R[t] * np.sin(Gamma[t-1] - signed_angle))

        g = np.arcsin((R[t] / np.linalg.norm(np.array([X[t], Y[t]]) - x_goal)) * np.sin(signed_angle))
        Gamma.append(Gamma[t-1] + g)

    # NOTE(review): the loop allows up to 2000 steps but results are only
    # recorded below 1000 -- confirm the stricter threshold is intended.
    if t < 1000:
        results['T'] = t
        results['length'] = sum(R[1:])
        results['last_point'] = (X[-1], Y[-1])
        results['distance_to_goal'] = np.linalg.norm(np.array([X[-1], Y[-1]]) - x_goal)
        results['R'] = R
        results['Theta'] = Theta
        results['S'] = S
        results['X'] = X
        results['Y'] = Y

    return results

In [13]:
# Run 1000 seeded simulations per process: process 1 uses seeds that are
# multiples of 100, process 2 uses multiples of 200.
simulation_outputs_1, simulation_outputs_2 = defaultdict(dict), defaultdict(dict)

for i in range(1, 1001):
    seed_1, seed_2 = 100 * i, 200 * i
    simulation_outputs_1[i] = run_simulation(seed_1, 1)
    simulation_outputs_2[i] = run_simulation(seed_2, 2)

In [9]:
def show_results(idx, key, process):
    """Print field `key` of simulation run `idx`.

    Reads from simulation_outputs_1 when `process` equals 1 and from
    simulation_outputs_2 for any other value.
    """
    outputs = simulation_outputs_1 if process == 1 else simulation_outputs_2
    print(outputs[idx][key])

# Interactive browser: choose a run index, a result field and a process.
interact(
    show_results,
    idx=widgets.IntSlider(min=1, max=1000, step=1, value=1),
    key=widgets.Dropdown(options=simulation_outputs_1[1].keys()),
    process=widgets.Dropdown(options=[1,2]),
)

interactive(children=(IntSlider(value=1, description='idx', max=1000, min=1), Dropdown(description='key', opti…

<function __main__.show_results(idx, key, process)>

In [14]:
# Gather 'length' and 'T' from every run, keyed by process (1 or 2).
lengths_free = {1 : [], 2 : []}
counts_free = {1 : [], 2 : []}

for k in simulation_outputs_1.keys():
    run_1, run_2 = simulation_outputs_1[k], simulation_outputs_2[k]
    lengths_free[1].append(run_1['length'])
    lengths_free[2].append(run_2['length'])
    counts_free[1].append(run_1['T'])
    counts_free[2].append(run_2['T'])

# Convert the collected lists to arrays for vectorised statistics.
for proc in (1, 2):
    lengths_free[proc] = np.array(lengths_free[proc])
    counts_free[proc] = np.array(counts_free[proc])

In [7]:
# statistics on length

# Offset subtracted from every length below. It is defined here because, in
# file order, the only other definition of `const` sits in the last cell, so
# a top-to-bottom run would fail with a NameError on `const`.
const = ell - r(n, D)

print(describe(lengths_based - const))     # graph based
print(describe(lengths_free[1] - const))   # graph free process 1
print(describe(lengths_free[2] - const))   # graph free process 2

DescribeResult(nobs=500, minmax=(0.00017891251733326285, 0.00422558055945399), mean=0.001826468253166701, variance=1.0785253729415668e-06, skewness=0.2915675690835668, kurtosis=-0.7972677266609938)


NameError: name 'lengths_free' is not defined

In [29]:
# Median excess length over `const` for each method.
for length_sample in (
    lengths_based,     # graph based
    lengths_free[1],   # graph free process 1
    lengths_free[2],   # graph free process 2
):
    print(np.median(length_sample - const))

0.001814303273684259
0.0016364237807000581
0.0016712247163491734


In [21]:
# statistics on T
for count_sample in (
    counts_based,     # graph based
    counts_free[1],   # graph free process 1
    counts_free[2],   # graph free process 2
):
    print(describe(count_sample))

DescribeResult(nobs=500, minmax=(367, 397), mean=380.966, variance=26.485815631262525, skewness=0.13994369244472263, kurtosis=0.02224633502166906)
DescribeResult(nobs=1000, minmax=(377, 425), mean=400.383, variance=53.33965065065066, skewness=0.1822804910453342, kurtosis=-0.009043922347493183)
DescribeResult(nobs=1000, minmax=(383, 430), mean=400.399, variance=47.219018018018026, skewness=0.3488901008104228, kurtosis=0.1037307330989039)


In [30]:
# Median of T for each method.
for count_sample in (
    counts_based,     # graph based
    counts_free[1],   # graph free process 1
    counts_free[2],   # graph free process 2
):
    print(np.median(count_sample))

381.0
400.0
400.0


In [22]:
# statistics on L(sqrt T)
for count_sample, length_sample in (
    (counts_based, lengths_based),         # graph based
    (counts_free[1], lengths_free[1]),     # graph free process 1
    (counts_free[2], lengths_free[2]),     # graph free process 2
):
    print(describe((count_sample ** 0.5) * length_sample))

DescribeResult(nobs=500, minmax=(21.60927272174488, 22.528879188945318), mean=22.03509885359079, variance=0.02300926832944089, skewness=0.1357297046350247, kurtosis=0.06549345589638111)
DescribeResult(nobs=1000, minmax=(21.89789043140138, 23.245437519822417), mean=22.587674817442725, variance=0.042659445852897215, skewness=0.13874358844795132, kurtosis=0.023272869662248397)
DescribeResult(nobs=1000, minmax=(22.0866096453326, 23.39938857682867), mean=22.588416870455863, variance=0.038297549615985964, skewness=0.30022848640342853, kurtosis=0.00667076243875897)


In [6]:
# Offset subtracted from path lengths in the length-statistics cells:
# the start-to-goal distance ell minus one radius r(n, D).
# NOTE(review): this cell ran as In [6] but sits at the end of the file, after
# cells that read `const`; confirm execution order before a top-to-bottom run.
const = ell - r(n, D)