In [1]:
import pandas as pd
import numpy as np
import itertools
import random
import ast
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import spearmanr, wilcoxon, friedmanchisquare, kendalltau
from matplotlib import animation
from mpl_toolkits import mplot3d
from scipy.spatial import distance
from pyxdameraulevenshtein import damerau_levenshtein_distance

pd.set_option('display.max_rows', 500)

In [59]:
df = pd.read_csv('all_task_environments_new.csv', header=0, skiprows=[1])

In [144]:
objects = ['t', 'n', 's', 'p', 'c']

coordinates = {'c': (1,4,4),
              'n': (1,1,2),
              'p': (1,4,4),
              's': (1,2,2),
              't': (1,1,2),
              'start': (2,1,2),
              'table': (4,3,2)}

c1 = {'c': 1.5,
    'n': 1.0,
    'p': 1.5,
    's': 1.5,
    't': 1.0}

k1 = {'c': 1.0,
    'n': 0.4,
    'p': 0.4,
    's': 1.0,
    't': 0.3}

start_coordinates = [[2,1.5,2],[4,3,2],[4,3,2],[4,3,2],[4,3,2]]

In [3]:
def predict_sequence(objects, coordinates, start_coordinates, c, k, dimension=[3,], steps=1):
    prediction = []
    possible_items = dict.fromkeys(objects, 0) # generate dict from object list
    coord_index = 0
    start_coords = start_coordinates
    coords = coordinates
    new_coords = {}
    new_start_coords = []
    
    if dimension[0] == 3: # no changes if 3D
        new_coords = coords
        new_start_coords = start_coords
        
    elif dimension[0] == 2: # 2D: remove obsolete coordinate
        if dimension[1] == 'xy':
            new_coords = {key: value[:-1] for key, value in coords.items()}
            new_start_coords = [x[:-1] for x in start_coords]
                
        elif dimension[1] == 'xz':
            new_start_coords = [[x[0], x[-1]] for x in start_coords]
            
            for key, value in coords.items():
                new_value = (value[0], value[-1])
                new_coords[key] = new_value
        
        elif dimension[1] == 'yz':
            new_coords = {key: value[1:] for key, value in coords.items()}
            new_start_coords = [x[1:] for x in start_coords]
                
    elif dimension[0] == 1: # 1D: choose appropriate coordinate
        if dimension[1] == 'x':
            new_coords = {key: value[0] for key, value in coords.items()}
            new_start_coords = [x[0] for x in start_coords]
        
        elif dimension[1] == 'y':
            new_coords = {key: value[1] for key, value in coords.items()}
            new_start_coords = [x[1] for x in start_coords]
        
        elif dimension[1] == 'z':
            new_coords = {key: value[2] for key, value in coords.items()}
            new_start_coords = [x[2] for x in start_coords]
    
    while bool(possible_items) == True: # while dict not empty
        for obj in possible_items.keys():
            possible_items[obj] = ((distance.euclidean(
                                new_start_coords[coord_index], 
                                new_coords[obj])
                                ) ** k[obj]) * c[obj]
        
        #print(possible_items)                      
        minval = min(possible_items.values())
        minval = [k for k, v in possible_items.items() if v == minval]
        minval = random.choice(minval) # choose prediction randomly if multiple items have same cost
        #print(minval)
        del possible_items[minval]
        
        if steps == 2 and bool(possible_items) == True:
            for obj in possible_items.keys():
                possible_items[obj] = ((distance.euclidean(
                                    new_coords[minval],
                                    new_coords[obj])
                                    )
                                    ** k[obj]) * c[obj]
            #print(possible_items)
            minval_step2 = min(possible_items.values())
            minval_step2 = [k for k, v in possible_items.items() if v == minval_step2]
            minval_step2 = random.choice(minval_step2) # choose prediction randomly if multiple items have same cost
            #print(minval_step2 + '____')
            del possible_items[minval_step2]
            prediction.append(''.join([minval, minval_step2]))
            coord_index += 1
            
        else:
            prediction.append(minval)
          
        coord_index += 1
        
    return prediction

In [146]:
predict_sequence(objects, coordinates, start_coordinates, c1, k1, steps=2)

['tn', 'pc', 's']

In [4]:
def get_average(objects, coordinates, start_coordinates, c, k, dimension, sequence, steps=1):

    edit_list = []

    for x in range(0,100):
        result = ''.join(predict_sequence(objects, coordinates, start_coordinates, c, k, dimension))
        dl = damerau_levenshtein_distance(sequence,result)
        edit_list.append(dl)
    
    avg = np.mean(edit_list)
    return avg

In [5]:
def get_avg_editdist(data, steps=1):
    results = pd.DataFrame()
    #dimensions = [[1,'x'],[1,'y'],[1,'z'],[2,'xy'],[2,'xz'],[2,'yz'],[3,'xyz']]
    dimensions = [[3,'xyz']]
    
    for row in range(0,len(df)):
        objects = list(df.at[row,'objects'].split(','))
        strong_k = list(df.at[row,'strong_k'].split(','))
        mid_k = list(df.at[row,'mid_k'].split(','))
        coordinates = {key: ast.literal_eval(value) for key, value in (elem.split(': ') for elem in df.at[row,'coordinates'].split(';'))}
        start_coordinates = list(ast.literal_eval(df.at[row, 'start_coordinates']))
        sequence = str(df.at[row,'sequence'])
        
        #for k in np.arange(0.0,1.0,0.1):
        for k in np.arange(0.1,0.9,0.1):
            k_strong = round(k,2)
            k_mid = round(k + 0.1,2)
            k1 = {obj: k_strong if obj in strong_k else k_mid if obj in mid_k else 1.0 for obj in objects}
                    
            for c in np.arange(1.1,2.0,0.1):
                c = round(c, 1)
                c1 = {obj: c if obj in df.at[row, 'containment'] else 1.0 for obj in objects}
                
                for dim in dimensions:                
                    # get average edit distance
                    edit_dist = get_average(objects, coordinates, start_coordinates, c1, k1, dim, sequence)
                    edit_dist = edit_dist / len(sequence)
                    
                    #params = 'c: ' + str(c) + ', k: ' + str(k_strong) + ',' + str(k_mid) + ', dim: ' + str(dim[1])
                    params = 'c: ' + str(c) + '; k: ' + str(k_strong) + ',' + str(k_mid) + '; ' + str(dim[1])
                    results.at[row,params] = edit_dist
                    
    return results

### Generate predictions for 1 and 2 steps of planning

In [60]:
#%%timeit -n1 -r1
# generate data for 2 steps of planning
results_new = get_avg_editdist(df, steps=2)

In [61]:
results_new

Unnamed: 0,"c: 1.1; k: 0.1,0.2; xyz","c: 1.2; k: 0.1,0.2; xyz","c: 1.3; k: 0.1,0.2; xyz","c: 1.4; k: 0.1,0.2; xyz","c: 1.5; k: 0.1,0.2; xyz","c: 1.6; k: 0.1,0.2; xyz","c: 1.7; k: 0.1,0.2; xyz","c: 1.8; k: 0.1,0.2; xyz","c: 1.9; k: 0.1,0.2; xyz","c: 1.1; k: 0.2,0.3; xyz",...,"c: 1.9; k: 0.7,0.8; xyz","c: 1.1; k: 0.8,0.9; xyz","c: 1.2; k: 0.8,0.9; xyz","c: 1.3; k: 0.8,0.9; xyz","c: 1.4; k: 0.8,0.9; xyz","c: 1.5; k: 0.8,0.9; xyz","c: 1.6; k: 0.8,0.9; xyz","c: 1.7; k: 0.8,0.9; xyz","c: 1.8; k: 0.8,0.9; xyz","c: 1.9; k: 0.8,0.9; xyz"
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
1,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,...,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
3,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
5,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,...,0.6,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,...,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333


In [62]:
# generate data for 1 step of planning
results1 = get_avg_editdist(df, steps=1)

In [9]:
def get_lowest_error(results):
    for col in list(results):
        results.loc['mean',col] = results[col].mean()
        results.loc['median',col] = results[col].median()
    lowest = min(results.loc['mean'])
    lowest_median = min(results.loc['median'])
    mean = list(results.loc['mean'])
    
    return lowest, results.columns[(results.loc['mean'] == lowest)], mean, results

### Compare results

In [65]:
# results for 2 steps
lowest, lowest_idx, list_mean, results_mean = get_lowest_error(results_new)
lowest, lowest_idx, results_mean['c: 1.7; k: 0.5,0.6; xyz']['median']

(0.41439191595441593, Index(['c: 1.7; k: 0.5,0.6; xyz'], dtype='object'), 0.4)

In [66]:
# results for 1 step
lowest1, lowest_idx1, list_mean1, results_mean1 = get_lowest_error(results1)
lowest1, lowest_idx1, results_mean1['c: 1.9; k: 0.5,0.6; xyz']['median']

(0.4158323158323159, Index(['c: 1.9; k: 0.5,0.6; xyz'], dtype='object'), 0.4)

In [67]:
# results for 2 steps
list_xyz = []
median_xyz = []

for col in results_mean:
    if col.split(';')[2].strip() == 'xyz':
        list_xyz.append(results_mean.at['mean',col])
        median_xyz.append(results_mean.at['median',col])
        
avg_xyz = np.mean(list_xyz)
med_xyz = np.median(median_xyz)
print('Average xyz: ' + str(avg_xyz) + ', median: ' + str(med_xyz))

Average xyz: 0.44312787726676617, median: 0.4


In [68]:
# results for 1 step
list_xyz1 = []
median_xyz1 = []

for col in results_mean1:
    if col.split(';')[2].strip() == 'xyz':
        list_xyz1.append(results_mean1.at['mean',col])
        median_xyz1.append(results_mean1.at['median',col])
        
avg_xyz1 = np.mean(list_xyz1)
med_xyz1 = np.median(median_xyz1)
print('Average xyz: ' + str(avg_xyz1) + ', median: ' + str(med_xyz1))

Average xyz: 0.4429239442720346, median: 0.4


In [69]:
avg_1step = 0
avg_2steps = 0
tie = 0

for i in range(0,len(list_mean)):
    if list_mean[i] > list_mean1[i]:
        avg_1step += 1
    elif list_mean[i] < list_mean1[i]:
        avg_2steps +=1
    elif list_mean[i] == list_mean1[i]:
        tie += 1

print(avg_1step, avg_2steps, tie)

39 33 0


### Stat analysis

In [70]:
# compare 1 vs 2 steps: mean + median

stat, p = wilcoxon(list_xyz, list_xyz1, zero_method='wilcox')
print('Wilcoxon for xyz: T = %.3f, p = %.3f' % (stat, p))

stat, p = wilcoxon(median_xyz, median_xyz1, zero_method='wilcox')
print('Wilcoxon for xyz median: T = %.3f, p = %.3f' % (stat, p))

Wilcoxon for xyz: T = 1114.000, p = 0.262
Wilcoxon for xyz median: T = 46.000, p = 0.049


### Plot 1 vs 2 steps (xyz)

In [27]:
# get data for plot (results for both)
c2 = [float(x[3:6]) for x in results_new.columns.tolist()]
k2 = [float(x[11:14]) for x in results_new.columns.tolist()]
error2 = results_mean.loc['mean'].tolist()
mean2 = results_mean.loc['median'].tolist()

c1 = [float(x[3:6]) for x in results1.columns.tolist()]
k1 = [float(x[11:14]) for x in results1.columns.tolist()]
error1 = results_mean1.loc['mean'].tolist()
mean1 = results_mean1.loc['median'].tolist()

In [58]:
%matplotlib qt

# create figure, 3d grid, set background to white
fig2 = plt.figure(figsize=(12,8))
ax2 = fig2.add_subplot(111, projection='3d')
ax2.w_xaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax2.w_yaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax2.w_zaxis.set_pane_color((1.0,1.0,1.0,1.0))

# create plot
step1 = ax2.scatter(c1, k1, error1, alpha=0.5, s=38, c='blue')
step2 = ax2.scatter(c2, k2, error2, alpha=0.5, s=38, c='red')

# set labels
ax2.set_ylabel('parameter k', fontsize=20, labelpad=10)
ax2.set_xlabel('parameter c', fontsize=20, labelpad=10)
ax2.set_zlabel('normalized edit distance', fontsize=20, labelpad=7)

# plot horizontal plane for baseline
#xx, yy = np.meshgrid(np.linspace(1,1.9), np.linspace(0,0.9))
#zz = xx * 0 + 0.603
#ax2.plot_surface(xx, yy, zz, alpha=0.5)

ax2.legend(loc=(0.5,0.5,0.5), frameon=0)

# add legend
plt.legend((step1, step2),
           ('1 step', '2 steps'),
           scatterpoints=4,
           fontsize=18,
           loc='lower left', fancybox=True, framealpha=0.2)

plt.show()

No handles with labels found to put in legend.


### Plot model for all dimensions (2 steps)

In [19]:
# get data for plot (results for 2 steps)
c = [float(x[3:6]) for x in results_new.columns.tolist()]
k = [float(x[11:14]) for x in results_new.columns.tolist()]
dim = [x.strip() for x in results_new.columns.str.split(';').str[2]]
error = results_mean.loc['mean'].tolist()

In [21]:
# create plot (results for 2 steps)

%matplotlib qt

cm = ['red','blue','green','magenta','cyan','orange','grey']
dim_num = [0 if x=='x' else 1 if x=='y' else 2 if x=='z' else 3 if x=='xy' else 4 if x=='xz' else 5 if x=='yz' else 6 for x in dim]

cmap = matplotlib.colors.ListedColormap(cm)

ticks = ['x', 'y', 'z', 'xy', 'xz', 'yz', 'xyz']
norm = matplotlib.colors.BoundaryNorm(ticks, cmap.N)

# create figure, 3d grid, set background to white
fig2 = plt.figure(figsize=(12,8))
ax2 = fig2.add_subplot(111, projection='3d')
ax2.w_xaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax2.w_yaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax2.w_zaxis.set_pane_color((1.0,1.0,1.0,1.0))

# create plot
img = ax2.scatter(c, k, error, alpha=0.5, s=38, c=dim_num, cmap=cmap)

# set labels
ax2.set_ylabel('parameter k', fontsize=14, labelpad=7)
ax2.set_xlabel('parameter c', fontsize=14, labelpad=7)
ax2.set_zlabel('normalized edit distance', fontsize=14, labelpad=7)
plt.title('Average edit distance (2 steps)', fontsize=16)

# create colorbar
cb = plt.colorbar(img, cax = fig2.add_axes([0.9,0.3,0.03,0.4]))
cb.ax.set_yticklabels(ticks)
plt.show()

In [281]:
# generate rotating image

fig3 = plt.figure(figsize=(12,8))
ax3 = mplot3d.Axes3D(fig3)
ax3.w_xaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax3.w_yaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax3.w_zaxis.set_pane_color((1.0,1.0,1.0,1.0))
ax3.set_ylabel('parameter k', fontsize=14, labelpad=7)
ax3.set_xlabel('parameter c', fontsize=14, labelpad=7)
ax3.set_zlabel('normalized edit distance', fontsize=14, labelpad=7)

def init():
    ax3.scatter(c, k, error, alpha=0.5, s=38, c=dim_num, cmap=cmap)
    
    return fig,

def animate(i):
    ax3.view_init(elev=10, azim=i*4)
    return fig,

animated = animation.FuncAnimation(fig3, animate, init_func=init, frames=90, interval=50, blit=True)

In [284]:
# save animated fig

plt.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'

fig_name = 'rotate_model_lowesterror_3D'
#animated.save(fig_name+'.gif', writer='imagemagick', fps=1000/50)

Writer = animation.writers['ffmpeg']
writer = Writer(fps=5, bitrate=10000)
animated.save(fig_name+'.mp4', writer=writer)