In [288]:
import pandas as pd
import numpy as np
import itertools
import random
import ast
import plotly
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import animation
from mpl_toolkits import mplot3d
from scipy.spatial import distance
from pyxdameraulevenshtein import damerau_levenshtein_distance

# Let pandas show up to 500 rows before it truncates a displayed frame.
pd.set_option('display.max_rows', 500)

In [251]:
# Load the task table. File line 0 supplies the column names; file line 1
# (the first line below the header) is skipped.
# NOTE(review): presumably that line is a description/units row — confirm.
df = pd.read_csv('all_task_environments.csv', header=0, skiprows=[1])

In [6]:
def predict_sequence(objects, coordinates, start_coordinates, c, k, dimension=(3,)):
    """Greedily predict the order in which ``objects`` are selected.

    At every step the cost of each remaining object is
    ``euclidean(start, position) ** k[obj] * c[obj]``, where ``start`` is the
    entry of ``start_coordinates`` belonging to that step. The cheapest object
    is appended to the prediction and removed from the candidates. Ties are
    broken at random with ``random.choice``, so repeated calls may differ.

    Parameters
    ----------
    objects : iterable
        Candidate object identifiers (duplicates collapse to one candidate).
    coordinates : dict
        Maps each object to its position, indexed as (x, y, z).
    start_coordinates : sequence
        Reference position for each step, indexed as (x, y, z). Must hold at
        least one entry per unique object, because step ``i`` reads entry ``i``.
    c : dict
        Per-object multiplier applied to the distance term.
    k : dict
        Per-object exponent applied to the distance.
    dimension : sequence, optional
        ``(3,)`` (default) uses the coordinates unchanged. ``(2, 'xy')``,
        ``(2, 'xz')`` or ``(2, 'yz')`` keeps two axes, and ``(1, 'x')``,
        ``(1, 'y')`` or ``(1, 'z')`` keeps a single axis.

    Returns
    -------
    list
        The objects in predicted order.

    Raises
    ------
    ValueError
        If ``dimension`` is not one of the supported combinations.
    """
    # Axis indices kept for every supported (count, name) combination.
    projections = {
        (1, 'x'): (0,), (1, 'y'): (1,), (1, 'z'): (2,),
        (2, 'xy'): (0, 1), (2, 'xz'): (0, 2), (2, 'yz'): (1, 2),
    }

    if dimension[0] == 3:
        axes = None  # 3D: coordinates are used as they are
    else:
        try:
            axes = projections[tuple(dimension[:2])]
        except KeyError:
            raise ValueError('unsupported dimension: {!r}'.format(dimension)) from None

    def project(point):
        """Return ``point`` reduced to the selected axes as a 1-D array."""
        selected = point if axes is None else [point[i] for i in axes]
        # Always hand a 1-D vector to SciPy: newer releases reject the bare
        # scalars that the single-axis case would otherwise produce.
        return np.atleast_1d(selected)

    targets = {obj: project(position) for obj, position in coordinates.items()}
    starts = [project(position) for position in start_coordinates]

    remaining = dict.fromkeys(objects, 0)  # object -> cost at the current step
    prediction = []
    step = 0

    while remaining:
        for obj in remaining:
            remaining[obj] = (distance.euclidean(starts[step], targets[obj])
                              ** k[obj]) * c[obj]

        lowest_cost = min(remaining.values())
        cheapest = [obj for obj, cost in remaining.items() if cost == lowest_cost]
        chosen = random.choice(cheapest)  # random pick among equally cheap objects

        prediction.append(chosen)
        del remaining[chosen]
        step += 1

    return prediction

In [7]:
def get_average(objects, coordinates, start_coordinates, c, k, dimension, sequence, n_runs=100):
    """Return the mean edit distance between ``sequence`` and repeated predictions.

    ``predict_sequence`` breaks ties at random, so the prediction is repeated
    ``n_runs`` times and the Damerau-Levenshtein distances to ``sequence`` are
    averaged.

    Parameters
    ----------
    objects, coordinates, start_coordinates, c, k, dimension
        Passed unchanged to ``predict_sequence``.
    sequence : str
        Reference sequence. Each prediction is joined into one string with
        ``''.join`` before comparison, so object identifiers must be strings.
    n_runs : int, optional
        Number of predictions to average over (default 100).

    Returns
    -------
    float
        Mean edit distance over all runs.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1')

    edit_distances = [
        damerau_levenshtein_distance(
            sequence,
            ''.join(predict_sequence(objects, coordinates, start_coordinates, c, k, dimension)),
        )
        for _ in range(n_runs)
    ]
    return np.mean(edit_distances)

In [244]:
def get_avg_editdist(data):
    """Run the parameter sweep and return normalized mean edit distances.

    For every row of ``data`` and every combination of ``k`` (0.0 to 0.9 in
    steps of 0.1), ``c`` (1.0 to 1.9 in steps of 0.1) and dimension subset,
    the mean edit distance from ``get_average`` is divided by the length of
    the row's reference sequence.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per task. The string columns read here are:
        ``objects``, ``strong_k``, ``mid_k`` (comma-separated identifiers),
        ``coordinates`` (``'name: <literal>'`` entries separated by ``';'``),
        ``start_coordinates`` (a Python literal), ``sequence`` and
        ``containment``. Row labels must be unique.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data``, with one column per parameter combination
        labelled ``'c: <c>; k: <k_strong>,<k_mid>; <dimension>'``.
    """
    dimensions = [[1,'x'],[1,'y'],[1,'z'],[2,'xy'],[2,'xz'],[2,'yz'],[3,'xyz']]
    records = []

    # Iterate over the frame that was passed in (not a notebook global) and
    # over its actual labels, so a non-default index works as well.
    for row in data.index:
        objects = data.at[row, 'objects'].split(',')
        strong_k = data.at[row, 'strong_k'].split(',')
        mid_k = data.at[row, 'mid_k'].split(',')
        coordinates = {
            name: ast.literal_eval(position)
            for name, position in (entry.split(': ')
                                   for entry in data.at[row, 'coordinates'].split(';'))
        }
        start_coordinates = list(ast.literal_eval(data.at[row, 'start_coordinates']))
        sequence = str(data.at[row, 'sequence'])
        # Kept as the raw cell value: membership below is a substring test.
        containment = data.at[row, 'containment']

        record = {}
        for k_value in np.arange(0.0, 1.0, 0.1):
            k_strong = round(k_value, 2)
            k_mid = round(k_value + 0.05, 2)
            # Exponent per object: strong, mid, or 1.0 for everything else.
            k1 = {obj: k_strong if obj in strong_k else k_mid if obj in mid_k else 1.0
                  for obj in objects}

            for c_value in np.arange(1.0, 2.0, 0.1):
                c = round(c_value, 1)
                # Multiplier per object: c for objects listed under containment.
                c1 = {obj: c if obj in containment else 1.0 for obj in objects}

                for dim in dimensions:
                    edit_dist = get_average(objects, coordinates, start_coordinates,
                                            c1, k1, dim, sequence)
                    params = 'c: ' + str(c) + '; k: ' + str(k_strong) + ',' + str(k_mid) + '; ' + str(dim[1])
                    record[params] = edit_dist / len(sequence)

        records.append(record)

    # Build the frame once instead of growing it one cell at a time.
    return pd.DataFrame(records, index=list(data.index))

# TODO:
# - Which range should the difference between the strong and mid k values cover?
# - Which ranges should c and k cover in general?

In [252]:
#%%timeit -n1 -r1
# Run the full parameter sweep on the loaded task table. Every parameter
# combination calls get_average, which by default repeats the prediction
# 100 times, so this cell is the expensive one.
results_new = get_avg_editdist(df)

In [253]:
# Show the sweep results: one row per input row, one column per
# 'c; k; dimension' parameter combination.
results_new

Unnamed: 0,"c: 1.0; k: 0.0,0.05; x","c: 1.0; k: 0.0,0.05; y","c: 1.0; k: 0.0,0.05; z","c: 1.0; k: 0.0,0.05; xy","c: 1.0; k: 0.0,0.05; xz","c: 1.0; k: 0.0,0.05; yz","c: 1.0; k: 0.0,0.05; xyz","c: 1.1; k: 0.0,0.05; x","c: 1.1; k: 0.0,0.05; y","c: 1.1; k: 0.0,0.05; z",...,"c: 1.8; k: 0.9,0.95; xz","c: 1.8; k: 0.9,0.95; yz","c: 1.8; k: 0.9,0.95; xyz","c: 1.9; k: 0.9,0.95; x","c: 1.9; k: 0.9,0.95; y","c: 1.9; k: 0.9,0.95; z","c: 1.9; k: 0.9,0.95; xy","c: 1.9; k: 0.9,0.95; xz","c: 1.9; k: 0.9,0.95; yz","c: 1.9; k: 0.9,0.95; xyz"
0,0.4,0.756,0.716,0.47,0.212,0.296,0.21,0.184,0.534,0.696,...,0.304,0.332,0.108,0.202,0.674,0.484,0.31,0.284,0.318,0.106
1,0.41,0.712,0.692,0.332,0.342,0.696,0.33,0.216,0.698,0.7,...,0.49,0.548,0.506,0.188,0.538,0.534,0.514,0.498,0.554,0.508
2,0.38,0.712,0.704,0.216,0.194,0.698,0.19,0.2,0.7,0.71,...,0.308,0.482,0.292,0.188,0.5,0.456,0.294,0.31,0.476,0.314
3,0.566,0.4,0.4,0.342,0.33,0.4,0.332,0.382,0.4,0.4,...,0.086,0.27,0.078,0.42,0.282,0.284,0.112,0.096,0.28,0.102
4,0.323333,0.666667,0.666667,0.51,0.333333,0.666667,0.333333,0.0,0.666667,0.333333,...,0.0,0.486667,0.333333,0.0,0.53,0.666667,0.486667,0.0,0.473333,0.333333
5,0.696,0.518,0.8,0.596,0.4,0.3,0.4,0.718,0.3,0.8,...,0.4,0.8,0.8,0.65,0.4,0.8,0.628,0.4,0.8,0.8
6,0.7875,0.7075,0.8075,1.0,0.775,1.0,1.0,0.72,0.82,0.74,...,0.715,0.5,0.5,0.765,0.5,0.77,0.5,0.76,0.5,0.5
7,0.805,1.0,0.695,1.0,0.77,1.0,1.0,1.0,1.0,0.7125,...,1.0,1.0,1.0,1.0,1.0,0.7125,1.0,1.0,1.0,1.0
8,0.173333,0.293333,0.47,0.0,0.18,0.366667,0.0,0.163333,0.266667,0.423333,...,0.156667,0.32,0.0,0.17,0.346667,0.463333,0.0,0.153333,0.32,0.0
9,0.163333,0.54,0.176667,0.333333,0.196667,0.333333,0.333333,0.156667,0.503333,0.203333,...,0.173333,0.333333,0.333333,0.176667,0.496667,0.176667,0.333333,0.176667,0.333333,0.333333


In [75]:
def get_lowest_error(results):
    """Find the column(s) with the lowest mean and append a ``'mean'`` row.

    The input frame is left untouched. If it already contains a row labelled
    ``'mean'`` (for example from an earlier call), that row is dropped before
    the means are computed, so repeated calls give the same result.

    Parameters
    ----------
    results : pandas.DataFrame
        Numeric frame with one column per parameter combination.

    Returns
    -------
    lowest : float
        The smallest column mean.
    columns : pandas.Index
        Labels of every column whose mean equals ``lowest``.
    results_with_mean : pandas.DataFrame
        A new frame: the data rows followed by a ``'mean'`` row.
    """
    data_rows = results.drop(index='mean', errors='ignore')
    column_means = data_rows.mean()

    lowest = column_means.min()
    best_columns = data_rows.columns[(column_means == lowest).to_numpy()]

    # Append the means as one extra row on a new frame rather than writing
    # into the caller's frame column by column.
    results_with_mean = pd.concat([data_rows, column_means.to_frame('mean').T])

    return lowest, best_columns, results_with_mean

In [254]:
# Find the parameter combination(s) with the lowest mean error over all rows.
# results_mean is the results table including the additional 'mean' row.
lowest, lowest_idx, results_mean = get_lowest_error(results_new)
lowest, lowest_idx

(0.43196858465608456, Index(['c: 1.5; k: 0.2,0.25; xy'], dtype='object'))

In [255]:
# Recover the sweep parameters from the column labels, which have the form
# 'c: <c>; k: <k_strong>,<k_mid>; <dimension>'. Splitting on the delimiters
# (rather than slicing fixed character positions) keeps this working when a
# value is printed with a different number of digits.
# All four lists are read from results_mean so they stay aligned by column.
param_fields = [label.split(';') for label in results_mean.columns]
c = [float(fields[0].split(':')[1]) for fields in param_fields]
k = [float(fields[1].split(':')[1].split(',')[0]) for fields in param_fields]  # strong k only
dim = [fields[2].strip() for fields in param_fields]
error = results_mean.loc['mean'].tolist()


In [249]:
# Triangulated surface of the per-column mean error over the (c, k) values.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

ax.plot_trisurf(c, k, error, linewidth=0.2)

ax.set_xlabel('c', fontsize=14, labelpad=7)
ax.set_ylabel('k', fontsize=14, labelpad=7)
ax.set_title('Average edit distance', fontsize=16)
plt.show()

In [313]:
#%matplotlib qt

# One colour per dimension subset; the position in `ticks` is the integer
# code (0..6) that the subset is mapped to.
cm = ['red','blue','green','magenta','cyan','orange','grey']
ticks = ['x', 'y', 'z', 'xy', 'xz', 'yz', 'xyz']
dim_code = {name: code for code, name in enumerate(ticks)}
# Labels that are not a known subset fall back to the last code.
dim_num = [dim_code.get(x, len(ticks) - 1) for x in dim]

cmap = matplotlib.colors.ListedColormap(cm)

# Bin edges at -0.5, 0.5, ..., 6.5 put every integer code in the middle of its
# own colour band, so colorbar ticks and labels line up with the colours.
norm = matplotlib.colors.BoundaryNorm(np.arange(len(ticks) + 1) - 0.5, cmap.N)

# create figure, 3d grid, set background to white
fig2 = plt.figure(figsize=(12,8))
ax2 = fig2.add_subplot(111, projection='3d')
# xaxis/yaxis/zaxis are used instead of the w_xaxis/w_yaxis/w_zaxis aliases,
# which newer Matplotlib releases no longer provide.
for pane_axis in (ax2.xaxis, ax2.yaxis, ax2.zaxis):
    pane_axis.set_pane_color((1.0,1.0,1.0,1.0))

# create plot
img = ax2.scatter(c, k, error, alpha=0.5, s=38, c=dim_num, cmap=cmap, norm=norm)

# set labels
ax2.set_ylabel('parameter k', fontsize=14, labelpad=7)
ax2.set_xlabel('parameter c', fontsize=14, labelpad=7)
ax2.set_zlabel('normalized edit distance', fontsize=14, labelpad=7)
ax2.set_title('Average edit distance', fontsize=16)

# create colorbar with exactly one tick per dimension subset
cb = fig2.colorbar(img, cax=fig2.add_axes([0.9,0.3,0.03,0.4]),
                   ticks=list(range(len(ticks))))
cb.set_ticklabels(ticks)
plt.show()

In [281]:
# generate rotating image

fig3 = plt.figure(figsize=(12,8))
# Create the 3D axes through the figure so that they are attached to it;
# the rectangle [0, 0, 1, 1] makes them fill the whole figure.
ax3 = fig3.add_axes([0, 0, 1, 1], projection='3d')
# xaxis/yaxis/zaxis are used instead of the w_xaxis/w_yaxis/w_zaxis aliases,
# which newer Matplotlib releases no longer provide.
for pane_axis in (ax3.xaxis, ax3.yaxis, ax3.zaxis):
    pane_axis.set_pane_color((1.0,1.0,1.0,1.0))
ax3.set_ylabel('parameter k', fontsize=14, labelpad=7)
ax3.set_xlabel('parameter c', fontsize=14, labelpad=7)
ax3.set_zlabel('normalized edit distance', fontsize=14, labelpad=7)

# Draw the points once. The init function may run more than once (for example
# again when saving), so drawing inside it would stack duplicate scatters.
# vmin/vmax pin code i to colour i even if some dimension codes are absent.
points3 = ax3.scatter(c, k, error, alpha=0.5, s=38, c=dim_num, cmap=cmap,
                      vmin=0, vmax=cmap.N - 1)

def init():
    """Return the artist that is already drawn; nothing else to set up."""
    return points3,

def animate(i):
    """Rotate the camera: fixed elevation of 10 degrees, 4 degrees of azimuth per frame."""
    ax3.view_init(elev=10, azim=i*4)
    return points3,

# 90 frames * 4 degrees = one full turn. Blitting is switched off because a
# camera rotation changes the whole axes, so every frame is a full redraw.
animated = animation.FuncAnimation(fig3, animate, init_func=init, frames=90, interval=50, blit=False)

In [284]:
# save animated fig

# Tell Matplotlib where the ffmpeg binary is; this has to happen before the
# writer is looked up below.
# NOTE(review): the absolute path is machine-specific — adjust it if ffmpeg is
# installed elsewhere.
plt.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'

# Base name of the output file; the extension is appended when saving.
fig_name = 'rotate_model_lowesterror_3D'
#animated.save(fig_name+'.gif', writer='imagemagick', fps=1000/50)

# Fetch the registered ffmpeg writer class and write the animation as MP4.
# NOTE(review): fps=5 differs from the 20 fps implied by interval=50 ms (and
# used by the commented-out GIF export) — confirm that the slower playback is
# intended.
Writer = animation.writers['ffmpeg']
writer = Writer(fps=5, bitrate=10000)
animated.save(fig_name+'.mp4', writer=writer)