In [160]:
import pandas as pd
import numpy as np
import itertools
import random
import ast
from scipy.spatial import distance
from pyxdameraulevenshtein import damerau_levenshtein_distance

# Let pandas display up to 500 rows before it truncates a frame.
pd.options.display.max_rows = 500

In [468]:
# Load the task environments: file line 0 is used as the header and
# file line 1 (the row directly below the header) is skipped.
df = pd.read_csv(
    'all_task_environments.csv',
    header=0,
    skiprows=[1],
)

In [494]:
def get_avg_editdist(data):
    """Sweep the c/k/dimension parameter grid and collect average edit distances.

    For every row of ``data`` the function parses the task description and,
    for each combination of k (0.0 .. 0.9 in steps of 0.1, with the "mid" k
    offset by +0.05), c (1.0 .. 1.9 in steps of 0.1) and each of the seven
    coordinate projections, stores the value returned by ``get_average``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``objects``, ``strong_k``, ``mid_k``,
        ``coordinates``, ``start_coordinates``, ``sequence`` and
        ``containment``. Rows are addressed with ``.at[row, ...]`` using
        ``0 .. len(data) - 1``, so the default integer index is assumed.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``data`` and one column per parameter combination,
        labelled ``'c: <c>, k: <k_strong>,<k_mid>, dim: <axes>'``.
    """
    dimensions = [[1,'x'],[1,'y'],[1,'z'],[2,'xy'],[2,'xz'],[2,'yz'],[3,'xyz']]
    # One dict per row, turned into a frame once at the end; assigning the
    # ~700 columns cell by cell fragments the frame and is needlessly slow.
    records = []

    # Read from the `data` argument (the previous version silently used the
    # notebook-global `df`, so the parameter had no effect).
    for row in range(0, len(data)):
        objects = data.at[row,'objects'].split(',')
        strong_k = data.at[row,'strong_k'].split(',')
        mid_k = data.at[row,'mid_k'].split(',')
        # 'name: (x, y, z);name: (x, y, z);...' -> {name: literal value}
        coordinates = {
            key: ast.literal_eval(value)
            for key, value in (elem.split(': ') for elem in data.at[row,'coordinates'].split(';'))
        }
        start_coordinates = list(ast.literal_eval(data.at[row, 'start_coordinates']))
        sequence = str(data.at[row,'sequence'])
        # Loop-invariant, so read it once per row.
        # NOTE(review): `obj in containment` below is a membership test on the
        # raw cell value (a substring test if the cell is a string) - confirm
        # that this is intended for the object names in use.
        containment = data.at[row, 'containment']

        row_result = {}
        for k in np.arange(0.0,1.0,0.1):
            # np.arange accumulates float error; round so labels stay clean.
            k_strong = round(k,2)
            k_mid = round(k + 0.05,2)
            k1 = {obj: k_strong if obj in strong_k else k_mid if obj in mid_k else 1.0 for obj in objects}

            for c in np.arange(1.0,2.0,0.1):
                c = round(c, 1)
                c1 = {obj: c if obj in containment else 1.0 for obj in objects}

                for dim in dimensions:
                    # get average edit distance
                    edit_dist = get_average(objects, coordinates, start_coordinates, c1, k1, dim, sequence)

                    params = 'c: ' + str(c) + ', k: ' + str(k_strong) + ',' + str(k_mid) + ', dim: ' + str(dim[1])
                    row_result[params] = edit_dist

        records.append(row_result)

    return pd.DataFrame(records)

# TODO:
# - fix the problem with the 'xz' dimension
# - which range should the difference between strong k and mid k cover?
# - which ranges should c and k cover in general?

In [533]:
%%timeit -n1 -r1
results_new = get_avg_editdist(df)

5min 24s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [534]:
results_new

Unnamed: 0,"c: 1.0, k: 0.0,0.05, dim: x","c: 1.0, k: 0.0,0.05, dim: y","c: 1.0, k: 0.0,0.05, dim: z","c: 1.0, k: 0.0,0.05, dim: xy","c: 1.0, k: 0.0,0.05, dim: xz","c: 1.0, k: 0.0,0.05, dim: yz","c: 1.0, k: 0.0,0.05, dim: xyz","c: 1.1, k: 0.0,0.05, dim: x","c: 1.1, k: 0.0,0.05, dim: y","c: 1.1, k: 0.0,0.05, dim: z",...,"c: 1.8, k: 0.9,0.95, dim: xz","c: 1.8, k: 0.9,0.95, dim: yz","c: 1.8, k: 0.9,0.95, dim: xyz","c: 1.9, k: 0.9,0.95, dim: x","c: 1.9, k: 0.9,0.95, dim: y","c: 1.9, k: 0.9,0.95, dim: z","c: 1.9, k: 0.9,0.95, dim: xy","c: 1.9, k: 0.9,0.95, dim: xz","c: 1.9, k: 0.9,0.95, dim: yz","c: 1.9, k: 0.9,0.95, dim: xyz"
0,2.06,3.82,3.53,2.38,0.99,1.45,1.0,1.05,2.8,3.48,...,1.53,1.71,0.55,0.98,3.33,2.39,1.53,1.57,1.7,0.5
1,1.85,3.47,3.44,1.7,1.67,3.5,1.66,0.95,3.58,3.55,...,2.5,2.57,2.55,0.87,2.65,2.71,2.51,2.55,2.64,2.48
2,2.04,3.47,3.56,1.01,0.98,3.43,1.14,1.05,3.48,3.45,...,1.4,2.44,1.45,0.93,2.23,2.38,1.51,1.46,2.4,1.45
3,2.78,2.0,2.0,1.72,1.65,2.0,1.67,2.13,2.0,2.0,...,0.52,1.25,0.41,2.0,1.36,1.3,0.52,0.51,1.29,0.55
4,1.01,2.0,2.0,1.57,1.0,2.0,1.0,0.0,2.0,1.0,...,0.0,1.46,1.0,0.0,1.45,2.0,1.55,0.0,1.52,1.0
5,3.46,2.55,4.0,2.82,2.0,1.47,2.0,3.53,1.47,4.0,...,2.0,4.0,4.0,3.28,2.0,4.0,3.08,2.0,4.0,4.0
6,3.09,2.81,3.27,4.0,3.18,4.0,4.0,2.92,3.04,2.8,...,3.02,2.0,2.0,2.94,2.0,2.92,2.0,3.04,2.0,2.0
7,3.0,4.0,2.93,4.0,3.12,4.0,4.0,4.0,4.0,2.93,...,4.0,4.0,4.0,4.0,4.0,2.81,4.0,4.0,4.0,4.0
8,0.5,0.92,1.21,0.0,0.53,0.96,0.0,0.43,0.94,1.34,...,0.44,0.98,0.0,0.48,0.94,1.26,0.0,0.54,1.02,0.0
9,0.5,1.48,0.46,1.0,0.48,1.0,1.0,0.56,1.5,0.4,...,0.52,1.0,1.0,0.51,1.44,0.58,1.0,0.45,1.0,1.0


In [526]:
def get_lowest_error(results):
    """Find the parameter combination(s) with the lowest mean edit distance.

    Parameters
    ----------
    results : pandas.DataFrame
        One column per parameter combination (the layout returned by
        ``get_avg_editdist``). The frame is not modified.

    Returns
    -------
    tuple
        ``(lowest, columns)``: the smallest column mean and a
        ``pandas.Index`` with every column label whose mean equals it.
    """
    # Keep the means in a separate Series. Writing a 'mean' row into
    # `results` would change the caller's frame and make that row part of
    # the data on any later call.
    column_means = pd.Series(
        [results[col].mean() for col in results.columns],
        index=results.columns,
        dtype=float,
    )
    # Series.min skips NaN; the builtin min() is order-dependent with NaN.
    lowest = column_means.min()

    return lowest, results.columns[(column_means == lowest).to_numpy()]
    

In [535]:
get_lowest_error(results_new)

(2.4524999999999997, Index(['c: 1.5, k: 0.4,0.45, dim: xy'], dtype='object'))

In [515]:
def predict_sequence(objects, coordinates, start_coordinates, c, k, dimension=(3,)):
    """Predict the order in which objects are picked, cheapest object first.

    At step ``i`` every remaining object gets the cost
    ``euclidean(start_coordinates[i], coordinates[obj]) ** k[obj] * c[obj]``;
    the cheapest object is appended to the prediction and removed. Ties are
    broken with ``random.choice``.

    Parameters
    ----------
    objects : iterable
        Object names; duplicates collapse to one entry, order is kept.
    coordinates : dict
        Maps each object name to its coordinate sequence. The projections
        index components 0, 1 and 2 as x, y and z.
    start_coordinates : sequence
        Reference position per step; entry ``i`` is used for the i-th pick,
        so it needs at least one entry per object.
    c, k : dict
        Per-object cost multiplier and distance exponent.
    dimension : sequence
        ``(3,)`` / ``[3, 'xyz']`` uses the coordinates unchanged;
        ``[2, 'xy' | 'xz' | 'yz']`` and ``[1, 'x' | 'y' | 'z']`` project them
        onto the named axes first.

    Returns
    -------
    list
        Object names in predicted order.

    Raises
    ------
    ValueError
        If ``dimension`` is not one of the supported combinations.
    """
    # Component indices kept by each supported projection.
    projections = {
        (1, 'x'): (0,), (1, 'y'): (1,), (1, 'z'): (2,),
        (2, 'xy'): (0, 1), (2, 'xz'): (0, 2), (2, 'yz'): (1, 2),
    }

    if dimension[0] == 3: # no changes if 3D
        new_coords = coordinates
        new_start_coords = start_coordinates
    else:
        axes_name = dimension[1] if len(dimension) > 1 else None
        axes = projections.get((dimension[0], axes_name))
        if axes is None:
            # Fail here with a clear message instead of an IndexError from
            # an empty coordinate list further down.
            raise ValueError('unsupported dimension: ' + repr(dimension))
        # Always build vectors (also for a single axis):
        # scipy's euclidean() is specified for 1-D inputs, not bare scalars.
        new_coords = {key: [value[i] for i in axes] for key, value in coordinates.items()}
        new_start_coords = [[point[i] for i in axes] for point in start_coordinates]

    prediction = []
    possible_items = dict.fromkeys(objects, 0) # remaining objects -> current cost
    coord_index = 0

    while possible_items: # while dict not empty
        origin = new_start_coords[coord_index]
        for obj in possible_items:
            possible_items[obj] = distance.euclidean(origin, new_coords[obj]) ** k[obj] * c[obj]

        lowest_cost = min(possible_items.values())
        candidates = [obj for obj, cost in possible_items.items() if cost == lowest_cost]
        choice = random.choice(candidates) # choose prediction randomly if multiple items have same cost
        prediction.append(choice)
        del possible_items[choice]
        coord_index += 1

    return prediction

In [528]:
def get_average(objects, coordinates, start_coordinates, c, k, dimension, sequence, n_runs=100):
    """Average Damerau-Levenshtein distance between predictions and ``sequence``.

    ``predict_sequence`` breaks cost ties randomly, so the prediction is
    repeated ``n_runs`` times and the edit distances are averaged.

    Parameters
    ----------
    objects, coordinates, start_coordinates, c, k, dimension
        Passed through unchanged to ``predict_sequence``.
    sequence : str
        Sequence to compare against; each prediction is joined into a
        single string before the comparison.
    n_runs : int, optional
        Number of predictions to average over (default 100).

    Returns
    -------
    float
        Mean edit distance over all runs.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1')

    edit_list = []

    for _ in range(n_runs):
        predicted = ''.join(predict_sequence(objects, coordinates, start_coordinates, c, k, dimension))
        edit_list.append(damerau_levenshtein_distance(sequence, predicted))

    return np.mean(edit_list)