In [1]:
import numpy as np
import pandas as pd
import itertools
import random
import ast
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon, friedmanchisquare
from mpl_toolkits import mplot3d
from scipy.spatial import distance
from pyxdameraulevenshtein import damerau_levenshtein_distance

from sklearn.model_selection import ParameterGrid

In [2]:
# read data

df = pd.read_csv('all_task_environments_new.csv', header=0, skiprows=[1])

In [3]:
c = [round(x,2) for x in np.arange(1.1,2.0,0.1)]
k_strong = [round(x,2) for x in np.arange(0.1,0.9,0.1)]
k_medium = [round(x + 0.1,2) for x in k_strong]
k_food = [round(x,2) for x in np.arange(1.1,2.0,0.1)]
dim = [[1,'x'],[1,'y'],[1,'z'],[2,'xy'],[2,'xz'],[2,'yz'],[3,'xyz']]

In [4]:
param_grid = [{'c': c, 'k_strong': k_strong, 'k_medium': k_medium, 'k_food': k_food, 'dim': dim}]

In [89]:
df.iloc[0,:]['sequence']

'tnpsc'

In [5]:
parameter_grid = [x for x in ParameterGrid(param_grid) if x['k_strong'] < x['k_medium']]

In [6]:
parameter_grid[1]

{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.3, 'k_strong': 0.1}

In [7]:
def predict_sequence(objects, coordinates, start_coordinates, c, k, dimension=[3,]):
    ''' Predicts sequence based on required objects, object coordinates, start coordinates of subject,
        parameters (c+k) and dimensionality.
        Input: Objects, object coordinates, start coordinates, c, k, dimension
        Output: Sequence of objects as str
    '''
    prediction = []
    possible_items = dict.fromkeys(objects, 0) # generate dict from object list
    coord_index = 0
    start_coords = start_coordinates
    coords = coordinates
    new_coords = {}
    new_start_coords = []
    
    if dimension[0] == 3: # no changes if 3D
        new_coords = coords
        new_start_coords = start_coords
        
    elif dimension[0] == 2: # 2D: remove obsolete coordinate
        if dimension[1] == 'xy':
            new_coords = {key: value[:-1] for key, value in coords.items()}
            new_start_coords = [x[:-1] for x in start_coords]
                
        elif dimension[1] == 'xz':
            new_start_coords = [[x[0], x[-1]] for x in start_coords]
            
            for key, value in coords.items():
                new_value = (value[0], value[-1])
                new_coords[key] = new_value
        
        elif dimension[1] == 'yz':
            new_coords = {key: value[1:] for key, value in coords.items()}
            new_start_coords = [x[1:] for x in start_coords]
                
    elif dimension[0] == 1: # 1D: choose appropriate coordinate
        if dimension[1] == 'x':
            new_coords = {key: value[0] for key, value in coords.items()}
            new_start_coords = [x[0] for x in start_coords]
        
        elif dimension[1] == 'y':
            new_coords = {key: value[1] for key, value in coords.items()}
            new_start_coords = [x[1] for x in start_coords]
        
        elif dimension[1] == 'z':
            new_coords = {key: value[2] for key, value in coords.items()}
            new_start_coords = [x[2] for x in start_coords]
    
    while bool(possible_items) == True: # while dict not empty
        for obj in possible_items.keys():
            possible_items[obj] = ((distance.euclidean(
                                new_start_coords[coord_index], 
                                new_coords[obj])
                                ) ** k[obj]) * c[obj]
        #print(possible_items)                      
        minval = min(possible_items.values())
        minval = [k for k, v in possible_items.items() if v == minval]
        minval = random.choice(minval) # choose prediction randomly if multiple items have same cost
        prediction.append(minval)
        del possible_items[minval]
        coord_index += 1
        
    return prediction

In [8]:
def get_median(df, row, parameters, n):
    ''' Returns median edit distance (Damerau-Levenshtein) for n trials of sequence prediction.
    '''  
    c = parameters['c']
    k_strong = parameters['k_strong']
    k_medium = parameters['k_medium']
    k_food = parameters['k_food']
    dim = parameters['dim']
        
    #for row in range(0,len(df)):
    objects = list(df.at[row,'objects'].split(','))
    strong_k = list(df.at[row,'strong_k'].split(','))
    medium_k = list(df.at[row,'mid_k'].split(','))
    food_k = list(df.at[row,'food_k'].split(','))
    coordinates = {key: ast.literal_eval(value) for key, value in (elem.split(': ') for elem in df.at[row,'coordinates'].split(';'))}
    start_coordinates = list(ast.literal_eval(df.at[row, 'start_coordinates']))
    sequence = str(df.at[row,'sequence'])
    
    #objects = list(df.iloc[])
        
    k1 = {obj: k_strong if obj in strong_k else k_medium if obj in medium_k else k_food if obj in food_k else 1.0 for obj in objects}
    c1 = {obj: c if obj in df.at[row, 'containment'] else 1.0 for obj in objects}
    
    editdists = [damerau_levenshtein_distance(sequence,''.join(predict_sequence(objects,coordinates,start_coordinates,c1,k1,dim))) for x in range(0,n)]   
    median = np.median(editdists) / len(sequence)
        
    params = 'c: ' + str(c) + '; k: ' + str(k_strong) + ',' + str(k_medium) + ',' + str(k_food) + '; ' + str(dim)
    
    return median, params, sequence

In [9]:
get_median(df, 16, {'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.3, 'k_strong': 0.1}, 10)

(0.8, "c: 1.1; k: 0.1,0.3,1.1; [1, 'x']", 'awgps')

In [18]:
def fill_dataframe(data, params_grid, n):
    ''' Calculates average edit distance for all combinations of parameters (c, k, dimension).
        Input: Dataframe with objects, coordinates, start coordinates, object categories
        Output: Dataframe with edit distance results (col name: parameters used)
    '''
    results = pd.DataFrame(index=data['sequence'])
    
    for row in range(0,len(data)):
        for line in params_grid:
            #print(line)
            median, params, sequence = get_median(df, row, line, n)
            results.at[sequence,params] = median
            #results.at['mean',params] = results[params].mean()
                                
    return results

In [19]:
#%%timeit -n1 -r1
results_test = fill_dataframe(df, parameter_grid, 1)

In [20]:
results_test

Unnamed: 0_level_0,"c: 1.1; k: 0.1,0.2,1.1; [1, 'x']","c: 1.1; k: 0.1,0.3,1.1; [1, 'x']","c: 1.1; k: 0.2,0.3,1.1; [1, 'x']","c: 1.1; k: 0.1,0.4,1.1; [1, 'x']","c: 1.1; k: 0.2,0.4,1.1; [1, 'x']","c: 1.1; k: 0.3,0.4,1.1; [1, 'x']","c: 1.1; k: 0.1,0.5,1.1; [1, 'x']","c: 1.1; k: 0.2,0.5,1.1; [1, 'x']","c: 1.1; k: 0.3,0.5,1.1; [1, 'x']","c: 1.1; k: 0.4,0.5,1.1; [1, 'x']",...,"c: 1.9; k: 0.6,0.8,1.9; [3, 'xyz']","c: 1.9; k: 0.7,0.8,1.9; [3, 'xyz']","c: 1.9; k: 0.1,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.2,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.3,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.4,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.5,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.6,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.7,0.9,1.9; [3, 'xyz']","c: 1.9; k: 0.8,0.9,1.9; [3, 'xyz']"
sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tnpsc,0.4,0.4,0.4,0.2,0.2,0.4,0.4,0.2,0.4,0.4,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
tnpcs,0.2,0.2,0.4,0.2,0.4,0.2,0.0,0.0,0.2,0.2,...,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4
tnpsc,0.4,0.4,0.4,0.2,0.2,0.4,0.4,0.2,0.4,0.4,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
tnspc,0.4,0.4,0.2,0.4,0.4,0.6,0.2,0.4,0.6,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pms,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
rhcse,0.6,0.6,0.8,0.6,0.8,0.6,0.4,0.6,0.8,0.8,...,0.6,0.6,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8
pbhs,1.0,0.5,1.0,0.5,1.0,1.0,0.5,1.0,1.0,0.5,...,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5
hscp,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
pbs,0.333333,0.0,0.333333,0.0,0.333333,0.0,0.333333,0.333333,0.333333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pgs,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.333333,...,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333


In [154]:
def fill_dataframe_test(data, params_grid, n):
    ''' Calculates average edit distance for all combinations of parameters (c, k, dimension).
        Input: Dataframe with objects, coordinates, start coordinates, object categories
        Output: Dataframe with edit distance results (col name: parameters used)
    '''
    results = pd.DataFrame(index=data['sequence'])
    
    #medians, params, sequences = [((median,params,sequence) for (median,params,sequence) in get_median(df,row,line,n)) 
    # for row in range(0,len(data)) for line in params_grid]
    
    
    
                                
    return medians, params, sequences

In [155]:
medians, params, sequences = fill_dataframe_test(df, param_grid, 1)

IndexError: list index out of range

In [126]:
def get_lowest_error(results):
    ''' Returns lowest error in dataframe and index of lowest error.
    '''
    for col in list(results):
        results.loc['mean',col] = results[col].mean()
        results.loc['median',col] = results[col].median()
    lowest = min(results.loc['mean'])
    mean = list(results.loc['mean'])
    
    return lowest, results.columns[(results.loc['mean'] == lowest)], mean, results

In [127]:
lowest, lowest_idx, list_mean, results_mean = get_lowest_error(results_new)
lowest, lowest_idx

ValueError: cannot reindex from a duplicate axis

In [114]:
testdf = pd.DataFrame(index=df['sequence'])

tnpsc
tnpcs
tnpsc
tnspc
pms
rhcse
pbhs
hscp
pbs
pgs
pnsgb


In [77]:
testdf = testdf.reindex(testdf.columns.to_list() + [str(row) for row in parameter_grid], axis=1)

In [78]:
testdf

Unnamed: 0_level_0,"{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.2, 'k_strong': 0.1}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.3, 'k_strong': 0.1}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.3, 'k_strong': 0.2}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.4, 'k_strong': 0.1}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.4, 'k_strong': 0.2}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.4, 'k_strong': 0.3}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.5, 'k_strong': 0.1}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.5, 'k_strong': 0.2}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.5, 'k_strong': 0.3}","{'c': 1.1, 'dim': [1, 'x'], 'k_food': 1.1, 'k_medium': 0.5, 'k_strong': 0.4}",...,"{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.8, 'k_strong': 0.6}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.8, 'k_strong': 0.7}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.1}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.2}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.3}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.4}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.5}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.6}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.7}","{'c': 1.9, 'dim': [3, 'xyz'], 'k_food': 1.9, 'k_medium': 0.9, 'k_strong': 0.8}"
sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tnpsc,,,,,,,,,,,...,,,,,,,,,,
tnpcs,,,,,,,,,,,...,,,,,,,,,,
tnpsc,,,,,,,,,,,...,,,,,,,,,,
tnspc,,,,,,,,,,,...,,,,,,,,,,
pms,,,,,,,,,,,...,,,,,,,,,,
rhcse,,,,,,,,,,,...,,,,,,,,,,
pbhs,,,,,,,,,,,...,,,,,,,,,,
hscp,,,,,,,,,,,...,,,,,,,,,,
pbs,,,,,,,,,,,...,,,,,,,,,,
pgs,,,,,,,,,,,...,,,,,,,,,,
