In [14]:
import ast
import numpy as np
import pandas as pd
import random

from collections import Counter
from opportunistic_planning import prediction, processing
from scipy.stats import wilcoxon

In [2]:
data = pd.read_csv('all_task_environments_list_2022-09-12_unique_sequences.csv', header=0)

In [4]:
ts = data[:168]

In [6]:
distances_dict = processing.generate_distances_dict(ts, use_string_for_seq=False,
                                                   dimensions=[[2, 'xy']])

### revised functions

In [9]:
def calculate_prediction_error(data, distances_dict, error_function, n=10, 
                             dimensions=[[2, 'xy'], [3, 'xyz']], 
                             seqcol='sequence', coords='coordinates', error='error',
                             use_string_for_seq=False):

    results = pd.DataFrame()
    
    for row in range(0, len(data)):
        # get episode information from input row
        coordinates = {key: ast.literal_eval(value) for key, value in
                       (elem.split(': ') for elem in data.at[row, coords].split(';'))}

        start_coordinates = list(ast.literal_eval(data.at[row, 'start_coordinates']))
        ID = str(data.at[row,'ID'])
        
        if use_string_for_seq == True:
            seq = str(data.at[row, seqcol])
            objects = list(data.at[row, seqcol])
        else:
            seq = [elem for elem in data.at[row, seqcol].split(',')]
            objects = [elem for elem in data.at[row, seqcol].split(',')]

        # get list of objects that have relational dependencies, if any (else set to empty list)
        try:
            strong_k = list(data.at[row, 'strong_k'].split(','))
        except AttributeError:
            strong_k = []

        try:
            mid_k = list(data.at[row, 'mid_k'].split(','))
        except AttributeError:
            mid_k = []

        try:
            food_k = list(data.at[row, 'food_k'].split(','))
        except AttributeError:
            food_k = []
            
        try:
            containment = list(data.at[row, 'containment']).split(',')
        except AttributeError:
            containment = []

        # go through parameter ranges
        # set k to current param if object has relational dependencies, else 1.0
        for k2 in np.arange(1.1, 2.0, 0.1):
            k_food = round(k2, 2)
            k1 = {obj: k_food if obj in food_k else 1.0 for obj in objects}

            for k in np.arange(0, 0.9, 0.1):
                k_strong = round(k, 2)
                k_mid = round(k + 0.1, 2)
                k1 = {obj: k_strong if obj in strong_k else k_mid if obj in mid_k else round(k1[obj], 2) for obj in
                      objects}

                for c in np.arange(1.0, 2.0, 0.1):
                    c = round(c, 1)
                    # set c to current param if object contained, else 1.0
                    c1 = {obj: c if obj in containment else 1.0 for obj in objects}

                    for dim in dimensions:
                        # get median error for parameter combination based on error function
                        median = get_median_error(error_function, row, ID, objects, 
                                                             coordinates, start_coordinates, 
                                                             c1, k1, dim,
                                                             seq, distances_dict, n)

                        # save parameter combination as column name in results
                        params = 'c: ' + str(c) + '; k: ' + str(k_strong) + ',' + str(k_mid) + ',' + str(
                            k_food) + '; ' + str(dim[1])

                        results.at[row, params] = median

        #results.at[row, 'sequence'] = seq
        results.at[row, 'error'] = data.at[row, error]
        results.at[row, 'ID'] = ID

    return results

In [15]:
def get_median_error(error_function, row, ID, objects, coordinates, start_coordinates, c, k, dimension, sequence, 
                             distances_dict, n=1):

    error_list = []

    for x in range(0, n):
        # get median error using edit distance (predict whole sequence, then compare)
        if error_function == 'editdist':
            # get predicted sequence for list of objects
            prediction = ''.join(predict_editdist(distances_dict, ID, objects, coordinates, 
                                          start_coordinates, sequence, c, k, dimension))

            # calculate normalized error between predicted and given sequence
            dl = 1 - damerauLevenshtein(sequence, prediction)

            error_list.append(dl)
        
        # get median summed error using prequential method (predict only for each next step)
        elif error_function == 'prequential':
            errors = predict_prequential(distances_dict, ID, objects, coordinates,
                                         start_coordinates, sequence, c, k, dimension)
            summed = sum(errors)
            error_list.append(summed)
                        
    median = np.nanmedian(error_list)
    return median

In [26]:
def predict_prequential(distances_dict, ID, objects, coordinates, start_coordinates, sequence, 
                                 c, k, dimension=[3, ]):
    
    i = 0
    errors = []
    possible_items = dict.fromkeys(objects, 0)  # generate dict from object list
    item_count = Counter(objects)
    
    coord_index = 0
    
    new_coords, new_start_coords = filter_for_dimension(dimension, coordinates, start_coordinates)

    while i < len(sequence) - 1:
        for obj in possible_items.keys():            
            try:
                position = tuple(new_start_coords[coord_index])
            except TypeError:
                position = str(new_start_coords[coord_index])
            
            possible_items[obj] = distances_dict[dimension[1]][ID][position][obj] * k[obj] * c[obj]

        minval = min(possible_items.values())
        minval = [k for k, v in possible_items.items() if v == minval]
        minval = random.choice(minval)  # choose prediction randomly if multiple items have same cost
        
        prediction = minval
        observed = sequence[i]
        
        if prediction == observed:
            error = 0
        else:
            error = 1
        
        errors.append(error)
        
        if item_count[sequence[i]] > 1:
            item_count[sequence[i]] = item_count[sequence[i]] - 1
        else:
            del possible_items[sequence[i]]
        
        coord_index += 1
        i += 1
    
    return errors

In [18]:
def filter_for_dimension(dimension, coordinates, start_coordinates):
    
    new_coords =  {}
    new_start_coords = []
    
    if dimension[0] == 3:  # no changes if 3D
        new_coords = coordinates
        new_start_coords = start_coordinates

    elif dimension[0] == 2:  # 2D: remove obsolete coordinate
        if dimension[1] == 'xy':
            new_coords = {key: value[:-1] for key, value in coordinates.items()}
            new_start_coords = [x[:-1] for x in start_coordinates]

        elif dimension[1] == 'xz':
            new_start_coords = [[x[0], x[-1]] for x in start_coordinates]

            for key, value in coordinates.items():
                new_value = (value[0], value[-1])
                new_coords[key] = new_value

        elif dimension[1] == 'yz':
            new_coords = {key: value[1:] for key, value in coordinates.items()}
            new_start_coords = [x[1:] for x in start_coordinates]

    elif dimension[0] == 1:  # 1D: choose appropriate coordinate
        if dimension[1] == 'x':
            new_coords = {key: value[0] for key, value in coordinates.items()}
            new_start_coords = [x[0] for x in start_coordinates]

        elif dimension[1] == 'y':
            new_coords = {key: value[1] for key, value in coordinates.items()}
            new_start_coords = [x[1] for x in start_coordinates]

        elif dimension[1] == 'z':
            new_coords = {key: value[2] for key, value in coordinates.items()}
            new_start_coords = [x[2] for x in start_coordinates]
            
    return new_coords, new_start_coords

In [27]:
results = calculate_prediction_error(ts, distances_dict, error_function='prequential',
                                    n=100, dimensions=[[2, 'xy']], seqcol='sequence',
                                    coords='coordinates', error='error',
                                    use_string_for_seq=False)

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, pa

  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, params] = median
  results.at[row, 'error'] = data.at[row, error]
  results.at[row, 'ID'] = ID


In [28]:
lowest_mean, lowest_mean_idx, lowest_median, results_median = processing.get_lowest_error(results)

In [29]:
lowest_mean, lowest_median, lowest_mean_idx

(4.145833333333333,
 4.0,
 Index(['c: 1.0; k: 0.2,0.3,1.5; xy', 'c: 1.9; k: 0.0,0.1,1.6; xy',
        'c: 1.7; k: 0.2,0.3,1.9; xy'],
       dtype='object'))

In [30]:
processing.save_results(results, 'results/results_alternative_functional_form_n100_2D_2024-02-20.csv')

### Compare to previous (normal range) results

In [32]:
results_old = pd.read_csv('results/results_tablesetting_2D_n100_fitted_to_ts_2023-06-22.csv', header=0)

In [33]:
results_old

Unnamed: 0,"c: 1.0; k: 0.0,0.1,1.1; xy","c: 1.1; k: 0.0,0.1,1.1; xy","c: 1.2; k: 0.0,0.1,1.1; xy","c: 1.3; k: 0.0,0.1,1.1; xy","c: 1.4; k: 0.0,0.1,1.1; xy","c: 1.5; k: 0.0,0.1,1.1; xy","c: 1.6; k: 0.0,0.1,1.1; xy","c: 1.7; k: 0.0,0.1,1.1; xy","c: 1.8; k: 0.0,0.1,1.1; xy","c: 1.9; k: 0.0,0.1,1.1; xy",...,"c: 1.2; k: 0.8,0.9,1.9; xy","c: 1.3; k: 0.8,0.9,1.9; xy","c: 1.4; k: 0.8,0.9,1.9; xy","c: 1.5; k: 0.8,0.9,1.9; xy","c: 1.6; k: 0.8,0.9,1.9; xy","c: 1.7; k: 0.8,0.9,1.9; xy","c: 1.8; k: 0.8,0.9,1.9; xy","c: 1.9; k: 0.8,0.9,1.9; xy",error,ID
0,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,4.00000,4.000000,4.00000,4.000000,4.00000,4.000000,4.000000,4.000000,0.723,a1
1,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,7.00000,7.000000,7.00000,7.000000,7.00000,7.000000,7.000000,7.000000,0.785,a3
2,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,4.00000,4.000000,4.00000,4.000000,4.00000,4.000000,4.000000,4.000000,0.806,a5
3,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,9.000000,...,7.00000,7.000000,7.00000,7.000000,7.00000,7.000000,7.000000,7.000000,0.862,a11
4,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,6.000000,...,6.00000,6.000000,6.00000,6.000000,6.00000,6.000000,6.000000,6.000000,0.806,a13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,2.000000,2.000000,3.000000,3.000000,2.000000,2.000000,3.000000,3.000000,2.000000,2.000000,...,2.00000,3.000000,2.50000,2.000000,2.00000,2.000000,2.500000,2.000000,0.723,v7
184,3.500000,4.000000,3.500000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,4.000000,...,4.00000,4.000000,3.00000,4.000000,3.00000,4.000000,4.000000,3.000000,0.723,v8
185,2.000000,2.000000,2.000000,3.000000,2.000000,2.000000,2.000000,2.000000,3.000000,3.000000,...,2.00000,2.000000,3.00000,3.000000,2.00000,2.000000,3.000000,3.000000,0.723,v9
186,4.180108,4.139785,4.137097,4.137097,4.096774,4.142473,4.147849,4.150538,4.123656,4.155914,...,4.63172,4.615591,4.66129,4.602151,4.61828,4.642473,4.629032,4.634409,,


In [34]:
lowest_mean_old, lowest_mean_idx_old, lowest_median_old, results_median_old = processing.get_lowest_error(results_old)

In [35]:
lowest_mean_old, lowest_median_old, lowest_mean_idx_old

(4.0427819720887666,
 4.0,
 Index(['c: 1.7; k: 0.2,0.3,1.2; xy'], dtype='object'))

In [37]:
lowest_new = results['c: 1.7; k: 0.2,0.3,1.9; xy'].tolist()
len(lowest_new)

170

In [38]:
lowest_old = results_old['c: 1.7; k: 0.2,0.3,1.2; xy'].tolist()[:-20]
len(lowest_old)

170

In [39]:
stat, p = wilcoxon(lowest_new, lowest_old, zero_method='wilcox')
print('Wilcoxon: W = %.3f, p = %.5f' % (stat, p))

Wilcoxon: W = 496.000, p = 0.60084


In [77]:
medians_new = results_median.tail(1).values
medians_new = medians_new.tolist()[0][:-2]
np.mean(medians_new)

4.004567166372722

In [78]:
medians_old = results_median_old.tail(1).values
medians_old = medians_old.tolist()[0][:-2]
np.mean(medians_old)

4.0