In [1]:
import ast
import numpy as np
import pandas as pd

In [2]:
# Load the sequence table from the data folder.
# NOTE(review): `sequences` is not referenced by the other cells visible here.
sequences = pd.read_csv(
    'data/kit_sequenzen.csv',
    header=0,  # the first line of the file holds the column names
)

In [4]:
# Empty result table: the fixed columns first, followed by six pre-declared
# set columns (set_0 ... set_5).
base_columns = ['ID', 'sequence', 'coordinates', 'start_coordinates']
initial_set_columns = ['set_' + str(set_index) for set_index in range(6)]

df = pd.DataFrame(columns=base_columns + initial_set_columns)

In [21]:
# Load the clustered annotation table; every later cell reads from `data`.
data = pd.read_csv(
    'data/kit_annotation_clustered.csv',
    header=0,  # the first line of the file holds the column names
)

In [22]:
# Collect the names of all items that have coordinate columns in `data`.
# An item is recognised by its '<item>_x' column; columns whose name contains
# 'geschirr' or 'head' are skipped.
# Only names that really end in '_x' are accepted: a plain "contains an x"
# check would also pick up unrelated columns (for example 'index') and turn
# them into bogus item names once the last two characters are cut off.
x_suffix = '_x'

all_items = [
    col[:-len(x_suffix)]
    for col in data.columns
    if col.endswith(x_suffix) and 'geschirr' not in col and 'head' not in col
]

In [23]:
# Distinct values of the 'ID' column of the annotation table.
ids = {annotation_id for annotation_id in data['ID']}

In [24]:
# One result row per distinct ID, in ascending order.
df['ID'] = sorted(ids)

### get coordinates of all items

In [25]:
# For every ID, build a dict mapping each item name to [x, y, 2.0], read from
# that ID's 'start' annotation row, and store it in the 'coordinates' column.
#
# The 'start' rows are identical for every iteration, so they are selected and
# indexed by ID once up front instead of once per result row.
filtered_df = data.loc[data['annotation'] == 'start'].set_index('ID')

for row in range(len(df)):
    ID = df.at[row, 'ID']
    # NOTE(review): .at yields a scalar only when the ID has exactly one
    # 'start' row -- confirm that this holds for the annotation file.
    # The third component is the constant 2.0 for every item (presumably a
    # fixed height value -- TODO confirm).
    locations_dict = {
        item: [filtered_df.at[ID, str(item) + '_x'],
               filtered_df.at[ID, str(item) + '_y'], 2.0]
        for item in all_items
    }
    df.at[row, 'coordinates'] = locations_dict

### get subject locations

In [26]:
# For every ID, collect a list of head positions [head_x, head_y, 2.0]:
#   * a 'start' row contributes its own head position,
#   * a 'nehmen_objekte' row contributes the head position of the row
#     directly before it (within the same ID).
# Rows with any other annotation contribute nothing.
for row in range(len(df)):
    ID = df.at[row, 'ID']
    filtered_annotation = data.loc[data['ID'] == ID].reset_index()
    subject_locations = []

    for row2 in range(len(filtered_annotation)):
        label = filtered_annotation.at[row2, 'annotation']
        if label == 'start':
            source_row = row2
        elif label == 'nehmen_objekte':
            # Position is taken from the preceding annotation row.
            source_row = row2 - 1
        else:
            continue

        subject_locations.append([filtered_annotation.at[source_row, 'head_x'],
                                  filtered_annotation.at[source_row, 'head_y'], 2.0])

    df.at[row, 'start_coordinates'] = subject_locations

### get sequences
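- If several items are taken together in one step, that step is recorded in the sequence as a set named `set_<step index>`, and the items of the set are stored in the column of the same name.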

In [27]:
# Columns that can name the objects taken in a single 'nehmen_objekte' step.
object_cols = ['erstes_objekt', 'zweites_objekt', 'drittes_objekt',
               'viertes_objekt', 'fuenftes_objekt']

# Build the pick-up sequence per ID. A step with only one object contributes
# that object's name; a step with a second object contributes 'set_<step>'
# and the step's objects are stored in the df column of that name.
for row in range(len(df)):
    ID = df.at[row, 'ID']
    is_pickup_of_id = (data['ID'] == ID) & (data['annotation'] == 'nehmen_objekte')
    filtered_annotation = data.loc[is_pickup_of_id].reset_index()
    sequence = []

    for row2 in range(len(filtered_annotation)):
        # No second object: the step is a single item.
        if pd.isna(filtered_annotation.at[row2, 'zweites_objekt']):
            sequence.append(filtered_annotation.at[row2, 'erstes_objekt'])
            continue

        set_name = 'set_{}'.format(row2)
        sequence.append(set_name)

        # Set columns beyond the pre-declared ones are created on demand.
        if set_name not in df.columns:
            df[set_name] = None

        set_items = [filtered_annotation.at[row2, object_col]
                     for object_col in object_cols
                     if not pd.isna(filtered_annotation.at[row2, object_col])]
        df.at[row, set_name] = set_items

    df.at[row, 'sequence'] = sequence

### get coordinates for sets

In [28]:
# Add one entry per set column to every row's coordinate dict: the
# element-wise mean of the coordinates of the set's items, or [] when no
# mean can be formed for that row.
set_cols = [col_name for col_name in df.columns if 'set' in col_name]

for row in range(len(df)):
    coords = df.at[row, 'coordinates']

    for set_name in set_cols:
        try:
            items = df.at[row, set_name]
            item_coords = [coords[item] for item in items]
            # Plain Python floats are stored instead of NumPy scalars: NumPy 2
            # represents its scalars as 'np.float64(...)', which would leak
            # into any text representation of this dict.
            set_coords = [float(value) for value in np.mean(item_coords, axis=0)]
        except (TypeError, KeyError):
            # Only the two expected failures are treated as "no coordinates":
            #   TypeError -- the cell holds no list for this row (NaN / None),
            #   KeyError  -- an item of the set has no entry in `coords`.
            # Anything else (including KeyboardInterrupt) now propagates
            # instead of being silently swallowed by a bare `except:`.
            set_coords = []

        coords[set_name] = set_coords

    # `coords` is the dict already stored in the cell and was updated in
    # place; writing it back once per row is sufficient.
    df.at[row, 'coordinates'] = coords

### remove set coords of sets that don't occur in sequence

In [29]:
# Drop every entry whose value is the empty list from each row's coordinate
# dict. The dict stored in the cell is modified in place.
for row in range(len(df)):
    coords = df.at[row, 'coordinates']

    # Collect the keys first; a dict must not change size while it is iterated.
    empty_keys = [key for key, value in coords.items() if value == []]

    for key in empty_keys:
        del coords[key]

### check for sets in sequence that don't have coordinates

In [30]:
# Sanity check: print (ID, entry) for every sequence entry that has no key in
# the row's coordinate dict. No output means every entry has coordinates.
for row in range(len(df)):
    ID = df.at[row, 'ID']
    coords = df.at[row, 'coordinates']

    missing_entries = [entry for entry in df.at[row, 'sequence'] if entry not in coords]
    for entry in missing_entries:
        print(ID, entry)

In [31]:
# Write the assembled table to a CSV file in the working directory.
df.to_csv(
    'kit_task_envs_2022-08-30.csv',
    header=True,   # keep the column names as the first line
    index=False,   # do not write the row index
)