In [1]:
import json, random, re, collections, itertools, base64, sys
from dataclasses import dataclass
from copy import deepcopy
from pathlib import Path
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from operator import itemgetter as at
# `display` has to come from the public IPython.display module: importing it
# from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
from ipywidgets import interact

def display_html(x):
    """Render ``str(x)`` as HTML in the notebook output area."""
    display(HTML(str(x)))
from pprint import pprint as pp
from edit_distance import edit_distance
def ls(path):
    """Return the entries directly inside directory ``path`` as a list of path strings.

    ``path`` must offer ``iterdir()`` (e.g. a ``pathlib.Path``); the order is
    whatever ``iterdir()`` yields.
    """
    return [str(entry) for entry in path.iterdir()]

# Make the project modules under ../src importable from this notebook.
sys.path.append('../src')

# Project directories, all relative to the notebook's working directory:
# annotations, raw data, preprocessed artifacts and MTurk result files.
annot_path, data_path, preprocessed_path, mturk_path = map(
    Path,
    ("../annotations", "../data", "../preprocessed", "../mturk/results"),
)

def read_annotation(annotation_id, mturk_batch=None):
    """Fetch one annotation through ``annotation_io``.

    Args:
        annotation_id: identifier of the annotation to read.
        mturk_batch: when truthy, the annotation is read from that MTurk batch
            via ``annotation_io.mturk_annotation``; when falsy (the default
            ``None``, but also ``0`` or ``""``) ``annotation_io.get_annotation``
            is used instead.

    Returns:
        Whatever the selected ``annotation_io`` reader returns.
    """
    if not mturk_batch:
        return annotation_io.get_annotation(annotation_id)
    return annotation_io.mturk_annotation(mturk_batch, annotation_id)

import annotation_io
import read_data

%load_ext autoreload
%autoreload 2

## Read mapping

In [12]:
# Load the preprocessed lookup tables. JSON text is UTF-8 by specification, so
# the encoding is pinned instead of inheriting the platform default.
with (preprocessed_path / "ingredients.json").open("r", encoding="utf-8") as f:
    ingredients = json.load(f)
with (preprocessed_path / "labels.json").open("r", encoding="utf-8") as f:
    # JSON arrays come back as lists; tuples are hashable and can key label2idx.
    idx2label = [tuple(t) for t in json.load(f)]
# Inverse of idx2label: label tuple -> position in the list.
label2idx = {label: i for i, label in enumerate(idx2label)}
with (preprocessed_path / "resources.json").open("r", encoding="utf-8") as f:
    resources = json.load(f)
# Both tables are iterated as (key, value) pairs and turned into dicts.
ingredient_dict = {k: v for k, v in ingredients}
resource_dict = {k: v for k, v in resources}

In [3]:
# List the entries of the preprocessed directory as path strings.
ls(preprocessed_path)

['../preprocessed/103308.npz',
 '../preprocessed/ingredients.json',
 '../preprocessed/resources.json',
 '../preprocessed/labels.json',
 '../preprocessed/103308.json']

In [4]:
# Open the .npz archive for recipe 103308 and show the names of the arrays it holds.
states = np.load(str(preprocessed_path.joinpath("103308.npz")))
states.files

['103308_0',
 '103308_1',
 '103308_2',
 '103308_3',
 '103308_4',
 '103308_5',
 '103308_6',
 '103308_7',
 '103308_8',
 '103308_9']

In [5]:
# Inspect one stored array, addressed by one of the keys listed in `states.files`.
states['103308_3']

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [6]:
from instruction_parsing import program, program_step, Instruction, execute

def human_program(annotation):
    """Return the steps of ``program(annotation)`` as readable tuples.

    Each step becomes ``(ts, command name, arg description, resource description)``:

    * the arg is looked up in ``ingredient_dict`` first, then in
      ``resource_dict``, and falls back to ``""`` when neither knows it;
    * the resource is looked up in ``resource_dict`` and is ``None`` when missing.
    """
    actions = []
    for step in program(annotation):
        timestamp = step.ts
        command_name = step.command.name
        # Resource lookup is the fallback for args that are not ingredients.
        arg_as_resource = resource_dict.get(step.arg, "")
        arg_description = ingredient_dict.get(step.arg, arg_as_resource)
        resource_description = resource_dict.get(step.resource)
        actions.append((timestamp, command_name, arg_description, resource_description))
    return actions

In [11]:
# NOTE(review): `mturk_batch` is only assigned in a later cell of this notebook,
# so on a fresh kernel this cell raises NameError.
# NOTE(review): the recorded run of this cell ended in KeyError for this
# annotation id — presumably the id is not part of the batch that was active
# at the time; confirm which batch it belongs to.
annotation_id="35L9RVQFCUDMD74V0GR1EN7XINTUHO"
annotation = read_annotation(annotation_id, mturk_batch)


# Pretty-print the (ts, command, arg, resource) tuples built by human_program.
pp (human_program(annotation))

KeyError: '35L9RVQFCUDMD74V0GR1EN7XINTUHO'

In [14]:
# MTurk batch analysed below; the trailing id is kept from the original cell.
mturk_batch = 4289268  # 4286064

# JSON text is UTF-8 by specification, so pin the encoding instead of
# inheriting the platform default.
with (mturk_path / f"{mturk_batch}.json").open("r", encoding="utf-8") as f:
    annotations = json.load(f)
# Mapping of recipe id -> list of annotation ids (see the displayed output).
annotations

{'8626': ['3LO69W1SU985ULMQ7WHI20ZDRGJGLI', '3TOK3KHVJZDU7DEMRLTGO4QENIYO7X'],
 '9035': ['37QW5D2ZRMHD5JBY872VM9WAVRD8S7',
  '3IAEQB9FMKFITVOY9KNC5QDWCVVWD5',
  '3VE8AYVF8SS41E6C2N2WFADV61CF8O'],
 '9401': ['36WLNQG7855VXY4NAO20OUIW260BEJ', '38JBBYETQU5BCZJEV9SDGGCQSE74EU'],
 '13484': ['3S3AMIZX3009FXITSTNBTW6TKFNDCN'],
 '18886': ['38SKSKU7R7SJQCSLXKHLNCCT6T9LID', '3VHHR074H9CM1SBPETDOWNMKSDV7L1'],
 '20040': ['3HMIGG0U4R1A15N77DUK4VKBCCBY8D',
  '3HPZF4IVNSOCD8DKOPAP6HGU232CY3',
  '3QY7M81QHDHBHMT7TT1NHN953C9K7S'],
 '46922': ['33LKR6A5KKFW9J4SYMY6H45OJ4QT12', '3EKVH9QME4ZWHFQRV0Y5HJX78O7D2B'],
 '50704': ['308Q0PEVBE8OPARC8NEVI3LY6ZZI9K',
  '31QTRG6Q2Z8V7XMOTUWVQHF11M4PYH',
  '3OSWBBLG1KSXIVTOJPJY79R3LXFDXT'],
 '57372': ['37Q970SNZK3VUJRD9MTG7Z92HKUS1S', '3HOSI13XH5JJKPO7SPIMD2OB5F0DDO'],
 '72007': ['340UGXU9D4WRV8ZF5585TOY6RLEUVR', '3SNLUL3WOAIOZ3NLR16Z6F040O4LUT'],
 '128385': ['3EJJQNKU9X0UXFCEWMWW9CKDC8LRHI',
  '3OS4RQUCRFAN3C4J5ZM66C3WE75FBV',
  '3TESA3PJ375PSD0XIBGSFIYZN6ZMMA'],
 '21

In [15]:
def annotator_agreement(annotation1,annotation2):
    """Score how closely the programs of two annotations agree.

    Each annotation is run through ``program`` and reduced to a sequence of
    ``(command name, arg, resource)`` triples. The score is
    ``1 - distance / (len(seq1) + len(seq2))``, where ``distance`` is the first
    value returned by ``edit_distance`` for the two sequences.

    Two empty programs count as full agreement (``1.0``); without that guard
    the normalisation would divide by zero.

    Returns:
        A float; ``1.0`` means the two sequences are identical.
    """
    def signature(annotation):
        # Timestamps are deliberately left out: only what was done is compared.
        return [(x.command.name, x.arg, x.resource) for x in program(annotation)]

    lst1 = signature(annotation1)
    lst2 = signature(annotation2)
    total = len(lst1) + len(lst2)
    if total == 0:
        return 1.0
    dst, _ = edit_distance(lst1, lst2)
    return 1 - dst / total


In [16]:
# Pairwise annotator agreement per recipe, rendered as HTML: one heading per
# recipe and one line per pair of annotations, each id linking to the local
# annotation viewer with the agreement score in between.
html = []
for recipe_id, recipe_annotation_ids in annotations.items():
    html.append(f"<h2>{recipe_id}</h2>")
    for p1, p2 in itertools.combinations(recipe_annotation_ids, 2):
        ann1 = read_annotation(p1, mturk_batch)
        ann2 = read_annotation(p2, mturk_batch)
        sim = annotator_agreement(ann1, ann2)
        html.append("".join([
            f'<a href="http://127.0.0.1:8080/annotate/{mturk_batch}/{p1}" target="_new">{p1}</a>',
            ' ({s:0.2f}) '.format(s=sim),
            f'<a href="http://127.0.0.1:8080/annotate/{mturk_batch}/{p2}" target="_new">{p2}</a>',
        ]))
html = "<br />".join(html)
display_html(html)

In [17]:
# NOTE(review): the recorded run of this cell raised KeyError: '103308' — that
# recipe id is not a key of `annotations`.
# NOTE(review): the loop treats every entry as a dict keyed by step index
# (`a.keys()`, `b['0']`), whereas the `annotations` listing displayed in this
# notebook maps recipe ids to lists of id strings — presumably this cell was
# written for an older data layout; confirm before reviving it.
for a,b in zip(annotations["103308"], annotations["103308"][1:]):
    # Key of the highest-numbered entry of `a`, as a string.
    last = str(max(map(int,a.keys())))
    # Items of that entry whose second field starts with 'I'
    # (presumably ingredient ids — confirm).
    last = set(filter(lambda t: t[1].startswith('I'),map(tuple, a[last])))
    # Same filter applied to entry '0' of the following element.
    first = set(filter(lambda t: t[1].startswith('I'),map(tuple, b['0'])))
    # Items present at the end of `a` but missing at the start of `b`.
    print (last-first)

KeyError: '103308'

In [None]:
# NOTE(review): never executed in the saved notebook; it indexes the same
# "103308" key on which the preceding cell failed with KeyError.
program_step(annotations["103308"][1])

In [None]:
# Scratch check: a set holding only 'a' is empty again once 'a' is removed.
s = {'a'}
s.remove('a')

In [None]:
# NOTE(review): `edit_similarity` is not defined or imported anywhere in the
# visible notebook (only `edit_distance` is imported), so this unexecuted cell
# would raise NameError as written — confirm the intended function.
edit_similarity([1,2,4,5,6,7],[1,2])