In [2]:
import json, random, re, collections, itertools, base64, hashlib
from datetime import datetime, timedelta
from pathlib import Path
from tqdm.notebook import tqdm
import pandas as pd
from operator import itemgetter as at
from IPython.core.display import display, HTML
from ipywidgets import interact
def display_html(x):
    """Wrap ``x`` in an ``HTML`` object and hand it to ``display``."""
    return display(HTML(x))


# Directories used by the cells below, relative to the notebook's working directory.
annot_path = Path("..") / "annotations"
data_path = Path("..") / "data"

# Read metadata

In [3]:
# Build one flat list of 2-tuples from four JSON files, in this order:
# ingredients map, implicit ingredients, tools, time lengths.
ingredients = []

# For the ingredients map each tuple is (value, key) of the JSON object,
# i.e. the pair is flipped relative to how it is stored in the file.
# NOTE(review): presumably the file maps name -> id; confirm against the data.
with (data_path / "ingredients_map.json").open('r') as f:
    ingredients += [(value, key) for key, value in json.load(f).items()]

# The remaining files contribute their (key, value) pairs unchanged.
for fname in ("implicit_ingredients.json", "tools.json", "time_lengths.json"):
    with (data_path / fname).open('r') as f:
        ingredients += list(json.load(f).items())

# Peek at the tail of the combined list.
ingredients[-10:]

[('TSKEWER', 'Skewers'),
 ('TSKILLET', 'Skillet'),
 ('TSTIR', 'Stirring spoon'),
 ('TSPATULA', 'Spatula'),
 ('TWOK', 'Wok'),
 ('TOTHER', 'Other'),
 ('LTIME', 'For X minutes'),
 ('LCOLOR', 'Until color change'),
 ('LTEXTURE', 'Until texture change'),
 ('LTEMPTRATURE', 'Until cool/boil')]

In [4]:
# resources.json is read as a list of categories, each carrying a "children"
# list; flatten it into (id, name) pairs, keeping file order.
with (data_path / "resources.json").open('r') as f:
    resource_categories = json.load(f)
resources = [
    (child["id"], child["name"])
    for category in resource_categories
    for child in category["children"]
]
resources

[('W1', 'Combine'),
 ('W2', 'Roll'),
 ('W3', 'Fold'),
 ('W4', 'Massage'),
 ('W5', 'Knead'),
 ('W6', 'Peel'),
 ('C1', 'Chop finely'),
 ('C2', 'Cut to chunks'),
 ('GW1', 'Low'),
 ('GW2', 'Medium-Low'),
 ('GW3', 'Medium'),
 ('GW4', 'Medium-High'),
 ('GW5', 'High'),
 ('BL1', 'Low'),
 ('BL2', 'Medium'),
 ('BL3', 'High'),
 ('R1', 'Marinade'),
 ('R2', 'Chill'),
 ('R3', 'Freeze'),
 ('S1', 'Wash'),
 ('S2', 'Drain'),
 ('O1', 'Grind'),
 ('O2', 'Blend')]

In [29]:
# Report both list lengths and their product (printed as the vector size).
print(
    f"# of resources: {len(resources)}",
    f"# of ingredients: {len(ingredients)}",
    f"Vector size {len(ingredients)*len(resources)}",
    sep="\n",
)

# of resources: 23
# of ingredients: 753
Vector size 17319


In [34]:
# Enumerate every (resource id, ingredient id) pair, resources varying slowest,
# and build the reverse lookup from pair to its position in that list.
idx2label = [(res[0], ing[0]) for res in resources for ing in ingredients]
label2idx = {label: idx for idx, label in enumerate(idx2label)}

# Read annotations

In [27]:
def handle_instruction_label(lst):
    """Expand a list of events into a mapping from step index to pairs.

    Each item of ``lst`` must support lookup of the keys ``"start"``,
    ``"end"``, ``"action"`` and ``"resource"``. ``start`` and ``end`` are
    strings of the form ``YYYY-MM-DDT00:00:00``; each is converted to the
    number of days since 2020-01-01. An event contributes the tuple
    ``(resource, action)`` to every index in ``range(start, end)``, so the
    end day itself is excluded.

    Returns a plain ``dict`` mapping ``int`` index to a list of tuples, in
    the order the events appear in ``lst``. Indices no event covers are
    absent.
    """
    origin = datetime(2020, 1, 1)
    stamp_format = "%Y-%m-%dT00:00:00"

    # Pull the four fields out of every event up front, before any parsing.
    rows = [(ev["start"], ev["end"], ev["action"], ev["resource"]) for ev in lst]

    by_step = {}
    for raw_start, raw_end, action, resource in rows:
        first = (datetime.strptime(raw_start, stamp_format) - origin).days
        stop = (datetime.strptime(raw_end, stamp_format) - origin).days
        for step in range(first, stop):
            by_step.setdefault(step, []).append((resource, action))
    return by_step


# Load every annotation file, keep those with a positive status, and store
# per annotation id (file name up to the first '.') its instructions and its
# labels converted by handle_instruction_label.
annotations = dict()
instructions = dict()
# iterdir() yields entries in arbitrary order; sorting makes the dicts and the
# JSON written below come out the same on every run.
for p in sorted(annot_path.iterdir()):
    if not p.is_file():
        # Skip sub-directories and the like; open() on them would raise.
        continue
    annotation_id = p.name.split('.', 1)[0]
    with p.open('r') as f:
        annotation = json.load(f)
    # Annotations with a status of zero or below are ignored.
    if int(annotation["status"]) <= 0:
        continue
    instructions[annotation_id] = annotation["instructions"]
    annotations[annotation_id] = [
        handle_instruction_label(label) for label in annotation["labels"]
    ]
# NOTE(review): the file name spelling ("annotaions") is kept as-is because
# other code may read it; rename only together with any readers.
# json.dump writes the integer step keys as strings.
with (data_path/"annotaions.json").open('w') as f:
    json.dump(annotations, f)
annotations

{'104600': [{0: [('S1', 'Ieg3R-oQ_')], 1: [('S2', 'Ieg3R-oQ_')]},
  {0: [('S1', 'Ieg3R-oQ_')],
   1: [('S2', 'Ieg3R-oQ_')],
   2: [('W1', 'TBOWL'),
    ('W1', 'I6xvdoZzb'),
    ('W1', 'Iv00mZbtk'),
    ('W1', 'IKeeuKzTy'),
    ('W1', 'IpyJf3OVq'),
    ('W1', 'IXclyBsV3')],
   3: [('W1', 'TBOWL'),
    ('W1', 'Ieg3R-oQ_'),
    ('W1', 'I6xvdoZzb'),
    ('W1', 'Iv00mZbtk'),
    ('W1', 'IKeeuKzTy'),
    ('W1', 'IpyJf3OVq'),
    ('W1', 'IXclyBsV3')]},
  {0: [('S1', 'Ieg3R-oQ_')],
   1: [('S2', 'Ieg3R-oQ_')],
   2: [('W1', 'TBOWL'),
    ('W1', 'I6xvdoZzb'),
    ('W1', 'Iv00mZbtk'),
    ('W1', 'IKeeuKzTy'),
    ('W1', 'IpyJf3OVq'),
    ('W1', 'IXclyBsV3')],
   3: [('W1', 'TBOWL'),
    ('W1', 'Ieg3R-oQ_'),
    ('W1', 'I6xvdoZzb'),
    ('W1', 'Iv00mZbtk'),
    ('W1', 'IKeeuKzTy'),
    ('W1', 'IpyJf3OVq'),
    ('W1', 'IXclyBsV3')],
   4: [('GW3', 'TSKILLET')]},
  {0: [('S1', 'Ieg3R-oQ_')],
   1: [('S2', 'Ieg3R-oQ_')],
   2: [('W1', 'TBOWL'),
    ('W1', 'I6xvdoZzb'),
    ('W1', 'Iv00mZbtk'),
    (