In [1]:
import collections, itertools, json, re, operator
from functools import reduce
from pathlib import Path
import pandas as pd

ls = lambda p, g="*.*": print("\n".join(map(str, p.glob(g))))

data_dir = Path("../data/scone")
annot_dir = Path("../annotations/scone")
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 100)

In [2]:
ls(data_dir)

../data/scone/scene-test.tsv
../data/scone/alchemy-test.tsv
../data/scone/alchemy-train.tsv
../data/scone/scene-dev.tsv
../data/scone/README.md
../data/scone/alchemy-train-orig.tsv
../data/scone/scene-train-orig.tsv
../data/scone/tangrams-test.tsv
../data/scone/scene-train.tsv
../data/scone/tangrams-train-orig.tsv
../data/scone/tangrams-train.tsv
../data/scone/alchemy-dev.tsv
../data/scone/tangrams-dev.tsv


In [3]:
parsed = []
with (data_dir / "alchemy-dev.tsv").open('r') as f:
    for line in f:
        idx, *parts = line.strip().split('\t')
        states       = [{int(v.split(':',1)[0]):v.split(':')[1].strip() for v in p.split(' ')} for i,p in enumerate(parts) if i%2==0]
        instructions = [p for i,p in enumerate(parts) if i%2==1]
        instructions.insert(0,"INIT")
        seq = 0
        for state, instruction in zip(states,instructions):
            d = state
            d["instruction"]=instruction
            d['id']=idx
            d['seq']=seq
            parsed.append(d)
            seq+=1
df = pd.DataFrame(parsed)
df.head(100)

Unnamed: 0,1,2,3,4,5,6,7,instruction,id,seq
0,_,g,p,o,g,r,y,INIT,dev-1830,0
1,_,g,p,_,g,r,y,throw out the orange chemical,dev-1830,1
2,_,_,p,_,g,r,yg,"then, add the leftmost beaker of green chemical to the yellow chemical",dev-1830,2
3,_,_,p,_,g,r,bb,mix it,dev-1830,3
4,_,_,p,_,_,r,bbg,"then, add the remaining green chemical to it",dev-1830,4
5,_,_,p,_,_,r,bbb,mix that too,dev-1830,5
6,y,_,y,y,p,g,ooo,INIT,dev-1831,0
7,y,_,y,y,p,_,ooo,throw out green beaker,dev-1831,1
8,y,_,y,y,_,_,ooo,throw out purple one,dev-1831,2
9,y,_,_,yy,_,_,ooo,pour third beaker into fourth one,dev-1831,3


In [4]:
lst = ['a','b']
lst.insert(0,'c')
lst

['c', 'a', 'b']

In [5]:
beakers = reduce(operator.or_, [set(df[i].unique()) for i in range(1,8)], set())
colors = list({c for b in beakers for c in b if '_' not in c})

In [6]:
len(beakers)

114

In [7]:
colors

['b', 'p', 'g', 'r', 'o', 'y']

In [8]:
desc = {"p": "Purple", "o": "Orange", "y": "Yellow", "b": "Brown", "g": "Green", "r":"Red"}

In [9]:
ingredients = {"I" + (c).upper() : desc[c] for c in colors}
tools = {"T" + (c + c).upper() : desc[c] for c in colors}
time_lengths = {"L" + (c + c + c).upper() : desc[c] for c in colors}
ingredients

{'IB': 'Brown',
 'IP': 'Purple',
 'IG': 'Green',
 'IR': 'Red',
 'IO': 'Orange',
 'IY': 'Yellow'}

In [10]:
tools

{'TBB': 'Brown',
 'TPP': 'Purple',
 'TGG': 'Green',
 'TRR': 'Red',
 'TOO': 'Orange',
 'TYY': 'Yellow'}

In [11]:
time_lengths

{'LBBB': 'Brown',
 'LPPP': 'Purple',
 'LGGG': 'Green',
 'LRRR': 'Red',
 'LOOO': 'Orange',
 'LYYY': 'Yellow'}

In [14]:
def init_instruction(d):
    ret = ""
    for i in range(1,8):
        if d[i]=='_':
            continue
        if len(d[i])==1:
            u = "1 unit"
        elif len(d[i])==2:
            u = "2 units"
        elif len(d[i])==3:
            u = "3 units"
        else:
            u = "ERROR"
        ret += "Put " + u+ " of " + desc[d[i][0]] + " color in beaker #" + str(i) + ", "
    return ret

In [15]:
df.loc[df["instruction"]=="INIT", "instruction"]=df[df["instruction"]=="INIT"].apply(init_instruction,axis=1)
df

Unnamed: 0,1,2,3,4,5,6,7,instruction,id,seq
0,_,g,p,o,g,r,y,"Put 1 unit of Green color in beaker 2, Put 1 unit of Purple color in beaker 3, Put 1 unit of Ora...",dev-1830,0
1,_,g,p,_,g,r,y,throw out the orange chemical,dev-1830,1
2,_,_,p,_,g,r,yg,"then, add the leftmost beaker of green chemical to the yellow chemical",dev-1830,2
3,_,_,p,_,g,r,bb,mix it,dev-1830,3
4,_,_,p,_,_,r,bbg,"then, add the remaining green chemical to it",dev-1830,4
...,...,...,...,...,...,...,...,...,...,...
1465,r,_,gg,o,g,r,y,throw out second beaker,dev-2128,1
1466,r,_,gg,oy,g,r,_,pour yellow beaker into orange one,dev-2128,2
1467,_,_,gg,oyr,g,r,_,pour first beaker into fourth one,dev-2128,3
1468,_,_,gg,bbb,g,r,_,it turns completely brown,dev-2128,4


In [41]:
def serialize(grp):
    grp=grp.sort_values(by="seq")
    ret = {"instructions": [], "ingredients":list(ingredients.values()), "normalized_ingredients": ingredients, "title": grp.head(1)["id"].values[0], "status": 0, "validations": [], "photo_url": "", "url": "", "labels": []}
    for idx,row in grp.iterrows():
        ret["instructions"].append(str(row["instruction"]))
        ret["labels"].append([])
    return ret
ds_annotations = df.groupby("id").apply(serialize)
ds_annotations

id
dev-1830    {'instructions': ['Put 1 unit of Green color in beaker 2, Put 1 unit of Purple color in beaker 3...
dev-1831    {'instructions': ['Put 1 unit of Yellow color in beaker 1, Put 1 unit of Yellow color in beaker ...
dev-1834    {'instructions': ['Put 1 unit of Orange color in beaker 1, Put 1 unit of Green color in beaker 2...
dev-1835    {'instructions': ['Put 1 unit of Yellow color in beaker 1, Put 3 units of Green color in beaker ...
dev-1836    {'instructions': ['Put 1 unit of Purple color in beaker 1, Put 1 unit of Purple color in beaker ...
                                                           ...                                                 
dev-2123    {'instructions': ['Put 1 unit of Red color in beaker 1, Put 1 unit of Orange color in beaker 2, ...
dev-2124    {'instructions': ['Put 1 unit of Yellow color in beaker 1, Put 1 unit of Orange color in beaker ...
dev-2125    {'instructions': ['Put 1 unit of Red color in beaker 2, Put 1 unit of Yellow color in bea

In [42]:
for idx,v in ds_annotations.iteritems():
    idx=idx.split('-',1)[1]
    with (annot_dir/f"{idx}.json").open('w') as f:
        try:
            json.dump(v,f,indent=4)
        except TypeError:
            print({k:type(w) for k,w in v.items()})

In [44]:
df[df["id"]=="dev-2035"]

Unnamed: 0,1,2,3,4,5,6,7,instruction,id,seq
1002,r,o,ooo,o,o,rrr,o,"Put 1 unit of Red color in beaker 1, Put 1 unit of Orange color in beaker 2, Put 3 units of Oran...",dev-2035,0
1003,r,o,ooo,o,o,r,o,drain 2 units from the right beaker of red chemical,dev-2035,1
1004,r,o,ooo,_,o,ro,o,"then, add the orange chemical from the fourth beaker to it",dev-2035,2
1005,r,o,o,_,o,ro,o,drain 2 units from the third beaker,dev-2035,3
1006,r,o,o,_,o,bb,o,"next, mix the second to last beaker",dev-2035,4
1007,_,o,o,_,o,bbr,o,add the remaining red chemical to it,dev-2035,5
