In [374]:
import json
import numpy as np
import random
from itertools import combinations
import copy

In [375]:
# Attribute vocabularies. The position of a value inside its list is the
# index of the hot slot in that attribute's one-hot encoding.
shapes = ['cylinder', 'cube', 'sphere']
colors = ['brown', 'gray', 'green', 'purple', 'blue', 'yellow', 'cyan', 'red']
sizes = ['large', 'small']
materials = ['rubber', 'metal']

# Width of each attribute's one-hot vector.
(shape_onehot_size,
 color_onehot_size,
 size_onehot_size,
 material_onehot_size) = map(len, (shapes, colors, sizes, materials))

# Width of one encoded object: shape + color + size + material.
per_obj_attribute_size = sum(
    (shape_onehot_size, color_onehot_size, size_onehot_size, material_onehot_size)
)

# Bounds on the number of objects in a scene; encodings are padded to max_num_obj.
min_num_obj, max_num_obj = 3, 10

In [376]:
def string_shape_to_onehot(shape):
    """One-hot encode a shape name against the module-level ``shapes`` list.

    Returns a list of length ``shape_onehot_size`` holding float zeros with an
    integer ``1`` at the position of ``shape`` in ``shapes``.
    Raises ValueError if ``shape`` is not in ``shapes``.
    """
    hot_slot = shapes.index(shape)
    # Float zeros on purpose: same element types as np.zeros(...).tolist().
    encoding = [0.0] * shape_onehot_size
    encoding[hot_slot] = 1
    return encoding

def string_color_to_onehot(color):
    """One-hot encode a color name against the module-level ``colors`` list.

    Returns a list of length ``color_onehot_size`` holding float zeros with an
    integer ``1`` at the position of ``color`` in ``colors``.
    Raises ValueError if ``color`` is not in ``colors``.
    """
    hot_slot = colors.index(color)
    # Float zeros on purpose: same element types as np.zeros(...).tolist().
    encoding = [0.0] * color_onehot_size
    encoding[hot_slot] = 1
    return encoding

def string_size_to_onehot(size):
    """One-hot encode a size name against the module-level ``sizes`` list.

    Returns a list of length ``size_onehot_size`` holding float zeros with an
    integer ``1`` at the position of ``size`` in ``sizes``.
    Raises ValueError if ``size`` is not in ``sizes``.
    """
    hot_slot = sizes.index(size)
    # Float zeros on purpose: same element types as np.zeros(...).tolist().
    encoding = [0.0] * size_onehot_size
    encoding[hot_slot] = 1
    return encoding

def string_material_to_onehot(material):
    """One-hot encode a material name against the module-level ``materials`` list.

    Returns a list of length ``material_onehot_size`` holding float zeros with
    an integer ``1`` at the position of ``material`` in ``materials``.
    Raises ValueError if ``material`` is not in ``materials``.
    """
    hot_slot = materials.index(material)
    # Float zeros on purpose: same element types as np.zeros(...).tolist().
    encoding = [0.0] * material_onehot_size
    encoding[hot_slot] = 1
    return encoding
    
def get_source_attribute(scene_graph):
    """Encode a scene's objects, ordered by the first pixel coordinate.

    Args:
        scene_graph: dict with an ``'objects'`` list; each object provides
            ``'pixel_coords'``, ``'shape'``, ``'color'``, ``'size'`` and
            ``'material'``.

    Returns:
        A tuple ``(source_attribute, len_obj, string_attributes_ordered)``:
        the concatenated one-hot encodings of all objects (zero-padded up to
        ``max_num_obj`` objects), the number of objects, and the per-object
        ``[shape, color, size, material]`` name lists. Both sequences follow
        ascending ``pixel_coords[0]``.
    """
    scene_objects = scene_graph['objects']
    len_obj = len(scene_objects)

    x_coords = []
    names_by_obj = []
    onehot_by_obj = []
    for scene_obj in scene_objects:
        x_coords.append(scene_obj['pixel_coords'][0])

        names = [scene_obj['shape'], scene_obj['color'],
                 scene_obj['size'], scene_obj['material']]
        names_by_obj.append(names)

        onehot_by_obj.append(
            string_shape_to_onehot(names[0])
            + string_color_to_onehot(names[1])
            + string_size_to_onehot(names[2])
            + string_material_to_onehot(names[3])
        )

    # Stable sort: objects sharing an x coordinate keep their scene order.
    ordered_indices = sorted(range(len_obj), key=lambda idx: x_coords[idx])

    source_attribute = []
    string_attributes_ordered = []
    for idx in ordered_indices:
        source_attribute.extend(onehot_by_obj[idx])
        string_attributes_ordered.append(names_by_obj[idx])

    # Pad with all-zero object slots so every scene has the same length.
    if len_obj < max_num_obj:
        source_attribute.extend([0] * per_obj_attribute_size * (max_num_obj - len_obj))
    return source_attribute, len_obj, string_attributes_ordered

In [377]:
# Every edit that can be applied to a scene.
actions = ['move', 'change', 'add', 'remove']
# Restricted menus used by get_target_input: no 'remove' for scenes at the
# minimum object count, no 'add' for scenes at the maximum.
actions_min = [action for action in actions if action != 'remove']
actions_max = [action for action in actions if action != 'add']

# Interchangeable opening phrases for each kind of instruction.
move_commands = [
    'move the',
    'shift the',
    'transfer the',
]
change_commands = [
    'change the',
    'alter the',
    'modify the',
]
add_commands = [
    'add a',
    'append a',
]
remove_commands = [
    'remove the',
    'delete the',
    'erase the',
]

In [378]:
def edited_str_attr_to_target_attr(esa):
    """Encode an (edited) list of object descriptions as one flat vector.

    Args:
        esa: list of ``[shape, color, size, material]`` name lists.

    Returns:
        The concatenated one-hot encodings of the objects in list order,
        followed by all-zero slots up to ``max_num_obj`` objects when the
        list is shorter than that.
    """
    target_attribute = []
    for shape_s, color_s, size_s, material_s in (obj[:4] for obj in esa):
        target_attribute.extend(
            string_shape_to_onehot(shape_s)
            + string_color_to_onehot(color_s)
            + string_size_to_onehot(size_s)
            + string_material_to_onehot(material_s)
        )

    missing_slots = max_num_obj - len(esa)
    if missing_slots > 0:
        target_attribute.extend([0] * per_obj_attribute_size * missing_slots)
    return target_attribute

In [1]:
def move_gen(len_obj,string_attribute):
    """Generate a random "move" instruction and every scene it may produce.

    One object is drawn as the object to move and a different one as the
    reference. Each is described by its shape plus 0-3 of its other
    attributes, so the description may match several objects. Every
    (matching movable, matching reference) pair of distinct objects yields
    one target scene in which the movable object sits directly next to the
    reference on the requested side. List order is treated as left-to-right
    order: "to the right of" means the following list position.

    Args:
        len_obj: number of objects to draw from (indices ``0..len_obj-1``).
        string_attribute: list of ``[shape, color, size, material]`` lists.

    Returns:
        ``(input_text, target_attributes)`` where ``target_attributes`` is a
        list with one encoded scene (see ``edited_str_attr_to_target_attr``)
        per valid pair. A pair that is already arranged as requested
        contributes the unchanged scene exactly once.

    Raises:
        ValueError: if ``len_obj`` is below 2, since no distinct reference
            object can be drawn.
    """
    if len_obj < 2:
        raise ValueError('move_gen needs at least two objects, got %r' % (len_obj,))

    right_of = 'to the right of the'
    left_of = 'to the left of the'

    #choose Object to move: shape is always named, other attributes are optional
    num_attr_move_obj = random.randrange(4)
    obj_to_move = random.randrange(len_obj)
    attr_move_obj = [(0, string_attribute[obj_to_move][0])]
    for _ in range(num_attr_move_obj):
        unused = [a for a in enumerate(string_attribute[obj_to_move]) if a not in attr_move_obj]
        attr_move_obj.insert(0, random.choice(unused))

    #choose Direction to move
    direction = random.choice([right_of, left_of])

    #choose reference object, redrawing until it differs from the moved one
    num_attr_ref_obj = random.randrange(4)
    obj_to_ref = random.randrange(len_obj)
    while obj_to_ref == obj_to_move:
        obj_to_ref = random.randrange(len_obj)
    attr_ref_obj = [(0, string_attribute[obj_to_ref][0])]
    for _ in range(num_attr_ref_obj):
        unused = [a for a in enumerate(string_attribute[obj_to_ref]) if a not in attr_ref_obj]
        attr_ref_obj.insert(0, random.choice(unused))

    #make input text
    input_text = random.choice(move_commands)
    for _, value in attr_move_obj:
        input_text += ' ' + value
    input_text += ' ' + direction
    for _, value in attr_ref_obj:
        input_text += ' ' + value
    input_text += '.'

    #get objects that can be moved
    objs_to_move = [
        obj_idx for obj_idx, obj in enumerate(string_attribute)
        if all(obj[slot] == value for slot, value in attr_move_obj)
    ]

    #get objects that can be references
    objs_to_ref = [
        obj_idx for obj_idx, obj in enumerate(string_attribute)
        if all(obj[slot] == value for slot, value in attr_ref_obj)
    ]

    #build one target per (movable, reference) pair, ignoring identical indices
    target_attributes = []
    for src in objs_to_move:
        for ref in objs_to_ref:
            if src == ref:
                continue
            esa = copy.deepcopy(string_attribute)
            moved = esa.pop(src)
            # Popping an object left of the reference shifts the reference down by one.
            ref_after_pop = ref - 1 if src < ref else ref
            insert_at = ref_after_pop + 1 if direction == right_of else ref_after_pop
            # When the pair is already arranged as requested, insert_at == src
            # and the scene is reproduced unchanged, so no special case (and
            # no second append) is needed.
            esa.insert(insert_at, moved)
            target_attributes.append(edited_str_attr_to_target_attr(esa))
    return input_text, target_attributes

In [380]:
#assumption: attributes not named in the second part of the sentence do not change
def change_gen(len_obj,string_attribute):
    """Generate a random "change" instruction and every scene it may produce.

    The new description always names a shape plus 0-3 of color/size/material.
    The object to change is identified by its shape plus 0-3 of its other
    attributes, so several objects may match. One target is produced for each
    non-empty subset of the matching objects, with the named new attributes
    written onto every object of the subset.

    Args:
        len_obj: number of objects to draw from (indices ``0..len_obj-1``).
        string_attribute: list of ``[shape, color, size, material]`` lists.

    Returns:
        ``(input_text, target_attributes)``; ``target_attributes`` is a list
        of encoded scenes (see ``edited_str_attr_to_target_attr``).
    """
    # attributes of the new description; the shape is always stated
    extra_new = random.randrange(4)
    attr_new_obj = [(0, random.choice(shapes))]
    optional_new = [
        (1, random.choice(colors)),
        (2, random.choice(sizes)),
        (3, random.choice(materials)),
    ]
    for _ in range(extra_new):
        remaining = [pair for pair in optional_new if pair not in attr_new_obj]
        attr_new_obj.insert(0, random.choice(remaining))

    # attributes used to identify the object to change; again shape first
    extra_ident = random.randrange(4)
    picked = random.randrange(len_obj)
    picked_pairs = list(enumerate(string_attribute[picked]))
    attr_in_obj = [picked_pairs[0]]
    for _ in range(extra_ident):
        remaining = [pair for pair in picked_pairs if pair not in attr_in_obj]
        attr_in_obj.insert(0, random.choice(remaining))

    # instruction text: "<command> <old description> to a <new description>."
    words = [random.choice(change_commands)]
    words.extend(value for _, value in attr_in_obj)
    words.append('to a')
    words.extend(value for _, value in attr_new_obj)
    input_text = ' '.join(words) + '.'

    # every object that fits the identifying description
    obj_to_edit = [
        obj_idx for obj_idx, obj in enumerate(string_attribute)
        if all(obj[slot] == value for slot, value in attr_in_obj)
    ]

    # all non-empty subsets of the matching objects (ex. [0],[1],[0,1])
    obj_combinations = [
        list(group)
        for group_size in range(1, len(obj_to_edit) + 1)
        for group in combinations(obj_to_edit, group_size)
    ]

    # one edited copy of the scene per subset
    target_attributes = []
    for group in obj_combinations:
        edited_string_attribute = copy.deepcopy(string_attribute)
        for obj_idx in group:
            for slot, value in attr_new_obj:
                edited_string_attribute[obj_idx][slot] = value
        target_attributes.append(edited_str_attr_to_target_attr(edited_string_attribute))

    return input_text, target_attributes

In [381]:
# Value pools per attribute slot, indexed like [shape, color, size, material].
for_adding = [shapes,colors,sizes,materials]
def add_gen(len_obj,string_attribute):
    """Generate a random "add" instruction and every scene it may produce.

    The new object is described by a shape plus 0-3 of color/size/material;
    attributes left unstated may take any value from ``for_adding``. The
    marker object is described by its shape plus 0-3 of its other attributes,
    so several objects may match. One target is produced for every
    (matching marker, fully specified new object) combination, with the new
    object inserted directly after the marker for "to the right of" and
    directly before it for "to the left of".

    Args:
        len_obj: number of objects to draw from (indices ``0..len_obj-1``).
        string_attribute: list of ``[shape, color, size, material]`` lists.

    Returns:
        ``(input_text, target_attributes)``; ``target_attributes`` is a list
        of encoded scenes (see ``edited_str_attr_to_target_attr``).
    """
    right_of = 'to the right of the'
    left_of = 'to the left of the'

    #new object text: shape always stated, the rest optional
    num_attr_add = random.randrange(4)
    attr_in_added_obj = [(0,random.choice(shapes))]
    color = random.choice(colors)
    size = random.choice(sizes)
    material = random.choice(materials)
    optional_attrs = [(1,color),(2,size),(3,material)]
    for _ in range(num_attr_add):
        unused = [x for x in optional_attrs if x not in attr_in_added_obj]
        attr_in_added_obj.insert(0, random.choice(unused))

    #object marker text
    num_attr = random.randrange(4)
    obj_mark = random.randrange(len_obj)
    attribute_in_text = [(0, string_attribute[obj_mark][0])]
    for _ in range(num_attr):
        unused = [a for a in enumerate(string_attribute[obj_mark]) if a not in attribute_in_text]
        attribute_in_text.insert(0, random.choice(unused))

    #pick left right in respect to this object
    direction = random.choice([right_of, left_of])

    #make input text
    input_text = random.choice(add_commands)
    for _, value in attr_in_added_obj:
        input_text += ' ' + value
    input_text += ' ' + direction
    for _, value in attribute_in_text:
        input_text += ' ' + value
    input_text += '.'

    #Finding all new_object possibilities: stated slots are fixed, the others are enumerated
    template = [None] * len(for_adding)
    for slot, value in attr_in_added_obj:
        template[slot] = value
    free_slots = [slot for slot, value in enumerate(template) if value is None]

    if free_slots:
        pools = [for_adding[slot] for slot in free_slots]
        # One row per combination of free-slot values. meshgrid + transpose is
        # kept so the enumeration order stays what it has always been;
        # .tolist() turns the numpy string scalars back into plain str.
        grid = np.array(np.meshgrid(*pools)).T.reshape(-1, len(free_slots))
        new_objects_list = []
        for row in grid.tolist():
            candidate = list(template)
            for slot, value in zip(free_slots, row):
                candidate[slot] = value
            new_objects_list.append(candidate)
    else:
        new_objects_list = [template]

    #finding all objects markers that can take direction command
    obj_to_mark = [
        obj_idx for obj_idx, obj in enumerate(string_attribute)
        if all(obj[slot] == value for slot, value in attribute_in_text)
    ]

    #creating the edited_string_attribute lists and target_attributes
    insert_offset = 1 if direction == right_of else 0
    target_attributes = []
    for marker_idx in obj_to_mark:
        for new_object in new_objects_list:
            edited_string_attribute = copy.deepcopy(string_attribute)
            edited_string_attribute.insert(marker_idx + insert_offset, new_object)
            target_attributes.append(edited_str_attr_to_target_attr(edited_string_attribute))

    return input_text, target_attributes

In [382]:
def remove_gen(len_obj,string_attribute):
    """Generate a random "remove" instruction and every scene it may produce.

    The object to remove is described by its shape plus 0-3 of its other
    attributes, so several objects may match. One target is produced for each
    non-empty subset of the matching objects whose removal leaves at least
    ``min_num_obj`` objects (counted against ``len_obj``).

    Args:
        len_obj: number of objects to draw from (indices ``0..len_obj-1``).
        string_attribute: list of ``[shape, color, size, material]`` lists.

    Returns:
        ``(input_text, target_attributes)``; ``target_attributes`` is a list
        of encoded scenes (see ``edited_str_attr_to_target_attr``).
    """
    # shape is always named; up to three further attributes are added at random
    extra_attrs = random.randrange(4)
    picked = random.randrange(len_obj)
    picked_pairs = list(enumerate(string_attribute[picked]))
    attribute_in_text = [picked_pairs[0]]
    for _ in range(extra_attrs):
        remaining = [pair for pair in picked_pairs if pair not in attribute_in_text]
        attribute_in_text.insert(0, random.choice(remaining))

    words = [random.choice(remove_commands)]
    words.extend(value for _, value in attribute_in_text)
    input_text = ' '.join(words) + '.'

    # objects that satisfy the description in the text
    removable = [
        obj_idx for obj_idx, obj in enumerate(string_attribute)
        if all(obj[slot] == value for slot, value in attribute_in_text)
    ]

    # all non-empty subsets of removable objects (ex. [0],[1],[0,1])
    obj_combinations = [
        list(group)
        for group_size in range(1, len(removable) + 1)
        for group in combinations(removable, group_size)
    ]

    target_attributes = []
    for group in obj_combinations:
        # never shrink the scene below the minimum object count
        if len_obj - len(group) < min_num_obj:
            continue
        edited_string_attribute = copy.deepcopy(string_attribute)
        # indices in a group are ascending; popping from the back keeps the
        # remaining indices valid
        for obj_idx in reversed(group):
            edited_string_attribute.pop(obj_idx)
        target_attributes.append(edited_str_attr_to_target_attr(edited_string_attribute))

    return input_text, target_attributes

In [383]:
def get_target_input( len_obj, string_attribute):
    """Pick a random edit suited to the scene size and generate it.

    Scenes with at most ``min_num_obj`` objects draw from ``actions_min``,
    scenes strictly between the bounds draw from ``actions``, and all other
    scenes draw from ``actions_max``.

    Args:
        len_obj: number of objects in the scene.
        string_attribute: list of ``[shape, color, size, material]`` lists.

    Returns:
        The ``(input_text, target_attributes)`` pair of the chosen generator.
    """
    if len_obj <= min_num_obj:
        allowed_actions = actions_min
    elif len_obj < max_num_obj:
        allowed_actions = actions
    else:
        allowed_actions = actions_max
    choice = random.choice(allowed_actions)

    if choice == 'move':
        return move_gen(len_obj, string_attribute)
    if choice == 'change':
        return change_gen(len_obj, string_attribute)
    if choice == 'add':
        return add_gen(len_obj, string_attribute)
    if choice == 'remove':
        return remove_gen(len_obj, string_attribute)

In [384]:
def main(clvr_attr):
    """Build one dataset entry per scene in ``clvr_attr['scenes']``.

    Each entry is a dict with the keys ``'a'`` (encoded source scene),
    ``'ta'`` (list of encoded target scenes), ``'cd'`` (instruction text)
    and ``'im'`` (the scene's ``'image_filename'``).

    Returns:
        The list of entries, in scene order.
    """
    data_set = []
    for scene in clvr_attr['scenes']:
        source_attribute, len_obj, string_attribute = get_source_attribute(scene)
        input_text, target_attributes = get_target_input(len_obj, string_attribute)
        data_set.append({
            'a': source_attribute,
            'ta': target_attributes,
            'cd': input_text,
            'im': scene['image_filename'],
        })
    return data_set

In [386]:
# Read the scene descriptions; main() iterates over clvr_attr['scenes'].
clvr_attr_filename = "CLEVR_val_scenes.json"
with open(clvr_attr_filename) as f:
    clvr_attr = json.load(f)
    
# Generate one randomly chosen edit per scene and write the dataset as JSON.
# No random seed is set in this notebook, so each run draws different edits.
data_set = main(clvr_attr)
with open('CLEVR_val_data_full.json', 'w') as o:
    json.dump(data_set, o)

[['cube', 'green', 'large', 'rubber'], ['sphere', 'gray', 'small', 'rubber'], ['cube', 'blue', 'large', 'metal']]
[['cube', 'brown', 'large', 'rubber'], ['cube', 'brown', 'small', 'rubber'], ['cylinder', 'cyan', 'small', 'rubber'], ['sphere', 'gray', 'small', 'rubber']]
[['cube', 'red', 'small', 'metal'], ['cube', 'gray', 'small', 'rubber'], ['cube', 'purple', 'large', 'metal'], ['sphere', 'blue', 'large', 'metal'], ['cylinder', 'cyan', 'small', 'metal'], ['cylinder', 'gray', 'large', 'metal'], ['sphere', 'red', 'small', 'rubber']]
[['cube', 'red', 'small', 'metal'], ['cube', 'purple', 'large', 'metal'], ['cube', 'gray', 'small', 'rubber'], ['sphere', 'blue', 'large', 'metal'], ['cylinder', 'cyan', 'small', 'metal'], ['cylinder', 'gray', 'large', 'metal'], ['sphere', 'red', 'small', 'rubber']]
[['cube', 'purple', 'large', 'metal'], ['sphere', 'brown', 'large', 'metal'], ['cylinder', 'cyan', 'small', 'rubber'], ['sphere', 'brown', 'large', 'metal'], ['cylinder', 'red', 'large', 'metal']

In [371]:
# Scratch cell for testing: encode a single scene (index 1) by hand.
# Needs clvr_attr, which is loaded by the cell that reads CLEVR_val_scenes.json.
source_attribute, len_obj, string_attribute = get_source_attribute(clvr_attr['scenes'][1])

[['cube', 'brown', 'small', 'rubber'], ['sphere', 'red', 'small', 'rubber'], ['sphere', 'green', 'small', 'rubber'], ['cube', 'gray', 'large', 'rubber'], ['sphere', 'gray', 'small', 'metal'], ['cylinder', 'yellow', 'small', 'rubber'], ['cube', 'yellow', 'small', 'rubber'], ['sphere', 'yellow', 'small', 'metal'], ['sphere', 'blue', 'large', 'rubber']]


In [None]:
# Scratch cell: try a single generator on the scene prepared in the cell above
# (uses the len_obj and string_attribute assigned there).
input_text, target_attributes = move_gen(len_obj,string_attribute)