In [1]:
""" Creates a spatio-temporal scene graph dictionary 

This file transforms Charades and Action Genome annotations to a spatio-temporal scene graph. 
This graph has inter and intra class connections

Possible exports:
    dictionary based on an array of ids
    IDX = {key=annotation: value: index}
    ENG = {key=index: value: annotation}
"""

' Creates a spatio-temporal scene graph dictionary \n\nThis file transforms Charades and Action Genome annotations to a spatio-temporal scene graph. \nThis graph has inter and intra class connections\n\nPossible exports:\n    dictionary based on an array of ids\n    IDX = {key=annotation: value: index}\n    ENG = {key=index: value: annotation}\n'

In [2]:
import json
import random
import pickle
import numpy as np
import cv2
import time
import pandas as pd

In [3]:
# import ag annotations
f = open('../data/stsgs/object_bbox_and_relationship.pkl','rb')
OBJ_REL = pickle.load(f)
f.close()

In [6]:
""" Cell Summary: 

    functions to parse text files into english and index pairs
"""
def snip(arr):
    for i in range(len(arr)): 
        arr[i] = arr[i][:-1] #delete last char (\n)

def readLines(path):
    f = open(path)
    lines = f.readlines()
    f.close()
    snip(lines)
    return lines

def makeIdxEng(paths):
    """ Creates two reference dictionaries
    
    Args:
        paths: an array of paths to textfiles with rows "idx eng"
    
    Returns:
        idx: {key=english: value=idx}
        eng: {key=idx: value=english}
    """
    idx = {}
    eng = {}
    
    for p in paths:
        lines = readLines(p)
        to_add = {s.split(" ", 1)[1]: s.split(" ", 1)[0] for s in lines} #{'english': idx}
        idx = {**idx, **to_add}
        to_add = {s.split(" ", 1)[0]: s.split(" ", 1)[1] for s in lines} #{'english': idx}
        eng = {**eng, **to_add}
        
    
        
    return idx, eng

IDX, ENG = makeIdxEng(['../data/stsg_generation/object_classes copy.txt',
                         '../data/stsg_generation/object_classes.txt',
                       '../data/stsg_generation/Charades_v1_verbclasses.txt', # note: verb need to be before relationships
                     '../data/stsg_generation/relationship_classes.txt', 
                     '../data/stsg_generation/Charades_v1_classes copy.txt',
                      '../data/stsg_generation/Charades_v1_classes.txt',
                      '../data/stsg_generation/pres_part.txt',
                      '../data/stsg_generation/tricky_verbs.txt',
                     ])

In [12]:
""" Cell Summary: 
    
    General functions.
"""

BANNED = {'other_relationship', IDX['other_relationship'], 
          'other relationship', IDX['other relationship'], 
          'unsure', IDX['unsure'],
         }


def addListIfUnique(lst, item):
    if item not in lst: 
        lst.append(item)
    return lst

In [13]:
""" Cell Summary: 

    Functions for pre-processing Charades Actions
"""


data = pd.read_csv("../data/stsg_generation/Charades_v1_train.csv")
CHARADES = data[['id', 'actions', 'scene']].copy() # Only take these columns 

LENGTHS = data[['id', 'length']].copy()

CHARADES['scene'] = CHARADES.iloc[:,2].str.split('/', 1) # Take first location ann

for row in range(CHARADES['scene'].size): 
    CHARADES.iloc[row, 2] = CHARADES.iloc[row, 2][0]


def getCharadesActions(video_id):
    """ Splits actions of a particular video into (string name, start, end)
    
    Args: 
        video_id: the Charades video identification string
    
    Returns: 
        A list of tuples of the video annotations for the given video. 
        
        [... (char_id, start time, end time) ...] 
    """

    row = CHARADES.index[CHARADES['id'] == video_id][0]
    
    array = CHARADES.iloc[row,1]
    if type(array) != str:
        #print("No annotated actions for row ", row)
        return
    s1 = array.split(";")
    s2 = [i.split() for i in s1]
    return [(i[0], i[1], i[2]) for i in s2]


def getFPS(video_id):
    """ Finds frames per second from mp4 video
    
    Args:
        video_id: the video identification string
        
    Returns:
        The frames per second of the associated video
    """
    
    vid_path = '../Charades_v1_480/%s.mp4' % (video_id)

    # Start default camera
    cap = cv2.VideoCapture(vid_path)
    
    # Check if camera opened successfully 
    if not cap.isOpened():  
        print("Error opening " + video_id) 
        return
        
    fps = cap.get(cv2.CAP_PROP_FPS)  # round(cap.get(cv2.CAP_PROP_FPS))
    return fps
    
    
def getSeconds(frames_list, fps): 
    """ Finds the second at which each frame occured
    
    Args: 
        frames_list: the list of frame_ids in a video
        fps: frames per second
    
    Returns:
        a list of the location of each frame of the video in seconds
    """
    seconds = []
    for frame in frames_list:
        sec = int(frame) / fps
        seconds.append(round(sec,1))
    return seconds


def matchFramesAndActions(actions, frames_list, fps):
    """ Determines which frames occured within each action
    
    Args: 
        actions: a list of tuples (action_id, start time, end time)
        frames_list: the list of frames within the video
        fps: the frames persecond of the video
    """
    
    seconds = getSeconds(frames_list, fps)
    frames_per_action = {}
    
    
    for action in actions: 
        start = float(action[1])
        end = float(action[2])
        f = []
        
        closest = None
        closest_range = 999999
        for i in range(len(seconds)): 
            sec = seconds[i]
            diff = abs(sec - end)
            if diff <= closest_range:
                closest_range = diff
                closest = i
            if sec < start:
                continue
            elif sec <= end :
                f.append(frames_list[i])
                
            else:
                break
        if len(f) == 0:
            if start < seconds[0]:
                f = [frames_list[0]]
            else:
                if end <= seconds[-1]:
                    if closest is None:
                        print("shouldn't be here end = ", end, " seconds[-1] == ", seconds[-1])
                    f = [frames_list[closest]]
                else: # jesus effing christ....... was just replaing things this whole time....... :(:(:(:(
                    f = [frames_list[-1]]
        frames_per_action[action[0]] = f
    
    
    
    
    
    return frames_per_action, seconds


def actionSplit(): 
    """ Associates each Charades action with a subject and a verb
    
    Returns:
        A dictionary mapping actions to their respective subject and verb. For example:
        
        {key='c001': ('o1', 'v009')}
    """
    
    file = open('../data/stsg_generation/Charades_v1_mapping.txt')
    classes = file.readlines()
    file.close()
    snip(classes)
    actionSV = {}
    reversedActionSV = {}
    
    existing = set()
    for s in classes:
        sp = s.split(" ")
        actionSV[sp[0]] = (sp[1], sp[2])
        reversedActionSV[(sp[1], sp[2])] = sp[0]
        
        if s in existing:
            print('    ', s,  'is already in existing')
        existing.add(s)
        #print()
        #print(s)
        ##print("actionSV[", sp[0], '] = ', (sp[1], sp[2]))
        #print("rverseactionSV[", (sp[1], sp[2]), '] = ', sp[0])
        
    return actionSV, reversedActionSV

ACTION_SV, REVERSED_ACTION_SV = actionSplit()

In [14]:
""" Supplement AG objects and relationships: 

    Action subject/verb
    verb relationships
    certain excepions
"""

entailment_file = readLines('../data/stsg_generation/entailing_relationships.txt')
ENTAILMENTS = {}
for e in entailment_file:
    sp = e.split(" ")
    ENTAILMENTS[sp[0]] = sp[1].split("/") 

def vTag(v):
    """ Find the type of vertex given index

    Args:
        v: an string index of a vertex

    Returns:
        string type of vertex
    """
    t = v[0]
    num = int(v[1:])
    if t == 'o' and num <= 36:
        return 'objects'
    elif t == 'c' and num <= 156:
        return 'actions'
    elif t == 'v' and num <= 32:
        return 'verb_relationship'
    elif t == 'r':
        if num <= 3:
            return 'attention_relationship'
        elif num <= 9:
            return 'spatial_relationship'
        elif num <= 26:
            return "contacting_relationship"

    print('invalid tag', v)

    
def getObjInAnnotations(annotations, obj_class):
    for a in annotations: 
        if a['class'] == obj_class:
            return a
    return

def makeAnnotation(obj_class, ref_ann):
    ann = {'class': obj_class,
            'bbox': None,
            'attention_relationship': [],
            'spatial_relationship': [],
            'contacting_relationship': [],
            'verb_relationship': [],
            'visible': True, #TODO: need to assume for this...
            'metadata': ref_ann['metadata']
          }
    return ann

def updateAGAnnotations(video):
    for f_id in video:
        for ann in video[f_id]:
            ann['verb_relationship'] = []
            if ann['attention_relationship'] == None:
                ann['attention_relationship'] = []
            if ann['contacting_relationship'] == None:
                ann['contacting_relationship'] = []
            if ann['spatial_relationship'] == None:
                ann['spatial_relationship'] = []
    
    return video


def inRange(sec, rang):
    sec = float(sec)
    sp = rang.split('-')
    
    low = float(sp[0])
    hi = float(sp[1])
    
    answer = sec >= low and sec <= hi
    
    return answer


def addActionSVNoFrame2Sec(video, actions, frames_list, fpa, secs, switch_by_sec):
    
    
    secs_to_frames = {}
    for idx in range(len(secs)):
        secs_to_frames[secs[idx]] = frames_list[idx]
        
    
    
    switch_by_frame = {}
    for sec in secs_to_frames:
        frame = secs_to_frames[sec]
        switch_by_frame[frame] = {}
        
        for rang in switch_by_sec:
            if inRange(sec, rang):
                switch_by_frame[frame] = {**switch_by_frame[frame], **switch_by_sec[rang]} 
                
    video = objDuplicateFixes(video, switch_by_frame)
    
    
    for action in actions: #TODO: do we need to take care of when there's multple actions of same type?
        obj, verb = ACTION_SV[action[0]]
        if obj == IDX["None"]:
            continue
        for frame_id in fpa[action[0]]:
            obj_ann = getObjInAnnotations(video[frame_id], ENG[obj]) # also switched here
            if obj_ann == None:
                obj_ann = makeAnnotation(ENG[obj], video[frame_id][0]) # NOTE TO SELF: i think this was the idx issue - but what changed from before?
                video[frame_id].append(obj_ann)
                # add annotation
            verb_type = vTag(verb)
            
            obj_ann[verb_type] = addListIfUnique(obj_ann[verb_type], ENG[verb]) 
            
   # print('by sec', switch_by_sec)
    #print("sec to frame", secs_to_frames)
    #print("by frame", switch_by_frame)  
    return video, secs_to_frames, switch_by_frame
    


def addActionSV(video, actions, frames_list, fpa, secs, switch_by_sec):
    
    
    secs_to_frames = {}
    frames_to_sec = {}
    for idx in range(len(secs)):
        s = secs[idx]
        f = frames_list[idx]
        if s not in secs_to_frames:
            secs_to_frames[s] = []
            
        secs_to_frames[s].append(f)
        
        frames_to_sec[f] = s
        
    
    
    switch_by_frame = {}
    for frame in frames_to_sec:
        sec = frames_to_sec[frame]
        switch_by_frame[frame] = {}
        
        for rang in switch_by_sec:
            if inRange(sec, rang):
                switch_by_frame[frame] = {**switch_by_frame[frame], **switch_by_sec[rang]} 
                
    video = objDuplicateFixes(video, switch_by_frame)
    
    
    for action in actions: #TODO: do we need to take care of when there's multple actions of same type?
        obj, verb = ACTION_SV[action[0]]
        if obj == IDX["None"]:
            continue
        for frame_id in fpa[action[0]]:
            obj_ann = getObjInAnnotations(video[frame_id], ENG[obj]) # also switched here
            if obj_ann == None:
                obj_ann = makeAnnotation(ENG[obj], video[frame_id][0]) # NOTE TO SELF: i think this was the idx issue - but what changed from before?
                video[frame_id].append(obj_ann)
                # add annotation
            verb_type = vTag(verb)
            
            obj_ann[verb_type] = addListIfUnique(obj_ann[verb_type], ENG[verb]) 
            
   # print('by sec', switch_by_sec)
    #print("sec to frame", secs_to_frames)
    #print("by frame", switch_by_frame)  
    return video, secs_to_frames, frames_to_sec, switch_by_frame
    
    
def newVideoAnn(new_class, ann):
    newAnn = ann.copy()
    newAnn['class'] = new_class
    
    return newAnn
    
    
def addSandwichEntailments(video):
    for f_id in video:
        frame = video[f_id]
        
        for ann in frame:
            if ann['class'] != 'sandwich':
                continue
            frame.append(newVideoAnn('food', ann))
            
    return video
    
def addEntailments(video, rel_tags):
    # first, get ENTAILMENTS dict from above
    # for each frame
    # then, go thorugh all existing annotations, and collect a set of things to add
    # then, go through that set and add them all to respective relationships, if don't already exists
    
    for f_id in video:
        frame = video[f_id]
        
        for ann in frame:
            entailments = set()
            for r_idx in range(len(rel_tags)):
                rel_tag = rel_tags[r_idx]
                #print("reltag", rel_tag)
                for rel in ann[rel_tag]:
                    #print("    rel", rel, IDX[rel])
                    if IDX[rel] in ENTAILMENTS: # TODO had to change from being idx
                        #print("        in entailemnts", ENTAILMENTS[IDX[rel]])
                        for entailment in ENTAILMENTS[IDX[rel]]: # also changed here
                            #if entailment == 'touching' and 'not_contacting' in ann[rel_tag]:
                            #    break
                            entailments.add(entailment)
                            
            #print()
            #print()
            #print("before", ann)
            #print()
            #print('add entaiilemnts: ', entailments)
            #print()
            for entailment in entailments:
                verb_type = vTag(IDX[entailment])
                ann[verb_type] = addListIfUnique(ann[verb_type], entailment)
                #print('adding')
            #print("after", ann)
            
                
    # If there is a sandwich, there is also food. Have same rels
    
    #video = addSandwichEntailments(video)
                
    return video

In [15]:


blacklisted = ["sandwich", "bag", "closet", "door", "paper", "broom", "cup", "pillow", "dish", "laptop", 
               "shelf", "window", "picture", "table", "mirror", "floor", "shoe"]

all_above = ["bed", "sofa", "chair", "sofa/couch"]
all_beneath = ["doorway"]

threshold_to_keep = 0.6

def isSpatial(obj):
    if obj['spatial_relationship'] is None:
        return 0
    else:
        print('Is annotation', obj['spatial_relationship'])
        return 1

def deleteBelowThreshold(video):
    print('hello')
    objs_spat = []
    spatial = 0
    total = 0
    
    for frame_id in video:
        for ann in video[frame_id]:
            objs_spat.append({
                'spatial': isSpatial(ann),
                'frame': frame_id,
                'class': ann['class'],
                'total': 1
            })
            
            spatial += isSpatial(ann)
            total += 1
            
            #print('Total', ann['spatial_relationship'])
    if spatial / total >= threshold_to_keep:
        print("IS Spatial")
        return video
    
    for frame_id in video:
        for ann in video[frame_id]:
            ann['spatial_relationship'] = []
            
    print("NOt spatial")
    return video
        
def adjustSpatialAnn(video):
    
    #video = deleteBelowThreshold(video)
    
    for frame_id in video:
        for ann in video[frame_id]:
            spatials = ann['spatial_relationship']
            obj = ann['class']
            
            if "in_front_of" in spatials and "behind" in spatials:
                spatials.remove("in_front_of")
                spatials.remove("behind")
            
            elif "in_front_of" in spatials:
                spatials.remove("in_front_of")
                spatials.append("behind")
            
            elif "behind" in spatials:
                spatials.remove("behind")
                spatials.append("in_front_of")
            
            
            if "beneath" in spatials and "above" in spatials:
                if obj in blacklisted:
                    spatials.remove("beneath")
                    spatials.remove("above")
                elif obj in all_above:
                    spatials.remove('beneath')
                elif obj in all_beneath:
                    spatials.remove("above")
                    
                    
                    
                #print("AFTER", obj, frame_id, spatials)
            
            elif obj in blacklisted:
                if "beneath" in spatials:
                    spatials.remove("beneath")
                elif "above" in spatials:
                    spatials.remove("above")
                    
            elif obj in all_above:
                if "beneath" in spatials:
                    spatials.remove('beneath')
                    spatials.append('above')
                    
            elif obj in all_beneath:
                if "above" in spatials:
                    spatials.remove('above')
                    spatials.append('beneath')
                    
            else:
                if "above" in spatials: 
                    spatials.remove('above')
                    spatials.append('beneath')
                elif "beneath" in spatials:
                    spatials.remove('beneath')
                    spatials.append('above')

    return video


In [17]:
def engList(lst):
    newList = []
    for l in lst:
        eng = ENG[l]
        newList.append(eng)
        
    return newList

#for v_id in stsgs:
#    word = IDX['beneath'] 
#    spatials = stsgs[v_id]['spatial']
#    if word in spatials:
#        print(v_id)
#        names = engList(spatials[word]['names'])
#        print(" ", engList(spatials[word]['names']))

In [18]:
act_seq_priors_data = readLines('../data/stsg_generation/action_seq_priors.txt')
print(len(act_seq_priors_data))

act_seq_priors = {}

for seq in act_seq_priors_data:
    before, after = seq.split(" ")
    if before not in act_seq_priors:
        act_seq_priors[before] = []
    act_seq_priors[before].append(after)


151


In [19]:
same_obj = {
    'person': [], 
    'bag': [], 
    'bed': ['chair', 'sofa'], 
    'blanket': ['clothes', 'towel'], 
    'book': ['paper', 'picture'], 
    'box': [], 
    'broom': [], 
    'chair': ['sofa', 'bed'], 
    'closet': ['door', 'refrigerator'], 
    'clothes': ['blanket', 'towel'], 
    'cup': ['dish'], 
    'dish': ['cup'], 
    'door': ['refrigerator', 'closet'], 
    'doorknob': [], 
    'doorway': [], 
    'floor': [], 
    'food': ['sandwich', 'groceries', 'medicine'], 
    'groceries': ['food', 'sandwich', 'medicine'], 
    'laptop': [], 
    'light': [], 
    'medicine': ['food', 'groceries', 'sandwich'], 
    'mirror': [], 
    'paper': ['book', 'picture'], 
    'phone': [], 
    'picture': ['paper', 'book'], 
    'pillow': [], 
    'refrigerator': ['door', 'closet'], 
    'sandwich': ['food', 'groceries', 'medicine'], 
    'shelf': ['table'], 
    'shoe': [], 
    'sofa': ['bed', 'chair'], 
    'table': ['shelf'], 
    'television': [], 
    'towel': ['clothes', 'blanket'], 
    'vacuum': [], 
    'window': [], 
    'hands': [], 
    'hair': [], 
    'None': [], 
}


# SOFA bed chair sofa
# TOWEL blanket clothes towel
# PAPER book paper picture
# DISH cup dish
# CLOSET door refrigerator closet
# FOOD food sandwich groceries medicine
# SHELF shelf table

priority_obj = {
    # note all alphabetical
    # note - will have to translate between eng and idx
    'bed/chair/sofa': 'sofa',
    'bed/chair': 'bed',
    'chair/sofa': 'sofa',
    'bed/sofa': 'sofa',
    'blanket/clothes/towel': 'towel',
    'blanket/clothes': 'clothes',
    'blanket/towel': 'towel', 
    'clothes/towel': 'towel', 
    'book/paper/picture': 'paper',
    'book/paper': 'paper', # NOTE: CHANGE DEF'book', 
    'paper/picture': 'picture', 
    'book/picture': 'book', 
    'cup/dish': 'dish', # NOTE: CHANGE DEF'cup', 
    'closet/door/refrigerator': 'closet', 
    'closet/door': 'closet',
    'door/refrigerator': 'refrigerator',
    'closet/refrigerator': 'refrigerator', 
    'food/groceries/medicine/sandwich': 'food',
    'food/groceries/medicine': 'food', 
    'food/groceries/sandwich': 'food', 
    'food/medicine/sandwich': 'food', 
    'groceries/medicine/sandwich': 'groceries', 
    'food/groceries': 'groceries', 
    'food/medicine': 'medicine', 
    'food/sandwich': 'sandwich',
    'groceries/medicine': 'groceries',
    'groceries/sandwich': 'sandwich',
    'medicine/sandwich': 'sandwich', 
    'shelf/table': 'shelf'
}

#Sofa —> bed
#Towel —> blankets
#Dish —> cup
#Sandwich —> food
#Groveries —> food
#Paper —> book
#Shelf —> table
total_replacement = {
    'sofa': 'bed',
    'sofa/couch': 'bed',
    'towel': 'blanket',
    'cup': 'dish',
    'sandwich': 'food',
    'groceries': 'food',
    'book': 'paper',
    'shelf': 'table'
}

In [20]:


def setUp(key, dic):
    if key not in dic: 
        dic[key] = []
        
    return dic


def timeIOU(idx1, idx2, actions):
    a1, s1, _, _ = actions[idx1]
    a2, s2, _, _ = actions[idx2]
    
    banned = ['c156']
    if a1 in banned or a2 in banned:
        return False, s1, s2
    
    
    # This is intended so only conmbine potential overlaps
    sp1 = a1.split('/')
    sp2 = a2.split('/')
    
    if len(sp1) == 1:
        a1 = ENG[ACTION_SV[sp1[0]][0]]
    else:
        a1 = sp1[0]
        
    if len(sp2) == 1:
        a2 = ENG[ACTION_SV[sp2[0]][0]]
    else:
        a2 = sp2[0]
        
    if a2 not in same_obj[a1]:
        return False, s1, s2
    
        
        
    if s1 < s2: 
        before, s1, e1, _ = actions[idx1]
        after, s2, e2, _ = actions[idx2]
    else:
        before, s1, e1, _ = actions[idx2]
        after, s2, e2, _ = actions[idx1]

        
    
    if s1 == s2:
        if e1 > e2:
            return True, s1, e1
        else: 
            return True, s1, e2
        
    if e1 < s2:
        return False, s1, e2

    if e1 >= e2:
        return True, s1, e1
    
    # at this point we know is s1, s2, e1, e2
    
    intersection = e1 - s2
    union = e2 - s1
    
    iou = intersection / union
    
    same = iou >= .25
    
    return same, s1, e2

def makeObjectsConsistent(actions):
    # so I only want to do the ones where theyoverlap. 
    # I think put each action into a dic by verb makes sense. 
    by_verb = {}
    by_obj = {}
    
    for i in range(len(actions)):
        act, start, finish = actions[i]
        start = float(start)
        finish = float(finish)
        
        obj, verb = ACTION_SV[act]
        
        by_verb = setUp(verb, by_verb)
        by_obj = setUp(obj, by_obj)
        
        by_verb[verb].append((act, start, finish, [i]))
        by_obj[obj].append((act, start, finish, [i]))
        
    #Maybe go by time. keep combining until not possible to combine anymore?
    
    for verb in by_verb:
        potential_actions = by_verb[verb]
        
        num_combined = 1
        while num_combined > 0: #potential actions cannot be combined anymore? Maybe a var that's only true if you go thorugh the whole thing and dont get it? or count, so while count == 0
            num_combined = 0
            
            to_merge = [] # or this keeps indices? then go thorugh and find all indices?
            for i in range(len(potential_actions)):
                # maybe merge and then break? b/c indices are going to get weird
                for j in range(len(potential_actions)):
                    if j <= i:
                        continue
                    iou, start, end = timeIOU(i, j, potential_actions)
                    if iou:
                        # j is higher than i so first ow indices mess up
                        eng1 = potential_actions[i][0]
                        eng2 = potential_actions[j][0]
                        idxs1 = potential_actions[i][3]
                        idxs2 = potential_actions[j][3]
                        combo_idxs = idxs1 + idxs2
                        items = set()
                        for k in [eng1, eng2]:
                            if '/' in k:
                                sp = k.split("/")
                                for l in sp:
                                    items.add(l)
                            else:
                                items.add(ENG[ACTION_SV[k][0]])
                        items = sorted(list(items))
                        
                        combo_obj = ''
                        for k in items:
                            if combo_obj == '':
                                combo_obj = k
                            else:
                                combo_obj = combo_obj + '/' + k
                        
                        del potential_actions[j]
                        del potential_actions[i]
                        potential_actions.append((combo_obj, start, end, combo_idxs))
                        
                        num_combined = 1
                        break
                if num_combined > 1:
                    break
        
    idx_to_delete = set()
    to_add = []
    switch_by_sec = {}
    
    for verb in by_verb:
        for action in by_verb[verb]:
            objs, start, finish, idxs = action
            
            sp = objs.split("/")
            if len(sp) == 1:
                continue
                
            
            subj = priority_obj[objs]
            
            for i in idxs:
                idx_to_delete.add(i)
                
            new_act = REVERSED_ACTION_SV[(IDX[subj], verb)]
            to_add.append((new_act, str(start), str(finish)))
            key = "%s-%s" % (start, finish)
            if key not in switch_by_sec:
                switch_by_sec[key] = {}
            
            for obj in sp: 
                if obj == subj: 
                    continue
                switch_by_sec[key][obj] = subj
            
            
    #print("switch by sec", switch_by_sec)
    #print("actions", actions)
    #print("to delete", idx_to_delete)
    #print('to add', to_add)
    new_actions = []
            
    for i in range(len(actions)):
        if i in idx_to_delete:
            continue
            
        new_actions.append(actions[i])
    
    for i in to_add:
        new_actions.append(i)
        
    return new_actions, switch_by_sec
                   
                        
                        
            
        
    
    # while doing this, keep track of how many action annotations per object
    # then have a dic where ther's all diff rows (sofa/bed/chair is one) it corresponds to. then for every pair in the one by verb, 
    # if they could possibly be in the same row, compare them to see if IOU more than 50% overlap. if it does, keep track of that pair
    # also keep track of list of all ones that could possibly be replaced.
    # for each group that will be choosing a replacement, make the replacement either the most used one, or in case of tie, the most generic one
    # go through all to change and do that, combining the times and making the action the replacement one (will have to find action by value? in actionSV)
    # dont forget to turn them back into strings

In [21]:
ORDERED_FRAMES = readLines('../data/stsg_generation/frame_list.txt')


concurrent = {
    'holding': ['tidying'],# 'talking'],
    'tidying': ['holding'], 
    #'talking': ['holding'],
    'throwing': ['putting'],
    'putting': ['throwing'],
    'watching': ['playing on', 'working on'],
    'playing on': ['watching', 'working on'],
    'working on': ['playing on', 'watching'],
}

# For these - if's ok if the key surrounds the value
surrounding = {
    'holding': ['tidying', 'talking'],
    'watching': ['playing on', 'working on'],
}
# combine same if overlapping twice? or do differently with each round?


# blacklist grasphing doorknob with before/after opening?
# potential change for future is certain things that must co-occur. like washing and holding a cup


    


def overlapInOrder(before_idx, after_idx, actions):
    before, before_start, before_end = actions[before_idx]
    after, after_start, after_end = actions[after_idx]
    
    #print("testing (%s, %s, %s) before (%s, %s, %s)" % (before, before_start, before_end, after, after_start, after_end) )
    
    before_start = float(before_start)
    before_end = float(before_end)
    after_start = float(after_start)
    after_end = float(after_end)
    
    if after_start < before_start: 
        if after_end <= before_start:
            return False, 0, 0
        
        num_after_after = abs(after_end - before_end)
        num_after_before = abs(before_start - after_start)
        
        if num_after_before >= num_after_after:
            return False, 0, 0
        
        return True, before_end, after_start
        
        
    return (before_end >= after_start and after_end >= before_start), before_end, after_start


def formatTime(time):
    if type(time) != float:
        print("INCORRECT TYPE")
        return
    return str(round(time, 2))


def adjustSegmentation(before_idx, after_idx, before_end, after_start, actions):
    before, before_start, before_end = actions[before_idx]
    after, after_start, after_end = actions[after_idx]
    
    margin = .01
    
    before_start = float(before_start)
    before_end = float(before_end)
    after_start = float(after_start)
    after_end = float(after_end)
    
    diff = before_end - after_start
    
    if diff < 0:
        print("We have an issue!")
        
    change = diff / 2
    #print(before_end, after_start, change)
    before_len = before_end - before_start
    after_len = after_end - after_start
    
    if before_len <= 1 and after_len <= 1:
        return actions
    
    if before_len <= 1:
        # take all difference out of afterlen
        actions[after_idx] = (after, formatTime(after_start + diff + margin), formatTime(after_end))
        return actions
    elif after_len <= 1:
        #take all difference out of before_len
        actions[before_idx] = (before, formatTime(before_start), formatTime(before_end - diff - margin))
        return actions
    
    if diff >= before_len + after_len:
        return
    
    if change >= (before_len * .9) or change >= (after_len * .9):
        if change >= (before_len * .9):
            while change >= (before_len * .9):
                change = change * .75
            if margin + diff - change >= (after_len * .9):
                return actions
            actions[before_idx] = (before, formatTime(before_start), formatTime(before_end - change - margin))
            actions[after_idx] = (after, formatTime(after_start + (diff - change + margin)), formatTime(after_end))
                
        
        if change >= (after_len * .9):
            while change >= (after_len * .9):
                change = change * .75
            if margin + diff - change >= (before_len * .9):
                return actions
            actions[before_idx] = (before, formatTime(before_start), formatTime(before_end - diff + change - margin))
            actions[after_idx] = (after, formatTime(after_start + change + margin), formatTime(after_end))
        
        return actions
    actions[before_idx] = (before, formatTime(before_start), formatTime(before_end - change - margin))
    actions[after_idx] = (after, formatTime(after_start + change + margin), formatTime(after_end))
    
    return actions


def editCharadesSegmentations(actions):
    all_actions = []
    for act, _, _ in actions:
        all_actions.append(act)
    
    for act_idx in range(len(actions)):
        act, start, finish = actions[act_idx]
        if act in act_seq_priors:
            priors = act_seq_priors[act]
            
            for prior_idx_in_priors in range(len(priors)):
                prior = priors[prior_idx_in_priors]
                
                for prior_idx in range(len(all_actions)):
                    if prior == all_actions[prior_idx]:
                        overlapping, before_end, after_start = overlapInOrder(act_idx, prior_idx, actions)
                        if overlapping:
                            actions = adjustSegmentation(act_idx, prior_idx, before_end, after_start, actions)
        
    return actions


def combineVagueAndSpecific(vague_actions, relevant_verb, actions): 
    putting_vague = []
    putting = []
    
    for i in range(len(actions)):
        act = actions[i][0]
        action_verb =IDX[ENG[ACTION_SV[act][1]]]
        if act in vague_actions:
            putting_vague.append(i)
        elif action_verb == relevant_verb:
            putting.append(i)
            
    to_del = []
    to_add = []
    for i in putting_vague:
        act_vague, start_vague, finish_vague = actions[i]
        start_vague = float(start_vague)
        finish_vague = float(finish_vague)
        len_vague = finish_vague - start_vague
        
        for j in putting:
            act_put, start_put, finish_put = actions[j]
            start_put = float(start_put)
            finish_put = float(finish_put)
            
            if start_vague <= start_put:
                if finish_vague <= start_put:
                    continue
                else:
                    overlap = finish_vague - start_put
                        
            if start_put <= start_vague:
                if finish_put <= start_vague:
                    continue
                        
                else:
                    overlap = finish_put - start_vague
                        
            if overlap / len_vague >= .3:
                to_del.append(i)
                to_add.append((act_vague, str(start_put), str(finish_put)))
                break #(so chooses 1)
             
    new_actions = []
    
    for i in range(len(actions)):
        if i not in to_del:
            new_actions.append(actions[i])
            
        #else:
        #    print('not adding ', actions[i])
        
    for i in to_add:
        new_actions.append(i)
        
    return new_actions
            

    c
    
    
    
def syncronizeSomething(actions):
    actions = combineVagueAndSpecific(['c009', 'c081'], IDX['putting'], actions) # putting something on a table/shelf
    actions = combineVagueAndSpecific(['c044'], IDX['taking'], actions) # taking something from a box
    actions = combineVagueAndSpecific(['c126'], IDX['throwing'], actions) # throwing something on the floor
    actions = combineVagueAndSpecific(['c127'], IDX['tidying'], actions) # tidying something on the floor
    
    
        
    return actions
            
                

def combineSameIfOverlapping(actions):
    print()
    print("COMBINE SAME OVERLAPPING")
    # note - this doesnt work as well for 3.... but not worth at this point
    to_del = []
    to_add = []
        
    
        
    for i in range(len(actions)):
        act1, start1, after1 = actions[i]
        start1 = float(start1)
        after1 = float(after1)
        if i in to_del:
            continue
            
        for j in range(len(actions)):
            act2, start2, after2 = actions[j]
            start2 = float(start2)
            after2 = float(after2)
            if j == i or j in to_del:
                continue
                
            v1 = ENG[ACTION_SV[act1][1]]
            v2 = ENG[ACTION_SV[act2][1]]
            
            
            o1 = ENG[ACTION_SV[act1][0]]
            o2 = ENG[ACTION_SV[act2][0]]
            
            if act1 != act2:
                if v1 not in concurrent: 
                    continue
                if v2 not in concurrent[v1]:
                    continue
                if o1 != o2:
                    continue
            
            
            if start1 <= start2: 
                if after1 <= start2:
                    continue
                    
                    
                if after1 >= after2:
                    if v1 not in surrounding:
                        to_del.append(j)
                        to_add.append((act2, str(start1), str(after1)))
                        # ADD ONE THAT HAS THE WHOLE TIME?
                        continue
                    if v2 not in surrounding[v1]:
                        to_del.append(j)
                        to_add.append((act2, str(start1), str(after1)))
                        # ADD ONE THAT HAS THE WHOLE TIME?
                        continue
                    if v2 in surrounding[v1]:
                        continue
                # so here we know its s1, s2, a1, a2
                to_del.append(j)
                to_del.append(i)
                #to_add.append((act1, str(start1), str(after2)))
                
                if act1 == act2:
                    to_add.append((act1, str(start1), str(after2)))
                else: 
                    to_add.append((act1, str(start1), str(after2)))
                    to_add.append((act2, str(start1), str(after2)))
            
            if start2 < start1: 
                if after2 <= start1:
                    continue
                    
                if after2 >= after1:
                    if v2 not in surrounding:
                        to_del.append(i) # TODO: which do you need to delete and which do you need to append?
                        to_add.append((act1, str(start2), str(after2)))
                        continue
                    if v1 not in surrounding[v2]:
                        to_del.append(i)
                        to_add.append((act1, str(start2), str(after2)))
                        continue
                    
                    if v1 in surrounding[v2]:
                        continue
                # so here we know its s1, s2, a1, a2
                to_del.append(j)
                to_del.append(i)
                if act1 == act2:
                    to_add.append((act1, str(start2), str(after1)))
                else: 
                    to_add.append((act1, str(start2), str(after1)))
                    to_add.append((act2, str(start2), str(after1)))
                
    new_actions = []
    print('to_del', to_del)
    print('to_add', to_add)
    
    for i in range(len(actions)):
        if i not in to_del:
            new_actions.append(actions[i])
        else:
            print('not adding: ', actions[i])
        
    for i in to_add:
        new_actions.append(i)
        
    return new_actions
            
    
    

def combineSameIfOverlapping2(actions):
    #print()
    #print("COMBINE SAME OVERLAPPING2")
    # note - this doesnt work as well for 3.... but not worth at this point
    to_del = []
    to_add = []
        
    
        
    for i in range(len(actions)):
        act1, start1, after1 = actions[i]
        start1 = float(start1)
        after1 = float(after1)
        if i in to_del:
            continue
            
        for j in range(len(actions)):
            act2, start2, after2 = actions[j]
            start2 = float(start2)
            after2 = float(after2)
            if j == i or j in to_del:
                continue
                
            min_start = min(start1, start2)
            max_after = max(after1, after2)
                
            v1 = ENG[ACTION_SV[act1][1]]
            v2 = ENG[ACTION_SV[act2][1]]
            
            
            o1 = ENG[ACTION_SV[act1][0]]
            o2 = ENG[ACTION_SV[act2][0]]
            
            if act1 != act2:
                if v1 not in concurrent: 
                    continue
                if v2 not in concurrent[v1]:
                    continue
                if o1 != o2:
                    continue
            
            
            if start1 <= start2: 
                if after1 <= start2:
                    continue
                    
            if start2 < start1: 
                if after2 <= start1:
                    continue
            ### Rest of loop was added
            to_del.append(i)
            to_del.append(j)
            to_add.append((act2, str(min_start), str(max_after)))
                
    new_actions = []
   # print('to_del', to_del)
    #print('to_add', to_add)
    
    for i in range(len(actions)):
        if i not in to_del:
            new_actions.append(actions[i])
        #else:
        #    print('not adding: ', actions[i])
        
    for i in to_add:
        new_actions.append(i)
        
    return new_actions


def replaceUniversalVague(actions):
    # replace these action all over the place
    
    # awakening somewhere = awakening in bed
    # cooking something --> making some food
    # eating something = eating some food (below)
    #actions = combineVagueAndSpecific(['c156'], IDX['eating'], actions) # eating something
    
    for i in range(len(actions)):
        a, s, e = actions[i]
        if a == IDX['awakening somewhere']:
            actions[i] = (IDX['awakening in bed'], s, e)
        if a == IDX['cooking something']:
            actions[i] = (IDX['making some food'], s, e)
            
        if a == IDX['eating something']:
            actions[i] = (IDX['eating some food'], s, e)
    
    return actions

In [22]:
assumedLengths = {
    'taking': 3,
    'holding': 10, 
    'putting down': 4,
}


def act(s, v):
    if v == 'v008':
        v = 'r15'
    
    if (s, v) not in REVERSED_ACTION_SV:
        #print(s, v, 'not in reversed act sv')
        return None
    
    return REVERSED_ACTION_SV[(s, v)]


def addActionSequencingEntailment(first, second, actions, acts, v_id):
    
    if not first or not second:
        return actions, acts
    
    # if both or neither in it, return
    if not (first in acts) ^ (second in acts):
        return actions, acts
    
    if first in acts:
        # this implies second is not. dang need to know what the last possible start time ss
        
        # need the v id 
        
        l = LENGTHS.loc[LENGTHS['id'] == v_id]['length'].values[0]
        
        s1, e1 = acts[first][0]
        
        _, v = ACTION_SV[second]
        
        new_e = e1 + assumedLengths[ENG[v]]
        
        if new_e > l:
            return actions, acts
        
        actions.append((second, str(e1), str(new_e)))
        acts[second] = [(e1, new_e)]
        
        return actions, acts
        
    s2, e2 = acts[second][0]
    
    _, v = ACTION_SV[first]
    
    new_s = s2 - assumedLengths[ENG[v]]
    
    if new_s < 0:
        return actions, acts
    
    actions.append((first, str(new_s), str(s2)))
    acts[first] = [(new_s, s2)]
    
    return actions, acts
        
    
        
        
    # if only taking.... what if hold till end? I think so.... 
        
        

def assumedSequences(actions, v_id):
    #print("call assumed", v_id)
    
    # NOT YET Going standing to going from standing to sitting → sitting → standing up
    # Taking → holding → putting
    
    # since most things happen once, honestly jsut look to see if exists. its messy enough allready
    # need to do for all
    
    # split action into s/v
    # if s == blanket, skip
    # if verb is taking and no holding, add holding (if not at end)
    
    
    # make actions also into a mapping from action to instances
    
    # FIRST get a list of objects and then do this per object
    acts = {}
    
    for a, s, e in actions:
        if a not in acts:
            acts[a] = []
            
        acts[a].append((float(s), float(e)))
    
    t = IDX['taking']
    h = IDX['holding']
    p = IDX['putting']
    
    
    objs = set()
    for i in range(len(actions)):
        # action, start, end
        a, s, e = actions[i]
        
        # object, verb
        o, v = ACTION_SV[a]
        
        if o == 'blanket':
            continue
            
        if act(o, t):
            objs.add(o)
    
    #print("objects", objs)
    
    #if 'o11' in objs:
    #    print()
    #    print(v_id, ' before: ')
    #    for a, s, e in actions:
    #        o, v = ACTION_SV[a]
            
    #        if o == 'o11':
    #            print(ENG[a], s, e)
    
    
    for o in objs:
        #print("object: ", o)
        # taking and putting but no holding --> holding
        ot = act(o, t)
        oh = act(o, h)
        op = act(o, p)
        
        #print(ot, oh, op)
        
        
        
        if ot in acts and oh not in acts and op in acts:
            _, s = acts[ot][0]
            e, _ = acts[op][0]
            
            if s >= e:
                print("trying to put 'holding' between overlapping annotations")
                continue
            
            actions.append((oh, s, e))
            continue
        
        
        # taking --> holding
        # taking <-- holding
        actions, acts = addActionSequencingEntailment(ot, oh, actions, acts, v_id)
        
        
        # holding --> putting
        # holding <-- putting
        actions, acts = addActionSequencingEntailment(oh, op, actions, acts, v_id)
        
        
        # taking <-- holding
        actions, acts = addActionSequencingEntailment(ot, oh, actions, acts, v_id)
    
    
    
    #if 'o11' in objs:
    #    print('after')
        #print(v_id, ' after: ', actions)
    #    for a, s, e in actions:
    #        o, v = ACTION_SV[a]
            
    #        if o == 'o11':
    #            print(ENG[a], s, e)
    
    
    return actions

In [23]:
# doing enveloping here. 

# cant make them mess up the previous steps....

# OR make the eneloped one smaller instead of the eveloping bigger

envel = {
    IDX['sitting at a table']: [IDX['working at a table']],
    IDX['watching a picture']: [IDX['laughing at a picture']],
    IDX['watching television']: [IDX['laughing at television']],
    IDX['watching something in a mirror']: [IDX['smiling in a mirror']],
    IDX['watching a book']: [IDX['smiling at a book']],
    IDX['watching a laptop or something on a laptop']: [IDX['playing on a laptop']],
    IDX['holding a cup of something']: [IDX['drinking from a cup']],
    IDX['holding a phone']: [IDX['playing with a phone'], IDX['talking on a phone'], IDX['taking a picture of something']],
    IDX['holding some food']: [IDX['eating some food']],
    IDX['holding a broom']: [IDX['tidying up with a broom']],
    IDX['holding a dish']: [IDX['washing a dish']],
    IDX['holding some medicine']: [IDX['consuming some medicine']],
}


def envelopActions(actions):
    # make acts
    
    acts = {}
    
    for a, s, e in actions:
        if a not in acts:
            acts[a] = []
            
        acts[a].append((float(s), float(e)))
    
    
    
    # for each pair see if both exist
    for large in envel:
        for small in envel[large]:
            if small not in acts:
                continue
                
            ss, es = acts[small][0]
            
                
            if large not in acts:
                # add in large during that time
                #print(large, ' not in actions so add')
                acts[large] = [(ss, es)]
                actions.append((large, str(ss), str(es)))
                continue
                
            # so now both in acts
            
            
            sl, el = acts[large][0]
            
            # if small within large, continue
            if es <= el and ss >= sl:
                #print("small is within large: es <= el", es <= el, "sl >= ss", ss >= sl)
                continue
                
                
            # if not overlap, then add one durring other time
            if (el < ss) or (es < sl):
                #print('not overlapping!')
                acts[large].append((ss, es))
                actions.append((large, str(ss), str(es)))
                continue
            
            # if do overlap, shrink small
            if es > el:
                #print("short ending needs chopping")
                # change the short actions ending to be same
                acts[small][0] = (ss, el)
                es = el
                # change in actions
                
                for i in range(len(actions)):
                    a, s, e = actions[i]
                    
                    s = float(s)
                    e = float(e)
                    if a != small:
                        continue
                        
                    #print(ss, s)
                    if ss == s:
                        #print('found right action', actions[i])
                        actions[i] = (a, str(ss), str(el))
                        break
                
            if sl >= ss:
                #print("short start needs chopping")
                # change short action start to be the same as large
                acts[small][0] = (sl, es)
                ss = sl
                
                for i in range(len(actions)):
                    a, s, e = actions[i]
                    
                    s = float(s)
                    e = float(e)
                    
                    if a != small:
                        continue
                        
                    #print(es, e)
                    if es == e:
                        #print('found right action', actions[i])
                        actions[i] = (a, str(sl), str(es))
                        break
        
   # print(acts)
    return actions
    # if smaller one exists, and other doesnt, make the other one exist. 
    
    # if both already exist and smaller one spilling cut it off!
    
    
    # unless not overlappign!

    
##############################
# needs to be a separate one for verbs not actions 

In [24]:
def removeFloorSpatial(objs):
    
    for obj in objs:
        if obj['class'] != 'floor':
            continue
            
        obj['spatial_relationship'] = []
        
        
    return objs

In [25]:

    
    
    
def getVideoAnnotations(video_id): 
    """ Associates all annotations for a video id
    
    Args: 
        video_id: the video identification string
    
    Returns: 
        video: a dictionary of action genome annotations {key=frame: val=ann}
        frames: a list of frames annotated in action genome
        actions: tuples of (charades_action, start time, end time)
    """
    video = {}
    frames = []
    
    actions = getCharadesActions(video_id)
    if actions is None:
        return None, None, None, None
    #print()
    
    #print('A', actions)
    
    
    ##### replace dish --> cup etc
    actions = totalReplacementOfObjectsInActions(actions)
    
    
    #print()
    #print('B', actions)
    
    
    # actually - i think this might be making stuff concurrent, not deleting? 
    ##### if 2 objs the same and overlapping, combine
    actions = combineSameIfOverlapping2(actions)
    
    
    #print()
    #print('C', actions)
    
    ##### if double annotated and have certain pairs of objects, assume they are one 
    actions, switch_by_sec = makeObjectsConsistent(actions)
    #print()
    #print('D', actions)
    
    ##### cooking something --> making food
    actions = replaceUniversalVague(actions)
    
    
    ##### if overlapping and should be sequenced, sequence
    actions = editCharadesSegmentations(actions)
    #print()
    #print('E', actions)
    
    
    ##### syncronize non-universal vagues with their specific counterparts
    actions = syncronizeSomething(actions)
    #print()
    #print('F', actions)
    
    actions = assumedSequences(actions, video_id)
    
    ##### if concurent, overlap. Maybe do this before orderign? idkkkk. no b/c better to have equal than p directly after other
    #actions = equalizeConcurrentActions(actions)
    
    actions = envelopActions(actions)
    
    ##### if 2 objs the same and overlapping, combine
    actions = combineSameIfOverlapping2(actions)
    #print()
    
    #for i in range(len(actions)):
    #    act1, start1, after1 = actions[i]
    #    print(act1)
    
    #print('G', actions)
   # print()

    
    for i in ORDERED_FRAMES:
        #for i in OBJ_REL:
        sp = i.split("/")
        if sp[0] != '%s.mp4' % (video_id) :
            continue
        objs = OBJ_REL[i]
        
        #objs = removeFloorSpatial(objs) TODO 3-14 ADD BACK
        
        video[sp[1][:-4]] = objs
        frames.append(sp[1][:-4])
        
    
    
        

    return video, frames, actions, switch_by_sec


In [26]:
def totalReplacementOfObjectsInActions(actions):
    to_del = []
    to_add = []
    for i in range(len(actions)):
        act, start, finish = actions[i]
        obj, verb = ACTION_SV[act]
        if act == 'c081':
            new_act = 'c009'
        else:
            if ENG[obj] not in total_replacement:
                continue
            replacement = IDX[total_replacement[ENG[obj]]]
        
            new_act = REVERSED_ACTION_SV[(replacement, verb)]
        #if act == 'c081':
        #    print('act is c081, obj is', obj, 'and in total repalcement: ', ENG[obj] in total_replacement, "replaced with", new_act)
        to_del.append(i)
        to_add.append((new_act, start, finish))
        
        
    
    new_actions = []
    
    
    for i in range(len(actions)):
        if i not in to_del:
            new_actions.append(actions[i])
        
    for i in to_add:
        new_actions.append(i)
        
        
    for new_act, _, _ in new_actions:
        if new_act == 'c081':
            print()
            print("oh shit, in total replacement adding c081")
            print('actions', actions)
            print('todel', to_del)
            print('toadd', to_add)
    return new_actions
            

In [27]:
object_class_translations = {
    'closet/cabinet': 'closet',
    'cup/glass/bottle': 'cup',
    'paper/notebook': 'paper',
    'phone/camera': 'phone',
    'sofa/couch': 'sofa',
}


def mergeObjsAnnotations(idx1, idx2, anns, new_class):
    ann1 = anns[idx1]
    ann2 = anns[idx2]
    
    if new_class is None:
        obj1 = ann1['class']
        if obj1 in object_class_translations:
            obj1 = object_class_translations[ann1['class']]


        obj2 = ann2['class']
        if obj2 in object_class_translations:
            obj2 = object_class_translations[ann2['class']]

        if obj1 != obj2:
            print("trying to merge different objects", ann1['class'], ann2['class'])
            return
    sp1 = ann1['metadata']['tag']
    sp2 = ann2['metadata']['tag']
    if sp1[0] != sp2[0] or sp1[2] != sp2[2]:
        print("trying to merge objs from two videos or frames", ann1['metadata']['tag'], ann2['metadata']['tag'])
        return
    
    bbox1 = ann1['bbox']
    bbox2 = ann2['bbox']
    
    if bbox1 is None and bbox2 is None:
        bbox = None
    elif bbox1 is None:
        bbox = bbox2
    elif bbox2 is None:
        bbox = bbox1
    else:
        x1, y1, w1, h1 = bbox1
        x2, y2, w2, h2 = bbox2
        bbox = (min(x1, x2), min(y1, y2), max(w1, w2), max(h1, h2))
    
    if new_class is None:
        new_class = ann1['class']
    
    new_obj = {
        'class': new_class,
        'metadata': ann1['metadata'],
        'visible': ann1['visible'] and ann2['visible'],
        'bbox': bbox,
        'attention_relationship': ann1['attention_relationship'] + ann2['attention_relationship'],
        'spatial_relationship': ann1['spatial_relationship'] + ann2['spatial_relationship'],
        'contacting_relationship':  ann1['contacting_relationship'] + ann2['contacting_relationship'],
        'verb_relationship':  ann1['verb_relationship'] + ann2['verb_relationship']
    }
    
    return new_obj


def iouObjs(idx1, idx2, anns):
    bbox1 = anns[idx1]['bbox']
    bbox2 = anns[idx2]['bbox']

    
    
    if bbox1 is None or bbox2 is None:
        return False
    
    bb1x1, bb1y1, bb1w, bb1h = bbox1
    bb2x1, bb2y1, bb2w, bb2h = bbox2
    
    bb1x2 = bb1x1 + bb1w
    bb1y2 = bb1y1 + bb1h
    
    bb2x2 = bb2x1 + bb2w
    bb2y2 = bb2y1 + bb2h
    
    inter_x1 = max(bb1x1, bb2x1)
    inter_y1 = max(bb1y1, bb2y1)
    
    inter_x2 = min(bb1x2, bb2x2)
    inter_y2 = min(bb1y2, bb2y2)
    
    inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
    
    union_area = (bb1w * bb1h) + (bb2w * bb2h) - inter_area
    
    return (inter_area / union_area) > .5


def objDuplicateFixes(video, switches):
    #print("switchies', ", switches)
    
    # 1 - if the class changed from actions, change
    all_classes = set()
    new_video = {}
    for f_id in video:
        #print()
        #print(f_id)
        #print()
        #print('before')
        #for ann in video[f_id]:
        #    print(ann)
        #    all_classes.add(ann['class'])
        new_video[f_id] = []
        anns = video[f_id]
        for ann in anns:
            obj = ann['class']
            if obj in object_class_translations:
                obj = object_class_translations[ann['class']]
            if obj in total_replacement:
                ann['class'] = total_replacement[obj]
                continue
            if f_id in switches:
                if obj in switches[f_id]:
                    #print("switching class from ", obj, " to ", switches[f_id][obj])
                    ann['class'] = switches[f_id][obj]
            else:
                temp_switch = {}
                for s_frame in switches:
                    if int(s_frame) > int(f_id):
                        continue
                    temp_switch = switches[s_frame]
                if obj in temp_switch:
                    #print("switching class from ", obj, " to ", temp_switch[obj])
                    ann['class'] = temp_switch[obj]
                    

        #2 - if there are duplicates, combine. 
        idx_to_del = set()
        to_add = []
        
        num_changed = 1
        curr_idx = 0

        something_changed = True
        while something_changed:
            something_changed = False
            for i in range(len(anns)):
                if i < curr_idx:
                    continue
                curr_idx = i + 1
                for j in range(len(anns)):
                    if j <= i:
                        continue
                    obj1 = anns[i]['class']
                    if obj1 in object_class_translations:
                        obj1 = object_class_translations[anns[i]['class']]
                        
                    #print('anssj', anns[j])
                    #print("none in ans!", None in anns)
                    
                    # Note: if get none in ans weeor, b/c merge added a "none"
                    obj2 = anns[j]['class']
                    if obj2 in object_class_translations:
                        obj2 = object_class_translations[anns[j]['class']]
                        
                    #print(i, j, obj1, obj2)
                    if obj1 == obj2:
                        anns.append(mergeObjsAnnotations(i, j, anns, None))
                        
                        del anns[j]
                        del anns[i]
                        curr_idx = i
                        something_changed = True
                        break
                if something_changed:
                    break
                else:
                    curr_idx = i + 1
                
                
        # 3- check for valid pairs that are >50% IOU, and merge (should happen rarely)
        something_changed = True
        curr_idx = 0
        while something_changed:
            something_changed = False
            for i in range(len(anns)):
                if i < curr_idx:
                    continue
                curr_idx = i + 1
                for j in range(len(anns)):
                    if j <= i:
                        continue
                        
                    obj1 = anns[i]['class']
                    if obj1 in object_class_translations:
                        obj1 = object_class_translations[anns[i]['class']]
                        
                    # Note: if get none in ans weeor, b/c merge added a "none"
                    obj2 = anns[j]['class']
                    if obj2 in object_class_translations:
                        obj2 = object_class_translations[anns[j]['class']]
                        
                    if obj2 not in same_obj[obj1]:
                        continue
                    
                    #print("checking ious for ", obj1, obj2)
                    overlap = iouObjs(i, j, anns)
                    
                    if overlap:
                        #print("DOING AN OVERLPA")
                        items = sorted([obj1, obj2])
                        key = "%s/%s" % (items[0], items[1])
                        
                        new_class = priority_obj[key]
                            
                        
                        anns.append(mergeObjsAnnotations(i, j, anns, new_class))
                        
                        del anns[j]
                        del anns[i]
                        curr_idx = i
                        something_changed = True
                        break
                if something_changed:
                    break
        #print()
        #print('updated')
        #for ann in video[f_id]:
        #    print(ann)
    
    #print(all_classes)
    
    return video

In [28]:
""" Cell Summary:

    Functions for initializing different types of vertices
"""


def makeFrameVertex(id, second) :
    """ Makes a frame vertex
    
    Args:
        id: the id for this particular vertex
        second: the second at which this frame occured
    
    Returns:
        A frame vertex object
    """
    
    newFrame = {
        'id': id, 
        'secs': second,
        'type': "frame",
        'metadata': 'test', # TODO: how to switch
        'objects': { 'names': [], 'vertices': [] },
        'attention': { 'names': [], 'vertices': [] },
        'contact': { 'names': [], 'vertices': [] },
        'spatial': { 'names': [], 'vertices': [] },
        'verb': { 'names': [], 'vertices': [] },
        'actions': { 'names': [], 'vertices': [] },
        'next': None,
        'prev': None,
    }
    return newFrame


def makeObjectVertex(id, annotations, second):
    """ Makes an object vertex
    
    Args:
        id: the id for this particular vertex
        annotations: the action genome annotations for this vertex
        second: the second at which this frame occured
    
    Returns:
        An object vertex object
    """
    
    sp = id.rsplit('/', 1)
    newObj = {
        'id': id,
        'type': "object",
        'class': sp[0],
        'attention': [],
        'contact': [],
        'spatial': [],
        'verb': [],
        'visible': annotations['visible'],
        'bbox': annotations['bbox'],
        'metadata': annotations['metadata'],
        'frame_num': sp[1],
        'secs': second,
        'next': None,
        'prev': None,
    }
    
    return newObj


def makeRelationshipVertex(id, rel_type, second):
    """ Makes a relationship vertex
    
    Args:
        id: the id for this particular vertex
        rel_type: the type of relationship (attention, contact, spatial)
        second: the second at which this frame occured
    
    Returns:
        A relationship vertex object
    """
    
    sp = id.rsplit('/',1)
    newRel = {
        'id': id,
        'type': rel_type,
        'class': sp[0],
        'objects': [],
        'metadata': 'test', #TODO; change
        'frame': sp[1],
        'secs': second,
        'next': None,
        'prev': None,
    }

    return newRel


def makeActionVertex(id, data, frames): 
    """ Makes an action vertex
    
    Args:
        id: the id for this particular vertex
        data: the Charades action annotations
        frames: the frames annotated within this action
        
    Returns:
        An action vertex object
    """
    
    char_id = data[0]
    phrase = ENG[char_id]
    
    newAct = {
        'id': id,
        'charades': char_id,
        'phrase': phrase,
        'type': "action",
        'start': float(data[1]),
        'end': float(data[2]),
        'length': float(data[2]) - float(data[1]),
        'objects': [],
        'attention': [],
        'contact': [],
        'spatial': [],
        'verb': [],
        'metadata': 'test', #TODO: how to get?
        'all_f': frames,
        'object_id': ACTION_SV[char_id][0],
        'verb_id': ACTION_SV[char_id][1],
        'next_discrete': None, 
        'prev_discrete': None, 
        'next_instance': None, 
        'prev_instance': None, 
        'while': [],
    }
    return newAct

In [29]:
def makeVertices(video, rel_types, rel_tags, action_list, fpa, secs): 
    """ Makes all the vertex objects for a video
    
    Args:
        video: a dictionary mapping frames to action genome annotations
        rel_types: the categories of relationships
        rel_tags: the way relationships are referenced in action genome
        action_list: list of Charades annotation tuples (action, start, end)
        fpa: a dictionary mapping actions to lists of frames occuring within the action
        secs: the seconds associated with each frame
    
    Returns:
        objects: dictionary mapping object vertex ids to object vertices
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type
        frames: a dictionary mapping frame vertex ids to frame vertices
        actions: a dictionary mapping action vertex ids to action vertices
        object_categories: a list of the objects present in the video
        action_categories: a list of actions present in the video
        rel_cat: a list [a_cat, c_cat, s_cat, v_cat] of attention, contact, spatial, and verb relationships present in the video        
    """
    
    object_categories, a_cat, c_cat, s_cat, v_cat, action_categories = [], [], [], [], [], {}

    objects, attention, contact, verb, spatial, frames, actions = {}, {}, {}, {}, {}, {}, {}

    rel_dicts = [attention, contact, spatial, verb]
    rel_cat = [a_cat, c_cat, s_cat, v_cat]

    secs_idx = 0
    
    min_frame = 9999999
    
    if len(video) == 0:
        first_frame = '000000'
    
    for frame_id in video: 
        if int(frame_id) < min_frame:
            first_frame = frame_id
            min_frame = int(frame_id)
            
        second = secs[secs_idx]

        # get frame vertex
        if frame_id not in frames:
            secs_idx = secs_idx + 1
            frames[frame_id] = makeFrameVertex(frame_id, second)
        frame = frames[frame_id]

        # for every object within this frame, create object and relationship vertices
        for j in video[frame_id]:
            obj_idx = IDX[j['class']] #NOTE I Changed this but idk where it went wrong
            #obj_idx = j['class']
            id = "%s/%s" % (obj_idx, frame_id)
            objects[id] = makeObjectVertex(id, j, second)
            verts = frame['objects']['vertices']
            names = frame['objects']['names']
            verts.append(objects[id]) 
            names = addListIfUnique(names, obj_idx) # TODO WAS ADDING IDX THE MOVE?
            object_categories = addListIfUnique(object_categories, obj_idx)

            # add vertices for every relationship in the frame
            for k in range(len(rel_tags)):
                rel = rel_tags[k]
                if j[rel] is None:
                    j[rel] = []

              # for all the relationships of this type, make a vertex if not one already
                for m in j[rel]:
                    rel_idx = IDX[m] #tODO also had to change this but not sure why used to be m in forloop
                    if rel_idx in BANNED:
                        continue
                    id = "%s/%s" % (rel_idx, frame_id)
                    if id not in rel_dicts[k]:
                        rel_type = rel_types[k]
                        rel_dicts[k][id] = makeRelationshipVertex(id, rel_type, second)
                        frame[rel_type]['vertices'].append(rel_dicts[k][id])
                        frame[rel_type]['names'] = addListIfUnique(frame[rel_type]['names'], rel_idx)

                    # keep track of all seen relationships
                    rel_cat[k] = addListIfUnique(rel_cat[k], rel_idx)

    # Make action vertices
    for action in action_list: 
        char_id = action[0]
        if char_id not in action_categories: 
            action_categories[char_id] = 1
        else: 
            action_categories[char_id] = action_categories[char_id] + 1
        id = "%s/%s" % (char_id,  str(action_categories[char_id]))
        actions[id] = makeActionVertex(id, action, fpa[char_id])
    
    return objects, rel_dicts, frames, actions, object_categories, action_categories, rel_cat, first_frame

In [30]:
def makeIntraFrameEdges(video, objects, rel_dicts, rel_types, rel_tags, actions, frames):
    """ Make edges between vertices in the same frame. For example
    
    object['contact'] = list of contact relationship vertices occuring on object in this frame
    
    Args:
        video: a dictionary mapping frames to action genome annotations
        objects: dictionary mapping object vertex ids to object vertices
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type
        rel_types: the categories of relationships
        rel_tags: the way relationships are referenced in action genome
        actions: a dictionary mapping action vertex ids to action vertices
        frames: a dictionary mapping frame vertex ids to frame vertices
    
    Returns:
        objects: dictionary mapping object vertex ids to object vertices with intra-frame edges
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type with intra-frame edges
        actions: a dictionary mapping action vertex ids to action vertices with intra-frame edges
    """
    
    #objects and relationships
    for frame_id in video:
        vid_frame_ann = video[frame_id]

        #for every object annotation from ag, get the object vertex and update
        for obj_ann in vid_frame_ann:
            obj_instance_id = "%s/%s" % (IDX[obj_ann['class']], frame_id) # TODO also had to change this? not index
            obj_vertex = objects[obj_instance_id]

            for rel_type_idx in range(len(rel_types)):
                for rel in obj_ann[rel_tags[rel_type_idx]]:
                    if rel in BANNED: 
                        continue
                    rel_dict = rel_dicts[rel_type_idx]
                    rel_instance_id = "%s/%s" % (IDX[rel], frame_id) # TODO also had to change this? to not idxrel
                    rel_vertex = rel_dict[rel_instance_id]
                    obj_vertex[rel_types[rel_type_idx]].append(rel_vertex) 
                    rel_vertex['objects'].append(obj_vertex) 
    
    #actions -- separate for loop b/c they have edges to multiple frames
    for action_id in actions: 
        action = actions[action_id]
        
        for f_id in action['all_f']:
            f = frames[f_id]
            f['actions']['names'].append(action['charades'])
            f['actions']['vertices'].append(action)
            for o in f['objects']['vertices']: 
                action['objects'].append(o) 
                
            for i in range(len(rel_types)):
                rel_type = rel_types[i]
                rel_dict = rel_dicts[i]
                for r in f[rel_type]['vertices']:
                    action[rel_type].append(r)
                    
                    
    
    return objects, rel_dicts, actions


In [31]:
# add edges to action vertex objects that are between frames (next and previous)
def makeActionsInterFrameEdges(actions) :
    """ Connect actions to next and previous discrete action, and next and previous instance of the same action.
    
    Args:
        actions: a dictionary mapping action vertex ids to action vertices
        
    Returns:
        actions: a dictionary mapping action vertex ids to action vertices with inter-frame edges
    """
    
    for a_id in actions:
        sp = a_id.split('/', 1)
        itr = int(sp[1])
        
             
        # for next discrete action
        a1 = actions[a_id]
        a1_s = float(a1['start'])
        a1_e = float(a1['end'])
        s_min = 9999999 #idk how to do this TODO
        e_min = 9999999
        
        for a_id2 in actions: 
            if a_id2 == a_id:
                continue;
            a2 = actions[a_id2]
            a2_s = float(a2['start'])
            a2_e = float(a2['end'])
            
            # if a2 starts after a1 ends
            if a2_s > a1_e: # todo: should this be > or >=?
                diff = a2_s - a1_e
                if diff < e_min:
                    e_min = diff
                    a1['next_discrete'] = a2 
                    
            # if a2 ends before a1 starts
            if a2_e < a1_s:
                diff = a1_s - a2_e
                if diff < s_min:
                    s_min = diff
                    a1['prev_discrete'] = a2
                    
            # if a2 starts before a1 ends but does not end before a1 starts (concurrent)
            if a2_s <= a1_e and a1_s <= a2_e:
                a1['while'].append(a2) 
                
        #for next instance of same action category
        if itr == 1:
            continue
        else: 
            a = actions[a_id]
            prev_id = "%s/%s" % (sp[0], str(itr - 1))
            prev = actions[prev_id]
            prev['next_instance'] = a
            a['prev_instance'] = prev
            
    return actions

def makeInterFrameEdges(objects, rel_dicts, rel_types, actions, frames, object_categories, relationship_categories): 
    """ Make edges between vertices of the same type in different frames
    
    object['next'] = next instance of the same class of objects
    
    Args:
        objects: dictionary mapping object vertex ids to object vertices
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type
        rel_types: the categories of relationships
        actions: a dictionary mapping action vertex ids to action vertices
        frames: a dictionary mapping frame vertex ids to frame vertices
        object_categories: a list of the objects present in the video
        relationship_categories: a list of relationships present in the video
    
    Returns:
        objects: dictionary mapping object vertex ids to object vertices with inter-frame edges
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type with inter-frame edges
        actions: a dictionary mapping action vertex ids to action vertices with inter-frame edges
    """
    
    prev_obj = [None]*len(object_categories)
    prev_arel = [None]*len(relationship_categories[0])
    prev_crel = [None]*len(relationship_categories[1])
    prev_srel = [None]*len(relationship_categories[2])
    prev_vrel = [None]*len(relationship_categories[3])
    prev_rels = [prev_arel, prev_crel, prev_srel, prev_vrel]
    prev_frame = None

    for frame_id in frames: # TODO assuming in order
        # go thorugh objects
        frame = frames[frame_id]
        
        frame['prev'] = prev_frame
        if prev_frame is not None:
            prev_frame['next'] = frame
        prev_frame = frame
        
        frame_objs_verts = frame['objects']['vertices']

        for obj_vertex in frame_objs_verts:
            cat_idx = object_categories.index(obj_vertex['class'])
            prev = prev_obj[cat_idx]
        
            if prev is not None:
                obj_vertex['prev'] = prev 
                prev['next'] = obj_vertex 

            prev_obj[cat_idx] = obj_vertex

        # go through relationships
        for i in range(len(rel_types)):
            rel_type = rel_types[i]
            rel_dict = rel_dicts[i]
            prev_rel = prev_rels[i]

            for rel_vertex in frame[rel_type]['vertices']:
                cat_idx = relationship_categories[i].index(rel_vertex['class'])
                prev = prev_rel[cat_idx]

                if prev is not None:
                    rel_vertex['prev'] = prev 
                    prev['next'] = rel_vertex 

                prev_rel[cat_idx] = rel_vertex
        
    # go through actions 
    actions = makeActionsInterFrameEdges(actions)
        
    return objects, rel_dicts, actions

In [32]:
def sortDictByCategory(dic, v_search):
    """ Resorts the dictionary of annotations by class. 
        Within each class, lists the names of the classes of other vertices it interacts with
    
    Args: 
        dic: a dictionary mapping strings of type "class/frame_number" to vertex objects
        v_search: the categories with which each vertex interacts
        
    Returns:
        A dictionary that is sorted by class For example:
        
        {'class1': {
                'names': ['interacts1', 'interats2', ...]
                'vertices': [{vertex_obj1}, {vertex_obj2}, ...]
            }
        }
    """
    
    new_dict = {}
    
    for i in dic:
        sp = i.rsplit('/', 1)
        cat = sp[0]
        if cat not in new_dict:
            new_dict[cat] = {
                'names': [],
                'vertices': []
            }
        new_dict[cat]['vertices'].append(dic[i])
        for j in v_search:
            for k in dic[i][j]:
                new_dict[cat]['names'] = addListIfUnique(new_dict[cat]['names'], k['class'])
    
    return new_dict


def organizeDict(video_id, objects, rel_dicts, rel_types, actions, frames, frames_list, fps, obj_cat, act_cat, rel_cat, secs_to_frames, frames_to_sec): 
    """ Reformats lists of vertices into a spatio-temporal scene graph
    
    Args:
        video_id: a string video identification
        objects: dictionary mapping object vertex ids to object vertices
        rel_dicts: a list of 3 dictionaries [attention, contact spatial]
                   Each dictionary mapps their vertex ids to relationship vertices of that type
        rel_types: the categories of relationships
        actions: a dictionary mapping action vertex ids to action vertices
        frames: a dictionary mapping frame vertex ids to frame vertices
        fps: frames per second
        object_cat: a list of the objects present in the video
        action_cat: a list of actions present in the video
        rel_cat: a list of relationships present in the video
        secs_to_frames: a dictionary mapping seconds to their corresponding frames
    
    Returns:
        A dictionary with spatio-temporal scene graph information 
    """
    
    dic = {}
    
    # spatiotemporal scene graph
    stsg = {}
    stsg['actions'] = actions
    stsg['objects'] = objects
    for i in range(len(rel_types)):
        rel_type = rel_types[i]
        rel_dict = rel_dicts[i]
        stsg[rel_type] = rel_dict  
        dic[rel_type] = sortDictByCategory(rel_dict, ['objects'])
    dic['stsg'] = stsg
    
    dic['objects'] = sortDictByCategory(objects, ['attention', 'contact', 'spatial'])
    dic['actions'] = sortDictByCategory(actions, ['objects', 'attention', 'contact', 'spatial'])
    
    dic['frames'] = frames
    dic['ordered_frames'] = frames_list
    dic['fps'] = fps
    
    # location
    char_actions_row = CHARADES.index[CHARADES['id'] == video_id][0]
    dic['location'] = CHARADES.iloc[char_actions_row, 2]
    
    # lists of categories
    dic['obj_names'] = obj_cat
    dic['act_names'] = act_cat
    dic['arel_names'] = rel_cat[0]
    dic['crel_names'] = rel_cat[1]
    dic['srel_names'] = rel_cat[2]
    dic['vrel_names'] = rel_cat[3]
    
    # add secs_to_frames
    dic['secs_to_frames'] = secs_to_frames
    dic['frames_to_sec'] = frames_to_sec
    
    # add video id
    dic['video_id'] = video_id
    return dic
    

In [33]:
def makeSTSGDict(video_id):#, fps):
    """ Makes a spatio-temporal scene graph formatted as a dictionary
    
    Args:
        video_id: video identification string
        fps: frames per second
        
    Returns: 
        A spatio-temporal scene graph dictionary
    """
    
    #print('A')
    #doOBJREL()
    
    fps = getFPS(video_id)
    
    #fps = 15

    #print('B')
    #doOBJREL()
    
    rel_types = ['attention', 'contact', 'spatial', 'verb']
    rel_tags = ['attention_relationship', 'contacting_relationship', 'spatial_relationship', 'verb_relationship']

    # get the ag, charades annotations, and match actions to frames
    video, frames_list, actions, switch_by_sec = getVideoAnnotations(video_id)
    
    
    
    #print('C')
    #doOBJREL()
    # TODO: right now just doing if they have both Charades and AG annotations
    if actions is None:
        return
    
    #print('D')
    #doOBJREL()
    if len(frames_list) == 0:
        print(video_id, "frames list is 0")
        return
    
    #a_ids = [i[0] for i in actions]
    #if 'c081' in a_ids:
    #    print("c081 in actions")
    
    fpa, secs = matchFramesAndActions(actions, frames_list, fps)
    
    #print('E')
    #doOBJREL()
    
    # TODO: eventually may want to inlcude but rn skippig if action with no frames
    for i in fpa:
        if len(fpa[i]) == 0:
            print(video_id, " had action with no frames")
            return
    
    # supplement here
    video = updateAGAnnotations(video)
    
    #print('F')
    #doOBJREL()
    
    video, secs_to_frames, frames_to_sec, switch_by_frame = addActionSV(video, actions, frames_list, fpa, secs, switch_by_sec)
    
    
    #print('G')
    #doOBJREL()
    
    video = addEntailments(video, rel_tags)
    
    #print('H')
    #doOBJREL()
    
    video = adjustSpatialAnn(video)
    
    
    #print('I')
    #doOBJREL()
    # SO! everything in f2s is in of, but not vice versa
    
    if False:
        for f_id in video:
            for obj in video[f_id]:
                #print()
                #print(obj)
                for i in ['contacting_relationship', 'attention_relationship', 'spatial_relationship', 'verb_relationship']:

                    # note it was after here when try to replace all _ that it failed
                    new_rels = []
                    for j in obj[i]:
                        new_str = j.replace("_", " ")
                        if new_str == 'have it on the back':
                            new_str = 'having it on the back'
                        new_rels.append(new_str)

                    obj[i] = new_rels
                    obj[i] = list(set(obj[i]))

                cont = obj['contacting_relationship']
                verb = obj['verb_relationship']



                #for v in ['holding', 'eating', 'sitting on', 'sitting_on', 'standing_on', 'standing on', 'lying_on', 'lying on', 'drinking from', 'drinking from']:
                for v in ['holding', 'eating', 'standing_on', 'standing on']:
                    if v in cont:
                        cont.remove(v)
                        verb.append(v)


                obj['contacting_relationship'] = cont
                obj['verb_relationship'] = list(set(verb))
            
    for f_id in video:
        frame = video[f_id]
        for obj in frame:
            contact = obj['contacting_relationship']
            #print(list(contact))
            if 'holding' in contact and 'touching' not in contact:
                print("hi")
                
    
    video = deleteBelowThreshold(video)
    #print('J')
    #doOBJREL()
    
    # create the vertices and edges
    objects, rel_dicts, frames, actions, obj_cat, act_cat, rel_cat, first_frame = makeVertices(video, rel_types, rel_tags, actions, fpa, secs)
    objects, rel_dicts, actions = makeIntraFrameEdges(video, objects, rel_dicts, rel_types, rel_tags, actions, frames)
    objects, rel_dicts, actions = makeInterFrameEdges(objects, rel_dicts, rel_types, actions, frames, obj_cat, rel_cat)
    
    # structure the dictionary
    stsg = organizeDict(video_id, objects, rel_dicts, rel_types, actions, frames, frames_list, fps, obj_cat, act_cat, rel_cat, secs_to_frames, frames_to_sec)
    return stsg

In [34]:
def makeSTSGDictFromIDArray(ids):
    """ Creates a dictionary of spatio-temporal scene graphs
    
    Args:
        ids: an array of video id strings
        fps: an array of associated frames per second
    
    Returns:
        A dictionary mapping video_ids to their respective spatio-temporal scene graph
    """
    
    anns = set()
    videos = {}
    for i in range(len(ids)):
        dic = makeSTSGDict(ids[i])
        if dic is not None:
            videos[ids[i]] = dic
    return videos

In [8]:


import sys
sys.setrecursionlimit(10000)


def pickleDump(var, destination):
    f = open(destination,"wb")
    pickle.dump(var,f)
    f.close() 
    

def pickleLoad(destination):
    f = open(destination,"rb")
    x = pickle.load(f)
    f.close() 
    
    return x

In [36]:


objs_spat = []

def isSpatial(obj):
    if obj['spatial_relationship'] is None:
        return 0
    if len(obj['spatial_relationship']) == 0:
        return 0
    else:
        return 1
    
    
def doOBJREL():
    tot = 0
    for i in OBJ_REL:
        v_id, f_id =  i.split('/')
        if v_id[:5] not in ids:
            continue
            
        objs = OBJ_REL[i]
        for obj in objs:
            tot += 1
            name = obj['class'] + v_id
            objs_spat.append({
                'name': name,
                'spatial': isSpatial(obj),
                'spatial_names': obj['spatial_relationship'],
                'frame': f_id,
                'video': v_id,
                'class': obj['class'],
                'total': 1
            })
            if isSpatial(obj):
                print('    ', obj['spatial_relationship'], f_id)
    print("TOTAL:", tot)
    df = None
    df = pd.DataFrame(objs_spat)
    
    #print(df[df['spatial'] == 1][['spatial_names', 'frame']])
    

In [37]:
#ids = ['00T4B', '00X3U', '00ZCA', '02GMI', '02V54', '02XLP', '03AA8', '03XSP', '06EDS', '06L9P']
#ids = ['CUSTU']

OBJ_REL = None
OBJ_REL = pickleLoad('../data/stsg/object_bbox_and_relationship.pkl')
# so some still have _ between words, see i that makes a difference
ids = CHARADES['id'].tolist()
#ids = ['BIQGN']

stsgs = makeSTSGDictFromIDArray(ids)
#small_stsgs = makeSTSGDictFromIDArray(ids)

hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial


hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
AKKWU frames list is 0
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS S

hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spa

hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
he

hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
trying to put 'holding' between overlapping annotations
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
trying to put 'holding' between overlapping annotations
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello


hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
trying to put 'holding' between overlapping annotations
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
I

hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
F0PNU frames list is 0
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello


hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spa

hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spat

hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS S

hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt sp

hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
trying to put 'holding' between overlapping annotations
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spa

hello
IS Spatial
hello
NOt spatial
trying to put 'holding' between overlapping annotations
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
EKPAQ frames list is 0
hello
NOt spatial
hello
IS Spatial
trying to put 'holding' between overlapping annotations
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
IOL8Q frames list is 0
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
N

hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt

hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatia

hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
NOt spatial
hello
IS 

hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
C93UU frames list is 0
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
NOt spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spatial
hello
IS Spati

In [None]:

def exists(data, metric, val):  # TODO: SOMETHING FOR IF METRIC/VAL IS NONE
    """ Confirms if a data structure contains an object with a certain attribute

    Args:
        data: the data structure to search
        metric: the part of the data objects to reference
        val: the value we are looking for

    Returns:
        True if an object with this value exists, false otherwise
    """

    if data is None:
        return False

    if val is None:
        return len(data) != 0

    if metric is None:
        for i in data:
            if i == val:
                return True
    else:
        for i in data:
            if i[metric] == val:
                return True

    return False
import grammar as g
stsg = stsgs['CUSTU']
for f_id in stsg['frames']:
    frame = stsg['frames'][f_id]
    
    objs = frame['contact']['names']
    if IDX['standing on'] in objs:
        print('hiowe')
    
    
print(stsg['crel_names'], stsg['vrel_names'])
    
# so... it's type is contact, but its under 'verb'

In [None]:
#pickleDump(stsgs, '../data/testing_feb_version.pkl')

In [None]:
stsg = stsgs['06EDS']

def printEnglish(lst): 
    itr = [ENG[i] for i in lst]
    print(itr)

printEnglish(stsg['obj_names'])
printEnglish(stsg['act_names'])
printEnglish(stsg['arel_names'])
printEnglish(stsg['crel_names'])
printEnglish(stsg['srel_names'])


def findObjHe(tp):
    for i in stsg[tp]:
        objs = stsg[tp][i]['names']
        print(ENG[i])
        printEnglish(objs)
        print()

findObjHe('contact')

#for i in stsgs:
#    print(stsgs[i]['obj_names'])
    


In [None]:
for i in stsgs:
    print(i)

In [None]:
#f = open("videos_stsg.pkl","wb")
#pickle.dump(stsgs,f)
#f.close()

In [None]:
""" Cell Summary: 
    
    find existing obj-rel pairs in the data
"""

def addToSet(obj, rel_list, dic):
    """ Update dic[obj] with everything in rel_list

    Args: 
        obj: object key of dictionary
            rel_list: list of relationships to add
        dic: dictionary of the counts existing object-relationship pairs
    """
    
    obj = IDX[obj]
    if rel_list is None:
        return
    for rel in rel_list:
        rel = IDX[rel]
        if rel in BANNED:
            continue;
        if obj not in dic:
            dic[obj] = {}
        o = dic[obj]
        if rel not in o:
            o[rel] = 0
        o[rel] = o[rel] + 1

def deleteSmall(dic, min_size):
    """ Delete all relationships with too small of a count

    Args:
        dic: the dictionary of obj/rel pairs
    """
    
    for obj in dic: 
        toPop = []
        for rel in dic[obj]:
            if dic[obj][rel] < min_size:
                toPop.append(rel)
        for rel in toPop:
            del dic[obj][rel]

def findExistingObjRel():
    """ Find all object0relationship pairs in Charades data
    
    Returns:
        exist: dictionary of obj/rel pairs
        exist_a: dictionary of obj/attention-rel pairs
        exist_c: dictionary of obj/contact-rel pairs
        exist_s: dictionary of obj/spatial-rel pairs
    """
    
    exist_a = {}
    exist_c = {}
    exist_s = {}
    exist_v = {}

    min_size = 10 #only include combos with counts >= this

    for f_id in OBJ_REL:
        f = OBJ_REL[f_id]
        for ann in f:
            obj = ann['class']
            addToSet(obj, ann['attention_relationship'], exist_a)
            addToSet(obj, ann['contacting_relationship'], exist_c)
            addToSet(obj, ann['spatial_relationship'], exist_s)
            
    # for every pair in action sv, add that possible. dont delete small though (yet, though idk if it's relevant here)
    for a_idx in ACTION_SV:
        subject, verb = ACTION_SV[a_idx]
        
        if subject not in exist_v:
            exist_v[subject] = {}
        
        if verb not in exist_v[subject]:
            exist_v[subject][verb] = 1

    deleteSmall(exist_a, min_size)
    deleteSmall(exist_c, min_size)
    deleteSmall(exist_s, min_size)

    exist = {}
    for i in exist_a:
        exist[i] = {**exist_a[i], **exist_c[i], **exist_s[i], **exist_v[i]}
    
    exist['o39'] = {}
    return exist, exist_a, exist_c, exist_s, exist_v

exist, exist_a, exist_c, exist_s, exist_v = findExistingObjRel()

In [None]:
data = pd.read_csv("../data/Charades_v1_train.csv")
LENGTHS_TRAIN = data[['id', 'length']].copy()
data = pd.read_csv("../data/Charades_v1_test.csv")
LENGTHS_TEST = data[['id', 'length']].copy()
lengths_train = pd.Series(LENGTHS_TRAIN.length.values,index=LENGTHS_TRAIN.id).to_dict()
lengths_test = pd.Series(LENGTHS_TEST.length.values,index=LENGTHS_TEST.id).to_dict()
lengths = {**lengths_train, **lengths_test}

In [9]:

#pickleDump(stsgs, "../data/test_stsgs.pkl")
#pickleDump(IDX, "../data/idx.pkl")
#pickleDump(ENG, "../data/eng.pkl")
#pickleDump(ACTION_SV, "../data/actionSV.pkl")
#pickleDump(REVERSED_ACTION_SV, "../data/svAction.pkl")
#pickleDump(lengths, "../data/video_lengths.pkl")