# Google

### Labels -> ID

In [4]:
import pandas as pd

# CSV with one row per label; read below for its 'label' and 'ilabel' columns.
label_ids_path = '../../gt_data/triple_inspected_May23rd/google/label_idx.csv'

# label name -> integer label id
labels_dict = {}

df = pd.read_csv(label_ids_path)

# Walk the two columns in lockstep; a repeated label name is treated as a
# corrupt mapping and aborts the cell.
for label, ilabel in zip(df['label'], df['ilabel']):
    if label in labels_dict:
        raise ValueError("Label happening multiple times!")
    labels_dict[label] = ilabel

In [5]:
# Display the label name -> id mapping for visual inspection.
labels_dict

## Sanity Rules

In [6]:
# Look up the ids of the 'alone', 'couple/2people' and 'group' labels
# (raises KeyError if any of them is missing from labels_dict).
alone_id, couple_id, group_id = (
    labels_dict[name] for name in ('alone', 'couple/2people', 'group')
)

def valid_conditions_combo(label_ids):
    """Check that at most one of alone / couple / group is present.

    Args:
        label_ids: collection of label ids for one box (anything supporting `in`).

    Returns:
        False when two or more of alone_id, couple_id and group_id occur in
        `label_ids`, True otherwise.
    """
    conditions_present = [
        condition_id in label_ids
        for condition_id in (alone_id, couple_id, group_id)
    ]
    return sum(conditions_present) <= 1

In [7]:
def _label_id(name):
    """Return the integer id stored in labels_dict for label `name` (KeyError if absent)."""
    return labels_dict[name]


# Labels that make up the state_labels set used by the count/no-state rules.
sitting_id = _label_id('sitting')
standing_id = _label_id('standing')
walking_id = _label_id('walking')
running_id = _label_id('running')
biking_id = _label_id('biking')
riding_carriage_id = _label_id('riding carriage')
on_wheelchair_id = _label_id('on wheelchair')
mobility_aid_id = _label_id('mobility aids')

# Remaining (non-state) labels referenced by the sanity rules.
crosswalk_id = _label_id('crossing crosswalk')
dining_id = _label_id('dining')
vendor_id = _label_id('street vendors')
shopping_id = _label_id('shopping')
load_unload_id = _label_id('load/unload packages from car/truck')
waiting_bus_id = _label_id('waiting in bus station')
working_laptop_id = _label_id('working/laptop')
push_wheelchair_id = _label_id('pushing wheelchair')
hugging_id = _label_id('hugging')

# A box labelled 'pet' is exempt from the "needs a condition/state" rules.
pet_id = _label_id('pet')


In [8]:
def valid_conditions_state_action_combos(label_ids):
    """Check that condition, state and action labels on one box are compatible.

    A label set is rejected when any of these holds:
      * 'alone' + 'sitting' + 'crossing crosswalk' are all present;
      * a posture-bound action (street vendors, shopping, load/unload,
        waiting in bus station, working/laptop) is present but neither
        'sitting' nor 'standing' is;
      * 'alone' + 'on wheelchair' + 'pushing wheelchair' are all present.

    Args:
        label_ids: collection of label ids for one box (anything supporting `in`).

    Returns:
        True when none of the rules above is violated, False otherwise.
    """
    if alone_id in label_ids and sitting_id in label_ids and crosswalk_id in label_ids:
        return False

    # The posture requirement is "sitting OR standing". The previous condition
    # (`sitting not in ... or standing not in ...`) demanded BOTH postures at
    # once, which rejected e.g. a standing vendor and could never be satisfied
    # together with the one-state limit for a lone person.
    has_posture = sitting_id in label_ids or standing_id in label_ids
    posture_bound_actions = (
        vendor_id,
        shopping_id,
        load_unload_id,
        waiting_bus_id,
        working_laptop_id,
    )
    if not has_posture and any(action_id in label_ids for action_id in posture_bound_actions):
        return False

    if alone_id in label_ids and on_wheelchair_id in label_ids and push_wheelchair_id in label_ids:
        return False

    return True
    
    

In [9]:
# Ids counted as "state" labels by valid_alone_state_counts / valid_no_state.
state_labels = {
    on_wheelchair_id,
    walking_id,
    sitting_id,
    standing_id,
    running_id,
    biking_id,
    riding_carriage_id,
    mobility_aid_id,
}
# Ids counted as "condition" labels by valid_no_condition.
condition_labels = {alone_id, couple_id, group_id}

def valid_alone_state_counts(label_ids):
    """Limit how many distinct state labels an 'alone' or 'couple' box may carry.

    Args:
        label_ids: iterable of hashable label ids for one box.

    Returns:
        False when 'alone' is present with more than one state label, or
        'couple' is present with more than two; True otherwise (including
        when neither 'alone' nor 'couple' is present).
    """
    # Boxes without alone/couple are not constrained by this rule.
    if alone_id not in label_ids and couple_id not in label_ids:
        return True

    n_states = len(set(label_ids).intersection(state_labels))
    state_caps = ((alone_id, 1), (couple_id, 2))
    return not any(
        condition_id in label_ids and n_states > cap
        for condition_id, cap in state_caps
    )

def valid_no_condition(label_ids):
    """Require at least one condition label unless the box is a pet.

    Args:
        label_ids: iterable of hashable label ids for one box.

    Returns:
        True when `label_ids` contains any id from condition_labels, or
        contains pet_id; False otherwise.
    """
    has_condition = bool(set(label_ids).intersection(condition_labels))
    return has_condition or pet_id in label_ids

def valid_no_state(label_ids):
    """Require at least one state label unless the box is a pet.

    Args:
        label_ids: iterable of hashable label ids for one box.

    Returns:
        True when `label_ids` contains any id from state_labels, or contains
        pet_id; False otherwise.
    """
    has_state = bool(set(label_ids).intersection(state_labels))
    return has_state or pet_id in label_ids
    

## Sanity Checks

In [10]:
import pandas as pd
import numpy as np

In [11]:
def converter(instr):
    """Parse a bracketed, comma-separated string such as '[1, 2, 3]' into an int array.

    The first and last characters are dropped (the enclosing brackets) and
    the remainder is parsed as comma-separated integers.

    Args:
        instr: string cell value as read from the CSV.

    Returns:
        1-D numpy array of ints.
    """
    body = instr[1:-1]
    return np.fromstring(body, dtype=int, sep=',')

In [12]:
# CSV of ground-truth labels; its 'ilabel' column is parsed with converter() when loaded.
gt_bbox_labels_path = '../../gt_data/triple_inspected_May23rd/google/per_file_per_box.csv'

In [13]:
# Load the per-box labels, parsing the 'ilabel' column into int arrays on read.
gt_bbox_labesl = pd.read_csv(
    gt_bbox_labels_path,
    converters=dict(ilabel=converter),
)
gt_bbox_labesl.head()

In [14]:
# Show which columns the per-box label file provides.
gt_bbox_labesl.columns

In [15]:
# Turn each parsed numpy array into a plain Python list of ints.
# Going through np.asarray keeps this cell safe to re-run: on a second
# execution the values are already lists, and calling .tolist() on them
# directly would raise AttributeError.
gt_bbox_labesl['ilabel'] = gt_bbox_labesl['ilabel'].apply(lambda x: np.asarray(x).tolist())
gt_bbox_labesl['ilabel'].head()

In [16]:
# A box is valid only if it passes EVERY sanity rule. Assigning the result of
# each rule to 'is_valid' in turn would overwrite the previous results and
# leave only the last rule in effect, so the rules are combined with all().
sanity_rules = (
    valid_conditions_combo,
    valid_conditions_state_action_combos,
    valid_alone_state_counts,
    valid_no_condition,
    valid_no_state,
)
gt_bbox_labesl['is_valid'] = gt_bbox_labesl['ilabel'].apply(
    lambda label_ids: all(rule(label_ids) for rule in sanity_rules)
)

In [17]:
# Peek at the first few validity flags.
gt_bbox_labesl['is_valid'].head()

In [18]:
# Number of boxes flagged valid (True counts as 1).
gt_bbox_labesl['is_valid'].sum()

In [19]:
# (rows, columns) of the full per-box table, for comparison with the valid count.
gt_bbox_labesl.shape

In [20]:
# Percentage of boxes that are flagged valid.
100 * gt_bbox_labesl['is_valid'].sum() / len(gt_bbox_labesl)

In [23]:
# Rows whose validity flag is False, kept for manual inspection.
gt_bbox_labesl_sanity_invalid = gt_bbox_labesl.loc[~gt_bbox_labesl['is_valid']]
gt_bbox_labesl_sanity_invalid

In [20]:
# Reverse lookup: integer label id -> label name.
ids_labels_dict = dict(zip(labels_dict.values(), labels_dict.keys()))

In [21]:
# Spot-check: print the label names behind ids 7, 34 and 39.
# NOTE(review): the ids are hard-coded; presumably taken from a row in the
# invalid table — confirm they still exist in label_idx.csv.
print(ids_labels_dict[7], ids_labels_dict[34], ids_labels_dict[39])

#### Dump sanity-checked output

In [22]:
# Keep only the rows whose validity flag is True.
gt_bbox_labesl_sanity_checked = gt_bbox_labesl.loc[gt_bbox_labesl['is_valid']]
gt_bbox_labesl_sanity_checked.shape

In [23]:
# Write the rows that passed the sanity checks to CSV, without the index column.
# NOTE(review): this path is relative to the current working directory and
# uses a different root ('./gt_data/...') than the '../../gt_data/...' inputs
# — confirm the destination is intended and that the directory exists.
new_path = './gt_data/google/label_per_box_sanity_checked.csv'
gt_bbox_labesl_sanity_checked.to_csv(new_path, index=False)