In [None]:
import json
from pprint import pprint
import h5py
import scipy.io
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2

In [None]:
def get_image(data,data_in,CATEGORIES):
    img = data[data_in].get('filename','')
    DATADIR = "sg_dataset"
    
    for category in CATEGORIES:
        path = os.path.join(DATADIR,category)
        img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_COLOR)
        break
    return img_array

In [None]:
def get_object(data,data_in,rel_in):
    ob1 = data[data_in].get('relationships',)[rel_in].get('objects','')[0]
    ob2 = data[data_in].get('relationships',)[rel_in].get('objects','')[1]
    return ob1,ob2

In [None]:
def get_coordinate(data,data_in,ob1,ob2):
    y1 = data[data_in].get('objects','')[ob1].get('bbox','').get('y','')
    x1 = data[data_in].get('objects','')[ob1].get('bbox','').get('x','')
    w1 = data[data_in].get('objects','')[ob1].get('bbox','').get('w','')
    h1 = data[data_in].get('objects','')[ob1].get('bbox','').get('h','')

    y2 = data[data_in].get('objects','')[ob2].get('bbox','').get('y','')
    x2 = data[data_in].get('objects','')[ob2].get('bbox','').get('x','')
    w2 = data[data_in].get('objects','')[ob2].get('bbox','').get('w','')
    h2 = data[data_in].get('objects','')[ob2].get('bbox','').get('h','')
    return x1,y1,w1,h1,x2,y2,w2,h2

In [None]:
def UnionBox(x1,x2,y1,y2,h1,h2,w1,w2):
    ub_x = 0
    ub_y = 0
    ub_w = 0
    ub_h = 0

    if x1 > x2:
        ub_x = x2
    else:
        ub_x = x1

    if y1 > y2:
        ub_y = y2
    else:
        ub_y = y1

    if x1+w1 > x2+w2:
        ub_w = x1+w1-ub_x
    else:
        ub_w = x2+w2-ub_x

    if y1+h1 > y2+h2:
        ub_h = y1+h1-ub_y
    else:
        ub_h = y2+h2-ub_y
        
    return ub_x,ub_y,ub_w,ub_h

In [None]:
def crop_image(img_arr,ub_y,ub_x,ub_w,ub_h):
    #crop the image with the union box and return it
    #Use this for Union_WB_B method to black out the exterior
    crop_img = img_arr[int(ub_y):int(ub_y+ub_h), int(ub_x):int(ub_x+ub_w)].copy()
    return crop_img

In [None]:
def check_rel(rel):
    #predicats = ['above','behind','beneath','left of','right of',
    #             'below','has','hold','in','inside','in front of','near',
    #             'next to','on','on the left of','on the right of',
    #             'under','wear','at','has on','adjacent to','is behind the','is to left of',
    #             'is right of the','']
    
    spatial  = ['about','above','across','against','along','alongside','around'
                ,'at','atop','behind','beneath','below','beside','beyond'
                ,'close to','far from','front','inside','opposite','outside'
                ,'near','next','top','over','past','through','toward','within'
                'left','right','under']
    special = ['on','in','by','up']
    rel1 = rel
    #print(rel)
    for i in spatial:
        if rel in special:
            return rel
        elif rel.find(i) != -1:
            #print(rel1)
            #print(i)
            rel = i
            return rel
        #elif i.find(rel) != -1:
            #rel = i
            #return rel
            #print("FAILURE ",rel1)
            
        #    print ("Found",rel)
    #rel = None
    #if rel not in spatial:
    #    rel = None
    rel = None
    return rel

#Objects aren't needed only saving relationships not training object classifier
def check_obj(obj1,obj2):
    objs = ['airplane','bag','ball','basket','bear','bed','bench',
            'bike','boat','bottle','bowl','box','building','bus',
            'bush','cabinet','camera','can','car','cart','cat','chair',
            'clock','coat','computer','cone','counter','cup','desk','dog',
            'elephant','engine','face','faucet','giraffe','glasses','grass',
            'hand','hat','helmet','horse','hydrant','jacket','jeans','keyboard',
            'kite','lamp','laptop','luggage','monitor','motorcycle','mountain',
            'mouse','oven','pants','paper','person','phone','pillow','pizza',
            'plane','plant','plate','post','pot','ramp','refrigerator','road',
            'roof','sand','shelf','shirt','shoe','shoes','shorts','sink','skateboard',
            'skis','sky','snowboard','sofa','stove','street',
            'suitcase','sunglasses','surfboard','table','tie',
            'tower','traffic light','train','trash can','tree',
            'trees','truck','umbrella','van','vase','watch','wheel']
    
    #if obj1 not in objs:
    #    obj1 = None
    
    #if obj2 not in objs:
    #    obj2 = None
    
    return obj1,obj2

In [None]:
def write_to_file(data,data_in,rel_in,crop_img,new_dir):
    
    ob1_name = data[data_in].get('relationships',)[rel_in].get('text','')[0]
    rel = data[data_in].get('relationships',)[rel_in].get('text','')[1]
    ob2_name = data[data_in].get('relationships',)[rel_in].get('text','')[2]
    rel = check_rel(rel)
    #ob1_name,ob2_name = check_obj(ob1_name,ob2_name)
    
    if rel != None:
        image_name = data[data_in].get('filename',)[:-4]+"_"+ob1_name+"_"+rel+"_"+ob2_name+"0"+".jpg"
        name = rel
        #Name directory after the label
        #name = rel
        dirName = new_dir+name
        try:
            # Create target Directory
            os.mkdir(dirName)
            #print("Directory " , dirName ,  " Created ") 
        except FileExistsError:
            pass

        filename = dirName+"/"+image_name.replace(" ", "")
        exists = False
        i = int(filename[len(filename)-5:-4])
        while exists != True:
            if (os.path.isfile(filename) != True):
                cv2.imwrite(filename,crop_img)
                exists = True
            else:
                i += 1
                filename = filename[:-5]+str(i)+".jpg"

In [None]:
def Union_WB_B(img_array,x1,y1,h1,w1,x2,y2,h2,w2):
    
    img_array[0:img_array.shape[0] , 0:img_array.shape[1]] = (0,0,0)
    img_array[int(y1):int(y1+h1), int(x1):int(x1+w1)] = (0,255,0)
    img_array[int(y2):int(y2+h2), int(x2):int(x2+w2)] = (255,0,0)
    
    return img_array

def Union_WB(img_array,x1,y1,h1,w1,x2,y2,h2,w2):
    obj1 = img_array[int(y1):int(y1+h1), int(x1):int(x1+w1),:].copy()
    obj2 = img_array[int(y2):int(y2+h2), int(x2):int(x2+w2),:].copy()

    #temp_array = img_array.copy()
    temp_array = np.zeros(img_array.shape, np.uint8)
    #temp_array[int(y1):int(y1+h1), int(x1):int(x1+w1),:] = obj1
    #temp_array[:,:,:] = img_array[0:img_array.shape[0] , 0:img_array.shape[1],:]
    temp_array[int(y1):int(y1+h1), int(x1):int(x1+w1),:] = img_array[int(y1):int(y1+h1), int(x1):int(x1+w1),:].copy()
    temp_array[int(y2):int(y2+h2), int(x2):int(x2+w2),:] = img_array[int(y2):int(y2+h2), int(x2):int(x2+w2),:].copy()

    return temp_array

In [None]:
def execute_images(method,data,CATEGORIES):
    #Extracting the required data
    for i in range(len(data)):
        img_array = get_image(data,i,CATEGORIES)
        for j in range(len(data[i].get('relationships',))):
            try:
                obj1 , obj2 = get_object(data,i,j)
                x1,y1,w1,h1,x2,y2,w2,h2 = get_coordinate(data,i,obj1,obj2)
                ub_x,ub_y,ub_w,ub_h = UnionBox(x1,x2,y1,y2,h1,h2,w1,w2)
                
                if method is "Union_WB_B":
                    img_array1 = Union_WB_B(img_array.copy(),x1,y1,h1,w1,x2,y2,h2,w2)
                    crop_img = crop_image(img_array1,ub_y,ub_x,ub_w,ub_h)
                    crop_img = cv2.resize(crop_img, (224, 224))
                    write_to_file(data,i,j,crop_img,"Union_WB_B/")
                elif method is "Union":
                    crop_img = crop_image(img_array,ub_y,ub_x,ub_w,ub_h)
                    crop_img = cv2.resize(crop_img, (224, 224))
                    write_to_file(data,i,j,crop_img,"Union/")
                elif method is "Union_WB":
                    img_array1 = Union_WB(img_array.copy(),x1,y1,h1,w1,x2,y2,h2,w2)
                    crop_img = crop_image(img_array1,ub_y,ub_x,ub_w,ub_h)
                    crop_img = cv2.resize(crop_img, (224, 224))
                    write_to_file(data,i,j,crop_img,"Union_WB/") 
                else:
                    print("ERROR")
                    break
            except:
                pass

In [None]:
with open('sg_dataset/sg_train_annotations.json') as f:
    data = json.load(f)

execute_images("Union",data,["sg_train_images"])
execute_images("Union_WB",data,["sg_train_images"])
execute_images("Union_WB_B",data,["sg_train_images"])

with open('sg_dataset/sg_test_annotations.json') as f1:
    data1 = json.load(f1)

execute_images("Union",data1,["sg_test_images"])
execute_images("Union_WB",data1,["sg_test_images"])
execute_images("Union_WB_B",data1,["sg_test_images"])

In [None]:
from imutils import paths
import random
import cv2
import os

def write_cvs(dataset):
    # grab all image paths and create a training and testing split
    imagePaths = sorted(list(paths.list_images(dataset)))
    random.shuffle(imagePaths)
    i = int(len(imagePaths) * 0.20)
    j = int(len(imagePaths) * 0.60)
    
    trainPaths = imagePaths[:j]
    testPaths = imagePaths[j:j+i]
    validPaths = imagePaths[j+i:]
    
    csv_path_training = dataset+"_training.csv"
    csv_path_testing = dataset+"_testing.csv"
    csv_path_validation = dataset+"_validation.csv"

    #define the datasets
    datasets = [
        ("training", trainPaths, csv_path_training),
        ("testing", testPaths, csv_path_testing),
        ("validation", validPaths, csv_path_validation)
    ]
    
    # loop over the data splits
    for (dType, imagePaths, outputPath) in datasets:
        # open the output CSV file for writing
        print("[INFO] building '{}' split...".format(dType))
        f = open(outputPath, "w")

        # loop over all input images
        for imagePath in imagePaths:
            try:
                # load the input image and resize it to 64x64 pixels
                image = cv2.imread(imagePath)
                image = cv2.resize(image, (224, 224))

                # create a flattened list of pixel values
                image = [str(x) for x in image.flatten()]

                # extract the class label from the file path and write the
                # label along pixels list to disk
                label = imagePath.split(os.path.sep)[-2].split("_")
                label = label[0]+','+label[1]
                f.write("{},{}\n".format(label, ",".join(image)))
            except:
                pass

        # close the output CSV file
        f.close()


In [None]:
write_cvs("Union")
write_cvs("Union_WB")
write_cvs("Union_WB_B")

In [None]:
#Writing clothes set
write_cvs("datasetClothes")

In [None]:
with open('sg_dataset/sg_test_annotations.json') as f:
    data = json.load(f)

In [None]:
new_vrd = []
for i in range(len(data)):
    filename = data[i].get('filename','')
    for j in range(len(data[i].get('relationships',''))):
        
        relationship = data[i].get('relationships','')[j].get('text','')[1]
        relationship = check_rel(relationship)
        
        if relationship != None:    
            
            ob1,ob2 = get_object(data,i,j)
            object_name_1 = data[i].get('relationships','')[j].get('text','')[0]
            object_name_2 = data[i].get('relationships','')[j].get('text','')[2]
            x1,y1,w1,h1,x2,y2,w2,h2 = get_coordinate(data,i,ob1,ob2)
            input_vrd = [filename,object_name_1,object_name_2,[x1,y1,w1,h1],[x2,y2,w2,h2],[relationship]]
            new_vrd.append(input_vrd)
        
        #print(object1+" "+relationship+" "+object2)
        #rel = data[data_in].get('relationships',)[rel_in].get('text','')[1]

In [None]:
new_appeneded = []
preposition = []
old_vrd = new_vrd
lenght_of_list = len(new_vrd)
j = 0 
i = 0
while j < lenght_of_list:
    #print(i , j , lenght_of_list)
    if i > 5:
        i = j-5
    else:
        i = j
    while i < lenght_of_list:
        #print(new_vrd[j][0] ,new_vrd[i][0])
        if (i != j) and (new_vrd[j][0] == new_vrd[i][0]) and (new_vrd[j][1] == new_vrd[i][1]) and (new_vrd[j][2] == new_vrd[i][2]) and (new_vrd[j][3] == new_vrd[i][3]) and (new_vrd[j][4] == new_vrd[i][4]):
            set1 = set(new_vrd[j][5])
            set2 = set(new_vrd[i][5])
            print(set1 , set2 , i , j)
            new_appeneded = []
            preposition = []
            if set1 != set2 and ((not (set1.issubset(set2))) and (not (set2.issubset(set1)))):
                for k in range(len(new_vrd[j][5])):
                    preposition.append(new_vrd[j][5][k])
                for k1 in range(len(new_vrd[i][5])):
                    preposition.append(new_vrd[i][5][k1])
                    
                new_appeneded = [new_vrd[j][0],new_vrd[j][1],new_vrd[j][2],new_vrd[j][3],new_vrd[j][4],list(set(preposition))]
                new_vrd.remove(new_vrd[i])
                print(new_appeneded)
                new_vrd.insert(i,new_appeneded)
                #lenght_of_list -= 1
                j -= 7
                i -= 7
            elif (set1 != set2) and (set1.issubset(set2)):
                new_vrd.remove(new_vrd[j])
                lenght_of_list -= 1
                j -= 7
                i -= 7
            elif (set1 == set2) or (set2.issubset(set1)):
                #print("GETTING REMOVED 2",new_vrd[i])
                #print("GETTING REMOVED 21",new_vrd[j])
                new_vrd.remove(new_vrd[i])
                lenght_of_list -= 1
                j -= 7
                i -= 7
                #i = j-1
                #i -= 1

        i += 1
    j += 1

In [None]:
for x in range(len(new_vrd)):
    for y in range(len(new_vrd)):
        if (new_vrd[x][0] == new_vrd[y][0]) and (new_vrd[x][1] == new_vrd[y][1]) and (new_vrd[x][2] == new_vrd[y][2]) and (x != y) and (new_vrd[x][3] == new_vrd[y][3]) and (new_vrd[x][4] == new_vrd[y][4]):
            print(new_vrd[x],x,y)

In [None]:
import csv

with open("Multi_label_predictions.csv","a",newline="") as f: 
    cw = csv.writer(f)
    cw.writerows(r+[""] for r in new_vrd)
f.close()
    
#print(set(new_vrd))

In [75]:
import csv
import ast
from collections import Counter

with open('Multi_label_predictions.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

all_labels = []
number_multi_labels = 0
singel_labels = 0 
combinations = []
for i in range(len(your_list)):
    x = your_list[i][5]
    x = ast.literal_eval(x)
    x = [n.strip() for n in x]
    if len(x) > 1:
        number_multi_labels += 1
        combinations.append(x)
        for j in x:
            singel_labels += 1
            all_labels.append(j)
    else:
        #print(x[0])
        #print(x)
        singel_labels += 1
        all_labels.append(x[0])

print(number_multi_labels)
print(singel_labels)
dictionary_labels = Counter(all_labels)
#dictionary_combinations = Counter(combinations)
        #print(your_list[i][5])


4378
78349


In [None]:
print(dictionary_labels)

In [None]:
new_counter = Counter(tuple(item) for item in combinations)
#print(new_counter)
for k, v in new_counter.items():
    print(k,v)
    #print(k , v)

In [96]:
#Store the counters of the labels
train_labels = []
test_labels =[]
valid_labels = []
#Store information of image
train_info = []
test_info = []
valid_info = []

#This should only iterate for multi labels first then
#It will go through 
all_labels = []
multi_labels_count = 0
single_labels_count = 0
count1 = 0
count2 = 0
count3 = 0
count4 = 0
count5 = 0
count6 = 0
count7 = 0
for i in range(len(your_list)):
    x = your_list[i][5]
    x = ast.literal_eval(x)
    x = [n.strip() for n in x]
    
    #print(Counter(test_labels).get(x[0]))
    #This is for multi labels
    if len(x) > 1:
        multi_labels_count += 1
        if (Counter(test_labels).get(x[0]) == None):
            count1 += 1
            test_info.append(your_list[i])
            for j in x:
                test_labels.append(j)
        elif (Counter(valid_labels).get(x[0]) == None):
            count2 += 1
            valid_info.append(your_list[i])
            for j in x:
                valid_labels.append(j)
        elif (Counter(train_labels).get(x[0]) == None):
            count3 += 1
            train_info.append(your_list[i])
            for j in x:
                train_labels.append(j)
        else:
            if Counter(test_labels).get(x[0]) < Counter(valid_labels).get(x[0]):
                count4 += 1
                test_info.append(your_list[i])
                for j in x:
                    test_labels.append(j)
            elif Counter(test_labels).get(x[0]) > Counter(valid_labels).get(x[0]):
                count5 += 1
                valid_info.append(your_list[i])
                for j in x:
                    valid_labels.append(j)
            elif Counter(train_labels).get(x[0]) <= ((Counter(valid_labels).get(x[0]) + Counter(train_labels).get(x[0]) + Counter(test_labels).get(x[0])) * 0.60):
                count6 += 1
                train_info.append(your_list[i])
                for j in x:
                    train_labels.append(j)
            elif Counter(test_labels).get(x[0]) == Counter(valid_labels).get(x[0]):
                count4 += 1
                test_info.append(your_list[i])
                for j in x:
                    test_labels.append(j)
            else:
                print(Counter(test_labels).get(x[0]))
                print(Counter(valid_labels).get(x[0]))
                print(Counter(train_labels).get(x[0]))
                count7 += 1
    else:
        single_labels_count += 1
        if (Counter(test_labels).get(x[0]) == None):
            count1 += 1
            test_info.append(your_list[i])
            test_labels.append(x[0])
            
        elif (Counter(valid_labels).get(x[0]) == None):
            count2 += 1
            valid_info.append(your_list[i])
            valid_labels.append(x[0])
        elif (Counter(train_labels).get(x[0]) == None):
            count3 += 1
            train_info.append(your_list[i])
            train_labels.append(x[0])
        else:
            if Counter(test_labels).get(x[0]) < Counter(valid_labels).get(x[0]):
                count4 += 1
                test_info.append(your_list[i])
                test_labels.append(x[0])
            elif Counter(test_labels).get(x[0]) > Counter(valid_labels).get(x[0]):
                count5 += 1
                valid_info.append(your_list[i])
                valid_labels.append(x[0])
            elif Counter(train_labels).get(x[0]) <= ((Counter(valid_labels).get(x[0]) + Counter(train_labels).get(x[0]) + Counter(test_labels).get(x[0])) * 0.60):
                count6 += 1
                train_info.append(your_list[i])
                train_labels.append(x[0])
            elif Counter(test_labels).get(x[0]) == Counter(valid_labels).get(x[0]):
                count4 += 1
                test_info.append(your_list[i])
                test_labels.append(x[0])
            else:
                print(Counter(test_labels).get(x[0]))
                print(Counter(valid_labels).get(x[0]))
                print(Counter(train_labels).get(x[0]))
                count7 += 1
            
            
    #This is for single labels.
    #else:
        #print(x[0])
        #print(x)
    #    singel_labels += 1
    #    all_labels.append(x[0])



In [97]:
print(multi_labels_count)
print(single_labels_count)

print(len(train_info))
print(len(test_info))
print(len(valid_info))

print("Train Labels Counter")
print(Counter(train_labels))
print("Test Labels Counter")
print(Counter(test_labels))
print("Valid Labels Counter")
print(Counter(valid_labels))

print(count1)
print(count2)
print(count3)
print(count4)
print(count5)
print(count6)
print(count7)
#print(range(Counter(all_labels).get('on')))

4378
68952
43992
14652
14686
Train Labels Counter
Counter({'on': 12763, 'above': 5206, 'next': 4544, 'behind': 4523, 'front': 3179, 'under': 3066, 'near': 2686, 'in': 2221, 'below': 1811, 'at': 1597, 'beside': 1435, 'over': 1291, 'by': 792, 'right': 731, 'top': 592, 'inside': 145, 'against': 131, 'around': 88, 'across': 55, 'outside': 49, 'along': 28, 'beyond': 13, 'far from': 11, 'toward': 8, 'past': 7, 'close to': 6, 'through': 5, 'opposite': 4, 'about': 4})
Test Labels Counter
Counter({'on': 4254, 'above': 1737, 'next': 1515, 'behind': 1508, 'front': 1060, 'under': 1022, 'near': 897, 'in': 741, 'below': 604, 'at': 533, 'beside': 479, 'over': 432, 'by': 264, 'right': 244, 'top': 198, 'inside': 49, 'against': 44, 'around': 31, 'across': 19, 'outside': 17, 'along': 10, 'beyond': 5, 'far from': 4, 'through': 3, 'past': 3, 'toward': 3, 'about': 3, 'close to': 2, 'opposite': 2})
Valid Labels Counter
Counter({'on': 4254, 'above': 1736, 'next': 1515, 'behind': 1508, 'front': 1060, 'under': 

In [98]:
import csv

with open("Training_Multi_label_predictions.csv","w",newline="") as f: 
    cw = csv.writer(f)
    cw.writerows(r+[""] for r in train_info)
f.close()


with open("Testing_Multi_label_predictions.csv","w",newline="") as f: 
    cw = csv.writer(f)
    cw.writerows(r+[""] for r in test_info)
f.close()


with open("Validation_Multi_label_predictions.csv","w",newline="") as f: 
    cw = csv.writer(f)
    cw.writerows(r+[""] for r in valid_info)
f.close()