# Demo object and attribute predictions


### 1. Setup

* First, set up Python, `numpy`, and `matplotlib`.

In [None]:
# set up Python environment: numpy for numerical routines, and matplotlib for plotting
import numpy as np
import matplotlib
import sys
import platform
print(platform.python_version())
sys.path.append('/home/dxwang/bottom-up-attention/caffe/python')
import caffe
import matplotlib.pyplot as plt
import pylab
from skimage import transform
# display plots in this notebook
%matplotlib inline

import os

# set display defaults (applied to every figure drawn later in this notebook)
plt.rcParams['figure.figsize'] = (12, 9)        # default figure size as (width, height) in inches
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

* Load `caffe`.

In [None]:
# Change dir to caffe root, otherwise the relative prototxt / database paths won't resolve.
# NOTE: os.chdir('..') is relative to the *current* directory, so every re-run of this
# cell moves one level further up. Run it exactly once per kernel session.
import os
print(os.getcwd())  # directory before the change
os.chdir('..')
print(os.getcwd())  # directory after the change

In [3]:
# The caffe module needs to be on the Python path;
#  we'll add it here explicitly.
# The entries below are relative, so they are resolved against the current working
# directory (changed by the os.chdir('..') cell above).
import sys
sys.path.insert(0, './caffe/python/')
sys.path.insert(0, './lib/')  # presumably where the fast_rcnn package imported later lives -- confirm
sys.path.insert(0, './tools/')

import caffe

In [5]:
data_path = '.../bottom-up-attention/data/genome/1600-400-20'


def _load_vocab(vocab_file, first_entry):
    """Read a vocabulary file into a list of names.

    Each line contributes the text before its first comma, lower-cased and
    stripped of surrounding whitespace. `first_entry` is placed at index 0 so
    that the names read from the file start at index 1.
    """
    names = [first_entry]
    with open(vocab_file) as vocab:
        # The loop variable is deliberately not called `object`: at notebook scope
        # that would shadow the builtin for every later cell.
        for line in vocab:
            names.append(line.split(',')[0].lower().strip())
    return names


# Load classes
classes = _load_vocab(os.path.join(data_path, 'objects_vocab.txt'), '__background__')

# Load attributes
attributes = _load_vocab(os.path.join(data_path, 'attributes_vocab.txt'), '__no_attribute__')

In [6]:
# Check object extraction
# Builds the detection network (`net`) and loads the test configuration (`cfg`);
# both are used as globals by get_obj_infos further down.
from fast_rcnn.config import cfg, cfg_from_file
from fast_rcnn.test import im_detect,_get_blobs
from fast_rcnn.nms_wrapper import nms
import cv2

GPU_ID = 0   # if we have multiple GPUs, pick one 
caffe.set_device(GPU_ID)  
caffe.set_mode_gpu()
net = None  # drop any previously bound network before building a new one
# presumably merges the YAML settings into the global `cfg` -- confirm against fast_rcnn.config
cfg_from_file('.../bottom-up-attention/experiments/cfgs/faster_rcnn_end2end_resnet.yml')

# Trained weights (.caffemodel) and the test-time network definition (.prototxt).
weights = '.../faster_rcnn_models/resnet101_faster_rcnn_final.caffemodel'
prototxt = '.../bottom-up-attention/models/vg/ResNet-101/faster_rcnn_end2end_final/test.prototxt'

# Instantiate the network in TEST phase with the weights above.
net = caffe.Net(prototxt, caffe.TEST, weights=weights)

ImportError: No module named fast_rcnn.config

In [6]:
def load_flickr30k_image_ids(split_name):
    """Return a list of ``(image_path, index)`` tuples for one Flickr30K split.

    `split_name` must be one of 'flickr30K-train', 'flickr30K-val' or
    'flickr30K-test'. The image file names are read, one per line, from
    ``data/Multi30K_DE/<subset>_images.txt`` and joined onto the image
    directory (which gets a ``/task1`` suffix for the test subset). `index`
    is the zero-based position of the name in that listing.
    Modify the hard-coded directories to suit your data locations.
    """
    assert split_name in ['flickr30K-train', 'flickr30K-val', 'flickr30K-test']
    subset = split_name.split('-')[-1]

    image_root = 'multi30k-entities-dataset/data/images/flickr30k-images'
    if subset == 'test':
        image_root = image_root + '/task1'
    listing_path = os.path.join('data/Multi30K_DE/', '%s_images.txt' % subset)

    with open(listing_path, 'r') as listing:
        names = [line.strip() for line in listing]

    return [(os.path.join(image_root, name), idx) for idx, name in enumerate(names)]

def load_ambiguouscoco_image_ids(split_name):
    """Return a list of ``(image_path, index)`` tuples for AmbiguousCOCO.

    Each line of ``image_list.txt`` contributes the text before its first '#',
    stripped of surrounding whitespace, as the image file name. `index` is the
    zero-based line number. The image directory gets a ``/task1`` suffix only
    when `split_name` is 'test'.
    Modify the hard-coded directories to suit your data locations.
    """
    IMAGES_DIR = '.../data/AmbiguousCOCO/translated_images'
    IMAGES_DIR += '/task1' if split_name == 'test' else ''
    ANNOTS_DIR = '.../data/AmbiguousCOCO'
    split = []

    with open(os.path.join(ANNOTS_DIR, 'image_list.txt'), 'r') as f:
        for i, line in enumerate(f):
            # Strip so that a line without '#' does not keep its trailing newline
            # (which would end up inside the image path).
            item = line.split('#')[0].strip()
            split.append((os.path.join(IMAGES_DIR, item), i))
    return split

def get_obj_infos(im_file, visualization=False):
    """Run the detector on one image and collect its top-scoring regions.

    Uses notebook globals set up in earlier cells: ``net``, ``cfg``,
    ``im_detect``, ``_get_blobs``, ``nms``, ``classes`` and ``attributes``.

    Args:
        im_file: path of the image, read with ``cv2.imread``.
        visualization: when true, show the image with every kept box and its
            label drawn on the current matplotlib axes.

    Returns:
        dict with the keys
          'obj_categories': label string per kept box; the class name, prefixed
              by the best attribute name when its probability exceeds 0.1,
          'obj_confs': highest non-background class probability per kept box,
          'obj_boxes': kept boxes, i.e. rois divided by the first image scale,
          'boxes': indices of the kept rois,
          'image_name': `im_file`,
          'features': rows of the 'pool5_flat' blob for the kept rois.

    Raises:
        IOError: if the image cannot be read.
    """
    ###########################
    # Similar to get_detections_from_im
    conf_thresh = 0.4
    min_boxes = 10
    max_boxes = 20

    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('could not read image: %s' % im_file)
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)

    # Keep the original boxes, don't worry about the regression bbox outputs
    rois = net.blobs['rois'].data.copy()
    # unscale back to raw image space
    blobs, im_scales = _get_blobs(im, None)

    cls_boxes = rois[:, 1:5] / im_scales[0]
    cls_prob = net.blobs['cls_prob'].data
    attr_prob = net.blobs['attr_prob'].data
    pool5 = net.blobs['pool5_flat'].data

    # Keep only the best detections: per roi, the highest score among the classes
    # for which the roi survives NMS.
    max_conf = np.zeros((rois.shape[0]))

    for cls_ind in range(1, cls_prob.shape[1]):
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, cfg.TEST.NMS))
        max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

    # NOTE(review): the threshold-based selection is immediately replaced by a plain
    # top-`max_boxes` ranking, so `conf_thresh` currently has no effect on the result.
    # Left unchanged to preserve existing outputs -- confirm whether thresholding was intended.
    keep_boxes = np.where(max_conf >= conf_thresh)[0]
    keep_boxes = np.argsort(max_conf)[::-1][:max_boxes]
    if len(keep_boxes) < min_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:min_boxes]
    elif len(keep_boxes) > max_boxes:
        keep_boxes = np.argsort(max_conf)[::-1][:max_boxes]
    ############################
    boxes = cls_boxes[keep_boxes]
    # Column 0 is skipped below, hence the +1 when indexing `classes` / `attributes`.
    objects = np.argmax(cls_prob[keep_boxes][:, 1:], axis=1)
    attr_thresh = 0.1
    attr = np.argmax(attr_prob[keep_boxes][:, 1:], axis=1)
    attr_conf = np.max(attr_prob[keep_boxes][:, 1:], axis=1)
    objs_confs = np.max(cls_prob[keep_boxes][:, 1:], axis=1)

    objs = []
    for i in range(len(keep_boxes)):
        # `bbox` is a view of row i of `boxes`, so the clamping below is also
        # reflected in the returned 'obj_boxes'.
        bbox = boxes[i]
        if bbox[0] == 0:
            bbox[0] = 1
        if bbox[1] == 0:
            bbox[1] = 1
        cls = classes[objects[i] + 1]
        if attr_conf[i] > attr_thresh:
            cls = attributes[attr[i] + 1] + " " + cls
        objs.append(cls)

    infors = {}
    infors['obj_categories'] = objs
    infors['obj_confs'] = objs_confs
    infors['obj_boxes'] = boxes
    infors['boxes'] = keep_boxes
    infors['image_name'] = im_file
    infors['features'] = pool5[keep_boxes]

    if visualization:
        plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        ax = plt.gca()
        # Draw every kept box with its label (previously only the last one was drawn).
        for bbox, label in zip(boxes, objs):
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='red', linewidth=2, alpha=0.5)
            )
            ax.text(bbox[0], bbox[1] - 2,
                    '%s' % (label),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=10, color='white')
    return infors


import json
import datetime
import numpy as np
class JsonEncoder(json.JSONEncoder):
    """JSON encoder that also handles numpy scalars, numpy arrays and datetimes."""

    def default(self, obj):
        """Convert `obj` to a JSON-serializable value.

        numpy integers become ``int``, numpy floats become ``float``, arrays
        become nested lists and ``datetime.datetime`` objects become their
        string form. Anything else is passed to the base class, which raises
        ``TypeError``.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        # `datetime` is the module (the file does `import datetime`), so the check
        # has to name the class inside it; passing the module to isinstance raises.
        elif isinstance(obj, datetime.datetime):
            return obj.__str__()
        else:
            return super(JsonEncoder, self).default(obj)
def save_dict(filename, dic):
    """Write `dic` to `filename` as a single JSON document.

    The file is opened in 'w' mode, so existing content is replaced. Values
    are encoded with JsonEncoder and non-ASCII characters are written as-is.
    """
    with open(filename, 'w') as sink:
        json.dump(dic, sink, cls=JsonEncoder, ensure_ascii=False)
def save_dicts(filename, dic):
    """Append `dic` to `filename` as one JSON document followed by a newline.

    The file is opened in 'a+' mode, so repeated calls accumulate one
    document per call. Values are encoded with JsonEncoder and non-ASCII
    characters are written as-is.
    """
    with open(filename, 'a+') as sink:
        json.dump(dic, sink, cls=JsonEncoder, ensure_ascii=False)
        sink.write('\n')
def load_dict(filename):
    """Parse the JSON document stored in `filename` and return the result.

    The whole file is handed to ``json.load``, i.e. it is read as one document.
    """
    with open(filename, "r") as source:
        return json.load(source)

In [39]:
split_names = ['flickr30K-test']#, 'flickr30K-val', 'flickr30K-train']

## multi30K
# For every split: run the detector on each image and append its result as one
# JSON line to '.../data/<split_name>.json'.
for split_name in split_names:
    im_files = load_flickr30k_image_ids(split_name)
    file_num = len(im_files)

    save_file = '.../data/'+split_name+'.json'

    # save_dicts appends, so remove any file left over from a previous run.
    if os.path.exists(save_file): os.remove(save_file)

    for i, (im_path, _image_id) in enumerate(im_files):
        # Progress is reported as the fraction of images processed so far.
        if i % 1000 == 0: print('processing %s ...' % (float(i)/float(file_num)))
        info = get_obj_infos(im_path)
        save_dicts(save_file, info)


processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...
processing 0 ...


In [8]:
## ambiguous
# Run the detector on each AmbiguousCOCO image and append its result as one JSON
# line to the output file.
im_files = load_ambiguouscoco_image_ids('')
file_num = len(im_files)
save_file = '.../data/ambiguou.json'
# save_dicts appends, so remove any file left over from a previous run.
if os.path.exists(save_file): os.remove(save_file)
for i, (im_path, _image_id) in enumerate(im_files):
    # Progress is reported as the fraction of images processed so far.
    if i % 1000 == 0: print('processing %s ...' % (float(i)/float(file_num)))
    info = get_obj_infos(im_path)
    save_dicts(save_file, info)
print('done')

processing 0.0 ...
done
