In [1]:
import torch
import os.path as op
import logging
import numpy as np
import base64
import json
import os
from iopath.common.file_io import PathManager as pm
import lmdb
import pickle

# Notebook-wide iopath PathManager instance; the feature reader classes
# defined further down call PathManager.exists() on it.
PathManager = pm()

In [2]:
class TSVFile(object):
    """Random-access reader for a tab-separated file with a ``.lineidx`` index.

    The index file lives next to the TSV (same path with the extension
    replaced by ``.lineidx``) and holds one integer per row: the position
    handed to ``file.seek`` to reach the start of that row.  Both the index
    and the TSV handle are opened lazily, on first use.
    """

    def __init__(self, tsv_file, generate_lineidx=False):
        """
        Args:
            tsv_file: path of the TSV file to read.
            generate_lineidx: if True and the ``.lineidx`` file does not
                exist, build it by calling ``generate_lineidx_file``.
                NOTE(review): that helper is not defined in this class; it
                must be provided elsewhere before this option is used.
        """
        # Set first so close()/__del__ are safe even if construction fails.
        self._fp = None
        self._lineidx = None
        self.tsv_file = tsv_file
        self.lineidx = op.splitext(tsv_file)[0] + '.lineidx'
        # Remember which process opened the file handle.
        # If the pid is not equal to the current pid, we will re-open the file.
        self.pid = None
        # generate lineidx if not exist
        if not op.isfile(self.lineidx) and generate_lineidx:
            generate_lineidx_file(self.tsv_file, self.lineidx)

    def close(self):
        """Close the TSV handle if it is open; a later access re-opens it."""
        fp = getattr(self, '_fp', None)
        if fp is not None:
            fp.close()
            self._fp = None

    def __del__(self):
        self.close()

    def __str__(self):
        return "TSVFile(tsv_file='{}')".format(self.tsv_file)

    def __repr__(self):
        return str(self)

    def num_rows(self):
        """Return the number of rows listed in the ``.lineidx`` file."""
        self._ensure_lineidx_loaded()
        return len(self._lineidx)

    def seek(self, idx):
        """Return row ``idx`` as a list of whitespace-stripped column strings.

        Raises whatever indexing the offset list raises (e.g. ``IndexError``
        for an out-of-range ``idx``) after logging the file and index.
        """
        self._ensure_tsv_opened()
        self._ensure_lineidx_loaded()
        try:
            pos = self._lineidx[idx]
        except Exception:
            # Log which file/index failed, then let the caller see the error.
            logging.info('{}-{}'.format(self.tsv_file, idx))
            raise
        self._fp.seek(pos)
        return [s.strip() for s in self._fp.readline().split('\t')]

    def seek_first_column(self, idx):
        """Return only the first column of row ``idx``.

        NOTE(review): relies on ``read_to_character``, which is not defined
        in this class; it must be provided elsewhere before this is called.
        """
        self._ensure_tsv_opened()
        self._ensure_lineidx_loaded()
        pos = self._lineidx[idx]
        self._fp.seek(pos)
        return read_to_character(self._fp, '\t')

    def __getitem__(self, index):
        return self.seek(index)

    def __len__(self):
        return self.num_rows()

    def _ensure_lineidx_loaded(self):
        """Load the row offsets from the ``.lineidx`` file on first use."""
        if self._lineidx is None:
            logging.info('loading lineidx: {}'.format(self.lineidx))
            with open(self.lineidx, 'r') as fp:
                # Skip blank lines (e.g. a trailing newline) instead of
                # crashing on int('').
                self._lineidx = [int(line) for line in fp if line.strip()]

    def _ensure_tsv_opened(self):
        """Open the TSV on first use, and re-open it after a pid change."""
        current_pid = os.getpid()
        if self._fp is not None and self.pid == current_pid:
            return

        if self._fp is not None:
            logging.info('re-open {} because the process id changed'.format(self.tsv_file))
            # Release this process's copy of the stale handle before replacing it.
            self._fp.close()

        self._fp = open(self.tsv_file, 'r')
        self.pid = current_pid
            

In [6]:
# Directory with the detector's inference output (features.tsv / predictions.tsv).
# NOTE(review): absolute, machine-specific path; adjust it to your environment.
data_path = "/data/home/zmykevin/vinvl_data/nlvr2/nlvr2_features/nlvr2_X152C4_frcnnbig2_exp168model_0060000model.roi_heads.nm_filter_2_model.roi_heads.score_thresh_0.2/test/inference/model_0060000"
feature_tsv = os.path.join(data_path, "features.tsv")
# Keep the path under its own name so `prediction_tsv` is only ever a TSVFile.
prediction_tsv_path = os.path.join(data_path, "predictions.tsv")

feat_tsv = TSVFile(feature_tsv)
prediction_tsv = TSVFile(prediction_tsv_path)

In [9]:
# Peek at the first row of the feature TSV. Columns as read below:
# [0] row key, [1] number of boxes, [2] base64-encoded float32 feature buffer.
for i in range(feat_tsv.num_rows()):
    # Fetch the row once; every seek() call re-reads and re-splits the line.
    row = feat_tsv.seek(i)
    print(row[0])
    num_boxes = int(row[1])
    print(num_boxes)
    features = np.frombuffer(base64.b64decode(row[2]), np.float32).reshape(
        (num_boxes, -1)
    )
    print(features.shape)
    break

test1-0-0-img0
21
(21, 2054)


In [10]:
# Peek at the first row of the prediction TSV. Columns as read below:
# [0] row key (a str), [1] JSON document with the prediction fields.
for i in range(prediction_tsv.num_rows()):
    # Fetch the row once; every seek() call re-reads and re-splits the line.
    row = prediction_tsv.seek(i)
    print(row[0])
    print(json.loads(row[1]).keys())
    print(type(row[0]))
    break


test1-0-0-img0
dict_keys(['image_h', 'image_w', 'num_boxes', 'objects', 'predicates', 'relations'])
<class 'str'>


In [2]:
class PaddedFasterRCNNFeatureReader:
    """Reads per-image features and pads/truncates them to ``max_loc`` rows.

    ``read`` returns a ``(features, info)`` pair: ``features`` is a float32
    torch tensor with exactly ``max_loc`` rows, and ``info`` is the metadata
    dict produced by ``_load`` (minus its ``"features"`` entry) with a
    ``"max_features"`` tensor holding the row count before padding.
    """

    def __init__(self, max_loc):
        self.max_loc = max_loc
        # `first` is cleared by the first read(); `take_item` is decided then
        # and reused for every later read.
        self.first = True
        self.take_item = False

    def _zero_pad(self, array, num_rows):
        """Copy ``array`` into a zeroed float32 ``(max_loc, width)`` buffer."""
        padded = np.zeros((self.max_loc, array.shape[1]), dtype=np.float32)
        padded[0:num_rows,] = array[: self.max_loc, :]  # noqa
        return padded

    def _load(self, image_feat_path):
        """Load the feature file and, if present, its ``*_info.npy`` sidecar.

        NOTE(review): ``load_feat`` is not defined in this class; it is
        expected to be provided elsewhere.
        """
        image_info = {"features": load_feat(image_feat_path)}

        stem = image_feat_path.split(".npy")[0]
        info_path = "{}_info.npy".format(stem)
        if PathManager.exists(info_path):
            sidecar = load_feat(info_path).item()
            image_info.update(sidecar)

        return image_info

    def read(self, image_feat_path):
        """Return ``(padded_feature_tensor, image_info)`` for one feature path."""
        image_info = self._load(image_feat_path)

        # Only the very first sample decides whether features arrive wrapped
        # in a single-element array holding a dict with an "image_feat" key.
        if self.first:
            self.first = False
            probe = image_info["features"]
            if probe.size == 1 and "image_feat" in probe.item():
                self.take_item = True

        image_feature = image_info["features"]
        if self.take_item:
            item = image_feature.item()
            if "image_text" in item:
                image_info["image_text"] = item["image_text"]
                image_info["is_ocr"] = item["image_bbox_source"]
                image_feature = item["image_feat"]

            if "info" in item:
                nested_info = item["info"]
                if "image_text" in nested_info:
                    image_info.update(nested_info)
                image_feature = item["feature"]

        # Features stored together with class probabilities and boxes:
        # unpack the dict and pad the side arrays to max_loc rows as well.
        if (
            image_info["features"].size == 1
            and "features" in image_info["features"].item()
        ):
            item = image_info["features"].item()
            image_feature = item["features"]
            image_info["image_height"] = item["image_height"]
            image_info["image_width"] = item["image_width"]

            image_loc, _ = image_feature.shape
            image_info["cls_prob"] = self._zero_pad(item["cls_prob"], image_loc)
            image_info["bbox"] = self._zero_pad(item["bbox"], image_loc)
            image_info["num_boxes"] = item["num_boxes"]

        # Features with more than two dimensions are flattened to
        # (rows, last_dim) before padding.
        feature_dims = image_feature.shape
        if len(feature_dims) > 2:
            image_feature = image_feature.reshape(-1, feature_dims[-1])

        image_loc, image_dim = image_feature.shape
        padded_feature = self._zero_pad(image_feature, image_loc)
        image_feature = torch.from_numpy(padded_feature)

        del image_info["features"]
        image_info["max_features"] = torch.tensor(image_loc, dtype=torch.long)
        return image_feature, image_info


class LMDBFeatureReader(PaddedFasterRCNNFeatureReader):
    """Feature reader that loads per-image records from an LMDB database.

    The database is opened lazily on the first ``_load`` call.  It must hold
    a pickled list of record keys under ``b"keys"``; each record is itself a
    pickled dict that is returned as ``image_info``.
    """

    def __init__(self, max_loc, base_path):
        """
        Args:
            max_loc: number of rows the padded feature tensor will have.
            base_path: path of the LMDB database (file or directory).

        Raises:
            RuntimeError: if ``base_path`` does not exist.
        """
        super().__init__(max_loc)
        self.db_path = base_path

        if not PathManager.exists(self.db_path):
            raise RuntimeError(
                "{} path specified for LMDB features doesn't exists.".format(
                    self.db_path
                )
            )
        # Opened lazily by _init_db() on the first _load() call.
        self.env = None
        # Filled in by _init_db(); declared here so the attributes always exist.
        self.image_ids = None
        self.image_id_indices = None

    def _init_db(self):
        """Open the LMDB environment and build the key -> index lookup.

        Raises:
            RuntimeError: if the database has no ``b"keys"`` entry.
        """
        env = lmdb.open(
            self.db_path,
            subdir=os.path.isdir(self.db_path),
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
        try:
            with env.begin(write=False, buffers=True) as txn:
                raw_keys = txn.get(b"keys")
                if raw_keys is None:
                    raise RuntimeError(
                        "LMDB at {} has no b'keys' entry.".format(self.db_path)
                    )
                # NOTE(security): pickle.loads can execute arbitrary code;
                # only open databases that come from a trusted source.
                image_ids = pickle.loads(raw_keys)
        except Exception:
            # Do not leave a half-initialised environment behind.
            env.close()
            raise

        # Publish the state only once everything loaded successfully, so a
        # failed attempt is retried on the next _load() call.
        self.image_ids = image_ids
        self.image_id_indices = {
            image_id: index for index, image_id in enumerate(image_ids)
        }
        self.env = env

    def _load(self, image_file_path):
        """Return the unpickled record for ``image_file_path``.

        The lookup key is derived from the path relative to ``db_path`` with
        everything from ``".npy"`` onwards removed.  The integer after the
        last ``"_"`` is tried first; if it is not an integer or not a known
        key, the whole relative name is used as the key.

        Raises:
            KeyError: if neither candidate key is present in the database.
        """
        if self.env is None:
            self._init_db()

        split = os.path.relpath(image_file_path, self.db_path).split(".npy")[0]
        # NOTE(review): debug output kept because the notebook's recorded
        # cell output contains it; drop it before using this in a data loader.
        print(split)

        img_id_idx = None
        try:
            numeric_key = str(int(split.split("_")[-1])).encode()
        except ValueError:
            numeric_key = None
        if numeric_key is not None:
            img_id_idx = self.image_id_indices.get(numeric_key)

        if img_id_idx is None:
            # The image id is complex or involves folder, use it directly
            full_key = str(split).encode()
            if full_key not in self.image_id_indices:
                raise KeyError(
                    "No LMDB entry for {!r} in {}".format(split, self.db_path)
                )
            img_id_idx = self.image_id_indices[full_key]

        record_key = self.image_ids[img_id_idx]
        with self.env.begin(write=False, buffers=True) as txn:
            # With buffers=True the returned buffer is only valid inside the
            # transaction, so unpickle before leaving the `with` block.
            raw_record = txn.get(record_key)
            if raw_record is None:
                raise KeyError(
                    "Key {!r} is listed in b'keys' but has no record in {}".format(
                        record_key, self.db_path
                    )
                )
            # NOTE(security): see _init_db() regarding pickle and untrusted data.
            image_info = pickle.loads(raw_record)

        return image_info

In [6]:
# NOTE(review): absolute, machine-specific path; adjust it to your environment.
ann_data_path = "/fsx/zmykevin/data/mmf_data/datasets/nlvr2/defaults/annotations/test1.jsonl"
# The file is JSON Lines; only the first record is kept, in `sample`.
with open(ann_data_path, "r") as f:
    for line in f:
        sample = json.loads(line)
        # Print the already-parsed record instead of parsing the line twice.
        print(sample)
        break

{'validation': {'28': 'False'}, 'sentence': 'There is an empty glass.', 'left_url': 'http://www.belgiansmaak.com/wp-content/gallery/belgian-beers-post/dynamic/31.-Tilquin.jpg-nggid041318-ngg0dyn-0x0x100-00f0w010c010r110f110r010t010.jpg', 'writer': '103', 'label': 'False', 'right_url': 'https://www.craftbrewingbusiness.com/wp-content/uploads/2017/09/unnamed-1.jpg', 'synset': 'beer bottle', 'query': 'group of beer bottles41', 'identifier': 'test1-0-1-0', 'extra_validations': {'56': 'False', '83': 'False', '19': 'False', '92': 'False'}}


In [7]:
# Location of the LMDB feature database.
# NOTE(review): absolute, machine-specific path; adjust it to your environment.
base_path = "/fsx/zmykevin/data/mmf_data/datasets/nlvr2/defaults/features/test_vinvl.lmdb"
# Every feature tensor returned by this reader is padded/truncated to 100 rows.
feature_reader = LMDBFeatureReader(max_loc=100, base_path=base_path)

In [8]:
# Drop the last "-"-separated field of the annotation identifier and append
# "-img0" to form the feature key (e.g. "test1-0-1-0" -> "test1-0-1-img0").
image_key = "-".join(sample["identifier"].split("-")[:-1])
# Build the path from `base_path` instead of repeating the LMDB location.
image_feat_path = os.path.join(base_path, "{}-img0.npy".format(image_key))
feats, info = feature_reader.read(image_feat_path)

test1-0-1-img0


In [9]:
# Shape of the padded feature tensor; the row count equals the reader's max_loc.
print(feats.size())

torch.Size([100, 2048])


In [10]:
# Metadata returned with the features: read() removes the "features" entry
# and adds "max_features".
print(info.keys())

dict_keys(['image_height', 'image_width', 'num_boxes', 'objects', 'bbox', 'cls_prob', 'max_features'])


In [None]:
# Row count of the feature matrix before padding, stored as a long tensor.
print(info["max_features"])