In [1]:
import numpy as np
import lmdb
import os
import pickle
import torch
# from mmf.utils.file_io import PathManager
from iopath.common.file_io import PathManager as pm

# Shared iopath PathManager instance. The reader classes defined later in this
# notebook call PathManager.exists() on feature and LMDB paths.
PathManager = pm()

In [2]:
# Annotation file (.npy) that the next cell loads with np.load.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
data_path = "/fsx/zmykevin/data/mmf_data/datasets/vqa2/defaults/annotations/imdb_minival2014.npy"

In [3]:
# allow_pickle=True is needed because the array stores Python dicts (see the
# record printed below). Unpickling can execute arbitrary code, so only load
# files from a trusted source this way.
val_annotation = np.load(data_path, allow_pickle=True)

In [4]:
# The annotation array is 1-D; its length is the number of stored entries.
print(val_annotation.shape)

(3001,)


In [5]:
# Show a single entry to inspect which fields an annotation record carries.
print(val_annotation[1])

{'image_name': 'COCO_val2014_000000573843', 'image_id': 573843, 'question_id': 573843005, 'feature_path': 'COCO_val2014_000000573843.npy', 'question_str': 'Are there clouds?', 'question_tokens': ['are', 'there', 'clouds'], 'all_answers': ['yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes'], 'ocr_tokens': [], 'answers': ['yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes']}


In [8]:
class PaddedFasterRCNNFeatureReader:
    """Read per-image features from ``.npy`` files and pad them to ``max_loc`` rows.

    ``read`` returns a float32 tensor with exactly ``max_loc`` rows (zero-padded
    or truncated) plus a dict with whatever metadata was found next to the
    features.

    Attributes:
        max_loc: Number of rows every returned feature tensor has.
        first: ``True`` until the first ``read`` call; the first file decides
            ``take_item``.
        take_item: Set on the first ``read`` when the loaded array is a
            single-element wrapper around a dict containing ``"image_feat"``.
            Once set, every later file is unwrapped the same way.
    """

    def __init__(self, max_loc):
        self.max_loc = max_loc
        self.first = True
        self.take_item = False

    @staticmethod
    def _load_feat(feat_path):
        """Load one ``.npy`` file through ``PathManager`` and return the array.

        This replaces a call to a free function ``load_feat`` that is not
        defined anywhere in this notebook, so using this reader directly used
        to fail with ``NameError``.
        """
        # allow_pickle=True: ``_load`` calls ``.item()`` on the ``*_info.npy``
        # payload, i.e. these files hold pickled Python objects.
        # SECURITY: unpickling can execute arbitrary code; only read trusted files.
        with PathManager.open(feat_path, "rb") as handle:
            return np.load(handle, allow_pickle=True)

    def _pad_rows(self, array):
        """Return ``array`` zero-padded / truncated to ``(max_loc, array.shape[1])`` float32."""
        # Use the array's own row count so a row-count mismatch between
        # features, cls_prob and bbox cannot cause a shape error.
        kept_rows = min(array.shape[0], self.max_loc)
        padded = np.zeros((self.max_loc, array.shape[1]), dtype=np.float32)
        padded[:kept_rows] = array[:kept_rows]
        return padded

    def _load(self, image_feat_path):
        """Load the feature file and, when present, its ``<stem>_info.npy`` sidecar.

        Returns:
            dict with the loaded array under ``"features"``, updated with the
            sidecar dict's entries if the sidecar file exists.
        """
        image_info = {}
        image_info["features"] = self._load_feat(image_feat_path)

        info_path = "{}_info.npy".format(image_feat_path.split(".npy")[0])
        if PathManager.exists(info_path):
            image_info.update(self._load_feat(info_path).item())

        return image_info

    def read(self, image_feat_path):
        """Load one image's features and pad them to ``max_loc`` rows.

        Args:
            image_feat_path: Path handed to ``_load``.

        Returns:
            ``(image_feature, image_info)``. ``image_feature`` is a float32
            ``torch.Tensor`` of shape ``(max_loc, dim)``. ``image_info`` no
            longer holds ``"features"`` and gains ``"max_features"``.
        """
        image_info = self._load(image_feat_path)

        # The layout of the very first file decides how later files are unwrapped.
        if self.first:
            self.first = False
            if (
                image_info["features"].size == 1
                and "image_feat" in image_info["features"].item()
            ):
                self.take_item = True

        image_feature = image_info["features"]
        if self.take_item:
            item = image_info["features"].item()
            if "image_text" in item:
                image_info["image_text"] = item["image_text"]
                image_info["is_ocr"] = item["image_bbox_source"]
                image_feature = item["image_feat"]

            if "info" in item:
                if "image_text" in item["info"]:
                    image_info.update(item["info"])
                image_feature = item["feature"]

        # Handle case of features with class probs: a single-element wrapper
        # around a dict that carries "features" plus per-region metadata.
        if (
            image_info["features"].size == 1
            and "features" in image_info["features"].item()
        ):
            item = image_info["features"].item()
            image_feature = item["features"]
            image_info["image_height"] = item["image_height"]
            image_info["image_width"] = item["image_width"]

            # Resize these to self.max_loc
            image_info["cls_prob"] = self._pad_rows(item["cls_prob"])
            image_info["bbox"] = self._pad_rows(item["bbox"])
            image_info["num_boxes"] = item["num_boxes"]

        # Handle the case of ResNet152 features: flatten all leading axes so
        # the array is (rows, dim).
        if len(image_feature.shape) > 2:
            shape = image_feature.shape
            image_feature = image_feature.reshape(-1, shape[-1])

        image_loc, image_dim = image_feature.shape
        image_feature = torch.from_numpy(self._pad_rows(image_feature))

        del image_info["features"]
        # NOTE: this is the row count *before* truncation, so it can be larger
        # than max_loc (and than the number of rows actually returned).
        image_info["max_features"] = torch.tensor(image_loc, dtype=torch.long)
        return image_feature, image_info


class LMDBFeatureReader(PaddedFasterRCNNFeatureReader):
    """Padded feature reader whose records come from an LMDB store.

    The constructor only checks that ``base_path`` exists. The LMDB
    environment is opened on the first ``_load`` call. Records and the key
    list are unpickled, so the store must come from a trusted source.
    """

    def __init__(self, max_loc, base_path):
        super().__init__(max_loc)
        self.db_path = base_path

        if not PathManager.exists(self.db_path):
            raise RuntimeError(
                f"{self.db_path} path specified for LMDB features doesn't exists."
            )
        # Filled in by _init_db() the first time a record is requested.
        self.env = None

    def _init_db(self):
        """Open the LMDB environment read-only and build the key -> position index."""
        open_options = dict(
            subdir=os.path.isdir(self.db_path),
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
        self.env = lmdb.open(self.db_path, **open_options)

        # The list of record keys is itself stored, pickled, under b"keys".
        with self.env.begin(write=False, buffers=True) as txn:
            self.image_ids = pickle.loads(txn.get(b"keys"))

        self.image_id_indices = {
            key: position for position, key in enumerate(self.image_ids)
        }

    def _load(self, image_file_path):
        """Return the unpickled record for ``image_file_path``.

        The lookup key is derived from the path relative to ``db_path`` with
        everything from ``.npy`` onwards removed. The integer after the last
        underscore is tried first, then the whole relative stem.

        Raises:
            KeyError: if neither form of the key is present in the index.
        """
        if self.env is None:
            self._init_db()

        relative_stem = os.path.relpath(image_file_path, self.db_path).split(".npy")[0]

        try:
            # int() strips leading zeros, so "..._000000000042" looks up b"42".
            numeric_key = str(int(relative_stem.split("_")[-1])).encode()
            record_position = self.image_id_indices[numeric_key]
        except (ValueError, KeyError):
            # The image id is complex or involves a folder: use the stem as is.
            record_position = self.image_id_indices[str(relative_stem).encode()]

        with self.env.begin(write=False, buffers=True) as txn:
            raw_record = txn.get(self.image_ids[record_position])
            image_info = pickle.loads(raw_record)

        return image_info

In [9]:
# LMDB store with the image features; passed to LMDBFeatureReader below.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
base_path = "/fsx/zmykevin/data/mmf_data/datasets/coco/defaults/features/test2015_vinvl_nopadding.lmdb"

In [10]:
# Build the feature reader. 30 is max_loc, the number of rows every returned
# feature tensor is padded or truncated to. The constructor only checks that
# base_path exists; the LMDB environment is opened on the first read.
feature_reader = LMDBFeatureReader(30, base_path)

In [11]:
# read() returns the padded feature tensor and an info dict. The reader derives
# the LMDB lookup key from this path relative to base_path.
feats, info = feature_reader.read("/fsx/zmykevin/data/mmf_data/datasets/coco/defaults/features/test2015_vinvl_nopadding.lmdb/COCO_test2015_000000262144.npy")

In [12]:
# The first dimension equals max_loc (30) because read() pads/truncates rows.
print(feats.size())

torch.Size([30, 2048])


In [13]:
# Shape of the 'cls_prob' array carried in the info dict.
print(info['cls_prob'].shape)

(30, 1601)


In [14]:
# max_features is the feature row count *before* truncation to max_loc, so it
# can be larger than the 30 rows present in feats.
print(info['max_features'])

tensor(51)


In [15]:
# List every field available in the returned info dict.
print(info.keys())

dict_keys(['feature_path', 'image_height', 'image_width', 'num_boxes', 'objects', 'bbox', 'cls_prob', 'max_features'])


In [21]:
print(info['bbox'].shape)  # presumably normalized detected box coordinates, one row per region — TODO confirm

(30, 4)


In [22]:
# Box count as stored in the info dict.
print(info['num_boxes'])

38


In [23]:
# Feature path recorded inside the loaded record.
# NOTE(review): the value printed below differs from the requested file name
# (COCO_test2015_000000262144) — verify the LMDB key-to-record mapping.
print(info['feature_path'])

COCO_test2015_000000000001


In [49]:
# 'objects' field of the record (presumably per-region class ids — TODO confirm).
print(info['objects'])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0]
