In [1]:
# Imports for core lib
import torch as th
from torchvision import transforms
import numpy as np

In [2]:
# Imports for visualization
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

In [3]:
# Imports for dataset
from objectron_dataset import (Objectron, SampleObjectron,
                               DecodeImage, ParseFixedLength, _skip_none)
from augment import PhotometricAugment

ModuleNotFoundError: No module named 'objectron_dataset'

In [None]:
# Experiment configuration -- every knob the cells below read.

# Dataset source: True selects SampleObjectron, False selects Objectron.
use_cached_samples = True

# Loader settings (forwarded to the DataLoader).
num_workers = 0
batch_size = 2

# Per-sample geometry.
# `image_shape` is forwarded to DecodeImage(size=...); presumably
# (height, width) -- TODO confirm against DecodeImage.
image_shape = (480, 640)
# Number of columns in the visualization grid (frames per sequence).
sequence_length = 8

In [None]:
# Build, in order: the dataset settings, the per-sample transform
# pipeline, the dataset itself, and the loader that batches it.

# Choose the dataset implementation from the experiment configuration.
if use_cached_samples:
    obj_cls = SampleObjectron
else:
    obj_cls = Objectron
opts = obj_cls.Settings()

# Transforms are applied in the listed order by `transforms.Compose`.
xfm = transforms.Compose([
    DecodeImage(size=image_shape),
    ParseFixedLength(ParseFixedLength.Settings()),
    PhotometricAugment(PhotometricAugment.Settings()),
])
dataset = obj_cls(opts, xfm)

# `_skip_none` is used as the collate function
# (presumably drops failed/None samples -- verify in objectron_dataset).
loader = th.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    collate_fn=_skip_none,
)

In [None]:
# Fetch a single batch from the loader. Takes a VERY long time.
# A fresh iterator is created every time this cell runs, so `data` is
# whatever that new iterator yields first.
data = next(iter(loader))

In [None]:
# Unpack the batch into its two parts, then show the 'object/class'
# entry of `features` as this cell's output.
contexts, features = data
features['object/class']

In [None]:
def draw_objects_with_pose(feat, radius=16, color=(0, 0, 255)):
    """Draw the projected 3D bounding-box corners of each object on a frame.

    Args:
        feat: Mapping of per-frame tensors. Keys read here:
            'image' (unpacked as (C, H, W)), 'instance_num',
            'object/orientation' (9 values per instance),
            'object/translation' (3 per instance),
            'object/scale' (3 per instance) and
            'camera/projection' (16 values, reshaped to 4x4).
        radius: Radius, in pixels, of each corner marker.
        color: Marker color, forwarded unchanged to `cv2.circle`.

    Returns:
        A contiguous (H, W, C) numpy copy of the image with one filled
        circle drawn per projected box corner.
    """
    image = feat['image'].cpu().numpy()
    _, h, w = image.shape

    # NOTE(ycho): copy required due to OpenCV restrictions (contiguous)
    image = image.transpose(1, 2, 0).copy()

    # Corners of the unit cube centered at the origin, as homogeneous
    # points (x, y, z, 1). Same ordering as before: x outermost, z innermost.
    corners = np.asarray(
        [(cx, cy, cz, 1.0)
         for cx in (-0.5, +0.5)
         for cy in (-0.5, +0.5)
         for cz in (-0.5, +0.5)],
        dtype=np.float64)

    # Camera projection does not depend on the instance; compute it once.
    # NOTE(ycho): Looks like `camera/view` is not needed.
    # Perhaps it's been fused into object/{translation,orientation}.
    T_p = feat['camera/projection'].cpu().numpy().reshape(4, 4)

    num_inst = int(feat['instance_num'][0])
    for i in range(num_inst):
        irxn = feat['object/orientation'][i * 9:(i + 1) * 9]
        itxn = feat['object/translation'][i * 3:(i + 1) * 3]
        iscale = feat['object/scale'][i * 3:(i + 1) * 3]

        # Unit cube -> box dimensions.
        T_scale = np.diag(np.r_[iscale.cpu().numpy(), 1.0])
        # Box frame -> camera frame (rotation + translation).
        T_box = np.eye(4)
        T_box[:3, :3] = irxn.reshape(3, 3).cpu().numpy()
        T_box[:3, -1] = itxn.cpu().numpy()

        # Compose all transforms.
        T = T_p @ T_box @ T_scale

        # The projection is projective, not affine: apply the full 4x4
        # matrix and divide by the homogeneous w (last row), rather than
        # by the clip-space z as an affine 3x3+offset shortcut would.
        clip = corners @ T.T
        with np.errstate(divide='ignore', invalid='ignore'):
            ndc = clip[:, :2] / clip[:, 3:]
        # Corners with w == 0 yield inf/nan, which int() cannot convert.
        ndc = ndc[np.isfinite(ndc).all(axis=-1)]

        # TODO(ycho): Consider also incorporating
        # NDC transform into the above composed xfm.
        # NDC axis 0 maps to image rows, axis 1 to image columns.
        y = (1 + ndc[:, 0]) * (0.5 * h)
        x = (1 + ndc[:, 1]) * (0.5 * w)

        for (px, py) in zip(x, y):
            cv2.circle(image, (int(px), int(py)), radius, color, -1)
    return image

In [None]:
# Show every frame of every sequence in the batch with its projected boxes
# (one row per sample, one column per frame).
# NOTE: collation may yield fewer than `batch_size` samples (short sequence
# or network error), so the loops are clamped to the actual leading
# (batch, sequence) dimensions of the loaded data; unused axes stay empty.
num_batch, num_seq = features['image'].shape[:2]
num_batch = min(batch_size, num_batch)
num_seq = min(sequence_length, num_seq)

# squeeze=False keeps `axs` two-dimensional even when there is a single
# row or column, so `axs[i_batch, i_seq]` is always valid.
fig, axs = plt.subplots(batch_size, sequence_length,
                        figsize=(12, 8), dpi=200, squeeze=False)
for i_batch in range(num_batch):
    # Per-sample context depends only on the batch index; it is not used by
    # the drawing code but is kept available for inspection.
    ctx = {k: v[i_batch] for (k, v) in contexts.items()}
    for i_seq in range(num_seq):
        feat = {k: v[i_batch, i_seq] for (k, v) in features.items()}
        image = draw_objects_with_pose(feat)

        axs[i_batch, i_seq].imshow(image)