# Frustum PointNet

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
import plotly.offline as py
from IPython.display import Image

# Shared plotly layout for the 3D figures in this notebook: zero page margins
# and a scene whose aspect mode is 'data'.
EQUAL_ASPECT_RATIO_LAYOUT = {
    'margin': dict(l=0, r=0, b=0, t=0),
    'scene': {'aspectmode': 'data'},
}


def color(x, cmap='Reds'):
    """Map numeric values to colors with a matplotlib colormap.

    The values are min-max normalized to the [0, 1] range and then passed to
    the colormap object.

    Parameters
    ----------
    x : array-like
        Numeric values to colorize.
    cmap : str
        Colormap name, resolved with ``plt.get_cmap``.

    Returns
    -------
    The result of calling the colormap on the normalized values.
    """
    cmap = plt.get_cmap(cmap)
    x = np.asarray(x, dtype=float)

    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # All values are equal: avoid 0/0 and map everything to the start of
        # the colormap.
        return cmap(np.zeros_like(x))

    # Divide by the value range (not by the maximum) so that the result
    # spans exactly [0, 1] even when the minimum is not zero.
    return cmap((x - lowest) / span)

%matplotlib inline

In [None]:
# Show the teaser figure from the frustum-pointnets project page; the image is
# referenced by URL rather than stored with the notebook.
Image(url='http://stanford.edu/~rqi/frustum-pointnets/images/teaser.jpg')

Given RGB-D data, we first generate 2D object region proposals in the RGB image using a CNN. Each 2D region is then extruded to a 3D viewing frustum in which we get a point cloud from depth data. Finally, our frustum PointNet predicts an (oriented and amodal) 3D bounding box for the object from the points in the frustum.

# Получение frustum-примеров из сырых данных

In [None]:
# Show the KITTI sensor photo (passat_sensors.jpg), referenced by URL.
Image(url='http://www.cvlibs.net/datasets/kitti/images/passat_sensors.jpg')

In [None]:
# Show the KITTI top-view setup diagram (setup_top_view.png), referenced by URL.
Image(url='http://www.cvlibs.net/datasets/kitti/images/setup_top_view.png')

Getting the data:

In [None]:
# NOTE(review): despite the .npy extension this file is read with pickle, not
# np.load. Unpickling can execute arbitrary code, so only load a trusted file.
# encoding='latin1' is presumably needed because the pickle was written by
# Python 2 -- confirm with whoever produced the file.
with open('projection_example.npy', 'rb') as f:
    data_example = pickle.load(f, encoding='latin1')

Sample image, point cloud data, calibration and transforms:

In [None]:
# Pull the individual entries out of the loaded dictionary.
# Point cloud coordinates and the matching intensity values.
xyz, intensity = (data_example[key] for key in ('lidar_xyz', 'intensity'))
# Transforms between coordinate frames (car_to_lidar is not used below).
car_to_cam, car_to_lidar = (
    data_example[key] for key in ('car_to_cam', 'car_to_lidar')
)
# Camera calibration and the camera image.
intrinsic, image = (data_example[key] for key in ('intrinsics', 'image'))

In [None]:
# Inspect the 'intrinsics' entry loaded from the example data.
intrinsic

In [None]:
# Check the dimensions of the car -> camera transform.
car_to_cam.shape

Let's plot the point cloud:

In [None]:
# Scatter of the first two point-cloud coordinates, cropped to a fixed window.
plt.figure(figsize=(20, 20))
plt.scatter(*xyz[:, :2].T, s=0.1)
plt.gca().set(xlim=(0, 60), ylim=(-35, 35));

Do the same with plotly:

In [None]:
# 3D scatter of the whole point cloud, colored by intensity.
fig = go.Figure(layout=EQUAL_ASPECT_RATIO_LAYOUT)
fig.add_scatter3d(
    x=xyz[:, 0],
    y=xyz[:, 1],
    z=xyz[:, 2],
    mode='markers',
    marker=dict(size=1, color=color(intensity, 'tab20')),
)

py.iplot(fig)


Sample image:

In [None]:
# Display the camera image from the example data; the trailing ';' hides the
# textual repr of the returned image object.
plt.figure(figsize=(15, 10))
plt.imshow(image);

Setting up a frustum (e.g., from a bounding box predicted by a 2D detection pipeline):

In [None]:
# Two opposite corners of the 2D box. The plotting and filtering cells below
# use column 0 as the vertical image coordinate and column 1 as the horizontal.
bbox = np.array([[350, 750], [460, 810]])

In [None]:
def bbox_to_vertices(bbox):
    """Return the closed outline of a two-corner box.

    Parameters
    ----------
    bbox : np.ndarray of shape (2, 2)
        Two opposite corners, one per row.

    Returns
    -------
    np.ndarray of shape (5, 2)
        The four corners in drawing order, followed by the first corner
        again so that the polyline is closed.
    """
    lo_0, lo_1 = bbox[0, 0], bbox[0, 1]
    hi_0, hi_1 = bbox[1, 0], bbox[1, 1]

    outline = [(lo_0, lo_1), (lo_0, hi_1), (hi_0, hi_1), (hi_0, lo_1)]
    outline.append(outline[0])  # repeat the start point to close the loop
    return np.array(outline)

In [None]:
# Compute the outline here before displaying it, so this cell does not depend
# on a variable that is only assigned in a later cell.
vertices = bbox_to_vertices(bbox)
vertices

In [None]:
# Draw the box outline in green on top of the camera image.
vertices = bbox_to_vertices(bbox)
plt.figure(figsize=(15, 10))
plt.imshow(image)
# Reverse the columns so that column 1 becomes the horizontal plot coordinate
# and column 0 the vertical one.
plt.plot(*vertices[:, ::-1].T, color='g');

# Что хотим сделать? 

* Спроецируем точки лидара в камеру
* Посмотрим какие точки попали в коробку
* Оставим только эти точки

Применяем два трансформа за одну операцию:
* car_frame -> camera_frame,
* camera_frame -> image_frame.

In [None]:
# Compose both steps into a single matrix: car_to_cam is applied first,
# then the intrinsics.
transform = intrinsic @ car_to_cam

In [None]:
# Check the dimensions of the combined transform.
transform.shape

Переводим в однородные координаты:

In [None]:
# Check the point-cloud dimensions before padding.
xyz.shape

In [None]:
# Append a column of ones to every point (homogeneous coordinates).
xyz_homogen = np.concatenate(
    [xyz, np.ones((xyz.shape[0], 1), dtype=xyz.dtype)], axis=1
)

In [None]:
# The padded array has one more column than xyz.
xyz_homogen.shape

In [None]:
# Look at the first point: its last component is the appended 1.
xyz_homogen[0]

Теперь при домножении на ненулевое число новый набор координат будет продолжать задавать ту же точку в 3D-пространстве, что и до домножения.

Это пригодится для применения посчитанного трансформа:

In [None]:
# Apply the combined transform to every point at once; points are rows, so
# multiply by the transposed matrix.
xyw = xyz_homogen @ transform.T

In [None]:
# Look at the first transformed point, before the division by its last component.
xyw[0]

Можно вернуть на холст камеры, поделив на последнюю координату (обратное преобразование):

In [None]:
# Perspective divide: scale the first two components by the remaining one.
xy_cam = np.divide(xyw[:, :2], xyw[:, 2:])

In [None]:
# Inspect the projected 2D coordinates.
xy_cam

In [None]:
# One row per point, two columns after the perspective divide.
xy_cam.shape

Пофильтровать по маске:

In [None]:
# Keep points whose projection falls inside the given pixel window and whose
# first 3D coordinate is positive.
# NOTE(review): 1240 and 540 look like the image width and height in pixels --
# confirm against image.shape.
xy_cam_mask = (
    (xy_cam[:, 1] < 540) & (xy_cam[:, 1] > 0)
    & (xy_cam[:, 0] < 1240) & (xy_cam[:, 0] > 0)
    & (xyz[:, 0] > 0)
)

Визуализируем по дальности:

In [None]:
# Distance of every point from the origin, using only its first two
# coordinates (the third one is ignored).
r = np.linalg.norm(xyz[:, :2], axis=1)

In [None]:
# Overlay the projected points that pass the mask on the image, colored by
# log(1 + r).
plt.figure(figsize=(20, 10))
plt.imshow(image)
plt.scatter(*xy_cam[xy_cam_mask].T, c=np.log1p(r[xy_cam_mask]), s=1);

In [None]:
# Keep only the projections selected by the mask. The projection is recomputed
# from xyw instead of filtering xy_cam in place: the mask has one entry per
# original point, so indexing an already filtered xy_cam would fail when this
# cell is run a second time.
xy_cam = (xyw[:, :2] / xyw[:, 2:])[xy_cam_mask]

Точки внутри фрустума:

In [None]:
# Points whose projection lies strictly inside the box: column 1 of xy_cam is
# compared with column 0 of bbox, and column 0 of xy_cam with column 1 of bbox.
in_box = (
    (xy_cam[:, 1] > bbox[0, 0]) & (xy_cam[:, 1] < bbox[1, 0])
    & (xy_cam[:, 0] < bbox[1, 1]) & (xy_cam[:, 0] > bbox[0, 1])
)

In [None]:
# Overlay only the projections that fall inside the box.
plt.figure(figsize=(20, 10))
plt.imshow(image)
plt.scatter(*xy_cam[in_box].T, s=1)

In [None]:
# Select the 3D points that passed both filters: in_box indexes the points
# kept by xy_cam_mask, so map it back to row numbers of the original cloud.
xyz_frustum = xyz[np.flatnonzero(xy_cam_mask)[in_box]]

In [None]:
# Scatter of the first two coordinates of the frustum points.
plt.scatter(*xyz_frustum[:, :2].T, s=0.5)
plt.xlim(0, 70)
plt.ylim(-25, 25)

In [None]:
# 3D scatter of the frustum points only.
# The colors must come from the intensities of the same points that are
# plotted, so apply the same two filters that produced xyz_frustum; using the
# full intensity array would pair points with colors of unrelated points.
intensity_frustum = intensity[xy_cam_mask][in_box]

fig = go.Figure(layout=EQUAL_ASPECT_RATIO_LAYOUT)
fig.add_scatter3d(**{
    'x': xyz_frustum[:,0],
    'y': xyz_frustum[:,1],
    'z': xyz_frustum[:,2],
    'mode': 'markers',
    'marker': {
        'size': 1,
        'color': color(intensity_frustum, 'tab20')
    }
})

py.iplot(fig)