In [2]:
from datetime import datetime
from multiprocessing import Pool
import os
os.environ["OMP_NUM_THREADS"] = "1"

import matplotlib.pyplot as plt
%matplotlib inline
import torch
import pandas as pd
import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm, tqdm_notebook
import scipy
import scipy.ndimage
import scipy.special
from scipy.spatial.transform import Rotation as R

from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

In [3]:
!ln -s /media/bob/data/lyft/train_images images
!ln -s /media/bob/data/lyft/train_maps maps
!ln -s /media/bob/data/lyft/train_lidar lidar

In [43]:
# Some hyperparameters we'll need to define for the system
# Size of one voxel in world coordinates along (x, y, z); passed to draw_boxes / car_to_voxel_coords.
voxel_size = (0.2, 0.2, 1.0)
# Offset along z, in world coordinates, applied when mapping points into voxel space.
z_offset = -2.0
# Shape of the bird's-eye-view volume; presumably matches the 672x672 BEV images loaded later -- TODO confirm.
bev_shape = (672, 672, 3)

In [4]:
# Open the Lyft training set: table JSON files come from json_path, data_path is the current directory.
level5data = LyftDataset(
    json_path='/media/bob/data/lyft/train_data',
    data_path='.',
    verbose=False,
)

# Object category names.
classes = [
    "car",
    "motorcycle",
    "bus",
    "bicycle",
    "truck",
    "pedestrian",
    "other_vehicle",
    "animal",
    "emergency_vehicle",
]

In [5]:
# Pair every scene with the timestamp of its first sample so scenes can be ordered by start time.
records = [
    (level5data.get('sample', record['first_sample_token'])['timestamp'], record)
    for record in level5data.scene
]

entries = []

# Sort on the timestamp only: plain tuple sorting would fall back to comparing the scene records
# themselves when two scenes share a timestamp, which raises TypeError for dict records.
for first_timestamp, record in sorted(records, key=lambda pair: pair[0]):
    # Reuse the timestamp fetched above instead of querying the dataset a second time.
    # The division by 1e6 yields the seconds value that datetime expects.
    start_time = first_timestamp / 1000000

    token = record['token']
    name = record['name']
    date = datetime.utcfromtimestamp(start_time)
    # Host id is the first two dash-separated parts of the scene name, e.g. "host-a004".
    host = "-".join(name.split("-")[:2])
    first_sample_token = record["first_sample_token"]

    entries.append((host, name, date, token, first_sample_token))

# One row per scene, ordered by start time.
df = pd.DataFrame(entries, columns=["host", "scene_name", "date", "scene_token", "first_sample_token"])

In [6]:
# Number of scenes recorded per host.
scenes_by_host = df.groupby("host")
host_count_df = scenes_by_host['scene_token'].count()
print(host_count_df)

host
host-a004    42
host-a005     1
host-a006     3
host-a007    26
host-a008     5
host-a009     9
host-a011    51
host-a012     2
host-a015     6
host-a017     3
host-a101    20
host-a102    12
Name: scene_token, dtype: int64


In [17]:
# Hold out every scene recorded by these hosts for validation; all remaining scenes form the training set.
validation_hosts = [
    "host-a008",
    "host-a009",
    "host-a012",
    "host-a015",
    "host-a017",
]

is_validation_host = df["host"].isin(validation_hosts)
validation_df = df.loc[is_validation_host]
vi = validation_df.index
train_df = df.loc[~df.index.isin(vi)]

In [57]:
def draw_border(img, point1, point2, point3, point4, line_length):
    """
    Mark four corner points on ``img`` with a filled dot and two short axis-aligned ticks each.

    The points are treated as top-left, bottom-left, top-right and bottom-right (in that order):
    the ticks of each corner run ``line_length`` pixels along x and y in the direction that this
    naming implies. Dots are drawn in (255, 0, 255), ticks in (0, 255, 0).

    Each point must unpack into an ``(x, y)`` pair. Returns ``img``.
    """
    dot_color = (255, 0, 255)
    tick_color = (0, 255, 0)

    x1, y1 = point1
    x2, y2 = point2
    x3, y3 = point3
    x4, y4 = point4

    # Filled dots (thickness -1) of radius 3 on all four corners, drawn before any tick.
    for corner in ((x1, y1), (x2, y2), (x3, y3), (x4, y4)):
        cv2.circle(img, corner, 3, dot_color, -1)

    # (start, end) of every tick, listed in drawing order.
    ticks = (
        ((x1, y1), (x1, y1 + line_length)),  # top-left
        ((x1, y1), (x1 + line_length, y1)),
        ((x2, y2), (x2, y2 - line_length)),  # bottom-left
        ((x2, y2), (x2 + line_length, y2)),
        ((x3, y3), (x3 - line_length, y3)),  # top-right
        ((x3, y3), (x3, y3 + line_length)),
        ((x4, y4), (x4, y4 - line_length)),  # bottom-right
        ((x4, y4), (x4 - line_length, y4)),
    )
    for start, end in ticks:
        cv2.line(img, start, end, tick_color, 2)

    return img

In [7]:
def create_transformation_matrix_to_voxel_space(shape, voxel_size, offset):
    """
    Build the 4x4 homogeneous matrix that maps world coordinates into a voxel volume of ``shape``.

    The world origin (0,0,0) lands in the center of the volume. ``voxel_size`` is the extent of one
    voxel in world coordinates per axis, so (1,1,1) means one voxel per world unit.

    ``offset`` is a per-axis shift in world coordinates; it is mainly useful for Z (up-down) with
    lidar points. The matrix contains a scale and a translation only, no rotation.
    """
    shape = np.array(shape)
    voxel_size = np.array(voxel_size)
    offset = np.array(offset)

    # Per-axis scale (voxels per world unit) on the diagonal, 1 in the homogeneous slot.
    diagonal = np.append(1 / voxel_size, 1)
    tm = np.eye(4, dtype=np.float32) * diagonal  # (4,4)

    # Translation in voxels: half the volume centers the origin, plus the offset converted to voxels.
    tm[:3, 3] = shape / 2 + offset / voxel_size  # (3,)
    return tm

def transform_points(points, transf_matrix):
    """
    Apply a homogeneous transformation matrix to a (3,N) or (4,N) array of points.

    Only the first three rows of ``points`` are used; a row of ones is appended before the
    multiplication and the first three rows of the product are returned, i.e. a (3,N) result.

    Raises:
        Exception: if ``points`` does not have 3 or 4 rows.
    """
    row_count = points.shape[0]
    if row_count != 3 and row_count != 4:
        raise Exception("Points input should be (3,N) or (4,N) shape, received {}".format(points.shape))

    xyz = points[:3, :]
    homogeneous = np.vstack((xyz, np.ones(points.shape[1])))  # (4,N)
    transformed = transf_matrix.dot(homogeneous)
    return transformed[:3, :]

def car_to_voxel_coords(points, shape, voxel_size, z_offset=0):
    """
    Map (3,N) or (4,N) points into the coordinates of a voxel volume of the given ``shape``.

    The transform is the one built by ``create_transformation_matrix_to_voxel_space`` with an
    offset of ``(0, 0, z_offset)``. Returns the transformed points as a (3,N) array.

    Raises:
        Exception: if ``shape`` does not have three entries, or ``points`` is not a 2-D array
            with 3 or 4 rows.
    """
    if len(shape) != 3:
        raise Exception("Voxel volume shape should be 3 dimensions (x,y,z)")

    is_two_dimensional = len(points.shape) == 2
    if not (is_two_dimensional and points.shape[0] in [3, 4]):
        raise Exception("Input points should be (3,N) or (4,N) in shape, found {}".format(points.shape))

    world_to_voxel = create_transformation_matrix_to_voxel_space(shape, voxel_size, (0, 0, z_offset))
    return transform_points(points, world_to_voxel)  # (3, N)

In [65]:
def move_boxes_to_car_space(boxes, ego_pose):
    """
    Re-express boxes given in world space in the frame of the ego vehicle.

    Every box is first translated by the negated ego translation and then rotated by the inverse
    of the ego rotation.

    Note: the boxes are modified in place and nothing is returned.
    """
    world_to_car_shift = -np.array(ego_pose['translation'])
    world_to_car_rotation = Quaternion(ego_pose['rotation']).inverse

    for world_box in boxes:
        # Order matters: shift to the ego origin first, then rotate around it.
        world_box.translate(world_to_car_shift)
        world_box.rotate(world_to_car_rotation)
        
def scale_boxes(boxes, factor):
    """
    Multiply the ``wlh`` attribute of every box by ``factor``.

    Note: mutates input boxes
    """
    for current_box in boxes:
        scaled_wlh = current_box.wlh * factor
        current_box.wlh = scaled_wlh

def draw_boxes(im, voxel_size, boxes, classes, z_offset=0.0):
    """
    Draw corner markers for the bottom face of every box onto ``im``.

    For each box the bottom corners are mapped into voxel coordinates with ``car_to_voxel_coords``
    (using ``im.shape`` as the volume shape), the z coordinate is dropped, the result is truncated
    to integers and the first four corners are handed to ``draw_border``.

    Args:
        im: image array to draw on; its ``shape`` must have three entries.
        voxel_size: per-axis voxel size forwarded to ``car_to_voxel_coords``.
        boxes: iterable of objects exposing ``bottom_corners()``.
        classes: currently unused; kept so existing calls keep working.
        z_offset: z offset forwarded to ``car_to_voxel_coords``.

    Returns:
        None. The drawing is done through ``draw_border`` on the image that is passed in.
    """
    for box in boxes:
        # We only care about the bottom corners
        corners = box.bottom_corners()
        corners_voxel = car_to_voxel_coords(corners, im.shape, voxel_size, z_offset).transpose(1, 0)

        # Drop z and truncate to integers. np.int0 (an alias of np.intp) was removed in
        # NumPy 2.0, so the conversion is spelled out with astype.
        corners_px = corners_voxel[:, :2].astype(np.intp)

        # Hand OpenCV plain Python ints rather than numpy scalars.
        points = [tuple(int(value) for value in corner) for corner in corners_px[:4]]

        im = draw_border(im, points[0], points[1], points[2], points[3], 3)

In [66]:
# Pick one training sample and collect its lidar record, ego pose and annotation boxes.
my_sample_token = train_df.first_sample_token.values[1]
sample = level5data.get("sample", my_sample_token)
sample_lidar_token = sample["data"]["LIDAR_TOP"]
lidar_data = level5data.get("sample_data", sample_lidar_token)
lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
boxes = level5data.get_boxes(sample_lidar_token)
# Mutates `boxes` in place: from here on they are in car space.
move_boxes_to_car_space(boxes, ego_pose)

bev_image_path = '/media/bob/lyft/data/lyft_bev672/lyft_bev/bev_train_data/{}_input.png'.format(my_sample_token)
im = cv2.imread(bev_image_path)
# cv2.imread returns None instead of raising when the file is missing or unreadable;
# fail here with a clear message rather than later inside draw_boxes.
if im is None:
    raise FileNotFoundError("Could not read BEV image: {}".format(bev_image_path))

draw_boxes(im, voxel_size, boxes, classes, z_offset)
cv2.imshow('bev', im)
try:
    k = cv2.waitKey(0)  # block until any key is pressed
finally:
    # Always close the window; previously it was only closed when ESC was pressed.
    cv2.destroyAllWindows()

In [10]:
# Show the token of the sample selected for the visualisation.
my_sample_token

'24b0962e44420e6322de3f25d9e4e5cc3c7a348ec00bfa69db21517e4ca92cc8'

In [17]:
# Show the first box center in world coordinates. The boxes are fetched fresh because the shared
# `boxes` list is moved to car space in place by move_boxes_to_car_space in an earlier cell.
level5data.get_boxes(sample_lidar_token)[0].center

array([1048.15595023, 1691.8102354 ,  -23.30494345])

In [21]:
# Start from freshly fetched world-space boxes: move_boxes_to_car_space mutates its input, so
# calling it on boxes that were already moved would apply the transform twice.
boxes = level5data.get_boxes(sample_lidar_token)
move_boxes_to_car_space(boxes, ego_pose)
boxes[0].center

array([20.85948982, 48.68037288,  1.06134113])

In [22]:
# Inspect the ego pose record whose translation and rotation move_boxes_to_car_space uses.
ego_pose

{'rotation': [-0.6004078747001647,
  -0.000868287440477653,
  0.0018651459228554272,
  0.7996912850004297],
 'translation': [1007.2332778546752, 1725.4217301399465, -24.58000073380586],
 'token': '2d673d4bee560c77788b91e2ee24503538e74a23e7972e3e0099b92015f76dde',
 'timestamp': 1557858039302414.8}

In [18]:
import pandas as pd
# NOTE(review): this rebinds train_df, which an earlier cell defines as the scene-level training
# split (with a first_sample_token column). Cells that read train_df.first_sample_token will fail
# if they are re-run after this one -- consider a separate name for the CSV frame.
train_df = pd.read_csv('/media/bob/data/lyft/train.csv')

In [20]:
# Look up the row of train.csv whose Id is the selected sample token.
train_df.loc[train_df["Id"] == my_sample_token]

Unnamed: 0,Id,PredictionString
11539,24b0962e44420e6322de3f25d9e4e5cc3c7a348ec00bfa...,1048.155950230245 1691.8102354006162 -23.30494...


In [None]:
def prepare_target_for_NN(first_sample_token, output_folder='./bev_target/'):
    """
    Walk a scene sample by sample and write one label file per sample.

    Starting at ``first_sample_token``, each sample's ``"next"`` token is followed until it is
    empty. For every sample the annotation boxes of its LIDAR_TOP record are moved to car space,
    their bottom corners are mapped to bird's-eye-view voxel coordinates, and the result is
    written to ``<output_folder>/<sample_token>.txt`` (the folder is created if needed).

    Label format, one box per line: ``<box.name> x1 y1 x2 y2 ...`` with the corner coordinates
    in voxel space, three decimals each.
    NOTE(review): the previous draft computed the corners but never wrote them and referenced an
    undefined ``self.label_folder``; confirm this line format suits the downstream consumer.

    Relies on the module-level ``level5data``, ``bev_shape``, ``voxel_size`` and ``z_offset``.

    Args:
        first_sample_token: token of the first sample to process.
        output_folder: directory that receives the label files.
    """
    os.makedirs(output_folder, exist_ok=True)
    sample_token = first_sample_token

    while sample_token:
        sample = level5data.get("sample", sample_token)

        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)

        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        boxes = level5data.get_boxes(sample_lidar_token)
        move_boxes_to_car_space(boxes, ego_pose)

        label_path = os.path.join(output_folder, f"{sample_token}.txt")
        with open(label_path, "w") as label_file:
            for box in boxes:
                corners = box.bottom_corners()
                # Use the configured BEV shape instead of the shape of whatever image happens to
                # be bound to the global `im` at call time.
                corners_voxel = car_to_voxel_coords(corners, bev_shape, voxel_size, z_offset).transpose(1, 0)
                corners_voxel = corners_voxel[:, :2]  # Drop z coord
                coords = " ".join(f"{value:.3f}" for value in corners_voxel.ravel())
                label_file.write(f"{box.name} {coords}\n")

        # An empty "next" token ends the loop.
        sample_token = sample["next"]