In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import mayavi.mlab as mlab
import moviepy.editor as mpy
from skimage import io
import imageio
import matplotlib.image as mpimg
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle
from tqdm.notebook import trange, tqdm


In [2]:
# Object classes that can appear in the first column of a label file.
names = ['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare']

# Two palette entries per class: the drawing code indexes this list with
# `names.index(label) * 2 + front`, where front is 0 for the edges of the
# front face of a box and 1 for every other edge.
colors = sns.color_palette('Paired', len(names) * 2)

# Number of consecutive samples (file ids 000000, 000001, ...) to process.
N_SAMPLES = 160

3D bounding boxes drawn on the point cloud

<img src="images/pc/000101_labels.png" width="750" align="center">

In [3]:
%%capture out

id = 0  # index of the next sample to render; advanced by every make_frame call

def make_frame(t):
    """Render the next sample's point cloud with its 3D label boxes.

    The sample is selected by the module-level counter ``id`` (not by ``t``),
    and the counter is incremented after every successful call, so successive
    calls walk through the samples in order.

    Side effects: saves the rendering to ``images/pc/<file_id>_labels.png``
    and increments the global ``id``.

    Args:
        t: time value passed in by the caller; unused.

    Returns:
        The image returned by ``mlab.screenshot`` for the rendered scene.

    NOTE(review): because the frame index ignores ``t``, any additional call
    made by the video library (e.g. to probe the frame size) also advances
    the counter -- confirm the written frames line up with the sample ids.
    """
    global id
    file_id = f'{id:06}'

    # load point clouds: flat float32 file viewed as rows of 4 values
    scan_dir = f'gta/training/velodyne/{file_id}.bin'
    scan = np.fromfile(scan_dir, dtype=np.float32).reshape(-1, 4)

    # load labels (the file is closed again before any rendering starts)
    label_dir = f'gta/training/label_2/{file_id}.txt'
    with open(label_dir, 'r') as f:
        labels = f.readlines()

    # (start corner, end corner, front) for the 12 box edges;
    # front = 0 selects the first palette colour of the class.
    edges = (
        # upper 4 horizontal lines
        (0, 1, 0), (1, 2, 1), (2, 3, 1), (3, 0, 1),
        # lower 4 horizontal lines
        (4, 5, 0), (5, 6, 1), (6, 7, 1), (7, 4, 1),
        # 4 vertical lines
        (4, 0, 0), (5, 1, 0), (6, 2, 1), (7, 3, 1),
    )

    fig = mlab.figure(bgcolor=(0, 0, 0), size=(1280, 720))
    try:
        # draw point cloud
        mlab.points3d(scan[:, 0], scan[:, 1], scan[:, 2], mode="point", figure=fig)

        for raw_line in labels:
            fields = raw_line.split()
            if not fields:
                continue  # tolerate blank lines in the label file
            lab = fields[0]
            if lab == 'DontCare':
                continue
            # columns 8..14: h, w, l, x, y, z, rot; any extra trailing column is ignored
            h, w, l, x, y, z, rot = map(float, fields[8:15])

            x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
            y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
            z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
            corners_3d = np.vstack([x_corners, y_corners, z_corners])  # (3, 8)

            # transform the 3d bbox from object coordinate to camera_0 coordinate
            R = np.array([[np.cos(rot), 0, np.sin(rot)],
                          [0, 1, 0],
                          [-np.sin(rot), 0, np.cos(rot)]])
            corners_3d = np.dot(R, corners_3d).T + np.array([x, y, z])

            # transform the 3d bbox from camera_0 coordinate to velodyne coordinate
            corners_3d = corners_3d[:, [2, 0, 1]] * np.array([[1, -1, -1]])

            class_offset = names.index(lab) * 2
            for start, end, front in edges:
                p1, p2 = corners_3d[start], corners_3d[end]
                mlab.plot3d([p1[0], p2[0]], [p1[1], p2[1]], [p1[2], p2[2]],
                            color=colors[class_offset + front],
                            tube_radius=None, line_width=2, figure=fig)

        mlab.view(azimuth=180, elevation=60, distance=100, focalpoint=(0, 0, 0))
        mlab.savefig(filename=f'images/pc/{file_id}_labels.png')
        screenshot = mlab.screenshot(antialiased=True)  # return a RGB image
    finally:
        # always release the scene, even if loading or drawing failed
        mlab.close(fig)

    id = id + 1
    return screenshot

In [4]:
%%capture out

# Reset the sample counter that make_frame reads and increments, so the
# animation starts again from the first sample when this cell is re-run.
id = 0
# Frames per second of the output GIF.
fps = 2

# make_frame is called once per requested frame; the duration is chosen so that
# duration * fps equals N_SAMPLES.
# NOTE(review): make_frame ignores its time argument and just advances `id`, so
# any extra call made while constructing the clip would shift the frames by one
# sample -- confirm against the generated GIF.
animation = mpy.VideoClip(make_frame, duration=N_SAMPLES/fps)
animation.write_gif("gif/pc.gif", fps=fps)

kitti_3dbox_to_img: project the 3D bounding boxes onto the camera image

<img src="images/camera/000101_3dbox_img.png" width="750" align="center">

In [5]:
%%capture out

def kitti_3dbox_to_img(id):
    """Draw the labelled 3D boxes of one sample on its camera image and save it.

    Reads the image, label file and calibration file for sample ``id`` from
    ``gta/training/``, projects the 8 corners of every non-'DontCare' box
    into the image with the P2 matrix (third line of the calibration file),
    and writes the overlay to
    ``images/camera/3dboxes/<file_id>_3dbox_img.png``.

    The figure is closed before returning so repeated calls do not
    accumulate open matplotlib figures.

    Args:
        id: integer sample index; formatted as a zero-padded 6-digit file id.

    Returns:
        None.
    """
    file_id = f'{id:06}'

    # load image
    img = np.array(io.imread(f'gta/training/image_2/{file_id}.png'), dtype=np.int32)

    # load labels
    with open(f'gta/training/label_2/{file_id}.txt', 'r') as f:
        labels = f.readlines()

    # load calibration file
    with open(f'gta/training/calib/{file_id}.txt', 'r') as f:
        lines = f.readlines()
    # third line holds P2: drop the leading key token, keep the 3x4 matrix
    P2 = np.array(lines[2].strip().split(' ')[1:], dtype=np.float32).reshape(3, 4)

    # (start corner, end corner, front) for the 12 box edges;
    # front = 0 selects the first palette colour of the class.
    edges = (
        # upper 4 horizontal lines
        (0, 1, 0), (1, 2, 1), (2, 3, 1), (3, 0, 1),
        # lower 4 horizontal lines
        (4, 5, 0), (5, 6, 1), (6, 7, 1), (7, 4, 1),
        # 4 vertical lines
        (4, 0, 0), (5, 1, 0), (6, 2, 1), (7, 3, 1),
    )

    fig = plt.figure()
    try:
        # draw image
        plt.imshow(img)
        ax = plt.gca()

        for raw_line in labels:
            fields = raw_line.split()
            if not fields:
                continue  # tolerate blank lines in the label file
            lab = fields[0]
            if lab == 'DontCare':
                continue
            # columns 8..14: h, w, l, x, y, z, rot; any extra trailing column is ignored
            h, w, l, x, y, z, rot = map(float, fields[8:15])

            x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
            y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
            z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
            corners_3d = np.vstack([x_corners, y_corners, z_corners])  # (3, 8)

            # transform the 3d bbox from object coordinate to camera_0 coordinate
            R = np.array([[np.cos(rot), 0, np.sin(rot)],
                          [0, 1, 0],
                          [-np.sin(rot), 0, np.cos(rot)]])
            corners_3d = np.dot(R, corners_3d).T + np.array([x, y, z])

            # transform the 3d bbox from camera_0 coordinate to camera_x image
            corners_3d_hom = np.concatenate((corners_3d, np.ones((8, 1))), axis=1)
            corners_img = np.matmul(corners_3d_hom, P2.T)
            # perspective divide: (u, v) = (x, y) / z
            corners_img = corners_img[:, :2] / corners_img[:, 2][:, None]

            class_offset = names.index(lab) * 2
            for start, end, front in edges:
                p1, p2 = corners_img[start], corners_img[end]
                ax.add_line(Line2D((p1[0], p2[0]), (p1[1], p2[1]),
                                   color=colors[class_offset + front]))

        plt.axis('off')
        plt.tight_layout()
        plt.savefig(f'images/camera/3dboxes/{file_id}_3dbox_img.png', bbox_inches='tight')
    finally:
        # release the figure so a loop over many samples does not leak memory
        plt.close(fig)

In [6]:
%%capture out

# Render and save the 3D-box overlay for each of the first N_SAMPLES samples;
# trange shows a progress bar while iterating.
for i in trange(N_SAMPLES):
    kitti_3dbox_to_img(i)

In [7]:
# Frames per second of the output GIF.
fps = 2

# Read back the overlay PNGs written by kitti_3dbox_to_img, in sample order.
# Note: file_id is an integer here and is zero-padded when building the path.
images = []
for file_id in trange(N_SAMPLES):
    filename = f'images/camera/3dboxes/{file_id:06}_3dbox_img.png'
    images.append(imageio.imread(filename))

# Write all frames as a single animated GIF.
imageio.mimsave('gif/3dboxes.gif', images, fps=fps)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=160.0), HTML(value='')))




cloud2img: project the LiDAR point cloud onto the camera image

<img src="images/camera/000101_overlap.png" width="750" align="center">

In [8]:
%%capture out

def cloud2img(id):
    """Overlay one sample's point cloud on its camera image and save the figure.

    Reads the point cloud, image and calibration file for sample ``id`` from
    ``gta/training/``, maps the points through the matrices on calibration
    lines 6, 5 and 3 (named V2C, R0 and P2 here), keeps the points that have
    positive depth and fall inside the image, and scatters them over the
    image coloured by negative depth. The result is written to
    ``images/camera/overlap/<file_id>_overlap.png``.

    The figure is closed before returning so repeated calls do not
    accumulate open matplotlib figures.

    Args:
        id: integer sample index; formatted as a zero-padded 6-digit file id.

    Returns:
        None.
    """
    file_id = f'{id:06}'

    # load point clouds: flat float32 file viewed as rows of 4 values
    scan_dir = f'gta/training/velodyne/{file_id}.bin'
    scan = np.fromfile(scan_dir, dtype=np.float32).reshape(-1, 4)

    # load image
    img = np.array(io.imread(f'gta/training/image_2/{file_id}.png'), dtype=np.int32)

    # load calibration file; each used line is "<key> v0 v1 ...", key dropped
    with open(f'gta/training/calib/{file_id}.txt', 'r') as f:
        lines = f.readlines()
    P2 = np.array(lines[2].strip().split(' ')[1:], dtype=np.float32).reshape(3, 4)
    R0 = np.array(lines[4].strip().split(' ')[1:], dtype=np.float32).reshape(3, 3)
    V2C = np.array(lines[5].strip().split(' ')[1:], dtype=np.float32).reshape(3, 4)

    fig = plt.figure(figsize=(12, 6))
    try:
        # draw image
        plt.imshow(img)

        n_points = scan.shape[0]
        ones = np.ones((n_points, 1), dtype=np.float32)

        # transform the pointcloud from velodyne coordinate to camera_0 coordinate
        scan_hom = np.hstack((scan[:, :3], ones))  # [N, 4]
        scan_C0 = np.dot(scan_hom, np.dot(V2C.T, R0.T))  # [N, 3]

        # transform the pointcloud from camera_0 coordinate to camera_2 coordinate
        scan_C0_hom = np.hstack((scan_C0, ones))  # [N, 4]
        scan_C2 = np.dot(scan_C0_hom, P2.T)  # [N, 3]

        # keep only points in front of the camera *before* the perspective
        # divide, so points with zero depth never trigger a division by zero
        in_front = scan_C2[:, 2] > 0
        depth = scan_C2[in_front, 2]
        pixels = scan_C2[in_front, :2] / depth[:, None]

        # remove points outside the image
        inside = ((pixels[:, 0] > 0) & (pixels[:, 0] < img.shape[1]) &
                  (pixels[:, 1] > 0) & (pixels[:, 1] < img.shape[0]))

        plt.scatter(pixels[inside, 0], pixels[inside, 1], c=-depth[inside],
                    alpha=0.5, s=1, cmap='viridis')

        plt.axis('off')
        plt.tight_layout()
        plt.savefig(f'images/camera/overlap/{file_id}_overlap.png', bbox_inches='tight')
    finally:
        # release the figure so a loop over many samples does not leak memory
        plt.close(fig)

In [10]:
%%capture out

# Render and save the point-cloud overlay for each of the first N_SAMPLES
# samples; trange shows a progress bar while iterating.
for i in trange(N_SAMPLES):
    cloud2img(i)

In [11]:
# Frames per second of the output GIF.
fps = 2

# Read back the overlay PNGs written by cloud2img, in sample order.
# Note: file_id is an integer here and is zero-padded when building the path.
images = []
for file_id in trange(N_SAMPLES):
    filename = f'images/camera/overlap/{file_id:06}_overlap.png'
    images.append(imageio.imread(filename))

# Write all frames as a single animated GIF.
imageio.mimsave('gif/overlap.gif', images, fps=fps)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=160.0), HTML(value='')))




<img src="gif/pc.gif" width="750" align="center">
<img src="gif/3dboxes.gif" width="750" align="center">
<img src="gif/overlap.gif" width="750" align="center">