In [1]:

import os

# Device selection and thread-pool sizes are exported BEFORE the numeric
# libraries are imported: thread-count variables such as OMP_NUM_THREADS /
# MKL_NUM_THREADS are typically read once when the library initialises, so
# setting them after `import numpy` / `import torch` may have no effect.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = "4"
os.environ["MKL_NUM_THREADS"] = "16"
os.environ["NUMEXPR_NUM_THREADS"] = "16"
os.environ["OMP_NUM_THREADS"] = "16"

import yaml
import matplotlib.pyplot as plt
# from train_eval.trainer import Trainer
import torch
import torch.nn as nn
import numpy as np

In [2]:
# Parse the YAML run configuration into `cfg`; it is handed to the Evaluator
# constructed in the next cell.
with open("configs/occlusion_train_v2_vis.yml", 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)

In [3]:
from train_eval.evaluator import Evaluator
# NOTE: despite its name, `trainer` is an Evaluator; the name is kept because
# later cells read `trainer.dl` and `trainer.model`.
# NOTE(review): the dataset root is an absolute, machine-specific path; the
# remaining positional arguments look like a data directory and a checkpoint
# path — confirm against Evaluator's signature.
trainer = Evaluator(cfg, "/home/stanliu/data/mnt/nuScenes/nuscenes", "vis_data","track_completion_model/track_completion_att2.tar")

Loading NuScenes tables for version v1.0-trainval...
Loading nuScenes-lidarseg...
32 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
34149 lidarseg,
Done loading in 53.068 seconds.
Reverse indexing ...
Done reverse indexing in 10.1 seconds.


In [None]:
import train_eval.utils as u

# Pull exactly one batch out of the evaluator's data loader and keep its
# inputs / ground truth around for interactive inspection.
with torch.no_grad():
    for i, data in enumerate(trainer.dl):
        # Cast doubles to floats first, then move everything to the device.
        data = u.convert_double_to_float(data)
        data = u.send_to_device(data)
        data_test, gt_test = data['inputs'], data['ground_truth']

        # Only the first batch is needed.
        break

In [8]:
# Show which top-level entries the model input dictionary contains.
data_test.keys()

dict_keys(['instance_token', 'sample_token', 'map_representation', 'target_agent_representation', 'origin'])

In [4]:
import torch.optim
from typing import Dict, Union
import torch
import numpy as np
import os
from datasets.nuScenes.prediction import PredictHelper_occ
from nuscenes.prediction.input_representation.static_layers import *
from nuscenes.prediction.input_representation.combinators import Rasterizer
from pyquaternion import Quaternion
from nuscenes.eval.common.utils import quaternion_yaw
import logging
from numpy import linalg as LA
import matplotlib.pyplot as plt
import matplotlib.image as im
from PIL import Image
import copy
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from executables.track_completion import *
def get_cam(sample_record,nusc,anntoken):
    '''Return the sample_data token of the first camera that shows an annotation.

    The camera channels are the keys of ``sample_record['data']`` containing
    ``'CAM'``. They are queried in order through ``nusc.get_sample_data`` with
    ``BoxVisibility.ANY``; the search stops at the first channel that returns
    at least one box for ``anntoken``.

    Args:
        sample_record: sample record; only its ``'data'`` mapping
            (channel name -> sample_data token) is read.
        nusc: object providing ``get_sample_data(token, box_vis_level=...,
            selected_anntokens=...)``.
        anntoken: sample_annotation token to look for.

    Returns:
        The sample_data token of the matching camera.

    Raises:
        AssertionError: if no camera returns a box for the annotation (this
            includes samples without any camera channel), or if a camera
            returns more than one box.
    '''
    cam_channels = [channel for channel in sample_record['data'] if 'CAM' in channel]

    # Pre-initialised so that a sample without camera channels reaches the
    # descriptive assertion below instead of failing on an unbound variable.
    boxes = []
    matched_channel = None
    for channel in cam_channels:
        matched_channel = channel
        _, boxes, _ = nusc.get_sample_data(sample_record['data'][channel],
                                           box_vis_level=BoxVisibility.ANY,
                                           selected_anntokens=[anntoken])
        if len(boxes) > 0:
            break  # We found an image that matches. Let's abort.
    assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
                            'Try using e.g. BoxVisibility.ANY.'
    assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'

    return sample_record['data'][matched_channel]
def render_occ_anns(frame,nusc: NuScenes,global_coord,global_rotation,save_dir,key,frame_id,save):
    '''Draw an annotation's box and a predicted box on the camera image showing it.

    The camera is chosen with ``get_cam``. The annotated box returned by
    ``nusc.get_sample_data`` is copied, moved to the predicted planar position
    (keeping the annotation's z) and orientation, transformed from global
    coordinates into the ego frame and then the sensor frame, and rendered
    together with the annotated box.

    Args:
        frame: mapping with an ``'ann_token'`` entry (sample_annotation token).
        nusc: dataset handle used for record lookups, sample data and colours.
        global_coord: predicted (x, y) in global coordinates.
        global_rotation: value accepted by ``Quaternion(...)`` describing the
            predicted global orientation.
        save_dir: root output directory.
        key: sub-directory name under ``save_dir``.
        frame_id: used (via ``str``) as the output file name.
        save: when truthy the figure is saved to ``save_dir/key/frame_id`` and
            closed; otherwise it is left open (e.g. for inline display).

    Raises:
        AssertionError: if the camera does not return exactly one box.
    '''
    anntoken = frame['ann_token']
    ann_record = nusc.get('sample_annotation', anntoken)
    sample_record = nusc.get('sample', ann_record['sample_token'])
    cam = get_cam(sample_record, nusc, anntoken)
    # The prediction is planar, so the annotated height is reused.
    z = ann_record['translation'][-1]
    data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
    # Validate before any figure is created so a failure cannot leak one.
    assert len(boxes)==1
    sd_record = nusc.get('sample_data', cam)
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    fig, axes = plt.subplots(1, 1, figsize=(9, 9))
    # The context manager closes the image file once its pixels are drawn.
    with Image.open(data_path) as camera_image:
        axes.imshow(camera_image)
    axes.set_title(sd_record['channel'])
    axes.axis('off')
    axes.set_aspect('equal')

    pred_box = copy.deepcopy(boxes[0])
    # The name only selects the colour looked up in nusc.colormap below.
    pred_box.name = "human.pedestrian.adult"
    pred_box.center = np.array([*global_coord, z], dtype=float)
    pred_box.orientation = Quaternion(global_rotation)

    # Move box to ego vehicle coord system.
    pred_box.translate(-np.array(pose_record['translation']))
    pred_box.rotate(Quaternion(pose_record['rotation']).inverse)

    #  Move box to sensor coord system.
    pred_box.translate(-np.array(cs_record['translation']))
    pred_box.rotate(Quaternion(cs_record['rotation']).inverse)
    boxes.append(pred_box)

    for box in boxes:
        c = np.array(nusc.colormap[box.name]) / 255.0
        box.render(axes, view=camera_intrinsic, normalize=True, colors=(c, c, c))

    if save:
        out_dir = os.path.join(save_dir, key)
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, str(frame_id)))
        plt.close(fig)
    return

@torch.no_grad()
def local_pose_to_image(local_poses,pose_mask,resolution,img_size,arror_length=None):
    '''Convert masked local poses into image coordinates and arrow offsets.

    Args:
        local_poses: tensor indexed as ``[:, 0]`` (x), ``[:, 1]`` (y) and
            ``[:, 2]`` (yaw); the original note gives its shape as [T,4].
        pose_mask: boolean mask over the first dimension ([T]); only selected
            rows are converted.
        resolution: scale factor applied to x / y (pixels per local unit).
        img_size: (rows, cols) of the target image; its rounded half is used
            as the image origin.
        arror_length: arrow length in pixels; defaults to 8 when None.

    Returns:
        Tuple ``(x, y, dx, dy)`` of numpy arrays: pixel positions (y grows
        downwards, hence the sign flip) and arrow offsets for heading
        ``yaw + pi/2``.
    '''
    if arror_length is None:
        arror_length=8
    y_m=np.asarray(local_poses[:,1][pose_mask].cpu())
    x_m=np.asarray(local_poses[:,0][pose_mask].cpu())
    # `np.int` was only an alias of the builtin and is gone from NumPy >= 1.24.
    img_origin=np.round(np.asarray(img_size)/2).astype(int)
    x=img_origin[1]+x_m*resolution
    y=img_origin[0]-y_m*resolution
    yaw=np.asarray((local_poses[:,2][pose_mask]).cpu())
    dy=-np.sin(yaw+np.pi/2)*arror_length
    dx=np.cos(yaw+np.pi/2)*arror_length
    return x,y,dx,dy

@torch.no_grad()
def visualize(inputs: Dict,ground_truth: Dict,predictions: Dict,helper: PredictHelper_occ, selector,token_dicts,save_folder='./tmp', mode='refine',save=False):
    '''Draw a bird's-eye-view figure and per-frame camera renders for selected samples.

    For every batch element whose ``selector`` entry is truthy, the map patch
    around the sample origin is rasterised and overlaid with arrows for the
    observed future (red), history (green), prediction (blue), ground truth
    (translucent magenta) and lane nodes (translucent orange). Every missing
    frame listed in ``token_dicts`` is then drawn on its camera image through
    ``render_occ_anns``. Processing stops once ``sample_id`` exceeds 100.

    Args:
        inputs: batch inputs; reads 'instance_token', 'sample_token',
            'target_agent_representation' (future / history 'traj' and
            'mask'), 'map_representation'['lane_node_feats'] and, when
            present, 'origin'.
        ground_truth: batch ground truth; reads 'traj'.
        predictions: model outputs; reads 'traj' (batch size), 'mask' and
            either 'refined_traj' / 'refined_yaw' or 'traj' / 'yaw'.
        helper: prediction helper used for map names, annotations and future
            trajectories; ``helper.data`` is forwarded to ``render_occ_anns``.
        selector: per-sample flags; falsy samples are skipped.
        token_dicts: maps '<instance_token>_<sample_token>' to the list of
            missing frames.
        save_folder: root directory for saved images.
        mode: 'refine' (refined outputs) or 'raw'.
        save: when True the figures are written to disk and closed; otherwise
            they are left open.

    Raises:
        ValueError: if ``mode`` is neither 'refine' nor 'raw'.
    '''
    # Resolve the prediction keys up front: an unsupported mode used to surface
    # as a NameError in the middle of the loop.
    if mode=='refine':
        traj_key, yaw_key = 'refined_traj', 'refined_yaw'
    elif mode=='raw':
        traj_key, yaw_key = 'traj', 'yaw'
    else:
        raise ValueError("mode must be 'refine' or 'raw', got {!r}".format(mode))

    upper_limit=100
    batch_size=len(predictions['traj'])
    layer_names = ['drivable_area', 'ped_crossing']
    maps= load_all_maps(helper)
    colors = [(255, 255, 255), (119, 136, 153)]
    for sample_id in range(batch_size):
        if sample_id>upper_limit:
            return
        if not selector[sample_id]:
            continue
        instance_token=inputs['instance_token'][sample_id]
        sample_token=inputs['sample_token'][sample_id]
        key=instance_token+"_"+sample_token
        target=inputs['target_agent_representation']
        future=target['future']['traj'][sample_id]
        mask_fut=target['future']['mask'][sample_id]
        hist=target['history']['traj'][sample_id]
        map_name = helper.get_map_name_from_sample_token(sample_token)

        if 'origin' in inputs:
            origin=tuple(inputs['origin'][sample_id,k].item() for k in range(3))
        else:
            # Fallback: midpoint between the agent's current global pose and
            # its global pose at the end of the prediction horizon. These
            # lookups are only needed (and only performed) on this path.
            nearest_idx=np.where(mask_fut[:, 0].cpu() == 0)[0][-1]
            prediction_horizon=future[nearest_idx,-1]
            sample_annotation = helper.get_sample_annotation(instance_token, sample_token)
            agent_x, agent_y = sample_annotation['translation'][:2]
            agent_yaw = correct_yaw(quaternion_yaw(Quaternion(sample_annotation['rotation'])))
            global_pose=(agent_x,agent_y,agent_yaw)
            coords_fut,global_yaw_fut,time_fut = helper.get_future_for_agent(instance_token, sample_token, seconds=2+prediction_horizon, in_agent_frame=False,add_yaw_and_time=True)

            sep_idx= np.searchsorted(time_fut, (prediction_horizon-0.001).item())
            origin_fut=coords_fut[sep_idx][0],coords_fut[sep_idx][1],correct_yaw(quaternion_yaw(Quaternion(global_yaw_fut[sep_idx])))
            origin=tuple((np.asarray(global_pose)+np.asarray(origin_fut))/2)

        # The patch covers at least 50 units per side and grows with the
        # distance of the first future pose from the origin.
        dist=LA.norm(future[0,:2].cpu(),ord=2)
        image_side_length = 2 * max(25,dist+10)
        image_side_length_pixels = 400
        resolution=image_side_length_pixels/image_side_length
        patchbox = get_patchbox(origin[0], origin[1], image_side_length)

        angle_in_degrees = angle_of_rotation(origin[2]) * 180 / np.pi

        canvas_size = (image_side_length_pixels, image_side_length_pixels)
        masks = maps[map_name].get_map_mask(patchbox, angle_in_degrees, layer_names, canvas_size=canvas_size)

        images = [
            change_color_of_binary_mask(np.repeat(mask[::-1, :, np.newaxis], 3, 2), color)
            for mask, color in zip(masks, colors)
        ]
        image = Rasterizer().combine(images)

        traj = predictions[traj_key][sample_id].squeeze(0)
        pred_yaw = predictions[yaw_key][sample_id]
        pred = torch.cat((traj,pred_yaw),-1)
        # Stored masks flag invalid entries; invert them to select valid poses.
        pose_pred_mask=~(predictions['mask'][sample_id]).bool()
        gt = ground_truth['traj'][sample_id]
        lanes=inputs['map_representation']['lane_node_feats'][sample_id].flatten(0,1)
        lanes_mask=lanes[:,0].bool()
        pose_future_mask=~mask_fut[:,0].bool()
        pose_hist_mask=~target['history']['mask'][sample_id][:,0].bool()

        # (poses, mask, arrow length, arrow width, RGBA colour) per overlay;
        # an arrow length of None selects local_pose_to_image's default.
        overlay_specs = [
            (future, pose_future_mask, None, 0.8, (1,0,0,1)),
            (hist, pose_hist_mask, None, 0.8, (0,1,0,1)),
            (pred, pose_pred_mask, 5, 1.0, (0.0,0,1,1)),
            (gt, pose_pred_mask, 5, 1.0, (1,0,1,0.3)),
            (lanes, lanes_mask, 2.5, 0.5, (1,0.5,0,0.3)),
        ]
        # Convert everything before the figure exists so a conversion error
        # cannot leave a half-built figure behind.
        overlays = [
            (local_pose_to_image(poses, pose_mask, resolution, canvas_size, arrow_length), width, color)
            for poses, pose_mask, arrow_length, width, color in overlay_specs
        ]

        fig = plt.figure(figsize = (8,8))
        ax = fig.add_subplot(1,1,1)
        for (xs, ys, dxs, dys), width, color in overlays:
            for x, y, dx, dy in zip(xs, ys, dxs, dys):
                ax.arrow(x, y, dx, dy, width=width, color=color)
        ax.imshow(image)
        ax.grid(False)
        fig.canvas.draw()

        if save:
            canvas = fig.canvas
            width_px, height_px = canvas.get_width_height()
            if hasattr(canvas, 'tostring_rgb'):
                image_from_plot = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
                image_from_plot = image_from_plot.reshape((height_px, width_px, 3))
            else:
                # Newer Matplotlib releases no longer provide tostring_rgb;
                # take the RGB channels of the RGBA buffer instead.
                image_from_plot = np.asarray(canvas.buffer_rgba())[..., :3].copy()
            plt.close(fig)
            out_dir=os.path.join(save_folder,key)
            os.makedirs(out_dir, exist_ok=True)
            # Explicit extension: without one the output format depends on
            # version-specific Matplotlib behaviour.
            im.imsave(os.path.join(out_dir,'bev.png'), image_from_plot)

        missing_frames=token_dicts[key]
        for idx,frame in enumerate(missing_frames):
            frame_yaw=pred[idx,-1].item()

            global_coord=local_to_global(origin, tuple(pred[idx,:2].cpu().numpy()))
            global_rotation=get_global_rotation(origin,frame_yaw)

            render_occ_anns(frame,helper.data,list(global_coord),global_rotation,save_folder,key,idx,save)
    return
@torch.no_grad()
def visualize_preparations(vis_dict, inputs: Dict,ground_truth: Dict,predictions: Dict,helper: "PredictHelper_occ", selector,token_dicts,save_folder='./tmp', mode='refine',save=False):
    '''Collect the per-sample tensors needed for later visualisation.

    For every batch element whose ``selector`` entry is truthy, an entry keyed
    '<instance_token>_<sample_token>' is written into ``vis_dict``.

    Args:
        vis_dict: dictionary updated in place (and returned).
        inputs: batch inputs; reads 'instance_token', 'sample_token',
            'origin', 'target_agent_representation' (future / history 'traj'
            and 'mask') and 'map_representation'['lane_node_feats'].
        ground_truth: batch ground truth; reads 'traj'.
        predictions: model outputs; reads 'traj' (batch size),
            'refined_traj', 'refined_yaw' and 'mask'.
        selector: per-sample flags; falsy samples are skipped.
        helper, token_dicts, save_folder, mode, save: not used here; accepted
            so the signature mirrors ``visualize``. The refined outputs are
            always stored regardless of ``mode``.

    Returns:
        The same ``vis_dict`` object with the new entries added.
    '''
    batch_size=len(predictions['traj'])
    for sample_id in range(batch_size):
        if not selector[sample_id]:
            continue
        instance_token=inputs['instance_token'][sample_id]
        sample_token=inputs['sample_token'][sample_id]
        key=instance_token+"_"+sample_token

        target=inputs['target_agent_representation']
        lane_node_feats=inputs['map_representation']['lane_node_feats'][sample_id]
        vis_dict[key]={
            "future":target['future']['traj'][sample_id],
            "mask_fut":target['future']['mask'][sample_id],
            "history":target['history']['traj'][sample_id],
            "origin":inputs['origin'][sample_id],
            "traj" :predictions['refined_traj'][sample_id],
            "yaw" :predictions['refined_yaw'][sample_id],
            "pose_pred_mask":predictions['mask'][sample_id],
            "gt" : ground_truth['traj'][sample_id],
            "lanes":lane_node_feats,
            # NOTE(review): stores the raw lane features, not a boolean mask;
            # visualize() derives its mask from channel 0 of these features.
            "lanes_mask":lane_node_feats,
            # Per-sample slice, like every other entry (the whole batch mask
            # used to be stored here).
            "pose_future_mask":target['future']['mask'][sample_id],
            "pose_hist_mask":target['history']['mask'][sample_id]
        }

    return vis_dict

In [5]:
# Index the dataset's missing-frame lists by "<instance_token>_<sample_token>"
# of each track's start frame.
tokens_list = trainer.dl.dataset.data_list
token_dicts = {}
for token_dict in tokens_list:
    start = token_dict['start']
    key = "_".join((start['ins_token'], start['sample_token']))
    token_dicts[key] = token_dict['missing_frames']

In [6]:
import train_eval.utils as u

# Run the model over every batch and stash the tensors of the selected samples
# in `vis_dict` for later plotting.
helper = PredictHelper_occ(trainer.dl.dataset.helper.data)
vis_dict = {}
with torch.no_grad():
    for i, data in enumerate(trainer.dl):
        print(i,len(trainer.dl))
        # Cast doubles to floats, then move the batch to the device.
        data = u.convert_double_to_float(data)
        data = u.send_to_device(data)
        data_test, gt_test = data['inputs'], data['ground_truth']

        # Keep samples whose last ground-truth channel has an L1 norm above 1.5
        # AND whose summed per-step L2 norm of the first two channels exceeds 10.
        gt_traj = gt_test['traj']
        last_channel_l1 = torch.norm(gt_traj[:, :, -1], 1, dim=1).cpu()
        summed_step_norm = torch.sum(torch.norm(gt_traj[:, :, :2], 2, dim=-1), dim=-1).cpu()
        selectors = (last_channel_l1 > 1.5) & (summed_step_norm > 10.0)

        predcitions = trainer.model(data_test)
        # Alternative: visualize(data_test,gt_test,predcitions,helper, selectors, token_dicts,save=True)
        vis_dict = visualize_preparations(vis_dict, data_test, gt_test, predcitions, helper, selectors, token_dicts)

0 46
1 46
2 46
3 46
4 46
5 46
6 46
7 46
8 46
9 46
10 46
11 46
12 46
13 46
14 46
15 46
16 46
17 46
18 46
19 46
20 46
21 46
22 46
23 46
24 46
25 46
26 46
27 46
28 46
29 46
30 46
31 46
32 46
33 46
34 46
35 46
36 46
37 46
38 46
39 46
40 46
41 46
42 46
43 46
44 46
45 46


In [11]:
vis_sample_token="64daa0864dc04a1c94cd02328c449dec_d7400c505fbb47e29638c057780a696b"
# This cell rebinds `vis_dict` from the {key: entry} mapping to the single
# selected entry (the following cell reads it under that name). Guard the
# lookup so re-running the cell is a no-op instead of a KeyError: once
# narrowed, `vis_dict` holds entry fields such as "future" rather than keys.
if vis_sample_token in vis_dict:
    vis_dict=vis_dict[vis_sample_token]
elif "future" not in vis_dict:
    raise KeyError(vis_sample_token)

In [None]:
# Rebuild, for the single selected sample, the map patch that visualize()
# rasterises. `vis_dict` is the per-sample entry chosen in the previous cell.
layer_names = ['drivable_area', 'ped_crossing']
maps= load_all_maps(helper)
colors = [(255, 255, 255), (119, 136, 153)]
# Keys have the form "<instance_token>_<sample_token>". `split` must be
# called; subscripting the method (`split["_"]`) raises TypeError.
instance_token,sample_token=vis_sample_token.split("_")

future=vis_dict['future']
mask_fut=vis_dict['mask_fut']
hist=vis_dict['history']
# Last index whose first mask channel is 0, and the last channel of the future
# pose at that index.
nearest_idx=np.where(mask_fut[:, 0].cpu() == 0)[0][-1]
prediction_horizon=future[nearest_idx,-1]
sample_annotation = helper.get_sample_annotation(instance_token, sample_token)

map_name = helper.get_map_name_from_sample_token(sample_token)
x, y = sample_annotation['translation'][:2]
yaw = quaternion_yaw(Quaternion(sample_annotation['rotation']))
yaw_corrected = correct_yaw(yaw)
global_pose=(x,y,yaw_corrected)
origin=tuple([vis_dict['origin'][0].item(),vis_dict['origin'][1].item(),vis_dict['origin'][2].item()])
# The patch covers at least 50 units per side and grows with the distance of
# the first future pose from the origin.
dist=LA.norm(future[0,:2].cpu(),ord=2)
image_side_length = 2 * max(25,dist+10)
image_side_length_pixels = 400
resolution=image_side_length_pixels/image_side_length
patchbox = get_patchbox(origin[0], origin[1], image_side_length)

angle_in_degrees = angle_of_rotation(origin[2]) * 180 / np.pi

canvas_size = (image_side_length_pixels, image_side_length_pixels)
masks = maps[map_name].get_map_mask(patchbox, angle_in_degrees, layer_names, canvas_size=canvas_size)

images = []

In [6]:
import json

# Load the list stored in vis_data/no_point.json; the next cell indexes its
# entries by their start-frame tokens.
file_path = os.path.join('vis_data','no_point'+'.json')
with open(file_path, 'r') as json_file:
    data_list=json.load(json_file)

In [8]:
# Index the loaded entries by "<instance_token>_<sample_token>" of their
# start frame.
data_dict = {}
for token_dict in data_list:
    start = token_dict['start']
    key = "_".join((start['ins_token'], start['sample_token']))
    data_dict[key] = token_dict

In [13]:
# Pull the individual parts of the target-agent representation out of the
# current batch for inspection.
target_repr = data_test['target_agent_representation']
history = target_repr['history']
future = target_repr['future']
concat_motion = target_repr['concat_motion']
time_query = target_repr['time_query']
refine_input = target_repr['refine_input']

In [None]:
# Look up the dataset entry for every PNG in ./tmp; the file name without its
# ".png" suffix is the entry key.
for filename in os.listdir('tmp'):
    if not filename.endswith('.png'):
        continue
    key = filename[:-4]
    token_dict = data_dict[key]

In [None]:
# Run the model on the currently loaded batch and display its raw outputs.
# NOTE(review): unlike the batch loop above, this call is not wrapped in
# torch.no_grad().
predcitions=trainer.model(data_test)
predcitions

In [46]:
# Per-sample sum (over the second dimension) of the L2 norm of the first two
# ground-truth trajectory channels — the quantity thresholded at 10.0 when
# selecting samples.
torch.sum(torch.norm(gt_test['traj'][:,:,:2],2,dim=-1),dim=-1)

tensor([ 4.2488,  6.2277, 26.1447,  3.5181,  6.0860, 11.3073,  5.1698,  4.6631,
         9.5787,  2.2749,  8.1890,  2.4670,  3.8158, 12.2496,  4.5915],
       device='cuda:0')

In [3]:
%matplotlib inline
from nuscenes.nuscenes import NuScenes

# Loads the v1.0-mini split (the Evaluator above loaded v1.0-trainval).
# NOTE(review): the dataroot is an absolute, machine-specific path.
nusc = NuScenes(version='v1.0-mini', dataroot='/home/stanliu/data/mnt/nuScenes/nuscenes', verbose=True)



Loading NuScenes tables for version v1.0-mini...
Loading nuScenes-lidarseg...
32 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
404 lidarseg,
Done loading in 3.915 seconds.
Reverse indexing ...
Done reverse indexing in 0.2 seconds.


In [4]:
# Take the first scene and its first sample.
my_scene = nusc.scene[0]
first_sample_token = my_scene['first_sample_token']
my_sample = nusc.get('sample', first_sample_token)


# Display the sample_data record of that sample's front camera.
sensor = 'CAM_FRONT'
cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])
cam_front_data



{'token': 'e3d495d4ac534d54b321f50006683844',
 'sample_token': 'ca9a282c9e77460f8360f564131a8af5',
 'ego_pose_token': 'e3d495d4ac534d54b321f50006683844',
 'calibrated_sensor_token': '1d31c729b073425e8e0202c5c6e66ee1',
 'timestamp': 1532402927612460,
 'fileformat': 'jpg',
 'is_key_frame': True,
 'height': 900,
 'width': 1600,
 'filename': 'samples/CAM_FRONT/n015-2018-07-24-11-22-45+0800__CAM_FRONT__1532402927612460.jpg',
 'prev': '',
 'next': '68e8e98cf7b0487baa139df808641db7',
 'sensor_modality': 'camera',
 'channel': 'CAM_FRONT'}

In [None]:
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box


In [None]:

# Pick one annotation of the sample (index 18 is hard-coded) and fetch its
# sample_annotation record.
my_annotation_token = my_sample['anns'][18]
my_annotation_metadata =  nusc.get('sample_annotation', my_annotation_token)


# Render the selected annotation with the devkit's built-in viewer.
nusc.render_annotation(my_annotation_token)

