In [3]:
import numpy as np
import pickle
import argparse
import math
import torch
import torch.nn as nn
from tqdm import tqdm

def add_sdc(data):
    sdc_indices = np.argwhere(data['state/is_sdc'] == True)
    sdc_indices = np.squeeze(sdc_indices, 0)
    # print(sdc_indices)
    data['sdc/current/x'] = np.expand_dims(data['state/current/x'][sdc_indices[0], sdc_indices[1]], 0)
    data['sdc/current/y'] = np.expand_dims(data['state/current/y'][sdc_indices[0], sdc_indices[1]], 0)
    data['sdc/current/z'] = np.expand_dims(data['state/current/z'][sdc_indices[0], sdc_indices[1]], 0)
    data['sdc/current/bbox_yaw'] = np.expand_dims(data['state/current/bbox_yaw'][sdc_indices[0], sdc_indices[1]], 0)
    return data
    
def _stack_field(data,times,field):
    if field == 'type':
        # [batch_size, num_agents]
        fields = data['state/type']
        # The `type` field's shape is different from other fields.  Broadcast it
        # to have the same shape as another field.
        x = _stack_field(data, times, field='x')
        # [batch_size, num_agents, num_steps, 1]
        fields = np.broadcast_to(fields[:, :, np.newaxis, np.newaxis], x.shape)
    else:
        # [batch_size, num_agents, num_steps]
        fields = np.concatenate([data[f'state/{t}/{field}'] for t in times], axis=-1)
        # [batch_size, num_agents, num_steps, 1]
        fields = fields[:, :, :, np.newaxis]
    return fields
  
def get_time(times):
    num_steps = 0
    if 'past' in times:
      num_steps += 10
    if 'current' in times:
      num_steps += 1
    if 'future' in times:
      num_steps += 80
    return num_steps

def rotate_points_around_origin(x, y, angle):
    translated_x = np.cos(angle) * x - np.sin(angle) * y
    translated_y = np.sin(angle) * x + np.cos(angle) * y
    
    return translated_x, translated_y

def prepare_for_sample(data, times):
    x = _stack_field(data, times, 'x')
    y = _stack_field(data, times, 'y')
    z = _stack_field(data, times, 'z')
    bbox_yaw = _stack_field(data, times, 'bbox_yaw')
    length = _stack_field(data, times, 'length')
    width = _stack_field(data, times, 'width')
    agent_type = _stack_field(data, times, 'type')
    valid = _stack_field(data, times, 'valid')
    
    sdc_x = data['sdc/current/x'][:, np.newaxis, np.newaxis, :]
    sdc_y = data['sdc/current/y'][:, np.newaxis, np.newaxis, :]
    sdc_z = data['sdc/current/z'][:, np.newaxis, np.newaxis, :]
    x = x - sdc_x
    y = y - sdc_y
    z = z - sdc_z
    
    angle = np.pi / 2 - data['sdc/current/bbox_yaw'][:, np.newaxis, np.newaxis, :]
    x, y = rotate_points_around_origin(x, y, angle)
    x = np.cos(angle) * x - np.sin(angle) * y
    y = np.sin(angle) * x + np.cos(angle) * y
    bbox_yaw = bbox_yaw + angle
    
    return (x, y, z, bbox_yaw, width, length, agent_type, valid)
  
def sample_agent_points(prepared_data, points_per_side_length = 48, points_per_side_width = 16):
    x, y, z, bbox_yaw, width, length, agent_type, valid = prepared_data
    if points_per_side_length == 1:
      step_x = 0.0
    else:
      step_x = 1.0 / (points_per_side_length - 1)
    if points_per_side_width == 1:
      step_y = 0.0
    else:
      step_y = 1.0 / (points_per_side_width - 1)
    unit_x = []
    unit_y = []
    for xi in range(points_per_side_length):
      for yi in range(points_per_side_width):
        unit_x.append(xi * step_x - 0.5)
        unit_y.append(yi * step_y - 0.5)

    # Center unit_x and unit_y if there was only 1 point on those dimensions.
    if points_per_side_length == 1:
      unit_x = np.array(unit_x) + 0.5
    if points_per_side_width == 1:
      unit_y = np.array(unit_y) + 0.5

    unit_x = np.array(unit_x, dtype = np.float32)
    unit_y = np.array(unit_y, dtype = np.float32)

    num_points = points_per_side_length * points_per_side_width

    # Transform the unit square points to agent dimensions and coordinate frames.
    sin_yaw = np.sin(bbox_yaw)
    cos_yaw = np.cos(bbox_yaw)

    # [..., num_points]
    tx = cos_yaw * length * unit_x - sin_yaw * width * unit_y + x
    ty = sin_yaw * length * unit_x + cos_yaw * width * unit_y + y
    tz = np.broadcast_to(z, tx.shape)

    agent_type = np.broadcast_to(agent_type, tx.shape)
    valid = np.broadcast_to(valid, tx.shape)

    return tx, ty, tz, agent_type, valid
  
def transform_to_image_coordinate(points_x, points_y, pixels_per_meter = 3.2, sdc_x_in_grid = 128, sdc_y_in_grid = 196):
    points_x = np.round(points_x * pixels_per_meter) + sdc_x_in_grid
    points_y = np.round(-points_y * pixels_per_meter) + sdc_y_in_grid
    
    point_is_in_fov = np.logical_and(
      np.logical_and(np.greater_equal(points_x, 0), np.greater_equal(points_y, 0)),
      np.logical_and(np.less(points_x, grid_width_cells),np.less(points_y, grid_height_cells)))
    
    return points_x, points_y, point_is_in_fov
  
def get_points_from_bbox(data, times = ['past']):
    prepared_data  = prepare_for_sample(data, times)  #translate and rotate according to sdc
    x, y, z, agent_type, agent_valid = sample_agent_points(prepared_data)
    points_x_img, points_y_img, points_is_in_fov = transform_to_image_coordinate(x, y)
    point_is_in_fov_and_valid = np.logical_and(points_is_in_fov, agent_valid.astype(bool))

    x_list = []
    y_list = []
    z_list = []
    time_stamp_list = []
    for object_type in all_agent_type:
        agent_type_matches = np.equal(agent_type, object_type)
        should_keep_point = np.logical_and(point_is_in_fov_and_valid, agent_type_matches)
        # should_keep_point = agent_type_matches
        points_indices = np.argwhere(should_keep_point).astype(np.int32)
        
        # print(points_indices.shape)
        x_real_coor = x[points_indices[:, 0], points_indices[:, 1], points_indices[:, 2], points_indices[:, 3]]
        y_real_coor = y[points_indices[:, 0], points_indices[:, 1], points_indices[:, 2], points_indices[:, 3]]
        z_real_coor = z[points_indices[:, 0], points_indices[:, 1], points_indices[:, 2], points_indices[:, 3]]
        x_real_coor = np.expand_dims(x_real_coor, axis = -1)
        y_real_coor = np.expand_dims(y_real_coor, axis = -1)
        z_real_coor = np.expand_dims(z_real_coor, axis = -1)
        time_stamp = np.expand_dims(points_indices[:, 2], -1)

        x_list.append(x_real_coor)
        y_list.append(y_real_coor)
        z_list.append(z_real_coor)
        time_stamp_list.append(time_stamp)
    
    return x_list, y_list, z_list, time_stamp_list
  
def get_features(points_x_with_type, points_y_with_type, points_z_with_type, time_stamp_with_type, batch_idx):
    points_features_list = []
    for idx, type in enumerate(range(1, 4)):
        type_embedding = nn.Embedding(type_embedding_num, embedded_dim)
        time_embedding = nn.Embedding(time_embedding_num_past, embedded_dim)
        type = torch.LongTensor([type])
        type_concat = type_embedding(type).repeat(points_x_with_type[idx].shape[0], 1)
        # print(type)
        # print(type_concat.shape)
        x = torch.tensor(points_x_with_type[idx])
        y = torch.tensor(points_y_with_type[idx])
        z = torch.tensor(points_z_with_type[idx])
        time = torch.LongTensor(time_stamp_with_type[idx]+1)
        time_concat = time_embedding(time).squeeze(1)
        # print(time_concat.shape)
        batch_concat = torch.LongTensor([batch_idx]).repeat(points_x_with_type[idx].shape[0], 1)
        # print(batch_concat.shape)
        points_features_this_type = torch.cat((x, y, z, type_concat, time_concat, batch_concat), -1)
        points_features_list.append(points_features_this_type)
        # print(points_features_this_type.shape)
        
    points_features = torch.cat(points_features_list, 0)

    return points_features

class VoxelGenerator(object):
    def __init__(self,
                 grid_height_cells,
                 grid_width_cells,
                 max_num_points,
                 max_voxels=20000):
        # grid_size = torch.tensor([batch_size, grid_height_cells, grid_width_cells, 1], dtype = torch.int32)
        self._max_num_points = max_num_points
        self._max_voxels = max_voxels
        # self._grid_size = grid_size
        self.grid_height_cells = grid_height_cells
        self.grid_width_cells = grid_width_cells
        

    def generate(self, points):
        """Generate voxels given points."""
        return points_to_voxel(points,
                                self.grid_height_cells, 
                                self.grid_width_cells,
                                self._max_num_points,
                                self._max_voxels)

def points_to_voxel(points,
                    grid_height_cells,
                    grid_width_cells,
                    max_points=35,
                    max_voxels=20000,
                    pixels_per_meter=3.2,
                    sdc_x_in_grid=128,
                    sdc_y_in_grid=192):
    """
    Returns:
        voxels: [M, max_points, ndim] float tensor. only contain points.
        coordinates: [M, 4] int32 tensor.
        num_points_per_voxel: [M] int32 tensor.
    """
    batch_size = points[:, -1].max().int().item() + 1
    voxelmap_shape = tuple(torch.tensor([batch_size, grid_height_cells, grid_width_cells, 1], dtype = torch.int32))
    # don't create large array in jit(nopython=True) code.
    num_points_per_voxel = torch.zeros((max_voxels, ), dtype=torch.int32)
    coor_to_voxelidx = -torch.ones(voxelmap_shape, dtype=torch.int32)
    voxels = torch.zeros((max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
    coors = torch.zeros((max_voxels, 4), dtype=torch.int32)
    
    voxel_num = _points_to_voxel_kernel(points,
                                        voxelmap_shape,
                                        num_points_per_voxel,
                                        coor_to_voxelidx, voxels, coors,
                                        max_points, max_voxels,
                                        pixels_per_meter,
                                        sdc_x_in_grid,
                                        sdc_y_in_grid)
    # print(coors)
    coors = coors[:voxel_num]
    voxels = voxels[:voxel_num]
    num_points_per_voxel = num_points_per_voxel[:voxel_num]

    return voxels, coors, num_points_per_voxel

def _points_to_voxel_kernel(points,
                            voxelmap_shape,
                            num_points_per_voxel,
                            coor_to_voxelidx,
                            voxels,
                            coors,
                            max_points=35,
                            max_voxels=20000,
                            pixels_per_meter=3.2,
                            sdc_x_in_grid=128,
                            sdc_y_in_grid=192):
    """
    Returns:
        voxels: Shape [M, max_points, ndim], only contain points.
        coordinates: Shape [M, 4].  [batch_idx, x, y, z]
        num_points_per_voxel: Shape [M].
    """
    N = points.shape[0]

    coor = torch.zeros((4, ), dtype=torch.int32)
    voxel_num = 0
    points_in_img_x = torch.round(points[:, 0] * pixels_per_meter) + sdc_x_in_grid
    points_in_img_y = torch.round(-points[:, 1] * pixels_per_meter) + sdc_y_in_grid
    batch_size = voxelmap_shape[0]
    # print(batch_size)
    
    for batch_id in range(batch_size):
        indices = points[:, -1] == batch_id
        img_coor_x = points_in_img_x[indices]
        img_coor_y = points_in_img_y[indices]
        for i in tqdm(range(len(img_coor_x))):
            coor[0] = batch_id
            coor[1], coor[2] = img_coor_x[i], img_coor_y[i]
            coor[3] = 0
            voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2], coor[3]]
            if voxelidx == -1:
                voxelidx = voxel_num
                if voxel_num >= max_voxels:
                    continue
                voxel_num += 1
                coor_to_voxelidx[coor[0], coor[1], coor[2], coor[3]] = voxelidx
                coors[voxelidx] = coor
            num = num_points_per_voxel[voxelidx]
            if num < max_points:
                voxels[voxelidx, num] = points[i]
                num_points_per_voxel[voxelidx] += 1
    return voxel_num

In [4]:
points_per_side_length = 48
points_per_side_width = 16
grid_height_cells = 256
grid_width_cells= 256
pixels_per_meter = 3.2
sdc_x_in_grid = 128
sdc_y_in_grid = 196
all_agent_type = [1, 2, 3]
type_embedding_num = 4
time_embedding_num_past = 11
embedded_dim = 16

with open('train_preprocessed_data/0.pkl', 'rb') as fp:
    data_1 = pickle.load(fp)
    
with open('train_preprocessed_data/1.pkl', 'rb') as fp:
    data_2 = pickle.load(fp)
data_1 = add_sdc(data_1)
data_2 = add_sdc(data_2)

points_x_with_type_1, points_y_with_type_1, points_z_with_type_1, time_stamp_with_type_1 = get_points_from_bbox(data_1, ['past'])
points_x_with_type_2, points_y_with_type_2, points_z_with_type_2, time_stamp_with_type_2 = get_points_from_bbox(data_2, ['past'])


points_features_1 = get_features(points_x_with_type_1, points_y_with_type_1, points_z_with_type_1, time_stamp_with_type_1, batch_idx = 0)
points_features_2 = get_features(points_x_with_type_2, points_y_with_type_2, points_z_with_type_2, time_stamp_with_type_2, batch_idx = 1)

points_features = torch.cat((points_features_1, points_features_2), 0)
voxel_generator = VoxelGenerator(grid_height_cells=256, grid_width_cells=256, max_num_points=32)
voxels, coors, num_points_per_voxel = voxel_generator.generate(points_features)
print(voxels.shape)
print(coors.shape)
print(num_points_per_voxel.shape)


100%|██████████| 140750/140750 [00:09<00:00, 15265.75it/s]
100%|██████████| 633389/633389 [00:39<00:00, 16057.93it/s]


torch.Size([11233, 32, 36])
torch.Size([11233, 4])
torch.Size([11233])


In [25]:
import numpy as np
from cumm import tensorview as tv
from spconv.utils import Point2VoxelCPU3d

gen = Point2VoxelCPU3d(
        vsize_xyz=[1/3.2, 1/3.2, 200],
        coors_range_xyz=[-40, -20, -100, 40, 60, 100],
        num_point_features=
        5,  # here num_point_features must equal to pc.shape[1]
        max_num_voxels=200000,
        max_num_points_per_voxel=32)

pc = np.random.uniform(-10, 10, size=[100000, 3])
other_pc_feature = np.random.uniform(-1, 1, size=[100000, 2])
pc_with_feature = np.concatenate([pc, other_pc_feature], axis=1)
pc_tv = tv.from_numpy(pc_with_feature)
voxels_tv, indices_tv, num_p_in_vx_tv = gen.point_to_voxel(pc_tv)

print(voxels_tv.shape)
print(indices_tv.numpy_view()[:, 1].max())
print(num_p_in_vx_tv.shape)

[4096, 32, 5]
95
[4096]


In [5]:
import torch.nn.functional as F

class PFNLayer(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 use_norm=True,
                 last_layer=False):
        super().__init__()
 
        self.last_vfe = last_layer
        self.use_norm = use_norm
        if not self.last_vfe:
            out_channels = out_channels // 2
 
        if self.use_norm:
            # 根据论文中，这是是简化版pointnet网络层的初始化
            # 论文中使用的是 1x1 的卷积层完成这里的升维操作（理论上使用卷积的计算速度会更快）
            # 输入的通道数是刚刚经过数据增强过后的点云特征，每个点云有10个特征，
            # 输出的通道数是64
            self.linear = nn.Linear(in_channels, out_channels, bias=False)
            # 一维BN层
            self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
        else:
            self.linear = nn.Linear(in_channels, out_channels, bias=True)
 
        self.part = 50000
 
    def forward(self, inputs):
        if inputs.shape[0] > self.part:
            # nn.Linear performs randomly when batch size is too large
            num_parts = inputs.shape[0] // self.part
            part_linear_out = [self.linear(inputs[num_part * self.part:(num_part + 1) * self.part])
                               for num_part in range(num_parts + 1)]
            x = torch.cat(part_linear_out, dim=0)
        else:
            # x的维度由（M, 32, 10）升维成了（M, 32, 64）
            x = self.linear(inputs)
        torch.backends.cudnn.enabled = False
        # BatchNorm1d层:(M, 64, 32) --> (M, 32, 64)
        # （pillars,num_point,channel）->(pillars,channel,num_points)
        # 这里之所以变换维度，是因为BatchNorm1d在通道维度上进行,对于图像来说默认模式为[N,C,H*W],通道在第二个维度上
        x = self.norm(x.permute(0, 2, 1)).permute(0, 2, 1) if self.use_norm else x
        torch.backends.cudnn.enabled = True
        x = F.relu(x)
        # 完成pointnet的最大池化操作，找出每个pillar中最能代表该pillar的点
        # x_max shape ：（M, 1, 64）　
        # print(x.shape)   #[M, ]
        #####################################################################
        x_max = torch.max(x, dim=1, keepdim=True)[0]
 
        if self.last_vfe:
            # 返回经过简化版pointnet处理pillar的结果
            return x_max
        else:
            x_repeat = x_max.repeat(1, inputs.shape[1], 1)
            # print('x', x.shape)
            # print('x_repeat', x_repeat.shape)
            x_concatenated = torch.cat([x, x_repeat], dim=2)
            # print('x_concat', x_concatenated.shape)
            return x_concatenated
        #####################################################################
        # return x
            

USE_NORM = True
NUM_FILTERS = [64]
voxel_size = [1/3.2, 1/3.2]
point_cloud_range = [-40, -20, 40, 60]  #[x_min, y_min, x_max, y_max]
 
class PillarVFE(nn.Module):
    """
    model_cfg:NAME: PillarVFE
                    WITH_DISTANCE: False
                    USE_ABSLOTE_XYZ: True
                    USE_NORM: True
                    NUM_FILTERS: [64]
    num_point_features:4
    voxel_size:[0.16 0.16 4]
    POINT_CLOUD_RANGE: [0, -39.68, -3, 69.12, 39.68, 1]
    """
 
    def __init__(self, num_point_features, voxel_size, point_cloud_range, with_distance = True):
        super().__init__()
 
        self.use_norm = USE_NORM
        num_point_features += 4
        self.with_distance = with_distance
        if self.with_distance:
            num_point_features += 1
 
        self.num_filters = NUM_FILTERS
        assert len(self.num_filters) > 0
        num_filters = [num_point_features] + list(self.num_filters)
 
        pfn_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            pfn_layers.append(
                PFNLayer(in_filters, out_filters, self.use_norm, last_layer=(i >= len(num_filters) - 2))
            )
        # 加入线性层，将10维特征变为64维特征
        self.pfn_layers = nn.ModuleList(pfn_layers)
 
        self.voxel_x = voxel_size[0]
        self.voxel_y = voxel_size[1]
        self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
        self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
 
    def get_output_feature_dim(self):
        return self.num_filters[-1]
 
    def get_paddings_indicator(self, actual_num, max_num, axis=0):
        """
        Args:
            actual_num:每个voxel实际点的数量(M,)
            max_num:voxel最大点的数量(32,)
        Returns:
            paddings_indicator:表明一个pillar中哪些是真实数据,哪些是填充的0数据
        """
        # 扩展一个维度，使变为（M，1）
        actual_num = torch.unsqueeze(actual_num, axis + 1)
        # [1, 1]
        max_num_shape = [1] * len(actual_num.shape)
        # [1, -1]
        max_num_shape[axis + 1] = -1
        # (1,32)
        max_num = torch.arange(max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
        # (M, 32)
        paddings_indicator = actual_num.int() > max_num
        return paddings_indicator
 
    def forward(self, voxel_features, coords, num_points_per_voxel):
        # 求每个pillar中所有点云的和 (M, 32, 3)->(M, 1, 3) 设置keepdim=True的，则保留原来的维度信息
        # 然后在使用求和信息除以每个点云中有多少个点来求每个pillar中所有点云的平均值 points_mean shape：(M, 1, 3)
        points_mean = voxel_features[:, :, :2].sum(dim=1, keepdim=True) / num_points_per_voxel.type_as(voxel_features).view(-1, 1, 1)
        # 每个点云数据减去该点对应pillar的平均值得到差值 xc,yc,zc
        f_cluster = voxel_features[:, :, :2] - points_mean
 
        # 创建每个点云到该pillar的坐标中心点偏移量空数据 xp,yp
        f_center = torch.zeros_like(voxel_features[:, :, :2])
        #  coords是每个网格点的坐标，即[432, 496, 1]，需要乘以每个pillar的长宽得到点云数据中实际的长宽（单位米）
        #  同时为了获得每个pillar的中心点坐标，还需要加上每个pillar长宽的一半得到中心点坐标
        #  每个点的x、y、z减去对应pillar的坐标中心点，得到每个点到该点中心点的偏移量
        f_center[:, :, 0] = voxel_features[:, :, 0] - (
                coords[:, 0].to(voxel_features.dtype).unsqueeze(1) * self.voxel_x + self.x_offset)
        f_center[:, :, 1] = voxel_features[:, :, 1] - (
                coords[:, 1].to(voxel_features.dtype).unsqueeze(1) * self.voxel_y + self.y_offset) 
 
        # if self.use_absolute_xyz:
        features = [voxel_features[..., :-1], f_cluster, f_center]   #last dim is batch_idx
        # else:
        #     features = [voxel_features[..., 3:], f_cluster, f_center]
        # 如果使用距离信息
        if self.with_distance:
            # torch.norm的第一个2指的是求2范数，第二个2是在第三维度求范数
            points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, keepdim=True)
            features.append(points_dist)
        # 将特征在最后一维度拼接 得到维度为（M，32,10）的张量
        features = torch.cat(features, dim=-1)
        # 每个pillar中点云的最大数量
        voxel_count = features.shape[1]
        """
        由于在生成每个pillar中,不满足最大32个点的pillar会存在由0填充的数据,
        而刚才上面的计算中，会导致这些
        由0填充的数据在计算出现xc,yc,zc和xp,yp,zp出现数值,
        所以需要将这个被填充的数据的这些数值清0,
        因此使用get_paddings_indicator计算features中哪些是需要被保留真实数据和需要被置0的填充数据
        """
        # 得到mask维度是（M， 32）
        # mask中指名了每个pillar中哪些是需要被保留的数据
        mask = self.get_paddings_indicator(num_points_per_voxel, voxel_count, axis=0)
        # （M， 32）->(M, 32, 1)
        mask = torch.unsqueeze(mask, -1).type_as(voxel_features)
        # 将feature中被填充数据的所有特征置0
        features *= mask
 
        for pfn in self.pfn_layers:
            features = pfn(features)
        # (M, 64), 每个pillar抽象出一个64维特征
        features = features.squeeze()
        pillar_features = features
        return pillar_features

In [6]:
pillar_VFE = PillarVFE(voxels.shape[-1]-1, voxel_size, point_cloud_range, True)
pillar_features = pillar_VFE.forward(voxels, coors, num_points_per_voxel)
print(pillar_features.shape)

torch.Size([12039, 64])


In [7]:
NUM_BEV_FEATURES = 64
class PointPillarScatter(nn.Module):
    def __init__(self, grid_size):
        super().__init__()

        self.num_bev_features = NUM_BEV_FEATURES  # 64
        self.nx, self.ny, self.nz = grid_size  # [432,496,1]
        assert self.nz == 1
 
    def forward(self, pillar_features, coors):
        # 拿到经过前面pointnet处理过后的pillar数据和每个pillar所在点云中的坐标位置
        # pillar_features 维度 （M， 64）
        # coords 维度 （M， 4）
 
        # 将转换成为伪图像的数据存在到该列表中
        batch_spatial_features = []
        batch_size = coors[:, 0].max().int().item() + 1
        print(batch_size)
        # for batch_idx in range(batch_size):
            
        for batch_idx in range(batch_size):
            # 创建一个空间坐标所有用来接受pillar中的数据
            # self.num_bev_features是64
            # self.nz * self.nx * self.ny是生成的空间坐标索引 [496, 432, 1]的乘积
            # spatial_feature 维度 (64,214272)
            spatial_feature = torch.zeros((self.nx, self.ny, self.num_bev_features),dtype=pillar_features.dtype,device=pillar_features.device)  # (64,214272)-->1x432x496=214272
 
            # 从coords[:, 0]取出该batch_idx的数据mask
            batch_mask = coors[:, 0] == batch_idx
            # 根据mask提取坐标
            this_coors = coors[batch_mask, :].type(torch.long)
            # this_coords中存储的坐标是z,y和x的形式,且只有一层，因此计算索引的方式如下
            # 平铺后需要计算前面有多少个pillar 一直到当前pillar的索引

            pillars = pillar_features[batch_mask, :]
            spatial_feature[this_coors[:, 1], this_coors[:, 2], :] = pillars
            # 将空间特征加入list,每个元素为(64, 214272)
            batch_spatial_features.append(spatial_feature)

        batch_spatial_features = torch.stack(batch_spatial_features, 0)
 
        return batch_spatial_features

In [8]:
pillar_scatter = PointPillarScatter([grid_height_cells, grid_width_cells, 1])
batch_spatial_features = pillar_scatter.forward(pillar_features, coors)
print(batch_spatial_features.shape)

2
torch.Size([2, 256, 256, 64])


In [29]:
def rotate_points_around_origin(x, y, angle):
    translated_x = np.cos(angle) * x - np.sin(angle) * y
    translated_y = np.sin(angle) * x + np.cos(angle) * y
    
    return translated_x, translated_y
    

def transform_roadgraph(data):
    sdc_xyz = np.stack((data['sdc/current/x'], data['sdc/current/y'], data['sdc/current/z']), 2)
    # sdc_xyz = sdc_xyz[:, :, np.newaxis, :]
    
    #translate
    rg_points = data['roadgraph_samples/xyz'] - sdc_xyz
    rg_x, rg_y = rg_points[..., 0], rg_points[..., 1]
    
    #rotate
    angle = np.pi / 2 - data['sdc/current/bbox_yaw']
    rg_x, rg_y = rotate_points_around_origin(rg_x, rg_y, angle)
    
    return rg_x, rg_y, rg_points[..., 2]

def process_roadgraph(data):
    rg_x, rg_y, rg_z = transform_roadgraph(data_1)
    img_rg_x, img_rg_y, points_is_in_fov_rg = transform_to_image_coordinate(rg_x, rg_y)
    
    return rg_x, rg_y, points_is_in_fov_rg

In [33]:
rg_x_1, rg_y_1, points_is_in_fov_rg_1 = process_roadgraph(data_1)
print(rg_x_1.shape)
print(points_is_in_fov_rg_1.shape)

points_indices = np.argwhere(points_is_in_fov_rg_1).astype(np.int32)
rg_x_1 = rg_x_1[points_indices[:, 0], points_indices[:, 1]]
rg_y_1 = rg_y_1[points_indices[:, 0], points_indices[:, 1]]

rg_x_1 = torch.from_numpy(rg_x_1[..., np.newaxis])
rg_y_1 = torch.from_numpy(rg_y_1[..., np.newaxis])
batch_idx = 0
batch_concat = torch.LongTensor([batch_idx+1]).repeat(rg_x_1.shape[0], 1)

points_features_rg = torch.cat((rg_x_1, rg_y_1, batch_concat), -1)
print(points_features_rg.shape)

(1, 19562)
(1, 19562)
torch.Size([4885, 3])


In [37]:
voxel_generator_rg = VoxelGenerator(grid_height_cells=256, grid_width_cells=256, max_num_points=32)
voxels_rg, coors_rg, num_points_per_voxel_rg = voxel_generator_rg.generate(points_features_rg)
print(voxels_rg.shape)
print(coors_rg.shape)
print(num_points_per_voxel_rg.shape)

0it [00:00, ?it/s]
100%|██████████| 4885/4885 [00:00<00:00, 12259.98it/s]


torch.Size([4662, 32, 3])
torch.Size([4662, 4])
torch.Size([4662])


In [40]:
pillar_VFE_rg = PillarVFE(2, voxel_size, point_cloud_range, False)
pillar_features_rg = pillar_VFE_rg.forward(voxels_rg, coors_rg, num_points_per_voxel_rg)
print(pillar_features_rg.shape)


torch.Size([4662, 64])
