In [None]:
import os

import math
import numpy as np

import random

import tensorboard

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torch.utils.tensorboard import SummaryWriter

from scipy.spatial import ConvexHull

from shapely.geometry import Polygon
import scipy.io as io

%load_ext autoreload
%autoreload 2

In [None]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
# The module-level `device` is read by later cells (e.g. MEAN_SIZE_ARR and the Loss module).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Root folder handed to KittiDataset (split lists, velodyne/, calib/ and label_2/ are read below it).
# NOTE(review): hardcoded absolute local path -- adjust for your machine.
DATASET_PATH = "/mnt/raid/xyiheng/training/"
# Passed to KittiDataset as `sample_size` (number of points to draw per scan).
SAMPLE_SIZE = 18750
# Batch size used by both the training and the validation DataLoader.
BATCH_SIZE = 8
# Not referenced in the visible cells; presumably background + Car/Pedestrian/Cyclist -- TODO confirm.
NUM_CLASSES = 4
# Objectness labelling thresholds used by Loss: a vote whose distance to the nearest
# label center is below NEAR_THRESHOLD is a positive, above FAR_THRESHOLD a negative;
# votes in between are left out of the objectness loss.
NEAR_THRESHOLD = 1
FAR_THRESHOLD = 2

In [None]:
# Mean box size per size cluster, one row per class in the order Car, Pedestrian, Cyclist.
# The values are the same ones KittiDataset writes into label columns 16:19, in the
# same (height, width, length) order as label columns 7:10.
MEAN_SIZE_ARR = torch.FloatTensor([[1.5260834319114884,1.6285898684851439,3.883954491684643],
                                   [1.7607064854022731,0.6601894361488745,0.8422843770893693],
                                   [1.73720344191764,0.5967732022126614,1.7635464044253226]]).to(device)
# Scale factor multiplied onto MEAN_SIZE_ARR inside Loss when size residuals are normalised.
RATIO = 1

In [None]:
def inverse_rigid_trans(Tr):
    """Invert a rigid-body transform given as a 3x4 matrix [R|t].

    The inverse of [R|t] is [R^T | -R^T t]; the result has the same shape
    and dtype as ``Tr``.

    Args:
        Tr: numpy array whose first three rows/columns hold the rotation R
            and whose fourth column holds the translation t.

    Returns:
        numpy array like ``Tr`` holding the inverse transform.
    """
    rotation_t = Tr[0:3, 0:3].T
    inv_Tr = np.zeros_like(Tr)  # 3x4
    inv_Tr[0:3, 0:3] = rotation_t
    inv_Tr[0:3, 3] = (-rotation_t) @ Tr[0:3, 3]
    return inv_Tr

In [None]:
## Define Dataset
class KittiDataset(Dataset):
    """KITTI velodyne scans paired with a fixed-size table of object labels.

    Each item is ``(scan, labels)``:

    * ``scan``   -- FloatTensor ``(sample_size, 3)``: xyz points cropped to the
      ranges in ``X_RANGE`` / ``Y_RANGE`` / ``Z_RANGE`` and subsampled to a
      fixed size so that the default DataLoader collate can stack them.
    * ``labels`` -- FloatTensor ``(17, 20)``: one row per line of the label file
      (only the first 17 lines are considered). Rows that are not a
      Car/Pedestrian/Cyclist, and rows beyond the end of the file, are padding.

    Label columns:
        0      class id (0 padding, 1 Car, 2 Pedestrian, 3 Cyclist)
        1:4    center of the axis-aligned box in the velodyne frame
        4      rotation_y as stored in the label file (radians)
        5      angle bin of column 4 (12 bins of pi/6 over [-pi, pi))
        6      size cluster (0 Car, 1 Pedestrian, 2 Cyclist)
        7:10   height, width, length
        10:16  xmin, xmax, ymin, ymax, zmin, zmax of the box in the velodyne frame
        16:19  mean height, width, length of the class
        19     integer value of label field 2 (called "difficulty" in this notebook)

    Padding rows hold ``PAD_VALUE`` everywhere except columns 0, 5 and 6,
    which are 0.
    """

    MAX_OBJECTS = 17
    LABEL_WIDTH = 1 + 4 + 2 + 3 + 6 + 3 + 1
    PAD_VALUE = -1000
    X_RANGE = (0, 70.4)
    Y_RANGE = (-40, 40)
    Z_RANGE = (-3, 1)
    # class name -> (class id, size cluster, mean (height, width, length))
    CLASS_INFO = {
        "Car": (1, 0, (1.5260834319114884, 1.6285898684851439, 3.883954491684643)),
        "Pedestrian": (2, 1, (1.7607064854022731, 0.6601894361488745, 0.8422843770893693)),
        "Cyclist": (3, 2, (1.73720344191764, 0.5967732022126614, 1.7635464044253226)),
    }

    def __init__(self, train, dataset_path, sample_size, augment):
        """
        Args:
            train: read ``train.txt`` when true, ``val.txt`` otherwise.
            dataset_path: folder holding the split files and the ``velodyne``,
                ``calib`` and ``label_2`` sub-folders.
            sample_size: number of points returned per scan.
            augment: apply random flip / rotation / scaling when true.
        """
        super(KittiDataset, self).__init__()
        self.train = train
        self.dataset_path = dataset_path
        self.sample_size = sample_size
        self.augment = augment
        split_file = "train.txt" if self.train else "val.txt"
        # `with` closes the handle; the original left it open.
        with open(os.path.join(self.dataset_path, split_file), "r") as f:
            self.index = f.read().splitlines()

    def __len__(self):
        return len(self.index)

    def theta_to_class(self, theta):
        """Return the pi/6-wide angle bin of a scalar angle.

        Out-of-range bins (e.g. bin 12 for theta == pi) map to 0, the same
        rule ``theta_array_to_class`` applies.
        """
        bin_index = int((theta + np.pi) / (np.pi / 6))
        return bin_index if 0 <= bin_index < 12 else 0

    def theta_array_to_class(self, theta_array):
        """Vectorised ``theta_to_class``: angle bins of an array of angles."""
        # np.int was removed from NumPy; astype(int64) truncates toward zero like the original cast.
        theta_class = ((np.asarray(theta_array) + np.pi) / (np.pi / 6)).astype(np.int64)
        theta_class[theta_class >= 12] = 0
        theta_class[theta_class < 0] = 0
        return theta_class

    @staticmethod
    def _wrap_angle(theta):
        """Map angles (radians) to the equivalent angle in [-pi, pi)."""
        return (theta + np.pi) % (2 * np.pi) - np.pi

    @classmethod
    def _crop_to_range(cls, points):
        """Keep the rows of an (N, 3) array that lie inside the x/y/z ranges (inclusive)."""
        x, y, z = points[:, 0], points[:, 1], points[:, 2]
        # The original passed the z condition as the third positional argument of
        # np.logical_and, which is `out`, so z was never filtered.
        keep = (
            (x >= cls.X_RANGE[0]) & (x <= cls.X_RANGE[1])
            & (y >= cls.Y_RANGE[0]) & (y <= cls.Y_RANGE[1])
            & (z >= cls.Z_RANGE[0]) & (z <= cls.Z_RANGE[1])
        )
        return points[keep]

    def _load_scan(self, idx_str):
        """Read one velodyne .bin file and return the cropped xyz points as float64 (N, 3)."""
        path = os.path.join(self.dataset_path, "velodyne", idx_str + ".bin")
        points = np.fromfile(path, dtype=np.float32).reshape((-1, 4))[:, 0:3]
        return self._crop_to_range(points).astype(np.float64)

    def _load_calib(self, idx_str):
        """Return (R0, Tr_cam_veo) parsed from lines 4 and 5 of the calibration file."""
        path = os.path.join(self.dataset_path, "calib", idx_str + ".txt")
        with open(path, "r") as f:
            lines = f.readlines()
        R0 = np.array(lines[4].split(" ")[1:], dtype=np.float64).reshape((3, 3))
        Tr_veo_cam = np.array(lines[5].split(" ")[1:], dtype=np.float64).reshape((3, 4))
        return R0, inverse_rigid_trans(Tr_veo_cam)

    def _encode_object(self, data, R0_inv, Tr_cam_veo):
        """Build one label row from the whitespace-split fields of a label line."""
        class_id, size_cluster, mean_size = self.CLASS_INFO[data[0]]
        h, w, l = float(data[8]), float(data[9]), float(data[10])
        rotation_y = float(data[14])

        # Eight box corners around the bottom-center origin, before rotation.
        x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
        y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
        z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
        cos_r, sin_r = np.cos(rotation_y), np.sin(rotation_y)
        R = np.array([[cos_r, 0.0, sin_r],
                      [0.0, 1.0, 0.0],
                      [-sin_r, 0.0, cos_r]])
        corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners]))
        corners_3d[0, :] += float(data[11])
        corners_3d[1, :] += float(data[12])
        corners_3d[2, :] += float(data[13])
        # Undo the rectification, then map into the velodyne frame.
        corners_3d = np.transpose(np.dot(R0_inv, corners_3d))  # [8,3]
        corners_3d_hom = np.hstack((corners_3d, np.ones((corners_3d.shape[0], 1))))
        corners_3d = np.dot(corners_3d_hom, np.transpose(Tr_cam_veo))

        lo = corners_3d.min(axis=0)
        hi = corners_3d.max(axis=0)

        row = np.zeros(self.LABEL_WIDTH)
        row[0] = class_id
        row[1:4] = (lo + hi) / 2
        row[4] = rotation_y
        row[5] = self.theta_to_class(rotation_y)
        row[6] = size_cluster
        row[7:10] = (h, w, l)
        row[10:16] = (lo[0], hi[0], lo[1], hi[1], lo[2], hi[2])
        row[16:19] = mean_size
        row[19] = int(data[2])
        return row

    def _load_labels(self, idx_str, R0, Tr_cam_veo):
        """Return the (17, 20) label table for one frame; unused rows are padding."""
        path = os.path.join(self.dataset_path, "label_2", idx_str + ".txt")
        with open(path, "r") as f:
            lines = f.readlines()

        labels = np.full((self.MAX_OBJECTS, self.LABEL_WIDTH), float(self.PAD_VALUE))
        labels[:, [0, 5, 6]] = 0
        R0_inv = np.linalg.inv(R0)
        # Row i corresponds to line i of the file; other classes stay padding.
        for i, line in enumerate(lines[:self.MAX_OBJECTS]):
            data = line.split(" ")
            if data[0] in self.CLASS_INFO:
                labels[i] = self._encode_object(data, R0_inv, Tr_cam_veo)
        return labels

    def _augment(self, scan, labels):
        """Randomly flip, rotate about z and rescale a scan together with its labels.

        Both arrays are modified in place and returned. Angles of real objects
        are wrapped back into [-pi, pi) after every step; padding rows keep
        their padding angle and therefore angle bin 0.
        """
        valid = labels[:, 0] > 0

        if np.random.random() > 0.5:
            # Flipping along the YZ plane (x -> -x); rotation_y becomes -rotation_y.
            scan[:, 0] = -scan[:, 0]
            labels[:, 1] = -labels[:, 1]
            labels[:, [10, 11]] = -labels[:, [11, 10]]
            labels[valid, 4] = self._wrap_angle(-labels[valid, 4])

        if np.random.random() > 0.5:
            # Flipping along the XZ plane (y -> -y); rotation_y becomes pi - rotation_y.
            scan[:, 1] = -scan[:, 1]
            labels[:, 2] = -labels[:, 2]
            labels[:, [12, 13]] = -labels[:, [13, 12]]
            labels[valid, 4] = self._wrap_angle(np.pi - labels[valid, 4])

        # Rotation along up-axis/Z-axis, -5 ~ +5 degree. The original skipped the
        # rotation whenever any row left [-pi, pi]; padding rows (angle -1000)
        # always did, so the rotation was effectively never applied. Wrapping the
        # angle of real objects makes the skip unnecessary.
        theta = (np.random.random() * np.pi / 18) - np.pi / 36
        labels[valid, 4] = self._wrap_angle(labels[valid, 4] - theta)
        labels[:, 5] = self.theta_array_to_class(labels[:, 4])

        cos_t, sin_t = np.cos(theta), np.sin(theta)
        matrix = np.array([[cos_t, -sin_t, 0.0],
                           [sin_t, cos_t, 0.0],
                           [0.0, 0.0, 1.0]])
        scan[:, 0:3] = np.dot(scan[:, 0:3], np.transpose(matrix))
        labels[:, 1:4] = np.dot(labels[:, 1:4], np.transpose(matrix))
        # Axis-aligned bounds of the rotated box: rotate all four footprint corners
        # and take min/max (rotating only the min and max corner does not give a
        # valid box). z bounds are unchanged by a rotation about z.
        corner_x = labels[:, [10, 10, 11, 11]]
        corner_y = labels[:, [12, 13, 12, 13]]
        rotated_x = cos_t * corner_x - sin_t * corner_y
        rotated_y = sin_t * corner_x + cos_t * corner_y
        labels[:, 10] = rotated_x.min(axis=1)
        labels[:, 11] = rotated_x.max(axis=1)
        labels[:, 12] = rotated_y.min(axis=1)
        labels[:, 13] = rotated_y.max(axis=1)

        # Rescale randomly by 0.9 - 1.1. Columns 7:19 cover sizes, box bounds and the
        # mean sizes, so size residuals stay consistent. (The original also assigned
        # a local `RATIO` here, which never reached the module-level RATIO.)
        proportion = np.random.uniform(0.9, 1.1, 1)
        scan[:, 0:3] = scan[:, 0:3] * proportion
        labels[:, 1:4] = labels[:, 1:4] * proportion
        labels[:, 7:19] = labels[:, 7:19] * proportion
        return scan, labels

    def _subsample(self, scan):
        """Return exactly ``sample_size`` rows of ``scan``.

        Sampling is without replacement when enough points exist; otherwise all
        points are kept and the remainder is filled with random repeats.

        Raises:
            ValueError: if ``scan`` has no points.
        """
        num_points = scan.shape[0]
        if num_points == 0:
            raise ValueError("cannot sample from an empty scan")
        if num_points >= self.sample_size:
            chosen = random.sample(range(num_points), self.sample_size)
        else:
            chosen = list(range(num_points)) + random.choices(
                range(num_points), k=self.sample_size - num_points)
        return scan[np.array(chosen, dtype=np.int64)]

    def __getitem__(self, idx):
        """Load, optionally augment and subsample frame ``idx``; see the class docstring."""
        idx_str = self.index[idx]
        scan = self._load_scan(idx_str)
        R0, Tr_cam_veo = self._load_calib(idx_str)
        labels = self._load_labels(idx_str, R0, Tr_cam_veo)

        if self.augment:
            scan, labels = self._augment(scan, labels)

        # The original computed sample indices but returned the full scan, so scans
        # had different lengths and could not be batched.
        scan = self._subsample(scan)
        return torch.FloatTensor(scan), torch.FloatTensor(labels)

In [None]:
# Training split with random augmentation enabled; validation split without augmentation.
training_set = KittiDataset(True, DATASET_PATH, SAMPLE_SIZE, True)
val_set = KittiDataset(False, DATASET_PATH, SAMPLE_SIZE, False)
# Both loaders drop the last incomplete batch and load with 4 worker processes;
# only the training loader shuffles.
training_dataloader = DataLoader(dataset = training_set, batch_size = BATCH_SIZE, 
                                 shuffle = True, drop_last = True, num_workers=4)
val_dataloader = DataLoader(dataset = val_set, batch_size = BATCH_SIZE, 
                                 shuffle = False, drop_last = True, num_workers=4)

In [None]:
# Number of batches per epoch for the training and validation loaders.
print(len(training_dataloader))
print(len(val_dataloader))

In [None]:
def index_points(points, idx):
    """Gather points per batch element.

    Input:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S] (or [B, S, K] for grouped indices)
    Return:
        new_points: indexed points data, idx.shape + [C]
    """
    # One batch index per entry of idx: shape [B, 1, ..., 1] broadcast over idx.
    leading_shape = [idx.shape[0]] + [1] * (idx.dim() - 1)
    batch_ids = torch.arange(points.shape[0], dtype=torch.long).to(points.device)
    batch_ids = batch_ids.view(leading_shape).expand_as(idx)
    return points[batch_ids, idx, :]

In [None]:
def farthest_point_sample(xyz, num_centroids):
    """Iterative farthest point sampling.

    Starting from one random point per batch element, repeatedly picks the
    point whose squared Euclidean distance to the already chosen set is
    largest.

    Input:
        xyz: pointcloud data, [B, N, C]
        num_centroids: number of samples(centroids)
    Return:
        centroids: sampled pointcloud index, [B, num_centroids]
    """
    device = xyz.device
    B, N, C = xyz.shape
    centroids = torch.zeros(B, num_centroids, dtype=torch.long).to(device)
    # distance[b, n]: squared distance from point n to its nearest chosen centroid.
    distance = torch.ones(B, N).to(device) * 1e10
    farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
    batch_indices = torch.arange(B, dtype=torch.long).to(device)
    for i in range(num_centroids):
        centroids[:, i] = farthest # index
        centroid = xyz[batch_indices, farthest, :].view(B, 1, C)
        # Squared Euclidean distance. The original summed sqrt(diff ** 2), i.e.
        # |dx| + |dy| + |dz|, which ranks points by Manhattan distance instead.
        dist = torch.sum((xyz - centroid) ** 2, -1)
        distance = torch.min(distance, dist.to(distance.dtype))
        farthest = torch.max(distance, -1)[1]
    return centroids

In [None]:
def square_distance(src, dst):
    """
    Calculate the squared Euclidean distance between each pair of points.

    Uses the expansion
        |s - d|^2 = sum(s**2, dim=-1) + sum(d**2, dim=-1) - 2 * s^T d
    so only one batched matrix product is needed.

    Floating-point cancellation in that expansion can produce tiny negative
    values for (near-)identical points; the result is clamped at 0 so callers
    can safely take square roots or reciprocals.

    Input:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]
    Output:
        dist: per-point square distance, [B, N, M], all entries >= 0
    """
    B, N, _ = src.shape
    _, M, _ = dst.shape
    dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
    dist += torch.sum(src ** 2, -1).view(B, N, 1)
    dist += torch.sum(dst ** 2, -1).view(B, 1, M)
    return torch.clamp(dist, min=0)

In [None]:
def query_ball_point(radius, nsample, xyz, new_xyz):
    """Ball query: for every query point, indices of up to nsample points within radius.

    Points are taken in increasing index order. When fewer than nsample points
    fall inside the ball, the remaining slots repeat the first index found.

    Input:
        radius: local region radius
        nsample: max sample number in local region
        xyz: all points, [B, N, 3]
        new_xyz: query points, [B, S, 3]
    Return:
        group_idx: grouped points index, [B, S, nsample]
    """
    B, N, _ = xyz.shape
    S = new_xyz.shape[1]
    outside_ball = square_distance(new_xyz, xyz) > radius ** 2
    candidates = torch.arange(N, dtype=torch.long).to(xyz.device).view(1, 1, N).repeat([B, S, 1])
    # N is one past the last valid index, so out-of-ball entries sort to the end.
    candidates[outside_ball] = N
    nearest = candidates.sort(dim=-1)[0][:, :, :nsample]
    first_hit = nearest[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
    unfilled = nearest == N
    nearest[unfilled] = first_hit[unfilled]
    return nearest

In [None]:
def sample_and_group(xyz, feature, num_centroids, num_neighbors, radius):
    """Pick centroids by farthest point sampling and group their ball neighbours.

    Input:
        xyz: input points position data, [B, N, 3]
        feature: input feature data, [B, N, D], or None
        num_centroids: number of centroids to sample
        num_neighbors: number of neighbours gathered per centroid
        radius: ball-query radius
    Return:
        centroids: sampled points position data, [B, num_centroids, 3]
        new_points: neighbour positions relative to their centroid, concatenated
            with the neighbour features when given,
            [B, num_centroids, num_neighbors, 3 + D]
    """
    B, _, C = xyz.shape
    centroid_idx = farthest_point_sample(xyz, num_centroids)  # [B, num_centroids]
    centroids = index_points(xyz, centroid_idx)
    group_idx = query_ball_point(radius, num_neighbors, xyz, centroids)
    # Express every neighbour in the local frame of its centroid.
    local_xyz = index_points(xyz, group_idx) - centroids.view(B, num_centroids, 1, C)

    if feature is None:
        return centroids, local_xyz

    grouped_feature = index_points(feature, group_idx)
    return centroids, torch.cat([local_xyz, grouped_feature], dim=-1)

In [None]:
class PointNetSetAbstraction(nn.Module):
    """Set-abstraction layer: sample centroids, group neighbours, apply a shared MLP, max-pool.

    Args:
        num_centroids: number of points kept by this layer.
        radius: ball-query radius used for grouping.
        num_neighbors: neighbours gathered per centroid.
        in_channel: channels of the grouped input (3 + feature channels).
        mlp: output widths of the successive 1x1 conv + batch-norm + ReLU stages.
    """

    def __init__(self, num_centroids, radius, num_neighbors, in_channel, mlp):
        super().__init__()
        self.num_centroids = num_centroids
        self.radius = radius
        self.num_neighbors = num_neighbors

        self.conv_list = nn.ModuleList()
        self.bn_list = nn.ModuleList()

        widths = [in_channel] + list(mlp)
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            self.conv_list.append(nn.Conv2d(c_in, c_out, kernel_size=1, stride=1))
            self.bn_list.append(nn.BatchNorm2d(c_out))

    def forward(self, xyz, feature):
        """
        Input:
            xyz: point positions, [B, C, N]
            feature: point features, [B, D, N], or None
        Return:
            new_xyz: centroid positions, [B, C, num_centroids]
            new_points: pooled centroid features, [B, mlp[-1], num_centroids]
        """
        points = xyz.permute(0, 2, 1)
        point_feature = None if feature is None else feature.permute(0, 2, 1)

        centroids, grouped = sample_and_group(
            points, point_feature, self.num_centroids, self.num_neighbors, self.radius)
        # [B, num_centroids, num_neighbors, 3 + D] -> [B, 3 + D, num_neighbors, num_centroids]
        grouped = grouped.permute(0, 3, 2, 1)
        for conv, bn in zip(self.conv_list, self.bn_list):
            grouped = F.relu(bn(conv(grouped)), inplace=True)
        # Max over the neighbour dimension leaves one feature vector per centroid.
        pooled = grouped.max(dim=2)[0]
        return centroids.permute(0, 2, 1), pooled

In [None]:
class PointNetFeaturePropagation(nn.Module):
    """Feature-propagation layer: interpolate coarse features onto a denser point set.

    Each dense point receives the inverse-squared-distance weighted average of
    the features of its three nearest coarse points, optionally concatenated
    with its own skip features, followed by a shared 1x1 conv MLP.

    Args:
        in_channel: channels entering the MLP (skip + interpolated channels).
        mlp: output widths of the successive conv + batch-norm + ReLU stages.
    """

    def __init__(self, in_channel, mlp):
        super().__init__()
        self.conv_list = nn.ModuleList()
        self.bn_list = nn.ModuleList()
        widths = [in_channel] + list(mlp)
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            self.conv_list.append(nn.Conv1d(c_in, c_out, 1))
            self.bn_list.append(nn.BatchNorm1d(c_out))

    def forward(self, xyz1, xyz2, feature1, feature2):
        """
        Input:
            xyz1: input points position data, [B, C, N]
            xyz2: sampled input points position data, [B, C, S] (needs S >= 3)
            feature1: skip features of the dense points, [B, D1, N], or None
            feature2: features of the sampled points, [B, D2, S]
        Return:
            new_feature: upsampled feature data, [B, mlp[-1], N]
        """
        dense_xyz = xyz1.permute(0, 2, 1)   # [B, N, C]
        coarse_xyz = xyz2.permute(0, 2, 1)  # [B, S, C]
        B, N = dense_xyz.shape[0], dense_xyz.shape[1]

        sorted_dists, sorted_idx = square_distance(dense_xyz, coarse_xyz).sort(dim=-1)  # [B, N, S]
        nearest_dists = sorted_dists[:, :, :3]  # [B, N, 3]
        nearest_idx = sorted_idx[:, :, :3]      # [B, N, 3]

        # Inverse-distance weights, normalised to sum to 1 per dense point.
        inverse = 1.0 / (nearest_dists + 1e-8)
        weights = inverse / torch.sum(inverse, dim=2, keepdim=True)  # [B, N, 3]

        coarse_feature = feature2.permute(0, 2, 1)  # [B, S, D2]
        interpolated = torch.sum(
            index_points(coarse_feature, nearest_idx) * weights.view(B, N, 3, 1), dim=2)

        if feature1 is None:
            stacked = interpolated
        else:
            stacked = torch.cat([feature1.permute(0, 2, 1), interpolated], dim=-1)

        out = stacked.permute(0, 2, 1)  # [B, channels, N]
        for conv, bn in zip(self.conv_list, self.bn_list):
            out = F.relu(bn(conv(out)), inplace=True)
        return out

In [None]:
class PointNet2(nn.Module):
    """PointNet++ backbone: four set-abstraction layers and three feature-propagation layers.

    Features are propagated back only to the resolution of the first
    set-abstraction layer (1024 points); the last propagation step to the full
    input resolution is disabled.
    """

    def __init__(self):
        super().__init__()
        # (num_centroids, radius, num_neighbors, in_channel, mlp)
        self.sa1 = PointNetSetAbstraction(1024, 2, 64, 3, [32, 32, 64])
        self.sa2 = PointNetSetAbstraction(512, 4, 64, 64 + 3, [64, 64, 128])
        self.sa3 = PointNetSetAbstraction(256, 6, 64, 128 + 3, [128, 128, 256])
        self.sa4 = PointNetSetAbstraction(128, 8, 64, 256 + 3, [256, 256, 512])
        # (in_channel = skip channels + coarse channels, mlp)
        self.fp4 = PointNetFeaturePropagation(256 + 512, [256, 256])
        self.fp3 = PointNetFeaturePropagation(128 + 256, [256, 256])
        self.fp2 = PointNetFeaturePropagation(64 + 256, [256, 256])
        #self.fp1 = PointNetFeaturePropagation(256, [256, 256])

    def forward(self, xyz):
        """
        Input:
            xyz: point cloud, [B, C, N]; only the first three channels are used
        Return:
            xyz_1: positions of the 1024 sa1 centroids, [B, 3, 1024]
            feature: propagated features at those centroids, [B, 256, 1024]
        """
        xyz_0 = xyz[:, 0:3, :]

        # Encoder: progressively fewer points, wider features.
        xyz_1, skip_1 = self.sa1(xyz_0, None)
        xyz_2, skip_2 = self.sa2(xyz_1, skip_1)
        xyz_3, skip_3 = self.sa3(xyz_2, skip_2)
        xyz_4, deepest = self.sa4(xyz_3, skip_3)

        # Decoder: interpolate back up, merging with the encoder skips.
        up_3 = self.fp4(xyz_3, xyz_4, skip_3, deepest)
        up_2 = self.fp3(xyz_2, xyz_3, skip_2, up_3)
        up_1 = self.fp2(xyz_1, xyz_2, skip_1, up_2)
        #feature_0 = self.fp1(xyz_0, xyz_1, feature_0, feature_1)

        return xyz_1, up_1

In [None]:
class Vote(nn.Module):
    """Voting head: maps 256-channel seed features to an xyz offset and a feature residual."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(256, 256, kernel_size=1)
        self.bn1 = nn.BatchNorm1d(256)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=1)
        self.bn2 = nn.BatchNorm1d(256)
        # 3 offset channels followed by 256 residual channels.
        self.conv3 = nn.Conv1d(256, 259, kernel_size=1)

    def forward(self, feature):
        """
        Input:
            feature: seed features, [B, 256, N]
        Return:
            xyz offset [B, 3, N] and feature residual [B, 256, N]
        """
        hidden = feature
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            hidden = F.relu(bn(conv(hidden)), inplace=True)
        out = self.conv3(hidden)
        offset = out[:, 0:3, :]
        residual = out[:, 3:, :]
        return offset, residual

In [None]:
class ProposalModule(nn.Module):
    """Aggregates votes into clusters and predicts one box proposal per cluster.

    Args:
        num_proposals: number of vote clusters (= proposals).
        radius: ball-query radius for vote aggregation.
        num_neighbors: votes gathered per cluster.
        vote_dimension: channels of the grouped votes (3 + vote feature channels).
        mlp: widths of the aggregation MLP.
        num_classes: stored on the module; the prediction layout below is fixed.

    The prediction head outputs 45 channels per proposal:
    2 objectness, 4 class, 3 center, 2 * 12 angle and 4 * 3 size channels.
    """

    def __init__(self, num_proposals, radius, num_neighbors, vote_dimension, mlp, num_classes):
        super(ProposalModule, self).__init__()
        self.num_classes = num_classes
        self.vote_aggregation = PointNetSetAbstraction(num_proposals,
                                                       radius, num_neighbors,
                                                       vote_dimension, mlp)
        # Channels leaving the aggregation layer. The original hard-coded 128, which
        # only worked when mlp[-1] == 128; with an empty mlp the grouped input
        # passes through unchanged.
        aggregated_channels = mlp[-1] if len(mlp) > 0 else vote_dimension
        self.conv1 = nn.Conv1d(aggregated_channels, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.conv2 = nn.Conv1d(128, 128, 1)
        self.bn2 = nn.BatchNorm1d(128)
        self.conv3 = nn.Conv1d(128, 2 + 4 + 3 + 2 * 12 + 4 * 3, 1) # objectness, class, center, angle, size

    def forward(self, xyz, feature):
        """
        Input:
            xyz: vote positions, [B, 3, N]
            feature: vote features, [B, D, N]
        Return:
            xyz: cluster centers, [B, 3, num_proposals]
            proposal: raw predictions, [B, 45, num_proposals]
        """
        xyz, new_points = self.vote_aggregation(xyz, feature)
        proposal = F.relu(self.bn1(self.conv1(new_points)), inplace=True)
        proposal = F.relu(self.bn2(self.conv2(proposal)), inplace=True)
        proposal = self.conv3(proposal)
        return xyz, proposal

In [None]:
class VoteNet(nn.Module):
    """Full detector: PointNet++ backbone -> voting head -> proposal module.

    The constructor arguments are forwarded unchanged to ``ProposalModule``.
    """

    def __init__(self, num_proposals, radius, num_neighbors, vote_dimension, mlp, num_classes):
        super().__init__()
        self.backbone = PointNet2()
        self.vote = Vote()
        self.proposal = ProposalModule(
            num_proposals, radius, num_neighbors, vote_dimension, mlp, num_classes)

    def forward(self, xyz):
        """
        Input:
            xyz: point cloud, [B, 3, N]
        Return:
            seed: seed positions from the backbone
            xyz_offset: predicted offset of every seed
            vote: cluster centers produced by vote aggregation
            proposals: raw per-cluster predictions
        """
        seed, seed_feature = self.backbone(xyz)
        xyz_offset, feature_residual = self.vote(seed_feature)
        # Each seed votes for an object center and refines its feature.
        vote_xyz = seed + xyz_offset
        vote_feature = seed_feature + feature_residual
        cluster_xyz, proposals = self.proposal(vote_xyz, vote_feature)
        return seed, xyz_offset, cluster_xyz, proposals

In [None]:
def huber_loss(error, delta=1.0):
    """Element-wise Huber (smooth L1) loss.

    Args:
        error: Torch tensor (d1,d2,...,dk), e.g. pred - gt
        delta: threshold between the quadratic and the linear regime
    Returns:
        loss: Torch tensor (d1,d2,...,dk) with
            0.5 * |x|^2                 if |x| <= delta
            0.5 * delta^2 + delta * (|x| - delta)   if |x| > delta
    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    magnitude = error.abs()
    # Part of |x| handled quadratically; the remainder grows linearly.
    capped = magnitude.clamp(max=delta)
    overshoot = magnitude - capped
    return capped.pow(2) * 0.5 + overshoot * delta

In [None]:
def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False):
    """Nearest-neighbour distances between two point sets, in both directions.

    The pairwise distance is the summed Huber loss when ``l1smooth`` is set,
    the L1 distance when only ``l1`` is set, and the squared L2 distance
    otherwise.

    Input:
        pc1: (B,N,C) torch tensor
        pc2: (B,M,C) torch tensor
        l1smooth: bool, whether to use l1smooth loss
        delta: scalar, the delta used in l1smooth loss
        l1: bool, whether to use plain L1 distance
    Output:
        dist1: (B,N) distance from each pc1 point to its nearest pc2 point
        idx1: (B,N) torch int64 tensor, index of that pc2 point
        dist2: (B,M) distance from each pc2 point to its nearest pc1 point
        idx2: (B,M) torch int64 tensor, index of that pc1 point
    """
    # Broadcasting yields the (B,N,M,C) difference without materialising tiled copies.
    pairwise_diff = pc1.unsqueeze(2) - pc2.unsqueeze(1)

    if l1smooth:
        per_axis = huber_loss(pairwise_diff, delta)
    elif l1:
        per_axis = torch.abs(pairwise_diff)
    else:
        per_axis = pairwise_diff ** 2
    pairwise = torch.sum(per_axis, dim=-1)  # (B,N,M)

    dist1, idx1 = torch.min(pairwise, dim=2)  # (B,N)
    dist2, idx2 = torch.min(pairwise, dim=1)  # (B,M)
    return dist1, idx1, dist2, idx2

In [None]:
from datetime import datetime
# Start time of this run formatted as "YYYY-MM-DDTHH-MM-SS/"; the trailing slash
# suggests it is used as a directory name (presumably for logs -- TODO confirm).
TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())

# Debug statistics appended by Loss.forward on every call:
# OBJECT_LIST -- number of proposals labelled as objects,
# MASK_LIST   -- number of proposals that take part in the objectness loss,
# VOTE_LIST   -- number of (seed, box) matches used for the vote loss.
# These lists grow without bound while training runs.
OBJECT_LIST = []
MASK_LIST = []
VOTE_LIST = []

In [None]:
class Loss(nn.Module):
    def __init__(self, ignore_index=0, objectness_weight=None, semantic_weight=None, weight=None):
        super(Loss, self).__init__()
        self.ignore_index = ignore_index
        self.objectness_weight = objectness_weight
        self.semantic_weight = semantic_weight
        self.weight = weight
    
    def forward(self, seed, xyz_offset, vote, proposals, label):
        # proposals -> # 2objectness, 4class, 3center, 12*2angle, 3*4size
        # label -> # 1class, 3center, 1r+1cangle, 1+3size, 6min maxcorners, 3mean_size, 1difficulty
        B = seed.shape[0]
        K = proposals.shape[2]
        semantic_label = label[:,:,0].long()
        center = label[:,:,1:4]
        angle_reg = label[:,:,4]
        angle_cls = label[:,:,5].long()
        angle_residual = angle_reg - (-np.pi + np.pi / 12 + (angle_cls.float()+1) * (np.pi / 6))
        size_cls = label[:,:,6].long()
        size_reg = label[:,:,7:10] # height, width, length
        mean_size = label[:,:,16:19]
        size_residual = size_reg - mean_size
        min_max = label[:,:,10:16] # xyz_min/max
        
        difficulty = label[:,:,19]
        
        predict_objectness = proposals[:,0:2,:]
        predict_semantic_label = proposals[:,2:6,:]
        predict_center = proposals[:,6:9,:]
        predict_angle_cls = proposals[:,21:33,:]
        predict_angle_residual = proposals[:,9:21,:]
        predict_size_cls = proposals[:,33:36,:]
        predict_size_residual = proposals[:,36:45]
        
        # vote loss
        # for seed if it is in one of the boxes -> calculate vote loss
        xmin = min_max[:,:,0]
        xmax = min_max[:,:,1]
        ymin = min_max[:,:,2]
        ymax = min_max[:,:,3]
        zmin = min_max[:,:,4]
        zmax = min_max[:,:,5]
        
        counter = 0
        vote_criterion = nn.L1Loss()
        vote_loss = None
        isVote = False
        for i in range(seed.shape[0]):
            for j in range(seed.shape[2]):
                for k in range(label.shape[1]):
                    if semantic_label[i,k]>0 and seed[i,0,j]<=xmax[i,k] and seed[i,0,j]>=xmin[i,k] and seed[i,1,j]<=ymax[i,k] and seed[i,1,j]>=ymin[i,k] and seed[i,2,j]<=zmax[i,k] and seed[i,2,j]>=zmin[i,k]:
                        counter += 1
                        isVote = True
                        true_offset = center.permute(0,2,1)[i,:,k] - seed[i,:,j]
                        if counter == 1:
                            vote_loss = self.weight[difficulty[i,k].long(), semantic_label[i,k]] * vote_criterion(xyz_offset[i,:,j], true_offset)
                        else:
                            vote_loss += self.weight[difficulty[i,k].long(), semantic_label[i,k]] * vote_criterion(xyz_offset[i,:,j], true_offset)
        if isVote:
            vote_loss /= counter
        VOTE_LIST.append(counter)
        
        # objectness loss
        objectness_criterion = nn.CrossEntropyLoss(weight=self.objectness_weight, reduction="none")
        objectness_label = torch.zeros((B, K), dtype=torch.long).to(device) #[B, K]
        object_mask = torch.zeros((B, K)).to(device)
        dis1, ind1, _, _ = nn_distance(vote.permute(0,2,1), center) # ind1 -> [B, K], K = num_proposals
        objectness_label[torch.sqrt(dis1+1e-6)<NEAR_THRESHOLD] = 1
        OBJECT_LIST.append(torch.sum(objectness_label))
        object_mask[torch.sqrt(dis1+1e-6)<NEAR_THRESHOLD] = 1
        object_mask[torch.sqrt(dis1+1e-6)>FAR_THRESHOLD] = 1
        MASK_LIST.append(torch.sum(object_mask))
        objectness_loss = objectness_criterion(predict_objectness, objectness_label)
        objectness_loss = torch.sum(objectness_loss*object_mask)/(torch.sum(object_mask)+1e-6)
        object_assignment = ind1
        
        if torch.sum(objectness_label) > 0 and counter > 0:
            # semantic loss
            semantic_criterion = nn.CrossEntropyLoss(ignore_index=0, reduction="none")
            semantic_label = torch.gather(semantic_label, 1, object_assignment) # select (B,K) from (B,K2)
            difficulty_label = torch.gather(difficulty, 1, object_assignment)
            weight_mask = torch.zeros((B,K)).to(device)
            weight_mask[semantic_label==1] = self.semantic_weight[1] 
            weight_mask[semantic_label==2] = self.semantic_weight[2]
            weight_mask[semantic_label==3] = self.semantic_weight[3]
            weight_mask[difficulty_label==1] *= 1
            weight_mask[difficulty_label==2] *= 1
            semantic_loss = semantic_criterion(predict_semantic_label, semantic_label) # (B,K)
            semantic_loss = torch.sum(semantic_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # center loss
            dist1, ind1, _, _ = nn_distance(predict_center.permute(0,2,1), center, l1smooth=True) # dist1: BxK
            center_loss = torch.sum(dist1*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # angle loss
            angle_cls = torch.gather(angle_cls, 1, object_assignment) # select (B,K) from (B,K2)
            angle_cls_criterion = nn.CrossEntropyLoss(reduction='none')
            angle_cls_loss = angle_cls_criterion(predict_angle_cls, angle_cls) # (B,K)
            angle_cls_loss = torch.sum(angle_cls_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
            
            angle_residual = torch.gather(angle_residual, 1, object_assignment) # select (B,K) from (B,K2)
            angle_residual_normalized = angle_residual / (np.pi / 12)
            angle_one_hot = torch.FloatTensor(B, angle_cls.shape[1], 12).zero_().to(device)
            angle_one_hot.scatter_(2, angle_cls.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_heading_bin)
            angle_residual_normalized_loss = huber_loss(torch.sum(predict_angle_residual.permute(0,2,1)*angle_one_hot, -1) - angle_residual_normalized, delta=1.0) # (B,K)
            angle_residual_normalized_loss = torch.sum(angle_residual_normalized_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # size loss
            size_cls = torch.gather(size_cls, 1, object_assignment) # select (B,K) from (B,K2)
            size_cls_criterion = nn.CrossEntropyLoss(reduction='none')
            size_cls_loss = size_cls_criterion(predict_size_cls, size_cls) # (B,K)
            size_cls_loss = torch.sum(size_cls_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            size_residual = torch.gather(size_residual, 1, object_assignment.unsqueeze(-1).repeat(1,1,3)) # select (B,K,3) from (B,K2,3)
            size_one_hot = torch.FloatTensor(B, size_cls.shape[1], 3).zero_().to(device)
            size_one_hot.scatter_(2, size_cls.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_size_cluster)
            size_one_hot_tiled = size_one_hot.unsqueeze(-1).repeat(1,1,1,3) # (B,K,num_size_cluster,3)
            predicted_size_residual_normalized = torch.sum(predict_size_residual.view(B,K,3,3)*size_one_hot_tiled, 2) # (B,K,3)
        
            mean_size_arr_expanded = MEAN_SIZE_ARR.unsqueeze(0).unsqueeze(0) * RATIO # (1,1,num_size_cluster,3)
            mean_size_label = torch.sum(size_one_hot_tiled * mean_size_arr_expanded, 2) # (B,K,3)
            size_residual_normalized = size_residual / mean_size_label # (B,K,3)
            size_residual_normalized_loss = torch.mean(huber_loss(predicted_size_residual_normalized - size_residual_normalized, delta=1.0), -1) # (B,K,3) -> (B,K)
            size_residual_normalized_loss = torch.sum(size_residual_normalized_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)

            box_loss = center_loss + 0.1*angle_cls_loss + angle_residual_normalized_loss + 0.3*size_cls_loss + size_residual_normalized_loss
            return box_loss + 0.5 * objectness_loss + vote_loss + semantic_loss
        elif torch.sum(objectness_label) == 0 and counter > 0:
            return vote_loss + 0.5 * objectness_loss
        elif torch.sum(objectness_label) > 0 and counter == 0:
            # semantic_loss
            semantic_criterion = nn.CrossEntropyLoss(ignore_index=0, reduction="none")
            semantic_label = torch.gather(semantic_label, 1, object_assignment) # select (B,K) from (B,K2)
            difficulty_label = torch.gather(difficulty, 1, object_assignment)
            weight_mask = torch.zeros((B,K)).to(device)
            weight_mask[semantic_label==1] = self.semantic_weight[1] 
            weight_mask[semantic_label==2] = self.semantic_weight[2]
            weight_mask[semantic_label==3] = self.semantic_weight[3]
            weight_mask[difficulty_label==1] *= 1
            weight_mask[difficulty_label==2] *= 1
            semantic_loss = semantic_criterion(predict_semantic_label, semantic_label) # (B,K)
            semantic_loss = torch.sum(semantic_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # center loss
            dist1, ind1, _, _ = nn_distance(predict_center.permute(0,2,1), center, l1smooth=True) # dist1: BxK
            center_loss = torch.sum(dist1*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # angle loss
            angle_cls = torch.gather(angle_cls, 1, object_assignment) # select (B,K) from (B,K2)
            angle_cls_criterion = nn.CrossEntropyLoss(reduction='none')
            angle_cls_loss = angle_cls_criterion(predict_angle_cls, angle_cls) # (B,K)
            angle_cls_loss = torch.sum(angle_cls_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
            
            angle_residual = torch.gather(angle_residual, 1, object_assignment) # select (B,K) from (B,K2)
            angle_residual_normalized = angle_residual / (np.pi / 12)
            angle_one_hot = torch.FloatTensor(B, angle_cls.shape[1], 12).zero_().to(device)
            angle_one_hot.scatter_(2, angle_cls.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_heading_bin)
            angle_residual_normalized_loss = huber_loss(torch.sum(predict_angle_residual.permute(0,2,1)*angle_one_hot, -1) - angle_residual_normalized, delta=1.0) # (B,K)
            angle_residual_normalized_loss = torch.sum(angle_residual_normalized_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            # size loss
            size_cls = torch.gather(size_cls, 1, object_assignment) # select (B,K) from (B,K2)
            size_cls_criterion = nn.CrossEntropyLoss(reduction='none')
            size_cls_loss = size_cls_criterion(predict_size_cls, size_cls) # (B,K)
            size_cls_loss = torch.sum(size_cls_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)
        
            size_residual = torch.gather(size_residual, 1, object_assignment.unsqueeze(-1).repeat(1,1,3)) # select (B,K,3) from (B,K2,3)
            size_one_hot = torch.FloatTensor(B, size_cls.shape[1], 3).zero_().to(device)
            size_one_hot.scatter_(2, size_cls.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_size_cluster)
            size_one_hot_tiled = size_one_hot.unsqueeze(-1).repeat(1,1,1,3) # (B,K,num_size_cluster,3)
            predicted_size_residual_normalized = torch.sum(predict_size_residual.view(B,K,3,3)*size_one_hot_tiled, 2) # (B,K,3)
        
            mean_size_arr_expanded = MEAN_SIZE_ARR.unsqueeze(0).unsqueeze(0) * RATIO # (1,1,num_size_cluster,3)
            mean_size_label = torch.sum(size_one_hot_tiled * mean_size_arr_expanded, 2) # (B,K,3)
            size_residual_normalized = size_residual / mean_size_label # (B,K,3)
            size_residual_normalized_loss = torch.mean(huber_loss(predicted_size_residual_normalized - size_residual_normalized, delta=1.0), -1) # (B,K,3) -> (B,K)
            size_residual_normalized_loss = torch.sum(size_residual_normalized_loss*(objectness_label.float())*weight_mask)/(torch.sum(objectness_label)+1e-6)

            box_loss = center_loss + 0.1*angle_cls_loss + angle_residual_normalized_loss + 0.3*size_cls_loss + size_residual_normalized_loss
            return 0.5 * objectness_loss + box_loss + semantic_loss
        else:
            return 0.5 * objectness_loss

In [None]:
# Per-class semantic weights (entry 0 is effectively zero, entries 1-3 are equal).
# An inverse-frequency alternative that was tried earlier:
#   proportion = torch.FloatTensor([1e-6, 28742, 4487, 1627])
#   semantic_weight = torch.sqrt(1 / (proportion / torch.sum(proportion)))
#   semantic_weight[0] = 1e-6
semantic_weight = torch.FloatTensor([1e-6, 1, 1, 1])
print(semantic_weight)

# 4x4 matrix on `device` whose every row is a copy of semantic_weight.
# (Per-row scaling such as weight[1,:] *= 2 / weight[2,:] *= 3 is currently disabled.)
weight = semantic_weight.to(device).repeat(4, 1)
print(weight)

# Objectness weights, fixed by hand.
# A log-inverse-frequency alternative that was tried earlier:
#   objectness_proportion = torch.FloatTensor([501.64367816091954, 2.8735632183908044])
#   objectness_weight = torch.log(1 / (objectness_proportion / torch.sum(objectness_proportion))).to(device)
objectness_weight = torch.FloatTensor([0.05, 0.95]).to(device)
print(objectness_weight)

# Model, loss, optimizer and learning-rate schedule (the rate is halved on each scheduler step).
votenet = VoteNet(
    num_proposals=512,
    radius=1,
    num_neighbors=64,
    vote_dimension=259,
    mlp=[128, 128, 128],
    num_classes=4,
)
criterion = Loss(
    ignore_index=0,
    objectness_weight=objectness_weight,
    semantic_weight=semantic_weight,
    weight=weight,
)
optimizer = torch.optim.Adam(votenet.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=0)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)

In [None]:
# Re-initialise the weight of every 1-D / 2-D convolution in the network with
# Kaiming-normal (fan-in) initialisation; all other modules are left untouched.
for layer in votenet.modules():
    if not isinstance(layer, (nn.Conv2d, nn.Conv1d)):
        continue
    nn.init.kaiming_normal_(layer.weight, mode='fan_in')

In [None]:
# Total number of trainable scalars in votenet (only parameters with
# requires_grad=True are counted). As the last expression of the cell, the
# value is shown as the cell output.
sum(p.numel() for p in votenet.parameters() if p.requires_grad)

In [None]:
# TensorBoard writer for this run; TIMESTAMP must have been defined by an earlier cell.
writer = SummaryWriter("runs/loss" + TIMESTAMP)

votenet.to(device)

LOG_INTERVAL = 20     # training iterations between two validation/logging passes
NUM_VAL_SAMPLES = 20  # validation samples evaluated at each logging pass

# Accuracy is never computed in this loop; the placeholder is kept only so the
# "acc" field written to loss.txt keeps its existing format.
correct = 0.0

for epoch in range(50):
    # Decay the learning rate once every 6 epochs (after epochs 6, 12, 18, ...).
    if (epoch+1) % 6 == 0:
        lr_scheduler.step()
    train_running_loss = 0.0
    val_running_loss = 0.0
    for i, data in enumerate(training_dataloader, 0):
        # Validation below switches to eval mode, so training mode is restored every step.
        votenet.train()
        X, y = data
        X = X.permute(0, 2, 1)
        X = X.to(device)
        y = y.to(device)
        optimizer.zero_grad()

        seed, xyz_offset, vote, proposals = votenet(X)
        loss = criterion(seed, xyz_offset, vote, proposals, y)
        loss.backward()
        optimizer.step()

        train_running_loss += loss.item()

        if i % LOG_INTERVAL == LOG_INTERVAL - 1:
            # Draw distinct random samples from the validation *dataset*. The
            # indices are used on val_set, so they must range over len(val_set)
            # rather than over the number of batches in val_dataloader.
            num_val = min(NUM_VAL_SAMPLES, len(val_set))
            with torch.no_grad():
                votenet.eval()
                for val_index in random.sample(range(len(val_set)), num_val):
                    X_val, y_val = val_set[val_index]
                    # Add a batch dimension of 1 and move channels first, as for training data.
                    X_val = X_val.view(-1, SAMPLE_SIZE, 3).permute(0, 2, 1)
                    y_val = y_val.view(-1, 17, 20)
                    X_val = X_val.to(device)
                    y_val = y_val.to(device)
                    seed, xyz_offset, vote, proposals = votenet(X_val)
                    val_running_loss += criterion(seed, xyz_offset, vote, proposals, y_val).item()

            # Turn the accumulated sums into means over the logging window.
            train_running_loss /= LOG_INTERVAL
            val_running_loss /= max(num_val, 1)

            with open('loss.txt','a') as f:
                f.write("[Epoch %d, Iteration %5d] train_loss: %.3f acc: %.2f %% val_loss: %.3f\n" % 
                        (epoch+1, i+1, train_running_loss, 100*correct, val_running_loss))

            writer.add_scalars('loss', {'training_loss':train_running_loss,
                                        'val_loss':val_running_loss}, epoch * len(training_dataloader) + i)

            train_running_loss = 0.0
            val_running_loss = 0.0
            writer.flush()

In [None]:
# Summary statistics over the OBJECT_LIST / MASK_LIST / VOTE_LIST accumulators
# filled by earlier cells. The builtin `float` replaces the `np.float` alias,
# which was removed in NumPy 1.24 (both mean float64).
OBJECT_LIST = np.array(OBJECT_LIST, dtype=float)
MASK_LIST = np.array(MASK_LIST, dtype=float)
VOTE_LIST = np.array(VOTE_LIST, dtype=float)
print(np.mean(OBJECT_LIST))
print(np.mean(MASK_LIST))
# Ratio of the two means above.
print(np.mean(OBJECT_LIST) / np.mean(MASK_LIST))
print(np.mean(VOTE_LIST))

In [None]:
# Put the network in evaluation mode (eval() returns the module itself) and
# write its state dict to the file "votenet_1".
torch.save(votenet.eval().state_dict(), "votenet_1")

In [None]:
def box3d_vol(corners):
    """Volume of a box given its (8,3) corner array.

    The volume is the product of three edge lengths: corner 0 -> 1,
    corner 1 -> 2 and corner 0 -> 4. No assumption is made about the
    direction of the coordinate axes.
    """
    def edge_length(start, end):
        # Euclidean distance between two corners.
        delta = corners[start, :] - corners[end, :]
        return np.sqrt(np.sum(delta ** 2))

    return edge_length(0, 1) * edge_length(1, 2) * edge_length(0, 4)

def poly_area(x,y):
    """Area of a simple polygon from its vertex coordinates (shoelace formula).

    x, y: equal-length sequences holding the vertex coordinates in order.
    The result is non-negative regardless of the winding direction.
    Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    """
    x_shifted = np.roll(x, 1)
    y_shifted = np.roll(y, 1)
    twice_signed_area = np.dot(x, y_shifted) - np.dot(y, x_shifted)
    return 0.5 * np.abs(twice_signed_area)

def convex_hull_intersection(p1, p2):
    """Area of the overlap between the convex hulls of two 2-D point sets.

    p1, p2: lists of (x, y) tuples; each is turned into a shapely Polygon
    and replaced by its convex hull before intersecting.
    Returns the intersection area, or 0 when the two hulls do not intersect.
    """
    hull_a = Polygon(p1).convex_hull
    hull_b = Polygon(p2).convex_hull

    if hull_a.intersects(hull_b):
        return hull_a.intersection(hull_b).area
    return 0

def bboxIoU(corners1, corners2):
    """3-D and bird's-eye-view IoU of two boxes given as (8,3) corner arrays.

    Corners 0-3 form one horizontal face and 4-7 the opposite one, listed in
    counter-clockwise order. Internally the axes are re-arranged so that the
    second coordinate is the vertical one with "up" along negative Y
    (new_y = -z, new_z = y).

    The inputs are NOT modified: the axis swap is performed on private copies,
    so the same corner array can safely be passed to several calls.

    Returns:
        (iou, iou_2d): volumetric IoU and IoU of the ground-plane footprints.
        Either value is 0.0 when the corresponding union is not positive.
    """
    def swap_axes(corners):
        # Build a re-ordered copy instead of writing into the caller's array.
        source = np.array(corners, copy=True)
        swapped = source.copy()
        swapped[:, 1] = -source[:, 2]
        swapped[:, 2] = source[:, 1]
        return swapped

    box1 = swap_axes(corners1)
    box2 = swap_axes(corners2)

    # Footprints on the ground plane, built from the first four corners in reversed order.
    rect1 = [(box1[i,0], box1[i,2]) for i in range(3,-1,-1)]
    rect2 = [(box2[i,0], box2[i,2]) for i in range(3,-1,-1)]

    area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1])
    area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1])

    inter_area = convex_hull_intersection(rect1, rect2)
    union_area = area1 + area2 - inter_area
    iou_2d = inter_area / union_area if union_area > 0 else 0.0

    # Vertical overlap between the two boxes (clamped at zero when they do not overlap).
    ymax = min(box1[0,1], box2[0,1])
    ymin = max(box1[4,1], box2[4,1])
    inter_vol = inter_area * max(0.0, ymax-ymin)

    vol1 = box3d_vol(box1)
    vol2 = box3d_vol(box2)
    union_vol = vol1 + vol2 - inter_vol
    iou = inter_vol / union_vol if union_vol > 0 else 0.0
    return iou, iou_2d

In [None]:
def get_bb_box(center, angle, size):
    """Build the 8 corners of an oriented box from a prediction or a label.

    Two input layouts are supported, selected by the length of `angle`:

    * prediction (``angle.shape[0] == 24``): `angle` holds 12 normalised
      residuals followed by 12 class scores; `size` holds 3 class scores
      followed by 3x3 normalised residuals. The best-scoring bins are decoded
      with MEAN_SIZE_ARR and the 30-degree heading bins.
    * ground truth (any other length): ``angle[0]`` is used as the heading and
      ``size[1:4]`` as (h, w, l).

    Args:
        center: tensor whose first three entries are the box centre.
        angle:  tensor in one of the two layouts above.
        size:   tensor in one of the two layouts above.

    Returns:
        np.ndarray of shape (8, 3): corners 0-3 at height offset 0 and corners
        4-7 at height offset h, rotated about the vertical axis and translated
        by `center`.
    """
    def to_numpy(tensor):
        # Detach from the autograd graph and move to host memory before converting.
        return tensor.detach().cpu().numpy()

    if angle.shape[0] == 24: # predict
        angle_cls = torch.max(F.softmax(angle[12:], dim=0), dim=0)[1]
        # un - normalized
        angle_residual = angle[angle_cls] * (np.pi/12)
        # dim is given explicitly: relying on the implicit softmax dim is deprecated.
        size_cls = torch.max(F.softmax(size[0:3], dim=0), dim=0)[1]
        # un - normalized
        size_residual = size[3+size_cls*3:3+size_cls*3+3] * MEAN_SIZE_ARR[size_cls]
        size = MEAN_SIZE_ARR[size_cls] + size_residual
        # Bin centre of the selected 30-degree heading class plus its residual.
        angle = -np.pi + np.pi/12 + angle_cls * (np.pi/6) + angle_residual
    else: # ground truth labels
        angle = angle[0]
        size = size[1:4]

    angle = -np.pi/2 - angle # from camera y to lidar z
    h = to_numpy(size[0])
    w = to_numpy(size[1])
    l = to_numpy(size[2])
    x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    z_corners = [0, 0, 0, 0, h, h, h, h]

    # Rotation about the third (vertical) axis by `angle`.
    heading = to_numpy(angle)
    cos_a = np.cos(heading)
    sin_a = np.sin(heading)
    matrix = np.zeros((3,3))
    matrix[0, 0] = cos_a
    matrix[0, 1] = -sin_a
    matrix[1, 0] = sin_a
    matrix[1, 1] = cos_a
    matrix[2, 2] = 1

    corners_3d = np.dot(matrix, np.vstack([x_corners, y_corners, z_corners]))
    for axis in range(3):
        corners_3d[axis, :] = corners_3d[axis, :] + to_numpy(center[axis])
    return np.transpose(corners_3d) # from 3,8 -> 8,3

In [None]:
# NMS and mAP -> IoU threshold: 0.5 (for NMS), 0.5 for pedestrian and 0.7 for car (mAP)
def voc_ap(rec, prec, use_07_metric=False):
    """Average precision in the PASCAL VOC style.

    rec, prec: NumPy arrays of recall and precision values of equal length.
    use_07_metric: when True, use the VOC 2007 11-point interpolation;
        otherwise (default) integrate the monotone precision envelope over
        recall.
    """
    if use_07_metric:
        # 11-point metric: average the best precision reachable at recall >= t
        # for t = 0.0, 0.1, ..., 1.0 (0 when no point reaches that recall).
        ap = 0.
        for threshold in np.arange(0., 1.1, 0.1):
            reached = rec >= threshold
            best_precision = np.max(prec[reached]) if np.sum(reached) != 0 else 0
            ap = ap + best_precision / 11.
        return ap

    # Pad both curves with sentinel values at each end.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: running maximum taken from the right-hand end.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Positions where recall changes; sum (delta recall) * precision there.
    steps = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])

def _decode_proposal_box(proposals, batch_idx, proposal_idx):
    """Decode one proposal column (centre 6:9, angle 9:33, size 33:45) into (8,3) corners."""
    return get_bb_box(proposals[batch_idx, 6:9, proposal_idx],
                      proposals[batch_idx, 9:33, proposal_idx],
                      proposals[batch_idx, 33:45, proposal_idx])


def _suppress_overlaps(boxes, order, iou_threshold):
    """Greedy NMS: visit `order` (best first) and drop every box overlapping a kept one."""
    keep = []
    remaining = list(order)
    while remaining:
        best = remaining.pop(0)
        keep.append(best)
        # Copies are handed to bboxIoU so the cached corner arrays are never altered.
        remaining = [idx for idx in remaining
                     if not bboxIoU(boxes[best].copy(), boxes[idx].copy())[0] > iou_threshold]
    return keep


def _class_average_precision(y_i, boxes, class_scores_i, predict_classes_i,
                             keep, nearest_gt, class_id, iou_threshold):
    """VOC average precision of one class for one batch element."""
    # (class score, proposal index, index of the nearest ground-truth box)
    detections = [(float(class_scores_i[class_id, p]), p, nearest_gt[k])
                  for k, p in enumerate(keep)
                  if int(predict_classes_i[p]) == class_id]
    # Precision/recall must be accumulated from the most confident detection down.
    detections.sort(key=lambda det: det[0], reverse=True)

    # Small epsilons keep the ratios defined when a count is zero.
    tp = 1e-6
    fp = 1e-6
    fn = int((y_i[:, 0] == class_id).sum().item()) + 1e-6
    matched_gt = set()
    precision = []
    recall = []
    for _, p, g in detections:
        gt_box = get_bb_box(y_i[g, 1:4], y_i[g, 4:6], y_i[g, 6:10])
        iou = bboxIoU(boxes[p].copy(), gt_box)[0]  # [0]: 3-D IoU (second entry is the 2-D IoU)
        is_hit = bool(y_i[g, 0] == class_id) and iou >= iou_threshold
        if is_hit and g not in matched_gt:
            tp += 1
            fn -= 1
            matched_gt.add(g)
        else:
            # Wrong class, insufficient overlap, or a duplicate of an already matched object.
            fp += 1
        precision.append(tp / (tp + fp))
        recall.append(tp / (tp + fn))
    return voc_ap(np.array(recall), np.array(precision))


def nms(y, proposals, nms_iou_threshold=0.5):
    """Run non-maximum suppression on the proposals and score them with VOC AP.

    Layouts:
        y         -> label -> 1 class, 3 center, 1r+1c angle, 1+3 size,
                     6 min/max corners, 3 mean_size, 1 difficulty
        proposals -> 2 objectness, 4 class, 3 center, 12*2 angle, 3*4 size
                     (regression first for angle, classification first for size),
                     shaped (B, channels, num_proposals)

    Per batch element: proposals are ranked by positive objectness (highest
    first), greedily suppressed at `nms_iou_threshold`, matched to the nearest
    ground-truth centre, and evaluated per class with an IoU threshold of 0.7
    for class 1 (car) and 0.5 for classes 2 (pedestrian) and 3 (cyclist).

    Args:
        y: ground-truth labels, indexed as y[b, object, field].
        proposals: raw network output, indexed as proposals[b, channel, proposal].
        nms_iou_threshold: 3-D IoU above which a lower-ranked box is suppressed.

    Returns:
        (car_ap, pedestrian_ap, cyclist_ap), each averaged over the batch.
    """
    B, S = proposals.shape[0], proposals.shape[2] # batch size, num of proposals
    objectness_scores = F.softmax(proposals[:,0:2,:], dim=1)
    positive_objectness_scores = objectness_scores[:,1,:].view(B, -1)
    class_scores = F.softmax(proposals[:,2:6,:], dim=1)
    predict_classes = torch.max(class_scores, dim=1)[1]

    map_iou_thresholds = {1: 0.7, 2: 0.5, 3: 0.5}  # class id -> IoU needed for a true positive
    ap_per_class = {class_id: [] for class_id in map_iou_thresholds}

    for i in range(B): # every batch element
        # Decode every proposal once; the corners are reused by NMS and by the AP matching.
        boxes = [_decode_proposal_box(proposals, i, s) for s in range(S)]

        # Highest objectness first, so the most confident box of each cluster survives.
        order = torch.argsort(positive_objectness_scores[i], descending=True).cpu().tolist()
        keep = _suppress_overlaps(boxes, order, nms_iou_threshold)

        # Centres are stored as (3, n_keep); transpose to (n_keep, 3) so that each
        # row is one xyz point before adding the batch dimension.
        keep_idx = torch.as_tensor(keep, dtype=torch.long, device=proposals.device)
        kept_centers = proposals[i, 6:9, :].index_select(1, keep_idx).t().contiguous().unsqueeze(0)
        _, idx1, _, _ = nn_distance(kept_centers, y[i,:,1:4].view(1,-1,3))
        nearest_gt = idx1[0].long().cpu().tolist()

        for class_id, iou_threshold in map_iou_thresholds.items():
            ap_per_class[class_id].append(
                _class_average_precision(y[i], boxes, class_scores[i], predict_classes[i],
                                         keep, nearest_gt, class_id, iou_threshold))

    return (np.mean(np.array(ap_per_class[1])),
            np.mean(np.array(ap_per_class[2])),
            np.mean(np.array(ap_per_class[3])))

In [None]:
# Rebuild the network, restore the weights saved in "votenet_1" and evaluate
# per-class average precision on every validation batch.
votenet = VoteNet(num_proposals=512, radius=1, num_neighbors=64, vote_dimension=259, mlp=[128,128,128], num_classes=4)
votenet.to(device)
# map_location lets a checkpoint saved on another device load on the current one.
votenet.load_state_dict(torch.load("votenet_1", map_location=device))
votenet.eval()
car_ap = []
pe_ap = []
cy_ap = []
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i, data in enumerate(val_dataloader, 0):
        # Unpack the current batch (the loop variable is `data`).
        X_val, y_val = data
        X_val = X_val.permute(0, 2, 1)
        X_val = X_val.to(device)
        y_val = y_val.to(device)
        _, _, _, proposals = votenet(X_val)
        car_ap_temp, pe_ap_temp, cy_ap_temp = nms(y_val, proposals)
        car_ap.append(car_ap_temp)
        pe_ap.append(pe_ap_temp)
        cy_ap.append(cy_ap_temp)

In [None]:
# Mean of the per-batch average precisions, printed in the order
# car, pedestrian, cyclist.
for per_batch_ap in (car_ap, pe_ap, cy_ap):
    print(np.mean(np.array(per_batch_ap)))