In [1]:
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from pointnet2_ops import pointnet2_utils
from knn_cuda import KNN

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [3]:
print("Load a ply point cloud, print it, and render it")
path = "../dataset/OldDataset/Barn_is/Barn/Barn01.ply"
pcd = o3d.io.read_point_cloud(path)
print(pcd)

voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd,
                                                        voxel_size=0.01)
voxels = voxel_grid.get_voxels()  # returns list of voxels
indices = np.stack(list(vx.grid_index for vx in voxels))
colors = np.stack(list(vx.color for vx in voxels))
# print(indices[0:10])
# print(voxel_grid)
o3d.visualization.draw_geometries([voxel_grid])

voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd,
                                                    voxel_size=0.2)
voxels = voxel_grid.get_voxels()  # returns list of voxels
indices = np.stack(list(vx.grid_index for vx in voxels))
colors = np.stack(list(vx.color for vx in voxels))
# print(indices[0:10])
# print(voxel_grid)
o3d.visualization.draw_geometries([voxel_grid])


Load a ply point cloud, print it, and render it
PointCloud with 1642571 points.


In [22]:
class Tokenizer (nn.Module):
    def __init__(self, num_group, group_size):
        super().__init__()
        self.num_group = num_group
        self.group_size = group_size
        self.knn = KNN(k=self.group_size, transpose_mode=True)
        self.mask_ratio = 0.6

    def forward(self, xyz):
            '''
                input: B N 3
                ---------------------------
                output: B G M 3
                center : B G 3
            '''
            batch_size, num_points, _ = xyz.shape
            # fps the centers out
            center = self.fps(xyz, self.num_group) # B G 3
            # knn to get the neighborhood
            _, idx = self.knn(xyz, center) # B G M
            assert idx.size(1) == self.num_group
            assert idx.size(2) == self.group_size
            idx_base = torch.arange(0, batch_size, device=xyz.device).view(-1, 1, 1) * num_points
            idx = idx + idx_base
            idx = idx.view(-1)
            neighborhood = xyz.view(batch_size * num_points, -1)[idx, :]
            neighborhood = neighborhood.view(batch_size, self.num_group, self.group_size, 3).contiguous()
            # normalize
            neighborhood = neighborhood - center.unsqueeze(2)
            return neighborhood, center
    
    def masking(self, center, noaug = False):
        '''
            center : B G 3
            --------------
            mask : B G (bool)
        '''
        B, G, _ = center.shape
        # skip the mask
        if noaug or self.mask_ratio == 0:
            return torch.zeros(center.shape[:2]).bool()

        self.num_mask = int(self.mask_ratio * G)

        overall_mask = np.zeros([B, G])
        for i in range(B):
            mask = np.hstack([
                np.zeros(G-self.num_mask),
                np.ones(self.num_mask),
            ])
            np.random.shuffle(mask)
            overall_mask[i, :] = mask
        overall_mask = torch.from_numpy(overall_mask).to(torch.bool)

        return overall_mask.to(center.device) # B G
    
    def fps(self, data, number):
        '''
            data B N 3
            number int
        '''
        # print(number)
        # print("yoyoyo",data.scalar_type())
        fps_idx = pointnet2_utils.furthest_point_sample(data, number) 
        fps_data = pointnet2_utils.gather_operation(data.transpose(1, 2).contiguous(), fps_idx).transpose(1,2).contiguous()
        print(fps_data)
        return fps_data
    
def farthest_point_sample(point, npoint):
    """
    Input:
        xyz: pointcloud data, [N, D]
        npoint: number of samples
    Return:
        centroids: sampled pointcloud index, [npoint, D]
    """
    N, D = point.shape
    xyz = point[:,:3]
    centroids = np.zeros((npoint,))
    distance = np.ones((N,)) * 1e10
    farthest = np.random.randint(0, N)
    for i in range(npoint):
        centroids[i] = farthest
        centroid = xyz[farthest, :]
        dist = np.sum((xyz - centroid) ** 2, -1)
        mask = dist < distance
        distance[mask] = dist[mask]
        farthest = np.argmax(distance, -1)
    point = point[centroids.astype(np.int32)]
    return point


In [23]:
tokenizer = Tokenizer(32,128)

In [6]:
from torchvision import transforms

class PointcloudScaleAndTranslate(object):
    def __init__(self, scale_low=2. / 3., scale_high=3. / 2., translate_range=0.2):
        self.scale_low = scale_low
        self.scale_high = scale_high
        self.translate_range = translate_range

    def __call__(self, pc):
        bsize = pc.size()[0]
        for i in range(bsize):
            xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3])
            xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3])
            
            pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], torch.from_numpy(xyz1).float().cuda()) + torch.from_numpy(xyz2).float().cuda()
            
        return pc


train_transforms = transforms.Compose(
    [
        # data_transforms.PointcloudScale(),
        # data_transforms.PointcloudRotate(),
        # data_transforms.PointcloudRotatePerturbation(),
        # data_transforms.PointcloudTranslate(),
        # data_transforms.PointcloudJitter(),
        # data_transforms.PointcloudRandomInputDropout(),
        PointcloudScaleAndTranslate(),
    ]
)



In [7]:
torch.cuda.is_available()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
path = "../dataset/Dataset/pcd/Barn01.npy"
np_pcd = np.load(path)

print(np_pcd.shape)
tensor_pcd = torch.from_numpy(np.reshape(np_pcd, (1, np_pcd.shape[0], np_pcd.shape[1]))).to(device).cuda().float()

(16384, 3)


In [9]:
tensor_pcd = train_transforms(tensor_pcd)

In [10]:
print(tensor_pcd)

tensor([[[  3.8867,  -7.0483, -23.3645],
         [-21.4999,  -5.5777, -21.4391],
         [ -4.6564,   7.9825, -18.5496],
         ...,
         [  2.3679,  -7.2225, -23.5431],
         [  2.3936,  -7.2203, -21.9857],
         [  3.8669,  -6.3027, -23.6986]]], device='cuda:0')


In [11]:
test, centers = tokenizer.forward(tensor_pcd)

tensor([[[ 3.8867e+00, -7.0483e+00, -2.3364e+01],
         [-2.1500e+01, -5.5777e+00, -2.1439e+01],
         [-4.6564e+00,  7.9825e+00, -1.8550e+01],
         [-9.0250e+00, -7.4897e+00, -2.4175e+01],
         [ 4.6501e+00,  4.5830e+00, -2.4156e+01],
         [-2.8450e+00, -1.5664e+00, -1.9275e+01],
         [-1.5544e+01, -6.5998e+00, -2.4178e+01],
         [-2.5397e+00, -5.9901e+00, -2.4146e+01],
         [ 3.3715e+00, -9.5880e-01, -2.1403e+01],
         [ 5.9040e-02,  4.3668e+00, -2.0219e+01],
         [-5.8925e+00, -6.3627e+00, -1.9787e+01],
         [ 4.8794e+00,  9.7736e+00, -2.4150e+01],
         [-1.2012e+01, -5.1754e+00, -2.1011e+01],
         [-4.7247e+00,  2.8751e+00, -1.8567e+01],
         [ 6.0323e-01, -4.9373e+00, -2.0487e+01],
         [-1.5960e+01, -4.2331e+00, -2.0916e+01],
         [ 4.5705e+00, -3.3100e+00, -2.4168e+01],
         [ 3.6931e+00,  7.3881e+00, -2.1448e+01],
         [-6.0736e+00, -5.2796e+00, -2.3380e+01],
         [-4.1025e-01,  7.9603e+00, -2.0008e+01],


In [12]:
print(test.shape)

# print(test[0].shape)
# print(test[1].shape)

torch.Size([1, 32, 128, 3])


In [16]:
pcd_patch = test.cpu().detach().numpy()

In [17]:
print(pcd_patch.shape)

(1, 32, 128, 3)


In [18]:
import open3d as o3d


# Pass xyz to Open3D.o3d.geometry.PointCloud and visualize
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pcd_patch[0][31])
o3d.visualization.draw_geometries([pcd])

In [24]:
bool_masked_pos = tokenizer.masking(centers, False)

In [25]:
print(bool_masked_pos)

tensor([[False,  True,  True,  True,  True, False,  True, False,  True, False,
          True, False,  True, False, False,  True, False,  True, False,  True,
          True,  True,  True,  True,  True, False, False, False,  True,  True,
         False,  True]], device='cuda:0')


In [None]:
group_input_tokens = self.encoder(neighborhood)  #  B G C

batch_size, seq_len, C = group_input_tokens.size()

x_vis = group_input_tokens[~bool_masked_pos].reshape(batch_size, -1, C)
# add pos embedding
# mask pos center
masked_center = center[~bool_masked_pos].reshape(batch_size, -1, 3)
pos = self.pos_embed(masked_center)

# transformer
x_vis = self.blocks(x_vis, pos)
x_vis = self.norm(x_vis)