## 可视化指定场景视角的关键点和抓取位姿

In [1]:
import torch
import scipy.io as scio
from models.graspnet import GraspNet, pred_decode
from dataset.graspnet_dataset import GraspNetDataset, collate_fn
from graspnetAPI import GraspGroup, GraspNetEval
from graspnetAPI.utils.utils import generate_scene_model
from graspnetAPI.utils.eval_utils import get_scene_name, create_table_points, parse_posevector, \
    load_dexnet_model, transform_points, compute_point_distance, compute_closest_points, \
        voxel_sample_points, topk_grasps, get_grasp_score, collision_detection, eval_grasp
from utils.data_utils import CameraInfo, transform_point_cloud, \
    create_point_cloud_from_depth_image, get_workspace_mask, remove_invisible_grasp_points
from utils.collision_detector import ModelFreeCollisionDetector
import numpy as np
from experiment.utils import toOpen3dCloud
import open3d as o3d
import os
# Point GUI clients at X display ":11.0" — presumably so Open3D windows opened
# later in this notebook have a display to attach to; the value is
# machine-specific (TODO confirm / adjust for your environment).
os.environ['DISPLAY'] = ":11.0"

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


pybullet build time: May 20 2022 19:44:17


In [10]:
# Test split of the benchmark for the RealSense camera; labels are not loaded
# and augmentation is off because this notebook only runs inference.
TEST_DATASET = GraspNetDataset("data/Benchmark/graspnet", valid_obj_idxs=None, grasp_labels=None, split="test",
                               camera="realsense", num_points=20000, remove_outlier=True,
                               augment=False, load_label=False)

# Evaluation helper, used further down to load the full scene point cloud.
ge = GraspNetEval(root="data/Benchmark/graspnet", camera="realsense", split="test")
net = GraspNet(input_feature_dim=0, num_view=300, num_angle=12, num_depth=4,
               cylinder_radius=0.05, hmin=-0.02, hmax_list=[0.01,0.02,0.03,0.04], is_training=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
# Load checkpoint
checkpoint_path = "logs/log_rs_spotr/2023-10-06-23-21/checkpoint.tar"
# checkpoint_path = "logs/log_rs_spotr/2023-11-07-17-30/checkpoint.tar"
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU still loads when the CPU fallback above is selected.
checkpoint = torch.load(checkpoint_path, map_location=device)
net.load_state_dict(checkpoint['model_state_dict'])
start_epoch = checkpoint['epoch']
print(f"-> loaded checkpoint {checkpoint_path} (epoch: {start_epoch})")
# NOTE(review): eval mode is deliberately left disabled, exactly as in the
# original cell. Without it, layers such as BatchNorm/Dropout (if the model has
# any) keep their training-time behavior during inference — confirm this is
# intended before relying on the results.
# net.eval()

Loading data path and collision labels...: 100%|██████████| 90/90 [00:00<00:00, 321.02it/s]
Loading data path...: 100%|██████████| 90/90 [00:00<00:00, 238.58it/s]


-> loaded checkpoint logs/log_rs_spotr/2023-10-06-23-21/checkpoint.tar (epoch: 18)


In [6]:
def get_data(scene_id, img_id, raw=False):
    """Fetch one sample from ``TEST_DATASET`` for a given scene and view.

    The flat dataset index is computed as ``(scene_id - 100) * 256 + img_id``.

    Args:
        scene_id: Scene number; the index formula offsets it by 100.
        img_id: View number within the scene.
        raw: Forwarded to the dataset as ``return_raw_cloud``. When true, the
            dataset's return value is passed through untouched (callers in
            this notebook unpack it as ``(cloud, rgb)``).

    Returns:
        With ``raw=False``: the sample dict from the dataset with an extra key
        ``'pcd_obj_inds'`` holding 1024 indices into ``'point_clouds'`` that
        lie on objects (``seg_mask > 0``). If fewer than 1024 object points
        exist, all of them are kept and the remainder is drawn with
        replacement.

    Raises:
        ValueError: If the sample contains no object points at all.
    """
    sample = TEST_DATASET.get_data((scene_id - 100) * 256 + img_id, return_raw_cloud=raw)
    if raw:
        return sample

    num_pts_obj = 1024
    obj_mask = sample['seg_mask']
    # flatnonzero always yields a 1-D index array. The previous
    # argwhere(...).squeeze() collapsed to a 0-d array when exactly one object
    # point was present, which made the fancy indexing below fail.
    pcd_obj_inds = np.flatnonzero(obj_mask > 0)  # (N_obj,)
    num_obj = len(pcd_obj_inds)
    if num_obj == 0:
        raise ValueError(
            f"no object points (seg_mask > 0) in scene {scene_id}, image {img_id}"
        )

    if num_obj >= num_pts_obj:
        idxs = np.random.choice(num_obj, num_pts_obj, replace=False)
    else:
        # Keep every object point once, then pad by re-drawing with replacement.
        keep_all = np.arange(num_obj)
        padding = np.random.choice(num_obj, num_pts_obj - num_obj, replace=True)
        idxs = np.concatenate([keep_all, padding], axis=0)

    sample['pcd_obj_inds'] = pcd_obj_inds[idxs]  # (1024,)
    return sample

In [11]:
# Run the network on view `img_id` of several test scenes, dump the key points,
# grasp points and raw cloud as PLY files, filter colliding grasps and show the
# ten best remaining grasps in an Open3D window.
# scene_id = 100
img_id = 0

for scene_id in range(103, 106):
    # Point cloud, colors, segmentation and object-point indices
    data = [get_data(scene_id, img_id)]

    # Add the batch dimension
    batch_data = collate_fn(data)

    # Move every tensor to the target device
    for key in batch_data:
        if 'list' in key:
            # Entries whose key contains 'list' are nested lists of tensors.
            for inner in batch_data[key]:
                for j, item in enumerate(inner):
                    inner[j] = item.to(device)
        else:
            batch_data[key] = batch_data[key].to(device)

    # Forward pass; the key points and the scene are saved below
    with torch.no_grad():
        end_points = net(batch_data)
        grasp_preds = pred_decode(end_points)

    vis_result_dir = f"vis_result/{scene_id}_{img_id}"
    os.makedirs(vis_result_dir, exist_ok=True)

    # Save the key points
    # NOTE(review): index 4 then batch element 0 — presumably the last stage of
    # a multi-stage output; confirm against the model definition.
    global_p = end_points['global_p']
    global_p = global_p[4][0].detach().cpu().numpy()
    global_p_o3d = toOpen3dCloud(global_p)
    o3d.io.write_point_cloud(f"{vis_result_dir}/keys.ply", global_p_o3d)

    # Move the predicted grasp poses to the CPU
    preds = grasp_preds[0].detach().cpu().numpy()
    gg = GraspGroup(preds)
    num_pred = len(gg)
    # total_pred += num_pred

    # Grasp points (1024, 3), taken from columns 13:16 of the prediction array
    graspable_p = preds[:, 13:16]
    graspable_pcd_o3d = toOpen3dCloud(graspable_p)
    o3d.io.write_point_cloud(f"{vis_result_dir}/graspness.ply", graspable_pcd_o3d)

    # Full-resolution point cloud; loaded once and reused for both the PLY
    # export and the collision check (it used to be fetched twice per scene).
    cloud, rgb = get_data(scene_id, img_id, raw=True)
    # Save it locally
    raw_pcd_o3d = toOpen3dCloud(cloud, rgb)
    o3d.io.write_point_cloud(f"{vis_result_dir}/raw_pcd.ply", raw_pcd_o3d)

    # collision detection
    mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=0.01)
    collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=0.01)
    num_coll = np.count_nonzero(collision_mask)
    # total_coll += num_coll
    # Guard against a scene with no predicted grasps (would divide by zero).
    collision_rate = num_coll / num_pred if num_pred else 0.0
    print(f"collision rate: {collision_rate:.2f}\n")
    gg = gg[~collision_mask]

    # Visualize the grasps
    # Alternative (disabled): also render the table plane and object models.
    # table = create_table_points(1.0, 1.0, 0.05, dx=-0.5, dy=-0.5, dz=-0.05, grid_size=0.008)
    # _, pose_list, camera_pose, align_mat = ge.get_model_poses(scene_id, img_id)
    # table_trans = transform_points(table, np.linalg.inv(np.matmul(align_mat, camera_pose)))
    # t = o3d.geometry.PointCloud()
    # t.points = o3d.utility.Vector3dVector(table_trans)
    # model_list = generate_scene_model("data/Benchmark/graspnet", 'scene_%04d' % scene_id , 
    #                                 img_id, return_poses=False, align=False, camera="realsense")

    # Non-maximum suppression, then keep the first ten grasps.
    nms_gg = gg.nms()
    nms_gg = nms_gg[:10]
    grasps_geometry = nms_gg.to_open3d_geometry_list()
    pcd = ge.loadScenePointCloud(scene_id, "realsense", img_id)
    o3d.visualization.draw_geometries([pcd, *grasps_geometry])
    # o3d.visualization.draw_geometries([pcd, *grasps_geometry, *model_list])
    # o3d.visualization.draw_geometries([*grasps_geometry, *model_list, t])


collision rate: 0.70

collision rate: 0.72

collision rate: 0.62



KeyboardInterrupt: 

### log 抓取分

In [1]:
import numpy as np

In [5]:
# Tabulate ln(1/x) for x = 0.1, 0.2, ..., 1.0 (the printed label reads "log x").
score_levels = np.arange(0.1, 1.1, 0.1)
for i in score_levels:
    print(f"log {i} = {np.log(1/i):.4f}")

log 0.1 = 2.3026
log 0.2 = 1.6094
log 0.30000000000000004 = 1.2040
log 0.4 = 0.9163
log 0.5 = 0.6931
log 0.6 = 0.5108
log 0.7000000000000001 = 0.3567
log 0.8 = 0.2231
log 0.9 = 0.1054
log 1.0 = 0.0000


### 传播 top-k 交叉熵 loss

In [2]:
import torch

In [16]:
# Toy example: random per-point logits and matching integer class labels.
num_classes = 2
predictions = torch.randn((2, num_classes, 1024))  # logits laid out as (batch, classes, points)
# torch.randint's upper bound is exclusive, so it must be num_classes (not 1)
# for both labels to occur; randint already returns int64 labels.
targets = torch.randint(0, num_classes, (2, 1024))
criterion = torch.nn.CrossEntropyLoss(reduction='none')

# Per-point loss, kept unreduced so individual points can be ranked
errors = criterion(predictions, targets)

# Take the 512 largest per-point losses along the last dimension (per sample)
topk_values, topk_indices = torch.topk(errors, k=512)

# Back-propagate only through the hardest points: average the selected losses
loss = torch.mean(topk_values)

print(loss)

tensor(1.4968)


### 点云渲染

In [6]:
import numpy as np
from plyfile import PlyData, PlyElement
import pandas as pd
 
file_dir = 'vis_result/103_0/raw_pcd.ply'  # path of the PLY file to render
plydata = PlyData.read(file_dir)  # parse the file
data = plydata.elements[0].data  # structured records of the first element
data_pd = pd.DataFrame(data)  # a DataFrame gives column access to the structured records
# np.float was deprecated in NumPy 1.20 and removed in 1.24; np.float64 is the
# same dtype the alias used to resolve to.
pcl = np.zeros(data_pd.shape, dtype=np.float64)  # one column per PLY property
# Read the property names from the array dtype so an empty element does not
# fail on data[0].
property_names = data.dtype.names
for i, name in enumerate(property_names):  # copy property by property so every column ends up as float64
    pcl[:, i] = data_pd[name]

def standardize_bbox(pcl, points_per_object):
    """Randomly subsample a point cloud and normalize it into a centered box.

    The sampled points are shifted so the center of their axis-aligned
    bounding box sits at the origin, then divided by the longest bounding-box
    edge, so every coordinate ends up in ``[-0.5, 0.5]``.

    Args:
        pcl: Array of shape ``(n, d)`` with one point per row.
        points_per_object: Number of points to sample. If it exceeds ``n`` the
            points are drawn with replacement instead of raising.

    Returns:
        ``float32`` array of shape ``(points_per_object, d)``.

    Raises:
        ValueError: If ``pcl`` contains no points.
    """
    num_points = pcl.shape[0]
    if num_points == 0:
        raise ValueError("cannot standardize an empty point cloud")

    # Drawing without replacement is impossible when more points are requested
    # than exist, so fall back to sampling with replacement in that case.
    oversample = points_per_object > num_points
    pt_indices = np.random.choice(num_points, points_per_object, replace=oversample)
    np.random.shuffle(pt_indices)
    pcl = pcl[pt_indices]  # (points_per_object, d)

    mins = np.amin(pcl, axis=0)
    maxs = np.amax(pcl, axis=0)
    center = (mins + maxs) / 2.
    scale = np.amax(maxs - mins)
    print("Center: {}, Scale: {}".format(center, scale))

    # All sampled points coincide: avoid 0/0 -> NaN and map them to the origin.
    if scale == 0:
        scale = 1.0
    result = ((pcl - center) / scale).astype(np.float32)  # [-0.5, 0.5]
    return result

# Scene-description templates. They are concatenated further down and written
# to 'mitsuba_scene.xml': xml_head first, then one xml_ball_segment per point,
# then xml_tail.

# Opening part of the scene: path integrator, a perspective sensor looking from
# (3,3,3) at the origin with a 1600x1200 HDR film and 256 samples, plus the
# "surfaceMaterial" rough-plastic BSDF that xml_tail references.
xml_head = \
"""
<scene version="0.6.0">
    <integrator type="path">
        <integer name="maxDepth" value="-1"/>
    </integrator>
    <sensor type="perspective">
        <float name="farClip" value="100"/>
        <float name="nearClip" value="0.1"/>
        <transform name="toWorld">
            <lookat origin="3,3,3" target="0,0,0" up="0,0,1"/>
        </transform>
        <float name="fov" value="25"/>
        
        <sampler type="ldsampler">
            <integer name="sampleCount" value="256"/>
        </sampler>
        <film type="hdrfilm">
            <integer name="width" value="1600"/>
            <integer name="height" value="1200"/>
            <rfilter type="gaussian"/>
            <boolean name="banner" value="false"/>
        </film>
    </sensor>
    
    <bsdf type="roughplastic" id="surfaceMaterial">
        <string name="distribution" value="ggx"/>
        <float name="alpha" value="0.05"/>
        <float name="intIOR" value="1.46"/>
        <rgb name="diffuseReflectance" value="1,1,1"/> <!-- default 0.5 -->
    </bsdf>
    
"""

# One sphere of radius 0.025 per point. The six positional str.format
# placeholders are, in order: translate x, y, z and reflectance r, g, b.
xml_ball_segment = \
"""
    <shape type="sphere">
        <float name="radius" value="0.025"/>
        <transform name="toWorld">
            <translate x="{}" y="{}" z="{}"/>
        </transform>
        <bsdf type="diffuse">
            <rgb name="reflectance" value="{},{},{}"/>
        </bsdf>
    </shape>
"""

# Closing part of the scene: a 10x10 rectangle at z=-0.5 using
# "surfaceMaterial", a second rectangle carrying an area emitter
# (radiance 6,6,6), and the closing </scene> tag.
xml_tail = \
"""
    <shape type="rectangle">
        <ref name="bsdf" id="surfaceMaterial"/>
        <transform name="toWorld">
            <scale x="10" y="10" z="1"/>
            <translate x="0" y="0" z="-0.5"/>
        </transform>
    </shape>
    
    <shape type="rectangle">
        <transform name="toWorld">
            <scale x="10" y="10" z="1"/>
            <lookat origin="-4,4,20" target="0,0,0" up="0,0,1"/>
        </transform>
        <emitter type="area">
            <rgb name="radiance" value="6,6,6"/>
        </emitter>
    </shape>
</scene>
"""

def colormap(x, y, z):
    """Turn a 3-D coordinate into a unit-length color triple.

    Each component is clamped to ``[0.001, 1.0]`` and the resulting vector is
    scaled to Euclidean length 1.

    Returns:
        A list ``[r, g, b]`` of the three normalized components.
    """
    clipped = np.clip(np.array([x, y, z]), 0.001, 1.0)
    # The lower clamp of 0.001 keeps the length strictly positive.
    length = np.sqrt(np.sum(np.square(clipped)))
    unit = clipped / length
    return list(unit)
# Keep only the first three columns (coordinates), subsample to 2048 points and
# normalize into the unit box.
# pcl = np.load('chair_pcl.npy')
pcl = standardize_bbox(pcl[:, :3], 2048)

# Re-orient the cloud for the renderer: (x, y, z) -> (-z, x, y + 0.0125).
pcl = pcl[:, [2, 0, 1]]
pcl[:, 0] *= -1
pcl[:, 2] += 0.0125

# One sphere per point, colored by its position inside the unit box (the
# 0.0125 lift applied above is removed again for the color lookup).
xml_segments = [xml_head]
for px, py, pz in pcl:
    color = colormap(px + 0.5, py + 0.5, pz + 0.5 - 0.0125)
    xml_segments.append(xml_ball_segment.format(px, py, pz, *color))
xml_segments.append(xml_tail)

xml_content = ''.join(xml_segments)

with open('mitsuba_scene.xml', 'w') as f:
    f.write(xml_content)

Center: [-0.02210835  0.03199192  0.4575    ], Scale: 0.3523904354109818


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pcl = np.zeros(data_pd.shape, dtype=np.float)  # 初始化储存数据的array
