In [1]:
import sys
import os
# Repository root as an absolute, machine-specific path.
# NOTE(review): the same literal is repeated as args.base_dir in the
# configuration cell; both must be edited when running on another machine.
base_dir = "/home/jingpei/Desktop/CtRNet-robot-pose-estimation"
# Put the repository root on the import path; presumably this is what lets the
# `utils` and `models.CtRNet` imports further down resolve.
sys.path.append(base_dir)

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image as PILImage
import cv2


from utils import *
from models.CtRNet import CtRNet



In [2]:
import argparse

# No command-line flags are declared: parsing an empty argument string simply
# yields a blank namespace that is filled in by hand below.
parser = argparse.ArgumentParser()
args = parser.parse_args("")

# Repository root; the weight and URDF locations are derived from it.
# NOTE(review): this repeats the `base_dir` literal from the import cell.
args.base_dir = "/home/jingpei/Desktop/CtRNet-robot-pose-estimation"

vars(args).update(
    use_gpu=True,
    trained_on_multi_gpus=False,
    keypoint_seg_model_path=os.path.join(args.base_dir, "weights/baxter/net.pth"),
    urdf_file=os.path.join(args.base_dir, "urdfs/Baxter/baxter_description/urdf/baxter.urdf"),
    robot_name='Baxter_left_arm',  # "Panda" or "Baxter_left_arm"
    n_kp=7,
    # Image size and camera intrinsics before any resizing.
    height=1536,
    width=2048,
    fx=960.41357421875,
    fy=960.22314453125,
    px=1021.7171020507812,
    py=776.2381591796875,
    # Resize factor applied to the input image: 2048x1536 -> 640x480.
    scale=0.3125,
)

# Bring the image size and the intrinsics down by the same factor so that they
# describe the resized image. int() truncates the scaled size.
args.width, args.height = int(args.width * args.scale), int(args.height * args.scale)
for intrinsic_name in ("fx", "fy", "px", "py"):
    setattr(args, intrinsic_name, getattr(args, intrinsic_name) * args.scale)

In [3]:
# Per-channel mean / std handed to Normalize after the image has been turned
# into a tensor.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

trans_to_tensor = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD)]
)

def preprocess_img(cv_img,args):
    """Resize an image by ``args.scale`` and run it through ``trans_to_tensor``.

    Args:
        cv_img: image array accepted by ``PILImage.fromarray``.
        args: object exposing a numeric ``scale`` attribute (resize factor).

    Returns:
        Whatever ``trans_to_tensor`` produces for the resized PIL image.
    """
    pil_img = PILImage.fromarray(cv_img)
    orig_w, orig_h = pil_img.size
    # int() truncates, so the target size is rounded down to whole pixels.
    target_size = (int(orig_w * args.scale), int(orig_h * args.scale))
    return trans_to_tensor(pil_img.resize(target_size))

# The name `CtRNet` is rebound from the class to the model *instance* here
# (later cells call `CtRNet.inference_single_image(...)` and `CtRNet.robot`).
# Importing the class under a private alias keeps this cell re-runnable: a
# second execution would otherwise try to call the instance instead of the class.
from models.CtRNet import CtRNet as _CtRNetModel
CtRNet = _CtRNetModel(args)

Loading keypoint segmentation model from /home/jingpei/Desktop/CtRNet-robot-pose-estimation/weights/baxter/net.pth
Camera intrinsics: [[300.12924194   0.         319.28659439]
 [  0.         300.06973267 242.57442474]
 [  0.           0.           1.        ]]
Robot model: Baxter_left_arm


In [4]:
import pickle

# Ground-truth annotations for the Baxter real dataset. Later cells index the
# loaded object as ground_truth["pose_<i>"] with the keys 'joints', 'ee_2d'
# and 'ee_3d'.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load this file from a trusted source.
filename = os.path.join(args.base_dir,"data/baxter_data/baxter-real-dataset/ground_truth_data")
# The context manager closes the file even if unpickling raises.
with open(filename,'rb') as infile:
    ground_truth = pickle.load(infile)

In [7]:
import glob

# Run the network on every PNG of the 20 pose folders and cache, per image
# path, the predicted 2D keypoints, the 3D points returned by the robot model
# and the predicted pose (stored under 'cTb'). The evaluation cells read
# result_dict back by image path.
result_dict = dict()
count = 0  # number of images processed

for pose_idx in range(20):
    pose_key = "pose_" + str(pose_idx)
    path = os.path.join(args.base_dir,"data/baxter_data/baxter-real-dataset/" + pose_key + "/*.png")
    file_list = glob.glob(path)

    for image_file in file_list:

        cv_img = cv2.imread(image_file)
        if cv_img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None
            # instead of raising; fail here with the offending path rather
            # than with an opaque cvtColor error on the next line.
            raise IOError("cv2.imread could not read image: " + image_file)
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        image = preprocess_img(cv_img,args)

        if args.use_gpu:
            image = image.cuda()

        # Joint angles are recorded per pose, not per image.
        joint_angles = np.array(ground_truth[pose_key]['joints'])

        cTr, points_2d, segmentation = CtRNet.inference_single_image(image, joint_angles)

        _,point_3d = CtRNet.robot.get_joint_RT(np.array(joint_angles))

        result_dict[image_file] = {
            'point_2d': points_2d.detach().cpu().numpy().squeeze(),
            'point_3d': point_3d,
            'cTb': cTr.detach().cpu().numpy().squeeze(),
        }

        count+=1


In [9]:
# Camera model used to project the end-effector. The intrinsics are the same
# values as the pre-scaling fx/fy/px/py in the configuration cell, i.e. they
# are NOT multiplied by args.scale.
baxter_vis = Baxter_visualization(
    fx=960.41357421875,
    fy=960.22314453125,
    px=1021.7171020507812,
    py=776.2381591796875,
    D=None,
)

def get_ee_position(cTb,joints,baxter_vis):
    """Compute the end-effector position in 3D and its image projection.

    Args:
        cTb: array-like of 6 values. The first three are a rotation vector
            (converted with ``cv2.Rodrigues``), the last three a translation.
        joints: joint configuration forwarded unchanged to
            ``baxter_vis.get_bl_T_Jn(8, joints)``.
        baxter_vis: helper object providing ``get_bl_T_Jn``,
            ``dehomogenize_3d``, ``get_camera_matrix`` and
            ``dehomogenize_2d``.

    Returns:
        ``(ee_3d, ee_2d)``: flat arrays holding the dehomogenized 3D point
        (presumably in the camera frame) and its projection through
        ``baxter_vis.get_camera_matrix()``.
    """
    cTb = np.asarray(cTb)

    rvec = cTb[:3]
    tvec = cTb[3:].reshape(-1,1)
    R,_ = cv2.Rodrigues(rvec)

    # 4x4 homogeneous transform [R | t; 0 0 0 1] built from the predicted pose.
    pred_pose = np.vstack((np.hstack((R,tvec)),[0,0,0,1]))

    # Origin of the frame returned by get_bl_T_Jn(8, joints), mapped through
    # the predicted pose (frame 8 is presumably the end-effector link).
    origin_h = np.array([0,0,0,1]).reshape((-1,1))
    ee_3d = pred_pose @ baxter_vis.get_bl_T_Jn(8,joints) @ origin_h
    ee_3d = baxter_vis.dehomogenize_3d(ee_3d).reshape(-1)

    # Project the 3D point with the camera matrix and dehomogenize.
    P = baxter_vis.get_camera_matrix()
    ee_2d = baxter_vis.dehomogenize_2d(P @ ee_3d.reshape(3,1)).reshape(-1)

    return ee_3d, ee_2d

In [11]:
# Per-image end-effector errors: Euclidean distance between the ground-truth
# end-effector position of the pose and the one obtained from the predicted
# pose, in the image (2D) and in 3D.
err_2d_list = []
err_3d_list = []
for pose_idx in range(20):
    path = os.path.join(args.base_dir,"data/baxter_data/baxter-real-dataset/pose_" + str(pose_idx) + "/*.png")
    file_list = glob.glob(path)

    # Ground truth is stored per pose, so look it up once per pose rather
    # than once per image.
    pose_gt = ground_truth["pose_" + str(pose_idx)]
    gt_2d = np.array(pose_gt['ee_2d'])
    gt_3d = np.array(pose_gt['ee_3d'])
    joints = pose_gt['joints']

    for image_file in file_list:
        # Only the predicted pose is needed here; the cached keypoints are
        # not used by the error computation.
        cTb = result_dict[image_file]['cTb']

        ee_3d, ee_2d = get_ee_position(cTb,joints,baxter_vis)
        err_2d = np.linalg.norm(gt_2d - ee_2d)
        err_3d = np.linalg.norm(gt_3d - ee_3d)
        err_2d_list.append(err_2d)
        err_3d_list.append(err_3d)

In [12]:
# Average 2D end-effector error over all evaluated images.
# The list is converted to an array so the threshold comparisons in the
# following cells work element-wise.
err_2d_list = np.array(err_2d_list)
err_2d_list.mean()

11.628338259399516

In [13]:
# pck_auc
# pck_2d[t] is the fraction of images whose 2D error is strictly below the
# threshold t, for t = 0..199. The AUC is the mean of that curve.
errors_2d = np.asarray(err_2d_list)  # also works if err_2d_list is still a list
n_samples_2d = len(errors_2d)        # was hard-coded as 100
pck_2d = [np.sum(errors_2d < threshold) / n_samples_2d for threshold in range(200)]

AUC_2d = np.sum(pck_2d) / len(pck_2d)
AUC_2d

0.9394499999999999

In [14]:
# pck @50: entry 50 of the PCK curve, i.e. the normalized count of images
# whose 2D end-effector error is strictly below 50 (pixels, presumably).
pck_2d[50]

1.0

In [15]:
# Average 3D end-effector error over all evaluated images.
# The list is converted to an array so it can be scaled and thresholded
# element-wise afterwards.
err_3d_list = np.array(err_3d_list)
err_3d_list.mean()

0.06381392332167647

In [16]:
# add_auc
# The 3D errors are scaled by 1000 (presumably metres -> millimetres) into a
# separate array. Rebinding err_3d_list itself, as before, made every re-run
# of this cell multiply the errors by 1000 again.
err_3d_mm = np.asarray(err_3d_list) * 1000
n_samples_3d = len(err_3d_mm)  # was hard-coded as 100

# add_3d[t] is the fraction of images whose scaled 3D error is strictly below
# the threshold t, for t = 0..399. The AUC is the mean of that curve.
add_3d = [np.sum(err_3d_mm < threshold) / n_samples_3d for threshold in range(400)]

AUC_3d = np.sum(add_3d) / len(add_3d)
AUC_3d

0.8393

In [17]:
# add @100: entry 100 of the ADD curve, i.e. the normalized count of images
# whose scaled 3D end-effector error is strictly below 100
# (millimetres, presumably, after the x1000 scaling in the add_auc cell).
add_3d[100]

0.88