In [1]:
from mmpose.apis import inference
from typing import List, Optional, Union
import torch
import torch.nn as nn
import numpy as np
from mmpose.structures import PoseDataSample
from mmpose.apis import init_model as init_pose_estimator
from mmpose.registry import VISUALIZERS
from mmengine.dataset import Compose, pseudo_collate
from PIL import Image
import json
from mmpose.structures import merge_data_samples

In [2]:
model_config = r'C:\Users\user\Documents\GitHub\mmpose\my_code\work_dirs\HMD_mo2cap2_test\20240807_023443\vis_data\config.py'
model_ckpt = r'C:\Users\user\Documents\GitHub\mmpose\my_code\work_dirs\HMD_mo2cap2_test\epoch_10.pth'
device = 'cuda'

In [3]:
		
def make_data_info(img_path,test_mode=False):
		MM_TO_M = 1000
		json_path = img_path.replace('.png', '.json').replace('rgba', 'json')
		try:
			with open(json_path, 'r') as f:
				frame_data = json.load(f)
		except FileNotFoundError:
			print(f"Error: File not found - {json_path}")
			return None
		except json.JSONDecodeError:
			print(f"Error: Invalid JSON format in file - {json_path}")
			return None
		try:

			p2d = np.zeros((15, 2))
			p3d = np.zeros((15, 3))

			joint_names = []
			for key in frame_data.keys():
				if key not in ['action', 'Head']: # keys to skip from json
					joint_names.append(key)


			x_min, y_min = 300,300
			x_max, y_max = 0,0
			# TODO : joint_names 순서랑 metainfo 순서랑 다름. 순서확인할것 joint_names 배열 순서 수정할 것 metainfo 있는걸로
			# 근데 왜 visuall hook에는 예측된 2d 가 정상으로 보이지?
			for jid, joint_name in enumerate(joint_names):
				p2d[jid][0] = frame_data[joint_name]['2d'][0] - 33
				p2d[jid][1] = frame_data[joint_name]['2d'][1]
				# if frame_data[joint_name]['2d'][0] < x_min: x_min = frame_data[joint_name]['2d'][0]
				# if frame_data[joint_name]['2d'][1] < y_min: y_min = frame_data[joint_name]['2d'][1]
				# if frame_data[joint_name]['2d'][0] > x_max: x_max = frame_data[joint_name]['2d'][0]
				# if frame_data[joint_name]['2d'][1] > y_max: y_max = frame_data[joint_name]['2d'][1]
				
				#test 셋 좌표계와 동일하게
				# p3d[jid][0] = frame_data[joint_name]['3d'][0]
				# p3d[jid][1] = frame_data[joint_name]['3d'][1]
				# p3d[jid][2] = frame_data[joint_name]['3d'][2]
				if not test_mode:
					p3d[jid][0] = frame_data[joint_name]['3d'][0]
					p3d[jid][1] = frame_data[joint_name]['3d'][2] * -1
					p3d[jid][2] = frame_data[joint_name]['3d'][1] 
				else:
					p3d[jid][0] = frame_data[joint_name]['3d'][0]
					p3d[jid][1] = frame_data[joint_name]['3d'][1]
					p3d[jid][2] = frame_data[joint_name]['3d'][2]

			# 둘다 neck이 000
			p3d -= p3d[0]
			hmd_info = p3d[[0,3,6],:].reshape(1,3,3)
			rand_val = np.random.randint(-10, 10, hmd_info.shape)
			hmd_info_w_noise = (hmd_info + rand_val).reshape(1,-3,3)
			p3d /= MM_TO_M
			hmd_info /= MM_TO_M
			hmd_info_w_noise /= MM_TO_M
			# bbox = np.array([x_min-10, y_min-10, x_max+10, y_max+10])
			# bbox = np.clip(bbox,0,256)
			# 어차피 mo2cap2 dataset의 testset에는 2차원 keypoint에 대한 정보가 없음. 그냥 이미지 전체가 bbox로 취급
			# TODO : 이거 bbox 가 달라짐(test set 이미지의 크기가1280x1024이고 , trainset의  이미지 크기는 256x256임)
			if test_mode:
				#img = img[:, 180:1120, :] # no-crop
				bbox = np.array([180,0,1120,1024]) #xyxy
				# bbox = np.array([0,0,1280,1024]) #xyxy
		
			else:
				bbox = np.array([0,0,256,256])
			bbox = bbox[np.newaxis,:]

		except KeyError as e:
			print(f'Error: key{e}')
			return None

	


		keypoints = p2d
		keypoints_visible = np.ones((1,15),dtype=np.float32)
		keypoint3d = p3d
		keypoints = np.array(keypoints).reshape(1, -1, 2)
		keypoint3d = np.array(keypoint3d).reshape(1, -1, 3)

		area = np.clip((x_max - x_min) * (y_max  - y_min) * 0.53, a_min=1.0, a_max=None)
		area = np.array(area, dtype=np.float32)

		# data_info 딕셔너리 생성

		data_info = {
			'img_path': img_path,
			'num_keypoints': 15,
			'keypoints': keypoints,
			'keypoints_visible': keypoints_visible,
			'keypoint3d': keypoint3d,
			'bbox' : bbox,
			'bbox_score': np.ones(1, dtype=np.float32),
			'area': area,
			'hmd_info':hmd_info,
			'hmd_info_w_noise':hmd_info_w_noise,
			'raw_ann_info': {
				'id': 1,
				'category_id': np.ones(1, dtype=np.float32),
				'keypoint3d': keypoint3d,
				'iscrowd': 0,
				'num_keypoints': 15,
			},
		}
		if test_mode:
			test_mode_update = {
				'action': [frame_data['action']],
				'raw_ann_info': {
				}
			}
			# data_info 업데이트
			data_info.update(test_mode_update)
		else:
			not_test_mode_update = {
				'depth_path': None,
				'segmentation_path': None,
				'raw_ann_info': {
					'bbox': bbox,
					'keypoints': keypoints,
					'area': area,
				}
			}
			# data_info 업데이트
			data_info.update(not_test_mode_update)


		return data_info

In [4]:
def inference_topdown(model: nn.Module,
                      img: Union[np.ndarray, str],
                      bboxes: Optional[Union[List, np.ndarray]] = None,
                      bbox_format: str = 'xyxy') -> List[PoseDataSample]:
	"""Inference image with a top-down pose estimator.

	Args:
		model (nn.Module): The top-down pose estimator
		img (np.ndarray | str): The loaded image or image file to inference
		bboxes (np.ndarray, optional): The bboxes in shape (N, 4), each row
			represents a bbox. If not given, the entire image will be regarded
			as a single bbox area. Defaults to ``None``
		bbox_format (str): The bbox format indicator. Options are ``'xywh'``
			and ``'xyxy'``. Defaults to ``'xyxy'``

	Returns:
		List[:obj:`PoseDataSample`]: The inference results. Specifically, the
		predicted keypoints and scores are saved at
		``data_sample.pred_instances.keypoints`` and
		``data_sample.pred_instances.keypoint_scores``.
	"""
	scope = model.cfg.get('default_scope', 'mmpose')
	if scope is not None:
		inference.init_default_scope(scope)
	pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)

	if bboxes is None or len(bboxes) == 0:
		# get bbox from the image size
		if isinstance(img, str):
			w, h = Image.open(img).size
		else:
			h, w = img.shape[:2]

		bboxes = np.array([[0, 0, w, h]], dtype=np.float32)
	else:
		if isinstance(bboxes, list):
			bboxes = np.array(bboxes)

		assert bbox_format in {'xyxy', 'xywh'}, \
			f'Invalid bbox_format "{bbox_format}".'

		if bbox_format == 'xywh':
			bboxes = inference.bbox_xywh2xyxy(bboxes)

	# construct batch data samples
	data_list = []

	if isinstance(img, str):
		data_info = make_data_info(img)
	else:
		raise

	# data_info['bbox'] = bbox[None]  # shape (1, 4)
	# data_info['bbox_score'] = np.ones(1, dtype=np.float32)  # shape (1,)
	data_info.update(model.dataset_meta)
	data_list.append(pipeline(data_info))

	if data_list:
		# collate data list into a batch, which is a dict with following keys:
		# batch['inputs']: a list of input images
		# batch['data_samples']: a list of :obj:`PoseDataSample`
		batch = inference.pseudo_collate(data_list)
		with torch.no_grad():
			results = model.test_step(batch)
	else:
		results = []

	return results

In [5]:
# build pose estimator
pose_estimator = init_pose_estimator(
	model_config,
	model_ckpt,
	device=device,
	# cfg_options=dict(
	# 	model=dict(test_cfg=dict(output_heatmaps=args.draw_heatmap)))
	)
visualizer = VISUALIZERS.build(pose_estimator.cfg.visualizer)
visualizer.set_dataset_meta(pose_estimator.dataset_meta)

Loads checkpoint by local backend from path: C:\Users\user\Documents\GitHub\mmpose\my_code\work_dirs\HMD_mo2cap2_test\epoch_10.pth


In [6]:
test_img_path = r'F:\mo2cap2_data_half\TestSet\weipeng_studio\rgba\frame_c_0_f_0777.png'
train_img_path = r'F:\mo2cap2_data_half\ValSet\mo2cap2_chunk_0033\rgba\mo2cap2_chunk_0033_000823.png'

In [7]:
# infer_data=inference_topdown(
# 	model=pose_estimator,
# 	img=test_img_path)

# pred = infer_data[0].pred_instances.keypoint_3d
# gt = infer_data[0].gt_instances.keypoint3d

# pred = pred.detach().cpu().view(15,3)
# gt = torch.tensor(gt).view(15,3)
# print(pred.shape,gt.shape)
# pred
# gt

In [8]:
# make_data_info(test_img_path,True)

In [9]:
import mmcv

img_path = train_img_path

infer_data=inference_topdown(
	model=pose_estimator,
	img=img_path)
data_samples = merge_data_samples(infer_data)

if isinstance(img_path, str):
	img = mmcv.imread(img_path, channel_order='rgb')
elif isinstance(img_path, np.ndarray):
	img = mmcv.bgr2rgb(img)

visualizer.add_datasample(
'result',
image = img,
data_sample = data_samples,
draw_gt = True,
draw_pred = True,
draw_2d = True,
draw_bbox = False,
show_kpt_idx = False,
skeleton_style = 'mmpose',
dataset_2d = 'coco',
dataset_3d = 'coco',
convert_keypoint = False,
axis_azimuth = 90,
axis_limit = 1.7,
axis_dist = 8.0,
axis_elev = -10.0,
num_instances = -1,
show = True,
wait_time = 0,
out_file = None,
kpt_thr = 0.3,
step = 0) 

array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]]

In [10]:
infer_data

[<PoseDataSample(
 
     META INFORMATION
     dataset_name: 'mo2cap2'
     img_path: 'F:\\mo2cap2_data_half\\ValSet\\mo2cap2_chunk_0033\\rgba\\mo2cap2_chunk_0033_000823.png'
     input_size: (256, 256)
     raw_ann_info: 
         bbox: array([[  0,   0, 256, 256]])
         keypoints: array([[[134.61337166, 198.50747153],
                     [167.90431027, 179.03470598],
                     [160.78406192, 146.86759427],
                     [168.87520759, 132.89525752],
                     [101.89598031, 191.54762624],
                     [101.52970372, 154.64018315],
                     [ 84.89197173, 153.08418472],
                     [141.58364949, 155.0865981 ],
                     [141.57454608, 146.29016536],
                     [140.16659262, 156.31262691],
                     [142.44873908, 149.57985191],
                     [125.06243055, 156.32110424],
                     [120.17425073, 138.86751302],
                     [121.02561356, 139.62795134],
           

In [11]:
pred = infer_data[0].pred_instances.keypoint_3d
gt = infer_data[0].gt_instances.keypoint3d

pred = pred.detach().cpu().view(15,3)
gt = torch.tensor(gt).view(15,3)
print(pred.shape,gt.shape)
pred
gt

torch.Size([15, 3]) torch.Size([15, 3])


tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.1632, -0.0688, -0.0346],
        [ 0.2218, -0.3440, -0.1437],
        [ 0.1600, -0.1018, -0.2575],
        [-0.1641, -0.0619,  0.0425],
        [-0.2585, -0.3372, -0.0788],
        [-0.2305, -0.0783, -0.1630],
        [ 0.1043, -0.6304,  0.0092],
        [ 0.1660, -1.1080, -0.0071],
        [ 0.1801, -1.4196,  0.2915],
        [ 0.2546, -1.5901,  0.1708],
        [-0.0915, -0.6225,  0.0215],
        [-0.2191, -1.0759, -0.1465],
        [-0.2676, -1.4989, -0.0915],
        [-0.3569, -1.5795, -0.2701]], dtype=torch.float64)

In [15]:
from mmpose.evaluation.metrics import mo2cap2_evaluate
# compute_error 인풋 확인
error = mo2cap2_evaluate.compute_error(pred=pred, gt=gt, mode='mo2cap2')
error

114.33787011999581