In [1]:
# !git clone https://github.com/CMU-Perceptual-Computing-Lab/panoptic-toolbox
# %cd panoptic-toolbox
# !./scripts/getData.sh 171204_pose1_sample
# !./scripts/extractAll.sh 171204_pose1_sample

In [2]:
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter
import argparse
import os
import pprint
import logging
import json
import sys
import copy
import cv2

def add_path(path):
    """Put ``path`` at the front of ``sys.path`` unless it is already listed.

    Args:
        path: directory to make importable.
    """
    if path in sys.path:
        return
    sys.path.insert(0, path)


# this_dir = osp.dirname(__file__)
this_dir = os.getcwd()

# lib_path = osp.join(this_dir, '..', 'lib')
lib_path = os.path.join(this_dir, 'lib')
add_path(lib_path)

In [3]:
from mmpose.core.evaluation import keypoints_from_heatmaps
import math
import matplotlib.pyplot as plt
import cv2
import numpy as np
%matplotlib inline

In [4]:
# import _init_paths
from core.config import config
from core.config import update_config
from core.function import train_3d, validate_3d
from utils.utils import create_logger
from utils.utils import save_checkpoint, load_checkpoint, load_model_state
from utils.utils import load_backbone_panoptic
import dataset
import models



In [5]:
%matplotlib inline

In [6]:
def parse_args(argstring):
    """Parse CLI-style tokens supplied from the notebook and load the config.

    Args:
        argstring: list of argument tokens, e.g. ``['--cfg', 'exp.yaml']``.
            Tokens the parser does not recognise are ignored.

    Returns:
        The parsed ``argparse.Namespace``. ``update_config`` has already been
        called with ``args.cfg`` by the time this returns.
    """
    cli = argparse.ArgumentParser(description='Train keypoints network')
    cli.add_argument(
        '--cfg',
        type=str,
        required=True,
        help='experiment configure file name',
    )
    known, _unrecognised = cli.parse_known_args(argstring)
    update_config(known.cfg)
    return known

In [7]:
args = parse_args(['--cfg', 'configs/panoptic/resnet50/prn64_cpn80x80x20_960x512_cam5.yaml'])
#python run/train_3d.py --cfg configs/panoptic/resnet50/prn64_cpn80x80x20_960x512_cam5.yaml

In [8]:
config.DATASET

{'ROOT': '/datasets_2/panoptic-toolbox/data/',
 'TRAIN_DATASET': 'panoptic',
 'TEST_DATASET': 'panoptic',
 'TRAIN_SUBSET': 'train',
 'TEST_SUBSET': 'validation',
 'ROOTIDX': 2,
 'DATA_FORMAT': 'jpg',
 'BBOX': 2000,
 'CROP': True,
 'COLOR_RGB': True,
 'FLIP': False,
 'DATA_AUGMENTATION': False,
 'CAMERA_NUM': 5,
 'FOCAL_LENGTH_AUGMENTATION': False,
 'GRID_ZROTATION_AUGMENTATION': False,
 'SPACE_XYTRANSLATION_AUGMENTATION': False,
 'SCALE_FACTOR': 0.35,
 'ROT_FACTOR': 45}

In [9]:
###################################################
config.DATASET.GRID_ZROTATION_AUGMENTATION = False
#################################################

In [10]:
# logger, final_output_dir, tb_log_dir = create_logger(
#     config, args.cfg, 'validate')

In [11]:
# logger.info(pprint.pformat(args))
# logger.info(pprint.pformat(config))

In [12]:
gpus = [int(i) for i in config.GPUS.split(',')]
print('=> Loading data ..')
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

=> Loading data ..


In [13]:
config.TEST.BATCH_SIZE = 1

In [14]:
# Look the dataset class up by attribute name rather than eval(), so the
# config string can only ever select a member of the `dataset` package and is
# never executed as arbitrary code.
test_dataset = getattr(dataset, config.DATASET.TEST_DATASET)(
    config, config.DATASET.TEST_SUBSET, False,
    transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))

# One batch of TEST.BATCH_SIZE samples per configured GPU.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=config.TEST.BATCH_SIZE * len(gpus),
    shuffle=True,
    num_workers=config.WORKERS,
    pin_memory=True)

In [15]:
cudnn.benchmark = config.CUDNN.BENCHMARK
torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
torch.backends.cudnn.enabled = config.CUDNN.ENABLED

In [16]:
config.BACKBONE_MODEL = 'pose_hrnet'

In [17]:
# config.BACKBONE_MODEL = 'pose_resnet'

In [18]:
print('=> Constructing models ..')
# Resolve the model module by attribute name instead of eval() so the config
# string is never executed as code.
model = getattr(models, config.MODEL).get_multi_person_pose_net(
    config, is_train=True)
# Wrapping in DataParallel and moving to the GPU records no autograd history,
# so no torch.no_grad() context is needed here.
model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

# With an empty output directory the checkpoint path is TEST.MODEL_FILE as-is.
final_output_dir = ""

test_model_file = os.path.join(final_output_dir, config.TEST.MODEL_FILE)

=> Constructing models ..
load checkpoint from local path: /workspace/voxelpose-pytorch/data/higher_hrnet48_coco_512x512-60fedcbc_20200712.pth


In [19]:
model.module.backbone

AssociativeEmbedding(
  (backbone): HRNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, 

In [20]:
test_model_file = '/workspace/voxelpose-pytorch/output_HRNET/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5/inter-checkpoint.pth'
test_model_file = '/workspace/voxelpose-pytorch/output_HRNET_ASSO/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5/inter-checkpoint_20000.pth'

# test_model_file = '/workspace/voxelpose-pytorch/output_HRNET_ASSO/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5_AUGMENTATION/inter-checkpoint.pth'

# test_model_file = '/workspace/voxelpose-pytorch/output_HRNET_ASSO/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5_AUGMENTATION/inter-checkpoint_rot75000.pth'

test_model_file = '/workspace/voxelpose-pytorch/output_HRNET_ASSO/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5_AUGMENTATION/inter-checkpoint_RT_117000.pth'
test_model_file = '/workspace/voxelpose-pytorch/output_HRNET_ASSO/panoptic/multi_person_posenet_50/prn64_cpn80x80x20_960x512_cam5_AUGMENTATION/inter-checkpoint_RT_1epoch.pth'

model.load_state_dict(torch.load(test_model_file), strict=True)
# model.module.load_state_dict(torch.load(test_model_file), strict=True)

<All keys matched successfully>

In [21]:
model.eval()

DataParallel(
  (module): MultiPersonPoseNet(
    (backbone): AssociativeEmbedding(
      (backbone): HRNet(
        (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv3): Conv2d(64, 256, ker

In [22]:
# with torch.no_grad():
#     for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(test_loader):
#         if 'panoptic' in config.DATASET.TEST_DATASET:
#             preds, heatmaps, grid_centers, _, _, _ = model(views=inputs, meta=meta, targets_2d=targets_2d,
#                                                       weights_2d=weights_2d, targets_3d=targets_3d[0])
#         print(i)
#         break

In [23]:
from utils.transforms import affine_transform, get_scale, get_affine_transform

In [24]:
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

In [25]:
transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

In [26]:
def get_P(meta, _cam_idx, _target_batch_idx):
    """Build the 3x4 projection matrix ``K @ [R | -R @ T]`` for one camera.

    Args:
        meta: per-camera list of meta dicts; ``meta[i]['camera']`` must hold
            batched tensors under ``R``, ``T``, ``fx``, ``fy``, ``cx``, ``cy``.
        _cam_idx: index of the camera inside ``meta``.
        _target_batch_idx: index of the sample inside the batch dimension.

    Returns:
        ``numpy`` array of shape (3, 4).
    """
    camera = meta[_cam_idx]['camera']
    R, T, fx, fy, cx, cy = (
        camera[key][_target_batch_idx].numpy()
        for key in ('R', 'T', 'fx', 'fy', 'cx', 'cy')
    )

    intrinsics = np.array([
        [fx, 0, cx],
        [0, fy, cy],
        [0, 0, 1],
    ])

    # The translation column is -R @ T (presumably T is the camera position in
    # world coordinates -- confirm against the calibration export).
    translation = -(R @ T)
    extrinsics = np.concatenate((R, translation), axis=1)
    return intrinsics @ extrinsics

In [27]:
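# NOTE: fy is read from K[0,0] rather than K[1,1], because the K[1,1] entry is not an exact value.
# As a result only cameras with fx == fy are supported.
# The Blender fy issue has since been solved; TODO: switch fy to K[1,1].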


def make_meta(_dataset_root, _data_i, num_cam=5):
    """Build the per-camera ``meta`` list the model expects for one frame.

    Calibration for camera ``i`` is read from ``cam{i:02d}_panoptic_R.npy``,
    ``cam{i:02d}_panoptic_T.npy`` and ``cam{i:02d}_panoptic_K.npy`` inside
    ``_dataset_root``. Annotation fields (joints, roots, ...) are zero-filled
    placeholders because no ground truth is supplied here.

    Args:
        _dataset_root: directory holding the ``camXX`` image folders and the
            calibration ``.npy`` files.
        _data_i: frame index; selects ``camXX/{_data_i:04d}.png``.
        num_cam: number of cameras to describe (``cam00`` .. ``cam{num_cam-1}``).

    Returns:
        List of ``num_cam`` dicts, one per camera, in camera order.

    Notes:
        * ``fy`` is read from ``K[0, 0]`` (the same entry as ``fx``), so only
          cameras with ``fx == fy`` are represented correctly.
        * ``cx``/``cy`` are fixed to 960/540 and distortion ``k``/``p`` to zero.
        * Each dict is a shallow copy of one template, so the placeholder
          tensors are shared between cameras.
    """
    # Focal lengths seen with the Blender exports, kept for reference:
    # 25mm -> 1333.33 px, 10mm -> 533.33 px. The actual value is read from K.
    meta_empty = {
        "image": None,  # image path, filled in per camera below
        'num_person': torch.tensor([0]),
        'joints_3d': torch.zeros([1, 10, 15, 3], dtype=torch.float64),
        'joints_3d_vis': torch.zeros([1, 10, 15, 3], dtype=torch.float64),
        'roots_3d': torch.zeros([1, 10, 3], dtype=torch.float64),
        'joints': torch.zeros([1, 10, 15, 2], dtype=torch.float64),
        'joints_vis': torch.zeros([1, 10, 15, 2], dtype=torch.float64),
        'center': torch.tensor([[960., 540.]], dtype=torch.float64),
        'scale': torch.tensor([[10.1250,  5.4000]]),
        'rotation': torch.tensor([0]),
        'camera': None,  # filled in per camera below
        'rot_aug_Rz': np.zeros((1, 1)),
        'trans_aug_Txy': np.zeros((1, 1)),
    }

    _meta_new = []
    for cam_idx in range(num_cam):
        cam_name = 'cam%02d' % cam_idx
        R = np.load(os.path.join(_dataset_root, cam_name + '_panoptic_R.npy'))
        T = np.load(os.path.join(_dataset_root, cam_name + '_panoptic_T.npy'))
        K = np.load(os.path.join(_dataset_root, cam_name + '_panoptic_K.npy'))
        focal = K[0, 0]  # used for both fx and fy, see Notes

        meta = copy.copy(meta_empty)
        # Use the _data_i argument, not a notebook-level loop variable.
        meta['image'] = [os.path.join(_dataset_root, '%s/%04d.png' % (cam_name, _data_i))]
        meta['camera'] = {
            'R': torch.tensor(R, dtype=torch.float64).reshape(1, 3, 3),
            'T': torch.tensor(T, dtype=torch.float64).reshape(1, 3, 1),
            'fx': torch.tensor([focal], dtype=torch.float64),
            'fy': torch.tensor([focal], dtype=torch.float64),
            'cx': torch.tensor([960.0], dtype=torch.float64),
            'cy': torch.tensor([540.0], dtype=torch.float64),
            'k': torch.zeros((1, 3, 1), dtype=torch.float64),
            'p': torch.zeros((1, 2, 1), dtype=torch.float64),
        }
        _meta_new.append(meta)

    return _meta_new


In [41]:
def make_inputs(_dataset_root, num_cam, _data_i, image_size=(960, 512)):
    """Load one frame from every camera and turn it into model input tensors.

    For each camera the image ``camXX/{_data_i:04d}.png`` is read, converted
    from BGR to RGB, affinely warped to ``image_size`` and passed through the
    notebook-level ``transform`` pipeline.

    Args:
        _dataset_root: directory holding the ``camXX`` image folders.
        num_cam: number of cameras to load (``cam00`` .. ``cam{num_cam-1}``).
        _data_i: frame index.
        image_size: ``(width, height)`` of the network input.

    Returns:
        List of ``num_cam`` tensors, each with a leading batch dimension of 1.

    Raises:
        FileNotFoundError: if an image is missing or cannot be decoded.
    """
    out_w, out_h = int(image_size[0]), int(image_size[1])
    _inputs_new = []

    for cam_idx in range(num_cam):
        image_path = os.path.join(_dataset_root, 'cam%02d/' % cam_idx + '%04d.png' % _data_i)
        data_numpy = cv2.imread(image_path)
        if data_numpy is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError('could not read image: {}'.format(image_path))
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        height, width, _ = data_numpy.shape
        c = np.array([width / 2.0, height / 2.0])
        s = get_scale((width, height), [out_w, out_h])
        r = 0  # no rotation
        trans = get_affine_transform(c, s, r, [out_w, out_h])

        warped = cv2.warpAffine(
            data_numpy,
            trans, (out_w, out_h),
            flags=cv2.INTER_LINEAR)

        # unsqueeze(0) adds the batch dimension the model expects.
        _inputs_new.append(transform(warped).unsqueeze(0))

    return _inputs_new

In [29]:
def min_zero_row(zero_mat, mark_zero):
    """Pick one zero from the row that has the fewest remaining zeros.

    ``zero_mat`` is a boolean matrix where ``True`` marks a zero of the cost
    matrix. The row with the smallest non-zero count of ``True`` entries is
    chosen (the first such row on ties), its first ``True`` column is recorded
    as ``(row, col)`` in ``mark_zero``, and that row and column are then
    cleared to ``False``.

    Both arguments are modified in place; nothing is returned.
    """
    fewest, chosen_row = 99999, -1
    for row_idx, row in enumerate(zero_mat):
        n_zeros = np.sum(row == True)
        if 0 < n_zeros < fewest:
            fewest, chosen_row = n_zeros, row_idx

    # First zero of the chosen row becomes the assignment for that row.
    chosen_col = np.where(zero_mat[chosen_row] == True)[0][0]
    mark_zero.append((chosen_row, chosen_col))

    # Neither this row nor this column may be used again.
    zero_mat[chosen_row, :] = False
    zero_mat[:, chosen_col] = False

def mark_matrix(mat):
    """Find a tentative zero assignment and the lines that cover the zeros.

    Returns:
        Tuple ``(marked_zero, marked_rows, marked_cols)`` where ``marked_zero``
        is the list of ``(row, col)`` zeros selected via ``min_zero_row``, and
        ``marked_rows`` / ``marked_cols`` are the covered rows and columns.
    """
    # True wherever the matrix holds a zero.
    zero_mask = (mat == 0)
    remaining = zero_mask.copy()

    # Select zeros one at a time until none are left selectable.
    assignments = []
    while (True in remaining):
        min_zero_row(remaining, assignments)

    assigned_rows = [row for row, _ in assignments]

    # Start from the rows that did not receive an assignment.
    uncovered_rows = list(set(range(mat.shape[0])) - set(assigned_rows))

    marked_cols = []
    changed = True
    while changed:
        changed = False
        # Mark every column that has a zero in an uncovered row.
        for row in uncovered_rows:
            for col, is_zero in enumerate(zero_mask[row, :]):
                if is_zero and col not in marked_cols:
                    marked_cols.append(col)
                    changed = True

        # A row whose assigned zero sits in a marked column becomes uncovered.
        for row, col in assignments:
            if row not in uncovered_rows and col in marked_cols:
                uncovered_rows.append(row)
                changed = True

    # Covered rows are all rows that never became uncovered.
    marked_rows = list(set(range(mat.shape[0])) - set(uncovered_rows))

    return (assignments, marked_rows, marked_cols)

def adjust_matrix(mat, cover_rows, cover_cols):
    """Shift the matrix so a new zero appears among the uncovered cells.

    The smallest uncovered value is subtracted from every uncovered cell and
    added to every cell lying on both a covered row and a covered column.

    ``mat`` is modified in place and also returned.
    """
    # Cells that lie on neither a covered row nor a covered column.
    free_cells = [
        (r, c)
        for r in range(len(mat)) if r not in cover_rows
        for c in range(len(mat[r])) if c not in cover_cols
    ]
    delta = min(mat[r][c] for r, c in free_cells)

    for r, c in free_cells:
        mat[r, c] = mat[r, c] - delta

    # Intersections of covered rows and covered columns.
    for r in cover_rows:
        for c in cover_cols:
            mat[r, c] = mat[r, c] + delta
    return mat

def hungarian_algorithm(mat):
    """Solve the linear assignment problem for a square cost matrix.

    Args:
        mat: square ``numpy`` cost matrix. It is reduced in place, so pass a
            copy if the original values are still needed.

    Returns:
        List of ``(row, col)`` positions of the selected zeros in the reduced
        matrix; an empty list for an empty matrix.
    """
    dim = mat.shape[0]
    if dim == 0:
        # Nothing to assign; without this guard the loop below never runs and
        # the result name would be unbound.
        return []
    cur_mat = mat

    # Step 1 - subtract each row's minimum, then each column's minimum.
    cur_mat -= cur_mat.min(axis=1, keepdims=True)
    cur_mat -= cur_mat.min(axis=0, keepdims=True)

    zero_count = 0
    while zero_count < dim:
        # Step 2 & 3 - tentative assignment and the lines covering all zeros.
        ans_pos, marked_rows, marked_cols = mark_matrix(cur_mat)
        zero_count = len(marked_rows) + len(marked_cols)

        # Fewer covering lines than rows: create more zeros and retry.
        if zero_count < dim:
            cur_mat = adjust_matrix(cur_mat, marked_rows, marked_cols)

    return ans_pos

def ans_calculation(mat, pos):
    """Sum the matrix entries at the given positions.

    Args:
        mat: 2-D ``numpy`` matrix the values are read from.
        pos: sequence of ``(row, col)`` positions.

    Returns:
        ``(total, ans_mat)`` where ``total`` is the sum of the selected entries
        and ``ans_mat`` is a zero matrix of the same shape as ``mat`` holding
        only the selected entries.
    """
    n_rows, n_cols = mat.shape[0], mat.shape[1]
    ans_mat = np.zeros((n_rows, n_cols))
    total = 0
    for entry in pos:
        r, c = entry[0], entry[1]
        picked = mat[r, c]
        total += picked
        ans_mat[r, c] = picked
    return total, ans_mat

class voxelpose_tracker():
    """Carries person IDs from one frame's 3D pose predictions to the next.

    Each prediction is indexed as ``pred[person, joint, channel]``. Channels
    0-2 are read as the 3D position and channel 3 as the validity flag / ID
    (``-1`` means "no person"). The layout is assumed to be
    ``(10, 15, 5) = xyz, ID, confidence`` -- TODO confirm against the model.
    Detected people are assumed to occupy the first rows of a prediction.

    Matching is done with ``hungarian_algorithm`` on the root-joint distance
    between the current and the previous frame.

    NOTE(review): detections without a previous counterpart are blanked to
    ``-1`` and kept in ``remain_unmatched``; they are not given a new ID, and
    ``pre_num`` still counts them. Confirm this is the intended behaviour.
    """

    def __init__(self, config):
        """Store the config and start in the idle (not tracking) state."""
        self.cfg = config
        self.pre_detection = torch.zeros([10, 15, 5])
        self.pre_num = 0
        self.is_tracking = False
        self.root_idx = self.cfg.DATASET.ROOTIDX

        # Detections that could not be matched to any previous person.
        self.remain_unmatched = []

    def update(self, _one_pred):
        """Assign tracking IDs to one frame of predictions.

        Args:
            _one_pred: prediction tensor for a single sample. It is modified
                in place (channel 3 receives the IDs) and kept as the
                reference for the next call.

        Returns:
            ``_one_pred`` with IDs written, or an all-zero tensor on the call
            where tracking stops because two consecutive frames were empty.
        """
        _detected_idx = _one_pred[:, 0, 3] != -1
        curr_num = int(_detected_idx.sum())

        if (not self.is_tracking) and (curr_num > 0):
            print('tracking started. initialize.')
            for ID in range(curr_num):
                _one_pred[ID, :, 3] = ID
            self.pre_detection = _one_pred
            self.pre_num = curr_num
            self.is_tracking = True
            return _one_pred

        if self.is_tracking and (self.pre_num == 0) and (curr_num == 0):
            print('tracking finished. idle...')
            self.is_tracking = False
            self.pre_detection = torch.zeros([10, 15, 5])
            return self.pre_detection

        if not self.is_tracking:
            return _one_pred

        # TRACKING
        # Pairwise distance between current and previous root joints,
        # shape (curr_num, pre_num).
        curr_roots = _one_pred[:curr_num, self.root_idx, :3]
        prev_roots = self.pre_detection[:self.pre_num, self.root_idx, :3]
        cost_matrix = torch.norm(
            curr_roots[:, None, :] - prev_roots[None, :, :], dim=-1)
        cost_matrix = cost_matrix.detach().cpu().numpy()

        print('pre_num is {}'.format(self.pre_num))
        print('curr_num is {}'.format(curr_num))
        if curr_num > self.pre_num:
            print('curr_num > pre_num')

        # The assignment solver needs a square matrix: pad with zero-cost
        # dummy rows (people that disappeared) or dummy columns (new people).
        size = max(curr_num, self.pre_num)
        if cost_matrix.shape != (size, size):
            padded = np.zeros((size, size))
            padded[:curr_num, :self.pre_num] = cost_matrix
            cost_matrix = padded

        ans_pos = hungarian_algorithm(cost_matrix.copy())
        assert len(ans_pos) == size

        matched = []
        unmatched = []
        for cur_id, prev_id in sorted(ans_pos, key=lambda x: x[1]):
            if cur_id >= curr_num:
                # Dummy row: a previous person with no current detection.
                continue
            if prev_id < self.pre_num:
                matched.append((cur_id, prev_id))
            else:
                # Dummy column: a current detection with no previous person.
                unmatched.append(cur_id)

        for cur_id, prev_id in matched:
            _one_pred[cur_id, :, 3] = self.pre_detection[prev_id, :, 3]

        for cur_id in reversed(unmatched):
            # Keep a private copy before the row is blanked out.
            self.remain_unmatched.append(_one_pred[cur_id, :, :].detach().clone())
            _one_pred[cur_id, :, :] = -1.

        self.pre_detection = _one_pred
        self.pre_num = curr_num

        return _one_pred

In [30]:
batch_size = 1

In [31]:
# pixel std is 200.0
x = 0
y = 0
w = 960
h = 512

w = 1920
h = 1080

# w = 512
# h = 512

# input_size = cfg.data_cfg['image_size']
# aspect_ratio = input_size[0] / input_size[1]
# aspect_ratio = 288 / 384
center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

# if w > aspect_ratio * h:
#     h = w * 1.0 / aspect_ratio
# elif w < aspect_ratio * h:
#     w = h * aspect_ratio

scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
# scale = scale * 1.25

# scale = np.array([max(w / 200.0, h / 200.0), max(w / 200.0, h / 200.0)], dtype=np.float32)

batch_scale = np.zeros((batch_size, 2))
batch_center = np.zeros((batch_size, 2))
batch_scale[:] = scale
batch_center[:] = center

In [32]:
VoxelTracker = voxelpose_tracker(config)

In [34]:
import time

In [42]:
dataset_root = '/datasets/synthetic_convini_real'
os.makedirs(os.path.join(dataset_root, 'debug'), exist_ok=True)

# FPS CHECK
start_time = time.time()
display_time = 1.0
counter = 0

with torch.no_grad():
    for data_i in range(180):
        meta_new = make_meta(dataset_root, data_i)
        num_view = len(meta_new)
        inputs_new = make_inputs(dataset_root, num_view, data_i)

        counter += 1
        if time.time() - start_time > display_time:
            print("FPS over time {} sec is {}".format(display_time, counter/display_time))
            counter = 0
            start_time = time.time()


FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0
FPS over time 1.0 sec is 5.0


KeyboardInterrupt: 

In [131]:
dataset_root = '/datasets/synthetic_convini_real'

os.makedirs(os.path.join(dataset_root, 'debug'), exist_ok=True)

# One BGR colour per tracked person ID.
pred_joint_color = [[0, 0, 255],
                    [0, 255, 0],
                    [255, 0, 0],
                    [0, 255, 255],
                    [255, 0, 255],
                    [255, 255, 0],
                    [255, 255, 255],
                    [0, 0, 122],
                    [0, 122, 0],
                    [122, 0, 0]]

# Sample of the batch that gets rendered.
target_batch_idx = 0

with torch.no_grad():

    for data_i in range(180):
        print(data_i)
        meta_new = make_meta(dataset_root, data_i)
        num_view = len(meta_new)
        inputs_new = make_inputs(dataset_root, num_view, data_i)

        preds, heatmaps, grid_centers, _, _, _ = model(views=inputs_new, meta=meta_new, targets_2d=None,
                                                      weights_2d=None, targets_3d=None)

        # The tracker writes IDs into `preds` in place (each row is a view).
        for _b in range(preds.shape[0]):
            one_pred = preds[_b]
            current_tracking = VoxelTracker.update(one_pred)
            current_tracking = current_tracking.detach().cpu().numpy()

        preds_np = preds.detach().cpu().numpy()
        one_pred = preds_np[target_batch_idx]
        detected_idx = one_pred[:, 0, 3] != -1
        num_detected = int(detected_idx.sum())
        print('num_detected is : {}'.format(num_detected))

        # Draw exactly the rows that are still valid. The tracker may blank a
        # row in the middle, so the valid rows are not always the first ones.
        detected_rows = np.flatnonzero(detected_idx)

        output_holder = []
        heatmap_holder = []

        for camera_i in range(len(inputs_new)):
            image_cam = cv2.imread(meta_new[camera_i]['image'][0])

            # The projection matrix depends only on the camera: compute it once.
            P = get_P(meta_new, camera_i, target_batch_idx) # meta, cam_idx, batch_idx , [3,4]

            for cand in detected_rows:
                joint_3d = one_pred[cand, :, :3]
                person_ID = int(current_tracking[cand, 0, 3])
                color = pred_joint_color[person_ID % len(pred_joint_color)]

                # Project the homogeneous 3D joints into the image plane.
                ones = np.ones((joint_3d.shape[0], 1))
                joint_3d_homo = np.concatenate((joint_3d, ones), axis=1)
                projected = np.matmul(P, joint_3d_homo.T).T
                projected /= projected[:, 2:3]

                for joint in range(joint_3d.shape[0]):
                    image_cam = cv2.circle(image_cam, (int(projected[joint, 0]), int(projected[joint, 1])), 0, color, 15)

            # Sum the per-joint heatmaps of this view into one colour-mapped image.
            heatmap_fig = np.sum(heatmaps[camera_i][0].detach().cpu().numpy(), axis=0)
            backtorgb = cv2.applyColorMap(np.uint8(255 * heatmap_fig), cv2.COLORMAP_JET)

            image_cam = cv2.resize(image_cam, (480, 256))

            output_holder.append(image_cam)
            heatmap_holder.append(backtorgb)

        # Top row: camera images with joints; bottom row: heatmaps.
        hconcat_rgb = cv2.hconcat(output_holder)
        hconcat_heatmap = cv2.hconcat(heatmap_holder)

        concat = cv2.vconcat([hconcat_rgb, hconcat_heatmap])

        cv2.imwrite(os.path.join(dataset_root, 'debug', '%04d.png'%data_i), concat)


0
pre_num is 4
curr_num is 5
curr_num > pre_num
num_detected is : 5
1
pre_num is 5
curr_num is 5
num_detected is : 5
2
pre_num is 5
curr_num is 5
num_detected is : 5
3
pre_num is 5
curr_num is 5
num_detected is : 5
4
pre_num is 5
curr_num is 5
num_detected is : 5
5
pre_num is 5
curr_num is 5
num_detected is : 5
6


KeyboardInterrupt: 

In [134]:
one_pred = copy.deepcopy(preds[0])

In [135]:
five_detected_pred = copy.deepcopy(one_pred)

In [136]:
one_pred[4,:,:] = one_pred[5,:,:]

In [137]:
four_detected_pred = copy.deepcopy(one_pred)

In [152]:
VoxelTracker = voxelpose_tracker(config)

In [153]:
current_tracking = VoxelTracker.update(four_detected_pred)
current_tracking = current_tracking.detach().cpu().numpy()

tracking started. initialize.


In [154]:
current_tracking = VoxelTracker.update(five_detected_pred)
current_tracking = current_tracking.detach().cpu().numpy()

pre_num is 4
curr_num is 5
curr_num > pre_num


In [155]:
current_tracking

array([[[ 7.89994202e+02,  8.13744690e+02,  1.49437842e+03,
          0.00000000e+00,  9.76226926e-01],
        [ 8.01947876e+02,  9.87886658e+02,  1.71987512e+03,
          0.00000000e+00,  9.76226926e-01],
        [ 8.33619507e+02,  8.10159058e+02,  9.62360596e+02,
          0.00000000e+00,  9.76226926e-01],
        [ 6.47752930e+02,  7.99960938e+02,  1.49519043e+03,
          0.00000000e+00,  9.76226926e-01],
        [ 5.45711975e+02,  7.60932556e+02,  1.21508044e+03,
          0.00000000e+00,  9.76226926e-01],
        [ 5.04735962e+02,  7.98648560e+02,  9.62696289e+02,
          0.00000000e+00,  9.76226926e-01],
        [ 7.23757874e+02,  8.32960571e+02,  9.63841553e+02,
          0.00000000e+00,  9.76226926e-01],
        [ 6.72063782e+02,  1.01198376e+03,  5.97239990e+02,
          0.00000000e+00,  9.76226926e-01],
        [ 6.56512573e+02,  1.01455273e+03,  1.99699234e+02,
          0.00000000e+00,  9.76226926e-01],
        [ 9.85145081e+02,  8.19770874e+02,  1.49284949e+03,
    

In [156]:
detected_idx = current_tracking[:, 0, 3] != -1
num_detected = sum(detected_idx)
print('num_detected is : {}'.format(num_detected))

num_detected is : 4


In [34]:
'''Hungarian algorithm demo.

The Hungarian algorithm solves the linear assignment problem by minimising
the total of the selected entries. To maximise instead, subtract every entry
from the matrix maximum, minimise that derived matrix, and read the selected
positions back from the original (profit) matrix.'''

# --- Minimum-sum assignment ---------------------------------------------------
cost_matrix = np.array([
    [7, 6, 2, 9, 2],
    [6, 2, 1, 3, 9],
    [5, 6, 8, 9, 5],
    [6, 8, 5, 8, 6],
    [9, 5, 6, 4, 7],
])
# The solver receives a copy; the untouched matrix is used to read the values back.
ans_pos = hungarian_algorithm(cost_matrix.copy())
# Total of the selected entries and a matrix holding only those entries.
ans, ans_mat = ans_calculation(cost_matrix, ans_pos)

print(f"Linear Assignment problem result: {ans:.0f}\n{ans_mat}")
print(ans_pos)

# --- Maximum-sum assignment ---------------------------------------------------
profit_matrix = np.array([
    [7, 6, 2, 9, 2],
    [6, 2, 1, 3, 9],
    [5, 6, 8, 9, 5],
    [6, 8, 5, 8, 6],
    [9, 5, 6, 4, 7],
])
# Turn the maximisation into a minimisation: large profits become small costs.
max_value = profit_matrix.max()
cost_matrix = max_value - profit_matrix
ans_pos = hungarian_algorithm(cost_matrix.copy())
# Values are read from the profit matrix, not from the derived cost matrix.
ans, ans_mat = ans_calculation(profit_matrix, ans_pos)

print(f"Linear Assignment problem result: {ans:.0f}\n{ans_mat}")
print(ans_pos)

Linear Assignment problem result: 18
[[0. 0. 0. 0. 2.]
 [0. 2. 0. 0. 0.]
 [5. 0. 0. 0. 0.]
 [0. 0. 5. 0. 0.]
 [0. 0. 0. 4. 0.]]
[(3, 2), (0, 4), (1, 1), (2, 0), (4, 3)]
Linear Assignment problem result: 43
[[0. 0. 0. 9. 0.]
 [0. 0. 0. 0. 9.]
 [0. 0. 8. 0. 0.]
 [0. 8. 0. 0. 0.]
 [9. 0. 0. 0. 0.]]
[(0, 3), (1, 4), (2, 2), (3, 1), (4, 0)]


In [None]:
# Tracking state read and updated by the per-sample loop in the next cell:
# no tracking has started, no IDs are stored, and no detections were seen yet.
is_tracking_started, previous_num_detected = False, 0
ID_storage = None

In [None]:
num_batch = preds.shape[0]

for _b in range(num_batch):
    one_pred = preds[_b]
    # A slot is treated as detected when column 3 of its first joint is not -1.
    detected_idx = one_pred[:, 0, 3] != -1
    num_detected = sum(detected_idx)

    # INITIALIZING DETECTION
    if not is_tracking_started and (num_detected > 0):
        # Store a private copy: writing the IDs through an alias of `preds[_b]`
        # would overwrite column 3 of the predictions themselves.
        if torch.is_tensor(one_pred):
            ID_storage = one_pred.detach().clone()
        else:
            ID_storage = np.array(one_pred, copy=True)
        # NOTE(review): assumes the detected slots are the first `num_detected`
        # rows of `one_pred` — confirm against how `preds` is produced.
        for ID in range(num_detected):
            ID_storage[ID, :, 3] = ID
        is_tracking_started = True
        previous_num_detected = num_detected
        continue

    # The number of detections must stay constant once tracking has started.
    if previous_num_detected != num_detected:
        raise ValueError('does not matched!')

    # TODO: associating this sample's detections with the IDs kept in
    # `ID_storage` is not implemented yet (the original `else:` branch was empty,
    # which made the cell a SyntaxError). Presumably the Hungarian matching
    # demonstrated earlier in the notebook is meant to go here — confirm.

In [None]:
# Number of pixels represented by one unit of `scale` (the "pixel std").
PIXEL_STD = 200.0

# Box whose centre and scale are computed below: origin (x, y) and size (w, h).
# NOTE(review): 1920x1080 is presumably the full camera frame — confirm.
# Sizes tried earlier: 960x512 and 512x512.
x = 0
y = 0
w = 1920
h = 1080

# input_size = cfg.data_cfg['image_size']
# aspect_ratio = input_size[0] / input_size[1]
# aspect_ratio = 288 / 384
center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

# Optional aspect-ratio padding (currently disabled):
# if w > aspect_ratio * h:
#     h = w * 1.0 / aspect_ratio
# elif w < aspect_ratio * h:
#     w = h * aspect_ratio

scale = np.array([w / PIXEL_STD, h / PIXEL_STD], dtype=np.float32)
# scale = scale * 1.25

# scale = np.array([max(w / 200.0, h / 200.0), max(w / 200.0, h / 200.0)], dtype=np.float32)


In [None]:
# Repeat the single (2,) scale and centre for every sample of the first
# camera's heatmap batch; the results are float64 arrays of shape (N, 2).
num_samples = heatmaps[0].shape[0]
batch_scale = np.full((num_samples, 2), scale, dtype=np.float64)
batch_center = np.full((num_samples, 2), center, dtype=np.float64)

In [None]:
# Decode the heatmaps of camera `camera_i` with the per-sample centre and scale.
# The result is stored as `reds`, so the `preds` variable used by other cells
# is left untouched.
heatmaps_np = heatmaps[camera_i].detach().cpu().numpy()
reds, maxvals = keypoints_from_heatmaps(heatmaps_np, center=batch_center, scale=batch_scale)

In [None]:
# Display the 'image' entry of the metadata belonging to camera `camera_i`.
meta_new[camera_i]['image']

In [None]:
# Load the first image listed in the metadata of camera `camera_i`.
image_path = meta_new[camera_i]['image'][0]
image_cam_0 = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than later inside the drawing code.
if image_cam_0 is None:
    raise FileNotFoundError('could not read image: {}'.format(image_path))

In [None]:
# i = 0
# image_cam_0 = cv2.imread('/datasets/synthetic_convini/cam%02d/0000.png'%i)

In [None]:
# Mark every decoded keypoint of the first sample on the image.
# Radius 0 with thickness 10 is drawn in colour [0, 0, 255] (BGR order in OpenCV).
for kp_x, kp_y in reds[0, :, :2]:
    image_cam_0 = cv2.circle(image_cam_0, (int(kp_x), int(kp_y)), 0, [0, 0, 255], 10)


In [None]:
figure_kwargs = dict(figsize=(12, 8), dpi=100, facecolor='w', edgecolor='k')
fig = plt.figure(**figure_kwargs)
# Reverse the channel axis (BGR -> RGB) before handing the image to matplotlib.
rgb_view = image_cam_0[:, :, ::-1]
plt.imshow(rgb_view)
# print(image_cam_0.shape)

In [None]:
# Inspect the dimensions of the loaded (and annotated) image.
image_cam_0.shape

In [None]:
# Inspect the shape of the first sample's heatmaps for camera `camera_i`.
heatmaps[camera_i][0].shape

In [None]:
# Collapse the first sample's heatmaps into one 2D map by summing over axis 0.
per_channel_maps = heatmaps[camera_i][0].detach().cpu().numpy()
heatmap_fig = per_channel_maps.sum(axis=0)
heatmap_fig.shape

In [None]:
# Show the summed heatmap computed in the previous cell.
figure_kwargs = dict(figsize=(12, 8), dpi=100, facecolor='w', edgecolor='k')
fig = plt.figure(**figure_kwargs)
plt.imshow(heatmap_fig)

In [None]:
# Display the raw predictions.
preds

In [None]:
def get_P(meta, _cam_idx, _target_batch_idx):
    """Build the 3x4 projection matrix of one camera for one batch entry.

    Args:
        meta: sequence indexed by camera; each item holds a 'camera' dict with
            the entries 'R', 'T', 'fx', 'fy', 'cx' and 'cy', each indexable by
            batch entry and convertible with ``.numpy()``.
        _cam_idx: index of the camera inside ``meta``.
        _target_batch_idx: index of the batch entry to read.

    Returns:
        ``K @ [R | -R @ T]`` as a numpy array.

    NOTE(review): using ``-R @ T`` as the translation suggests that 'T' holds
    the camera position in world coordinates — confirm with the dataset docs.
    """
    camera = meta[_cam_idx]['camera']

    def _read(key):
        # Select the batch entry and convert it to numpy.
        return camera[key][_target_batch_idx].numpy()

    rotation = _read('R')
    position = _read('T')
    fx, fy, cx, cy = (_read(key) for key in ('fx', 'fy', 'cx', 'cy'))

    # Pinhole intrinsics.
    intrinsics = np.array([[fx, 0, cx],
                           [0, fy, cy],
                           [0, 0, 1]])

    # Extrinsics [R | t] with t = -R @ T.
    translation = -(rotation @ position)
    extrinsics = np.concatenate([rotation, translation], axis=1)

    return intrinsics @ extrinsics

In [None]:
import math
import matplotlib.pyplot as plt
import cv2
import numpy as np
%matplotlib inline

In [None]:
# Batch entry and camera used by the projection cells below.
target_batch_idx = 0
# Same camera as the one selected through `camera_i`.
camera_idx = camera_i

In [None]:
preds_np = preds.detach().cpu().numpy()
one_pred = preds_np[target_batch_idx]
# A slot is valid when column 3 of its first joint is non-negative. This is the
# same test the 3D plotting cell uses (`joint[0, 3] >= 0`); the previous `== 0`
# check would drop every slot whose value in that column is a positive ID.
detected_idx = one_pred[:, 0, 3] >= 0
# Plain int so the count can be passed straight to range().
num_detected = int(np.count_nonzero(detected_idx))
print('num_detected is : {}'.format(num_detected))

In [None]:
pred_joint_color = [0, 0, 255]
GT_joint_color = [0, 255, 0]  # only used by the commented-out ground-truth overlay below

image_path = '/datasets/synthetic_convini/cam%02d/'%camera_i+'%04d.png'%data_i
image_cam = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image_cam is None:
    raise FileNotFoundError('could not read image: {}'.format(image_path))

# The projection matrix depends only on the camera and the batch entry, so it
# is computed once instead of once per detected candidate.
P = get_P(meta_new, camera_idx, target_batch_idx) # meta, cam_idx, batch_idx , [3,4]

# NOTE(review): iterates over the first `num_detected` rows of `one_pred`,
# i.e. assumes the detected slots come first — confirm.
for cand in range(num_detected):
    joint_3d = one_pred[cand, :, :3]
#     joint_3d /= 1000.

    # Homogeneous coordinates: append a column of ones to the 3D joints.
    ones = np.ones((joint_3d.shape[0], 1))
    joint_3d_homo = np.concatenate((joint_3d, ones), axis=1)

    # Project and divide by the third coordinate to obtain pixel positions.
    projected = np.matmul(P, joint_3d_homo.T).T
    projected /= projected[:, 2:3]

    for joint in range(joint_3d.shape[0]):
        image_cam = cv2.circle(image_cam, (int(projected[joint,0]),int(projected[joint,1])), 0, pred_joint_color, 10)
#         image_cam = cv2.circle(image_cam, (int(projected_GT[joint,0]),int(projected_GT[joint,1])), 0, GT_joint_color, 10)

    
    
    

In [None]:
# Show the image with the projected joints; the channel axis is reversed
# (BGR -> RGB) before it is handed to matplotlib.
figure_kwargs = dict(figsize=(12, 8), dpi=100, facecolor='w', edgecolor='k')
fig = plt.figure(**figure_kwargs)
rgb_view = image_cam[:, :, ::-1]
plt.imshow(rgb_view)

In [None]:
# !pip install matplotlib==3.3

In [None]:
# Print the space size and space centre from the multi-person config; the 3D
# plot cell below derives its axis limits from these two values.
print(config.MULTI_PERSON.SPACE_SIZE)
print(config.MULTI_PERSON.SPACE_CENTER)

In [None]:
# %matplotlib notebook

In [None]:
%matplotlib inline

In [None]:
# panoptic
LIMBS15 = [[0, 1], [0, 2], [0, 3], [3, 4], [4, 5], [0, 9], [9, 10],
         [10, 11], [2, 6], [2, 12], [6, 7], [7, 8], [12, 13], [13, 14]]

In [None]:
# preds = preds.cpu().numpy()
# One 3D subplot per batch entry, at most four per row.
batch_size = meta_new[0]['num_person'].shape[0]
xplot = min(4, batch_size)
yplot = int(math.ceil(float(batch_size) / xplot))

width = 4.0 * xplot * 2
height = 4.0 * yplot
fig = plt.figure(0, figsize=(width, height))
plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05,
                    top=0.95, wspace=0.05, hspace=0.15)


space_size = config.MULTI_PERSON.SPACE_SIZE
space_center = config.MULTI_PERSON.SPACE_CENTER

# One colour per predicted slot, cycled when there are more slots than colours.
colors = ['b', 'g', 'c', 'y', 'm', 'orange', 'pink', 'royalblue', 'lightgreen', 'gold']


for i in range(batch_size):
    num_person = num_detected  # only used by the commented-out ground-truth block
#     joints_3d = meta[0]['joints_3d'][i]
#     joints_3d_vis = meta[0]['joints_3d_vis'][i]
    ax = plt.subplot(yplot, xplot, i + 1, projection='3d')
    ax.set_box_aspect(aspect = (2,2,1))
    # Axis limits: the configured space, centred on `space_center`.
    for axis_idx, set_limits in enumerate((ax.set_xlim, ax.set_ylim, ax.set_zlim)):
        set_limits([int(space_center[axis_idx] - space_size[axis_idx]/2),
                    int(space_center[axis_idx] + space_size[axis_idx]/2)])

#     for n in range(num_person):
#         joint = joints_3d[n]
#         joint_vis = joints_3d_vis[n]
#         for k in eval("LIMBS{}".format(len(joint))):
#             if joint_vis[k[0], 0] and joint_vis[k[1], 0]:
#                 x = [float(joint[k[0], 0]), float(joint[k[1], 0])]
#                 y = [float(joint[k[0], 1]), float(joint[k[1], 1])]
#                 z = [float(joint[k[0], 2]), float(joint[k[1], 2])]
#                 ax.plot(x, y, z, c='r', lw=1.5, marker='o', markerfacecolor='w', markersize=2,
#                         markeredgewidth=1)
#             else:
#                 x = [float(joint[k[0], 0]), float(joint[k[1], 0])]
#                 y = [float(joint[k[0], 1]), float(joint[k[1], 1])]
#                 z = [float(joint[k[0], 2]), float(joint[k[1], 2])]
#                 ax.plot(x, y, z, c='r', ls='--', lw=1.5, marker='o', markerfacecolor='w', markersize=2,
#                         markeredgewidth=1)

    if preds is not None:
        pred = preds[i]
        for n in range(len(pred)):
            joint = pred[n]
            # Only slots whose first joint has a non-negative value in column 3 are drawn.
            if joint[0, 3] >= 0:
                # Look up the limb table matching the joint count (e.g. LIMBS15)
                # by name instead of evaluating a string with eval().
                limbs_name = "LIMBS{}".format(len(joint))
                limbs = globals().get(limbs_name)
                if limbs is None:
                    raise NameError("name '{}' is not defined".format(limbs_name))
                for k in limbs:
                    x = [float(joint[k[0], 0]), float(joint[k[1], 0])]
                    y = [float(joint[k[0], 1]), float(joint[k[1], 1])]
                    z = [float(joint[k[0], 2]), float(joint[k[1], 2])]
                    ax.plot(x, y, z, c=colors[n % len(colors)], lw=1.5, marker='o', markerfacecolor='w', markersize=2,
                            markeredgewidth=1)
                                      



In [None]:

# EXPERIMENT: this revealed that the pre-trained resnet50 has a different final_layer dimension,
# so the final_layer parameters are included in the training optimizer. Good.

# pretrained = '/workspace/voxelpose-pytorch/models/pose_resnet50_panoptic.pth.tar'
# if os.path.isfile(pretrained):
#     pretrained_state_dict = torch.load(pretrained)
    
# pretrained_state_list = []
# for param_tensor in pretrained_state_dict:
#     pretrained_state_list.append(param_tensor)
#     print(param_tensor, "\t", pretrained_state_dict[param_tensor].size())
    
# model_state_dict = model.module.backbone.state_dict()

# model_state_dict_list = []
# for param_tensor in model_state_dict:
#     model_state_dict_list.append(param_tensor)
#     print(param_tensor, "\t", model_state_dict[param_tensor].size())
    
# print(len(pretrained_state_list))
# print(len(model_state_dict_list))

# for i in range(338):

#     if pretrained_state_list[i] != model_state_dict_list[i]:
#         print(pretrained_state_list[i])
#         print(model_state_dict_list[i])
        
#     if model_state_dict[pretrained_state_list[i]].size() != pretrained_state_dict[pretrained_state_list[i]].size():
#         print(pretrained_state_list[i])
#         print(pretrained_state_dict[pretrained_state_list[i]].size())
#         print(model_state_dict[pretrained_state_list[i]].size())
        
# model_state_dict[pretrained_state_list[0]].size()
# pretrained_state_dict[pretrained_state_list[0]].size()

# for k, v in pretrained_state_dict.items():
#     if "final_layer" in k:
#         pretrained_state_dict[k] = torch.zeros_like(model_state_dict[k])
# model.module.backbone.load_state_dict(pretrained_state_dict, strict=False)

# for name, param in model.module.backbone.final_layer.named_parameters():
#     print(name)