In [1]:
import os
import sys
import torch
import time
import numpy as np
import cv2
# from ortools.sat.python import cp_model

In [2]:
# Extend the import search path with the sibling "0.mc_utils" directory.
# NOTE(review): presumably this is where the project packages imported in the
# next cell live — confirm. The path is relative, so it only resolves when the
# kernel's working directory is the notebook's own folder.
sys.path.append("../0.mc_utils/")

In [3]:
from openpose.op_formats import NPMapOP25bToSMPL24
from openpose.op_utils import draw_kp2d_to_image,draw_reided_bboxes
from common.mincostflow import MinCostFlowReID
from common.trajectory_solver import MC3DTrajSVDBased

from pipeline.mview_x2ds_to_A import SingleX2dsReID,HMRWrapper
from pipeline.global_cfg import CFG_SINGLE
from mchmr2.hmr_cfg import HMR_ENCODER
from mchmr2.hmr_encoder import HMREncoder

from mqtt_player.unity3d_mqtt_player import Unity3DMqttPlayer,MQTTPLAYER_CFG

In [23]:
## Input recordings: per-view keypoint archive (.npz) plus the matching video file.
# NOTE(review): absolute, machine-specific location — point DATA_DIR at your own copy.
DATA_DIR = "E:/4.test_videos/yunpeng_mview/01"


def load_op25b(npz_fname):
    """Return the Python object stored under the "op25b" key of an .npz archive.

    The archive is opened in a ``with`` block so the underlying file handle is
    closed as soon as the entry has been read (indexing the result of a bare
    ``np.load`` leaves the NpzFile open).

    NOTE(security): ``allow_pickle=True`` unpickles arbitrary objects, which can
    execute code. Only load archives you produced yourself or otherwise trust.
    """
    with np.load(npz_fname, allow_pickle=True) as archive:
        # The entry is a 0-d object array; .item() unwraps the stored object.
        return archive["op25b"].item()


##main view
op25b_x2ds0 = load_op25b(DATA_DIR + "/video_0.npz")
video0_fname = DATA_DIR + "/video_0.mp4"

##sub view
op25b_x2ds1 = load_op25b(DATA_DIR + "/video_1.npz")
video1_fname = DATA_DIR + "/video_1.mp4"

In [6]:
class HMRCrossviewPairing:
    """Pair per-person feature vectors from two views via a pairwise-distance cost matrix.

    The cost matrix is handed to a ``MinCostFlowReID`` solver; whatever that
    solver returns is passed back to the caller unchanged.
    """
    def __init__(self):
        self.km = MinCostFlowReID()

    def __call__(self,f0,f1):
        """
        Map the rows of f0 onto the rows of f1.

        f0: torch tensor of shape MxD
        f1: torch tensor of shape NxD
        Returns the assignment produced by ``self.km`` for the MxN cost matrix.
        """
        ##step1. pairwise differences via broadcasting: (M,1,D) - (1,N,D) -> MxNxD
        diff = f0.unsqueeze(1) - f1.unsqueeze(0)

        ##step2. L2 distance between every (f0 row, f1 row) pair -> MxN
        cost_mat = torch.norm(diff,dim=-1)

        ##step3. assignment
        # `np.int` was removed in NumPy 1.24; it was only an alias for the builtin
        # `int`, so `astype(int)` gives the same dtype on every NumPy version.
        # detach() lets this also accept features that still carry autograd history.
        # NOTE(review): the cast truncates distances toward zero, so pairs whose
        # distances differ by less than 1 become ties — presumably the solver
        # needs integer costs; consider scaling before the cast if that matters.
        cost = cost_mat.detach().cpu().numpy().astype(int)
        return self.km(cost)

In [7]:
# One SingleX2dsReID instance per camera view, keyed by the integer camera
# index (0 and 1), all built from the same CFG_SINGLE configuration.
single_hmr_dict = {}
for k in range(2):
    single_hmr_dict[k] = SingleX2dsReID(CFG_SINGLE)
# Shared helper objects used by the cells below:
#   hmr_simple          - HMRWrapper instance (constructing it logs the HMR
#                         checkpoint it loads, see the cell output)
#   crossview_matching  - HMRCrossviewPairing instance
#   map_op25b_to_smpl24 - NPMapOP25bToSMPL24 instance
#     NOTE(review): presumably converts OpenPose-25b keypoints to the SMPL
#     24-joint layout, going by the class name — confirm.
hmr_simple = HMRWrapper()
crossview_matching = HMRCrossviewPairing()
map_op25b_to_smpl24 = NPMapOP25bToSMPL24()

>>>Loaded HMR from:../12.models/mchmr2/20200516-hmrzero-b4f2048s10-vposer0420.pth


In [18]:
## Trajectory set-up: camera->world rotation, trajectory solver, mask and offset.
# Axis-angle vectors of magnitude pi about Z and about Y, turned into 3x3
# rotation matrices by Rodrigues (the second return value is discarded).
RZ,_ = cv2.Rodrigues(np.asarray([0,0,np.pi],dtype=np.float32))
RY,_ = cv2.Rodrigues(np.asarray([0,np.pi,0],dtype=np.float32))
# 4x4 homogeneous transform whose upper-left 3x3 block is RY*RZ; translation stays zero.
camera_to_world_R = np.identity(4)
camera_to_world_R[:3,:3] = RY @ RZ

# NOTE(review): the four numbers are presumably camera intrinsics (fx, fy, cx, cy) — confirm.
traj_solver = MC3DTrajSVDBased([2200,2200,960,540])
# 1x3 row vectors so they broadcast against an Nx3 array; the mask negates the first component.
traj_mask = np.asarray([[-1,1,1]])
traj_offset = np.reshape(np.array(CFG_SINGLE.traj_offset),(1,3))

In [9]:
# Configuration for the multi-view encoder: start from HMR_ENCODER.clone() and
# override the fields below. All overrides must happen before freeze().
mview_cfg = HMR_ENCODER.clone()
# Checkpoint to load (path relative to the notebook's working directory).
mview_cfg.hmr.model = "../12.models/mchmr2/20200528-hmrzeofusion-x2b5f2048s5-vposer0420.pth"
# NOTE(review): 96 presumably equals 2 views x 24 joints x 2 coordinates — confirm.
mview_cfg.hmr.in_features=96
# 6+32+5 = 43 outputs.
# NOTE(review): the meaning of each term is not visible here — confirm against the model definition.
mview_cfg.hmr.out_features=6+32+5
mview_cfg.hmr.blocks=5
# NOTE(review): presumably makes the config read-only from here on — confirm.
mview_cfg.freeze()
# Echo the effective hmr settings into the cell output for the record.
print(mview_cfg.hmr)
# Build the encoder from the config; the cell output shows it loading the HMR
# and VPoser checkpoints.
hmr_mview = HMREncoder(mview_cfg)

block_features: 2048
blocks: 5
final_features: 512
in_features: 96
model: ../12.models/mchmr2/20200528-hmrzeofusion-x2b5f2048s5-vposer0420.pth
out_features: 43
>>>Loaded HMR from:../12.models/mchmr2/20200528-hmrzeofusion-x2b5f2048s5-vposer0420.pth
>>>Loaded VPoser from:../12.models/mchmr2/20200420-vposer1024-lt32b3.pth


In [13]:
def compute_trajectory(x2ds,root3ds,solver=None):
    """
    Estimate one 3-vector trajectory per person from 2D/3D joint pairs.

    x2ds shape is Nx24x2,
        unit in pixel,
        numpy object
    root3ds shape is Nx24x3,
        unit in meter,
        numpy object
    solver: optional callable ``solver(x2d, x3d)`` taking the selected Kx2 and
        Kx3 joints of one person and returning 3 values. Defaults to the
        notebook-level ``traj_solver``.

    Only joints 1, 2, 16 and 17 are used, and only when their 2D keypoint is
    non-zero (an all-zero keypoint is treated as "not detected"). A person
    with fewer than two usable joints gets the zero trajectory.

    Returns a float array of shape Nx3 (shape (0,3) for an empty batch).
    NOTE(review): an older inline comment said the solver output is in mm,
    which conflicts with the "meter" note above — confirm the unit.
    """
    assert x2ds.ndim==3 and x2ds.shape[1:]==(24,2)
    assert root3ds.ndim==3 and root3ds.shape[1:]==(24,3)

    N = x2ds.shape[0]
    visible = (np.abs(x2ds).sum(-1)>0)#Nx24
    # Fixed, ascending joint order so the solver always sees the same row order
    # (iterating a set intersection gives no ordering guarantee).
    target_joints = (1,2,16,17)
    # Pre-sized output keeps the Nx3 shape even when N==0, so callers can
    # always broadcast it against 1x3 masks/offsets.
    trajs = np.zeros((N,3),dtype=np.float64)
    for i in range(N):
        select_index = [j for j in target_joints if visible[i,j]]
        if len(select_index)<2:
            continue #too few joints: keep the zero trajectory
        # Resolve the default lazily so the global is only needed when used.
        active_solver = traj_solver if solver is None else solver
        traj = active_solver(x2ds[i,select_index],#in pixel unit
                             root3ds[i,select_index])
        trajs[i] = np.asarray(traj,dtype=np.float64).reshape(3)
    return trajs

In [21]:
# Keys identifying the two views in the per-camera dictionaries
# (single_hmr_dict and the x2ds_dict passed to mview_x2ds_to_A).
mainview_camera = 0
subview_camera =1
# NOTE(review): main_x2ds_select is not referenced anywhere in the visible
# notebook — confirm whether it is still needed.
main_x2ds_select = [0,1,2,16,17]
def mview_x2ds_to_A(index,x2ds_dict):
    """
    Re-identify people in every view, then pair main-view people with sub-view people.

    index: frame index, forwarded to each per-camera tracker's ``push``.
    x2ds_dict: key is camera name, value is Mx24x2

    Despite the name, this returns paired 2D keypoints, not pose matrices:
        "matched_x2ds":   tensor NxCx24x2 (C=2: main view first, then sub view), or None
        "matched_hids":   numpy array of the main-view human ids for those rows, or None
        "unmatched_x2ds": tensor Nx24x2 of main-view people without a partner, or None
        "unmatched_hids": numpy array of their human ids, or None

    Raises KeyError if a camera in x2ds_dict has no entry in single_hmr_dict.
    """
    # Validate every camera up front so that a bad key fails with a clear
    # message *before* any tracker has been pushed for this frame.
    unknown_cameras = [k for k in x2ds_dict if k not in single_hmr_dict]
    if unknown_cameras:
        raise KeyError("no SingleX2dsReID was created for camera(s): {}".format(unknown_cameras))

    #step1.singleview forward
    main_hids = None
    reid_x2ds_dict = {}
    reid_slice_dict = {}
    counter = 0
    for k in x2ds_dict:
        ret = single_hmr_dict[k].push(index,x2ds_dict[k])##x2ds and hids
        if k==mainview_camera:
            main_hids = ret["hids"]
        num_people = ret["x2ds"].shape[0]
        reid_x2ds_dict[k] = ret["x2ds"]
        # Remember which rows of the concatenated batch belong to this camera.
        reid_slice_dict[k] = slice(counter,counter+num_people)
        counter += num_people
    ##hmr simple forward on all views at once
    hmr_ret = hmr_simple(torch.cat(list(reid_x2ds_dict.values())))

    #step2.cross matching
    f0 = hmr_ret["y"][reid_slice_dict[mainview_camera]]#features of main-view people
    f1 = hmr_ret["y"][reid_slice_dict[subview_camera]]#features of sub-view people
    if f0.shape[0]>0 and f1.shape[0]>0:
        cross_assign = crossview_matching(f0,f1)
    else:
        # Nothing to pair when either view is empty.
        cross_assign = {}

    #step3.match x2ds
    main_x2ds = reid_x2ds_dict[mainview_camera]
    sub_x2ds = reid_x2ds_dict[subview_camera]
    match_x2ds_list = []
    unmatch_x2ds_list = []
    match_hids_list = []
    unmatch_hids_list= []
    # Iterate over the re-ID output, not the raw detections: the slices above
    # are built from the re-ID row counts, so this keeps indices consistent.
    # NOTE(review): assumes ret["hids"] is aligned row-for-row with ret["x2ds"] — confirm.
    for i in range(main_x2ds.shape[0]):
        x0 = main_x2ds[i]
        hid = main_hids[i]
        if i in cross_assign:
            mid = cross_assign[i][0] # matched index in subview
            x1 = sub_x2ds[mid]#24x2
            match_x2ds_list.append(torch.stack([x0,x1]))
            match_hids_list.append(hid)
        else:
            unmatch_x2ds_list.append(x0)
            unmatch_hids_list.append(hid)

    ##matched (None when nobody was paired)
    match_x2ds = None
    match_hids = None
    if len(match_x2ds_list)>0:
        match_x2ds = torch.stack(match_x2ds_list)
        match_hids = np.array(match_hids_list)
    ##unmatched (None when everybody was paired)
    unmatch_x2ds = None
    unmatch_hids = None
    if len(unmatch_x2ds_list)>0:
        unmatch_x2ds = torch.stack(unmatch_x2ds_list)
        unmatch_hids = np.array(unmatch_hids_list)
    return {"matched_x2ds":match_x2ds, ##NxCx24x2
            "matched_hids":match_hids,
            "unmatched_x2ds":unmatch_x2ds, ##Nx24x2
            "unmatched_hids":unmatch_hids}

In [24]:
## Playback: read both videos in lock-step, pair people across views, fuse the
## paired 2D keypoints into poses, publish them over MQTT and show both frames.
window_name = "reID window"
cv2.namedWindow(window_name)
## video1
cap0 = cv2.VideoCapture(video0_fname)
h0 = int(cap0.get(cv2.CAP_PROP_FRAME_HEIGHT))
w0 = int(cap0.get(cv2.CAP_PROP_FRAME_WIDTH))
## video2
cap1 = cv2.VideoCapture(video1_fname)
h1 = int(cap1.get(cv2.CAP_PROP_FRAME_HEIGHT))
w1 = int(cap1.get(cv2.CAP_PROP_FRAME_WIDTH))

# Both frames are stacked vertically on one canvas: main view on top.
height = h0+h1
width = max(w0 , w1)
canvas = np.zeros((height, width, 3),dtype=np.uint8)
mqtt_player = Unity3DMqttPlayer(MQTTPLAYER_CFG)

MAX_FRAMES = 1000          #upper bound on the number of frames processed
DISPLAY_SIZE = (800,900)   #(width,height) of the preview window content

try:
    for frame_idx in range(MAX_FRAMES):
        is_valid0,frame0 = cap0.read()
        is_valid1,frame1 = cap1.read()
        if not is_valid0 or not is_valid1:
            break

        #step1.load data from each view (only frames with keypoints in BOTH views)
        if frame_idx in op25b_x2ds0 and frame_idx in op25b_x2ds1:
            ##step1.load x2ds, keeping only the first two channels of the last axis
            x2ds0 = map_op25b_to_smpl24(op25b_x2ds0[frame_idx])[...,:2] #Nx24x2
            x2ds1 = map_op25b_to_smpl24(op25b_x2ds1[frame_idx])[...,:2] #Mx24x2

            ##step2.assemble mview dict
            assign_ret = mview_x2ds_to_A(frame_idx,{mainview_camera:x2ds0,subview_camera:x2ds1})
            matched_x2ds = assign_ret["matched_x2ds"]
            matched_hids = assign_ret["matched_hids"]

            # mview_x2ds_to_A returns None when no person was paired across the
            # views; in that case there is nothing to draw, fuse or publish.
            if matched_x2ds is not None:
                # `person` (not `i`) so the frame index is never shadowed.
                for person in range(matched_x2ds.shape[0]):
                    draw_kp2d_to_image(frame0,matched_x2ds[person,0],color=(255,0,0))
                    draw_reided_bboxes(frame0,matched_x2ds[person,0].numpy(),matched_hids[person])
                    draw_kp2d_to_image(frame1,matched_x2ds[person,1],color=(255,0,0))
                    draw_reided_bboxes(frame1,matched_x2ds[person,1].numpy(),matched_hids[person])

                ##step3.fusion
                ret = hmr_mview.forward_hmr(matched_x2ds)
                A = hmr_mview.forward_smpl(ret["root_r6d"],ret["pose_r6d"])
                A = A.cpu().numpy()

                ##step4.from camera space A to world space in Unity3D
                np_in_x2ds = matched_x2ds[:,mainview_camera].numpy()
                # A[...,:3,-1] is the translation column of each joint transform.
                trajs = compute_trajectory(np_in_x2ds,A[...,:3,-1])*traj_mask + traj_offset
                A[...,:3,-1] += trajs.reshape(-1,1,3)
                A = np.matmul(camera_to_world_R,A)

                # 384 = 24 joints x (4x4) matrix entries per person.
                mqtt_player(matched_hids,A.reshape(-1,384))

        # Slice the columns too: the canvas is as wide as the wider video, so a
        # narrower frame must not be assigned to the full row width.
        canvas[:h0,:w0] = frame0
        canvas[h0:,:w1] = frame1
        new_canvas = cv2.resize(canvas,DISPLAY_SIZE)
        cv2.imshow(window_name,new_canvas)
        if cv2.waitKey(1) & 0xFF==ord('q'):
            break
finally:
    # Always free the video handles and close the window, even on an exception.
    cap0.release()
    cap1.release()
    cv2.destroyAllWindows()