In [2]:
import os, sys
os.environ['CUDA_VISIBLE_DEVICES'] = '2,3'
from pathlib import Path
from tqdm import tqdm
import numpy as np
import cv2
from PIL import Image
import lovely_tensors as lt
lt.monkey_patch()
import imageio.v3 as iio
import torch

from hmr4d.dataset.pure_motion.amass import AmassDataset #52,788 samples
from hmr4d.dataset.pure_motion.utils import augment_betas, interpolate_smpl_params, rotate_around_axis
from hmr4d.dataset.pure_motion.cam_traj_utils import CameraAugmentorV11
from hmr4d.utils.body_model import BodyModelSMPLH, BodyModelSMPLX
from hmr4d.utils.body_model.smplx_lite import SmplxLiteSmplN24
from hmr4d.utils.geo.hmr_global import get_c_rootparam, get_R_c2gv, get_tgtcoord_rootparam, get_T_w2c_from_wcparams
from hmr4d.utils.geo.hmr_cam import create_camera_sensor
from hmr4d.utils.geo_transform import compute_cam_angvel, apply_T_on_points, move_to_start_point_face_z
from hmr4d.utils.net_utils import get_valid_mask
from hmr4d.utils.wis3d_utils import convert_motion_as_line_mesh
from hmr4d.utils.video_io_utils import save_video, get_writer
from hmr4d.utils.vis.renderer import Renderer, get_global_cameras_static, get_ground_params_from_points
from hmr4d.utils.vis.renderer_utils import simple_render_mesh

# [12/23 15:45:21][INFO] [AMASS] 18086 sequences. Elapsed: 2.35s
# [12/23 15:45:21][INFO] [AMASS] has 64.7 hours motion -> Resampled to 52788 samples.
# [12/23 15:40:11][INFO] [BEDLAM] 37537 sequences. 
# [12/23 15:40:24][INFO] [H36M] 600 sequences. Elapsed: 0.61s
# [12/23 15:40:25][INFO] [H36M] has 8.7 hours motion -> Resampled to 6196 samples. 
# [12/23 15:46:37][INFO] [3DPW] has 7.5 minutes motion -> Resampled to 88 samples. 

# Torch device used by the cells below. CUDA_VISIBLE_DEVICES is set to '2,3'
# at the top of this cell, so 'cuda:0' is the first *visible* GPU (physical GPU 2).
device = 'cuda:0'

In [3]:
# ---- Body models and conversion assets shared by the rendering cells ---- #

# SMPL body model; pose/shape/translation are supplied per call, so no
# learnable defaults are created.
_smpl_kwargs = dict(
    model_path="inputs/checkpoints/body_models",
    model_type="smpl",
    gender="neutral",
    num_betas=10,
    create_body_pose=False,
    create_betas=False,
    create_global_orient=False,
    create_transl=False,
)
smpl = BodyModelSMPLH(**_smpl_kwargs).to(device)

# SMPL-X body model.
_smplx_kwargs = dict(
    model_path="inputs/checkpoints/body_models",
    model_type="smplx",
    gender="neutral",
    num_pca_comps=12,
    flat_hand_mean=False,
)
smplx = BodyModelSMPLX(**_smplx_kwargs).to(device)


def _faces_as_batched_tensor(body_model):
    """Return `body_model.faces` as an integer tensor with a leading batch axis on `device`."""
    faces_np = body_model.faces.astype("int")
    return torch.from_numpy(faces_np).unsqueeze(0).to(device)


# Statement order matches the original cell: conversion matrix, faces, regressor.
# smplx2smpl is applied later as `matmul(smplx2smpl, smplx_vertices)`; the file
# name suggests a sparse matrix -- TODO confirm.
smplx2smpl = torch.load("hmr4d/utils/body_model/smplx2smpl_sparse.pt").to(device)
faces_smpl = _faces_as_batched_tensor(smpl)
faces_smplx = _faces_as_batched_tensor(smplx)
# Presumably the neutral SMPL joint regressor (per file name) -- TODO confirm.
J_regressor = torch.load("hmr4d/utils/body_model/smpl_neutral_J_regressor.pt").to(device)

# Lightweight model used for the camera-trajectory augmentation; it is not
# moved to `device` here.
smplx_lite = SmplxLiteSmplN24()

In [4]:
# Sample Batch from Dataset
dataset = AmassDataset()

# Seeds NumPy's global RNG only, right before the sample is drawn.
np.random.seed(4)
batch = dataset[5]
print(batch['meta'])

length = batch['length']
K = batch['K_fullimg'][0].to(device)  # intrinsics of the first frame
# The image size is recovered as twice the (cx, cy) entries of K, assuming the
# principal point sits at the image centre. Double first and round afterwards:
# truncating before the multiplication (int(cx) * 2) loses a pixel whenever
# cx or cy has a fractional .5 (odd image size).
width = int(round(float(K[0, 2]) * 2))
height = int(round(float(K[1, 2]) * 2))

[[36m01/01 11:13:54[0m][[32mINFO[0m] [AMASS] Loading from inputs/AMASS/hmr4d_support/smplxpose_v2.pth ...[0m
[[36m01/01 11:13:56[0m][[32mINFO[0m] [AMASS] 18086 sequences. Elapsed: 2.88s[0m
[[36m01/01 11:13:56[0m][[32mINFO[0m] [AMASS] has 64.7 hours motion -> Resampled to 52788 samples.[0m


{'data_name': 'amass', 'idx': 5, 'vid': 'inputs/smplx_amass/smplxn_raw/Transitions/Transitions/mazen_c3d/crawl_push_stageii.npz', 'start_end': (129, 255)}


In [5]:
# Render Test -- Camera, SMPL-X
# Runs SMPL-X on the in-camera parameters of the sampled batch and writes one
# rendered frame per time step to tmp.mp4.
smpl_params_c = {k:v.to(device) for k,v in batch['smpl_params_c'].items()}
verts = smplx(**smpl_params_c).vertices

renderer_c = Renderer(width, height, device="cuda", faces=smplx.faces, K=K)

writer = get_writer('tmp.mp4', fps=30, crf=23)
try:
    for i in tqdm(range(length)):
        img = renderer_c.render_mesh(verts[i], None, [0.8, 0.8, 0.8])
        writer.write_frame(img)
finally:
    # Always release the video writer, even if rendering a frame fails;
    # otherwise the file handle / encoder stays open in the kernel.
    writer.close()

100%|██████████| 120/120 [00:03<00:00, 31.18it/s]


In [6]:
# Render Test -- World (Random Camera), Skeleton
# Runs SMPL-X on the world-frame parameters, draws a random camera trajectory,
# and renders the joints as a line-mesh skeleton into tmp.mp4.
smpl_params_w = {k:v.to(device) for k,v in batch['smpl_params_w'].items()}
w_j3d = smplx(**smpl_params_w).joints.cpu()

# NOTE: this rebinds the notebook-level `width` / `height` that the dataset
# sampling cell derived from the batch intrinsics.
width, height, K_fullimg = create_camera_sensor(1000, 1000, 24)
wham_cam_augmentor = CameraAugmentorV11()
T_w2c = wham_cam_augmentor(w_j3d, length)
c_j3d = apply_T_on_points(w_j3d[:,:22], T_w2c)  # only the first 22 joints are drawn
# Debug alternative: use `c_j3d = w_j3d` to skip the camera transform.
verts, faces, vertex_colors = convert_motion_as_line_mesh(c_j3d)
vertex_colors = vertex_colors[None] / 255.0  # add a batch axis and scale by 1/255
renderer = Renderer(width, height, device="cuda", faces=faces, K=K_fullimg)

writer = get_writer('tmp.mp4', fps=30, crf=23)
try:
    # Iterate over the actual sequence length instead of a hard-coded 120 so
    # the cell neither truncates longer samples nor indexes past shorter ones.
    for i in tqdm(range(length), desc="Rendering Camera"):
        img_overlay_pred = renderer.render_mesh(verts[i].cuda(), None, vertex_colors, VI=1)
        writer.write_frame(img_overlay_pred)
finally:
    # Release the writer even when a frame fails to render.
    writer.close()

Rendering Camera: 100%|██████████| 120/120 [00:02<00:00, 54.74it/s]


In [None]:
### AMASS Train Dataset --Load Dataset-- ###

# Target clip length in frames and the +/- factor used when sampling raw windows.
motion_frames_len = 120
l_factor = 1.5

motion_files = torch.load("inputs/AMASS/hmr4d_support/smplxpose_v2.pth")
# Drop every entry whose key contains 'moyo_smplxn' and every sequence with
# fewer than 25 pose frames.
seqs = {
    name: motion
    for name, motion in motion_files.items()
    if not ('moyo_smplxn' in name or motion['pose'].shape[0] < 25)
}
print(f"Total motion files: {len(seqs):,}")

# `hours` is a running *frame* count; it is only converted to hours inside the
# print below (divides by 30 * 3600, i.e. assumes 30 fps -- TODO confirm).
# `idx2meta` lists each sequence key once per sample it contributes
# (floor(frames / motion_frames_len), but at least one).
hours, idx2meta = 0, []
for vid, seq in seqs.items():
    seq_length = seq["pose"].shape[0]
    hours += seq_length
    num_samples = max(seq_length // motion_frames_len, 1)
    idx2meta += [vid] * num_samples
print(f"{hours / (30*3600):.1f} hours motion -> Resampled to {len(idx2meta):,} samples.")

Total motion files: 17,896
64.7 hours motion -> Resampled to 52,788 samples.


In [None]:
### AMASS Train Dataset --Load Data-- ###
idx = 10
np.random.seed(42)

# Look up the raw sequence behind sample `idx`.
mid = idx2meta[idx]
raw_data = seqs[mid]
raw_len = raw_data["pose"].shape[0]

# Draw a raw window length in
# [motion_frames_len / l_factor, motion_frames_len * l_factor) frames.
_subset_len_lo = int(motion_frames_len / l_factor)
_subset_len_hi = int(motion_frames_len * l_factor)
raw_subset_len = np.random.randint(_subset_len_lo, _subset_len_hi)

if raw_subset_len <= raw_len:
    # The window fits: pick a random start so it stays inside the sequence.
    start = np.random.randint(0, raw_len - raw_subset_len + 1)
    end = start + raw_subset_len
else:
    # The sequence is shorter than the window: use all of it.
    start, end = 0, raw_len
print(f"{'/'.join(Path(mid).parts[2:])} : {raw_len} -> {start} ~ {end} (len={end-start})")

_window = slice(start, end)
_pose_window = raw_data["pose"][_window]
data = {
    "body_pose": _pose_window[:, 3:],  # (F, 63): pose columns after the first three
    "betas": raw_data["beta"].repeat(end - start, 1),  # (F, 10): one row per frame
    "global_orient": _pose_window[:, :3],  # (F, 3): first three pose columns
    "transl": raw_data["trans"][_window, :3],  # (F, 3)
    "data_name": "amass",
}
# Resample the window to exactly `motion_frames_len` frames.
data = interpolate_smpl_params(data, motion_frames_len)
# Re-express the root orientation/translation with tsf="az->ay"
# (presumably z-up -> y-up; confirm against get_tgtcoord_rootparam).
_root_orient, _root_transl, _ = get_tgtcoord_rootparam(
    data["global_orient"], data["transl"], tsf="az->ay",
)
data["global_orient"], data["transl"] = _root_orient, _root_transl
data

smplxn_raw/Transitions/Transitions/mazen_c3d/airkick_longjump_stageii.npz : 306 -> 92 ~ 223 (len=131)


{'body_pose': tensor[120, 63] n=7560 (30Kb) x∈[-1.788, 1.745] μ=0.031 σ=0.302,
 'betas': tensor[120, 10] n=1200 (4.7Kb) x∈[-4.130, 2.712] μ=-0.422 σ=1.990,
 'global_orient': tensor[120, 3] n=360 (1.4Kb) x∈[-0.265, 0.419] μ=-0.003 σ=0.138,
 'transl': tensor[120, 3] n=360 (1.4Kb) x∈[-0.429, 2.674] μ=0.662 σ=0.885}

In [None]:
### AMASS Train Dataset --Process Data-- ###
# Builds one training-style `batch` from `data`: augmented world-frame SMPL
# parameters, a sampled camera trajectory, the matching in-camera parameters,
# and placeholder image features.

# -- World-frame parameters (augmentation calls kept in their original order) --
betas = augment_betas(data["betas"], std=0.1)
global_orient_w, transl_w = rotate_around_axis(
    data["global_orient"], data["transl"], axis="y"
)
smpl_params_w = {
    'body_pose': data["body_pose"],
    'betas': betas,
    'global_orient': global_orient_w,
    'transl': transl_w,
}

## Camera Trajectory Augmentation
# Joints are evaluated on every 10th frame only, repeated back to full length,
# and then shifted by the per-frame translation.
_stride = 10
_sparse_joints = smplx_lite(
    smpl_params_w["body_pose"][::_stride],
    betas[::_stride],
    global_orient_w[::_stride],
    None,
)
w_j3d = _sparse_joints.repeat_interleave(_stride, dim=0) + transl_w[:, None]  # (F, 24, 3)
width, height, K_fullimg = create_camera_sensor(1000, 1000, 24)
wham_cam_augmentor = CameraAugmentorV11()
T_w2c = wham_cam_augmentor(w_j3d, motion_frames_len)

# -- In-camera parameters --
# First entry of the skeleton for the first frame's betas, used as the offset
# argument of get_c_rootparam.
offset = smplx.get_skeleton(betas[0].to(device))[0]  # (3)
global_orient_c, transl_c = get_c_rootparam(
    global_orient_w, transl_w, T_w2c, offset.cpu()
)
# Pose and shape are copied from the world parameters; only the root changes.
smpl_params_c = {name: smpl_params_w[name].clone() for name in ("body_pose", "betas")}
smpl_params_c["global_orient"] = global_orient_c  # (F, 3)
smpl_params_c["transl"] = transl_c  # (F, 3)

# World Params
gravity_vec = torch.tensor([0, -1, 0], dtype=torch.float32)  # (3), BEDLAM is ay
R_c2gv = get_R_c2gv(T_w2c[:, :3, :3], gravity_vec)  # (F, 3, 3)

K_fullimg = K_fullimg.repeat(motion_frames_len, 1, 1)  # (F, 3, 3)
cam_angvel = compute_cam_angvel(T_w2c[:, :3, :3])  # (F, 6)

# -- Assemble the batch --
_num_frames = data["body_pose"].shape[0]
_mask = {
    "valid": get_valid_mask(_num_frames, _num_frames),
    "vitpose": False,
    "bbx_xys": False,
    "f_imgseq": False,
    "spv_incam_only": False,
}
batch = {
    "meta": {"data_name": "amass", "idx": idx, "T_w2c": T_w2c},
    "length": _num_frames,
    "smpl_params_c": smpl_params_c,
    "smpl_params_w": smpl_params_w,
    "R_c2gv": R_c2gv,  # (F, 3, 3)
    "gravity_vec": gravity_vec,  # (3)
    "bbx_xys": torch.zeros((_num_frames, 3)),  # (F, 3)  # NOTE: a placeholder
    "K_fullimg": K_fullimg,  # (F, 3, 3)
    "f_imgseq": torch.zeros((_num_frames, 1024)),  # (F, D)  # NOTE: a placeholder
    "kp2d": torch.zeros(_num_frames, 17, 3),  # (F, 17, 3)
    "cam_angvel": cam_angvel,  # (F, 6)
    "mask": _mask,
}

In [None]:
# Render the processed world-frame motion as a skeleton on a white background.
# NOTE: this cell draws a *fresh* camera trajectory and rebinds `w_j3d`,
# `T_w2c`, `width`, `height` and `K_fullimg`; the trajectory shown here is
# therefore not the one stored in batch["meta"]["T_w2c"].
w_j3d = smplx(**{k:v.to(device) for k,v in smpl_params_w.items()}).joints.cpu()

width, height, K_fullimg = create_camera_sensor(1000, 1000, 24)
wham_cam_augmentor = CameraAugmentorV11()
T_w2c = wham_cam_augmentor(w_j3d, motion_frames_len)
c_j3d = apply_T_on_points(w_j3d[:,:22], T_w2c)  # only the first 22 joints are drawn
verts, faces, vertex_colors = convert_motion_as_line_mesh(c_j3d)

vertex_colors = vertex_colors[None] / 255.0  # add a batch axis and scale by 1/255
bg = np.ones((height, width, 3), dtype=np.uint8) * 255  # plain white background image
renderer = Renderer(width, height, device="cuda", faces=faces, K=K_fullimg)
writer = get_writer('tmp.mp4', fps=30, crf=23)
try:
    for i in tqdm(range(motion_frames_len), desc="Rendering Camera"):
        img_overlay_pred = renderer.render_mesh(verts[i].cuda(), bg, vertex_colors, VI=1)
        writer.write_frame(img_overlay_pred)
finally:
    # Release the writer even when a frame fails to render.
    writer.close()

Rendering Camera: 100%|██████████| 120/120 [00:01<00:00, 62.84it/s]


In [None]:
# Render the world-frame motion from a static global camera, with a ground plane.
smplx_out = smplx(**{k: v.to(device) for k,v in batch['smpl_params_w'].items()})
# Map the SMPL-X vertices to SMPL topology one frame at a time.
pred_ay_verts = torch.stack([torch.matmul(smplx2smpl, v_) for v_ in smplx_out.vertices])
pred_gb_verts, pred_gb_joints = move_to_start_point_face_z(pred_ay_verts, J_regressor)

global_R, global_T, global_lights = get_global_cameras_static(
    pred_gb_joints.cpu(), beta=2.0, cam_height_degree=20, target_center_height=1.0,
)
# NOTE: rebinds the notebook-level `K`; `width` / `height` are whatever the
# most recent create_camera_sensor call in an earlier cell left behind.
_, _, K = create_camera_sensor(width, height, 24)
renderer_g = Renderer(width, height, device="cuda", faces=faces_smpl[0], K=K)

# -- render mesh -- #
scale, cx, cz = get_ground_params_from_points(pred_gb_joints[:, 0], pred_gb_verts)
renderer_g.set_ground(scale * 1.5, cx, cz)
color = torch.ones(3).float().cuda() * 0.8  # uniform light-grey mesh colour

writer = get_writer('tmp2.mp4', fps=30, crf=23)
try:
    for i in tqdm(range(batch['length']), desc="Rendering Global"):
        cameras = renderer_g.create_camera(global_R[i], global_T[i])
        img_gb = renderer_g.render_with_ground(pred_gb_verts[[i]], color[None], cameras, global_lights)
        writer.write_frame(img_gb)
finally:
    # Release the writer even when a frame fails to render.
    writer.close()

Rendering Global: 100%|██████████| 120/120 [00:04<00:00, 27.66it/s]


In [None]:
# Render the in-camera SMPL-X mesh of the processed sample.
smplx_out = smplx(**{k: v.to(device) for k,v in smpl_params_c.items()})

# ----- Render Overlay ----- #
# Use the intrinsics the in-camera parameters were generated for, taken from
# the batch, instead of an unrelated hard-coded (1280, 720, 995.5555) camera.
# Same recipe as the first render cell: image size is twice the (cx, cy)
# entries of K. Assumes `whf` means (width, height, focal length in pixels)
# -- TODO confirm against simple_render_mesh.
_K_overlay = batch["K_fullimg"][0]
_overlay_width = int(round(float(_K_overlay[0, 2]) * 2))
_overlay_height = int(round(float(_K_overlay[1, 2]) * 2))
_overlay_focal = float(_K_overlay[0, 0])
render_dict = {
    "faces": smplx.faces,
    "verts": smplx_out.vertices,
    'whf' : (_overlay_width, _overlay_height, _overlay_focal),
}
img_overlay = simple_render_mesh(render_dict)
save_video(img_overlay, "tmp.mp4", crf=23)

Rendering:   0%|          | 0/120 [00:00<?, ?it/s]

Rendering: 100%|██████████| 120/120 [00:02<00:00, 58.34it/s]
