# Tutorial: Action-Conditioned 3D Human Motion Synthesis with Transformer VAE (ACTOR)

ACTOR learns an action-aware latent representation for human motions by training a generative variational autoencoder (VAE). By sampling from this latent space and querying a certain duration through a series of positional encodings, ACTOR synthesizes variable-length motion sequences conditioned on a categorical action. Specifically, a Transformer-based architecture is designed for encoding and decoding a sequence of parametric SMPL human body models estimated from action recognition datasets.

## Dataset for ACTOR

To get and pre-process the dataset, please refer to this [GitHub repository](https://github.com/Mathux/ACTOR) and agree to the license. The following code shows examples from the `HumanAct12` dataset.

In [1]:
# Set data path: folder holding the pre-processed HumanAct12 poses.
# It is handed to get_datasets() in the training section below, so point it
# at your own local copy of the data.
# Alternative location (kept for reference):
# datapath = "F://research/ACTOR/data/HumanAct12Poses/"
datapath = "E://researches/GenMotion/thirdParty/HumanAct12Poses/"

In [2]:
import torch
import os

In [3]:
from genmotion.algorithm.action_conditioned.params import HumanAct12Params
from genmotion.algorithm.action_conditioned.data_utils import get_datasets

from genmotion.algorithm.action_conditioned.utils.tensors import collate

## Training ACTOR

In [None]:
# Load the HumanAct12 parameters. vars() exposes the object's attributes as a
# plain dict, which the following cells index by key and unpack with **.
parameters = vars(HumanAct12Params())

In [None]:
# Build the datasets from the data folder and the loaded parameters, then
# report how many items each entry of the returned mapping contains.
datasets = get_datasets(datapath, parameters)
split_sizes = {split: len(subset) for split, subset in datasets.items()}
print("dataset length: ", split_sizes)

In [None]:
# load model
# Only the 'cvae' model type with the 'transformer' architecture is handled
# here (the matching classes are imported explicitly below), so fail early
# if the loaded parameters select anything else.
assert parameters["modeltype"] == 'cvae'
assert parameters["archiname"] == "transformer"

from genmotion.algorithm.action_conditioned.models.architectures.transformer import Encoder_TRANSFORMER, Decoder_TRANSFORMER
from genmotion.algorithm.action_conditioned.models.modeltype.cvae import CVAE

# Both the encoder and the decoder receive the full parameter dict as
# keyword arguments.
encoder = Encoder_TRANSFORMER(**parameters)
decoder = Decoder_TRANSFORMER(**parameters)

# outputxyz is True only when "rcxyz" is present in the lambdas.
# NOTE: this is assigned after the encoder/decoder are built, so of the three
# constructors only CVAE is called with this value.
parameters["outputxyz"] = "rcxyz" in parameters["lambdas"]

# Assemble the model and move it to the configured device.
model = CVAE(encoder, decoder, **parameters).to(parameters["device"])

In [None]:
# Optimizer: AdamW over every model parameter, using the configured
# learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=parameters["lr"])

# Report the model size in millions of parameters.
total_params = sum(weight.numel() for weight in model.parameters())
print('Total params: %.2fM' % (total_params / 1000000.0))
# print("Training model..")

In [None]:
# Wrap the training split in a shuffling DataLoader; batches are assembled by
# the project's collate function.
dataset = datasets["train"]
loader_options = dict(
    batch_size=parameters["batch_size"],
    shuffle=True,
    num_workers=8,
    collate_fn=collate,
)
train_iterator = torch.utils.data.DataLoader(dataset, **loader_options)

In [None]:
from genmotion.algorithm.action_conditioned.trainer import train

In [None]:
# Display the device the model currently lives on.
model.device

In [None]:
# Only consumed by the (commented-out) training loop in the next cell.
epochs = 1  # total number of training epochs

In [None]:
# Training loop, left commented out in this tutorial; uncomment to train.
# Each epoch calls train() once, divides every entry of the returned loss
# dict by the number of batches, and prints it together with the epoch index.
# for epoch in range(epochs):
#     dict_loss = train(model, optimizer, train_iterator, model.device)
    
#     for key in dict_loss.keys():
#         dict_loss[key] /= len(train_iterator)
#         print(f"Loss/{key}", dict_loss[key], f"{epoch}")

## Sample ACTOR

In [5]:
# Load the HumanAct12 parameters again, this time with mode="sample".
# vars() exposes the object's attributes as a plain dict, which the following
# cells index by key and unpack with **.
parameters = vars(HumanAct12Params(mode="sample"))

In [6]:
# load model
# Rebuild the same encoder/decoder/CVAE stack from the sampling parameters so
# that the checkpoint loaded further down has a model to be restored into.
# Only the 'cvae' model type with the 'transformer' architecture is handled.
assert parameters["modeltype"] == 'cvae'
assert parameters["archiname"] == "transformer"

from genmotion.algorithm.action_conditioned.models.architectures.transformer import Encoder_TRANSFORMER, Decoder_TRANSFORMER
from genmotion.algorithm.action_conditioned.models.modeltype.cvae import CVAE

# Both the encoder and the decoder receive the full parameter dict as
# keyword arguments.
encoder = Encoder_TRANSFORMER(**parameters)
decoder = Decoder_TRANSFORMER(**parameters)

# outputxyz is True only when "rcxyz" is present in the lambdas.
# NOTE: this is assigned after the encoder/decoder are built, so of the three
# constructors only CVAE is called with this value.
parameters["outputxyz"] = "rcxyz" in parameters["lambdas"]

# Assemble the model and move it to the configured device.
model = CVAE(encoder, decoder, **parameters).to(parameters["device"])

In [7]:
# Checkpoint to restore. The folder is a relative path, so it is resolved
# against the notebook's current working directory.
folder = "../pretrained/action_conditioned/humanact12/"
checkpointname = "checkpoint_5000.pth.tar"

In [8]:
# Load the checkpoint onto the configured device and copy its weights into
# the model. The final expression is left bare so the notebook displays the
# result of load_state_dict().
print("Restore weights..")
checkpointpath = os.path.join(folder, checkpointname)
target_device = parameters["device"]
state_dict = torch.load(checkpointpath, map_location=target_device)
model.load_state_dict(state_dict)

Restore weights..


<All keys matched successfully>

In [9]:
# One integer label per action class: 0, 1, ..., num_classes - 1.
num_classes = parameters["num_classes"]
classes = torch.arange(num_classes)

In [10]:
# One duration per requested class, all set to the configured number of
# frames (integer tensor, same length as `classes`).
gendurations = torch.full((len(classes),), parameters["num_frames"], dtype=torch.long)

In [11]:
# Noise options forwarded to model.generate() below as the keyword arguments
# of the same names. "random" is kept as a commented-out alternative for
# noise_same_action.
noise_same_action = "interpolate"
noise_diff_action = "random"
# noise_same_action = "random"

In [12]:
# Display the device the restored model lives on.
model.device

device(type='cuda')

In [14]:
# Display the configured number of samples per action.
# NOTE(review): the generate() call below hard-codes nspa=2 instead of using
# this value - presumably nspa refers to the same setting; confirm.
parameters["num_samples_per_action"]

10

In [15]:
# Generate the new data.
# Switch the model to inference mode first: eval() turns off train-only
# behaviour such as dropout, and no_grad() avoids building an autograd graph
# that is never used when sampling.
model.eval()
with torch.no_grad():
    generation = model.generate(classes, gendurations, nspa=2,
                                noise_same_action=noise_same_action,
                                noise_diff_action=noise_diff_action,
                                fact=parameters["fact_latent"])

In [20]:
# Display the shape of the generated xyz output.
generation["output_xyz"].shape

torch.Size([24, 24, 3, 60])