# Tutorial: On human motion prediction using recurrent neural networks (human_motion_rnn)

Human motion prediction using recurrent neural networks (`human_motion_rnn`) is a classical deep learning method for predicting skeleton-based human motion with deep recurrent neural networks (RNNs). With the goal of learning time-dependent representations for tasks such as short-term motion prediction and long-term human motion synthesis, `human_motion_rnn` trains a sequence-to-sequence model to address this challenge.

## Dataset for human_motion_rnn
To get and pre-process the dataset, please refer to this [GitHub repository](https://github.com/enriccorona/human-motion-prediction-pytorch) and agree to its license.

## Training human_motion_rnn

In [2]:
import numpy as np
import torch
from tqdm.auto import tqdm

In [2]:
from genmotion.algorithm.humanmotionrnn.data_utils import *
from genmotion.algorithm.humanmotionrnn.params import *
from genmotion.algorithm.humanmotionrnn.models import Seq2SeqModel

In [3]:
# Experiment configuration: the action list and dataset directory are passed
# to HumanMotionDataset in the next cell.
# NOTE(review): seq_length_in / seq_length_out are not referenced by the cells
# visible here (the model takes source_seq_len / target_seq_len instead) —
# confirm whether they are meant to be kept in sync.
actions = ['walking']
seq_length_in = 50
seq_length_out = 25
data_dir = 'data/h3.6m/dataset'

In [4]:
# Build the train and test datasets from the same directory and action list.
# Both receive the negation of omit_one_hot as their fourth argument
# (presumably the one-hot action-encoding switch — confirm against
# HumanMotionDataset); only the test split passes is_train=False.
use_one_hot = not omit_one_hot
train_dataset = HumanMotionDataset(data_dir, train_subject_ids, actions, use_one_hot)
test_dataset = HumanMotionDataset(
    data_dir, test_subject_ids, actions, use_one_hot, is_train=False
)

Reading subject 1, action walking, subaction 1
Reading subject 1, action walking, subaction 2
Reading subject 6, action walking, subaction 1
Reading subject 6, action walking, subaction 2
Reading subject 7, action walking, subaction 1
Reading subject 7, action walking, subaction 2
Reading subject 8, action walking, subaction 1
Reading subject 8, action walking, subaction 2
Reading subject 9, action walking, subaction 1
Reading subject 9, action walking, subaction 2
Reading subject 11, action walking, subaction 1
Reading subject 11, action walking, subaction 2
Reading subject 5, action walking, subaction 1
Reading subject 5, action walking, subaction 2


In [5]:
# Report how many samples each split exposes.
num_train_samples = len(train_dataset)
num_test_samples = len(test_dataset)
print("Training samples: ", num_train_samples, "Testing samples: ", num_test_samples)
# To inspect the shapes of the three arrays that make up one sample, uncomment:
# train_dataset[0][0].shape, train_dataset[0][1].shape, train_dataset[0][2].shape

Training samples:  10000 Testing samples:  1000


In [6]:
# Both loaders share the same settings: mini-batches of 16, shuffled.
loader_options = {"batch_size": 16, "shuffle": True}
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

In [7]:
# Mode switch read by the model-construction cell: False keeps the configured
# source/target sequence lengths and loss for training; True substitutes
# lengths 50/100 and the "sampling_based" loss.
sampling = False

In [8]:
# Build the sequence-to-sequence model. The hyper-parameters below are not
# defined in this notebook; they come from the star imports above. When
# `sampling` is True the sequence lengths are overridden with 50/100 and the
# loss with "sampling_based"; otherwise the configured values are used.
model = Seq2SeqModel(
    architecture,
    source_seq_len if not sampling else 50,
    target_seq_len if not sampling else 100,
    rnn_size,  # hidden layer size
    rnn_num_layers,
    max_gradient_norm,
    batch_size,
    learning_rate,
    learning_rate_decay_factor,
    loss_to_use if not sampling else "sampling_based",
    len(actions),
    not omit_one_hot,
    residual_velocities,
    dtype=torch.float32,
)

# Move the model to the GPU only when it is requested *and* a CUDA device is
# actually present. This matches the guard applied to every batch in the
# training loop, so model and inputs always end up on the same device and the
# notebook still runs on CPU-only machines.
if use_cuda and torch.cuda.is_available():
    model = model.cuda()

One hot is  True
Input size is 55
rnn_size = 1024


In [None]:
# Bookkeeping scalars, both starting at zero.
step_time = 0
loss = 0

# Plain SGD over all model parameters, using the configured learning rate.
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
def _prepare_batch(batch):
    """Unpack a loader batch into float tensors, on the GPU when enabled.

    Args:
        batch: a 3-item batch yielded by the data loader, in the order
            (encoder inputs, decoder inputs, decoder outputs).

    Returns:
        The three tensors after ``.float()``; each is additionally moved with
        ``.cuda()`` when ``use_cuda`` is set and a CUDA device is available.
    """
    encoder_inputs, decoder_inputs, decoder_outputs = (part.float() for part in batch)
    if use_cuda and torch.cuda.is_available():
        encoder_inputs = encoder_inputs.cuda()
        decoder_inputs = decoder_inputs.cuda()
        decoder_outputs = decoder_outputs.cuda()
    return encoder_inputs, decoder_inputs, decoder_outputs


total_step = 0
train_loss_record = []
valid_loss_record = []

# ---- Training: one pass over the training loader ----
model.train()
for batch in tqdm(train_loader):
    encoder_inputs, decoder_inputs, decoder_outputs = _prepare_batch(batch)

    preds = model(encoder_inputs, decoder_inputs)

    # Mean squared error between predictions and the decoder targets.
    step_loss = ((preds - decoder_outputs) ** 2).mean()

    # Actual backpropagation
    optimiser.zero_grad()
    step_loss.backward()
    optimiser.step()

    total_step += 1
    train_loss_record.append(step_loss.item())

    # Report the running mean every `print_every` steps, then start a new window.
    if total_step % print_every == 0:
        print("training step_loss: {:.3f}".format(np.mean(train_loss_record)))
        train_loss_record.clear()

# ---- Validation: one pass over the test loader ----
model.eval()
# No gradients are needed for evaluation; disabling autograd avoids building
# the computation graph for every batch, saving memory and time.
with torch.no_grad():
    for batch in tqdm(test_loader):
        encoder_inputs, decoder_inputs, decoder_outputs = _prepare_batch(batch)

        preds = model(encoder_inputs, decoder_inputs)
        step_loss = ((preds - decoder_outputs) ** 2).mean()

        valid_loss_record.append(step_loss.item())

print("validation step_loss: {:.3f}".format(np.mean(valid_loss_record)))
valid_loss_record.clear()

## Sampling human_motion_rnn

In [9]:
# Draw one batch from the (shuffled) test loader and split it into its three
# parts in a single statement; `batch` keeps the whole batch.
encoder_inputs, decoder_inputs, decoder_outputs = batch = next(iter(test_loader))

In [10]:
# Cast the sampled inputs to float, then move them to the GPU under the same
# condition used in the training loop. An unconditional .cuda() fails on
# CPU-only machines and puts the inputs on a different device from the model
# when use_cuda is False.
encoder_inputs = encoder_inputs.float()
if use_cuda and torch.cuda.is_available():
    encoder_inputs = encoder_inputs.cuda()

In [11]:
# Run the model's sample() method on the prepared encoder inputs. The call is
# left as a bare last expression so the notebook displays the returned tensor.
# NOTE(review): the model above was constructed with sampling = False —
# confirm whether sample() expects the sampling configuration instead.
model.sample(encoder_inputs)

tensor([[[ 0.3750, -0.6770,  0.1506,  ...,  0.0324, -0.2383,  1.2704],
         [ 0.4944, -0.6958,  0.1217,  ..., -0.0220, -0.3429,  1.5480],
         [ 0.6179, -0.7159,  0.0938,  ..., -0.0655, -0.4525,  1.8293],
         ...,
         [ 5.6342, -0.8643,  1.6964,  ...,  2.1641, -3.2136,  7.9908],
         [ 6.0611, -0.8580,  1.9494,  ...,  2.4551, -3.3784,  8.2996],
         [ 6.5064, -0.8508,  2.2235,  ...,  2.7643, -3.5477,  8.6034]],

        [[-0.3548, -0.8372,  0.1087,  ..., -0.8318, -0.0332,  1.0703],
         [-0.2874, -0.8566,  0.0777,  ..., -0.8902, -0.1044,  1.1293],
         [-0.2170, -0.8732,  0.0381,  ..., -0.9456, -0.1815,  1.1784],
         ...,
         [ 2.6176, -0.8606, -2.1457,  ..., -1.6798, -3.0835,  0.8412],
         [ 2.8737, -0.8586, -2.2822,  ..., -1.6805, -3.3703,  0.7673],
         [ 3.1431, -0.8567, -2.4148,  ..., -1.6739, -3.6805,  0.6870]],

        [[ 0.7404,  0.1413, -0.3407,  ...,  0.7619, -0.4401,  1.1123],
         [ 0.7625,  0.0799, -0.3911,  ...,  0