In [1]:
from functions.load_data import *
from functions.plot import *

In [2]:
# Load the motion-capture data. load_data() returns five objects; only
# ds_all_centered is used by the visible cells of this notebook.
(
    ds_all,
    ds_all_centered,
    datasets,
    datasets_centered,
    ds_counts,
) = load_data()

Original numpy dataset contains 38,309 timesteps of 53 joints with 3 dimensions each.


In [3]:
# Window into the centered dataset: first timestep and number of timesteps.
start_ts, seq_len = 0, 64

# Keep seq_len consecutive timesteps and only the first three entries of the last axis.
seq = ds_all_centered[start_ts:start_ts + seq_len, :, :3]
print(seq.shape)

(64, 53, 3)


### Visualize a single sequence

In [4]:
# Build a stick-figure animation of the selected window.
ani = animate_stick(
    seq,
    speed=50,
    figsize=(10, 8),
    cmap='inferno',
    cloud=False,
    ax_lims=(-1, 1),
)

Skeleton lines: 100%|██████████| 58/58 [00:00<00:00, 891.88it/s]


In [None]:
# Convert the animation to HTML/JS markup and embed it in the cell output.
# NOTE(review): display and HTML are not imported explicitly in this notebook;
# presumably they come in through the star imports in the first cell — confirm.
animation_js = ani.to_jshtml()
display(HTML(animation_js))

### Load network

In [6]:
from functions.model_variants import VAELSTM
import torch
from copy import deepcopy

# Build the model on CPU; the printed summary below shows its layers.
network = VAELSTM(seq_len=1, latent_dim=256, n_units=394, device='cpu')
print(network)

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
load_net = torch.load('model480_static_new.pth', map_location='cpu')['params']

# Strip a leading 'module.' from parameter names (presumably left by a
# DataParallel-style wrapper at training time — TODO confirm).
# Iterating over a snapshot of the keys allows renaming in place, so the
# tensors do not have to be deep-copied just to loop over them.
prefix = 'module.'
for k in list(load_net):
    if k.startswith(prefix):
        load_net[k[len(prefix):]] = load_net.pop(k)

# strict=True raises if any key is missing or unexpected.
network.load_state_dict(load_net, strict=True)

VAELSTM(
  (encoder): Encoder(
    (lstm): LSTM(159, 394, num_layers=2, batch_first=True)
    (mean): Linear(in_features=394, out_features=256, bias=True)
    (log_var): Linear(in_features=394, out_features=256, bias=True)
  )
  (decoder): Decoder(
    (linear): Linear(in_features=256, out_features=394, bias=True)
    (lstm): LSTM(394, 394, num_layers=2, batch_first=True)
    (out): Conv1d(394, 159, kernel_size=(3,), stride=(1,), padding=(1,))
  )
)


<All keys matched successfully>

### Compare an input dance sequence from a holdout test set with its corresponding decoded sequence

In [17]:
import random

# Load the held-out test set. map_location='cpu' matches the checkpoint load and
# the CPU network, so tensors saved on another device are remapped on load.
# NOTE(review): torch.load unpickles the file, so only load data from a trusted source.
test_set = torch.load('test_set_static.h5', map_location='cpu')
print(len(test_set))

5747


In [157]:
# Imported here so the cell does not depend on a later cell (or a star import)
# having put np into scope.
import numpy as np

# Fixed start index into the test set (not drawn at random), so the cell is reproducible.
rdm_idx = 2850

# Collect 64 consecutive test-set entries starting at rdm_idx. From each entry
# keep the first three values of the last axis and drop the leading axis
# (assumes that axis has size 1 — TODO confirm against the dataset).
# A comprehension is used so the loop variable does not overwrite the
# 64-frame `seq` defined earlier in the notebook.
seqs = [test_set[rdm_idx + i]['seq'][:, :, :3].squeeze(0) for i in range(64)]

seqs_numpy = np.stack(seqs, axis=0)
print(seqs_numpy.shape)

(64, 53, 3)


In [159]:
import numpy as np

# Run every frame through the full network, one frame per forward pass.
outputs = []

# Inference only: no_grad() stops autograd from recording a graph for each pass.
with torch.no_grad():
    for frame in seqs:
        # Two leading axes are added before the forward pass and removed from the result.
        output, mean, log_var = network(frame.unsqueeze(0).unsqueeze(0))
        output = output.squeeze(0).squeeze(0).detach().numpy()
        outputs.append(output)

outputs = np.stack(outputs, axis=0)
print(outputs.shape)

(64, 53, 3)


In [None]:
# Animate the input sequence with the network output passed as the "ghost" figure.
animation = animate_stick(
    seqs_numpy[:, :, :3],
    ghost=outputs,
    ghost_shift=0.2,
    figsize=(10, 8),
    cmap='inferno',
    cloud=False,
    ax_lims=(-1, 1),
)

# Convert to HTML/JS markup and embed it in the cell output.
animation_js = animation.to_jshtml()
display(HTML(animation_js))

### Adding noise to latent space

With $\sigma=0$ (no noise added to the latent mean), the generated sequence is almost identical to the original sequence.

In [None]:
outputs = []
sigma = 0

# One noise vector of 256 values with standard deviation sigma, shared by every
# frame; with sigma = 0 it is all zeros.
bias = torch.Tensor(np.random.normal(0, sigma, 256))

# Inference only: no_grad() stops autograd from recording a graph for each pass.
with torch.no_grad():
    for i in range(64):
        # Keep the first encoder output (named mean here) and discard the second.
        mean, _ = network.encoder(seqs[i].unsqueeze(0).unsqueeze(0))
        # Decode the shifted latent vector.
        output = network.decoder(mean + bias)
        output = output.squeeze(0).squeeze(0).detach().numpy()
        outputs.append(output)

outputs = np.stack(outputs, axis=0)

# Animate the input sequence with the decoded sequence passed as the "ghost" figure.
animation = animate_stick(seqs_numpy[:, :, :3], ghost=outputs, ghost_shift=0.2, figsize=(10, 8), cmap='inferno', cloud=False, ax_lims=(-1, 1))

animation_js = animation.to_jshtml()
display(HTML(animation_js))

$\sigma=0.1$

In [None]:
outputs = []
sigma = 0.1

# One noise vector of 256 values with standard deviation sigma, shared by every frame.
bias = torch.Tensor(np.random.normal(0, sigma, 256))

# Inference only: no_grad() stops autograd from recording a graph for each pass.
with torch.no_grad():
    for i in range(64):
        # Keep the first encoder output (named mean here) and discard the second.
        mean, _ = network.encoder(seqs[i].unsqueeze(0).unsqueeze(0))
        # Decode the shifted latent vector.
        output = network.decoder(mean + bias)
        output = output.squeeze(0).squeeze(0).detach().numpy()
        outputs.append(output)

outputs = np.stack(outputs, axis=0)

# Animate the input sequence with the decoded sequence passed as the "ghost" figure.
animation = animate_stick(seqs_numpy[:, :, :3], ghost=outputs, ghost_shift=0.2, figsize=(10, 8), cmap='inferno', cloud=False, ax_lims=(-1, 1))

animation_js = animation.to_jshtml()
display(HTML(animation_js))

$\sigma=0.5$

In [None]:
outputs = []
sigma = 0.5

# One noise vector of 256 values with standard deviation sigma, shared by every frame.
bias = torch.Tensor(np.random.normal(0, sigma, 256))

# Inference only: no_grad() stops autograd from recording a graph for each pass.
with torch.no_grad():
    for i in range(64):
        # Keep the first encoder output (named mean here) and discard the second.
        mean, _ = network.encoder(seqs[i].unsqueeze(0).unsqueeze(0))
        # Decode the shifted latent vector.
        output = network.decoder(mean + bias)
        output = output.squeeze(0).squeeze(0).detach().numpy()
        outputs.append(output)

outputs = np.stack(outputs, axis=0)

# Animate the input sequence with the decoded sequence passed as the "ghost" figure.
animation = animate_stick(seqs_numpy[:, :, :3], ghost=outputs, ghost_shift=0.2, figsize=(10, 8), cmap='inferno', cloud=False, ax_lims=(-1, 1))

animation_js = animation.to_jshtml()
display(HTML(animation_js))

### Sample randomly from the latent space — the output is jittery

In [None]:
outputs = []
sigma = 0.1

# Inference only: no_grad() stops autograd from recording a graph for each pass.
with torch.no_grad():
    for i in range(64):
        # Each frame is decoded from its own independent latent sample
        # (a 1 x 256 array drawn with mean 0 and standard deviation sigma).
        output = network.decoder(torch.Tensor(np.random.normal(0, sigma, (1, 256))))
        output = output.squeeze(0).squeeze(0).detach().numpy()
        outputs.append(output)

outputs = np.stack(outputs, axis=0)

# Animate the generated frames on their own (no ghost figure).
animation = animate_stick(outputs, figsize=(10, 8), cmap='inferno', cloud=False, ax_lims=(-1, 1))

animation_js = animation.to_jshtml()
display(HTML(animation_js))