### Setup env
This is notebook-specific setup: my module path is different on my VM, so this cell is a workaround that adds the parent directory to `sys.path`.

In [1]:
import sys
import os

# Absolute path of the parent of the current working directory; it is added to
# sys.path only if it is not already there.
path = os.path.abspath(os.pardir)
if path not in sys.path:
    sys.path.append(path)

### Load Dataset
You can change the type of motion by changing the file path passed to `MotionDataset`.
The dataset currently just repeats the same motion 100 times.

In [3]:
from diffusion.data_loaders.motion_dataset_v2 import MotionDataset
# Motion clip to train on; point this at another file to use a different motion.
motion_file = "data/motions/humanoid3d_cartwheel.txt"
dataset = MotionDataset(motion_file, shuffle=True)
# Cell result: dataset length, the first item, and that item's trajectory shape.
len(dataset), dataset[0], dataset[0].trajectories.shape

Tmp angle [0.0, 0.0, 0.85536, 0.9966429999999997, -0.0070009999999999795, 0.08157, 0.0005729999999999971, 0.042303731260289315, -0.056088768155961526, -0.01172717680484046, -0.014103614145860938, 0.2358842735659614, 0.37124889801787253, -0.6111023347690597, -0.09268300376873025, -0.09541896434572254, 0.585361, 0.1699928747321186, 0.08652758875118252, 0.354108626550405, 0.160215, -0.2285399691330798, -0.39445967594673703, -0.1178224382194308, -0.369571, 0.20448116583595066, -0.12115992907931128, 0.07892319943485762, 0.3736623102073797, -0.010008232584494297, 0.30603690929303384, -0.364281, -0.13425257761871864, -0.004787718949892447, 0.0010873114649849894] 35
[-0.23938   2.078199 -0.008457]
[-0.23938   2.078199]


(160,
 Batch(trajectories=tensor([[ 0.0000,  0.0000,  0.8554,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0037, -0.0062,  0.8563,  ..., -0.2435, -1.1484, -0.8920],
         [ 0.0064, -0.0122,  0.8575,  ..., -0.0278, -1.2998, -0.9550],
         ...,
         [-0.2466,  2.0540,  0.8465,  ..., -0.8048,  0.5575,  1.2816],
         [-0.2435,  2.0658,  0.8467,  ..., -0.7320,  0.5210,  1.2596],
         [-0.2394,  2.0782,  0.8469,  ..., -0.5900,  0.5328,  1.0961]]), conditions={0: tensor([ 0.0000e+00,  0.0000e+00,  8.5536e-01,  9.9664e-01, -7.0010e-03,
          8.1570e-02,  5.7300e-04,  4.2304e-02, -5.6089e-02, -1.1727e-02,
         -1.4104e-02,  2.3588e-01,  3.7125e-01, -6.1110e-01, -9.2683e-02,
         -9.5419e-02,  5.8536e-01,  1.6999e-01,  8.6528e-02,  3.5411e-01,
          1.6022e-01, -2.2854e-01, -3.9446e-01, -1.1782e-01, -3.6957e-01,
          2.0448e-01, -1.2116e-01,  7.8923e-02,  3.7366e-01, -1.0008e-02,
          3.0604e-01, -3.6428e-01, -1.3425e-01, -4.7877e-03,  1.0873e-03,
 

### Setup Model
Configure your experiment name and save paths here; everything will be stored under the `logs` folder later on.

In [4]:
import os
import torch
from diffusion.diffuser.utils import Trainer as dTrainer, Config as dConfig

exp_name = "test-cartwheel-shuffled-128"
# NOTE(review): absolute, machine-specific location — adjust for your own checkout.
savepath = f'/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{exp_name}'
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Create the experiment folder together with its `sampled_motions` subfolder.
# exist_ok=True keeps the cell safe to re-run and, unlike guarding on
# os.path.exists(savepath), still creates `sampled_motions` when the experiment
# folder is already present without it.
os.makedirs(os.path.join(savepath, 'sampled_motions'), exist_ok=True)

In [10]:
from diffusion.diffuser.models.temporal_v2 import TemporalUnet 

# Both sizes are read off the first dataset item's trajectory tensor.
horizon = dataset[0].trajectories.shape[0]         # size of axis 0
transition_dim = dataset[0].trajectories.shape[1]  # size of axis 1

# Keyword arguments for the TemporalUnet config; the conditioning width is set
# equal to the transition width.
unet_kwargs = dict(
    savepath=(savepath, 'model_config.pkl'),
    horizon=horizon,
    transition_dim=transition_dim,
    cond_dim=transition_dim,
    device=device,
)
model_config = dConfig(TemporalUnet, **unet_kwargs)
model = model_config()


[utils/config ] Config: <class 'diffusion.diffuser.models.temporal_v2.TemporalUnet'>
    cond_dim: 69
    horizon: 160
    transition_dim: 69

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-cartwheel-shuffled-128/model_config.pkl

[ models/temporal ] Channel dimensions: [(69, 128), (128, 256), (256, 512), (512, 1024)]
[(69, 128), (128, 256), (256, 512), (512, 1024)]


In [11]:
from diffusion.diffuser.models.diffusion_v2 import GaussianDiffusion

# Diffusion hyper-parameters. Only the very basic ones are used here; some of
# them exist for conditioning.
n_timesteps = 1000
loss_type = 'l2'
clip_denoised = False
predict_epsilon = False
action_weight = 5
loss_weights = None
loss_discount = 1
pos_dim = 35
vel_dim = 34

# pos_dim is passed as the "observation" width and vel_dim as the "action" width.
diffusion_kwargs = dict(
    savepath=(savepath, "diffusion_config.pkl"),
    horizon=horizon,
    # transition_dim=transition_dim,
    observation_dim=pos_dim,
    action_dim=vel_dim,
    n_timesteps=n_timesteps,
    loss_type=loss_type,
    clip_denoised=clip_denoised,
    predict_epsilon=predict_epsilon,
    # loss weighting
    action_weight=action_weight,
    loss_weights=loss_weights,
    loss_discount=loss_discount,
    device=device,
)
diffusion_config = dConfig(GaussianDiffusion, **diffusion_kwargs)

# Instantiate the diffusion wrapper around the model built earlier.
diffusion = diffusion_config(model)


[utils/config ] Config: <class 'diffusion.diffuser.models.diffusion_v2.GaussianDiffusion'>
    action_dim: 34
    action_weight: 5
    clip_denoised: False
    horizon: 160
    loss_discount: 1
    loss_type: l2
    loss_weights: None
    n_timesteps: 1000
    observation_dim: 35
    predict_epsilon: False

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-cartwheel-shuffled-128/diffusion_config.pkl



### Setup Training

In [12]:
# Training hyper-parameters.
learning_rate = 2e-4
gradient_accumulate_every = 2
ema_decay = 0.995
sample_freq = 2000
save_freq = 2000
n_train_steps = 1e5
n_saves = 5
save_parallel = False
bucket = None
n_reference = 8
train_batch_size = 32

# label_freq is the total step budget divided by the number of saves.
trainer_kwargs = dict(
    savepath=(savepath, 'trainer_config.pkl'),
    train_batch_size=train_batch_size,
    train_lr=learning_rate,
    gradient_accumulate_every=gradient_accumulate_every,
    ema_decay=ema_decay,
    sample_freq=sample_freq,
    save_freq=save_freq,
    label_freq=int(n_train_steps // n_saves),
    save_parallel=save_parallel,
    results_folder=savepath,
    bucket=bucket,
    n_reference=n_reference,
)
trainer_config = dConfig(dTrainer, **trainer_kwargs)

# No renderer is attached to this trainer.
trainer = trainer_config(diffusion, dataset, renderer=None)


[utils/config ] Config: <class 'diffusion.diffuser.utils.training.Trainer'>
    bucket: None
    ema_decay: 0.995
    gradient_accumulate_every: 2
    label_freq: 20000
    n_reference: 8
    results_folder: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-cartwheel-shuffled-128
    sample_freq: 2000
    save_freq: 2000
    save_parallel: False
    train_batch_size: 32
    train_lr: 0.0002

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-cartwheel-shuffled-128/trainer_config.pkl



### Test if model and training loop works

In [46]:
import torch
from diffuser.utils import batchify

# Smoke test: push one batched dataset item through the model and inspect the output.
tunet = model
tunet.to(device)
test_data = dataset[0]
batch = batchify(test_data)
# One random diffusion step index. torch.randint already returns an int64
# tensor on `device`, so the former trailing .long().to(device) was a no-op.
# The upper bound 1000 equals the n_timesteps value used for the diffusion config.
t = torch.randint(0, 1000, (1,), device=device)
res = tunet(batch.trajectories, cond=batch.conditions, time=t, verbose=True)
res, res.shape

x.shape torch.Size([1, 24, 35])
x.shape torch.Size([1, 35, 24])
x.shape torch.Size([1, 32, 24])
1
xfinal.shape torch.Size([1, 32, 12])
x.shape torch.Size([1, 64, 12])
2
xfinal.shape torch.Size([1, 64, 6])
x.shape torch.Size([1, 128, 6])
3
xfinal.shape torch.Size([1, 128, 3])
x.shape torch.Size([1, 256, 3])
4
xfinal.shape torch.Size([1, 256, 3])
xt1.shape torch.Size([1, 256, 3])
xt2.shape torch.Size([1, 256, 3])
xt3.shape torch.Size([1, 256, 3])
pop.shape torch.Size([1, 256, 3])
pop.shape torch.Size([1, 128, 6])
pop.shape torch.Size([1, 64, 12])


(tensor([[[-2.1733e-02, -1.1539e-01, -7.9106e-02, -2.3933e-01, -9.2386e-02,
           -2.9877e-01, -1.7281e-01,  1.1766e-01, -7.2127e-02, -9.7647e-03,
            3.2103e-02, -3.0975e-02,  1.4830e-01,  4.0635e-01, -4.2027e-02,
            1.5261e-01, -8.7536e-02,  2.0752e-01,  1.3954e-01, -1.9565e-01,
            2.7834e-01,  2.4632e-01, -4.2083e-02, -1.2029e-01,  1.5369e-01,
            2.2183e-02,  2.5805e-02,  1.6060e-01,  1.7928e-01,  9.3707e-02,
           -1.4873e-01, -9.0490e-02, -3.7410e-03, -1.9466e-01, -2.9295e-02],
          [-2.2618e-01, -2.7070e-01, -3.1676e-01, -3.4229e-01,  7.0141e-02,
            8.1789e-02, -1.0541e-01,  1.3073e-01, -1.4501e-01, -1.9308e-01,
           -3.8574e-02,  6.4113e-01, -3.9532e-02,  5.8042e-01, -7.1335e-02,
            2.1991e-02,  5.3182e-02, -2.1464e-02, -3.8218e-01, -2.4950e-02,
           -6.2796e-02, -2.9848e-01, -2.7086e-01, -2.0378e-01,  2.4176e-01,
            5.5960e-02, -2.8933e-01,  2.1931e-01, -5.2595e-02, -5.2110e-02,
           

In [47]:
from diffuser.utils import report_parameters, batchify

# Report the model's parameters.
report_parameters(model)

# Smoke test of the training path: compute the diffusion loss on one batched
# dataset item and backpropagate it. An empty dict is passed as the conditions
# here rather than batch.conditions.
# NOTE(review): the gradients produced by this backward() stay on the
# parameters — confirm the trainer zeroes them before its first optimizer step.
print('Testing forward...', end=' ', flush=True)
x = dataset[0]
batch = batchify(x)
loss, _ = diffusion.loss(batch.trajectories, {})
loss.backward()
print('✓')

[ utils/arrays ] Total parameters: 3.96 M
         downs.3.0.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         ups.0.0.blocks.0.block.0.weight: 327.68 k | Conv1d(512, 128, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.0.b

### Train Model
It took me 80s to run 1 epoch and results were pretty good from just 1 epoch

In [28]:
n_steps_per_epoch = 1000

# Epoch count implied by the full n_train_steps budget — printed for reference only.
print(int(n_train_steps // n_steps_per_epoch))

# This run deliberately trains for a single epoch.
n_epochs = 1
for i in range(n_epochs):
    print(f'Epoch {i} / {n_epochs} | {savepath}')
    trainer.train(n_train_steps=n_steps_per_epoch)

# Save a checkpoint labelled with the number of epochs trained.
trainer.save(n_epochs)

100
Epoch 0 / 1 | /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-backflip
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-backflip/state_0.pt
0:   0.4028 | a0_loss:   0.0187 | t:   0.2189
100:   0.0908 | a0_loss:   0.0054 | t:   9.1696
200:   0.0455 | a0_loss:   0.0032 | t:   9.1730
300:   0.0245 | a0_loss:   0.0019 | t:   9.3190
400:   0.0130 | a0_loss:   0.0010 | t:   8.6788
500:   0.0070 | a0_loss:   0.0005 | t:   8.5875
600:   0.0040 | a0_loss:   0.0002 | t:   8.6971
700:   0.0025 | a0_loss:   0.0001 | t:   8.5658
800:   0.0016 | a0_loss:   0.0000 | t:   8.8950
900:   0.0011 | a0_loss:   0.0000 | t:   8.6975
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-backflip/state_1.pt


### (Alternatively) load a checkpoint

In [13]:
# Alternative (disabled): rebuild a whole experiment from a saved run with
# load_diffusion and take the renderer and EMA model from it.
# from diffusion.diffuser.utils import load_diffusion
# diffusion_experiment = load_diffusion(
#     "/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/diffuser", dataset=dataset, epoch=1)

# renderer = diffusion_experiment.renderer
# model = diffusion_experiment.trainer.ema_model

# Optionally load a checkpoint
# The argument 3 is the checkpoint label handed to trainer.load; presumably a
# matching checkpoint must already exist in the trainer's results folder — TODO confirm.
trainer.load(3)
# NOTE(review): this rebinds `model` to the trainer's EMA model, but the sampling
# cell calls `diffusion.p_sample_loop`, not `model` — confirm which weights
# sampling is meant to use.
model = trainer.ema_model

### Sample from model - In Betweening through Blending

Let's try doing a cartwheel while walking

Start from existing motion data instead of pure noise, leaving only the middle 50% of the motion as noise.

And also fix a constraint that the start and end of the motion should be the same

In [14]:
from diffusion.data_loaders.motion_dataset import MotionDataset

# NOTE(review): the variable is named `walk_dataset`, yet the file loaded here is
# the cartwheel clip — confirm whether a walking motion file was intended.
walk_motion_file = "data/motions/humanoid3d_cartwheel.txt"
walk_dataset = MotionDataset(walk_motion_file)
# Cell result: dataset length, the first item, and that item's trajectory shape.
len(walk_dataset), walk_dataset[0], walk_dataset[0].trajectories.shape

Tmp angle [0.0, 0.0, 0.85536, 0.9966429999999997, -0.0070009999999999795, 0.08157, 0.0005729999999999971, 0.042303731260289315, -0.056088768155961526, -0.01172717680484046, -0.014103614145860938, 0.2358842735659614, 0.37124889801787253, -0.6111023347690597, -0.09268300376873025, -0.09541896434572254, 0.585361, 0.1699928747321186, 0.08652758875118252, 0.354108626550405, 0.160215, -0.2285399691330798, -0.39445967594673703, -0.1178224382194308, -0.369571, 0.20448116583595066, -0.12115992907931128, 0.07892319943485762, 0.3736623102073797, -0.010008232584494297, 0.30603690929303384, -0.364281, -0.13425257761871864, -0.004787718949892447, 0.0010873114649849894] 35


(160,
 Batch(trajectories=tensor([[ 0.0000e+00,  0.0000e+00,  8.5536e-01,  ..., -1.3425e-01,
          -4.7877e-03,  1.0873e-03],
         [ 3.6500e-03, -6.2240e-03,  8.5632e-01,  ..., -1.3036e-01,
           1.4385e-02,  1.5935e-02],
         [ 6.4000e-03, -1.2205e-02,  8.5748e-01,  ..., -1.3041e-01,
           3.6052e-02,  3.1853e-02],
         ...,
         [-2.4664e-01,  2.0540e+00,  8.4651e-01,  ...,  5.9186e-02,
          -2.0091e-01,  2.3318e-01],
         [-2.4351e-01,  2.0658e+00,  8.4672e-01,  ...,  7.3301e-02,
          -2.0666e-01,  2.1504e-01],
         [-2.3938e-01,  2.0782e+00,  8.4690e-01,  ...,  8.5008e-02,
          -2.1333e-01,  1.9921e-01]]), conditions={0: tensor([ 0.0000e+00,  0.0000e+00,  8.5536e-01,  9.9664e-01, -7.0010e-03,
          8.1570e-02,  5.7300e-04,  4.2304e-02, -5.6089e-02, -1.1727e-02,
         -1.4104e-02,  2.3588e-01,  3.7125e-01, -6.1110e-01, -9.2683e-02,
         -9.5419e-02,  5.8536e-01,  1.6999e-01,  8.6528e-02,  3.5411e-01,
          1.6022e-0

Extend the walk to be the same shape as the cartwheel

In [30]:
# Tile the clip twice along axis 0. torch.cat returns a new tensor, so the
# in-place write below never touches the tensor handed out by walk_dataset.
walk_traj = torch.cat([walk_dataset[0].trajectories] * 2, dim=0)

# Replace the middle 50% of the trajectory with random noise.
num_frames = walk_traj.shape[0]
start_idx = num_frames // 4
end_idx = start_idx + (num_frames // 2)
# Standard-normal noise (torch.randn) to match the Gaussian noise a
# GaussianDiffusion model denoises from; torch.rand would instead give
# uniform values in [0, 1).
random_values = torch.randn(
    (end_idx - start_idx, walk_traj.shape[1]), dtype=walk_traj.dtype
)
walk_traj[start_idx:end_idx, :] = random_values

# Add a leading batch dimension, then show the resulting shape.
walk_traj = walk_traj.unsqueeze(0)
walk_traj.shape

torch.Size([1, 64, 35])

In [36]:
# Split points along axis 1 of walk_traj.
# NOTE: these assignments replace the start_idx / end_idx values computed while
# building walk_traj.
start_idx = 20
end_idx = 44
# Leading and trailing segments of walk_traj, cut at the split points above.
# NOTE(review): the "_first_25" / "_last_75" suffixes do not obviously match the
# slice bounds ([:20] and [44:]) — confirm the intended naming.
walk_traj_first_25 = walk_traj[:, :start_idx, :]
walk_traj_last_75 = walk_traj[:, end_idx:, :]
# Cell result: the shapes of both segments.
walk_traj_first_25.shape, walk_traj_last_75.shape

(torch.Size([1, 20, 35]), torch.Size([1, 20, 35]))

In [76]:
# Add a leading batch dimension out of place. The in-place unsqueeze_ mutated
# whatever tensor dataset[0] returned; if the dataset hands out the same tensor
# object each time, re-running the cell (or any later dataset[0] access) would
# see an extra dimension.
cartwheel_traj = dataset[0].trajectories.unsqueeze(0)
cartwheel_traj.shape

torch.Size([1, 160, 35])

In [88]:
# Module-level settings read by apply_conditioning_in_betweening at call time.
start_idx = 0              # first frame of x that gets overwritten
cartwheel_start_idx = 100  # first frame copied out of cartwheel_traj
window = 50                # number of consecutive frames copied


def apply_conditioning_in_betweening(x):
    """Overwrite a window of frames in ``x`` with frames from ``cartwheel_traj``.

    ``x`` is assumed to have the shape [1, num_frame, data_dim]. Frames
    ``start_idx : start_idx + window`` of ``x`` are replaced in place with
    frames ``cartwheel_start_idx : cartwheel_start_idx + window`` of the global
    ``cartwheel_traj``. The walk start/end constraints
    (``walk_traj_first_25`` / ``walk_traj_last_75``) are currently not applied.

    Returns:
        The same tensor object ``x``, after the in-place write.
    """
    target_frames = slice(start_idx, start_idx + window)
    source_frames = slice(cartwheel_start_idx, cartwheel_start_idx + window)
    x[:, target_frames, :] = cartwheel_traj[:, source_frames, :]
    return x

In [89]:
from diffuser.utils import batchify
# Batch one dataset item; its trajectory shape and conditions are the first
# two arguments of the sampling call.
test = dataset[0]
batch = batchify(test)
# Sample with walk_traj as the starting motion and
# apply_conditioning_in_betweening as the conditioning function.
sample = diffusion.p_sample_loop(
    batch.trajectories.shape,
    batch.conditions,
    starting_motion=walk_traj,
    conditioning_fn=apply_conditioning_in_betweening,
)


[F                                                                                                    
[F1 / 1000 [                                                            ]   0% | 23.9 Hz
t : 999 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F2 / 1000 [                                                            ]   0% | 38.1 Hz
t : 998 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F3 / 1000 [                                                            ]   0% | 47.9 Hz
t : 997 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                               

### Sanity check: is the output similar to the training data?

In [16]:
# Cell result: the sampled object and the shape of its trajectories tensor.
sample, sample.trajectories.shape

(Sample(trajectories=tensor([[[ 0.0000,  0.0000,  0.8475,  ...,  0.1931, -0.2979, -0.0831],
          [ 0.0415, -0.0047,  0.8466,  ...,  0.1812, -0.2662, -0.1258],
          [ 0.0817, -0.0105,  0.8483,  ...,  0.1293, -0.1646, -0.1874],
          ...,
          [ 0.9535,  0.0164,  0.8741,  ...,  0.0799, -0.2361, -0.1045],
          [ 0.9810,  0.0152,  0.8736,  ...,  0.0750, -0.2538, -0.1148],
          [ 1.0080,  0.0138,  0.8721,  ...,  0.0762, -0.2713, -0.1198]]],
        device='cuda:0'), values=tensor([0.], device='cuda:0'), chains=None),
 torch.Size([1, 160, 35]))

In [12]:
# Cell result: the first training item and its trajectory shape, for comparison
# with the sampled output.
dataset[0], dataset[0].trajectories.shape

(Batch(trajectories=tensor([[ 0.0000e+00,  0.0000e+00,  8.5536e-01,  ..., -1.3425e-01,
          -4.7877e-03,  1.0873e-03],
         [ 3.6500e-03, -6.2240e-03,  8.5632e-01,  ..., -1.3036e-01,
           1.4385e-02,  1.5935e-02],
         [ 6.4000e-03, -1.2205e-02,  8.5748e-01,  ..., -1.3041e-01,
           3.6052e-02,  3.1853e-02],
         ...,
         [-2.4664e-01,  2.0540e+00,  8.4651e-01,  ...,  5.9186e-02,
          -2.0091e-01,  2.3318e-01],
         [-2.4351e-01,  2.0658e+00,  8.4672e-01,  ...,  7.3301e-02,
          -2.0666e-01,  2.1504e-01],
         [-2.3938e-01,  2.0782e+00,  8.4690e-01,  ...,  8.5008e-02,
          -2.1333e-01,  1.9921e-01]]), conditions={0: tensor([ 0.0000e+00,  0.0000e+00,  8.5536e-01,  9.9664e-01, -7.0010e-03,
          8.1570e-02,  5.7300e-04,  4.2304e-02, -5.6089e-02, -1.1727e-02,
         -1.4104e-02,  2.3588e-01,  3.7125e-01, -6.1110e-01, -9.2683e-02,
         -9.5419e-02,  5.8536e-01,  1.6999e-01,  8.6528e-02,  3.5411e-01,
          1.6022e-01, -2.

### Save results to logs

In [90]:
import numpy as np


def save_motions(sample, output_dir, filename="motion.npy"):
    """Write a sampled trajectory to ``output_dir/filename`` as a NumPy ``.npy`` file.

    Args:
        sample: Object exposing a ``trajectories`` torch tensor; a leading axis
            of size 1 is squeezed away before saving.
        output_dir: Destination directory. It is created (including parents)
            when it does not exist yet. An empty string means the current
            working directory.
        filename: Name of the file written inside ``output_dir``.

    Returns:
        None. A confirmation message is printed.
    """
    # Create the target folder on demand so saving does not fail when the
    # experiment folder exists without its output subfolder.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    # detach() lets tensors that still track gradients be converted to NumPy;
    # cpu() does the same for tensors living on another device.
    motion_array = sample.trajectories.detach().squeeze(0).cpu().numpy()
    np.save(filepath, motion_array)
    print(f"Motion saved as {filename}")


# Store the sampled motion in the experiment's `sampled_motions` folder.
save_motions(
    sample,
    f"{savepath}/sampled_motions",
    filename="run-in-between-walk-motion.npy",
)

Motion saved as run-in-between-walk-motion.npy


In [80]:
# Cell result: the experiment's save path.
savepath

'/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-cartwheel-shuffled-seq'