### Setup env
This is notebook-specific setup: my module path is different on my VM, so the cell below adds the parent directory to `sys.path` as a workaround.

In [1]:
import sys
import os

# Put the parent of the current working directory on the import path (only
# once), presumably so that the later `diffusion.*` imports can be resolved.
path = os.path.abspath(os.pardir)
if sys.path.count(path) == 0:
    sys.path.append(path)

### Load Dataset
You can change the type of motion by changing the file path.
The dataset currently just repeats the same motion 100 times.

In [4]:
from diffusion.data_loaders.motion_dataset import MotionDataset
# Load the motion clip; the relative path is resolved against the current
# working directory. Swap the file to work with a different motion.
dataset = MotionDataset("data/motions/humanoid3d_backflip.txt")
# Show the number of items, the first item, and the shape of its trajectories tensor.
len(dataset), dataset[0], dataset[0].trajectories.shape

Tmp angle [0.0, 0.0, 0.886733, 0.9994119999999997, 0.029214999999999935, 0.017962999999999996, -0.0005250000000000012, 0.0008703311351828238, -0.01099955419316837, 0.0011488037112933414, 0.006692356321057513, 0.28207086609928633, -0.023509972489884845, -0.024408685227982754, -0.14478594217336963, -0.25155797447522277, 0.240463, 0.10908909272941009, -0.20003141435314956, -0.02851573566767156, 0.148934, -0.04895066252132076, 0.07777454918628952, 0.00022592685870272312, -0.014186, 0.013966981294680377, -0.07269661242438823, -0.21047301550502306, -0.04167931361131441, 0.013137096158641143, -0.025583069520627662, -0.027859, -0.023363972687569904, 0.016181526060614963, 0.2111699686528958] 35


(100,
 Batch(trajectories=tensor([[ 0.0000e+00,  0.0000e+00,  8.8673e-01,  9.9941e-01,  2.9215e-02,
           1.7963e-02, -5.2500e-04,  8.7033e-04, -1.1000e-02,  1.1488e-03,
           6.6924e-03,  2.8207e-01, -2.3510e-02, -2.4409e-02, -1.4479e-01,
          -2.5156e-01,  2.4046e-01,  1.0909e-01, -2.0003e-01, -2.8516e-02,
           1.4893e-01, -4.8951e-02,  7.7775e-02,  2.2593e-04, -1.4186e-02,
           1.3967e-02, -7.2697e-02, -2.1047e-01, -4.1679e-02,  1.3137e-02,
          -2.5583e-02, -2.7859e-02, -2.3364e-02,  1.6182e-02,  2.1117e-01],
         [-2.0268e-02, -7.3500e-04,  9.0938e-01,  9.9920e-01,  3.0395e-02,
           2.4696e-02,  8.4250e-03, -4.4505e-05,  2.2081e-03, -3.1660e-03,
           1.1586e-02,  2.9219e-01, -1.8050e-02, -2.5669e-01, -1.8148e-01,
          -4.4781e-01,  2.3335e-01,  3.7712e-01, -2.4100e-01,  1.8741e-01,
           1.7199e-01, -6.5981e-02,  1.4936e-02, -4.7221e-03, -1.4254e-02,
           4.3702e-02,  2.0182e-01, -2.0957e-01, -3.0643e-02, -5.7684e-02,

### Setup Model
Configure your experiment name and save paths here; everything will be stored under the logs folder later on.

In [5]:
import os
import torch
from diffusion.diffuser.utils import Trainer as dTrainer, Config as dConfig

# Experiment name; it is used as the name of the log directory below.
exp_name = "test-constrained-sampling-holding-a-box-backflip"
# NOTE: machine-specific absolute path -- adjust it when running elsewhere.
savepath = f'/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{exp_name}'
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Create the log directory together with its 'sampled_motions' subfolder.
# makedirs builds any missing parents, and exist_ok=True makes re-runs safe
# while still creating the subfolder when only `savepath` already exists.
os.makedirs(os.path.join(savepath, 'sampled_motions'), exist_ok=True)

In [6]:
from diffusion.diffuser.models.temporal import TemporalUnet 

# Sequence length and per-step feature size, read off the first dataset item.
horizon, transition_dim = dataset[0].trajectories.shape[:2]

# Describe how to build the TemporalUnet; the config is also written to
# model_config.pkl under `savepath`.
model_config = dConfig(
    TemporalUnet,
    savepath=(savepath, 'model_config.pkl'),
    device=device,
    horizon=horizon,
    transition_dim=transition_dim,
    cond_dim=transition_dim,
)

# Calling the config instantiates the network.
model = model_config()


[utils/config ] Config: <class 'diffusion.diffuser.models.temporal.TemporalUnet'>
    cond_dim: 35
    horizon: 24
    transition_dim: 35

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip/model_config.pkl

[ models/temporal ] Channel dimensions: [(35, 32), (32, 64), (64, 128), (128, 256)]
[(35, 32), (32, 64), (64, 128), (128, 256)]


In [7]:
from diffusion.diffuser.models.diffusion import GaussianDiffusion

# Diffusion hyperparameters. Only the very basic ones are used here; some of
# the parameters exist for conditioning.
n_timesteps = 100  # number of diffusion steps (the sampling log further down counts 100 steps)
loss_type = 'l2'
clip_denoised = False
predict_epsilon = False  # presumably: the network predicts the clean sample rather than the noise -- TODO confirm against GaussianDiffusion
action_weight = 5
loss_weights = None
loss_discount = 1

# Describe how to build the GaussianDiffusion wrapper; the config is also
# written to diffusion_config.pkl under `savepath`.
diffusion_config = dConfig(
    GaussianDiffusion,
    savepath=(savepath, 'diffusion_config.pkl'),
    horizon=horizon,
    transition_dim=transition_dim,
    n_timesteps=n_timesteps,
    loss_type=loss_type,
    clip_denoised=clip_denoised,
    predict_epsilon=predict_epsilon,
    # loss weighting
    action_weight=action_weight,
    loss_weights=loss_weights,
    loss_discount=loss_discount,
    device=device,
)

# Instantiate the diffusion object around the network built in the previous cell.
diffusion = diffusion_config(model)


[utils/config ] Config: <class 'diffusion.diffuser.models.diffusion.GaussianDiffusion'>
    action_weight: 5
    clip_denoised: False
    horizon: 24
    loss_discount: 1
    loss_type: l2
    loss_weights: None
    n_timesteps: 100
    predict_epsilon: False
    transition_dim: 35

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip/diffusion_config.pkl



### Setup Training

In [8]:
# Trainer hyperparameters.
learning_rate = 2e-4
gradient_accumulate_every = 2
ema_decay = 0.995
sample_freq = 2000
save_freq = 2000
n_train_steps = 1e5  # float literal; it is wrapped in int(...) wherever an integer count is derived from it
n_saves = 5
save_parallel = False
bucket = None
n_reference = 8
train_batch_size = 32

# Describe how to build the Trainer; the config is also written to
# trainer_config.pkl under `savepath`.
trainer_config = dConfig(
    dTrainer,
    savepath=(savepath, 'trainer_config.pkl'),
    train_batch_size=train_batch_size,
    train_lr=learning_rate,
    gradient_accumulate_every=gradient_accumulate_every,
    ema_decay=ema_decay,
    sample_freq=sample_freq,
    save_freq=save_freq,
    label_freq=int(n_train_steps // n_saves),  # 1e5 // 5 -> 20000
    save_parallel=save_parallel,
    results_folder=savepath,
    bucket=bucket,
    n_reference=n_reference,
)

# Build the trainer for the diffusion model and dataset; no renderer is supplied.
trainer = trainer_config(diffusion, dataset, renderer=None)


[utils/config ] Config: <class 'diffusion.diffuser.utils.training.Trainer'>
    bucket: None
    ema_decay: 0.995
    gradient_accumulate_every: 2
    label_freq: 20000
    n_reference: 8
    results_folder: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip
    sample_freq: 2000
    save_freq: 2000
    save_parallel: False
    train_batch_size: 32
    train_lr: 0.0002

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip/trainer_config.pkl



### Test if the model and training loop work

In [9]:
import torch
from diffuser.utils import batchify

# Smoke-test a single forward pass of the raw network on one dataset item.
tunet = model
tunet.to(device)
test_data = dataset[0]
batch = batchify(test_data)  # presumably adds the leading batch dimension (the log shows x.shape [1, 24, 35])
# Draw the timestep from the range the diffusion process is configured with,
# [0, n_timesteps), rather than a hard-coded 1000. torch.randint already
# returns an int64 tensor on `device`, so no extra .long()/.to(device) is needed.
t = torch.randint(0, n_timesteps, (1,), device=device)
res = tunet(batch.trajectories, cond=batch.conditions, time=t, verbose=True)
res, res.shape

x.shape torch.Size([1, 24, 35])
x.shape torch.Size([1, 35, 24])
x.shape torch.Size([1, 32, 24])
1
xfinal.shape torch.Size([1, 32, 12])
x.shape torch.Size([1, 64, 12])
2
xfinal.shape torch.Size([1, 64, 6])
x.shape torch.Size([1, 128, 6])
3
xfinal.shape torch.Size([1, 128, 3])
x.shape torch.Size([1, 256, 3])
4
xfinal.shape torch.Size([1, 256, 3])
xt1.shape torch.Size([1, 256, 3])
xt2.shape torch.Size([1, 256, 3])
xt3.shape torch.Size([1, 256, 3])
pop.shape torch.Size([1, 256, 3])
pop.shape torch.Size([1, 128, 6])
pop.shape torch.Size([1, 64, 12])


(tensor([[[-4.1522e-02, -2.9378e-01,  4.7054e-02,  4.9391e-02,  3.3728e-01,
            1.7558e-01,  1.1479e-01,  5.2736e-02, -1.1865e-01,  1.0362e-02,
           -6.2314e-02,  3.4271e-01,  1.5515e-01,  9.3412e-03, -4.0863e-02,
           -3.5339e-01,  2.5662e-02,  7.4038e-03,  1.8390e-01,  2.8440e-02,
            9.7992e-02,  5.6724e-02, -1.1457e-01, -6.5287e-02,  2.4687e-01,
           -7.9911e-02,  1.5765e-01,  1.1945e-02, -9.7975e-02, -6.2015e-02,
            2.9638e-01,  9.2851e-02, -1.4049e-01, -5.5358e-02, -2.5892e-01],
          [-1.5442e-01, -3.6545e-01,  9.7571e-02,  2.3617e-01,  5.1139e-01,
            2.8877e-01,  1.6119e-01,  1.9594e-01,  1.4865e-01, -7.3018e-02,
            1.6743e-01,  7.1455e-02,  9.1255e-02, -1.0741e-01,  5.1165e-02,
           -2.1315e-01, -1.3991e-01,  3.8541e-01,  6.5951e-02,  1.3707e-01,
            1.2167e-01,  6.4475e-02,  4.2383e-02,  2.5059e-01, -1.7989e-01,
            1.1024e-01,  2.2004e-02,  1.5315e-01, -9.8456e-02, -3.0135e-01,
           

In [10]:
from diffuser.utils import report_parameters, batchify

# Print a parameter summary of the network.
report_parameters(model)

# Smoke-test the training path: compute the diffusion loss for one batched
# dataset item and backpropagate through it.
print('Testing forward...', end=' ', flush=True)
x = dataset[0]
batch = batchify(x)
# NOTE(review): an empty dict is passed as the conditions here, whereas the
# forward-pass and sampling cells pass batch.conditions -- confirm this is intended.
loss, _ = diffusion.loss(batch.trajectories, {})
loss.backward()
print('✓')

[ utils/arrays ] Total parameters: 3.96 M
         downs.3.0.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         ups.0.0.blocks.0.block.0.weight: 327.68 k | Conv1d(512, 128, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.0.b

### Train Model
It took me about 80 s to run 1 epoch, and the results were already pretty good after just 1 epoch.

In [11]:
n_steps_per_epoch = 1000
# Number of epochs implied by the full training budget (n_train_steps).
n_epochs = int(n_train_steps // n_steps_per_epoch)
print(n_epochs)
# Override: train for a single epoch only; the value computed above is just printed for reference.
n_epochs = 1

# Each "epoch" is one call to trainer.train with n_steps_per_epoch steps.
for i in range(n_epochs):
    print(f'Epoch {i} / {n_epochs} | {savepath}')
    trainer.train(n_train_steps=n_steps_per_epoch)

# Save a final checkpoint labelled with the number of epochs run.
trainer.save(n_epochs)

100
Epoch 0 / 1 | /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip/state_0.pt
0:   0.4084 | a0_loss:   0.0366 | t:   0.4417
100:   0.1645 | a0_loss:   0.0260 | t:   8.3628
200:   0.1279 | a0_loss:   0.0237 | t:   8.3448
300:   0.1126 | a0_loss:   0.0227 | t:   8.4424
400:   0.1062 | a0_loss:   0.0225 | t:   8.6351
500:   0.1037 | a0_loss:   0.0225 | t:   8.4802
600:   0.1027 | a0_loss:   0.0225 | t:   8.3252
700:   0.1023 | a0_loss:   0.0224 | t:   8.3139
800:   0.1021 | a0_loss:   0.0224 | t:   8.6167
900:   0.1019 | a0_loss:   0.0224 | t:   8.3957
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-backflip/state_1.pt


### (Optionally) load a checkpoint

In [11]:
# Alternative (disabled): restore a complete experiment from disk instead of
# reusing the trainer built above.
# from diffusion.diffuser.utils import load_diffusion
# diffusion_experiment = load_diffusion(
#     "/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/diffuser", dataset=dataset, epoch=1)

# renderer = diffusion_experiment.renderer
# model = diffusion_experiment.trainer.ema_model

# Optionally load a checkpoint: label 1 presumably refers to the state_1.pt
# file written by trainer.save(1) in the training cell.
# NOTE(review): `model` is rebound to the trainer's `ema_model` here, but the
# sampling cell calls `diffusion.p_sample_loop` rather than `model` -- confirm
# which weights are meant to be sampled from.
trainer.load(1)
model = trainer.ema_model

### Sample from model

In [12]:
from diffuser.utils import batchify
# Use the first dataset item as the template for sampling.
test = dataset[0]
batch = batchify(test)
# Run the sampling loop for an output with the same shape as the batched
# trajectories, passing that item's conditions along.
sample = diffusion.p_sample_loop(batch.trajectories.shape, batch.conditions)


[F                                                                                                    
[F1 / 100 [                                                            ]   1% | 2.6 Hz
t : 99 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F2 / 100 [#                                                           ]   2% | 5.1 Hz
t : 98 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F3 / 100 [#                                                           ]   3% | 7.5 Hz
t : 97 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
   

### Sanity check: is the output similar to the training data?

In [13]:
# Show the sampled result and the shape of its trajectories tensor.
sample, sample.trajectories.shape

(Sample(trajectories=tensor([[[-6.3426e-03, -1.4106e-02,  8.8482e-01,  1.0083e+00,  2.8225e-02,
            1.4014e-02,  2.3145e-03, -5.0685e-03, -1.6455e-02,  8.6228e-03,
            2.3576e-03,  2.7270e-01, -2.1090e-02,  0.0000e+00,  0.0000e+00,
            0.0000e+00,  1.5700e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            1.5700e+00, -4.5776e-02,  7.8547e-02, -9.3025e-04, -1.7629e-02,
            2.6840e-02, -6.6006e-02, -2.1890e-01, -4.1979e-02,  5.9533e-03,
           -2.5995e-02, -2.5347e-02, -2.7208e-02,  2.6668e-02,  2.0306e-01],
          [-1.7171e-02,  1.6941e-02,  9.1844e-01,  9.9335e-01,  1.0770e-02,
            6.4097e-02, -8.0007e-03,  2.9374e-03,  8.3548e-03, -3.6005e-03,
            1.6575e-02,  3.0403e-01, -2.3058e-02,  0.0000e+00,  0.0000e+00,
            0.0000e+00,  1.5700e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            1.5700e+00, -5.2275e-02, -5.1161e-03, -8.5535e-03,  3.3802e-03,
            3.5108e-02,  2.0444e-01, -1.8327e-01, -2.2429e-02, -5.5

In [14]:
# Show the first training item and its shape for a side-by-side comparison with the sample.
dataset[0], dataset[0].trajectories.shape

(Batch(trajectories=tensor([[ 0.0000e+00,  0.0000e+00,  8.8673e-01,  9.9941e-01,  2.9215e-02,
           1.7963e-02, -5.2500e-04,  8.7033e-04, -1.1000e-02,  1.1488e-03,
           6.6924e-03,  2.8207e-01, -2.3510e-02, -2.4409e-02, -1.4479e-01,
          -2.5156e-01,  2.4046e-01,  1.0909e-01, -2.0003e-01, -2.8516e-02,
           1.4893e-01, -4.8951e-02,  7.7775e-02,  2.2593e-04, -1.4186e-02,
           1.3967e-02, -7.2697e-02, -2.1047e-01, -4.1679e-02,  1.3137e-02,
          -2.5583e-02, -2.7859e-02, -2.3364e-02,  1.6182e-02,  2.1117e-01],
         [-2.0268e-02, -7.3500e-04,  9.0938e-01,  9.9920e-01,  3.0395e-02,
           2.4696e-02,  8.4250e-03, -4.4505e-05,  2.2081e-03, -3.1660e-03,
           1.1586e-02,  2.9219e-01, -1.8050e-02, -2.5669e-01, -1.8148e-01,
          -4.4781e-01,  2.3335e-01,  3.7712e-01, -2.4100e-01,  1.8741e-01,
           1.7199e-01, -6.5981e-02,  1.4936e-02, -4.7221e-03, -1.4254e-02,
           4.3702e-02,  2.0182e-01, -2.0957e-01, -3.0643e-02, -5.7684e-02,
     

### Check if constrained sampling is working

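The values at indices 13 to 20 should be fixed by the constraint so that it looks like it's carrying a box. In the output below they are 0, except for 1.57 at indices 16 and 20.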

In [15]:
# sample[0] appears to be the `trajectories` field (the first field in the
# Sample repr); the next two indices select the first batch element and its
# first timestep.
print(sample[0][0][0])

tensor([-6.3426e-03, -1.4106e-02,  8.8482e-01,  1.0083e+00,  2.8225e-02,
         1.4014e-02,  2.3145e-03, -5.0685e-03, -1.6455e-02,  8.6228e-03,
         2.3576e-03,  2.7270e-01, -2.1090e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  1.5700e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         1.5700e+00, -4.5776e-02,  7.8547e-02, -9.3025e-04, -1.7629e-02,
         2.6840e-02, -6.6006e-02, -2.1890e-01, -4.1979e-02,  5.9533e-03,
        -2.5995e-02, -2.5347e-02, -2.7208e-02,  2.6668e-02,  2.0306e-01],
       device='cuda:0')


### Save results to logs

In [16]:
import numpy as np


def save_motions(sample, output_dir, filename="motion.npy"):
    """Save a sampled trajectory to ``output_dir/filename`` in NumPy ``.npy`` format.

    Args:
        sample: Object exposing a ``trajectories`` tensor. A leading dimension
            of size 1, if present, is squeezed away before saving.
        output_dir: Directory to write into; it is created if missing.
        filename: Name of the file inside ``output_dir``. ``np.save`` appends
            ``.npy`` when the name does not already end with it.

    Returns:
        None. The array is written to disk and a confirmation line is printed.
    """
    # The log folder can exist without this subfolder, so create it on demand.
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    # Move the tensor to host memory before converting it to a NumPy array.
    pos_data = sample.trajectories.squeeze(0).cpu().numpy()
    np.save(filepath, pos_data)
    # Report only values passed to this function, so the message does not
    # depend on any notebook-global state.
    print(f"Motion saved as {filename} in {output_dir}")


save_motions(sample, f"{savepath}/sampled_motions", filename="motion1.npy")

Motion 0 saved as motion1.npy
