### Setup env
This setup is specific to this notebook: the module path is different on my VM, so the parent directory is appended to `sys.path` as a workaround.

In [3]:
import sys
import os

# Resolve the parent directory to an absolute path and make it importable,
# adding it to sys.path only when it is not already listed.
path = os.path.abspath('..')
if sys.path.count(path) == 0:
    sys.path.append(path)

### Load Dataset
You can change the type of motion by changing the file path.
The dataset currently just repeats the same motion 100 times.

In [18]:
from diffusion.data_loaders.motion_dataset import MotionDataset
# Build the dataset from a single reference-motion file; point this at a
# different file to train on another motion.
# NOTE(review): the path is relative — presumably resolved against the
# notebook's working directory; confirm before running from elsewhere.
dataset = MotionDataset("data/motions/humanoid3d_dance_b.txt")
# Display the dataset length, its first item, and the shape of that item's
# trajectory tensor.
len(dataset), dataset[0], dataset[0].trajectories.shape

Tmp angle [0.0, 0.0, 0.78376018, 0.9808313384999998, 0.1189326497, 0.15323081459999996, -0.018580306399999996, -0.05478197593616119, -0.026172051539739314, 0.014375909756315875, -0.19993647449072474, 0.3285354293520373, 0.1906637990847742, 0.36137716110543394, -0.7436360474914797, 1.8424103204285334, 0.8445403907, 0.09932125112503785, 0.30724154600497, 0.35361953315845085, 0.4740349157, 0.223112496519707, 0.10180857617828531, -0.14935436908675828, -0.1758576301, 0.26386327995533265, 0.025550573607299266, 0.26352809503472835, -0.3431028336232349, -0.6613714314360651, 0.07508298155110528, -0.9549970431, -0.118875289148878, -0.5143185800783003, -0.16694505887179326] 35


(100,
 Batch(trajectories=tensor([[ 0.0000,  0.0000,  0.7838,  ..., -0.1189, -0.5143, -0.1669],
         [ 0.0029, -0.0054,  0.7806,  ..., -0.1315, -0.5176, -0.1904],
         [ 0.0060, -0.0111,  0.7779,  ..., -0.1396, -0.5187, -0.2048],
         ...,
         [ 0.1405, -0.0450,  0.7955,  ..., -0.2444, -0.5266, -0.2034],
         [ 0.1402, -0.0486,  0.7909,  ..., -0.2500, -0.5194, -0.2263],
         [ 0.1402, -0.0525,  0.7857,  ..., -0.2550, -0.5115, -0.2476]]), conditions={0: tensor([ 0.0000,  0.0000,  0.7838,  0.9808,  0.1189,  0.1532, -0.0186, -0.0548,
         -0.0262,  0.0144, -0.1999,  0.3285,  0.1907,  0.3614, -0.7436,  1.8424,
          0.8445,  0.0993,  0.3072,  0.3536,  0.4740,  0.2231,  0.1018, -0.1494,
         -0.1759,  0.2639,  0.0256,  0.2635, -0.3431, -0.6614,  0.0751, -0.9550,
         -0.1189, -0.5143, -0.1669])}),
 torch.Size([152, 35]))

### Setup Model
Configure your experiment name and save paths here; everything will be stored under the logs folder later on.

In [5]:
import os
import torch
from diffusion.diffuser.utils import Trainer as dTrainer, Config as dConfig

# Experiment name; every artifact of this run is written beneath `savepath`.
# NOTE(review): the dataset cell loads humanoid3d_dance_b.txt while this name
# says "dance-a" — confirm the name matches the motion being trained on.
exp_name = "test-constrained-sampling-holding-a-box-dance-a"
# NOTE(review): absolute, machine-specific path — adjust when running on
# another machine.
savepath = f'/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{exp_name}'
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Create the run directory together with its `sampled_motions` subfolder.
# makedirs creates the intermediate `savepath` as needed, and exist_ok=True
# keeps the cell idempotent. Unlike an `if not os.path.exists(savepath)` guard,
# this still creates `sampled_motions` when `savepath` already exists.
os.makedirs(os.path.join(savepath, 'sampled_motions'), exist_ok=True)

In [6]:
from diffusion.diffuser.models.temporal import TemporalUnet 

# Read both trajectory dimensions off the first dataset item: the first axis
# becomes `horizon`, the second becomes `transition_dim`.
horizon, transition_dim = (
    dataset[0].trajectories.shape[0],
    dataset[0].trajectories.shape[1],
)

# Keyword arguments for the TemporalUnet config. The condition dimension is
# set equal to the transition dimension.
unet_kwargs = dict(
    savepath=(savepath, 'model_config.pkl'),
    horizon=horizon,
    transition_dim=transition_dim,
    cond_dim=transition_dim,
    device=device,
)
model_config = dConfig(TemporalUnet, **unet_kwargs)

# Calling the config object instantiates the network.
model = model_config()


[utils/config ] Config: <class 'diffusion.diffuser.models.temporal.TemporalUnet'>
    cond_dim: 35
    horizon: 96
    transition_dim: 35

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a/model_config.pkl

[ models/temporal ] Channel dimensions: [(35, 32), (32, 64), (64, 128), (128, 256)]
[(35, 32), (32, 64), (64, 128), (128, 256)]


In [7]:
from diffusion.diffuser.models.diffusion import GaussianDiffusion

# Diffusion hyperparameters. Only the very basic ones are set here; some of
# these parameters are for conditioning.
n_timesteps = 100
loss_type = 'l2'
clip_denoised = False
predict_epsilon = False
action_weight = 5
loss_weights = None
loss_discount = 1

# Collect the GaussianDiffusion settings in one mapping, then hand them to the
# config wrapper in a single call.
diffusion_kwargs = {
    'savepath': (savepath, 'diffusion_config.pkl'),
    'horizon': horizon,
    'transition_dim': transition_dim,
    'n_timesteps': n_timesteps,
    'loss_type': loss_type,
    'clip_denoised': clip_denoised,
    'predict_epsilon': predict_epsilon,
    # loss weighting
    'action_weight': action_weight,
    'loss_weights': loss_weights,
    'loss_discount': loss_discount,
    'device': device,
}
diffusion_config = dConfig(GaussianDiffusion, **diffusion_kwargs)

# Instantiate the diffusion wrapper around the network built earlier.
diffusion = diffusion_config(model)


[utils/config ] Config: <class 'diffusion.diffuser.models.diffusion.GaussianDiffusion'>
    action_weight: 5
    clip_denoised: False
    horizon: 96
    loss_discount: 1
    loss_type: l2
    loss_weights: None
    n_timesteps: 100
    predict_epsilon: False
    transition_dim: 35

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a/diffusion_config.pkl



### Setup Training

In [8]:
# Optimisation and bookkeeping settings for the trainer.
learning_rate = 2e-4
gradient_accumulate_every = 2
ema_decay = 0.995
sample_freq = 2000
save_freq = 2000
n_train_steps = 1e5
n_saves = 5
save_parallel = False
bucket = None
n_reference = 8
train_batch_size = 32

# Gather the Trainer settings in one mapping before building the config.
trainer_kwargs = {
    'savepath': (savepath, 'trainer_config.pkl'),
    'train_batch_size': train_batch_size,
    'train_lr': learning_rate,
    'gradient_accumulate_every': gradient_accumulate_every,
    'ema_decay': ema_decay,
    'sample_freq': sample_freq,
    'save_freq': save_freq,
    # n_train_steps is a float (1e5), hence the int() around the floor division.
    'label_freq': int(n_train_steps // n_saves),
    'save_parallel': save_parallel,
    'results_folder': savepath,
    'bucket': bucket,
    'n_reference': n_reference,
}
trainer_config = dConfig(dTrainer, **trainer_kwargs)

# Build the trainer for the diffusion model and dataset; no renderer is used.
trainer = trainer_config(diffusion, dataset, renderer=None)


[utils/config ] Config: <class 'diffusion.diffuser.utils.training.Trainer'>
    bucket: None
    ema_decay: 0.995
    gradient_accumulate_every: 2
    label_freq: 20000
    n_reference: 8
    results_folder: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a
    sample_freq: 2000
    save_freq: 2000
    save_parallel: False
    train_batch_size: 32
    train_lr: 0.0002

[ utils/config ] Saved config to: /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a/trainer_config.pkl



### Test if model and training loop works

In [9]:
import torch
from diffuser.utils import batchify

# Smoke test: push one batch through the bare network and inspect the output.
tunet = model
tunet.to(device)
test_data = dataset[0]
batch = batchify(test_data)
# Draw the timestep from the range the diffusion is actually configured with
# (n_timesteps, set in the diffusion cell) instead of a hard-coded 1000.
# torch.randint already returns an int64 tensor on the requested device, so no
# extra .long()/.to(device) is needed.
t = torch.randint(0, n_timesteps, (1,), device=device)
res = tunet(batch.trajectories, cond=batch.conditions, time=t, verbose=True)
# Display the network output alongside its shape.
res, res.shape

x.shape torch.Size([1, 96, 35])
x.shape torch.Size([1, 35, 96])
x.shape torch.Size([1, 32, 96])
1
xfinal.shape torch.Size([1, 32, 48])
x.shape torch.Size([1, 64, 48])
2
xfinal.shape torch.Size([1, 64, 24])
x.shape torch.Size([1, 128, 24])
3
xfinal.shape torch.Size([1, 128, 12])
x.shape torch.Size([1, 256, 12])
4
xfinal.shape torch.Size([1, 256, 12])
xt1.shape torch.Size([1, 256, 12])
xt2.shape torch.Size([1, 256, 12])
xt3.shape torch.Size([1, 256, 12])
pop.shape torch.Size([1, 256, 12])
pop.shape torch.Size([1, 128, 24])
pop.shape torch.Size([1, 64, 48])


(tensor([[[ 0.4984, -0.5606, -0.3092,  ..., -0.3186,  0.1175,  0.5711],
          [ 0.3266,  0.0279, -0.1262,  ..., -0.4264, -0.1249,  0.2344],
          [ 0.5482, -0.0678, -0.4978,  ...,  0.3667,  0.3144,  0.6020],
          ...,
          [-0.2615,  0.9959, -0.1522,  ..., -0.2135,  0.9933,  0.1541],
          [ 0.7626,  0.2618, -0.3036,  ..., -0.2087,  0.2411,  0.1918],
          [ 0.2449,  0.2712, -0.4646,  ..., -0.0249,  0.4258,  0.3678]]],
        device='cuda:0', grad_fn=<PermuteBackward0>),
 torch.Size([1, 96, 35]))

In [10]:
from diffuser.utils import report_parameters, batchify

# Print a parameter summary for the network.
report_parameters(model)

print('Testing forward...', end=' ', flush=True)
x = dataset[0]
batch = batchify(x)
# NOTE(review): an empty conditions dict is passed here although
# batch.conditions is available — confirm this is intentional for the test.
loss, _ = diffusion.loss(batch.trajectories, {})
# Run a backward pass as well; no optimizer step is taken in this cell.
loss.backward()
print('✓')

[ utils/arrays ] Total parameters: 3.96 M
         downs.3.0.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         ups.0.0.blocks.0.block.0.weight: 327.68 k | Conv1d(512, 128, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block1.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.0.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         mid_block2.blocks.1.block.0.weight: 327.68 k | Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
         downs.3.0.b

### Train Model
It took me 80s to run 1 epoch and results were pretty good from just 1 epoch

In [11]:
n_steps_per_epoch = 1000
# Epoch count implied by the full step budget; printed for reference only.
print(int(n_train_steps // n_steps_per_epoch))
# The run itself is capped at a single epoch.
n_epochs = 1

for i in range(n_epochs):
    print(f'Epoch {i} / {n_epochs} | {savepath}')
    trainer.train(n_train_steps=n_steps_per_epoch)

# Write a final checkpoint labelled with the number of epochs run.
trainer.save(n_epochs)

100
Epoch 0 / 1 | /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a/state_0.pt
0:   0.3678 | a0_loss:   0.1674 | t:   0.4784
100:   0.2190 | a0_loss:   0.1309 | t:   8.5328
200:   0.2110 | a0_loss:   0.1288 | t:   8.4568
300:   0.2086 | a0_loss:   0.1282 | t:   8.3370
400:   0.2078 | a0_loss:   0.1280 | t:   8.6017
500:   0.2075 | a0_loss:   0.1279 | t:   8.4035
600:   0.2072 | a0_loss:   0.1278 | t:   8.6204
700:   0.2071 | a0_loss:   0.1278 | t:   8.3511
800:   0.2070 | a0_loss:   0.1278 | t:   8.3714
900:   0.2069 | a0_loss:   0.1277 | t:   8.2623
[ utils/training ] Saved model to /home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/test-constrained-sampling-holding-a-box-dance-a/state_1.pt


### (Optionally) load a checkpoint

In [12]:
# Alternative (disabled): rebuild the whole experiment from a saved logs folder.
# from diffusion.diffuser.utils import load_diffusion
# diffusion_experiment = load_diffusion(
#     "/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/diffuser", dataset=dataset, epoch=1)

# renderer = diffusion_experiment.renderer
# model = diffusion_experiment.trainer.ema_model

# Optionally load a checkpoint
# The label 1 matches what the training cell passes to trainer.save (n_epochs is 1 there).
trainer.load(1)
# NOTE(review): this rebinds `model` (previously the TemporalUnet instance) to
# the trainer's EMA model, but the sampling cell calls diffusion.p_sample_loop
# rather than `model` — confirm which weights are meant to be sampled.
model = trainer.ema_model

### Sample from model

In [13]:
from diffuser.utils import batchify
# Use the first dataset item as the template for the output shape and conditions.
test = dataset[0]
batch = batchify(test)
# Run the sampling loop with the batch's trajectory shape and its conditions.
# NOTE(review): no random seed is set, so samples presumably differ between runs.
sample = diffusion.p_sample_loop(batch.trajectories.shape, batch.conditions)


[F                                                                                                    
[F1 / 100 [                                                            ]   1% | 4.5 Hz
t : 99 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F2 / 100 [#                                                           ]   2% | 8.5 Hz
t : 98 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
                                                                                                    
[F[F3 / 100 [#                                                           ]   3% | 12.3 Hz
t : 97 | vmax : 0.0 | vmin : 0.0
[F[F                                                                                                    
  

### Sanity check: is the output similar to the training data?

In [14]:
# Display the sampled result together with the shape of its trajectory tensor.
sample, sample.trajectories.shape

(Sample(trajectories=tensor([[[-0.0024,  0.0256,  0.8210,  ..., -0.1627,  0.0534,  0.4475],
          [ 0.0069,  0.0330,  0.8048,  ..., -0.1494, -0.0576,  0.4609],
          [-0.0047,  0.0363,  0.8009,  ..., -0.0756, -0.1584,  0.4538],
          ...,
          [ 0.1604,  1.0353,  0.9207,  ..., -0.2798,  0.4971,  0.3876],
          [ 0.1631,  0.9910,  0.9145,  ..., -0.2420,  0.3584,  0.3468],
          [ 0.1046,  1.0092,  0.8873,  ..., -0.1935,  0.2311,  0.3411]]],
        device='cuda:0'), values=tensor([0.], device='cuda:0'), chains=None),
 torch.Size([1, 96, 35]))

In [15]:
# Display the first training item and its trajectory shape, for comparison with the sample.
dataset[0], dataset[0].trajectories.shape

(Batch(trajectories=tensor([[ 0.0000,  0.0000,  0.8412,  ..., -0.1657,  0.0620,  0.4363],
         [-0.0042,  0.0209,  0.8239,  ..., -0.1308, -0.0612,  0.4480],
         [-0.0077,  0.0406,  0.8073,  ..., -0.0710, -0.1537,  0.4579],
         ...,
         [ 0.1867,  1.0700,  0.9073,  ..., -0.2866,  0.4868,  0.3938],
         [ 0.1925,  1.0912,  0.8936,  ..., -0.2435,  0.3195,  0.3669],
         [ 0.1964,  1.1110,  0.8786,  ..., -0.2187,  0.1812,  0.3456]]), conditions={0: tensor([ 0.0000e+00,  0.0000e+00,  8.4124e-01,  9.6470e-01,  1.3950e-01,
          2.2107e-01, -3.1969e-02, -1.7036e-01,  3.3696e-01, -7.5271e-02,
         -4.6382e-01, -3.1054e-01,  5.7588e-01,  2.9469e+00, -6.9392e-01,
         -2.2105e+00,  8.0813e-01,  2.3368e+00,  9.4218e-05,  5.6050e-01,
          1.7677e-01, -6.8921e-02, -2.9141e-01, -7.8512e-01, -8.8701e-01,
          6.5749e-01,  1.6143e-01, -2.8303e-01,  6.4449e-03, -7.9685e-01,
          4.2248e-01, -5.1729e-01, -1.6569e-01,  6.1982e-02,  4.3629e-01])}),
 to

### Check if constrained sampling is working

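The values at indices 13 to 20 should be held at their constrained values so that the motion looks like it is carrying a box. In the output below these are 0 everywhere except indices 16 and 20, which are 1.57.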

In [16]:
# Print the first frame of the first sampled trajectory so the constrained
# indices can be inspected.
print(sample.trajectories[0][0])

tensor([-0.0024,  0.0256,  0.8210,  0.9615,  0.1420,  0.2100, -0.0322, -0.1775,
         0.3124, -0.0610, -0.4714, -0.3113,  0.5670,  0.0000,  0.0000,  0.0000,
         1.5700,  0.0000,  0.0000,  0.0000,  1.5700, -0.0451, -0.3067, -0.7929,
        -0.8872,  0.6484,  0.1866, -0.2729, -0.0048, -0.7787,  0.4097, -0.5273,
        -0.1627,  0.0534,  0.4475], device='cuda:0')


### Save results to logs

In [17]:
import numpy as np


def save_motions(sample, output_dir, filename="motion.npy"):
    """Save a sampled trajectory to ``output_dir/filename`` as a NumPy ``.npy`` file.

    Args:
        sample: Object exposing a ``trajectories`` torch tensor. A leading
            dimension of size 1 is squeezed away before saving.
        output_dir: Directory to write into; created if it does not exist.
        filename: Name of the output file inside ``output_dir``.

    Returns:
        None. The array is written to disk and a confirmation is printed.
    """
    # Create the target folder here so saving does not depend on an earlier
    # cell having made it.
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    # detach() keeps the conversion valid even if the tensor tracks gradients.
    pos_data = sample.trajectories.squeeze(0).detach().cpu().numpy()
    np.save(filepath, pos_data)
    # The message used to interpolate a global `i` that was never defined in
    # this function, raising NameError unless some other cell had set it.
    print(f"Motion saved as {filename}")


# Write the sample into the run's sampled_motions folder.
save_motions(sample, os.path.join(savepath, "sampled_motions"), filename="motion1.npy")

Motion 0 saved as motion1.npy
