In [1]:
import sys
import os

# Put the parent of the current working directory on the import path so that
# packages living one level up can be imported by the cells below.
path = os.path.abspath(os.pardir)
already_importable = path in sys.path
if not already_importable:
    sys.path.append(path)

In [2]:
from diffusion.data_loaders.spinkick_frame_dataset import SpinkickFramesDataset
# Reference motion file for the dataset.
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# before the notebook can run on another checkout.
motion_file = "/home/kenji/Fyp/DeepMimic_mujoco/diffusion/data/motions/humanoid3d_spinkick.txt"
dataset = SpinkickFramesDataset(motion_file)

# Show the number of items and the shape of the first one.
len(dataset), dataset[0].shape

(78, torch.Size([78, 44]))

In [3]:
# Display the first dataset item to eyeball its values.
dataset[0]

tensor([[ 0.0167,  0.0000,  0.8251,  ..., -0.3155,  0.2235,  1.4452],
        [ 0.0167, -0.0098,  0.8223,  ..., -0.2932,  0.2308,  1.4881],
        [ 0.0167, -0.0205,  0.8187,  ..., -0.2659,  0.2363,  1.5274],
        ...,
        [ 0.0167,  0.4499,  0.7884,  ..., -0.3115,  0.1044,  1.2661],
        [ 0.0167,  0.4598,  0.8026,  ..., -0.2961,  0.0695,  1.1802],
        [ 0.0000,  0.4689,  0.8251,  ..., -0.2681,  0.0327,  1.1281]])

### Model: motion transformer network

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding followed by dropout.

    A table ``pe`` of shape ``[max_len, 1, d_model]`` is precomputed and stored
    as a buffer. Even feature columns hold ``sin(pos * freq)`` and odd columns
    hold ``cos(pos * freq)``.

    ``forward`` is **sequence-first**: it slices the table with ``x.shape[0]``,
    so ``x`` must be laid out as ``[seq_len, batch, d_model]``. Callers holding
    batch-first tensors have to transpose before and after the call.

    Args:
        d_model: Feature width of the encoding (odd values are supported).
        dropout: Dropout probability applied to the encoded output.
        max_len: Number of positions in the precomputed table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine column than sine column, so
        # trim the angles to the number of odd-indexed slots actually available.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # [max_len, d_model] -> [max_len, 1, d_model]; the singleton dim
        # broadcasts over the batch in forward().
        pe = pe.unsqueeze(0).transpose(0, 1)

        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for positions ``0 .. x.shape[0] - 1`` and apply dropout.

        Args:
            x: Tensor laid out as ``[seq_len, batch, d_model]`` with
               ``seq_len <= max_len``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        x = x + self.pe[:x.shape[0], :]
        return self.dropout(x)


class TimestepEmbedder(nn.Module):
    """Turn integer diffusion timesteps into learned embedding vectors.

    The rows of ``sequence_pos_encoder.pe`` selected by the timesteps are passed
    through a two-layer MLP (Linear -> SiLU -> Linear) of width ``latent_dim``.

    Args:
        latent_dim: Width of the looked-up rows and of the produced embedding.
        sequence_pos_encoder: Object exposing an indexable ``pe`` tensor.
    """

    def __init__(self, latent_dim, sequence_pos_encoder):
        super().__init__()
        self.latent_dim = latent_dim
        self.sequence_pos_encoder = sequence_pos_encoder

        # Hidden and output widths are both equal to the latent width.
        width = self.latent_dim
        mlp_layers = [
            nn.Linear(self.latent_dim, width),
            nn.SiLU(),
            nn.Linear(width, width),
        ]
        self.time_embed = nn.Sequential(*mlp_layers)

    def forward(self, timesteps):
        """Return the MLP output for the table rows indexed by ``timesteps``."""
        table = self.sequence_pos_encoder.pe
        selected_rows = table[timesteps]
        return self.time_embed(selected_rows)

In [19]:
class MotionTransformer(nn.Module):
    """Transformer-encoder network mapping a noisy frame sequence to a frame sequence.

    Each frame is linearly embedded to ``latent_dim``, a single timestep token is
    prepended, positional encodings are added, the sequence goes through a
    batch-first ``nn.TransformerEncoder``, the timestep token is dropped and the
    remaining tokens are projected back to ``frame_dim``.

    Args:
        frame_dim: Number of features per frame.
        latent_dim: Transformer model width.
        ff_size: Hidden width of the encoder feed-forward sub-layers.
        num_layers: Number of encoder layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability (positional encoder and encoder layers).
        activation: Activation name forwarded to ``nn.TransformerEncoderLayer``.
    """

    def __init__(self, frame_dim, latent_dim=256, ff_size=1024, num_layers=8, num_heads=4, dropout=0.1, activation="gelu"):
        super(MotionTransformer, self).__init__()

        self.frame_dim = frame_dim
        self.latent_dim = latent_dim
        self.ff_size = ff_size
        self.dropout = dropout

        self.frameEmbedding = nn.Linear(self.frame_dim, self.latent_dim)
        self.sequence_pos_encoder = PositionalEncoding(self.latent_dim, self.dropout)
        # The timestep embedder reads rows from the positional table above.
        self.embed_timestep = TimestepEmbedder(self.latent_dim, self.sequence_pos_encoder)

        # Transformer Encoder (batch-first: inputs are [bs, seq, dim])
        encoder_layers = nn.TransformerEncoderLayer(d_model=self.latent_dim, nhead=num_heads,
                                                    dim_feedforward=ff_size, dropout=dropout, activation=activation, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        # Output Linear Layer
        self.frameOutEmbedding = nn.Linear(self.latent_dim, self.frame_dim)

    def forward(self, x: torch.Tensor, timesteps, y=None, verbose=False):
        """Run the network on a batch of frame sequences.

        Args:
            x: ``[batch_size, n_frames, frame_dim]`` input frames.
            timesteps: ``[batch_size]`` integer tensor; used to index the
                positional table, so values must be below its length.
            y: Accepted for call compatibility; not used by this model.
            verbose: When true, print the shape and contents of every
                intermediate tensor.

        Returns:
            ``[batch_size, n_frames, frame_dim]`` tensor.
        """
        emb = self.embed_timestep(timesteps)  # [bs, 1, latent_dim]
        if verbose:
            print("Emb", emb.shape)
            print(emb)

        x = self.frameEmbedding(x)  # [bs, n_frames, latent_dim]
        if verbose:
            print("Frame Embedding", x.shape)
            print(x)

        # Prepend the timestep token to the frame tokens.
        xseq = torch.cat((emb, x), dim=1)  # [bs, n_frames+1, latent_dim]
        if verbose:
            print("Concat x and zkx", xseq.shape)
            print(xseq)

        # PositionalEncoding slices its table by dim 0, i.e. it expects
        # [seq, bs, dim]. Feeding the batch-first tensor directly would select
        # encodings by batch index and broadcast one row over every frame, so
        # go sequence-first for the call and come back to batch-first after.
        xseq = self.sequence_pos_encoder(xseq.transpose(0, 1)).transpose(0, 1)  # [bs, n_frames+1, latent_dim]
        if verbose:
            print("Sequence Pos Encoder", xseq.shape)
            print(xseq)

        # Drop the leading timestep token from the encoder output.
        output = self.transformer_encoder(xseq)[:, 1:, :]  # [bs, n_frames, latent_dim]
        if verbose:
            print("Transformer Encoder", output.shape)
            print(output)

        # Output Linear
        output = self.frameOutEmbedding(output)  # [bs, n_frames, frame_dim]
        if verbose:
            print("Output Embedding", output.shape)
            print(output)

        return output

In [6]:
from torch.utils.data import DataLoader

# Wrap the dataset in a shuffled loader that yields one item per batch.
batch_size = 1
loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
dataloader = DataLoader(dataset, **loader_options)

# Per-frame feature count, and the first value of the first frame.
frame_dim = dataset[0].size(1)
duration = dataset[0][0, 0]

# Prefer the first CUDA device when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model = MotionTransformer(
    frame_dim=frame_dim,
    latent_dim=64,
    ff_size=256,
    num_layers=8,
    num_heads=4,
    dropout=0.1,
    activation="gelu",
)
model = model.to(device)

frame_dim, duration

(44, tensor(0.0167))

In [7]:
# Smoke test: push the first dataset item through the model as a batch of one,
# at timestep 0, printing every intermediate tensor.
test_sample = dataset[0][None].to(device)
print(test_sample.shape)

timestep_zero = torch.tensor([0], device=device)
res = model(test_sample, timestep_zero, verbose=True)
res.shape

torch.Size([1, 78, 44])
Emb torch.Size([1, 1, 64])
tensor([[[ 0.1355,  0.1509, -0.0536,  0.2472,  0.0963,  0.0171, -0.0135,
           0.0531, -0.1553,  0.1024,  0.2645,  0.1261, -0.2497, -0.1488,
           0.1749, -0.2271, -0.2412,  0.0258,  0.0690, -0.0830, -0.2255,
          -0.1366, -0.0173, -0.1064, -0.1128,  0.0208,  0.1736, -0.1189,
           0.1080, -0.1464,  0.0616, -0.1079,  0.2156,  0.0233, -0.0911,
           0.2576, -0.1041,  0.1393, -0.1299, -0.0093,  0.0583, -0.0830,
           0.0744, -0.1979, -0.1126, -0.0234,  0.1259, -0.0041, -0.0150,
          -0.1221,  0.0012,  0.0747,  0.0890,  0.1052,  0.2678,  0.0535,
           0.1055,  0.0157, -0.1330,  0.0818,  0.0618, -0.0543,  0.0592,
           0.0396]]], device='cuda:0', grad_fn=<ViewBackward0>)
Frame Embedding torch.Size([1, 78, 64])
tensor([[[-0.5846,  0.1858,  0.3289,  ...,  0.0328,  0.1630, -0.5043],
         [-0.5850,  0.2093,  0.3227,  ...,  0.0335,  0.1627, -0.4947],
         [-0.5819,  0.2296,  0.3149,  ...,  0.

torch.Size([1, 78, 44])

In [8]:
from diffusion.diffusion import gaussian_diffusion as gd
from diffusion.diffusion.respace import SpacedDiffusion, space_timesteps

def create_gaussian_diffusion(
        diffusion_steps,
        noise_schedule,
        sigma_small,
        lambda_vel, lambda_rcxyz, lambda_fc
        ):
    """Build a ``SpacedDiffusion`` that predicts x_start with an MSE loss.

    Args:
        diffusion_steps: Number of diffusion steps (e.g. 1000).
        noise_schedule: Schedule name passed to ``gd.get_named_beta_schedule``
            (e.g. 'linear' or 'cosine').
        sigma_small: Selects the ``FIXED_SMALL`` variance type when truthy,
            ``FIXED_LARGE`` otherwise.
        lambda_vel, lambda_rcxyz, lambda_fc: Loss weights forwarded unchanged
            to ``SpacedDiffusion``.

    Returns:
        The configured ``SpacedDiffusion`` instance.
    """
    # Fixed settings for this notebook.
    predict_xstart = True       # the model always predicts x_start (x0)
    learn_sigma = False
    rescale_timesteps = False
    scale_beta = 1.             # no scaling
    timestep_respacing = ''     # empty -> use every step (no respacing)
    steps = diffusion_steps

    betas = gd.get_named_beta_schedule(noise_schedule, steps, scale_beta)
    loss_type = gd.LossType.MSE

    if not timestep_respacing:
        timestep_respacing = [steps]

    if predict_xstart:
        mean_type = gd.ModelMeanType.START_X
    else:
        mean_type = gd.ModelMeanType.EPSILON

    if learn_sigma:
        var_type = gd.ModelVarType.LEARNED_RANGE
    elif sigma_small:
        var_type = gd.ModelVarType.FIXED_SMALL
    else:
        var_type = gd.ModelVarType.FIXED_LARGE

    return SpacedDiffusion(
        use_timesteps=space_timesteps(steps, timestep_respacing),
        betas=betas,
        model_mean_type=mean_type,
        model_var_type=var_type,
        loss_type=loss_type,
        rescale_timesteps=rescale_timesteps,
        lambda_vel=lambda_vel,
        lambda_rcxyz=lambda_rcxyz,
        lambda_fc=lambda_fc,
    )

In [21]:
# Build the network and the diffusion process used for training and sampling.
# NOTE(review): this rebinds `model` to a freshly initialised network. Per the
# execution counts this cell ([21]) last ran after the training cell ([14]),
# so later cells may be using untrained weights - confirm, and re-run training
# or load a checkpoint before sampling.
model_config = dict(
    frame_dim=frame_dim,
    latent_dim=64,
    ff_size=128,
    num_layers=4,
    num_heads=4,
    dropout=0.1,
    activation="gelu",
)
model = MotionTransformer(**model_config).to(device)

diffusion_config = dict(
    diffusion_steps=1000,
    noise_schedule="cosine",
    sigma_small=True,
    lambda_vel=0.5,
    lambda_rcxyz=0.5,
    lambda_fc=1,
)
diffusion = create_gaussian_diffusion(**diffusion_config)

In [12]:
class DefaultArgs:
    """Attribute bag holding the run options used by this notebook.

    Only the three paths are configurable; every other option is a fixed
    default. Attributes are assigned section by section in a fixed order, so
    ``vars(instance)`` lists them in that same order.

    Args:
        save_dir: Stored as ``save_dir`` (training options).
        model_path: Stored as ``model_path`` (sampling options).
        eval_model_path: Stored as ``eval_model_path`` (evaluation options).
    """

    def __init__(self, save_dir, model_path, eval_model_path):
        self._set_base_options()
        self._set_diffusion_options()
        self._set_model_options()
        self._set_data_options()
        self._set_training_options(save_dir)
        self._set_sampling_options(model_path)
        self._set_generate_options()
        self._set_edit_options()
        self._set_evaluation_options(eval_model_path)

    def _set_base_options(self):
        """Base options: device selection, seed and batch size."""
        self.cuda = True
        if torch.cuda.is_available():
            self.device = "cuda:0"
        else:
            self.device = "cpu"
        self.seed = 10
        self.batch_size = 1

    def _set_diffusion_options(self):
        """Diffusion options."""
        self.noise_schedule = 'cosine'
        self.diffusion_steps = 1000
        self.sigma_small = True

    def _set_model_options(self):
        """Model options."""
        self.arch = 'trans_enc'
        self.emb_trans_dec = False
        self.layers = 4
        self.latent_dim = 128
        self.cond_mask_prob = 0.1
        self.lambda_rcxyz = 1.0
        self.lambda_vel = 1.0
        self.lambda_fc = 1.0
        self.unconstrained = False  # This is inferred from the 'action' parameter

    def _set_data_options(self):
        """Data options."""
        self.dataset = 'humanml'
        self.data_dir = ""

    def _set_training_options(self, save_dir):
        """Training options; ``save_dir`` comes from the constructor."""
        self.save_dir = save_dir
        self.overwrite = False
        self.train_platform_type = 'NoPlatform'
        self.lr = 1e-4
        self.weight_decay = 0.0
        self.lr_anneal_steps = 0
        self.eval_batch_size = 16
        self.eval_split = 'test'
        self.eval_during_training = False
        self.eval_rep_times = 3
        self.eval_num_samples = 1000
        self.log_interval = 500
        self.save_interval = 1000
        self.num_steps = 5000
        self.resume_checkpoint = ""

    def _set_sampling_options(self, model_path):
        """Sampling options; ``model_path`` comes from the constructor."""
        self.model_path = model_path
        self.output_dir = ''
        self.num_samples = 10
        self.num_repetitions = 3
        self.guidance_param = 2.5

    def _set_generate_options(self):
        """Generate options."""
        self.motion_length = 6.0
        self.input_text = ''
        self.action_file = ''
        self.text_prompt = ''
        self.action_name = ''

    def _set_edit_options(self):
        """Edit options."""
        self.edit_mode = 'in_between'
        self.text_condition = ''
        self.prefix_end = 0.25
        self.suffix_start = 0.75

    def _set_evaluation_options(self, eval_model_path):
        """Evaluation options; ``eval_model_path`` comes from the constructor."""
        self.eval_model_path = eval_model_path
        self.eval_mode = 'wo_mm'
        self.eval_guidance_param = 2.5


In [13]:
# Name of this experiment; it selects the log directory below.
expname = "model-v2-frame-data"

# NOTE(review): machine-specific absolute paths; adjust for other checkouts.
experiment_dir = f"/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{expname}"
checkpoint_path = f"/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{expname}/model.pt"

# The same checkpoint path is used for both sampling and evaluation.
args = DefaultArgs(
    save_dir=experiment_dir,
    model_path=checkpoint_path,
    eval_model_path=checkpoint_path,
)
args.device

'cuda:0'

In [14]:
from train.training_loop import TrainLoop
# Train `model` on `dataloader`; the second positional argument is left as None.
train_loop = TrainLoop(args, None, model, diffusion, dataloader)
train_loop.run_loop()

Starting epoch 0/65


  0%|          | 0/78 [00:00<?, ?it/s]

Logging to /tmp/openai-2023-12-25-08-50-27-848752
----------------------------
| frame_loss    | 2.06e+03 |
| frame_loss_q1 | 2.06e+03 |
| grad_norm     | 9.15e+03 |
| loss          | 2.32e+04 |
| loss_q1       | 2.09e+03 |
| param_norm    | 36.4     |
| rcxyz_mse     | 63.9     |
| rcxyz_mse_q1  | 57.7     |
| samples       | 1        |
| step          | 0        |
| vel_mse       | 4.23e+04 |
| vel_mse_q1    | 0        |
----------------------------
step[0]: loss[23224.81133]
saving model...


  1%|▏         | 1/78 [00:00<00:48,  1.59it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:21<00:00,  3.70it/s]


Starting epoch 1/65


100%|██████████| 78/78 [00:21<00:00,  3.64it/s]


Starting epoch 2/65


100%|██████████| 78/78 [00:24<00:00,  3.25it/s]


Starting epoch 3/65


100%|██████████| 78/78 [00:22<00:00,  3.46it/s]


Starting epoch 4/65


100%|██████████| 78/78 [00:24<00:00,  3.22it/s]


Starting epoch 5/65


100%|██████████| 78/78 [00:22<00:00,  3.47it/s]


Starting epoch 6/65


 41%|████      | 32/78 [00:08<00:10,  4.18it/s]

----------------------------
| frame_loss    | 263      |
| frame_loss_q0 | 239      |
| frame_loss_q1 | 283      |
| frame_loss_q2 | 291      |
| frame_loss_q3 | 239      |
| grad_norm     | 677      |
| loss          | 1.05e+04 |
| loss_q0       | 246      |
| loss_q1       | 290      |
| loss_q2       | 299      |
| loss_q3       | 245      |
| param_norm    | 36.3     |
| rcxyz_mse     | 13.9     |
| rcxyz_mse_q0  | 12.5     |
| rcxyz_mse_q1  | 14       |
| rcxyz_mse_q2  | 15.5     |
| rcxyz_mse_q3  | 12.5     |
| samples       | 501      |
| step          | 500      |
| vel_mse       | 2.04e+04 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------


 42%|████▏     | 33/78 [00:08<00:10,  4.20it/s]

step[500]: loss[10493.46658]


100%|██████████| 78/78 [00:20<00:00,  3.83it/s]


Starting epoch 7/65


100%|██████████| 78/78 [00:24<00:00,  3.19it/s]


Starting epoch 8/65


100%|██████████| 78/78 [00:23<00:00,  3.27it/s]


Starting epoch 9/65


100%|██████████| 78/78 [00:24<00:00,  3.23it/s]


Starting epoch 10/65


100%|██████████| 78/78 [00:21<00:00,  3.65it/s]


Starting epoch 11/65


100%|██████████| 78/78 [00:24<00:00,  3.18it/s]


Starting epoch 12/65


 82%|████████▏ | 64/78 [00:17<00:03,  3.77it/s]

----------------------------
| frame_loss    | 172      |
| frame_loss_q0 | 166      |
| frame_loss_q1 | 170      |
| frame_loss_q2 | 173      |
| frame_loss_q3 | 179      |
| grad_norm     | 150      |
| loss          | 3.82e+03 |
| loss_q0       | 170      |
| loss_q1       | 174      |
| loss_q2       | 177      |
| loss_q3       | 183      |
| param_norm    | 36.3     |
| rcxyz_mse     | 7.75     |
| rcxyz_mse_q0  | 7.18     |
| rcxyz_mse_q1  | 8.01     |
| rcxyz_mse_q2  | 8.03     |
| rcxyz_mse_q3  | 8.25     |
| samples       | 1e+03    |
| step          | 1e+03    |
| vel_mse       | 7.29e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------
step[1000]: loss[3819.25424]
saving model...


 83%|████████▎ | 65/78 [00:17<00:03,  3.83it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:20<00:00,  3.81it/s]


Starting epoch 13/65


100%|██████████| 78/78 [00:20<00:00,  3.75it/s]


Starting epoch 14/65


100%|██████████| 78/78 [00:23<00:00,  3.39it/s]


Starting epoch 15/65


100%|██████████| 78/78 [00:21<00:00,  3.64it/s]


Starting epoch 16/65


100%|██████████| 78/78 [00:21<00:00,  3.69it/s]


Starting epoch 17/65


100%|██████████| 78/78 [00:21<00:00,  3.69it/s]


Starting epoch 18/65


100%|██████████| 78/78 [00:22<00:00,  3.50it/s]


Starting epoch 19/65


 23%|██▎       | 18/78 [00:05<00:16,  3.67it/s]

----------------------------
| frame_loss    | 153      |
| frame_loss_q0 | 131      |
| frame_loss_q1 | 142      |
| frame_loss_q2 | 161      |
| frame_loss_q3 | 178      |
| grad_norm     | 172      |
| loss          | 4.72e+03 |
| loss_q0       | 135      |
| loss_q1       | 145      |
| loss_q2       | 165      |
| loss_q3       | 182      |
| param_norm    | 36.4     |
| rcxyz_mse     | 7.12     |
| rcxyz_mse_q0  | 6.86     |
| rcxyz_mse_q1  | 6.06     |
| rcxyz_mse_q2  | 7.4      |
| rcxyz_mse_q3  | 7.65     |
| samples       | 1.5e+03  |
| step          | 1.5e+03  |
| vel_mse       | 9.12e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------


 24%|██▍       | 19/78 [00:05<00:15,  3.87it/s]

step[1500]: loss[4716.01332]


100%|██████████| 78/78 [00:23<00:00,  3.25it/s]


Starting epoch 20/65


100%|██████████| 78/78 [00:20<00:00,  3.84it/s]


Starting epoch 21/65


100%|██████████| 78/78 [00:22<00:00,  3.50it/s]


Starting epoch 22/65


100%|██████████| 78/78 [00:22<00:00,  3.40it/s]


Starting epoch 23/65


100%|██████████| 78/78 [00:23<00:00,  3.29it/s]


Starting epoch 24/65


100%|██████████| 78/78 [00:20<00:00,  3.81it/s]


Starting epoch 25/65


 64%|██████▍   | 50/78 [00:15<00:06,  4.12it/s]

----------------------------
| frame_loss    | 132      |
| frame_loss_q0 | 87.8     |
| frame_loss_q1 | 118      |
| frame_loss_q2 | 149      |
| frame_loss_q3 | 173      |
| grad_norm     | 182      |
| loss          | 5.11e+03 |
| loss_q0       | 90.4     |
| loss_q1       | 121      |
| loss_q2       | 152      |
| loss_q3       | 177      |
| param_norm    | 36.5     |
| rcxyz_mse     | 6.62     |
| rcxyz_mse_q0  | 5.15     |
| rcxyz_mse_q1  | 5.78     |
| rcxyz_mse_q2  | 7.08     |
| rcxyz_mse_q3  | 8.55     |
| samples       | 2e+03    |
| step          | 2e+03    |
| vel_mse       | 9.94e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------
step[2000]: loss[5106.31707]
saving model...


 65%|██████▌   | 51/78 [00:15<00:06,  4.02it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:22<00:00,  3.45it/s]


Starting epoch 26/65


100%|██████████| 78/78 [00:20<00:00,  3.73it/s]


Starting epoch 27/65


100%|██████████| 78/78 [00:23<00:00,  3.38it/s]


Starting epoch 28/65


100%|██████████| 78/78 [00:21<00:00,  3.59it/s]


Starting epoch 29/65


100%|██████████| 78/78 [00:22<00:00,  3.41it/s]


Starting epoch 30/65


100%|██████████| 78/78 [00:21<00:00,  3.56it/s]


Starting epoch 31/65


100%|██████████| 78/78 [00:21<00:00,  3.62it/s]


Starting epoch 32/65


  5%|▌         | 4/78 [00:01<00:22,  3.30it/s]

----------------------------
| frame_loss    | 121      |
| frame_loss_q0 | 66.7     |
| frame_loss_q1 | 101      |
| frame_loss_q2 | 142      |
| frame_loss_q3 | 164      |
| grad_norm     | 156      |
| loss          | 4.23e+03 |
| loss_q0       | 69       |
| loss_q1       | 104      |
| loss_q2       | 145      |
| loss_q3       | 168      |
| param_norm    | 36.6     |
| rcxyz_mse     | 6.24     |
| rcxyz_mse_q0  | 4.65     |
| rcxyz_mse_q1  | 5.04     |
| rcxyz_mse_q2  | 6.66     |
| rcxyz_mse_q3  | 7.57     |
| samples       | 2.5e+03  |
| step          | 2.5e+03  |
| vel_mse       | 8.2e+03  |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------


  6%|▋         | 5/78 [00:01<00:20,  3.57it/s]

step[2500]: loss[4226.01022]


100%|██████████| 78/78 [00:23<00:00,  3.38it/s]


Starting epoch 33/65


100%|██████████| 78/78 [00:21<00:00,  3.61it/s]


Starting epoch 34/65


100%|██████████| 78/78 [00:21<00:00,  3.65it/s]


Starting epoch 35/65


100%|██████████| 78/78 [00:20<00:00,  3.75it/s]


Starting epoch 36/65


100%|██████████| 78/78 [00:21<00:00,  3.65it/s]


Starting epoch 37/65


100%|██████████| 78/78 [00:27<00:00,  2.79it/s]


Starting epoch 38/65


 46%|████▌     | 36/78 [00:12<00:12,  3.45it/s]

----------------------------
| frame_loss    | 110      |
| frame_loss_q0 | 54.5     |
| frame_loss_q1 | 94.4     |
| frame_loss_q2 | 137      |
| frame_loss_q3 | 161      |
| grad_norm     | 146      |
| loss          | 4.17e+03 |
| loss_q0       | 56.8     |
| loss_q1       | 97.1     |
| loss_q2       | 140      |
| loss_q3       | 164      |
| param_norm    | 36.7     |
| rcxyz_mse     | 5.78     |
| rcxyz_mse_q0  | 4.69     |
| rcxyz_mse_q1  | 5.46     |
| rcxyz_mse_q2  | 6.71     |
| rcxyz_mse_q3  | 6.44     |
| samples       | 3e+03    |
| step          | 3e+03    |
| vel_mse       | 8.11e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------
step[3000]: loss[4169.24568]
saving model...


 47%|████▋     | 37/78 [00:12<00:11,  3.50it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:23<00:00,  3.28it/s]


Starting epoch 39/65


100%|██████████| 78/78 [00:22<00:00,  3.52it/s]


Starting epoch 40/65


100%|██████████| 78/78 [00:24<00:00,  3.21it/s]


Starting epoch 41/65


100%|██████████| 78/78 [00:22<00:00,  3.44it/s]


Starting epoch 42/65


100%|██████████| 78/78 [00:21<00:00,  3.61it/s]


Starting epoch 43/65


100%|██████████| 78/78 [00:22<00:00,  3.45it/s]


Starting epoch 44/65


 87%|████████▋ | 68/78 [00:18<00:02,  4.02it/s]

----------------------------
| frame_loss    | 106      |
| frame_loss_q0 | 44.5     |
| frame_loss_q1 | 87.7     |
| frame_loss_q2 | 135      |
| frame_loss_q3 | 159      |
| grad_norm     | 132      |
| loss          | 3.74e+03 |
| loss_q0       | 46.1     |
| loss_q1       | 90.3     |
| loss_q2       | 138      |
| loss_q3       | 162      |
| param_norm    | 36.8     |
| rcxyz_mse     | 5.45     |
| rcxyz_mse_q0  | 3.28     |
| rcxyz_mse_q1  | 5.09     |
| rcxyz_mse_q2  | 6.34     |
| rcxyz_mse_q3  | 7.32     |
| samples       | 3.5e+03  |
| step          | 3.5e+03  |
| vel_mse       | 7.27e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------


 88%|████████▊ | 69/78 [00:18<00:02,  4.04it/s]

step[3500]: loss[3742.16884]


100%|██████████| 78/78 [00:20<00:00,  3.75it/s]


Starting epoch 45/65


100%|██████████| 78/78 [00:21<00:00,  3.63it/s]


Starting epoch 46/65


100%|██████████| 78/78 [00:20<00:00,  3.86it/s]


Starting epoch 47/65


100%|██████████| 78/78 [00:22<00:00,  3.50it/s]


Starting epoch 48/65


100%|██████████| 78/78 [00:23<00:00,  3.29it/s]


Starting epoch 49/65


100%|██████████| 78/78 [00:20<00:00,  3.76it/s]


Starting epoch 50/65


100%|██████████| 78/78 [00:22<00:00,  3.52it/s]


Starting epoch 51/65


 28%|██▊       | 22/78 [00:06<00:14,  3.89it/s]

----------------------------
| frame_loss    | 104      |
| frame_loss_q0 | 39.1     |
| frame_loss_q1 | 83.8     |
| frame_loss_q2 | 132      |
| frame_loss_q3 | 157      |
| grad_norm     | 123      |
| loss          | 3.55e+03 |
| loss_q0       | 40.4     |
| loss_q1       | 86.2     |
| loss_q2       | 136      |
| loss_q3       | 161      |
| param_norm    | 36.9     |
| rcxyz_mse     | 5.27     |
| rcxyz_mse_q0  | 2.54     |
| rcxyz_mse_q1  | 4.73     |
| rcxyz_mse_q2  | 6.27     |
| rcxyz_mse_q3  | 6.98     |
| samples       | 4e+03    |
| step          | 4e+03    |
| vel_mse       | 6.88e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------
step[4000]: loss[3545.95134]
saving model...


 29%|██▉       | 23/78 [00:06<00:14,  3.74it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:24<00:00,  3.18it/s]


Starting epoch 52/65


100%|██████████| 78/78 [00:24<00:00,  3.15it/s]


Starting epoch 53/65


100%|██████████| 78/78 [00:22<00:00,  3.53it/s]


Starting epoch 54/65


100%|██████████| 78/78 [00:22<00:00,  3.46it/s]


Starting epoch 55/65


100%|██████████| 78/78 [00:20<00:00,  3.75it/s]


Starting epoch 56/65


100%|██████████| 78/78 [00:23<00:00,  3.35it/s]


Starting epoch 57/65


 69%|██████▉   | 54/78 [00:15<00:05,  4.11it/s]

----------------------------
| frame_loss    | 101      |
| frame_loss_q0 | 33.5     |
| frame_loss_q1 | 79.9     |
| frame_loss_q2 | 130      |
| frame_loss_q3 | 155      |
| grad_norm     | 114      |
| loss          | 3.56e+03 |
| loss_q0       | 34.7     |
| loss_q1       | 81.9     |
| loss_q2       | 133      |
| loss_q3       | 159      |
| param_norm    | 37       |
| rcxyz_mse     | 5.15     |
| rcxyz_mse_q0  | 2.25     |
| rcxyz_mse_q1  | 3.89     |
| rcxyz_mse_q2  | 6.36     |
| rcxyz_mse_q3  | 7.73     |
| samples       | 4.5e+03  |
| step          | 4.5e+03  |
| vel_mse       | 6.91e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------


 71%|███████   | 55/78 [00:15<00:05,  4.19it/s]

step[4500]: loss[3557.15965]


100%|██████████| 78/78 [00:21<00:00,  3.62it/s]


Starting epoch 58/65


100%|██████████| 78/78 [00:24<00:00,  3.21it/s]


Starting epoch 59/65


100%|██████████| 78/78 [00:20<00:00,  3.74it/s]


Starting epoch 60/65


100%|██████████| 78/78 [00:23<00:00,  3.32it/s]


Starting epoch 61/65


100%|██████████| 78/78 [00:21<00:00,  3.66it/s]


Starting epoch 62/65


100%|██████████| 78/78 [00:26<00:00,  3.00it/s]


Starting epoch 63/65


100%|██████████| 78/78 [00:22<00:00,  3.53it/s]


Starting epoch 64/65


 10%|█         | 8/78 [00:02<00:17,  3.95it/s]

----------------------------
| frame_loss    | 97.4     |
| frame_loss_q0 | 30.4     |
| frame_loss_q1 | 79.2     |
| frame_loss_q2 | 130      |
| frame_loss_q3 | 153      |
| grad_norm     | 108      |
| loss          | 3.31e+03 |
| loss_q0       | 31.6     |
| loss_q1       | 81.4     |
| loss_q2       | 133      |
| loss_q3       | 157      |
| param_norm    | 37.1     |
| rcxyz_mse     | 4.96     |
| rcxyz_mse_q0  | 2.37     |
| rcxyz_mse_q1  | 4.55     |
| rcxyz_mse_q2  | 5.98     |
| rcxyz_mse_q3  | 7.06     |
| samples       | 5e+03    |
| step          | 5e+03    |
| vel_mse       | 6.42e+03 |
| vel_mse_q0    | 0        |
| vel_mse_q1    | 0        |
| vel_mse_q2    | 0        |
| vel_mse_q3    | 0        |
----------------------------
step[5000]: loss[3309.96929]
saving model...


 12%|█▏        | 9/78 [00:02<00:21,  3.24it/s]

Skipping evaluation for now.


100%|██████████| 78/78 [00:21<00:00,  3.57it/s]

saving model...
Skipping evaluation for now.





In [16]:
# Shape of one dataset item; the next cell reads the sampling dimensions from it.
dataset[0].shape

torch.Size([78, 44])

In [17]:
# Sampling dimensions are taken from the shape of the first dataset item.
num_frames, num_feats = dataset[0].shape

In [36]:
sample_batch_size = 1

# The sampler is the same for every repetition, so look it up once.
sample_fn = diffusion.p_sample_loop

# Sample in inference mode: a freshly built or just-trained module is in train
# mode, which would leave dropout active while denoising. The previous mode is
# restored afterwards so later training cells are unaffected.
was_training = model.training
model.eval()

all_motions = []
try:
    for rep_i in range(args.num_repetitions):
        print(f'### Sampling [repetitions #{rep_i}]')

        sample = sample_fn(
            model,
            (sample_batch_size, num_frames, num_feats),
            clip_denoised=False,
            model_kwargs={"y": {}},
            skip_timesteps=0,  # 0 is the default value - i.e. don't skip any step
            init_image=None,
            progress=True,
            dump_steps=None,
            noise=None,
            const_noise=False,
        )

        # Each repetition contributes one array of generated motions.
        all_motions.append(sample.cpu().numpy())
finally:
    model.train(was_training)

### Sampling [repetitions #0]


100%|██████████| 1000/1000 [00:03<00:00, 292.09it/s]


### Sampling [repetitions #1]


100%|██████████| 1000/1000 [00:03<00:00, 293.36it/s]


### Sampling [repetitions #2]


100%|██████████| 1000/1000 [00:03<00:00, 270.33it/s]


In [37]:
# Join the per-repetition arrays along the first axis.
# The list check keeps this cell idempotent: once `all_motions` is already an
# array, concatenating it again would merge its first two axes.
if isinstance(all_motions, list):
    all_motions = np.concatenate(all_motions, axis=0)
all_motions.shape

(3, 78, 44)

In [41]:
# Overwrite feature 0 of every frame in every motion with a fixed value.
# NOTE(review): 0.0167 matches the printed `duration` from the dataset; confirm
# feature 0 is the frame duration and that the rounded literal is intended.
dura = 0.0167
all_motions[..., 0] = dura

Before [ 0.09484142  0.23697427  0.4478516   1.0336378   0.3625738  -0.34355313
 -0.30178106  0.4988734  -0.21699992 -0.2794978  -0.78938407 -0.62915474
 -0.02144533 -0.8769405   0.48450437  1.126303    0.16602735 -0.30404997
  0.13034339  0.12543999 -0.1863358   0.07986425 -0.4495588  -0.10429495
 -0.42804155 -0.62458265 -0.5617694  -0.09850261  0.69818634 -0.66124535
  0.61351323  0.42892107  0.01878975 -0.3479311  -0.39352363 -0.85118103
  0.31626615 -0.6836971   0.05442764 -0.0532995   0.51179856 -0.39984375
  0.5849843  -0.33461916]
After [ 0.0167      0.23697427  0.4478516   1.0336378   0.3625738  -0.34355313
 -0.30178106  0.4988734  -0.21699992 -0.2794978  -0.78938407 -0.62915474
 -0.02144533 -0.8769405   0.48450437  1.126303    0.16602735 -0.30404997
  0.13034339  0.12543999 -0.1863358   0.07986425 -0.4495588  -0.10429495
 -0.42804155 -0.62458265 -0.5617694  -0.09850261  0.69818634 -0.66124535
  0.61351323  0.42892107  0.01878975 -0.3479311  -0.39352363 -0.85118103
  0.31626615

In [43]:
import numpy as np

def save_motions(all_motions, output_dir):
    """Save each motion as ``motion_<i>.npy`` inside ``output_dir``.

    Args:
        all_motions: Iterable of array-likes accepted by ``np.save``; item ``i``
            is written to ``motion_<i>.npy``.
        output_dir: Destination directory. It is created (including missing
            parents) if it does not exist yet.
    """
    # np.save does not create directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)
    for i, motion in enumerate(all_motions):
        filename = f"motion_{i}.npy"
        filepath = os.path.join(output_dir, filename)
        np.save(filepath, motion)
        print(f"Motion {i} saved as {filename}")

# Write the sampled motions into this experiment's log directory.
# NOTE(review): machine-specific absolute path; adjust for other checkouts.
sampled_motions_dir = f"/home/kenji/Fyp/DeepMimic_mujoco/diffusion/logs/{expname}/sampled_motions/"
save_motions(all_motions, sampled_motions_dir)

Motion 0 saved as motion_0.npy
Motion 1 saved as motion_1.npy
Motion 2 saved as motion_2.npy
