In [52]:
from SBM_SDE import *
from obs_and_flow_classes_and_functions import *
import seaborn as sns
import torch
from torch import nn
import torch.distributions as d
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import math
from tqdm import tqdm
import random
from torch.autograd import Function
import argparse
import os
import sys
from pathlib import Path
import shutil
import pandas as pd

In [53]:
torch.manual_seed(0)
devi = torch.device("".join(["cuda:",f'{cuda_id}']) if torch.cuda.is_available() else "cpu")

cuda_id = 1
dt = .2 #SDE discretization timestep.
t = 500 #Simulation run for T hours.
n = int(t / dt) 
t_span = np.linspace(0, t, n + 1)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n + 1, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
l_r = 1e-3
niter = 2000
piter = 500
batch_size = 2 #Number of sets of observation outputs to sample per set of parameters.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
state_dim_SAWB = 4 #Not including CO2 in STATE_DIM, because CO2 is an observation.

In [54]:
temp_ref = 283

#System parameters from deterministic CON model
u_M = 0.002
a_SD = 0.33
a_DS = 0.33
a_M = 0.33
a_MSC = 0.5
k_S_ref = 0.000025
k_D_ref = 0.005
k_M_ref = 0.0002
Ea_S = 75
Ea_D = 50
Ea_M = 50

#SCON diffusion matrix sigma scale parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.1
c_CO2 = 0.0001

SCON_C_params_dict = {'u_M': u_M, 'a_SD': a_SD, 'a_DS': a_DS, 'a_M': a_M, 'a_MSC': a_MSC, 'k_S_ref': k_S_ref, 'k_D_ref': k_D_ref, 'k_M_ref': k_M_ref, 'Ea_S': Ea_S, 'Ea_D': Ea_D, 'Ea_M': Ea_M, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC}

In [55]:
#System parameters from deterministic AWB model
u_Q_ref = 0.2
Q = 0.002
a_MSA = 0.5
K_D = 200
K_U = 1
V_D_ref = 0.4
V_U_ref = 0.02
Ea_V_D = 75
Ea_V_U = 50
r_M = 0.0004
r_E = 0.00001
r_L = 0.0005

#SAWB diffusion matrix sigma scale parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.1
c_EEC = 0.001

SAWB_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}

In [56]:
#Obtain SOC and DOC pool litter inputs for all SBMs.
i_s_tensor = 0.001 + 0.0005 * torch.sin((2 * np.pi / (24 * 365)) * t_span_tensor) #Exogenous SOC input function
i_d_tensor = 0.0001 + 0.00005 * torch.sin((2 * np.pi / (24 * 365)) * t_span_tensor) #Exogenous DOC input function

In [57]:
#Read-in deterministic data observations for use in inference.
obs_times, obs_means_CON, obs_error_CON = csv_to_obs_df('CON_synthetic_sol_df.csv', 4, t, 0.1)
obs_times, obs_means_AWB, obs_error_AWB = csv_to_obs_df('AWB_synthetic_sol_df.csv', 5, t, 0.1)
obs_times, obs_means_AWB_ECA, obs_error_AWB_ECA = csv_to_obs_df('AWB_ECA_synthetic_sol_df.csv', 5, t, 0.1)

In [58]:
def neg_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, DRIFT_DIFFUSION, PARAMS_DICT, TEMP_REF):
    drift, diffusion_sqrt = DRIFT_DIFFUSION(C_PATH[:, :-1, :], T_SPAN_TENSOR[:, :-1, :], I_S_TENSOR[:, :-1, :], I_D_TENSOR[:, :-1, :], PARAMS_DICT, TEMP_REF)
    euler_maruyama_state_sample_object = d.multivariate_normal.MultivariateNormal(loc = C_PATH[:, :-1, :] + drift * DT, scale_tril = diffusion_sqrt * math.sqrt(DT))
    return -euler_maruyama_state_sample_object.log_prob(C_PATH[:, 1:, :]).sum(-1)

In [59]:
obs_model_CON_noCO2 = ObsModel(DEVICE = devi, TIMES = obs_times, DT = dt, MU = obs_means_CON[:-1, :], SCALE = obs_error_CON[:, :-1])
obs_model_AWB_noCO2 = ObsModel(DEVICE = devi, TIMES = obs_times, DT = dt, MU = obs_means_AWB[:-1, :], SCALE = obs_error_AWB[:, :-1])
obs_model_AWB_ECA_noCO2 = ObsModel(DEVICE = devi, TIMES = obs_times, DT = dt, MU = obs_means_AWB_ECA[:-1, :], SCALE = obs_error_AWB_ECA[:, :-1])

In [60]:
def train(DEVICE, L_R, NITER, PRETRAIN_ITER, BATCH_SIZE, OBS_MODEL, STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, DRIFT_DIFFUSION, PARAMS_DICT, TEMP_REF, ANALYTICAL_STEADY_STATE_INIT):
    net = SDEFlow(DEVICE, BATCH_SIZE, OBS_MODEL, STATE_DIM, T, DT, N).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = L_R) 
    if PRETRAIN_ITER >= NITER:
        raise Exception("PRETRAIN_ITER must be < NITER.")
    best_loss_norm = 1e10
    best_loss_ELBO = 1e20
    norm_losses = [best_loss_norm] * 10
    ELBO_losses = [best_loss_ELBO] * 10
    C0 = ANALYTICAL_STEADY_STATE_INIT(I_S_TENSOR[0, 0, 0].item(), I_D_TENSOR[0, 0, 0].item(), PARAMS_DICT) #Calculate deterministic initial conditions.
    C0 = C0[(None,) * 2].repeat(BATCH_SIZE, 1, 1).to(DEVICE) #Assign initial conditions to C_PATH.
    with tqdm(total = NITER, desc = f'Train Diffusion', position = -1) as tq:
        for iter in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net() #Obtain paths with solutions at times after t0.
            C_PATH = torch.cat([C0, C_PATH], 1) #Append deterministic CON initial conditions conditional on parameter values to C path. 
            if iter <= PRETRAIN_ITER:
                l1_norm_element = C_PATH - torch.mean(OBS_MODEL.mu, -1)
                l1_norm = torch.sum(torch.abs(l1_norm_element)).mean()
                best_loss_norm = l1_norm if l1_norm < best_loss_norm else best_loss_norm
                norm_losses.append(l1_norm.item())
                #l2_norm_element = C_PATH - torch.mean(OBS_MODEL.mu, -1)
                #l2_norm = torch.sqrt(torch.sum(torch.square(l2_norm_element))).mean()
                #best_loss_norm = l2_norm if l2_norm < best_loss_norm else best_loss_norm
                #l2_norm.backward()
                #norm_losses.append(l2_norm.item())
                if len(norm_losses) > 10:
                    norm_losses.pop(0)
                if iter % 10 == 0:
                    print(f"Moving average norm loss at {iter} iterations is: {sum(norm_losses) / len(norm_losses)}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()
            else:
                log_lik = neg_log_lik(C_PATH, T_SPAN_TENSOR.to(DEVICE), dt, I_S_TENSOR.to(DEVICE), I_D_TENSOR.to(DEVICE), DRIFT_DIFFUSION, PARAMS_DICT, TEMP_REF)
                neg_ELBO = log_prob.mean() + log_lik.mean() - OBS_MODEL(C_PATH)
                best_loss_ELBO = neg_ELBO if neg_ELBO < best_loss_ELBO else best_loss_ELBO
                ELBO_losses.append(neg_ELBO.item())
                if len(ELBO_losses) > 10:
                    ELBO_losses.pop(0)
                if iter % 10 == 0:
                    print(f"Moving average ELBO loss at {iter} iterations is: {sum(ELBO_losses) / len(ELBO_losses)}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\n C_PATH =', C_PATH)
                neg_ELBO.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            optimizer.step()
            if iter % 100000 == 0 and iter > 0:
                optimizer.param_groups[0]['lr'] *= 0.1
            tq.update()

In [61]:
train(devi, l_r, niter, piter, batch_size, obs_model_CON_noCO2, state_dim_SCON, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SCON_C, SCON_C_params_dict, temp_ref, analytical_steady_state_init_CON)


Train Diffusion:   0%|          | 0/2000 [00:00<?, ?it/s][A

Moving average norm loss at 0 iterations is: 9000022811.225. Best norm loss value is: 228112.25.

C_PATH mean = tensor([[0.7400, 0.6597, 0.7890],
        [0.7433, 0.6603, 0.7777]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[45.6603,  0.0715,  0.7147],
         [ 0.7279,  0.9477,  0.6554],
         [ 0.7301,  0.5975,  0.9460],
         ...,
         [ 0.7950,  0.6977,  0.8644],
         [ 0.7731,  0.7774,  0.4001],
         [ 0.6925,  0.5654,  0.0750]],

        [[45.6603,  0.0715,  0.7147],
         [ 0.7279,  0.6799,  1.0443],
         [ 0.9856,  0.2549,  0.5817],
         ...,
         [ 0.7855,  0.6523,  0.5108],
         [ 0.5117,  0.5873,  0.4136],
         [ 1.1247,  0.7128,  0.1988]]], grad_fn=<CatBackward>)



Train Diffusion:   0%|          | 1/2000 [00:02<1:07:41,  2.03s/it][A
Train Diffusion:   0%|          | 2/2000 [00:04<1:08:17,  2.05s/it][A
Train Diffusion:   0%|          | 3/2000 [00:06<1:08:13,  2.05s/it][A
Train Diffusion:   0%|          | 4/2000 [00:08<1:08:03,  2.05s/it][A
Train Diffusion:   0%|          | 5/2000 [00:10<1:06:09,  1.99s/it][A
Train Diffusion:   0%|          | 6/2000 [00:12<1:05:48,  1.98s/it][A
Train Diffusion:   0%|          | 7/2000 [00:14<1:06:01,  1.99s/it][A
Train Diffusion:   0%|          | 8/2000 [00:15<1:05:14,  1.96s/it][A
Train Diffusion:   0%|          | 9/2000 [00:18<1:09:14,  2.09s/it][A
Train Diffusion:   0%|          | 10/2000 [00:20<1:12:22,  2.18s/it][A

Moving average norm loss at 10 iterations is: 218342.3984375. Best norm loss value is: 210876.328125.

C_PATH mean = tensor([[3.7924, 0.3075, 0.5256],
        [3.7789, 0.3094, 0.5240]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[45.6603,  0.0715,  0.7147],
         [ 2.3466,  0.5729,  0.4737],
         [ 3.3941,  0.3213,  0.3568],
         ...,
         [ 2.8928,  0.4090,  0.6905],
         [ 2.7312,  0.4883,  0.6698],
         [ 2.0213,  0.4760,  0.4812]],

        [[45.6603,  0.0715,  0.7147],
         [ 0.3281,  1.4398,  0.2918],
         [ 0.2848,  0.7024,  0.4188],
         ...,
         [ 0.5867,  0.4011,  0.5215],
         [ 0.4558,  0.3546,  0.5002],
         [ 0.9053,  0.3301,  0.3162]]], grad_fn=<CatBackward>)



Train Diffusion:   1%|          | 11/2000 [00:24<1:27:13,  2.63s/it][A
Train Diffusion:   1%|          | 12/2000 [00:31<2:09:18,  3.90s/it][A
Train Diffusion:   1%|          | 13/2000 [00:35<2:08:46,  3.89s/it][A
Train Diffusion:   1%|          | 14/2000 [00:38<2:03:08,  3.72s/it][A
Train Diffusion:   1%|          | 15/2000 [00:41<1:56:53,  3.53s/it][A
Train Diffusion:   1%|          | 16/2000 [00:44<1:47:33,  3.25s/it][A
Train Diffusion:   1%|          | 17/2000 [00:46<1:41:46,  3.08s/it][A
Train Diffusion:   1%|          | 18/2000 [00:48<1:29:38,  2.71s/it][A
Train Diffusion:   1%|          | 19/2000 [00:50<1:24:01,  2.54s/it][A
Train Diffusion:   1%|          | 20/2000 [00:53<1:28:38,  2.69s/it][A

Moving average norm loss at 20 iterations is: 202036.8375. Best norm loss value is: 195181.328125.

C_PATH mean = tensor([[6.7430, 0.1370, 0.5419],
        [6.7903, 0.1363, 0.5422]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[45.6603,  0.0715,  0.7147],
         [ 0.1690,  4.0029,  0.2790],
         [ 0.0798,  0.2799,  0.2803],
         ...,
         [ 1.2348,  0.2486,  0.6229],
         [12.4209,  0.2665,  0.5003],
         [ 8.6746,  0.2571,  0.1962]],

        [[45.6603,  0.0715,  0.7147],
         [ 1.2747,  0.9375,  0.2827],
         [ 1.9599,  1.1452,  0.2881],
         ...,
         [11.5284,  0.2562,  0.8028],
         [ 0.5427,  0.2642,  0.5280],
         [ 2.5230,  0.2563,  0.1974]]], grad_fn=<CatBackward>)



Train Diffusion:   1%|          | 21/2000 [00:55<1:22:25,  2.50s/it][A
Train Diffusion:   1%|          | 22/2000 [00:58<1:23:27,  2.53s/it][A
Train Diffusion:   1%|          | 23/2000 [01:01<1:25:48,  2.60s/it][A
Train Diffusion:   1%|          | 24/2000 [01:03<1:21:07,  2.46s/it][A
Train Diffusion:   1%|▏         | 25/2000 [01:06<1:24:15,  2.56s/it][A
Train Diffusion:   1%|▏         | 26/2000 [01:08<1:25:27,  2.60s/it][A
Train Diffusion:   1%|▏         | 27/2000 [01:11<1:30:36,  2.76s/it][A
Train Diffusion:   1%|▏         | 28/2000 [01:14<1:24:52,  2.58s/it][A
Train Diffusion:   1%|▏         | 29/2000 [01:16<1:18:39,  2.39s/it][A
Train Diffusion:   2%|▏         | 30/2000 [01:18<1:14:25,  2.27s/it][A

Moving average norm loss at 30 iterations is: 185154.1421875. Best norm loss value is: 176805.546875.

C_PATH mean = tensor([[10.7680,  0.0757,  0.1956],
        [10.6673,  0.0775,  0.1954]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[45.6603,  0.0715,  0.7147],
         [ 0.3100,  1.0699,  0.1193],
         [ 0.2253,  0.0623,  0.1134],
         ...,
         [20.2544,  0.0995,  0.2780],
         [ 0.1999,  0.1112,  0.1752],
         [12.4720,  0.1063,  0.0962]],

        [[45.6603,  0.0715,  0.7147],
         [ 1.9191,  5.4829,  0.1054],
         [ 5.6341,  0.5507,  0.1216],
         ...,
         [ 1.4338,  0.1178,  0.2985],
         [18.7093,  0.1028,  0.2096],
         [ 3.4598,  0.1124,  0.0943]]], grad_fn=<CatBackward>)



Train Diffusion:   2%|▏         | 31/2000 [01:20<1:11:49,  2.19s/it][A
Train Diffusion:   2%|▏         | 32/2000 [01:22<1:12:44,  2.22s/it][A
Train Diffusion:   2%|▏         | 33/2000 [01:24<1:09:03,  2.11s/it][A
Train Diffusion:   2%|▏         | 34/2000 [01:26<1:09:17,  2.11s/it][A
Train Diffusion:   2%|▏         | 35/2000 [01:28<1:08:45,  2.10s/it][A
Train Diffusion:   2%|▏         | 36/2000 [01:30<1:07:45,  2.07s/it][A
Train Diffusion:   2%|▏         | 37/2000 [01:33<1:13:47,  2.26s/it][A
Train Diffusion:   2%|▏         | 38/2000 [01:36<1:21:15,  2.48s/it][A
Train Diffusion:   2%|▏         | 39/2000 [01:40<1:37:56,  3.00s/it][A
Train Diffusion:   2%|▏         | 40/2000 [01:43<1:43:41,  3.17s/it][A

Moving average norm loss at 40 iterations is: 164252.5671875. Best norm loss value is: 153982.65625.

C_PATH mean = tensor([[15.5592,  0.0613,  0.0783],
        [15.2469,  0.0633,  0.0789]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9681e-01, 8.0892e-01, 6.8623e-02],
         [9.3098e-02, 1.7025e-01, 7.7535e-02],
         ...,
         [7.8836e-01, 6.8734e-02, 7.3162e-02],
         [1.2459e-01, 6.3573e-02, 4.5723e-02],
         [4.6277e+00, 6.2374e-02, 2.4944e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.1612e+00, 5.8803e+00, 6.4832e-02],
         [1.1732e+01, 6.4065e-01, 6.3747e-02],
         ...,
         [3.1870e+01, 6.2922e-02, 9.0054e-02],
         [3.3558e+01, 6.6008e-02, 4.5927e-02],
         [2.1519e+01, 6.9973e-02, 2.6810e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   2%|▏         | 41/2000 [01:46<1:39:34,  3.05s/it][A
Train Diffusion:   2%|▏         | 42/2000 [01:50<1:45:13,  3.22s/it][A
Train Diffusion:   2%|▏         | 43/2000 [01:53<1:41:08,  3.10s/it][A
Train Diffusion:   2%|▏         | 44/2000 [01:55<1:30:51,  2.79s/it][A
Train Diffusion:   2%|▏         | 45/2000 [01:57<1:26:04,  2.64s/it][A
Train Diffusion:   2%|▏         | 46/2000 [01:59<1:21:39,  2.51s/it][A
Train Diffusion:   2%|▏         | 47/2000 [02:01<1:18:56,  2.43s/it][A
Train Diffusion:   2%|▏         | 48/2000 [02:03<1:15:53,  2.33s/it][A
Train Diffusion:   2%|▏         | 49/2000 [02:05<1:12:16,  2.22s/it][A
Train Diffusion:   2%|▎         | 50/2000 [02:08<1:14:40,  2.30s/it][A

Moving average norm loss at 50 iterations is: 141789.3328125. Best norm loss value is: 132928.890625.

C_PATH mean = tensor([[19.8470,  0.0349,  0.0570],
        [20.5399,  0.0317,  0.0571]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.9079e+00, 8.1895e+00, 4.1120e-02],
         [5.5168e-02, 1.0948e+00, 4.0789e-02],
         ...,
         [6.7354e-01, 3.9239e-02, 6.5607e-02],
         [3.9591e+01, 3.8775e-02, 4.4161e-02],
         [5.4867e+00, 4.0554e-02, 2.5541e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.8718e-01, 8.7704e-01, 4.0985e-02],
         [1.5511e+01, 3.2628e-01, 4.2292e-02],
         ...,
         [3.7697e+01, 4.1113e-02, 7.1276e-02],
         [3.5176e-02, 4.1011e-02, 4.5318e-02],
         [2.6048e+01, 3.9248e-02, 2.5664e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   3%|▎         | 51/2000 [02:10<1:13:20,  2.26s/it][A
Train Diffusion:   3%|▎         | 52/2000 [02:12<1:14:13,  2.29s/it][A
Train Diffusion:   3%|▎         | 53/2000 [02:17<1:34:35,  2.92s/it][A
Train Diffusion:   3%|▎         | 54/2000 [02:20<1:32:23,  2.85s/it][A
Train Diffusion:   3%|▎         | 55/2000 [02:22<1:25:32,  2.64s/it][A
Train Diffusion:   3%|▎         | 56/2000 [02:24<1:19:02,  2.44s/it][A
Train Diffusion:   3%|▎         | 57/2000 [02:26<1:13:37,  2.27s/it][A
Train Diffusion:   3%|▎         | 58/2000 [02:27<1:09:12,  2.14s/it][A
Train Diffusion:   3%|▎         | 59/2000 [02:29<1:05:58,  2.04s/it][A
Train Diffusion:   3%|▎         | 60/2000 [02:31<1:03:46,  1.97s/it][A

Moving average norm loss at 60 iterations is: 127889.003125. Best norm loss value is: 125471.40625.

C_PATH mean = tensor([[2.3980e+01, 1.3838e-02, 7.1255e-02],
        [2.4735e+01, 1.1013e-02, 7.1276e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9343e-01, 7.9974e+00, 2.2950e-02],
         [1.5293e+01, 3.2181e-01, 2.4689e-02],
         ...,
         [1.1481e+00, 2.2460e-02, 1.3192e-01],
         [3.2727e-02, 2.2489e-02, 6.9384e-02],
         [2.9723e+01, 2.2602e-02, 2.1647e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.3732e+00, 9.2487e-01, 2.3393e-02],
         [5.1478e-02, 3.3399e-01, 2.4070e-02],
         ...,
         [4.6501e+01, 2.2240e-02, 1.3272e-01],
         [4.6567e+01, 2.2122e-02, 7.2341e-02],
         [6.6621e+00, 2.3060e-02, 2.1688e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   3%|▎         | 61/2000 [02:33<1:02:50,  1.94s/it][A
Train Diffusion:   3%|▎         | 62/2000 [02:35<1:02:16,  1.93s/it][A
Train Diffusion:   3%|▎         | 63/2000 [02:37<1:05:00,  2.01s/it][A
Train Diffusion:   3%|▎         | 64/2000 [02:40<1:13:27,  2.28s/it][A
Train Diffusion:   3%|▎         | 65/2000 [02:42<1:17:05,  2.39s/it][A
Train Diffusion:   3%|▎         | 66/2000 [02:45<1:16:25,  2.37s/it][A
Train Diffusion:   3%|▎         | 67/2000 [02:47<1:14:54,  2.33s/it][A
Train Diffusion:   3%|▎         | 68/2000 [02:49<1:11:52,  2.23s/it][A
Train Diffusion:   3%|▎         | 69/2000 [02:51<1:12:12,  2.24s/it][A
Train Diffusion:   4%|▎         | 70/2000 [02:53<1:10:49,  2.20s/it][A

Moving average norm loss at 70 iterations is: 123683.0578125. Best norm loss value is: 119848.6796875.

C_PATH mean = tensor([[2.4253e+01, 7.8262e-03, 3.3663e-01],
        [2.5838e+01, 5.2956e-03, 3.3617e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.3026e-01, 7.5713e+00, 2.6676e-02],
         [5.8557e-02, 8.6420e-02, 2.7497e-02],
         ...,
         [3.8990e+00, 2.4015e-02, 9.7287e-01],
         [4.9696e+01, 2.5675e-02, 4.5453e-01],
         [8.5061e+00, 2.6241e-02, 5.4670e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.3026e-01, 1.2355e+00, 2.7493e-02],
         [6.8559e+00, 8.7124e-02, 2.9600e-02],
         ...,
         [4.8946e+01, 2.5046e-02, 6.6542e-01],
         [3.5591e-01, 2.4700e-02, 4.4988e-01],
         [3.1078e+01, 2.5993e-02, 5.7103e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   4%|▎         | 71/2000 [02:56<1:15:38,  2.35s/it][A
Train Diffusion:   4%|▎         | 72/2000 [02:59<1:17:50,  2.42s/it][A
Train Diffusion:   4%|▎         | 73/2000 [03:02<1:22:19,  2.56s/it][A
Train Diffusion:   4%|▎         | 74/2000 [03:04<1:18:58,  2.46s/it][A
Train Diffusion:   4%|▍         | 75/2000 [03:06<1:14:12,  2.31s/it][A
Train Diffusion:   4%|▍         | 76/2000 [03:08<1:11:33,  2.23s/it][A
Train Diffusion:   4%|▍         | 77/2000 [03:10<1:09:14,  2.16s/it][A
Train Diffusion:   4%|▍         | 78/2000 [03:12<1:05:59,  2.06s/it][A
Train Diffusion:   4%|▍         | 79/2000 [03:14<1:04:19,  2.01s/it][A
Train Diffusion:   4%|▍         | 80/2000 [03:16<1:04:09,  2.00s/it][A

Moving average norm loss at 80 iterations is: 115349.9625. Best norm loss value is: 111201.6015625.

C_PATH mean = tensor([[2.5459e+01, 4.2113e-03, 3.7883e-01],
        [2.5192e+01, 6.5288e-03, 3.8637e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.5834e-01, 1.8200e+00, 2.4140e-02],
         [5.2770e-02, 6.6780e-02, 2.5033e-02],
         ...,
         [4.3983e+01, 2.0684e-02, 6.8006e-01],
         [3.0598e+00, 2.0657e-02, 4.5790e-01],
         [9.7047e+00, 2.3665e-02, 4.9238e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0647e-01, 7.6588e+00, 2.1630e-02],
         [1.0225e+01, 8.4391e-02, 2.2954e-02],
         ...,
         [7.5094e+00, 1.8928e-02, 1.0425e+00],
         [4.4066e+01, 2.0296e-02, 5.8846e-01],
         [2.8662e+01, 1.9953e-02, 5.7883e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   4%|▍         | 81/2000 [03:18<1:03:47,  1.99s/it][A
Train Diffusion:   4%|▍         | 82/2000 [03:20<1:05:30,  2.05s/it][A
Train Diffusion:   4%|▍         | 83/2000 [03:22<1:06:12,  2.07s/it][A
Train Diffusion:   4%|▍         | 84/2000 [03:24<1:07:33,  2.12s/it][A
Train Diffusion:   4%|▍         | 85/2000 [03:26<1:08:40,  2.15s/it][A
Train Diffusion:   4%|▍         | 86/2000 [03:29<1:09:26,  2.18s/it][A
Train Diffusion:   4%|▍         | 87/2000 [03:31<1:07:56,  2.13s/it][A
Train Diffusion:   4%|▍         | 88/2000 [03:33<1:13:12,  2.30s/it][A
Train Diffusion:   4%|▍         | 89/2000 [03:36<1:14:19,  2.33s/it][A
Train Diffusion:   4%|▍         | 90/2000 [03:38<1:15:37,  2.38s/it][A

Moving average norm loss at 90 iterations is: 104500.2796875. Best norm loss value is: 98319.84375.

C_PATH mean = tensor([[2.7134e+01, 2.9175e-03, 3.2179e-01],
        [2.7578e+01, 4.8645e-03, 3.2241e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.8136e-02, 2.7649e+00, 1.3936e-02],
         [1.2565e+01, 4.1984e-02, 1.3648e-02],
         ...,
         [4.2612e+01, 1.1261e-02, 7.3207e-01],
         [7.7589e+00, 1.2000e-02, 4.1977e-01],
         [1.1690e+01, 1.3487e-02, 3.6168e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.4821e-01, 7.6203e+00, 1.3400e-02],
         [9.5416e-02, 8.5380e-02, 1.6265e-02],
         ...,
         [1.2366e+01, 1.2488e-02, 7.6718e-01],
         [4.1826e+01, 1.2138e-02, 4.3899e-01],
         [2.7143e+01, 1.1821e-02, 3.5676e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   5%|▍         | 91/2000 [03:40<1:11:19,  2.24s/it][A
Train Diffusion:   5%|▍         | 92/2000 [03:42<1:07:13,  2.11s/it][A
Train Diffusion:   5%|▍         | 93/2000 [03:44<1:05:01,  2.05s/it][A
Train Diffusion:   5%|▍         | 94/2000 [03:46<1:07:55,  2.14s/it][A
Train Diffusion:   5%|▍         | 95/2000 [03:49<1:13:30,  2.32s/it][A
Train Diffusion:   5%|▍         | 96/2000 [03:51<1:15:15,  2.37s/it][A
Train Diffusion:   5%|▍         | 97/2000 [03:54<1:13:41,  2.32s/it][A
Train Diffusion:   5%|▍         | 98/2000 [03:56<1:10:35,  2.23s/it][A
Train Diffusion:   5%|▍         | 99/2000 [03:57<1:06:45,  2.11s/it][A
Train Diffusion:   5%|▌         | 100/2000 [03:59<1:04:05,  2.02s/it][A

Moving average norm loss at 100 iterations is: 89785.6484375. Best norm loss value is: 82542.984375.

C_PATH mean = tensor([[3.0049e+01, 2.8817e-03, 1.5302e-01],
        [3.0754e+01, 4.5096e-03, 1.5267e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.5747e+00, 4.3645e+00, 7.0306e-03],
         [1.5696e+00, 3.8142e-01, 7.6819e-03],
         ...,
         [1.7777e+01, 6.0376e-03, 4.4556e-01],
         [1.3126e+01, 6.3809e-03, 2.0171e-01],
         [1.4746e+01, 2.9529e-03, 1.6541e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0728e-01, 8.7731e+00, 3.5095e-03],
         [1.6071e+01, 6.6659e-02, 3.9471e-03],
         ...,
         [4.3128e+01, 3.5705e-03, 4.6431e-01],
         [4.1556e+01, 3.2418e-03, 1.9533e-01],
         [2.7729e+01, 7.1209e-03, 1.1568e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   5%|▌         | 101/2000 [04:01<1:05:00,  2.05s/it][A
Train Diffusion:   5%|▌         | 102/2000 [04:03<1:04:43,  2.05s/it][A
Train Diffusion:   5%|▌         | 103/2000 [04:05<1:04:49,  2.05s/it][A
Train Diffusion:   5%|▌         | 104/2000 [04:07<1:04:32,  2.04s/it][A
Train Diffusion:   5%|▌         | 105/2000 [04:10<1:05:22,  2.07s/it][A
Train Diffusion:   5%|▌         | 106/2000 [04:12<1:04:41,  2.05s/it][A
Train Diffusion:   5%|▌         | 107/2000 [04:14<1:11:54,  2.28s/it][A
Train Diffusion:   5%|▌         | 108/2000 [04:17<1:18:51,  2.50s/it][A
Train Diffusion:   5%|▌         | 109/2000 [04:20<1:18:01,  2.48s/it][A
Train Diffusion:   6%|▌         | 110/2000 [04:22<1:13:50,  2.34s/it][A

Moving average norm loss at 110 iterations is: 73336.9546875. Best norm loss value is: 65672.5.

C_PATH mean = tensor([[3.3618e+01, 4.1849e-03, 5.8135e-02],
        [3.3829e+01, 5.8227e-03, 6.0799e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.2293e+00, 6.8679e+00, 1.0036e-03],
         [5.1189e+00, 1.3528e-01, 8.7383e-03],
         ...,
         [4.3389e+01, 6.9051e-03, 1.9301e-01],
         [4.1354e+01, 8.3747e-04, 8.5487e-02],
         [2.8428e+01, 5.5303e-04, 1.3540e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.6480e-01, 1.0078e+01, 8.5328e-03],
         [1.8873e+01, 8.8546e-01, 1.6366e-03],
         ...,
         [2.3410e+01, 1.1097e-03, 1.9314e-01],
         [1.8657e+01, 9.1894e-03, 4.4186e-02],
         [1.7706e+01, 1.0624e-02, 4.3191e-03]]], grad_fn=<CatBackward>)



Train Diffusion:   6%|▌         | 111/2000 [04:24<1:09:22,  2.20s/it][A
Train Diffusion:   6%|▌         | 112/2000 [04:26<1:07:30,  2.15s/it][A
Train Diffusion:   6%|▌         | 113/2000 [04:28<1:05:45,  2.09s/it][A
Train Diffusion:   6%|▌         | 114/2000 [04:30<1:04:01,  2.04s/it][A
Train Diffusion:   6%|▌         | 115/2000 [04:31<1:01:55,  1.97s/it][A
Train Diffusion:   6%|▌         | 116/2000 [04:33<1:01:21,  1.95s/it][A
Train Diffusion:   6%|▌         | 117/2000 [04:35<1:00:36,  1.93s/it][A
Train Diffusion:   6%|▌         | 118/2000 [04:37<59:33,  1.90s/it]  [A
Train Diffusion:   6%|▌         | 119/2000 [04:39<59:09,  1.89s/it][A
Train Diffusion:   6%|▌         | 120/2000 [04:41<1:00:22,  1.93s/it][A

Moving average norm loss at 120 iterations is: 55896.0453125. Best norm loss value is: 47813.29296875.

C_PATH mean = tensor([[3.7220e+01, 9.2616e-03, 1.4494e-02],
        [3.7007e+01, 9.6475e-03, 1.3738e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.8345e+00, 9.2375e+00, 1.0235e-02],
         [1.0908e+01, 2.3412e+00, 3.3484e-04],
         ...,
         [2.9621e+01, 4.4097e-04, 1.3970e-03],
         [2.5291e+01, 1.3514e-02, 5.1380e-04],
         [2.9528e+01, 8.1774e-05, 4.4339e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.3638e+00, 1.2750e+01, 3.3002e-04],
         [2.1454e+01, 5.8549e-01, 1.3720e-02],
         ...,
         [4.3621e+01, 8.1036e-03, 1.8199e-02],
         [4.1255e+01, 2.0778e-04, 1.2138e-02],
         [2.1932e+01, 1.9016e-02, 1.3631e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   6%|▌         | 121/2000 [04:43<1:03:16,  2.02s/it][A
Train Diffusion:   6%|▌         | 122/2000 [04:45<1:01:27,  1.96s/it][A
Train Diffusion:   6%|▌         | 123/2000 [04:47<1:00:01,  1.92s/it][A
Train Diffusion:   6%|▌         | 124/2000 [04:49<59:43,  1.91s/it]  [A
Train Diffusion:   6%|▋         | 125/2000 [04:51<1:03:34,  2.03s/it][A
Train Diffusion:   6%|▋         | 126/2000 [04:53<1:03:30,  2.03s/it][A
Train Diffusion:   6%|▋         | 127/2000 [04:55<1:02:32,  2.00s/it][A
Train Diffusion:   6%|▋         | 128/2000 [04:57<1:00:44,  1.95s/it][A
Train Diffusion:   6%|▋         | 129/2000 [04:59<1:01:14,  1.96s/it][A
Train Diffusion:   6%|▋         | 130/2000 [05:01<1:00:20,  1.94s/it][A

Moving average norm loss at 130 iterations is: 37789.4025390625. Best norm loss value is: 29543.65625.

C_PATH mean = tensor([[4.0594e+01, 1.6592e-02, 1.4323e-02],
        [4.0621e+01, 1.7223e-02, 1.4424e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.0754e+00, 1.2935e+01, 1.3853e-02],
         [1.5989e+01, 8.0208e-01, 1.1157e-04],
         ...,
         [3.5673e+01, 8.2137e-05, 4.2949e-03],
         [3.1231e+01, 1.9224e-02, 5.0539e-05],
         [2.9186e+01, 2.0762e-05, 1.0987e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.7548e+00, 1.1777e+01, 7.8918e-05],
         [2.2189e+01, 2.5521e+00, 1.9803e-02],
         ...,
         [4.3612e+01, 1.0523e-02, 6.5419e-05],
         [4.0297e+01, 4.7812e-05, 2.4268e-03],
         [2.4626e+01, 2.8611e-02, 5.1463e-05]]], grad_fn=<CatBackward>)



Train Diffusion:   7%|▋         | 131/2000 [05:03<59:25,  1.91s/it]  [A
Train Diffusion:   7%|▋         | 132/2000 [05:04<58:36,  1.88s/it][A
Train Diffusion:   7%|▋         | 133/2000 [05:06<58:01,  1.86s/it][A
Train Diffusion:   7%|▋         | 134/2000 [05:08<57:32,  1.85s/it][A
Train Diffusion:   7%|▋         | 135/2000 [05:10<57:00,  1.83s/it][A
Train Diffusion:   7%|▋         | 136/2000 [05:12<56:46,  1.83s/it][A
Train Diffusion:   7%|▋         | 137/2000 [05:14<59:50,  1.93s/it][A
Train Diffusion:   7%|▋         | 138/2000 [05:16<1:02:21,  2.01s/it][A
Train Diffusion:   7%|▋         | 139/2000 [05:18<1:00:40,  1.96s/it][A
Train Diffusion:   7%|▋         | 140/2000 [05:20<1:01:42,  1.99s/it][A

Moving average norm loss at 140 iterations is: 19257.14169921875. Best norm loss value is: 10743.94921875.

C_PATH mean = tensor([[4.4166e+01, 2.7324e-02, 2.6172e-02],
        [4.4230e+01, 2.7065e-02, 2.6893e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.8556e+00, 1.3428e+01, 6.9532e-05],
         [1.9934e+01, 8.7447e-01, 7.0994e-05],
         ...,
         [4.3974e+01, 1.5812e-02, 1.7729e-05],
         [3.7255e+01, 3.4706e-02, 2.4158e-03],
         [2.9009e+01, 5.8479e-02, 1.6090e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.4209e+00, 1.3024e+01, 1.7541e-02],
         [2.1937e+01, 2.1313e+00, 2.6507e-02],
         ...,
         [4.1437e+01, 4.2669e-05, 4.1828e-03],
         [3.9696e+01, 1.7528e-05, 9.5753e-06],
         [2.7816e+01, 2.9298e-06, 2.5749e-05]]], grad_fn=<CatBackward>)



Train Diffusion:   7%|▋         | 141/2000 [05:22<1:03:55,  2.06s/it][A
Train Diffusion:   7%|▋         | 142/2000 [05:24<1:02:31,  2.02s/it][A
Train Diffusion:   7%|▋         | 143/2000 [05:26<1:00:26,  1.95s/it][A
Train Diffusion:   7%|▋         | 144/2000 [05:28<1:00:24,  1.95s/it][A
Train Diffusion:   7%|▋         | 145/2000 [05:30<59:05,  1.91s/it]  [A
Train Diffusion:   7%|▋         | 146/2000 [05:31<58:35,  1.90s/it][A
Train Diffusion:   7%|▋         | 147/2000 [05:33<58:41,  1.90s/it][A
Train Diffusion:   7%|▋         | 148/2000 [05:35<58:36,  1.90s/it][A
Train Diffusion:   7%|▋         | 149/2000 [05:37<59:56,  1.94s/it][A
Train Diffusion:   8%|▊         | 150/2000 [05:39<1:00:17,  1.96s/it][A

Moving average norm loss at 150 iterations is: 8307.652294921874. Best norm loss value is: 5484.7412109375.

C_PATH mean = tensor([[4.6869e+01, 3.2963e-02, 4.1035e-02],
        [4.6859e+01, 3.3310e-02, 3.7722e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.4549e+00, 1.4196e+01, 1.4424e-05],
         [2.3389e+01, 2.5934e+00, 3.6774e-02],
         ...,
         [4.4312e+01, 1.5023e-05, 3.1110e-03],
         [4.0033e+01, 7.0444e-06, 2.1725e-03],
         [2.9331e+01, 8.2401e-02, 2.1519e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5372e+00, 1.3804e+01, 2.1283e-02],
         [2.3823e+01, 8.0392e-01, 2.1673e-05],
         ...,
         [4.5392e+01, 1.8001e-02, 6.4274e-06],
         [4.0310e+01, 4.3537e-02, 3.2665e-06],
         [2.9642e+01, 1.8887e-06, 1.5489e-05]]], grad_fn=<CatBackward>)



Train Diffusion:   8%|▊         | 151/2000 [05:42<1:11:41,  2.33s/it][A
Train Diffusion:   8%|▊         | 152/2000 [05:45<1:13:18,  2.38s/it][A
Train Diffusion:   8%|▊         | 153/2000 [05:47<1:09:46,  2.27s/it][A
Train Diffusion:   8%|▊         | 154/2000 [05:49<1:06:15,  2.15s/it][A
Train Diffusion:   8%|▊         | 155/2000 [05:51<1:04:06,  2.08s/it][A
Train Diffusion:   8%|▊         | 156/2000 [05:53<1:02:26,  2.03s/it][A
Train Diffusion:   8%|▊         | 157/2000 [05:55<1:01:14,  1.99s/it][A
Train Diffusion:   8%|▊         | 158/2000 [05:56<59:39,  1.94s/it]  [A
Train Diffusion:   8%|▊         | 159/2000 [05:58<59:07,  1.93s/it][A
Train Diffusion:   8%|▊         | 160/2000 [06:00<58:29,  1.91s/it][A

Moving average norm loss at 160 iterations is: 8224.08984375. Best norm loss value is: 5025.03515625.

C_PATH mean = tensor([[4.5326e+01, 4.0074e-02, 6.3342e-02],
        [4.5321e+01, 3.8854e-02, 5.7894e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6619e+00, 1.3934e+01, 2.0109e-05],
         [2.1040e+01, 2.1670e+00, 5.5024e-02],
         ...,
         [4.4755e+01, 2.8851e-05, 5.7961e-03],
         [4.0012e+01, 7.3522e-02, 6.1051e-03],
         [2.9638e+01, 1.2590e-01, 5.7308e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.4098e+00, 1.3739e+01, 3.5930e-02],
         [2.1606e+01, 5.4139e-01, 2.4829e-05],
         ...,
         [4.4698e+01, 3.0225e-02, 2.5203e-05],
         [4.0189e+01, 1.3592e-05, 1.3700e-05],
         [2.9404e+01, 2.6807e-06, 5.3648e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   8%|▊         | 161/2000 [06:02<57:29,  1.88s/it][A
Train Diffusion:   8%|▊         | 162/2000 [06:04<56:36,  1.85s/it][A
Train Diffusion:   8%|▊         | 163/2000 [06:06<55:47,  1.82s/it][A
Train Diffusion:   8%|▊         | 164/2000 [06:07<55:18,  1.81s/it][A
Train Diffusion:   8%|▊         | 165/2000 [06:09<54:47,  1.79s/it][A
Train Diffusion:   8%|▊         | 166/2000 [06:11<54:22,  1.78s/it][A
Train Diffusion:   8%|▊         | 167/2000 [06:13<54:05,  1.77s/it][A
Train Diffusion:   8%|▊         | 168/2000 [06:14<54:30,  1.79s/it][A
Train Diffusion:   8%|▊         | 169/2000 [06:16<54:53,  1.80s/it][A
Train Diffusion:   8%|▊         | 170/2000 [06:18<55:35,  1.82s/it][A

Moving average norm loss at 170 iterations is: 6123.4091796875. Best norm loss value is: 4627.35595703125.

C_PATH mean = tensor([[4.5385e+01, 4.3289e-02, 7.7742e-02],
        [4.5381e+01, 4.5227e-02, 7.2719e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.5203e+00, 1.4173e+01, 4.6437e-02],
         [2.1093e+01, 7.8784e-01, 2.2460e-05],
         ...,
         [4.4257e+01, 3.0676e-05, 5.0401e-03],
         [3.9883e+01, 1.0009e-01, 6.8741e-06],
         [2.9658e+01, 3.2736e-06, 3.0703e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.6069e+00, 1.3821e+01, 1.7434e-05],
         [2.1711e+01, 2.2516e+00, 7.1923e-02],
         ...,
         [4.4338e+01, 4.0053e-02, 1.4235e-05],
         [3.9767e+01, 1.2073e-05, 4.7281e-03],
         [2.9321e+01, 1.3147e-01, 6.3547e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   9%|▊         | 171/2000 [06:20<55:59,  1.84s/it][A
Train Diffusion:   9%|▊         | 172/2000 [06:22<56:11,  1.84s/it][A
Train Diffusion:   9%|▊         | 173/2000 [06:24<56:45,  1.86s/it][A
Train Diffusion:   9%|▊         | 174/2000 [06:26<56:47,  1.87s/it][A
Train Diffusion:   9%|▉         | 175/2000 [06:27<56:38,  1.86s/it][A
Train Diffusion:   9%|▉         | 176/2000 [06:29<56:33,  1.86s/it][A
Train Diffusion:   9%|▉         | 177/2000 [06:31<56:17,  1.85s/it][A
Train Diffusion:   9%|▉         | 178/2000 [06:33<55:37,  1.83s/it][A
Train Diffusion:   9%|▉         | 179/2000 [06:35<54:57,  1.81s/it][A
Train Diffusion:   9%|▉         | 180/2000 [06:36<54:28,  1.80s/it][A

Moving average norm loss at 180 iterations is: 4819.21708984375. Best norm loss value is: 4503.36572265625.

C_PATH mean = tensor([[45.3327,  0.0596,  0.0842],
        [45.3321,  0.0613,  0.0863]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.6583e+00, 1.6826e+01, 1.7118e-05],
         [2.3960e+01, 4.4051e+00, 2.4410e-05],
         ...,
         [4.4152e+01, 4.4398e-02, 5.0426e-07],
         [4.0155e+01, 1.0285e-01, 1.5054e-03],
         [3.0793e+01, 2.5811e-06, 2.3545e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6625e+00, 1.6935e+01, 5.8104e-02],
         [2.4030e+01, 4.7409e+00, 9.5014e-02],
         ...,
         [4.4182e+01, 2.1546e-05, 1.1975e-03],
         [4.0132e+01, 8.4487e-06, 5.9065e-07],
         [3.0979e+01, 1.2590e-01, 8.3996e-02]]], grad_fn=<CatBackward>)



Train Diffusion:   9%|▉         | 181/2000 [06:38<54:26,  1.80s/it][A
Train Diffusion:   9%|▉         | 182/2000 [06:40<54:00,  1.78s/it][A
Train Diffusion:   9%|▉         | 183/2000 [06:42<55:02,  1.82s/it][A
Train Diffusion:   9%|▉         | 184/2000 [06:44<55:24,  1.83s/it][A
Train Diffusion:   9%|▉         | 185/2000 [06:46<55:43,  1.84s/it][A
Train Diffusion:   9%|▉         | 186/2000 [06:48<56:13,  1.86s/it][A
Train Diffusion:   9%|▉         | 187/2000 [06:50<1:01:08,  2.02s/it][A
Train Diffusion:   9%|▉         | 188/2000 [06:52<59:40,  1.98s/it]  [A
Train Diffusion:   9%|▉         | 189/2000 [06:54<58:29,  1.94s/it][A
Train Diffusion:  10%|▉         | 190/2000 [06:56<58:08,  1.93s/it][A

Moving average norm loss at 190 iterations is: 4377.194677734375. Best norm loss value is: 4059.94287109375.

C_PATH mean = tensor([[4.5524e+01, 4.0143e-02, 9.8588e-02],
        [4.5522e+01, 4.2605e-02, 9.4153e-02]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.6722e+00, 1.3624e+01, 1.4673e-05],
         [2.2147e+01, 5.0074e-01, 1.9534e-05],
         ...,
         [4.3975e+01, 2.9013e-05, 5.4731e-03],
         [3.9402e+01, 1.3639e-05, 8.4886e-06],
         [2.9336e+01, 1.0854e-01, 9.9654e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6302e+00, 1.3739e+01, 6.9037e-02],
         [2.1253e+01, 2.1145e+00, 1.0573e-01],
         ...,
         [4.4231e+01, 5.6056e-02, 1.4008e-05],
         [3.9702e+01, 9.6618e-02, 5.2915e-03],
         [2.9178e+01, 3.3113e-06, 4.0374e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  10%|▉         | 191/2000 [06:57<57:03,  1.89s/it][A
Train Diffusion:  10%|▉         | 192/2000 [06:59<55:52,  1.85s/it][A
Train Diffusion:  10%|▉         | 193/2000 [07:01<55:26,  1.84s/it][A
Train Diffusion:  10%|▉         | 194/2000 [07:03<58:10,  1.93s/it][A
Train Diffusion:  10%|▉         | 195/2000 [07:05<59:21,  1.97s/it][A
Train Diffusion:  10%|▉         | 196/2000 [07:07<1:02:11,  2.07s/it][A
Train Diffusion:  10%|▉         | 197/2000 [07:09<1:00:36,  2.02s/it][A
Train Diffusion:  10%|▉         | 198/2000 [07:11<1:01:38,  2.05s/it][A
Train Diffusion:  10%|▉         | 199/2000 [07:13<1:00:45,  2.02s/it][A
Train Diffusion:  10%|█         | 200/2000 [07:15<1:00:04,  2.00s/it][A

Moving average norm loss at 200 iterations is: 4222.129711914063. Best norm loss value is: 3769.239501953125.

C_PATH mean = tensor([[4.5471e+01, 4.1023e-02, 1.1105e-01],
        [4.5471e+01, 3.9990e-02, 1.1374e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.3948e+00, 1.3449e+01, 8.5003e-02],
         [2.1624e+01, 1.9484e+00, 1.3489e-01],
         ...,
         [4.4252e+01, 6.7802e-02, 7.4425e-03],
         [3.9620e+01, 1.4213e-05, 7.8189e-03],
         [2.9074e+01, 3.3104e-06, 1.3908e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.4168e+00, 1.3417e+01, 1.4550e-05],
         [2.0792e+01, 3.1415e-01, 1.8141e-05],
         ...,
         [4.4168e+01, 2.9267e-05, 2.5000e-05],
         [3.9572e+01, 9.8654e-02, 1.7603e-05],
         [2.9145e+01, 1.0491e-01, 6.1589e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  10%|█         | 201/2000 [07:17<59:21,  1.98s/it]  [A
Train Diffusion:  10%|█         | 202/2000 [07:19<57:27,  1.92s/it][A
Train Diffusion:  10%|█         | 203/2000 [07:21<56:46,  1.90s/it][A
Train Diffusion:  10%|█         | 204/2000 [07:23<1:01:13,  2.05s/it][A
Train Diffusion:  10%|█         | 205/2000 [07:25<59:38,  1.99s/it]  [A
Train Diffusion:  10%|█         | 206/2000 [07:27<59:40,  2.00s/it][A
Train Diffusion:  10%|█         | 207/2000 [07:29<58:58,  1.97s/it][A
Train Diffusion:  10%|█         | 208/2000 [07:31<58:05,  1.95s/it][A
Train Diffusion:  10%|█         | 209/2000 [07:33<58:06,  1.95s/it][A
Train Diffusion:  10%|█         | 210/2000 [07:35<58:38,  1.97s/it][A

Moving average norm loss at 210 iterations is: 4124.4576171875. Best norm loss value is: 3769.239501953125.

C_PATH mean = tensor([[4.5543e+01, 3.9375e-02, 1.1417e-01],
        [4.5546e+01, 3.9645e-02, 1.2079e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.6370e+00, 1.3139e+01, 1.4865e-05],
         [2.2256e+01, 3.2223e-01, 1.9259e-05],
         ...,
         [4.3984e+01, 7.6403e-02, 2.3903e-05],
         [3.9321e+01, 8.8310e-02, 8.0290e-03],
         [2.8886e+01, 9.6231e-02, 1.5463e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6827e+00, 1.3180e+01, 9.6097e-02],
         [2.1276e+01, 1.7885e+00, 1.5238e-01],
         ...,
         [4.3953e+01, 2.8853e-05, 7.2273e-03],
         [3.9321e+01, 1.4420e-05, 1.6583e-05],
         [2.8774e+01, 3.3666e-06, 6.4682e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  11%|█         | 211/2000 [07:37<57:58,  1.94s/it][A
Train Diffusion:  11%|█         | 212/2000 [07:39<56:12,  1.89s/it][A
Train Diffusion:  11%|█         | 213/2000 [07:40<54:56,  1.84s/it][A
Train Diffusion:  11%|█         | 214/2000 [07:42<53:56,  1.81s/it][A
Train Diffusion:  11%|█         | 215/2000 [07:44<53:18,  1.79s/it][A
Train Diffusion:  11%|█         | 216/2000 [07:46<52:50,  1.78s/it][A
Train Diffusion:  11%|█         | 217/2000 [07:47<52:35,  1.77s/it][A
Train Diffusion:  11%|█         | 218/2000 [07:49<52:18,  1.76s/it][A
Train Diffusion:  11%|█         | 219/2000 [07:51<52:09,  1.76s/it][A
Train Diffusion:  11%|█         | 220/2000 [07:53<52:45,  1.78s/it][A

Moving average norm loss at 220 iterations is: 3879.008251953125. Best norm loss value is: 3670.15283203125.

C_PATH mean = tensor([[4.5467e+01, 4.1419e-02, 1.3985e-01],
        [4.5466e+01, 4.0511e-02, 1.3291e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6153e+00, 1.3029e+01, 1.3450e-05],
         [2.1153e+01, 2.5090e-01, 2.0386e-01],
         ...,
         [4.4098e+01, 2.8855e-05, 8.7521e-03],
         [3.9387e+01, 8.9160e-02, 9.7387e-03],
         [2.8717e+01, 3.7115e-06, 6.6042e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.5465e+00, 1.2941e+01, 1.2643e-01],
         [2.2025e+01, 1.7100e+00, 1.7897e-05],
         ...,
         [4.4064e+01, 8.8811e-02, 3.4680e-05],
         [3.9393e+01, 1.4215e-05, 2.1636e-05],
         [2.8770e+01, 9.6701e-02, 2.0338e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  11%|█         | 221/2000 [07:55<54:46,  1.85s/it][A
Train Diffusion:  11%|█         | 222/2000 [07:57<55:07,  1.86s/it][A
Train Diffusion:  11%|█         | 223/2000 [07:58<54:04,  1.83s/it][A
Train Diffusion:  11%|█         | 224/2000 [08:00<54:54,  1.86s/it][A
Train Diffusion:  11%|█▏        | 225/2000 [08:02<54:25,  1.84s/it][A
Train Diffusion:  11%|█▏        | 226/2000 [08:04<55:48,  1.89s/it][A
Train Diffusion:  11%|█▏        | 227/2000 [08:06<57:35,  1.95s/it][A
Train Diffusion:  11%|█▏        | 228/2000 [08:08<59:38,  2.02s/it][A
Train Diffusion:  11%|█▏        | 229/2000 [08:10<58:54,  2.00s/it][A
Train Diffusion:  12%|█▏        | 230/2000 [08:12<59:49,  2.03s/it][A

Moving average norm loss at 230 iterations is: 3823.516015625. Best norm loss value is: 3592.9765625.

C_PATH mean = tensor([[4.5517e+01, 3.5891e-02, 1.4243e-01],
        [4.5516e+01, 3.9096e-02, 1.3648e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6156e+00, 1.2749e+01, 1.3199e-05],
         [2.0811e+01, 1.8636e-01, 2.2788e-01],
         ...,
         [4.4416e+01, 3.0329e-05, 4.9344e-05],
         [3.9677e+01, 1.6096e-05, 1.1135e-02],
         [2.8736e+01, 8.4050e-02, 6.9283e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.3985e+00, 1.2876e+01, 1.4371e-01],
         [2.2064e+01, 1.5460e+00, 1.8739e-05],
         ...,
         [4.4248e+01, 8.2004e-02, 1.1109e-02],
         [3.9450e+01, 7.9728e-02, 3.2219e-05],
         [2.8862e+01, 4.2976e-06, 2.2799e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  12%|█▏        | 231/2000 [08:15<1:04:18,  2.18s/it][A
Train Diffusion:  12%|█▏        | 232/2000 [08:17<1:03:55,  2.17s/it][A
Train Diffusion:  12%|█▏        | 233/2000 [08:19<1:02:10,  2.11s/it][A
Train Diffusion:  12%|█▏        | 234/2000 [08:21<1:01:22,  2.09s/it][A
Train Diffusion:  12%|█▏        | 235/2000 [08:23<59:15,  2.01s/it]  [A
Train Diffusion:  12%|█▏        | 236/2000 [08:25<59:46,  2.03s/it][A
Train Diffusion:  12%|█▏        | 237/2000 [08:27<58:32,  1.99s/it][A
Train Diffusion:  12%|█▏        | 238/2000 [08:29<57:55,  1.97s/it][A
Train Diffusion:  12%|█▏        | 239/2000 [08:31<58:15,  1.98s/it][A
Train Diffusion:  12%|█▏        | 240/2000 [08:33<57:46,  1.97s/it][A

Moving average norm loss at 240 iterations is: 3734.4480712890627. Best norm loss value is: 3496.155029296875.

C_PATH mean = tensor([[4.5481e+01, 3.9295e-02, 1.5713e-01],
        [4.5483e+01, 3.7605e-02, 1.5246e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.4123e+00, 1.2469e+01, 1.3803e-05],
         [2.0919e+01, 1.4320e+00, 2.8121e-01],
         ...,
         [4.4339e+01, 3.0086e-05, 6.4503e-05],
         [3.9515e+01, 1.6551e-05, 1.2992e-02],
         [2.8660e+01, 8.6148e-02, 7.0868e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6582e+00, 1.2644e+01, 1.7686e-01],
         [2.2108e+01, 1.4362e-01, 1.8096e-05],
         ...,
         [4.4339e+01, 8.4335e-02, 1.2898e-02],
         [3.9533e+01, 8.2942e-02, 3.9385e-05],
         [2.8596e+01, 3.8868e-06, 2.8017e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  12%|█▏        | 241/2000 [08:35<58:02,  1.98s/it][A
Train Diffusion:  12%|█▏        | 242/2000 [08:37<56:49,  1.94s/it][A
Train Diffusion:  12%|█▏        | 243/2000 [08:38<55:54,  1.91s/it][A
Train Diffusion:  12%|█▏        | 244/2000 [08:40<55:11,  1.89s/it][A
Train Diffusion:  12%|█▏        | 245/2000 [08:42<54:43,  1.87s/it][A
Train Diffusion:  12%|█▏        | 246/2000 [08:44<54:23,  1.86s/it][A
Train Diffusion:  12%|█▏        | 247/2000 [08:46<54:12,  1.86s/it][A
Train Diffusion:  12%|█▏        | 248/2000 [08:48<54:07,  1.85s/it][A
Train Diffusion:  12%|█▏        | 249/2000 [08:49<54:12,  1.86s/it][A
Train Diffusion:  12%|█▎        | 250/2000 [08:51<54:02,  1.85s/it][A

Moving average norm loss at 250 iterations is: 3945.3220703125. Best norm loss value is: 3496.155029296875.

C_PATH mean = tensor([[4.5482e+01, 4.0898e-02, 1.6487e-01],
        [4.5483e+01, 4.1218e-02, 1.6553e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9459e+00, 1.2202e+01, 2.1948e-01],
         [2.1956e+01, 1.5630e-01, 1.5351e-05],
         ...,
         [4.3912e+01, 2.9932e-05, 1.1256e-02],
         [3.9067e+01, 1.6113e-05, 2.5205e-05],
         [2.8162e+01, 8.4249e-02, 6.5677e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.0723e+00, 1.2391e+01, 1.3290e-05],
         [2.2905e+01, 1.3923e+00, 3.4987e-01],
         ...,
         [4.3900e+01, 8.3679e-02, 4.1470e-05],
         [3.9074e+01, 8.6976e-02, 1.2682e-02],
         [2.8202e+01, 4.2044e-06, 3.4347e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  13%|█▎        | 251/2000 [08:53<53:49,  1.85s/it][A
Train Diffusion:  13%|█▎        | 252/2000 [08:55<53:11,  1.83s/it][A
Train Diffusion:  13%|█▎        | 253/2000 [08:57<53:00,  1.82s/it][A
Train Diffusion:  13%|█▎        | 254/2000 [08:58<52:39,  1.81s/it][A
Train Diffusion:  13%|█▎        | 255/2000 [09:00<52:23,  1.80s/it][A
Train Diffusion:  13%|█▎        | 256/2000 [09:02<52:12,  1.80s/it][A
Train Diffusion:  13%|█▎        | 257/2000 [09:04<52:20,  1.80s/it][A
Train Diffusion:  13%|█▎        | 258/2000 [09:06<52:15,  1.80s/it][A
Train Diffusion:  13%|█▎        | 259/2000 [09:07<51:59,  1.79s/it][A
Train Diffusion:  13%|█▎        | 260/2000 [09:09<51:50,  1.79s/it][A

Moving average norm loss at 260 iterations is: 4065.72783203125. Best norm loss value is: 3496.155029296875.

C_PATH mean = tensor([[4.5477e+01, 4.4036e-02, 1.7986e-01],
        [4.5477e+01, 4.2652e-02, 1.7278e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.4638e+00, 1.2153e+01, 1.2978e-05],
         [2.2883e+01, 1.3770e+00, 1.6477e-05],
         ...,
         [4.3553e+01, 8.6508e-02, 9.6934e-03],
         [3.8738e+01, 8.5546e-02, 1.8547e-05],
         [2.7806e+01, 4.3818e-06, 5.7987e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4425e+00, 1.1971e+01, 2.7108e-01],
         [2.3600e+01, 1.6840e-01, 4.2329e-01],
         ...,
         [4.3566e+01, 3.1649e-05, 2.7869e-05],
         [3.8699e+01, 1.7789e-05, 1.1216e-02],
         [2.7827e+01, 8.5162e-02, 4.2152e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  13%|█▎        | 261/2000 [09:11<52:06,  1.80s/it][A
Train Diffusion:  13%|█▎        | 262/2000 [09:13<51:58,  1.79s/it][A
Train Diffusion:  13%|█▎        | 263/2000 [09:15<51:49,  1.79s/it][A
Train Diffusion:  13%|█▎        | 264/2000 [09:16<51:41,  1.79s/it][A
Train Diffusion:  13%|█▎        | 265/2000 [09:18<51:51,  1.79s/it][A
Train Diffusion:  13%|█▎        | 266/2000 [09:20<52:06,  1.80s/it][A
Train Diffusion:  13%|█▎        | 267/2000 [09:22<52:01,  1.80s/it][A
Train Diffusion:  13%|█▎        | 268/2000 [09:24<51:51,  1.80s/it][A
Train Diffusion:  13%|█▎        | 269/2000 [09:25<51:41,  1.79s/it][A
Train Diffusion:  14%|█▎        | 270/2000 [09:27<52:26,  1.82s/it][A

Moving average norm loss at 270 iterations is: 4192.828955078125. Best norm loss value is: 3421.429443359375.

C_PATH mean = tensor([[4.5455e+01, 3.8518e-02, 1.9746e-01],
        [4.5456e+01, 3.9978e-02, 1.9615e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9301e+00, 1.2032e+01, 3.3979e-01],
         [2.2619e+01, 8.3961e-02, 5.2298e-01],
         ...,
         [4.4613e+01, 3.3835e-05, 1.8268e-02],
         [3.9599e+01, 1.8892e-05, 5.0503e-05],
         [2.8233e+01, 8.6042e-02, 4.9032e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.1540e+00, 1.1738e+01, 1.3040e-05],
         [2.1681e+01, 1.1546e+00, 1.6754e-05],
         ...,
         [4.4611e+01, 8.4135e-02, 9.3028e-05],
         [3.9603e+01, 8.4473e-02, 1.9093e-02],
         [2.8318e+01, 4.5481e-06, 7.1172e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  14%|█▎        | 271/2000 [09:29<52:31,  1.82s/it][A
Train Diffusion:  14%|█▎        | 272/2000 [09:31<52:13,  1.81s/it][A
Train Diffusion:  14%|█▎        | 273/2000 [09:33<52:11,  1.81s/it][A
Train Diffusion:  14%|█▎        | 274/2000 [09:34<51:56,  1.81s/it][A
Train Diffusion:  14%|█▍        | 275/2000 [09:36<51:43,  1.80s/it][A
Train Diffusion:  14%|█▍        | 276/2000 [09:38<51:34,  1.79s/it][A
Train Diffusion:  14%|█▍        | 277/2000 [09:40<51:30,  1.79s/it][A
Train Diffusion:  14%|█▍        | 278/2000 [09:42<51:35,  1.80s/it][A
Train Diffusion:  14%|█▍        | 279/2000 [09:43<51:26,  1.79s/it][A
Train Diffusion:  14%|█▍        | 280/2000 [09:45<51:23,  1.79s/it][A

Moving average norm loss at 280 iterations is: 3852.81455078125. Best norm loss value is: 3224.900146484375.

C_PATH mean = tensor([[4.5453e+01, 4.1978e-02, 2.0920e-01],
        [4.5452e+01, 4.1879e-02, 2.1059e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.8475e+00, 1.1214e+01, 4.2385e-01],
         [2.5265e+01, 1.1658e+00, 1.6216e-05],
         ...,
         [4.3788e+01, 8.8960e-02, 2.4918e-05],
         [3.8618e+01, 8.7481e-02, 1.3416e-02],
         [2.7271e+01, 4.2684e-06, 6.8421e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.2927e+00, 1.0672e+01, 1.1805e-05],
         [2.5260e+01, 9.1747e-02, 6.3499e-01],
         ...,
         [4.3801e+01, 3.2113e-05, 1.0690e-02],
         [3.8467e+01, 1.8246e-05, 1.8331e-05],
         [2.6803e+01, 9.0598e-02, 6.2644e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  14%|█▍        | 281/2000 [09:47<51:41,  1.80s/it][A
Train Diffusion:  14%|█▍        | 282/2000 [09:49<51:36,  1.80s/it][A
Train Diffusion:  14%|█▍        | 283/2000 [09:51<52:03,  1.82s/it][A
Train Diffusion:  14%|█▍        | 284/2000 [09:53<51:48,  1.81s/it][A
Train Diffusion:  14%|█▍        | 285/2000 [09:54<51:32,  1.80s/it][A
Train Diffusion:  14%|█▍        | 286/2000 [09:56<51:17,  1.80s/it][A
Train Diffusion:  14%|█▍        | 287/2000 [09:58<51:13,  1.79s/it][A
Train Diffusion:  14%|█▍        | 288/2000 [10:00<51:09,  1.79s/it][A
Train Diffusion:  14%|█▍        | 289/2000 [10:01<51:00,  1.79s/it][A
Train Diffusion:  14%|█▍        | 290/2000 [10:03<51:08,  1.79s/it][A

Moving average norm loss at 290 iterations is: 3327.2348876953124. Best norm loss value is: 3107.265625.

C_PATH mean = tensor([[4.5525e+01, 4.0433e-02, 2.0942e-01],
        [4.5521e+01, 4.0403e-02, 2.0143e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.1151e+00, 1.2227e+01, 4.8795e-01],
         [2.4122e+01, 1.3280e-01, 1.4009e-05],
         ...,
         [4.4293e+01, 8.2796e-02, 4.2632e-05],
         [3.9538e+01, 1.7885e-05, 2.5232e-05],
         [2.8172e+01, 8.3467e-02, 6.9652e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.0907e+00, 1.1805e+01, 1.1251e-05],
         [2.3534e+01, 1.3613e+00, 7.1724e-01],
         ...,
         [4.4374e+01, 3.0793e-05, 1.2910e-02],
         [3.9383e+01, 8.2739e-02, 1.6036e-02],
         [2.8404e+01, 4.9482e-06, 6.9636e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  15%|█▍        | 291/2000 [10:05<51:23,  1.80s/it][A
Train Diffusion:  15%|█▍        | 292/2000 [10:07<51:22,  1.80s/it][A
Train Diffusion:  15%|█▍        | 293/2000 [10:09<51:12,  1.80s/it][A
Train Diffusion:  15%|█▍        | 294/2000 [10:10<51:03,  1.80s/it][A
Train Diffusion:  15%|█▍        | 295/2000 [10:12<51:06,  1.80s/it][A
Train Diffusion:  15%|█▍        | 296/2000 [10:14<51:23,  1.81s/it][A
Train Diffusion:  15%|█▍        | 297/2000 [10:16<51:48,  1.83s/it][A
Train Diffusion:  15%|█▍        | 298/2000 [10:18<51:50,  1.83s/it][A
Train Diffusion:  15%|█▍        | 299/2000 [10:20<51:58,  1.83s/it][A
Train Diffusion:  15%|█▌        | 300/2000 [10:21<52:01,  1.84s/it][A

Moving average norm loss at 300 iterations is: 3531.7010009765627. Best norm loss value is: 3107.265625.

C_PATH mean = tensor([[4.5504e+01, 3.9556e-02, 2.1573e-01],
        [4.5505e+01, 3.6340e-02, 2.1483e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.8850e+00, 1.1422e+01, 1.0043e-05],
         [2.4948e+01, 8.1662e-02, 7.1864e-01],
         ...,
         [4.4117e+01, 3.1709e-05, 1.3763e-02],
         [3.8854e+01, 8.1232e-02, 1.6107e-02],
         [2.7173e+01, 8.1682e-02, 7.5165e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.1040e+00, 1.0750e+01, 5.7269e-01],
         [2.4934e+01, 1.1383e+00, 1.5456e-05],
         ...,
         [4.4107e+01, 8.0479e-02, 3.7938e-05],
         [3.9031e+01, 1.8119e-05, 2.5095e-05],
         [2.7646e+01, 4.8716e-06, 8.0431e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  15%|█▌        | 301/2000 [10:23<52:26,  1.85s/it][A
Train Diffusion:  15%|█▌        | 302/2000 [10:25<52:07,  1.84s/it][A
Train Diffusion:  15%|█▌        | 303/2000 [10:27<51:38,  1.83s/it][A
Train Diffusion:  15%|█▌        | 304/2000 [10:29<51:32,  1.82s/it][A
Train Diffusion:  15%|█▌        | 305/2000 [10:31<51:11,  1.81s/it][A
Train Diffusion:  15%|█▌        | 306/2000 [10:32<51:00,  1.81s/it][A
Train Diffusion:  15%|█▌        | 307/2000 [10:34<51:00,  1.81s/it][A
Train Diffusion:  15%|█▌        | 308/2000 [10:36<50:49,  1.80s/it][A
Train Diffusion:  15%|█▌        | 309/2000 [10:38<50:40,  1.80s/it][A
Train Diffusion:  16%|█▌        | 310/2000 [10:40<50:36,  1.80s/it][A

Moving average norm loss at 310 iterations is: 3396.6921630859374. Best norm loss value is: 3055.78759765625.

C_PATH mean = tensor([[4.5444e+01, 4.0807e-02, 2.2930e-01],
        [4.5445e+01, 4.1809e-02, 2.3180e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.7915e+00, 1.1106e+01, 1.0797e-05],
         [2.4706e+01, 1.0044e-01, 1.5041e-05],
         ...,
         [4.4111e+01, 8.7134e-02, 1.5100e-02],
         [3.9028e+01, 1.8465e-05, 2.6241e-05],
         [2.7683e+01, 5.2382e-06, 7.3856e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.6764e+00, 1.1642e+01, 6.9306e-01],
         [2.4933e+01, 1.1895e+00, 7.4466e-01],
         ...,
         [4.4102e+01, 3.4671e-05, 4.0160e-05],
         [3.9008e+01, 8.6405e-02, 1.9164e-02],
         [2.7469e+01, 8.3897e-02, 7.3929e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  16%|█▌        | 311/2000 [10:41<51:01,  1.81s/it][A
Train Diffusion:  16%|█▌        | 312/2000 [10:43<50:46,  1.80s/it][A
Train Diffusion:  16%|█▌        | 313/2000 [10:45<50:33,  1.80s/it][A
Train Diffusion:  16%|█▌        | 314/2000 [10:47<50:31,  1.80s/it][A
Train Diffusion:  16%|█▌        | 315/2000 [10:49<50:25,  1.80s/it][A
Train Diffusion:  16%|█▌        | 316/2000 [10:50<50:20,  1.79s/it][A
Train Diffusion:  16%|█▌        | 317/2000 [10:52<50:13,  1.79s/it][A
Train Diffusion:  16%|█▌        | 318/2000 [10:54<50:12,  1.79s/it][A
Train Diffusion:  16%|█▌        | 319/2000 [10:56<50:05,  1.79s/it][A
Train Diffusion:  16%|█▌        | 320/2000 [10:57<50:01,  1.79s/it][A

Moving average norm loss at 320 iterations is: 3341.110791015625. Best norm loss value is: 3055.78759765625.

C_PATH mean = tensor([[4.5508e+01, 4.2835e-02, 2.3861e-01],
        [4.5507e+01, 3.9509e-02, 2.2326e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.7033e+00, 1.1629e+01, 9.8917e-06],
         [2.4767e+01, 1.1482e+00, 7.1154e-01],
         ...,
         [4.4170e+01, 8.4976e-02, 4.4180e-05],
         [3.9050e+01, 1.9413e-05, 1.9595e-02],
         [2.7502e+01, 5.2250e-06, 7.1000e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.7186e+00, 1.1226e+01, 7.3979e-01],
         [2.5122e+01, 1.0378e-01, 1.5335e-05],
         ...,
         [4.4117e+01, 3.4639e-05, 1.5049e-02],
         [3.9061e+01, 8.3571e-02, 2.8544e-05],
         [2.7723e+01, 8.3363e-02, 7.5687e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  16%|█▌        | 321/2000 [10:59<50:21,  1.80s/it][A
Train Diffusion:  16%|█▌        | 322/2000 [11:01<50:18,  1.80s/it][A
Train Diffusion:  16%|█▌        | 323/2000 [11:03<50:14,  1.80s/it][A
Train Diffusion:  16%|█▌        | 324/2000 [11:05<50:12,  1.80s/it][A
Train Diffusion:  16%|█▋        | 325/2000 [11:06<50:05,  1.79s/it][A
Train Diffusion:  16%|█▋        | 326/2000 [11:08<50:02,  1.79s/it][A
Train Diffusion:  16%|█▋        | 327/2000 [11:10<49:56,  1.79s/it][A
Train Diffusion:  16%|█▋        | 328/2000 [11:12<49:51,  1.79s/it][A
Train Diffusion:  16%|█▋        | 329/2000 [11:14<49:48,  1.79s/it][A
Train Diffusion:  16%|█▋        | 330/2000 [11:15<49:45,  1.79s/it][A

Moving average norm loss at 330 iterations is: 3271.491259765625. Best norm loss value is: 3040.886474609375.

C_PATH mean = tensor([[4.5566e+01, 3.9707e-02, 2.2841e-01],
        [4.5565e+01, 4.1439e-02, 2.3451e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.6801e+00, 1.1602e+01, 9.9045e-06],
         [2.4725e+01, 1.0197e-01, 6.9446e-01],
         ...,
         [4.4180e+01, 3.6016e-05, 1.5392e-02],
         [3.9132e+01, 8.2012e-02, 2.1034e-02],
         [2.7671e+01, 8.2113e-02, 6.8328e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.6125e+00, 1.1251e+01, 6.9919e-01],
         [2.5205e+01, 1.0993e+00, 1.5392e-05],
         ...,
         [4.4198e+01, 8.0854e-02, 5.0198e-05],
         [3.9129e+01, 1.9290e-05, 2.8739e-05],
         [2.7602e+01, 5.1890e-06, 7.5037e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  17%|█▋        | 331/2000 [11:17<50:05,  1.80s/it][A
Train Diffusion:  17%|█▋        | 332/2000 [11:19<49:52,  1.79s/it][A
Train Diffusion:  17%|█▋        | 333/2000 [11:21<49:49,  1.79s/it][A
Train Diffusion:  17%|█▋        | 334/2000 [11:23<49:40,  1.79s/it][A
Train Diffusion:  17%|█▋        | 335/2000 [11:24<49:39,  1.79s/it][A
Train Diffusion:  17%|█▋        | 336/2000 [11:26<49:31,  1.79s/it][A
Train Diffusion:  17%|█▋        | 337/2000 [11:28<49:28,  1.78s/it][A
Train Diffusion:  17%|█▋        | 338/2000 [11:30<49:30,  1.79s/it][A
Train Diffusion:  17%|█▋        | 339/2000 [11:32<49:26,  1.79s/it][A
Train Diffusion:  17%|█▋        | 340/2000 [11:33<49:39,  1.79s/it][A

Moving average norm loss at 340 iterations is: 3185.41015625. Best norm loss value is: 2957.83154296875.

C_PATH mean = tensor([[4.5457e+01, 3.9763e-02, 2.3995e-01],
        [4.5456e+01, 4.1074e-02, 2.5703e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.8552e+00, 1.0605e+01, 7.1145e-01],
         [2.4844e+01, 6.4598e-02, 1.6420e-05],
         ...,
         [4.4131e+01, 3.4565e-05, 7.0342e-05],
         [3.8961e+01, 2.0166e-05, 2.5852e-02],
         [2.7146e+01, 5.7543e-06, 7.0041e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [5.7506e+00, 1.1071e+01, 1.0495e-05],
         [2.5404e+01, 8.8793e-01, 7.0701e-01],
         ...,
         [4.4137e+01, 8.3542e-02, 1.8867e-02],
         [3.8933e+01, 8.3461e-02, 3.6242e-05],
         [2.7293e+01, 8.4043e-02, 8.2186e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  17%|█▋        | 341/2000 [11:35<49:57,  1.81s/it][A
Train Diffusion:  17%|█▋        | 342/2000 [11:37<49:40,  1.80s/it][A
Train Diffusion:  17%|█▋        | 343/2000 [11:39<49:36,  1.80s/it][A
Train Diffusion:  17%|█▋        | 344/2000 [11:41<49:32,  1.80s/it][A
Train Diffusion:  17%|█▋        | 345/2000 [11:42<49:29,  1.79s/it][A
Train Diffusion:  17%|█▋        | 346/2000 [11:44<49:26,  1.79s/it][A
Train Diffusion:  17%|█▋        | 347/2000 [11:46<49:21,  1.79s/it][A
Train Diffusion:  17%|█▋        | 348/2000 [11:48<49:19,  1.79s/it][A
Train Diffusion:  17%|█▋        | 349/2000 [11:49<49:12,  1.79s/it][A
Train Diffusion:  18%|█▊        | 350/2000 [11:51<49:14,  1.79s/it][A

Moving average norm loss at 350 iterations is: 3138.218359375. Best norm loss value is: 2892.955322265625.

C_PATH mean = tensor([[4.5462e+01, 4.1859e-02, 2.6022e-01],
        [4.5463e+01, 4.2311e-02, 2.6374e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [6.1626e+00, 1.0966e+01, 1.1113e-05],
         [2.5522e+01, 8.7161e-01, 7.3410e-01],
         ...,
         [4.4113e+01, 8.5567e-02, 6.5947e-05],
         [3.8879e+01, 8.4543e-02, 3.5788e-05],
         [2.7013e+01, 5.1541e-06, 9.2002e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [8.2169e+00, 1.0492e+01, 7.3520e-01],
         [2.5936e+01, 6.9815e-02, 1.6522e-05],
         ...,
         [4.4108e+01, 3.7893e-05, 1.9502e-02],
         [3.8886e+01, 2.1423e-05, 2.8111e-02],
         [2.7150e+01, 8.7770e-02, 7.2911e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  18%|█▊        | 351/2000 [11:53<51:42,  1.88s/it][A
Train Diffusion:  18%|█▊        | 352/2000 [11:55<50:59,  1.86s/it][A
Train Diffusion:  18%|█▊        | 353/2000 [11:57<50:20,  1.83s/it][A
Train Diffusion:  18%|█▊        | 354/2000 [11:59<49:54,  1.82s/it][A
Train Diffusion:  18%|█▊        | 355/2000 [12:01<49:39,  1.81s/it][A
Train Diffusion:  18%|█▊        | 356/2000 [12:02<49:29,  1.81s/it][A
Train Diffusion:  18%|█▊        | 357/2000 [12:04<49:19,  1.80s/it][A
Train Diffusion:  18%|█▊        | 358/2000 [12:06<49:06,  1.79s/it][A
Train Diffusion:  18%|█▊        | 359/2000 [12:08<49:02,  1.79s/it][A
Train Diffusion:  18%|█▊        | 360/2000 [12:10<52:07,  1.91s/it][A

Moving average norm loss at 360 iterations is: 3212.0831298828125. Best norm loss value is: 2892.955322265625.

C_PATH mean = tensor([[4.5486e+01, 4.4607e-02, 2.6131e-01],
        [4.5487e+01, 4.1823e-02, 2.6962e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.1683e+00, 1.1248e+01, 7.2728e-01],
         [2.7042e+01, 1.1072e+00, 1.6928e-05],
         ...,
         [4.3875e+01, 8.4630e-02, 4.0445e-05],
         [3.8678e+01, 2.0832e-05, 2.8392e-02],
         [2.6911e+01, 5.7631e-06, 7.2945e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.0380e+00, 1.0691e+01, 1.0313e-05],
         [2.7248e+01, 1.1016e-01, 7.1747e-01],
         ...,
         [4.3899e+01, 3.7129e-05, 1.6410e-02],
         [3.8679e+01, 8.4110e-02, 2.5902e-05],
         [2.7152e+01, 8.5031e-02, 1.0656e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  18%|█▊        | 361/2000 [12:12<51:28,  1.88s/it][A
Train Diffusion:  18%|█▊        | 362/2000 [12:14<50:59,  1.87s/it][A
Train Diffusion:  18%|█▊        | 363/2000 [12:15<51:35,  1.89s/it][A
Train Diffusion:  18%|█▊        | 364/2000 [12:17<50:44,  1.86s/it][A
Train Diffusion:  18%|█▊        | 365/2000 [12:19<50:05,  1.84s/it][A
Train Diffusion:  18%|█▊        | 366/2000 [12:21<49:54,  1.83s/it][A
Train Diffusion:  18%|█▊        | 367/2000 [12:23<49:30,  1.82s/it][A
Train Diffusion:  18%|█▊        | 368/2000 [12:24<49:20,  1.81s/it][A
Train Diffusion:  18%|█▊        | 369/2000 [12:26<49:26,  1.82s/it][A
Train Diffusion:  18%|█▊        | 370/2000 [12:28<49:31,  1.82s/it][A

Moving average norm loss at 370 iterations is: 3055.51884765625. Best norm loss value is: 2832.54833984375.

C_PATH mean = tensor([[4.5527e+01, 4.1168e-02, 2.7063e-01],
        [4.5527e+01, 4.4581e-02, 2.6370e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.2767e+00, 1.1083e+01, 7.2196e-01],
         [2.7655e+01, 9.5692e-02, 1.7507e-05],
         ...,
         [4.3968e+01, 8.3591e-02, 1.7133e-02],
         [3.8683e+01, 2.0929e-05, 3.0469e-02],
         [2.7121e+01, 8.4747e-02, 1.1404e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.3642e+00, 1.0491e+01, 1.1367e-05],
         [2.7332e+01, 1.0251e+00, 7.0623e-01],
         ...,
         [4.3987e+01, 3.9655e-05, 4.5113e-05],
         [3.8758e+01, 8.4138e-02, 2.9437e-05],
         [2.6789e+01, 5.9556e-06, 7.1277e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  19%|█▊        | 371/2000 [12:30<50:22,  1.86s/it][A
Train Diffusion:  19%|█▊        | 372/2000 [12:32<50:11,  1.85s/it][A
Train Diffusion:  19%|█▊        | 373/2000 [12:34<50:15,  1.85s/it][A
Train Diffusion:  19%|█▊        | 374/2000 [12:36<49:47,  1.84s/it][A
Train Diffusion:  19%|█▉        | 375/2000 [12:37<49:23,  1.82s/it][A
Train Diffusion:  19%|█▉        | 376/2000 [12:39<49:03,  1.81s/it][A
Train Diffusion:  19%|█▉        | 377/2000 [12:41<48:47,  1.80s/it][A
Train Diffusion:  19%|█▉        | 378/2000 [12:43<48:35,  1.80s/it][A
Train Diffusion:  19%|█▉        | 379/2000 [12:44<48:25,  1.79s/it][A
Train Diffusion:  19%|█▉        | 380/2000 [12:46<48:28,  1.80s/it][A

Moving average norm loss at 380 iterations is: 3077.427490234375. Best norm loss value is: 2776.149658203125.

C_PATH mean = tensor([[4.5493e+01, 4.1094e-02, 2.7757e-01],
        [4.5492e+01, 4.5030e-02, 2.7354e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.1273e+00, 1.0258e+01, 1.1664e-05],
         [2.7471e+01, 6.9055e-02, 1.8051e-05],
         ...,
         [4.4085e+01, 8.5731e-02, 5.5802e-05],
         [3.8759e+01, 2.1948e-05, 3.5475e-02],
         [2.6741e+01, 5.9538e-06, 1.1222e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.1663e+00, 1.0798e+01, 7.3159e-01],
         [2.7163e+01, 8.6405e-01, 7.2919e-01],
         ...,
         [4.4064e+01, 4.0749e-05, 1.9977e-02],
         [3.8765e+01, 8.6407e-02, 3.4583e-05],
         [2.6896e+01, 8.5738e-02, 7.0363e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  19%|█▉        | 381/2000 [12:48<48:49,  1.81s/it][A
Train Diffusion:  19%|█▉        | 382/2000 [12:50<48:37,  1.80s/it][A
Train Diffusion:  19%|█▉        | 383/2000 [12:52<48:27,  1.80s/it][A
Train Diffusion:  19%|█▉        | 384/2000 [12:53<48:16,  1.79s/it][A
Train Diffusion:  19%|█▉        | 385/2000 [12:55<48:19,  1.80s/it][A
Train Diffusion:  19%|█▉        | 386/2000 [12:57<48:11,  1.79s/it][A
Train Diffusion:  19%|█▉        | 387/2000 [12:59<48:09,  1.79s/it][A
Train Diffusion:  19%|█▉        | 388/2000 [13:01<48:08,  1.79s/it][A
Train Diffusion:  19%|█▉        | 389/2000 [13:02<48:14,  1.80s/it][A
Train Diffusion:  20%|█▉        | 390/2000 [13:04<48:16,  1.80s/it][A

Moving average norm loss at 390 iterations is: 2906.395849609375. Best norm loss value is: 2683.041748046875.

C_PATH mean = tensor([[4.5526e+01, 4.2505e-02, 2.6279e-01],
        [4.5525e+01, 4.0222e-02, 2.7414e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.4705e+00, 1.0063e+01, 1.1718e-05],
         [2.8124e+01, 8.2558e-01, 1.8970e-05],
         ...,
         [4.4000e+01, 4.2633e-05, 1.9655e-02],
         [3.8788e+01, 8.1534e-02, 3.8444e-02],
         [2.6929e+01, 5.8603e-06, 1.2248e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.4484e+00, 1.0687e+01, 6.8458e-01],
         [2.7591e+01, 6.3532e-02, 6.9324e-01],
         ...,
         [4.4071e+01, 8.1302e-02, 5.7284e-05],
         [3.8636e+01, 2.2869e-05, 3.4112e-05],
         [2.6577e+01, 8.1574e-02, 6.8669e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  20%|█▉        | 391/2000 [13:06<48:28,  1.81s/it][A
Train Diffusion:  20%|█▉        | 392/2000 [13:08<48:17,  1.80s/it][A
Train Diffusion:  20%|█▉        | 393/2000 [13:10<48:05,  1.80s/it][A
Train Diffusion:  20%|█▉        | 394/2000 [13:11<47:55,  1.79s/it][A
Train Diffusion:  20%|█▉        | 395/2000 [13:13<47:48,  1.79s/it][A
Train Diffusion:  20%|█▉        | 396/2000 [13:15<47:46,  1.79s/it][A
Train Diffusion:  20%|█▉        | 397/2000 [13:17<47:43,  1.79s/it][A
Train Diffusion:  20%|█▉        | 398/2000 [13:19<47:40,  1.79s/it][A
Train Diffusion:  20%|█▉        | 399/2000 [13:20<47:43,  1.79s/it][A
Train Diffusion:  20%|██        | 400/2000 [13:22<47:37,  1.79s/it][A

Moving average norm loss at 400 iterations is: 2774.72666015625. Best norm loss value is: 2652.848388671875.

C_PATH mean = tensor([[4.5507e+01, 4.3004e-02, 2.7782e-01],
        [4.5508e+01, 4.0479e-02, 2.9420e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [7.8734e+00, 1.0244e+01, 7.2131e-01],
         [2.8136e+01, 6.9237e-01, 1.9815e-05],
         ...,
         [4.4013e+01, 8.3381e-02, 7.1394e-05],
         [3.8601e+01, 8.2996e-02, 4.6182e-02],
         [2.6583e+01, 6.0910e-06, 1.2685e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.6872e+00, 9.5513e+00, 1.2070e-05],
         [2.8494e+01, 4.5176e-02, 7.0834e-01],
         ...,
         [4.4008e+01, 4.2339e-05, 2.1812e-02],
         [3.8546e+01, 2.2921e-05, 3.6993e-05],
         [2.6199e+01, 8.4579e-02, 7.1019e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  20%|██        | 401/2000 [13:24<47:55,  1.80s/it][A
Train Diffusion:  20%|██        | 402/2000 [13:26<47:46,  1.79s/it][A
Train Diffusion:  20%|██        | 403/2000 [13:28<47:43,  1.79s/it][A
Train Diffusion:  20%|██        | 404/2000 [13:29<47:41,  1.79s/it][A
Train Diffusion:  20%|██        | 405/2000 [13:31<47:36,  1.79s/it][A
Train Diffusion:  20%|██        | 406/2000 [13:33<47:54,  1.80s/it][A
Train Diffusion:  20%|██        | 407/2000 [13:35<47:50,  1.80s/it][A
Train Diffusion:  20%|██        | 408/2000 [13:37<47:51,  1.80s/it][A
Train Diffusion:  20%|██        | 409/2000 [13:39<49:05,  1.85s/it][A
Train Diffusion:  20%|██        | 410/2000 [13:41<50:32,  1.91s/it][A

Moving average norm loss at 410 iterations is: 2865.6176025390623. Best norm loss value is: 2652.848388671875.

C_PATH mean = tensor([[4.5476e+01, 4.3193e-02, 2.8892e-01],
        [4.5475e+01, 4.2713e-02, 2.8040e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0136e+01, 9.5920e+00, 1.2063e-05],
         [2.9238e+01, 7.4994e-01, 1.8447e-05],
         ...,
         [4.3826e+01, 4.4423e-05, 2.0530e-02],
         [3.8353e+01, 2.4585e-05, 3.3424e-05],
         [2.6062e+01, 6.4071e-06, 1.3895e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [8.3758e+00, 1.0310e+01, 7.0006e-01],
         [2.8860e+01, 5.9331e-02, 7.1145e-01],
         ...,
         [4.3830e+01, 8.0689e-02, 4.7242e-05],
         [3.8455e+01, 8.1423e-02, 4.6088e-02],
         [2.6470e+01, 8.2739e-02, 7.0409e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  21%|██        | 411/2000 [13:43<51:08,  1.93s/it][A
Train Diffusion:  21%|██        | 412/2000 [13:44<50:17,  1.90s/it][A
Train Diffusion:  21%|██        | 413/2000 [13:46<50:06,  1.89s/it][A
Train Diffusion:  21%|██        | 414/2000 [13:48<49:15,  1.86s/it][A
Train Diffusion:  21%|██        | 415/2000 [13:50<48:44,  1.85s/it][A
Train Diffusion:  21%|██        | 416/2000 [13:52<48:16,  1.83s/it][A
Train Diffusion:  21%|██        | 417/2000 [13:53<47:56,  1.82s/it][A
Train Diffusion:  21%|██        | 418/2000 [13:55<47:42,  1.81s/it][A
Train Diffusion:  21%|██        | 419/2000 [13:57<47:31,  1.80s/it][A
Train Diffusion:  21%|██        | 420/2000 [13:59<47:21,  1.80s/it][A

Moving average norm loss at 420 iterations is: 2786.0677978515623. Best norm loss value is: 2636.59716796875.

C_PATH mean = tensor([[4.5432e+01, 4.5695e-02, 2.9804e-01],
        [4.5433e+01, 4.5314e-02, 2.9964e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [8.4717e+00, 9.6482e+00, 7.1591e-01],
         [2.9569e+01, 7.6576e-01, 2.0133e-05],
         ...,
         [4.3810e+01, 8.6633e-02, 4.7884e-05],
         [3.8379e+01, 8.6398e-02, 3.2146e-05],
         [2.6081e+01, 6.2558e-06, 7.1629e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0331e+01, 1.0317e+01, 1.2622e-05],
         [2.9185e+01, 5.9708e-02, 7.2388e-01],
         ...,
         [4.3822e+01, 4.1905e-05, 2.1798e-02],
         [3.8370e+01, 2.4494e-05, 5.5393e-02],
         [2.6357e+01, 8.5499e-02, 1.3527e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  21%|██        | 421/2000 [14:01<47:42,  1.81s/it][A
Train Diffusion:  21%|██        | 422/2000 [14:02<47:29,  1.81s/it][A
Train Diffusion:  21%|██        | 423/2000 [14:04<47:27,  1.81s/it][A
Train Diffusion:  21%|██        | 424/2000 [14:06<47:14,  1.80s/it][A
Train Diffusion:  21%|██▏       | 425/2000 [14:08<47:31,  1.81s/it][A
Train Diffusion:  21%|██▏       | 426/2000 [14:10<48:14,  1.84s/it][A
Train Diffusion:  21%|██▏       | 427/2000 [14:12<51:37,  1.97s/it][A
Train Diffusion:  21%|██▏       | 428/2000 [14:14<53:22,  2.04s/it][A
Train Diffusion:  21%|██▏       | 429/2000 [14:16<52:34,  2.01s/it][A
Train Diffusion:  22%|██▏       | 430/2000 [14:18<51:44,  1.98s/it][A

Moving average norm loss at 430 iterations is: 2821.280908203125. Best norm loss value is: 2636.59716796875.

C_PATH mean = tensor([[4.5467e+01, 4.1753e-02, 2.8921e-01],
        [4.5469e+01, 4.1430e-02, 3.1157e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [8.8179e+00, 9.0520e+00, 6.9992e-01],
         [3.0195e+01, 3.7921e-02, 6.9565e-01],
         ...,
         [4.3890e+01, 4.5832e-05, 6.7775e-05],
         [3.8193e+01, 2.4891e-05, 6.6246e-02],
         [2.5636e+01, 8.2724e-02, 1.4372e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0695e+01, 9.8202e+00, 1.3194e-05],
         [2.9671e+01, 6.2253e-01, 2.0734e-05],
         ...,
         [4.3834e+01, 8.1774e-02, 2.4928e-02],
         [3.8342e+01, 8.2861e-02, 4.0871e-05],
         [2.6047e+01, 6.6989e-06, 7.0028e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  22%|██▏       | 431/2000 [14:20<51:38,  1.97s/it][A
Train Diffusion:  22%|██▏       | 432/2000 [14:22<51:25,  1.97s/it][A
Train Diffusion:  22%|██▏       | 433/2000 [14:24<53:10,  2.04s/it][A
Train Diffusion:  22%|██▏       | 434/2000 [14:26<52:28,  2.01s/it][A
Train Diffusion:  22%|██▏       | 435/2000 [14:28<52:57,  2.03s/it][A
Train Diffusion:  22%|██▏       | 436/2000 [14:30<52:20,  2.01s/it][A
Train Diffusion:  22%|██▏       | 437/2000 [14:32<51:43,  1.99s/it][A
Train Diffusion:  22%|██▏       | 438/2000 [14:34<51:41,  1.99s/it][A
Train Diffusion:  22%|██▏       | 439/2000 [14:36<51:27,  1.98s/it][A
Train Diffusion:  22%|██▏       | 440/2000 [14:38<50:11,  1.93s/it][A

Moving average norm loss at 440 iterations is: 2637.4630615234373. Best norm loss value is: 2562.183837890625.

C_PATH mean = tensor([[4.5496e+01, 4.3644e-02, 2.9518e-01],
        [4.5498e+01, 4.0920e-02, 3.0954e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [8.8049e+00, 9.8814e+00, 7.0836e-01],
         [2.9785e+01, 6.2491e-01, 1.9678e-05],
         ...,
         [4.3899e+01, 8.2931e-02, 2.5784e-02],
         [3.8271e+01, 2.4174e-05, 7.5848e-02],
         [2.6077e+01, 6.6575e-06, 7.0759e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0861e+01, 9.1391e+00, 1.2936e-05],
         [3.0361e+01, 3.9217e-02, 7.0863e-01],
         ...,
         [4.3927e+01, 4.4942e-05, 6.6257e-05],
         [3.8380e+01, 8.3417e-02, 3.7513e-05],
         [2.5716e+01, 8.3826e-02, 1.4712e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  22%|██▏       | 441/2000 [14:40<48:50,  1.88s/it][A
Train Diffusion:  22%|██▏       | 442/2000 [14:41<48:06,  1.85s/it][A
Train Diffusion:  22%|██▏       | 443/2000 [14:43<47:34,  1.83s/it][A
Train Diffusion:  22%|██▏       | 444/2000 [14:45<47:21,  1.83s/it][A
Train Diffusion:  22%|██▏       | 445/2000 [14:47<47:02,  1.82s/it][A
Train Diffusion:  22%|██▏       | 446/2000 [14:49<46:43,  1.80s/it][A
Train Diffusion:  22%|██▏       | 447/2000 [14:50<46:29,  1.80s/it][A
Train Diffusion:  22%|██▏       | 448/2000 [14:52<46:39,  1.80s/it][A
Train Diffusion:  22%|██▏       | 449/2000 [14:54<47:00,  1.82s/it][A
Train Diffusion:  22%|██▎       | 450/2000 [14:56<47:28,  1.84s/it][A

Moving average norm loss at 450 iterations is: 2809.0297119140623. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5561e+01, 4.2186e-02, 2.8704e-01],
        [4.5564e+01, 4.4576e-02, 3.0496e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.0885e+00, 9.2127e+00, 6.9170e-01],
         [3.0318e+01, 6.5949e-01, 1.9689e-05],
         ...,
         [4.3850e+01, 8.1758e-02, 5.6845e-05],
         [3.8228e+01, 2.4878e-05, 7.9746e-02],
         [2.5692e+01, 6.7225e-06, 1.5844e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.1202e+01, 9.9034e+00, 1.2908e-05],
         [3.0928e+01, 4.5642e-02, 6.9431e-01],
         ...,
         [4.3915e+01, 4.4328e-05, 2.3216e-02],
         [3.8343e+01, 8.0527e-02, 3.3029e-05],
         [2.6013e+01, 8.1472e-02, 6.9729e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  23%|██▎       | 451/2000 [14:58<47:25,  1.84s/it][A
Train Diffusion:  23%|██▎       | 452/2000 [15:00<47:01,  1.82s/it][A
Train Diffusion:  23%|██▎       | 453/2000 [15:01<46:59,  1.82s/it][A
Train Diffusion:  23%|██▎       | 454/2000 [15:03<46:46,  1.82s/it][A
Train Diffusion:  23%|██▎       | 455/2000 [15:05<46:35,  1.81s/it][A
Train Diffusion:  23%|██▎       | 456/2000 [15:07<46:26,  1.80s/it][A
Train Diffusion:  23%|██▎       | 457/2000 [15:08<45:56,  1.79s/it][A
Train Diffusion:  23%|██▎       | 458/2000 [15:10<45:58,  1.79s/it][A
Train Diffusion:  23%|██▎       | 459/2000 [15:12<45:36,  1.78s/it][A
Train Diffusion:  23%|██▎       | 460/2000 [15:14<45:15,  1.76s/it][A

Moving average norm loss at 460 iterations is: 2916.6234375. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5497e+01, 4.3754e-02, 3.2490e-01],
        [4.5499e+01, 4.3181e-02, 3.1970e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.4818e+00, 8.7735e+00, 7.2745e-01],
         [3.1128e+01, 5.6519e-01, 7.3246e-01],
         ...,
         [4.3905e+01, 8.6071e-02, 7.8048e-05],
         [3.8210e+01, 2.4827e-05, 4.0033e-05],
         [2.5428e+01, 7.1497e-06, 1.7177e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.1501e+01, 9.6102e+00, 1.2901e-05],
         [3.0843e+01, 2.8905e-02, 2.0894e-05],
         ...,
         [4.3930e+01, 4.6890e-05, 2.9010e-02],
         [3.8191e+01, 8.5698e-02, 1.0708e-01],
         [2.5748e+01, 8.5707e-02, 7.3682e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  23%|██▎       | 461/2000 [15:16<45:02,  1.76s/it][A
Train Diffusion:  23%|██▎       | 462/2000 [15:17<44:52,  1.75s/it][A
Train Diffusion:  23%|██▎       | 463/2000 [15:19<44:42,  1.75s/it][A
Train Diffusion:  23%|██▎       | 464/2000 [15:21<44:35,  1.74s/it][A
Train Diffusion:  23%|██▎       | 465/2000 [15:22<44:35,  1.74s/it][A
Train Diffusion:  23%|██▎       | 466/2000 [15:24<44:26,  1.74s/it][A
Train Diffusion:  23%|██▎       | 467/2000 [15:26<44:20,  1.74s/it][A
Train Diffusion:  23%|██▎       | 468/2000 [15:28<44:16,  1.73s/it][A
Train Diffusion:  23%|██▎       | 469/2000 [15:29<44:16,  1.73s/it][A
Train Diffusion:  24%|██▎       | 470/2000 [15:31<44:13,  1.73s/it][A

Moving average norm loss at 470 iterations is: 2817.7242919921873. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5545e+01, 3.9878e-02, 2.9832e-01],
        [4.5543e+01, 4.0456e-02, 3.1229e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.1918e+01, 9.3109e+00, 1.3539e-05],
         [3.1117e+01, 4.9954e-01, 2.0528e-05],
         ...,
         [4.4134e+01, 4.9656e-05, 8.5798e-05],
         [3.8382e+01, 2.7577e-05, 1.1690e-01],
         [2.5689e+01, 7.2791e-06, 1.8160e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.7255e+00, 8.3493e+00, 6.8023e-01],
         [3.1764e+01, 2.0818e-02, 6.8117e-01],
         ...,
         [4.3846e+01, 7.8830e-02, 3.0808e-02],
         [3.7957e+01, 7.8534e-02, 4.8490e-05],
         [2.5082e+01, 8.0593e-02, 6.7746e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  24%|██▎       | 471/2000 [15:33<44:15,  1.74s/it][A
Train Diffusion:  24%|██▎       | 472/2000 [15:35<44:17,  1.74s/it][A
Train Diffusion:  24%|██▎       | 473/2000 [15:36<44:28,  1.75s/it][A
Train Diffusion:  24%|██▎       | 474/2000 [15:38<44:36,  1.75s/it][A
Train Diffusion:  24%|██▍       | 475/2000 [15:40<45:06,  1.77s/it][A
Train Diffusion:  24%|██▍       | 476/2000 [15:42<44:41,  1.76s/it][A
Train Diffusion:  24%|██▍       | 477/2000 [15:43<44:24,  1.75s/it][A
Train Diffusion:  24%|██▍       | 478/2000 [15:45<44:14,  1.74s/it][A
Train Diffusion:  24%|██▍       | 479/2000 [15:47<44:03,  1.74s/it][A
Train Diffusion:  24%|██▍       | 480/2000 [15:49<43:59,  1.74s/it][A

Moving average norm loss at 480 iterations is: 3018.4815185546877. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5424e+01, 4.4359e-02, 3.2558e-01],
        [4.5426e+01, 4.3998e-02, 3.4523e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.2128e+01, 8.5642e+00, 1.3898e-05],
         [3.1540e+01, 5.2590e-01, 2.1692e-05],
         ...,
         [4.3929e+01, 4.8141e-05, 9.4926e-05],
         [3.8248e+01, 8.7007e-02, 4.4728e-05],
         [2.5292e+01, 8.7852e-02, 1.9005e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [9.9698e+00, 9.5089e+00, 7.3804e-01],
         [3.2002e+01, 2.3485e-02, 7.4191e-01],
         ...,
         [4.3967e+01, 8.8346e-02, 3.4010e-02],
         [3.8158e+01, 2.7351e-05, 1.5536e-01],
         [2.5724e+01, 7.5346e-06, 7.2897e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  24%|██▍       | 481/2000 [15:50<43:52,  1.73s/it][A
Train Diffusion:  24%|██▍       | 482/2000 [15:52<43:49,  1.73s/it][A
Train Diffusion:  24%|██▍       | 483/2000 [15:54<43:42,  1.73s/it][A
Train Diffusion:  24%|██▍       | 484/2000 [15:55<43:39,  1.73s/it][A
Train Diffusion:  24%|██▍       | 485/2000 [15:57<43:39,  1.73s/it][A
Train Diffusion:  24%|██▍       | 486/2000 [15:59<43:44,  1.73s/it][A
Train Diffusion:  24%|██▍       | 487/2000 [16:01<43:41,  1.73s/it][A
Train Diffusion:  24%|██▍       | 488/2000 [16:02<43:50,  1.74s/it][A
Train Diffusion:  24%|██▍       | 489/2000 [16:04<43:57,  1.75s/it][A
Train Diffusion:  24%|██▍       | 490/2000 [16:06<43:52,  1.74s/it][A

Moving average norm loss at 490 iterations is: 2893.6558837890625. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5465e+01, 4.5214e-02, 3.2372e-01],
        [4.5466e+01, 4.4162e-02, 3.1559e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.0680e+01, 9.9997e+00, 6.9872e-01],
         [3.3067e+01, 7.4572e-01, 7.0243e-01],
         ...,
         [4.3618e+01, 5.0590e-05, 2.6746e-02],
         [3.7969e+01, 8.2807e-02, 1.6667e-01],
         [2.5407e+01, 8.2236e-02, 7.1196e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.2768e+01, 9.1031e+00, 1.4687e-05],
         [3.2697e+01, 4.2710e-02, 2.1138e-05],
         ...,
         [4.3637e+01, 8.1925e-02, 5.3204e-05],
         [3.8011e+01, 2.6206e-05, 2.9878e-05],
         [2.5818e+01, 7.5382e-06, 2.2004e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  25%|██▍       | 491/2000 [16:08<43:57,  1.75s/it][A
Train Diffusion:  25%|██▍       | 492/2000 [16:09<44:02,  1.75s/it][A
Train Diffusion:  25%|██▍       | 493/2000 [16:11<44:18,  1.76s/it][A
Train Diffusion:  25%|██▍       | 494/2000 [16:13<44:32,  1.77s/it][A
Train Diffusion:  25%|██▍       | 495/2000 [16:15<47:54,  1.91s/it][A
Train Diffusion:  25%|██▍       | 496/2000 [16:17<46:37,  1.86s/it][A
Train Diffusion:  25%|██▍       | 497/2000 [16:19<48:49,  1.95s/it][A
Train Diffusion:  25%|██▍       | 498/2000 [16:21<48:04,  1.92s/it][A
Train Diffusion:  25%|██▍       | 499/2000 [16:23<47:17,  1.89s/it][A
Train Diffusion:  25%|██▌       | 500/2000 [16:25<46:22,  1.86s/it][A

Moving average norm loss at 500 iterations is: 2687.759326171875. Best norm loss value is: 2498.264404296875.

C_PATH mean = tensor([[4.5448e+01, 4.6160e-02, 3.4726e-01],
        [4.5446e+01, 4.4070e-02, 3.4008e-01]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.3258e+01, 9.6863e+00, 1.3776e-05],
         [3.3739e+01, 6.3732e-01, 2.1323e-05],
         ...,
         [4.3595e+01, 5.2150e-05, 6.3160e-05],
         [3.7791e+01, 2.9175e-05, 2.2842e-01],
         [2.5058e+01, 8.7515e-02, 2.4283e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.1168e+01, 8.6372e+00, 7.4695e-01],
         [3.3346e+01, 3.0969e-02, 7.3797e-01],
         ...,
         [4.3628e+01, 8.7937e-02, 3.1471e-02],
         [3.7948e+01, 8.7105e-02, 3.5390e-05],
         [2.5598e+01, 7.5130e-06, 7.5577e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  25%|██▌       | 501/2000 [16:26<45:41,  1.83s/it][A
Train Diffusion:  25%|██▌       | 502/2000 [16:28<45:13,  1.81s/it][A
Train Diffusion:  25%|██▌       | 503/2000 [16:30<44:42,  1.79s/it][A
Train Diffusion:  25%|██▌       | 504/2000 [16:32<44:24,  1.78s/it][A
Train Diffusion:  25%|██▌       | 505/2000 [16:33<44:06,  1.77s/it][A
Train Diffusion:  25%|██▌       | 506/2000 [16:35<44:05,  1.77s/it][A
Train Diffusion:  25%|██▌       | 507/2000 [16:37<43:55,  1.77s/it][A
Train Diffusion:  25%|██▌       | 508/2000 [16:39<43:48,  1.76s/it][A
Train Diffusion:  25%|██▌       | 509/2000 [16:40<43:40,  1.76s/it][A
Train Diffusion:  26%|██▌       | 510/2000 [16:42<43:38,  1.76s/it][A

Moving average ELBO loss at 510 iterations is: 76109.5853515625. Best ELBO loss value is: 28723.03125.

C_PATH mean = tensor([[4.5088e+01, 3.8171e-02, 2.8632e-01],
        [4.5075e+01, 3.7906e-02, 3.0447e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.4599e+01, 1.0025e+00, 6.4983e-01],
         [3.6836e+01, 2.7726e-04, 3.7560e-05],
         ...,
         [4.3733e+01, 9.0808e-02, 4.4180e-04],
         [3.3112e+01, 4.8304e-05, 2.4313e-01],
         [2.1303e+01, 1.3779e-05, 1.9187e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.4599e+01, 3.5177e+00, 2.2963e-05],
         [3.6839e+01, 5.6603e-02, 6.4052e-01],
         ...,
         [4.2308e+01, 8.1883e-05, 5.6975e-02],
         [3.6575e+01, 8.7962e-02, 1.6475e-04],
         [1.7498e+01, 9.4075e-02, 5.8024e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  26%|██▌       | 511/2000 [16:44<43:57,  1.77s/it][A
Train Diffusion:  26%|██▌       | 512/2000 [16:46<43:56,  1.77s/it][A
Train Diffusion:  26%|██▌       | 513/2000 [16:48<43:45,  1.77s/it][A
Train Diffusion:  26%|██▌       | 514/2000 [16:49<43:38,  1.76s/it][A
Train Diffusion:  26%|██▌       | 515/2000 [16:51<43:33,  1.76s/it][A
Train Diffusion:  26%|██▌       | 516/2000 [16:53<43:27,  1.76s/it][A
Train Diffusion:  26%|██▌       | 517/2000 [16:55<43:26,  1.76s/it][A
Train Diffusion:  26%|██▌       | 518/2000 [16:56<43:27,  1.76s/it][A
Train Diffusion:  26%|██▌       | 519/2000 [16:58<43:22,  1.76s/it][A
Train Diffusion:  26%|██▌       | 520/2000 [17:00<43:18,  1.76s/it][A

Moving average ELBO loss at 520 iterations is: 28559.299609375. Best ELBO loss value is: 16509.736328125.

C_PATH mean = tensor([[43.9160,  0.0481,  0.2889],
        [43.9174,  0.0479,  0.2892]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [1.7767e+01, 2.3188e-01, 6.9148e-01],
         [4.6034e+01, 2.2925e-02, 6.0202e-05],
         ...,
         [4.1272e+01, 9.2308e-02, 6.3375e-02],
         [3.1090e+01, 1.0484e-01, 1.7542e-01],
         [1.4538e+01, 2.5775e-05, 5.7402e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.5946e+01, 7.4700e-01, 4.4905e-05],
         [4.4749e+01, 1.0105e-04, 6.9191e-01],
         ...,
         [4.1348e+01, 1.2603e-04, 6.8204e-04],
         [3.1337e+01, 7.3326e-05, 1.0276e-04],
         [1.3573e+01, 1.1529e-01, 3.4629e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  26%|██▌       | 521/2000 [17:02<43:18,  1.76s/it][A
Train Diffusion:  26%|██▌       | 522/2000 [17:03<43:16,  1.76s/it][A
Train Diffusion:  26%|██▌       | 523/2000 [17:05<43:20,  1.76s/it][A
Train Diffusion:  26%|██▌       | 524/2000 [17:07<43:22,  1.76s/it][A
Train Diffusion:  26%|██▋       | 525/2000 [17:09<43:15,  1.76s/it][A
Train Diffusion:  26%|██▋       | 526/2000 [17:10<43:13,  1.76s/it][A
Train Diffusion:  26%|██▋       | 527/2000 [17:12<43:10,  1.76s/it][A
Train Diffusion:  26%|██▋       | 528/2000 [17:14<43:07,  1.76s/it][A
Train Diffusion:  26%|██▋       | 529/2000 [17:16<43:27,  1.77s/it][A
Train Diffusion:  26%|██▋       | 530/2000 [17:18<43:41,  1.78s/it][A

Moving average ELBO loss at 530 iterations is: 32320.3560546875. Best ELBO loss value is: 14904.763671875.

C_PATH mean = tensor([[4.3581e+01, 3.5513e-02, 1.6720e-01],
        [4.3658e+01, 3.5092e-02, 1.5560e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.7213e+01, 1.4176e-03, 8.3750e-05],
         [4.9888e+01, 3.7762e-03, 1.0345e-04],
         ...,
         [4.1656e+01, 5.2708e-02, 6.9094e-04],
         [3.1511e+01, 1.6224e-04, 9.9513e-05],
         [8.6504e+00, 9.4096e-02, 3.4233e-05]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.7213e+01, 3.6459e-02, 4.2501e-01],
         [5.2646e+01, 1.1126e-05, 4.6704e-01],
         ...,
         [3.9522e+01, 2.3064e-04, 4.0002e-02],
         [2.6455e+01, 6.0083e-02, 1.0030e-01],
         [1.2274e+01, 4.8296e-05, 2.1424e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  27%|██▋       | 531/2000 [17:19<43:31,  1.78s/it][A
Train Diffusion:  27%|██▋       | 532/2000 [17:21<43:17,  1.77s/it][A
Train Diffusion:  27%|██▋       | 533/2000 [17:23<43:49,  1.79s/it][A
Train Diffusion:  27%|██▋       | 534/2000 [17:25<43:35,  1.78s/it][A
Train Diffusion:  27%|██▋       | 535/2000 [17:26<43:35,  1.79s/it][A
Train Diffusion:  27%|██▋       | 536/2000 [17:28<43:18,  1.77s/it][A
Train Diffusion:  27%|██▋       | 537/2000 [17:30<43:05,  1.77s/it][A
Train Diffusion:  27%|██▋       | 538/2000 [17:32<42:58,  1.76s/it][A
Train Diffusion:  27%|██▋       | 539/2000 [17:33<42:53,  1.76s/it][A
Train Diffusion:  27%|██▋       | 540/2000 [17:35<42:47,  1.76s/it][A

Moving average ELBO loss at 540 iterations is: 14159.81669921875. Best ELBO loss value is: 10456.64453125.

C_PATH mean = tensor([[4.4058e+01, 1.7931e-02, 1.0848e-01],
        [4.4041e+01, 1.6676e-02, 1.0230e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.4057e+01, 5.8989e-01, 2.2392e-01],
         [4.6400e+01, 1.1487e-04, 8.4594e-05],
         ...,
         [4.1203e+01, 3.5502e-02, 1.0109e-03],
         [3.0696e+01, 4.1123e-02, 1.4367e-04],
         [1.3057e+01, 4.2632e-05, 1.6578e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.0212e+01, 1.9692e-01, 6.0483e-05],
         [4.4623e+01, 1.6979e-02, 2.0222e-01],
         ...,
         [4.0442e+01, 1.4051e-04, 2.8743e-02],
         [2.9741e+01, 9.4529e-05, 9.5383e-02],
         [1.3703e+01, 5.1243e-02, 9.0442e-05]]], grad_fn=<CatBackward>)



Train Diffusion:  27%|██▋       | 541/2000 [17:37<42:50,  1.76s/it][A
Train Diffusion:  27%|██▋       | 542/2000 [17:39<42:45,  1.76s/it][A
Train Diffusion:  27%|██▋       | 543/2000 [17:40<42:45,  1.76s/it][A
Train Diffusion:  27%|██▋       | 544/2000 [17:42<42:50,  1.77s/it][A
Train Diffusion:  27%|██▋       | 545/2000 [17:44<42:55,  1.77s/it][A
Train Diffusion:  27%|██▋       | 546/2000 [17:46<42:44,  1.76s/it][A
Train Diffusion:  27%|██▋       | 547/2000 [17:48<42:47,  1.77s/it][A
Train Diffusion:  27%|██▋       | 548/2000 [17:49<43:03,  1.78s/it][A
Train Diffusion:  27%|██▋       | 549/2000 [17:51<42:48,  1.77s/it][A
Train Diffusion:  28%|██▊       | 550/2000 [17:53<42:37,  1.76s/it][A

Moving average ELBO loss at 550 iterations is: 9539.02041015625. Best ELBO loss value is: 6626.1025390625.

C_PATH mean = tensor([[4.3207e+01, 3.0031e-02, 1.1472e-01],
        [4.3205e+01, 2.9350e-02, 1.1037e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.4849e+01, 1.5327e-01, 3.8660e-01],
         [4.6569e+01, 1.7557e-02, 2.9850e-01],
         ...,
         [3.9052e+01, 6.8332e-02, 1.9054e-03],
         [2.7583e+01, 3.4169e-04, 3.2492e-04],
         [1.1026e+01, 8.9451e-02, 2.0360e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.4028e+01, 5.9466e-01, 1.1755e-04],
         [5.5985e+01, 4.7996e-04, 1.6554e-04],
         ...,
         [3.9239e+01, 2.8664e-04, 4.9701e-02],
         [2.8478e+01, 5.0676e-02, 1.0698e-01],
         [1.2480e+01, 6.4889e-05, 1.9910e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  28%|██▊       | 551/2000 [17:55<42:28,  1.76s/it][A
Train Diffusion:  28%|██▊       | 552/2000 [17:56<42:27,  1.76s/it][A
Train Diffusion:  28%|██▊       | 553/2000 [17:58<42:21,  1.76s/it][A
Train Diffusion:  28%|██▊       | 554/2000 [18:00<42:15,  1.75s/it][A
Train Diffusion:  28%|██▊       | 555/2000 [18:02<42:17,  1.76s/it][A
Train Diffusion:  28%|██▊       | 556/2000 [18:03<42:15,  1.76s/it][A
Train Diffusion:  28%|██▊       | 557/2000 [18:05<42:16,  1.76s/it][A
Train Diffusion:  28%|██▊       | 558/2000 [18:07<42:13,  1.76s/it][A
Train Diffusion:  28%|██▊       | 559/2000 [18:09<42:16,  1.76s/it][A
Train Diffusion:  28%|██▊       | 560/2000 [18:10<41:59,  1.75s/it][A

Moving average ELBO loss at 560 iterations is: 6228.9068359375. Best ELBO loss value is: 4747.01611328125.

C_PATH mean = tensor([[42.2075,  0.0487,  0.1452],
        [42.1233,  0.0511,  0.1512]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.1276e+01, 1.5623e-01, 5.8293e-01],
         [6.6103e+01, 1.0594e-04, 5.1854e-01],
         ...,
         [3.8120e+01, 1.0617e-01, 2.3366e-03],
         [2.6639e+01, 8.1848e-04, 8.4650e-02],
         [9.8396e+00, 1.2611e-01, 1.4813e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.0597e+01, 6.5567e-02, 1.9397e-04],
         [5.5333e+01, 1.4848e-02, 1.8846e-04],
         ...,
         [3.7449e+01, 6.4341e-04, 4.6576e-02],
         [2.7228e+01, 6.8407e-02, 4.1629e-04],
         [1.1690e+01, 1.8473e-04, 1.8428e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  28%|██▊       | 561/2000 [18:12<41:44,  1.74s/it][A
Train Diffusion:  28%|██▊       | 562/2000 [18:14<41:45,  1.74s/it][A
Train Diffusion:  28%|██▊       | 563/2000 [18:16<42:02,  1.76s/it][A
Train Diffusion:  28%|██▊       | 564/2000 [18:17<41:59,  1.75s/it][A
Train Diffusion:  28%|██▊       | 565/2000 [18:19<41:53,  1.75s/it][A
Train Diffusion:  28%|██▊       | 566/2000 [18:21<41:41,  1.74s/it][A
Train Diffusion:  28%|██▊       | 567/2000 [18:23<41:57,  1.76s/it][A
Train Diffusion:  28%|██▊       | 568/2000 [18:24<41:54,  1.76s/it][A
Train Diffusion:  28%|██▊       | 569/2000 [18:26<41:58,  1.76s/it][A
Train Diffusion:  28%|██▊       | 570/2000 [18:28<41:46,  1.75s/it][A

Moving average ELBO loss at 570 iterations is: 4480.341162109375. Best ELBO loss value is: 2420.190185546875.

C_PATH mean = tensor([[4.2421e+01, 1.8299e-02, 8.6529e-02],
        [4.2454e+01, 1.8142e-02, 8.8771e-02]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.7028e+01, 1.0902e-01, 1.7897e-01],
         [5.3022e+01, 1.3292e-02, 5.8781e-04],
         ...,
         [3.8055e+01, 3.5275e-02, 3.5347e-03],
         [2.7286e+01, 5.2215e-02, 6.3266e-02],
         [1.1396e+01, 3.6698e-04, 1.5101e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [2.9176e+01, 6.2155e-01, 4.3249e-04],
         [4.7216e+01, 3.0743e-04, 1.3314e-01],
         ...,
         [3.8570e+01, 1.3062e-03, 8.9729e-02],
         [2.8243e+01, 5.6943e-04, 1.8213e-03],
         [1.2900e+01, 5.8838e-02, 1.8654e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  29%|██▊       | 571/2000 [18:30<41:37,  1.75s/it][A
Train Diffusion:  29%|██▊       | 572/2000 [18:31<41:24,  1.74s/it][A
Train Diffusion:  29%|██▊       | 573/2000 [18:33<41:39,  1.75s/it][A
Train Diffusion:  29%|██▊       | 574/2000 [18:35<41:53,  1.76s/it][A
Train Diffusion:  29%|██▉       | 575/2000 [18:37<41:57,  1.77s/it][A
Train Diffusion:  29%|██▉       | 576/2000 [18:38<41:39,  1.76s/it][A
Train Diffusion:  29%|██▉       | 577/2000 [18:40<41:27,  1.75s/it][A
Train Diffusion:  29%|██▉       | 578/2000 [18:42<41:19,  1.74s/it][A
Train Diffusion:  29%|██▉       | 579/2000 [18:44<41:18,  1.74s/it][A
Train Diffusion:  29%|██▉       | 580/2000 [18:45<41:13,  1.74s/it][A

Moving average ELBO loss at 580 iterations is: 2203.7712890625. Best ELBO loss value is: -134.520263671875.

C_PATH mean = tensor([[4.1322e+01, 3.2331e-02, 1.0104e-01],
        [4.1295e+01, 3.7101e-02, 1.0380e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7808e+01, 1.7830e-01, 2.6851e-01],
         [5.4338e+01, 1.6761e-02, 1.5067e-03],
         ...,
         [3.7306e+01, 8.7033e-02, 7.4199e-02],
         [2.5475e+01, 1.1156e-01, 8.1961e-02],
         [9.4424e+00, 1.5775e-03, 5.7922e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.0529e+01, 1.1185e-01, 8.4324e-04],
         [4.8673e+01, 2.6556e-04, 1.4874e-01],
         ...,
         [3.7615e+01, 1.1308e-03, 1.2587e-02],
         [2.7884e+01, 8.8980e-04, 2.0700e-03],
         [1.2772e+01, 7.2375e-02, 1.0392e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  29%|██▉       | 581/2000 [18:47<41:17,  1.75s/it][A
Train Diffusion:  29%|██▉       | 582/2000 [18:49<41:29,  1.76s/it][A
Train Diffusion:  29%|██▉       | 583/2000 [18:51<41:32,  1.76s/it][A
Train Diffusion:  29%|██▉       | 584/2000 [18:52<41:21,  1.75s/it][A
Train Diffusion:  29%|██▉       | 585/2000 [18:54<41:10,  1.75s/it][A
Train Diffusion:  29%|██▉       | 586/2000 [18:56<41:06,  1.74s/it][A
Train Diffusion:  29%|██▉       | 587/2000 [18:58<41:02,  1.74s/it][A
Train Diffusion:  29%|██▉       | 588/2000 [18:59<40:58,  1.74s/it][A
Train Diffusion:  29%|██▉       | 589/2000 [19:01<40:53,  1.74s/it][A
Train Diffusion:  30%|██▉       | 590/2000 [19:03<40:55,  1.74s/it][A

Moving average ELBO loss at 590 iterations is: 6956.6971435546875. Best ELBO loss value is: -989.40966796875.

C_PATH mean = tensor([[4.0921e+01, 3.4928e-02, 1.3359e-01],
        [4.0932e+01, 3.2780e-02, 1.2785e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6406e+01, 1.5023e-01, 1.0987e-03],
         [4.5046e+01, 2.0323e-02, 1.8879e-01],
         ...,
         [3.6857e+01, 1.6827e-03, 1.2644e-01],
         [2.6422e+01, 6.5409e-02, 6.9330e-03],
         [1.0447e+01, 1.1321e-03, 1.1228e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.0896e+01, 6.1244e-01, 3.5517e-01],
         [5.1343e+01, 1.2689e-03, 4.6449e-03],
         ...,
         [3.6436e+01, 1.1175e-01, 1.8375e-02],
         [2.5975e+01, 4.3859e-03, 7.6958e-02],
         [1.1399e+01, 1.0127e-01, 7.3587e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  30%|██▉       | 591/2000 [19:05<40:55,  1.74s/it][A
Train Diffusion:  30%|██▉       | 592/2000 [19:06<40:51,  1.74s/it][A
Train Diffusion:  30%|██▉       | 593/2000 [19:08<40:44,  1.74s/it][A
Train Diffusion:  30%|██▉       | 594/2000 [19:10<40:37,  1.73s/it][A
Train Diffusion:  30%|██▉       | 595/2000 [19:12<40:33,  1.73s/it][A
Train Diffusion:  30%|██▉       | 596/2000 [19:13<40:37,  1.74s/it][A
Train Diffusion:  30%|██▉       | 597/2000 [19:15<40:49,  1.75s/it][A
Train Diffusion:  30%|██▉       | 598/2000 [19:17<40:51,  1.75s/it][A
Train Diffusion:  30%|██▉       | 599/2000 [19:19<41:01,  1.76s/it][A
Train Diffusion:  30%|███       | 600/2000 [19:20<41:15,  1.77s/it][A

Moving average ELBO loss at 600 iterations is: -1530.390478515625. Best ELBO loss value is: -2803.2890625.

C_PATH mean = tensor([[40.1447,  0.0451,  0.1526],
        [40.1537,  0.0458,  0.1519]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.3199e+01, 3.3605e-01, 3.3397e-03],
         [4.6206e+01, 6.8090e-04, 2.2498e-01],
         ...,
         [3.6547e+01, 6.1193e-03, 1.2719e-01],
         [2.5671e+01, 4.0691e-03, 4.3397e-03],
         [9.8900e+00, 1.2574e-01, 4.9602e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.8360e+01, 1.3985e-01, 3.3725e-01],
         [4.4644e+01, 2.3237e-02, 3.8274e-03],
         ...,
         [3.5596e+01, 9.0681e-02, 2.4757e-02],
         [2.4867e+01, 1.0861e-01, 8.3323e-02],
         [1.0983e+01, 3.5741e-03, 1.5185e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  30%|███       | 601/2000 [19:22<42:08,  1.81s/it][A
Train Diffusion:  30%|███       | 602/2000 [19:24<41:59,  1.80s/it][A
Train Diffusion:  30%|███       | 603/2000 [19:26<41:40,  1.79s/it][A
Train Diffusion:  30%|███       | 604/2000 [19:28<41:29,  1.78s/it][A
Train Diffusion:  30%|███       | 605/2000 [19:29<41:13,  1.77s/it][A
Train Diffusion:  30%|███       | 606/2000 [19:31<41:07,  1.77s/it][A
Train Diffusion:  30%|███       | 607/2000 [19:33<41:04,  1.77s/it][A
Train Diffusion:  30%|███       | 608/2000 [19:35<40:55,  1.76s/it][A
Train Diffusion:  30%|███       | 609/2000 [19:36<40:48,  1.76s/it][A
Train Diffusion:  30%|███       | 610/2000 [19:38<40:36,  1.75s/it][A

Moving average ELBO loss at 610 iterations is: -1887.2951416015626. Best ELBO loss value is: -2803.2890625.

C_PATH mean = tensor([[3.9733e+01, 3.3775e-02, 1.2783e-01],
        [3.9718e+01, 3.1524e-02, 1.3291e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6496e+01, 3.2372e-01, 4.6031e-03],
         [4.8938e+01, 1.3429e-03, 5.2635e-03],
         ...,
         [3.5957e+01, 1.0883e-02, 1.3098e-01],
         [2.4148e+01, 6.1592e-03, 7.8229e-03],
         [1.0662e+01, 8.9149e-02, 5.5134e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9014e+01, 1.4161e-01, 2.3215e-01],
         [4.8068e+01, 1.7538e-02, 2.3162e-01],
         ...,
         [3.4403e+01, 6.3384e-02, 4.8208e-02],
         [2.4413e+01, 8.3227e-02, 6.9022e-02],
         [9.3910e+00, 5.6118e-03, 8.1585e-04]]], grad_fn=<CatBackward>)



Train Diffusion:  31%|███       | 611/2000 [19:40<40:24,  1.75s/it][A
Train Diffusion:  31%|███       | 612/2000 [19:42<40:21,  1.74s/it][A
Train Diffusion:  31%|███       | 613/2000 [19:43<40:12,  1.74s/it][A
Train Diffusion:  31%|███       | 614/2000 [19:45<40:16,  1.74s/it][A
Train Diffusion:  31%|███       | 615/2000 [19:47<40:11,  1.74s/it][A
Train Diffusion:  31%|███       | 616/2000 [19:49<40:09,  1.74s/it][A
Train Diffusion:  31%|███       | 617/2000 [19:50<40:03,  1.74s/it][A
Train Diffusion:  31%|███       | 618/2000 [19:52<40:11,  1.74s/it][A
Train Diffusion:  31%|███       | 619/2000 [19:54<40:19,  1.75s/it][A
Train Diffusion:  31%|███       | 620/2000 [19:56<40:26,  1.76s/it][A

Moving average ELBO loss at 620 iterations is: -1694.8380126953125. Best ELBO loss value is: -2975.58447265625.

C_PATH mean = tensor([[3.9084e+01, 3.2636e-02, 1.6084e-01],
        [3.9033e+01, 3.2452e-02, 1.5703e-01]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.0565e+01, 4.0327e-01, 1.0024e-02],
         [4.4118e+01, 2.8847e-02, 1.0616e-02],
         ...,
         [3.4397e+01, 1.0819e-02, 1.5818e-01],
         [2.3853e+01, 7.3964e-03, 1.5997e-02],
         [9.7501e+00, 1.0229e-01, 6.2702e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.7196e+01, 3.7259e-01, 2.1007e-01],
         [4.2573e+01, 1.5553e-03, 2.3313e-01],
         ...,
         [3.5606e+01, 1.0225e-01, 1.8022e-01],
         [2.5405e+01, 1.1052e-01, 1.2101e-01],
         [1.1739e+01, 9.2661e-03, 2.6065e-03]]], grad_fn=<CatBackward>)



Train Diffusion:  31%|███       | 621/2000 [19:57<40:21,  1.76s/it][A
Train Diffusion:  31%|███       | 622/2000 [19:59<40:16,  1.75s/it][A
Train Diffusion:  31%|███       | 623/2000 [20:01<40:11,  1.75s/it][A
Train Diffusion:  31%|███       | 624/2000 [20:03<40:09,  1.75s/it][A
Train Diffusion:  31%|███▏      | 625/2000 [20:04<40:15,  1.76s/it][A
Train Diffusion:  31%|███▏      | 626/2000 [20:06<40:17,  1.76s/it][A
Train Diffusion:  31%|███▏      | 627/2000 [20:08<40:20,  1.76s/it][A
Train Diffusion:  31%|███▏      | 628/2000 [20:10<40:15,  1.76s/it][A
Train Diffusion:  31%|███▏      | 629/2000 [20:11<40:06,  1.76s/it][A
Train Diffusion:  32%|███▏      | 630/2000 [20:13<40:06,  1.76s/it][A

Moving average ELBO loss at 630 iterations is: -3732.6908203125. Best ELBO loss value is: -4709.71728515625.

C_PATH mean = tensor([[38.6778,  0.0412,  0.2014],
        [38.7789,  0.0400,  0.1993]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.1397e+01, 2.8101e-01, 1.4620e-02],
         [4.3828e+01, 1.7391e-03, 9.0524e-03],
         ...,
         [3.4054e+01, 9.1051e-02, 3.6185e-01],
         [2.3222e+01, 1.1650e-01, 1.8500e-01],
         [1.1051e+01, 2.3413e-02, 5.5807e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.3352e+01, 1.6493e-01, 2.7847e-01],
         [4.2114e+01, 1.7176e-02, 2.8178e-01],
         ...,
         [3.4688e+01, 2.3212e-02, 7.5212e-02],
         [2.5186e+01, 1.8546e-02, 1.0713e-02],
         [1.0349e+01, 1.0782e-01, 6.3797e-02]]], grad_fn=<CatBackward>)



Train Diffusion:  32%|███▏      | 631/2000 [20:15<39:59,  1.75s/it][A
Train Diffusion:  32%|███▏      | 632/2000 [20:17<40:12,  1.76s/it][A
Train Diffusion:  32%|███▏      | 633/2000 [20:18<40:09,  1.76s/it][A
Train Diffusion:  32%|███▏      | 634/2000 [20:20<40:11,  1.77s/it][A
Train Diffusion:  32%|███▏      | 635/2000 [20:22<40:02,  1.76s/it][A
Train Diffusion:  32%|███▏      | 636/2000 [20:24<40:07,  1.76s/it][A
Train Diffusion:  32%|███▏      | 637/2000 [20:26<40:26,  1.78s/it][A
Train Diffusion:  32%|███▏      | 638/2000 [20:27<40:25,  1.78s/it][A
Train Diffusion:  32%|███▏      | 639/2000 [20:29<40:13,  1.77s/it][A
Train Diffusion:  32%|███▏      | 640/2000 [20:31<40:01,  1.77s/it][A

Moving average ELBO loss at 640 iterations is: -5101.196484375. Best ELBO loss value is: -5868.10009765625.

C_PATH mean = tensor([[38.2893,  0.0519,  0.2004],
        [38.3169,  0.0509,  0.2053]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3529e+01, 1.5722e-01, 1.9546e-01],
         [4.4234e+01, 2.3341e-02, 5.2839e-02],
         ...,
         [3.2911e+01, 2.6327e-02, 2.3037e-01],
         [2.2788e+01, 1.6426e-02, 1.5105e-01],
         [1.1687e+01, 1.3588e-02, 9.7793e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.7272e+01, 4.3037e-01, 5.0348e-02],
         [4.6540e+01, 1.2775e-03, 1.8616e-01],
         ...,
         [3.3889e+01, 1.2096e-01, 1.7686e-01],
         [2.4394e+01, 1.6073e-01, 3.9002e-02],
         [9.7633e+00, 1.8117e-01, 5.0515e-03]]], grad_fn=<CatBackward>)



Train Diffusion:  32%|███▏      | 641/2000 [20:33<40:01,  1.77s/it][A
Train Diffusion:  32%|███▏      | 642/2000 [20:34<40:01,  1.77s/it][A
Train Diffusion:  32%|███▏      | 643/2000 [20:36<39:52,  1.76s/it][A
Train Diffusion:  32%|███▏      | 644/2000 [20:38<39:55,  1.77s/it][A
Train Diffusion:  32%|███▏      | 645/2000 [20:40<39:40,  1.76s/it][A
Train Diffusion:  32%|███▏      | 646/2000 [20:41<39:49,  1.76s/it][A
Train Diffusion:  32%|███▏      | 647/2000 [20:43<39:41,  1.76s/it][A
Train Diffusion:  32%|███▏      | 648/2000 [20:45<42:32,  1.89s/it][A
Train Diffusion:  32%|███▏      | 649/2000 [20:47<43:39,  1.94s/it][A
Train Diffusion:  32%|███▎      | 650/2000 [20:50<47:48,  2.13s/it][A

Moving average ELBO loss at 650 iterations is: -5526.2638671875. Best ELBO loss value is: -6593.7421875.

C_PATH mean = tensor([[37.7331,  0.0598,  0.3035],
        [37.7715,  0.0603,  0.3103]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.6224e+01, 5.9892e-02, 2.0253e-01],
         [4.0707e+01, 1.3433e-02, 2.8125e-01],
         ...,
         [3.1917e+01, 5.3039e-02, 6.7172e-01],
         [2.1620e+01, 4.2871e-02, 1.1975e-01],
         [8.5756e+00, 3.4980e-02, 8.0497e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9341e+01, 4.7257e-01, 8.8680e-02],
         [4.2005e+01, 2.7618e-03, 5.4403e-02],
         ...,
         [3.3736e+01, 1.5063e-01, 1.5511e-01],
         [2.4345e+01, 1.7242e-01, 1.5164e-01],
         [1.1533e+01, 2.1134e-01, 1.2276e-02]]], grad_fn=<CatBackward>)



Train Diffusion:  33%|███▎      | 651/2000 [20:52<48:25,  2.15s/it][A
Train Diffusion:  33%|███▎      | 652/2000 [20:54<49:30,  2.20s/it][A
Train Diffusion:  33%|███▎      | 653/2000 [20:57<49:19,  2.20s/it][A
Train Diffusion:  33%|███▎      | 654/2000 [20:59<48:38,  2.17s/it][A
Train Diffusion:  33%|███▎      | 655/2000 [21:01<47:33,  2.12s/it][A
Train Diffusion:  33%|███▎      | 656/2000 [21:03<47:47,  2.13s/it][A
Train Diffusion:  33%|███▎      | 657/2000 [21:05<47:58,  2.14s/it][A
Train Diffusion:  33%|███▎      | 658/2000 [21:07<47:33,  2.13s/it][A
Train Diffusion:  33%|███▎      | 659/2000 [21:09<47:33,  2.13s/it][A
Train Diffusion:  33%|███▎      | 660/2000 [21:12<47:51,  2.14s/it][A

Moving average ELBO loss at 660 iterations is: -7811.4404296875. Best ELBO loss value is: -8463.6708984375.

C_PATH mean = tensor([[37.1908,  0.0897,  0.3913],
        [37.1959,  0.0940,  0.4036]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5375e+01, 1.7466e-01, 2.8054e-01],
         [4.9227e+01, 2.3738e-02, 3.5945e-01],
         ...,
         [3.1794e+01, 1.6760e-01, 6.0555e-01],
         [2.1745e+01, 1.8431e-01, 3.2631e-01],
         [1.0695e+01, 2.0012e-01, 1.3589e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.9293e+01, 3.9535e-01, 5.6627e-02],
         [4.0849e+01, 2.9500e-03, 6.2502e-02],
         ...,
         [3.2241e+01, 8.3543e-02, 2.7908e-01],
         [2.3998e+01, 7.4826e-02, 8.3393e-02],
         [1.0244e+01, 6.0136e-02, 1.6550e-02]]], grad_fn=<CatBackward>)



Train Diffusion:  33%|███▎      | 661/2000 [21:14<48:16,  2.16s/it][A
Train Diffusion:  33%|███▎      | 662/2000 [21:16<47:11,  2.12s/it][A
Train Diffusion:  33%|███▎      | 663/2000 [21:18<46:29,  2.09s/it][A
Train Diffusion:  33%|███▎      | 664/2000 [21:20<48:02,  2.16s/it][A
Train Diffusion:  33%|███▎      | 665/2000 [21:22<48:06,  2.16s/it][A
Train Diffusion:  33%|███▎      | 666/2000 [21:25<49:06,  2.21s/it][A
Train Diffusion:  33%|███▎      | 667/2000 [21:27<49:16,  2.22s/it][A
Train Diffusion:  33%|███▎      | 668/2000 [21:29<48:20,  2.18s/it][A
Train Diffusion:  33%|███▎      | 669/2000 [21:31<48:06,  2.17s/it][A
Train Diffusion:  34%|███▎      | 670/2000 [21:33<48:09,  2.17s/it][A

Moving average ELBO loss at 670 iterations is: -9005.45966796875. Best ELBO loss value is: -9955.33203125.

C_PATH mean = tensor([[37.2892,  0.1008,  0.4508],
        [37.2872,  0.1015,  0.4418]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9048e+01, 1.6102e-01, 1.1527e-01],
         [4.4455e+01, 2.1157e-02, 2.4826e-01],
         ...,
         [3.1157e+01, 1.3097e-01, 5.3486e-01],
         [2.2783e+01, 9.7988e-02, 3.9065e-01],
         [9.5581e+00, 1.5640e-01, 2.1969e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5210e+01, 3.5804e-01, 2.4947e-01],
         [4.1164e+01, 4.9094e-03, 2.1419e-01],
         ...,
         [3.1999e+01, 1.3358e-01, 4.7009e-01],
         [2.2223e+01, 1.7731e-01, 1.7224e-01],
         [1.0855e+01, 1.1315e-01, 3.7302e-02]]], grad_fn=<CatBackward>)



Train Diffusion:  34%|███▎      | 671/2000 [21:35<47:05,  2.13s/it][A
Train Diffusion:  34%|███▎      | 672/2000 [21:37<46:07,  2.08s/it][A
Train Diffusion:  34%|███▎      | 673/2000 [21:40<47:56,  2.17s/it][A
Train Diffusion:  34%|███▎      | 674/2000 [21:42<47:18,  2.14s/it][A
Train Diffusion:  34%|███▍      | 675/2000 [21:44<46:42,  2.11s/it][A
Train Diffusion:  34%|███▍      | 676/2000 [21:46<45:25,  2.06s/it][A
Train Diffusion:  34%|███▍      | 677/2000 [21:48<45:39,  2.07s/it][A
Train Diffusion:  34%|███▍      | 678/2000 [21:50<44:25,  2.02s/it][A
Train Diffusion:  34%|███▍      | 679/2000 [21:52<43:53,  1.99s/it][A
Train Diffusion:  34%|███▍      | 680/2000 [21:54<43:28,  1.98s/it][A

Moving average ELBO loss at 680 iterations is: -9908.45087890625. Best ELBO loss value is: -10941.5751953125.

C_PATH mean = tensor([[37.3530,  0.0954,  0.5101],
        [37.2615,  0.0942,  0.5193]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.8898e+01, 1.6432e-01, 2.5308e-01],
         [4.2428e+01, 1.3655e-02, 4.2272e-01],
         ...,
         [3.1340e+01, 1.0166e-01, 7.5578e-01],
         [2.1222e+01, 8.3341e-02, 4.5660e-01],
         [8.6089e+00, 1.3123e-01, 9.2837e-02]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.2542e+01, 1.9957e-01, 7.4606e-02],
         [4.1043e+01, 5.4720e-03, 7.3993e-02],
         ...,
         [3.1169e+01, 1.3025e-01, 3.1772e-01],
         [2.3153e+01, 1.5859e-01, 1.3154e-01],
         [1.1316e+01, 1.0459e-01, 1.5350e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  34%|███▍      | 681/2000 [21:55<43:11,  1.96s/it][A
Train Diffusion:  34%|███▍      | 682/2000 [21:57<42:41,  1.94s/it][A
Train Diffusion:  34%|███▍      | 683/2000 [21:59<42:19,  1.93s/it][A
Train Diffusion:  34%|███▍      | 684/2000 [22:01<42:27,  1.94s/it][A
Train Diffusion:  34%|███▍      | 685/2000 [22:03<42:15,  1.93s/it][A
Train Diffusion:  34%|███▍      | 686/2000 [22:05<41:45,  1.91s/it][A
Train Diffusion:  34%|███▍      | 687/2000 [22:07<41:25,  1.89s/it][A
Train Diffusion:  34%|███▍      | 688/2000 [22:09<41:12,  1.88s/it][A
Train Diffusion:  34%|███▍      | 689/2000 [22:11<40:59,  1.88s/it][A
Train Diffusion:  34%|███▍      | 690/2000 [22:12<40:48,  1.87s/it][A

Moving average ELBO loss at 690 iterations is: -10582.903125. Best ELBO loss value is: -11338.408203125.

C_PATH mean = tensor([[37.0498,  0.1081,  0.5358],
        [37.0386,  0.1107,  0.5403]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4055e+01, 5.4275e-01, 9.5160e-02],
         [4.1677e+01, 3.7827e-03, 2.4942e-01],
         ...,
         [3.1568e+01, 1.1273e-01, 6.5698e-01],
         [2.3823e+01, 1.5481e-01, 5.8714e-01],
         [1.0868e+01, 9.8296e-02, 4.0068e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3896e+01, 2.5522e-01, 2.7697e-01],
         [4.5838e+01, 3.8840e-02, 2.5197e-01],
         ...,
         [2.9629e+01, 1.6165e-01, 6.7597e-01],
         [2.1295e+01, 1.0818e-01, 3.2030e-01],
         [1.1535e+01, 1.7247e-01, 1.0603e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  35%|███▍      | 691/2000 [22:14<40:57,  1.88s/it][A
Train Diffusion:  35%|███▍      | 692/2000 [22:16<41:15,  1.89s/it][A
Train Diffusion:  35%|███▍      | 693/2000 [22:18<41:05,  1.89s/it][A
Train Diffusion:  35%|███▍      | 694/2000 [22:20<40:53,  1.88s/it][A
Train Diffusion:  35%|███▍      | 695/2000 [22:22<40:44,  1.87s/it][A
Train Diffusion:  35%|███▍      | 696/2000 [22:24<40:43,  1.87s/it][A
Train Diffusion:  35%|███▍      | 697/2000 [22:26<40:38,  1.87s/it][A
Train Diffusion:  35%|███▍      | 698/2000 [22:27<40:41,  1.87s/it][A
Train Diffusion:  35%|███▍      | 699/2000 [22:29<40:29,  1.87s/it][A
Train Diffusion:  35%|███▌      | 700/2000 [22:31<40:29,  1.87s/it][A

Moving average ELBO loss at 700 iterations is: -8922.22724609375. Best ELBO loss value is: -11338.408203125.

C_PATH mean = tensor([[37.2376,  0.1021,  0.5263],
        [37.2514,  0.1013,  0.5286]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9739e+01, 1.1155e-01, 8.4777e-02],
         [4.4851e+01, 2.0926e-03, 8.8807e-02],
         ...,
         [3.0876e+01, 1.0050e-01, 4.1917e-01],
         [2.1757e+01, 7.0077e-02, 2.4394e-01],
         [1.1041e+01, 1.1027e-01, 2.7125e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4289e+01, 3.0712e-01, 1.9190e-01],
         [4.3640e+01, 1.3417e-02, 3.6765e-01],
         ...,
         [3.0186e+01, 1.1574e-01, 8.4768e-01],
         [2.3173e+01, 1.2471e-01, 6.8697e-01],
         [1.0582e+01, 7.6319e-02, 1.9120e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  35%|███▌      | 701/2000 [22:33<40:40,  1.88s/it][A
Train Diffusion:  35%|███▌      | 702/2000 [22:35<40:36,  1.88s/it][A
Train Diffusion:  35%|███▌      | 703/2000 [22:37<40:27,  1.87s/it][A
Train Diffusion:  35%|███▌      | 704/2000 [22:39<40:23,  1.87s/it][A
Train Diffusion:  35%|███▌      | 705/2000 [22:41<40:16,  1.87s/it][A
Train Diffusion:  35%|███▌      | 706/2000 [22:42<40:10,  1.86s/it][A
Train Diffusion:  35%|███▌      | 707/2000 [22:44<40:04,  1.86s/it][A
Train Diffusion:  35%|███▌      | 708/2000 [22:46<40:04,  1.86s/it][A
Train Diffusion:  35%|███▌      | 709/2000 [22:48<40:01,  1.86s/it][A
Train Diffusion:  36%|███▌      | 710/2000 [22:50<39:57,  1.86s/it][A

Moving average ELBO loss at 710 iterations is: -10109.425. Best ELBO loss value is: -11338.408203125.

C_PATH mean = tensor([[37.2035,  0.1103,  0.5515],
        [37.1837,  0.1083,  0.5419]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3567e+01, 1.7668e-01, 7.3265e-02],
         [4.2475e+01, 2.2205e-02, 2.0238e-01],
         ...,
         [2.9281e+01, 8.9933e-02, 6.6009e-01],
         [2.3002e+01, 1.1693e-01, 6.8637e-01],
         [1.2393e+01, 7.7212e-02, 3.4063e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.1368e+01, 2.2660e-01, 2.3094e-01],
         [4.3989e+01, 6.7716e-04, 2.2504e-01],
         ...,
         [3.1289e+01, 1.3546e-01, 6.6719e-01],
         [2.2709e+01, 9.6352e-02, 4.0800e-01],
         [1.0536e+01, 1.4178e-01, 4.3116e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  36%|███▌      | 711/2000 [22:52<40:13,  1.87s/it][A
Train Diffusion:  36%|███▌      | 712/2000 [22:54<40:42,  1.90s/it][A
Train Diffusion:  36%|███▌      | 713/2000 [22:56<41:22,  1.93s/it][A
Train Diffusion:  36%|███▌      | 714/2000 [22:58<40:53,  1.91s/it][A
Train Diffusion:  36%|███▌      | 715/2000 [22:59<40:31,  1.89s/it][A
Train Diffusion:  36%|███▌      | 716/2000 [23:01<40:19,  1.88s/it][A
Train Diffusion:  36%|███▌      | 717/2000 [23:03<40:23,  1.89s/it][A
Train Diffusion:  36%|███▌      | 718/2000 [23:05<40:08,  1.88s/it][A
Train Diffusion:  36%|███▌      | 719/2000 [23:07<40:00,  1.87s/it][A
Train Diffusion:  36%|███▌      | 720/2000 [23:09<39:56,  1.87s/it][A

Moving average ELBO loss at 720 iterations is: -11552.816015625. Best ELBO loss value is: -12355.12890625.

C_PATH mean = tensor([[37.0473,  0.1136,  0.5662],
        [37.2171,  0.1142,  0.5820]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4115e+01, 8.3524e-02, 1.8567e-01],
         [4.3456e+01, 7.4876e-03, 1.9039e-01],
         ...,
         [2.9314e+01, 1.3294e-01, 5.7436e-01],
         [2.3382e+01, 1.6197e-01, 8.0281e-01],
         [1.1332e+01, 1.0046e-01, 9.4760e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3004e+01, 3.2167e-01, 1.0573e-01],
         [4.3766e+01, 6.5759e-04, 2.0560e-01],
         ...,
         [3.1077e+01, 9.2999e-02, 8.2161e-01],
         [2.2889e+01, 6.1116e-02, 6.3786e-01],
         [1.2753e+01, 1.1097e-01, 7.8164e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  36%|███▌      | 721/2000 [23:11<40:03,  1.88s/it][A
Train Diffusion:  36%|███▌      | 722/2000 [23:12<39:55,  1.87s/it][A
Train Diffusion:  36%|███▌      | 723/2000 [23:14<39:54,  1.87s/it][A
Train Diffusion:  36%|███▌      | 724/2000 [23:16<39:48,  1.87s/it][A
Train Diffusion:  36%|███▋      | 725/2000 [23:18<39:40,  1.87s/it][A
Train Diffusion:  36%|███▋      | 726/2000 [23:20<39:32,  1.86s/it][A
Train Diffusion:  36%|███▋      | 727/2000 [23:22<39:28,  1.86s/it][A
Train Diffusion:  36%|███▋      | 728/2000 [23:24<39:34,  1.87s/it][A
Train Diffusion:  36%|███▋      | 729/2000 [23:26<39:30,  1.87s/it][A
Train Diffusion:  36%|███▋      | 730/2000 [23:27<39:31,  1.87s/it][A

Moving average ELBO loss at 730 iterations is: -12463.62861328125. Best ELBO loss value is: -12946.6318359375.

C_PATH mean = tensor([[37.0858,  0.1234,  0.5644],
        [37.0316,  0.1248,  0.5796]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3620e+01, 8.1805e-02, 9.8423e-02],
         [4.3494e+01, 6.7594e-03, 1.9744e-01],
         ...,
         [2.9581e+01, 1.4595e-01, 1.0300e+00],
         [2.3814e+01, 9.1617e-02, 1.0839e+00],
         [1.1835e+01, 1.2199e-01, 8.7217e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4225e+01, 2.8855e-01, 2.1679e-01],
         [4.3714e+01, 4.3536e-04, 2.2181e-01],
         ...,
         [2.9850e+01, 8.8150e-02, 4.3285e-01],
         [2.2159e+01, 1.3317e-01, 4.0786e-01],
         [1.2873e+01, 1.0098e-01, 5.1888e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  37%|███▋      | 731/2000 [23:29<39:40,  1.88s/it][A
Train Diffusion:  37%|███▋      | 732/2000 [23:31<39:33,  1.87s/it][A
Train Diffusion:  37%|███▋      | 733/2000 [23:33<39:50,  1.89s/it][A
Train Diffusion:  37%|███▋      | 734/2000 [23:35<39:51,  1.89s/it][A
Train Diffusion:  37%|███▋      | 735/2000 [23:37<39:43,  1.88s/it][A
Train Diffusion:  37%|███▋      | 736/2000 [23:39<39:34,  1.88s/it][A
Train Diffusion:  37%|███▋      | 737/2000 [23:41<39:26,  1.87s/it][A
Train Diffusion:  37%|███▋      | 738/2000 [23:42<39:20,  1.87s/it][A
Train Diffusion:  37%|███▋      | 739/2000 [23:44<39:59,  1.90s/it][A
Train Diffusion:  37%|███▋      | 740/2000 [23:46<39:40,  1.89s/it][A

Moving average ELBO loss at 740 iterations is: -12047.39853515625. Best ELBO loss value is: -12946.6318359375.

C_PATH mean = tensor([[36.9936,  0.1306,  0.5933],
        [36.9215,  0.1337,  0.5778]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4792e+01, 6.8838e-02, 1.8996e-01],
         [4.4815e+01, 3.7756e-03, 3.6507e-01],
         ...,
         [2.8712e+01, 1.4895e-01, 8.0447e-01],
         [2.3646e+01, 1.6852e-01, 9.8053e-01],
         [1.1868e+01, 1.0508e-01, 8.7049e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5420e+01, 2.8835e-01, 1.0584e-01],
         [4.3669e+01, 2.2620e-04, 1.1457e-01],
         ...,
         [2.9628e+01, 9.3537e-02, 6.2497e-01],
         [2.2403e+01, 6.6223e-02, 5.1956e-01],
         [1.3654e+01, 1.1296e-01, 6.6598e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  37%|███▋      | 741/2000 [23:48<39:41,  1.89s/it][A
Train Diffusion:  37%|███▋      | 742/2000 [23:50<39:24,  1.88s/it][A
Train Diffusion:  37%|███▋      | 743/2000 [23:52<39:18,  1.88s/it][A
Train Diffusion:  37%|███▋      | 744/2000 [23:54<39:11,  1.87s/it][A
Train Diffusion:  37%|███▋      | 745/2000 [23:56<39:02,  1.87s/it][A
Train Diffusion:  37%|███▋      | 746/2000 [23:57<38:59,  1.87s/it][A
Train Diffusion:  37%|███▋      | 747/2000 [23:59<38:54,  1.86s/it][A
Train Diffusion:  37%|███▋      | 748/2000 [24:01<38:54,  1.86s/it][A
Train Diffusion:  37%|███▋      | 749/2000 [24:03<38:54,  1.87s/it][A
Train Diffusion:  38%|███▊      | 750/2000 [24:05<38:46,  1.86s/it][A

Moving average ELBO loss at 750 iterations is: -13270.14189453125. Best ELBO loss value is: -13736.791015625.

C_PATH mean = tensor([[36.9055,  0.1343,  0.5755],
        [36.8766,  0.1366,  0.6044]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5401e+01, 6.4666e-02, 9.5900e-02],
         [4.5004e+01, 1.6689e-04, 1.0354e-01],
         ...,
         [2.8922e+01, 1.4320e-01, 8.9746e-01],
         [2.2151e+01, 9.0773e-02, 8.2486e-01],
         [1.3662e+01, 1.1835e-01, 5.4470e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4262e+01, 3.2033e-01, 2.1096e-01],
         [4.4503e+01, 3.3517e-03, 4.0280e-01],
         ...,
         [2.8704e+01, 9.4961e-02, 5.2980e-01],
         [2.3756e+01, 1.2371e-01, 8.6441e-01],
         [1.2450e+01, 9.9583e-02, 9.7401e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  38%|███▊      | 751/2000 [24:07<38:58,  1.87s/it][A
Train Diffusion:  38%|███▊      | 752/2000 [24:09<38:49,  1.87s/it][A
Train Diffusion:  38%|███▊      | 753/2000 [24:11<38:41,  1.86s/it][A
Train Diffusion:  38%|███▊      | 754/2000 [24:12<38:40,  1.86s/it][A
Train Diffusion:  38%|███▊      | 755/2000 [24:14<38:41,  1.86s/it][A
Train Diffusion:  38%|███▊      | 756/2000 [24:16<39:07,  1.89s/it][A
Train Diffusion:  38%|███▊      | 757/2000 [24:18<39:22,  1.90s/it][A
Train Diffusion:  38%|███▊      | 758/2000 [24:20<39:28,  1.91s/it][A
Train Diffusion:  38%|███▊      | 759/2000 [24:22<39:25,  1.91s/it][A
Train Diffusion:  38%|███▊      | 760/2000 [24:24<39:33,  1.91s/it][A

Moving average ELBO loss at 760 iterations is: -12964.41298828125. Best ELBO loss value is: -13736.791015625.

C_PATH mean = tensor([[36.6784,  0.1461,  0.6185],
        [36.7010,  0.1482,  0.6356]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7851e+01, 6.1424e-02, 9.2794e-02],
         [4.5180e+01, 3.5461e-03, 1.7011e-01],
         ...,
         [2.8494e+01, 8.2384e-02, 7.4766e-01],
         [2.3544e+01, 1.3457e-01, 7.6993e-01],
         [1.4331e+01, 9.8679e-02, 8.5167e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4493e+01, 5.7427e-02, 1.8283e-01],
         [4.4705e+01, 1.1407e-05, 2.8580e-01],
         ...,
         [2.7557e+01, 1.4009e-01, 7.0109e-01],
         [2.1366e+01, 8.6164e-02, 6.9367e-01],
         [1.0979e+01, 1.0610e-01, 3.6678e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  38%|███▊      | 761/2000 [24:26<39:43,  1.92s/it][A
Train Diffusion:  38%|███▊      | 762/2000 [24:28<39:54,  1.93s/it][A
Train Diffusion:  38%|███▊      | 763/2000 [24:30<39:23,  1.91s/it][A
Train Diffusion:  38%|███▊      | 764/2000 [24:31<39:00,  1.89s/it][A
Train Diffusion:  38%|███▊      | 765/2000 [24:33<38:54,  1.89s/it][A
Train Diffusion:  38%|███▊      | 766/2000 [24:35<38:39,  1.88s/it][A
Train Diffusion:  38%|███▊      | 767/2000 [24:37<38:28,  1.87s/it][A
Train Diffusion:  38%|███▊      | 768/2000 [24:39<38:20,  1.87s/it][A
Train Diffusion:  38%|███▊      | 769/2000 [24:41<38:13,  1.86s/it][A
Train Diffusion:  38%|███▊      | 770/2000 [24:43<38:08,  1.86s/it][A

Moving average ELBO loss at 770 iterations is: -12514.53701171875. Best ELBO loss value is: -13736.791015625.

C_PATH mean = tensor([[36.7765,  0.1436,  0.5717],
        [36.8364,  0.1456,  0.5739]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.8218e+01, 7.6851e-02, 8.4581e-02],
         [4.4819e+01, 3.7254e-03, 1.8261e-01],
         ...,
         [2.7463e+01, 1.3280e-01, 3.9902e-01],
         [2.3017e+01, 1.3525e-01, 6.5069e-01],
         [1.2046e+01, 1.0496e-01, 7.3986e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3839e+01, 2.4866e-01, 2.3477e-01],
         [4.6346e+01, 3.5079e-04, 2.7761e-01],
         ...,
         [2.7582e+01, 1.1936e-01, 8.8409e-01],
         [2.1019e+01, 9.0712e-02, 6.7325e-01],
         [1.3043e+01, 1.2776e-01, 3.6016e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  39%|███▊      | 771/2000 [24:45<38:18,  1.87s/it][A
Train Diffusion:  39%|███▊      | 772/2000 [24:46<38:12,  1.87s/it][A
Train Diffusion:  39%|███▊      | 773/2000 [24:48<38:08,  1.86s/it][A
Train Diffusion:  39%|███▊      | 774/2000 [24:50<38:02,  1.86s/it][A
Train Diffusion:  39%|███▉      | 775/2000 [24:52<38:24,  1.88s/it][A
Train Diffusion:  39%|███▉      | 776/2000 [24:54<38:11,  1.87s/it][A
Train Diffusion:  39%|███▉      | 777/2000 [24:56<38:04,  1.87s/it][A
Train Diffusion:  39%|███▉      | 778/2000 [24:58<38:00,  1.87s/it][A
Train Diffusion:  39%|███▉      | 779/2000 [24:59<37:52,  1.86s/it][A
Train Diffusion:  39%|███▉      | 780/2000 [25:01<38:34,  1.90s/it][A

Moving average ELBO loss at 780 iterations is: -13712.09814453125. Best ELBO loss value is: -13988.5078125.

C_PATH mean = tensor([[36.7895,  0.1433,  0.5784],
        [36.7997,  0.1426,  0.5820]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5834e+01, 6.1519e-02, 1.9664e-01],
         [4.5775e+01, 3.3127e-03, 2.5391e-01],
         ...,
         [2.5571e+01, 1.0632e-01, 3.5327e-01],
         [2.1857e+01, 7.6458e-02, 6.6707e-01],
         [1.1242e+01, 1.0696e-01, 8.4146e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4101e+01, 2.5667e-01, 7.6736e-02],
         [4.5633e+01, 5.3685e-04, 1.6907e-01],
         ...,
         [2.7601e+01, 1.1432e-01, 9.4583e-01],
         [2.0959e+01, 1.1558e-01, 8.0662e-01],
         [1.2912e+01, 9.0133e-02, 5.0129e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  39%|███▉      | 781/2000 [25:03<37:49,  1.86s/it][A
Train Diffusion:  39%|███▉      | 782/2000 [25:05<36:57,  1.82s/it][A
Train Diffusion:  39%|███▉      | 783/2000 [25:07<36:49,  1.82s/it][A
Train Diffusion:  39%|███▉      | 784/2000 [25:09<36:30,  1.80s/it][A
Train Diffusion:  39%|███▉      | 785/2000 [25:10<36:00,  1.78s/it][A
Train Diffusion:  39%|███▉      | 786/2000 [25:12<35:39,  1.76s/it][A
Train Diffusion:  39%|███▉      | 787/2000 [25:14<35:20,  1.75s/it][A
Train Diffusion:  39%|███▉      | 788/2000 [25:15<35:25,  1.75s/it][A
Train Diffusion:  39%|███▉      | 789/2000 [25:17<35:15,  1.75s/it][A
Train Diffusion:  40%|███▉      | 790/2000 [25:19<35:03,  1.74s/it][A

Moving average ELBO loss at 790 iterations is: -14142.2556640625. Best ELBO loss value is: -14512.0771484375.

C_PATH mean = tensor([[36.6049,  0.1487,  0.5983],
        [36.6106,  0.1483,  0.6026]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6497e+01, 9.8753e-02, 2.0078e-01],
         [4.6606e+01, 5.8891e-03, 3.9396e-01],
         ...,
         [2.5194e+01, 1.2969e-01, 5.2896e-01],
         [2.0323e+01, 1.0396e-01, 5.2237e-01],
         [1.3214e+01, 1.3667e-01, 4.2448e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6264e+01, 2.6441e-01, 8.2906e-02],
         [4.7063e+01, 1.7632e-04, 9.4862e-02],
         ...,
         [2.6947e+01, 1.0632e-01, 9.2011e-01],
         [2.2648e+01, 1.0659e-01, 1.0245e+00],
         [1.2342e+01, 8.2880e-02, 1.1691e+00]]], grad_fn=<CatBackward>)



Train Diffusion:  40%|███▉      | 791/2000 [25:21<34:55,  1.73s/it][A
Train Diffusion:  40%|███▉      | 792/2000 [25:22<34:48,  1.73s/it][A
Train Diffusion:  40%|███▉      | 793/2000 [25:24<34:48,  1.73s/it][A
Train Diffusion:  40%|███▉      | 794/2000 [25:26<34:47,  1.73s/it][A
Train Diffusion:  40%|███▉      | 795/2000 [25:28<34:49,  1.73s/it][A
Train Diffusion:  40%|███▉      | 796/2000 [25:29<34:47,  1.73s/it][A
Train Diffusion:  40%|███▉      | 797/2000 [25:31<34:43,  1.73s/it][A
Train Diffusion:  40%|███▉      | 798/2000 [25:33<34:45,  1.73s/it][A
Train Diffusion:  40%|███▉      | 799/2000 [25:34<34:41,  1.73s/it][A
Train Diffusion:  40%|████      | 800/2000 [25:36<34:37,  1.73s/it][A

Moving average ELBO loss at 800 iterations is: -14209.8677734375. Best ELBO loss value is: -14512.0771484375.

C_PATH mean = tensor([[36.5949,  0.1428,  0.5721],
        [36.5925,  0.1489,  0.5681]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6116e+01, 3.2602e-01, 1.1608e-01],
         [4.6776e+01, 2.5102e-04, 1.9052e-01],
         ...,
         [2.6679e+01, 9.5917e-02, 6.4070e-01],
         [2.1180e+01, 7.9568e-02, 7.8598e-01],
         [1.3853e+01, 1.3556e-01, 8.8468e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7536e+01, 8.2803e-02, 1.5957e-01],
         [4.7277e+01, 2.7984e-03, 2.0154e-01],
         ...,
         [2.4774e+01, 1.3744e-01, 7.2723e-01],
         [2.1890e+01, 1.2063e-01, 6.7971e-01],
         [1.2218e+01, 8.3448e-02, 5.0810e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  40%|████      | 801/2000 [25:38<34:36,  1.73s/it][A
Train Diffusion:  40%|████      | 802/2000 [25:40<34:33,  1.73s/it][A
Train Diffusion:  40%|████      | 803/2000 [25:41<34:32,  1.73s/it][A
Train Diffusion:  40%|████      | 804/2000 [25:43<34:28,  1.73s/it][A
Train Diffusion:  40%|████      | 805/2000 [25:45<34:26,  1.73s/it][A
Train Diffusion:  40%|████      | 806/2000 [25:47<34:31,  1.73s/it][A
Train Diffusion:  40%|████      | 807/2000 [25:48<34:28,  1.73s/it][A
Train Diffusion:  40%|████      | 808/2000 [25:50<34:28,  1.74s/it][A
Train Diffusion:  40%|████      | 809/2000 [25:52<34:23,  1.73s/it][A
Train Diffusion:  40%|████      | 810/2000 [25:54<34:21,  1.73s/it][A

Moving average ELBO loss at 810 iterations is: -12506.664453125. Best ELBO loss value is: -14512.0771484375.

C_PATH mean = tensor([[36.8669,  0.1301,  0.6313],
        [36.9112,  0.1297,  0.6453]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.0981e+01, 2.5017e-02, 7.5653e-02],
         [4.0975e+01, 1.1622e-03, 1.7367e-01],
         ...,
         [2.5162e+01, 9.8430e-02, 8.0339e-01],
         [2.1697e+01, 1.0321e-01, 8.8526e-01],
         [1.1097e+01, 8.0838e-02, 7.1478e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [3.9297e+01, 1.1234e-01, 1.8822e-01],
         [4.1406e+01, 8.2368e-05, 2.4307e-01],
         ...,
         [2.6523e+01, 1.2409e-01, 7.7602e-01],
         [2.0019e+01, 9.2930e-02, 5.3756e-01],
         [1.2951e+01, 1.1722e-01, 4.2887e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  41%|████      | 811/2000 [25:55<34:34,  1.74s/it][A
Train Diffusion:  41%|████      | 812/2000 [25:57<34:41,  1.75s/it][A
Train Diffusion:  41%|████      | 813/2000 [25:59<34:30,  1.74s/it][A
Train Diffusion:  41%|████      | 814/2000 [26:01<34:21,  1.74s/it][A
Train Diffusion:  41%|████      | 815/2000 [26:02<34:17,  1.74s/it][A
Train Diffusion:  41%|████      | 816/2000 [26:04<34:20,  1.74s/it][A
Train Diffusion:  41%|████      | 817/2000 [26:06<34:15,  1.74s/it][A
Train Diffusion:  41%|████      | 818/2000 [26:07<34:10,  1.73s/it][A
Train Diffusion:  41%|████      | 819/2000 [26:09<34:20,  1.74s/it][A
Train Diffusion:  41%|████      | 820/2000 [26:11<34:12,  1.74s/it][A

Moving average ELBO loss at 820 iterations is: -14091.758203125. Best ELBO loss value is: -14646.046875.

C_PATH mean = tensor([[36.6974,  0.1367,  0.6138],
        [36.6678,  0.1328,  0.6433]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6910e+01, 1.0011e-01, 1.0126e-01],
         [4.5331e+01, 3.1180e-03, 1.8693e-01],
         ...,
         [2.5027e+01, 1.0551e-01, 7.8834e-01],
         [2.1557e+01, 7.4412e-02, 8.9867e-01],
         [1.2989e+01, 1.1253e-01, 9.1302e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3420e+01, 1.4144e-01, 1.8019e-01],
         [4.5452e+01, 7.2195e-05, 2.2420e-01],
         ...,
         [2.5795e+01, 1.2979e-01, 7.3323e-01],
         [2.0622e+01, 1.3882e-01, 4.8423e-01],
         [1.2049e+01, 8.9797e-02, 3.0273e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  41%|████      | 821/2000 [26:13<34:07,  1.74s/it][A
Train Diffusion:  41%|████      | 822/2000 [26:14<34:03,  1.73s/it][A
Train Diffusion:  41%|████      | 823/2000 [26:16<34:10,  1.74s/it][A
Train Diffusion:  41%|████      | 824/2000 [26:18<34:08,  1.74s/it][A
Train Diffusion:  41%|████▏     | 825/2000 [26:20<34:01,  1.74s/it][A
Train Diffusion:  41%|████▏     | 826/2000 [26:21<33:57,  1.74s/it][A
Train Diffusion:  41%|████▏     | 827/2000 [26:23<33:53,  1.73s/it][A
Train Diffusion:  41%|████▏     | 828/2000 [26:25<33:57,  1.74s/it][A
Train Diffusion:  41%|████▏     | 829/2000 [26:27<33:54,  1.74s/it][A
Train Diffusion:  42%|████▏     | 830/2000 [26:28<34:12,  1.75s/it][A

Moving average ELBO loss at 830 iterations is: -14616.7607421875. Best ELBO loss value is: -15117.9912109375.

C_PATH mean = tensor([[36.5435,  0.1531,  0.6280],
        [36.5661,  0.1570,  0.6330]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7792e+01, 2.4917e-01, 1.8673e-01],
         [4.7374e+01, 2.5812e-04, 4.0747e-01],
         ...,
         [2.6046e+01, 1.3873e-01, 5.9909e-01],
         [2.0787e+01, 1.4516e-01, 7.5109e-01],
         [1.3182e+01, 9.7427e-02, 7.0190e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5370e+01, 6.5309e-02, 1.0827e-01],
         [4.7309e+01, 2.4399e-03, 1.0092e-01],
         ...,
         [2.4074e+01, 9.3501e-02, 8.1175e-01],
         [2.1237e+01, 6.8927e-02, 7.2301e-01],
         [1.1889e+01, 1.1456e-01, 7.2339e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  42%|████▏     | 831/2000 [26:30<34:11,  1.75s/it][A
Train Diffusion:  42%|████▏     | 832/2000 [26:32<34:00,  1.75s/it][A
Train Diffusion:  42%|████▏     | 833/2000 [26:34<33:58,  1.75s/it][A
Train Diffusion:  42%|████▏     | 834/2000 [26:35<33:51,  1.74s/it][A
Train Diffusion:  42%|████▏     | 835/2000 [26:37<33:43,  1.74s/it][A
Train Diffusion:  42%|████▏     | 836/2000 [26:39<33:42,  1.74s/it][A
Train Diffusion:  42%|████▏     | 837/2000 [26:41<33:41,  1.74s/it][A
Train Diffusion:  42%|████▏     | 838/2000 [26:42<33:36,  1.74s/it][A
Train Diffusion:  42%|████▏     | 839/2000 [26:44<33:30,  1.73s/it][A
Train Diffusion:  42%|████▏     | 840/2000 [26:46<33:27,  1.73s/it][A

Moving average ELBO loss at 840 iterations is: -15298.52919921875. Best ELBO loss value is: -15828.484375.

C_PATH mean = tensor([[36.6060,  0.1493,  0.5998],
        [36.6116,  0.1489,  0.6135]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6290e+01, 1.2544e-01, 1.0928e-01],
         [4.7714e+01, 5.1427e-03, 1.8327e-01],
         ...,
         [2.3938e+01, 9.1356e-02, 4.8596e-01],
         [2.1144e+01, 6.7781e-02, 4.7535e-01],
         [1.3501e+01, 1.0972e-01, 4.4643e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7945e+01, 1.6392e-01, 1.7134e-01],
         [4.7268e+01, 8.6051e-05, 2.3041e-01],
         ...,
         [2.5639e+01, 1.3525e-01, 8.3805e-01],
         [2.0519e+01, 1.3716e-01, 9.4146e-01],
         [1.1773e+01, 9.4186e-02, 1.0030e+00]]], grad_fn=<CatBackward>)



Train Diffusion:  42%|████▏     | 841/2000 [26:47<33:28,  1.73s/it][A
Train Diffusion:  42%|████▏     | 842/2000 [26:49<33:27,  1.73s/it][A
Train Diffusion:  42%|████▏     | 843/2000 [26:51<33:22,  1.73s/it][A
Train Diffusion:  42%|████▏     | 844/2000 [26:53<33:19,  1.73s/it][A
Train Diffusion:  42%|████▏     | 845/2000 [26:54<33:17,  1.73s/it][A
Train Diffusion:  42%|████▏     | 846/2000 [26:56<33:15,  1.73s/it][A
Train Diffusion:  42%|████▏     | 847/2000 [26:58<33:14,  1.73s/it][A
Train Diffusion:  42%|████▏     | 848/2000 [27:00<33:20,  1.74s/it][A
Train Diffusion:  42%|████▏     | 849/2000 [27:01<33:28,  1.74s/it][A
Train Diffusion:  42%|████▎     | 850/2000 [27:03<33:27,  1.75s/it][A

Moving average ELBO loss at 850 iterations is: -15828.7458984375. Best ELBO loss value is: -16414.95703125.

C_PATH mean = tensor([[36.6207,  0.1489,  0.6160],
        [36.6050,  0.1528,  0.6241]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6233e+01, 1.5418e-01, 1.8206e-01],
         [4.6121e+01, 5.7786e-05, 3.6961e-01],
         ...,
         [2.4877e+01, 1.1486e-01, 6.3427e-01],
         [1.9912e+01, 1.1742e-01, 7.3222e-01],
         [1.1778e+01, 8.8317e-02, 7.1532e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5380e+01, 1.1587e-01, 1.0147e-01],
         [4.6751e+01, 4.5639e-03, 1.0323e-01],
         ...,
         [2.3942e+01, 1.1286e-01, 7.7056e-01],
         [2.1150e+01, 7.7243e-02, 7.2907e-01],
         [1.3584e+01, 1.1592e-01, 6.5835e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  43%|████▎     | 851/2000 [27:05<33:20,  1.74s/it][A
Train Diffusion:  43%|████▎     | 852/2000 [27:07<33:15,  1.74s/it][A
Train Diffusion:  43%|████▎     | 853/2000 [27:08<33:09,  1.73s/it][A
Train Diffusion:  43%|████▎     | 854/2000 [27:10<33:11,  1.74s/it][A
Train Diffusion:  43%|████▎     | 855/2000 [27:12<33:10,  1.74s/it][A
Train Diffusion:  43%|████▎     | 856/2000 [27:14<33:06,  1.74s/it][A
Train Diffusion:  43%|████▎     | 857/2000 [27:15<33:01,  1.73s/it][A
Train Diffusion:  43%|████▎     | 858/2000 [27:17<32:59,  1.73s/it][A
Train Diffusion:  43%|████▎     | 859/2000 [27:19<32:56,  1.73s/it][A
Train Diffusion:  43%|████▎     | 860/2000 [27:20<32:52,  1.73s/it][A

Moving average ELBO loss at 860 iterations is: -16152.39052734375. Best ELBO loss value is: -16670.4921875.

C_PATH mean = tensor([[36.5671,  0.1508,  0.6280],
        [36.5557,  0.1524,  0.6176]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5280e+01, 1.4270e-01, 9.9421e-02],
         [4.6354e+01, 4.1702e-03, 1.0157e-01],
         ...,
         [2.4599e+01, 7.6395e-02, 8.6788e-01],
         [2.1349e+01, 6.3876e-02, 7.5654e-01],
         [1.4139e+01, 1.1590e-01, 5.6966e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6000e+01, 1.4898e-01, 1.9215e-01],
         [4.6282e+01, 4.5463e-05, 3.7693e-01],
         ...,
         [2.3603e+01, 1.1496e-01, 5.0764e-01],
         [1.9437e+01, 1.1196e-01, 7.5597e-01],
         [1.1690e+01, 8.1167e-02, 8.6248e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  43%|████▎     | 861/2000 [27:22<32:50,  1.73s/it][A
Train Diffusion:  43%|████▎     | 862/2000 [27:24<32:58,  1.74s/it][A
Train Diffusion:  43%|████▎     | 863/2000 [27:26<32:57,  1.74s/it][A
Train Diffusion:  43%|████▎     | 864/2000 [27:27<32:50,  1.73s/it][A
Train Diffusion:  43%|████▎     | 865/2000 [27:29<32:47,  1.73s/it][A
Train Diffusion:  43%|████▎     | 866/2000 [27:31<32:44,  1.73s/it][A
Train Diffusion:  43%|████▎     | 867/2000 [27:33<32:54,  1.74s/it][A
Train Diffusion:  43%|████▎     | 868/2000 [27:34<33:00,  1.75s/it][A
Train Diffusion:  43%|████▎     | 869/2000 [27:36<32:53,  1.74s/it][A
Train Diffusion:  44%|████▎     | 870/2000 [27:38<32:46,  1.74s/it][A

Moving average ELBO loss at 870 iterations is: -15389.98212890625. Best ELBO loss value is: -16670.4921875.

C_PATH mean = tensor([[36.4639,  0.1554,  0.6424],
        [36.4583,  0.1539,  0.6687]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6084e+01, 8.1028e-02, 1.1648e-01],
         [4.6514e+01, 3.9921e-05, 1.0214e-01],
         ...,
         [2.4415e+01, 1.0147e-01, 8.2907e-01],
         [2.1577e+01, 9.0222e-02, 6.7696e-01],
         [1.2985e+01, 1.4834e-01, 6.2119e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5740e+01, 2.8722e-01, 1.6515e-01],
         [4.6863e+01, 2.8070e-03, 3.6210e-01],
         ...,
         [2.3381e+01, 1.2761e-01, 6.4832e-01],
         [1.9402e+01, 1.0868e-01, 8.3056e-01],
         [1.3696e+01, 7.8954e-02, 7.7592e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  44%|████▎     | 871/2000 [27:40<32:41,  1.74s/it][A
Train Diffusion:  44%|████▎     | 872/2000 [27:41<32:34,  1.73s/it][A
Train Diffusion:  44%|████▎     | 873/2000 [27:43<32:30,  1.73s/it][A
Train Diffusion:  44%|████▎     | 874/2000 [27:45<32:27,  1.73s/it][A
Train Diffusion:  44%|████▍     | 875/2000 [27:47<32:56,  1.76s/it][A
Train Diffusion:  44%|████▍     | 876/2000 [27:49<34:06,  1.82s/it][A
Train Diffusion:  44%|████▍     | 877/2000 [27:50<33:34,  1.79s/it][A
Train Diffusion:  44%|████▍     | 878/2000 [27:52<33:08,  1.77s/it][A
Train Diffusion:  44%|████▍     | 879/2000 [27:54<32:57,  1.76s/it][A
Train Diffusion:  44%|████▍     | 880/2000 [27:55<32:42,  1.75s/it][A

Moving average ELBO loss at 880 iterations is: -13092.94267578125. Best ELBO loss value is: -16670.4921875.

C_PATH mean = tensor([[36.5259,  0.1453,  0.6234],
        [36.5151,  0.1454,  0.6255]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6178e+01, 1.7297e-01, 1.8379e-01],
         [4.7464e+01, 7.2301e-04, 2.0191e-01],
         ...,
         [2.3868e+01, 1.3125e-01, 8.2388e-01],
         [2.1123e+01, 8.7670e-02, 8.7689e-01],
         [1.2584e+01, 1.1992e-01, 8.4082e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6638e+01, 3.2503e-02, 7.3604e-02],
         [4.7206e+01, 3.2358e-05, 1.9257e-01],
         ...,
         [2.3419e+01, 1.0273e-01, 6.2701e-01],
         [1.9276e+01, 1.1080e-01, 5.7349e-01],
         [1.3691e+01, 7.5460e-02, 3.8062e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  44%|████▍     | 881/2000 [27:57<32:33,  1.75s/it][A
Train Diffusion:  44%|████▍     | 882/2000 [27:59<32:23,  1.74s/it][A
Train Diffusion:  44%|████▍     | 883/2000 [28:01<32:16,  1.73s/it][A
Train Diffusion:  44%|████▍     | 884/2000 [28:02<32:17,  1.74s/it][A
Train Diffusion:  44%|████▍     | 885/2000 [28:04<32:25,  1.74s/it][A
Train Diffusion:  44%|████▍     | 886/2000 [28:06<32:27,  1.75s/it][A
Train Diffusion:  44%|████▍     | 887/2000 [28:08<32:25,  1.75s/it][A
Train Diffusion:  44%|████▍     | 888/2000 [28:09<32:16,  1.74s/it][A
Train Diffusion:  44%|████▍     | 889/2000 [28:11<32:10,  1.74s/it][A
Train Diffusion:  44%|████▍     | 890/2000 [28:13<32:23,  1.75s/it][A

Moving average ELBO loss at 890 iterations is: -15236.14921875. Best ELBO loss value is: -16670.4921875.

C_PATH mean = tensor([[36.5311,  0.1498,  0.6273],
        [36.5846,  0.1415,  0.6200]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4899e+01, 1.6666e-01, 1.6581e-01],
         [4.6509e+01, 1.1983e-04, 3.5118e-01],
         ...,
         [2.4220e+01, 1.1663e-01, 8.4669e-01],
         [2.1500e+01, 7.5214e-02, 9.2480e-01],
         [1.4967e+01, 9.9059e-02, 9.2079e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6007e+01, 2.0032e-01, 9.2099e-02],
         [4.6901e+01, 1.2807e-03, 1.0208e-01],
         ...,
         [2.2945e+01, 1.0129e-01, 5.3024e-01],
         [1.9373e+01, 1.3757e-01, 4.4590e-01],
         [1.2330e+01, 9.8490e-02, 3.7814e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  45%|████▍     | 891/2000 [28:15<32:18,  1.75s/it][A
Train Diffusion:  45%|████▍     | 892/2000 [28:16<32:22,  1.75s/it][A
Train Diffusion:  45%|████▍     | 893/2000 [28:18<32:13,  1.75s/it][A
Train Diffusion:  45%|████▍     | 894/2000 [28:20<32:07,  1.74s/it][A
Train Diffusion:  45%|████▍     | 895/2000 [28:22<31:58,  1.74s/it][A
Train Diffusion:  45%|████▍     | 896/2000 [28:23<31:53,  1.73s/it][A
Train Diffusion:  45%|████▍     | 897/2000 [28:25<31:53,  1.73s/it][A
Train Diffusion:  45%|████▍     | 898/2000 [28:27<31:48,  1.73s/it][A
Train Diffusion:  45%|████▍     | 899/2000 [28:28<31:47,  1.73s/it][A
Train Diffusion:  45%|████▌     | 900/2000 [28:30<31:43,  1.73s/it][A

Moving average ELBO loss at 900 iterations is: -16598.36220703125. Best ELBO loss value is: -17111.12890625.

C_PATH mean = tensor([[36.5316,  0.1510,  0.6341],
        [36.4360,  0.1527,  0.6491]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7216e+01, 6.6076e-02, 8.9165e-02],
         [4.7262e+01, 1.9883e-03, 1.8695e-01],
         ...,
         [2.4014e+01, 1.4158e-01, 4.6977e-01],
         [1.9894e+01, 1.4189e-01, 7.3308e-01],
         [1.4525e+01, 9.3278e-02, 8.4173e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6095e+01, 2.0968e-01, 1.7968e-01],
         [4.7258e+01, 1.8839e-05, 2.3552e-01],
         ...,
         [2.2796e+01, 1.0686e-01, 9.0425e-01],
         [2.1007e+01, 7.4060e-02, 7.4866e-01],
         [1.3028e+01, 1.0694e-01, 6.0593e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  45%|████▌     | 901/2000 [28:32<31:40,  1.73s/it][A
Train Diffusion:  45%|████▌     | 902/2000 [28:34<31:46,  1.74s/it][A
Train Diffusion:  45%|████▌     | 903/2000 [28:35<31:46,  1.74s/it][A
Train Diffusion:  45%|████▌     | 904/2000 [28:37<31:53,  1.75s/it][A
Train Diffusion:  45%|████▌     | 905/2000 [28:39<31:59,  1.75s/it][A
Train Diffusion:  45%|████▌     | 906/2000 [28:41<31:47,  1.74s/it][A
Train Diffusion:  45%|████▌     | 907/2000 [28:42<31:38,  1.74s/it][A
Train Diffusion:  45%|████▌     | 908/2000 [28:44<31:31,  1.73s/it][A
Train Diffusion:  45%|████▌     | 909/2000 [28:46<31:27,  1.73s/it][A
Train Diffusion:  46%|████▌     | 910/2000 [28:48<31:21,  1.73s/it][A

Moving average ELBO loss at 910 iterations is: -16923.9982421875. Best ELBO loss value is: -17202.765625.

C_PATH mean = tensor([[36.4124,  0.1620,  0.6552],
        [36.3770,  0.1667,  0.6788]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6981e+01, 5.1564e-02, 9.7405e-02],
         [4.7100e+01, 2.0003e-05, 9.0501e-02],
         ...,
         [2.4018e+01, 1.5916e-01, 8.3901e-01],
         [2.0163e+01, 9.2939e-02, 9.2209e-01],
         [1.4958e+01, 1.1391e-01, 9.7026e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7562e+01, 2.0504e-01, 2.2025e-01],
         [4.7182e+01, 9.3100e-04, 4.3767e-01],
         ...,
         [2.2514e+01, 9.2203e-02, 5.6066e-01],
         [2.0971e+01, 1.3076e-01, 5.3586e-01],
         [1.3217e+01, 1.0186e-01, 4.6802e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  46%|████▌     | 911/2000 [28:49<31:21,  1.73s/it][A
Train Diffusion:  46%|████▌     | 912/2000 [28:51<31:17,  1.73s/it][A
Train Diffusion:  46%|████▌     | 913/2000 [28:53<31:16,  1.73s/it][A
Train Diffusion:  46%|████▌     | 914/2000 [28:54<31:15,  1.73s/it][A
Train Diffusion:  46%|████▌     | 915/2000 [28:56<31:14,  1.73s/it][A
Train Diffusion:  46%|████▌     | 916/2000 [28:58<31:12,  1.73s/it][A
Train Diffusion:  46%|████▌     | 917/2000 [29:00<31:08,  1.73s/it][A
Train Diffusion:  46%|████▌     | 918/2000 [29:01<31:04,  1.72s/it][A
Train Diffusion:  46%|████▌     | 919/2000 [29:03<31:05,  1.73s/it][A
Train Diffusion:  46%|████▌     | 920/2000 [29:05<31:05,  1.73s/it][A

Moving average ELBO loss at 920 iterations is: -17525.93046875. Best ELBO loss value is: -17794.81640625.

C_PATH mean = tensor([[36.4706,  0.1670,  0.6411],
        [36.4192,  0.1704,  0.6531]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5390e+01, 2.8230e-01, 1.0938e-01],
         [4.6566e+01, 1.1013e-03, 9.7390e-02],
         ...,
         [2.2579e+01, 1.5381e-01, 5.9894e-01],
         [2.1080e+01, 1.0762e-01, 5.2303e-01],
         [1.3732e+01, 1.3385e-01, 4.6208e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6076e+01, 7.5389e-02, 2.0546e-01],
         [4.6643e+01, 2.1087e-05, 4.3299e-01],
         ...,
         [2.3932e+01, 1.2004e-01, 8.5141e-01],
         [2.0414e+01, 1.2063e-01, 9.4190e-01],
         [1.5423e+01, 9.6490e-02, 9.0066e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  46%|████▌     | 921/2000 [29:07<31:06,  1.73s/it][A
Train Diffusion:  46%|████▌     | 922/2000 [29:08<31:03,  1.73s/it][A
Train Diffusion:  46%|████▌     | 923/2000 [29:10<31:17,  1.74s/it][A
Train Diffusion:  46%|████▌     | 924/2000 [29:12<31:17,  1.75s/it][A
Train Diffusion:  46%|████▋     | 925/2000 [29:14<31:08,  1.74s/it][A
Train Diffusion:  46%|████▋     | 926/2000 [29:15<31:05,  1.74s/it][A
Train Diffusion:  46%|████▋     | 927/2000 [29:17<31:00,  1.73s/it][A
Train Diffusion:  46%|████▋     | 928/2000 [29:19<30:58,  1.73s/it][A
Train Diffusion:  46%|████▋     | 929/2000 [29:20<30:54,  1.73s/it][A
Train Diffusion:  46%|████▋     | 930/2000 [29:22<30:47,  1.73s/it][A

Moving average ELBO loss at 930 iterations is: -17745.3083984375. Best ELBO loss value is: -18553.166015625.

C_PATH mean = tensor([[36.5525,  0.1681,  0.6480],
        [36.5557,  0.1707,  0.6387]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5278e+01, 7.7212e-02, 2.1481e-01],
         [4.5753e+01, 2.0277e-03, 2.5507e-01],
         ...,
         [2.2999e+01, 8.8724e-02, 6.7335e-01],
         [2.1319e+01, 8.2699e-02, 5.4445e-01],
         [1.5742e+01, 1.3062e-01, 4.6235e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4954e+01, 1.3929e-01, 8.1730e-02],
         [4.6005e+01, 3.0762e-06, 1.8325e-01],
         ...,
         [2.3733e+01, 1.3877e-01, 6.5642e-01],
         [2.0103e+01, 1.1209e-01, 7.8531e-01],
         [1.3476e+01, 8.1163e-02, 8.8853e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  47%|████▋     | 931/2000 [29:24<30:46,  1.73s/it][A
Train Diffusion:  47%|████▋     | 932/2000 [29:26<30:47,  1.73s/it][A
Train Diffusion:  47%|████▋     | 933/2000 [29:27<30:43,  1.73s/it][A
Train Diffusion:  47%|████▋     | 934/2000 [29:29<30:40,  1.73s/it][A
Train Diffusion:  47%|████▋     | 935/2000 [29:31<30:38,  1.73s/it][A
Train Diffusion:  47%|████▋     | 936/2000 [29:33<30:36,  1.73s/it][A
Train Diffusion:  47%|████▋     | 937/2000 [29:34<30:36,  1.73s/it][A
Train Diffusion:  47%|████▋     | 938/2000 [29:36<30:37,  1.73s/it][A
Train Diffusion:  47%|████▋     | 939/2000 [29:38<30:34,  1.73s/it][A
Train Diffusion:  47%|████▋     | 940/2000 [29:39<30:36,  1.73s/it][A

Moving average ELBO loss at 940 iterations is: -17375.541015625. Best ELBO loss value is: -18553.166015625.

C_PATH mean = tensor([[36.5660,  0.1622,  0.6337],
        [36.5602,  0.1628,  0.6384]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5890e+01, 3.6576e-02, 9.4539e-02],
         [4.6553e+01, 1.4012e-03, 2.0223e-01],
         ...,
         [2.3060e+01, 9.8725e-02, 4.9911e-01],
         [1.9847e+01, 1.1019e-01, 4.8624e-01],
         [1.5530e+01, 9.6073e-02, 4.6661e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5171e+01, 2.2247e-01, 1.8942e-01],
         [4.6687e+01, 4.7756e-06, 2.5111e-01],
         ...,
         [2.3625e+01, 1.3591e-01, 8.1335e-01],
         [2.1735e+01, 9.5479e-02, 8.8196e-01],
         [1.4358e+01, 1.1475e-01, 9.2682e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  47%|████▋     | 941/2000 [29:41<30:44,  1.74s/it][A
Train Diffusion:  47%|████▋     | 942/2000 [29:43<30:47,  1.75s/it][A
Train Diffusion:  47%|████▋     | 943/2000 [29:45<30:42,  1.74s/it][A
Train Diffusion:  47%|████▋     | 944/2000 [29:46<30:39,  1.74s/it][A
Train Diffusion:  47%|████▋     | 945/2000 [29:48<30:32,  1.74s/it][A
Train Diffusion:  47%|████▋     | 946/2000 [29:50<30:25,  1.73s/it][A
Train Diffusion:  47%|████▋     | 947/2000 [29:52<30:24,  1.73s/it][A
Train Diffusion:  47%|████▋     | 948/2000 [29:53<30:18,  1.73s/it][A
Train Diffusion:  47%|████▋     | 949/2000 [29:55<30:15,  1.73s/it][A
Train Diffusion:  48%|████▊     | 950/2000 [29:57<30:15,  1.73s/it][A

Moving average ELBO loss at 950 iterations is: -17754.1958984375. Best ELBO loss value is: -18553.166015625.

C_PATH mean = tensor([[36.6103,  0.1462,  0.6494],
        [36.6818,  0.1471,  0.6257]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6019e+01, 2.0696e-01, 1.8367e-01],
         [4.6475e+01, 3.5471e-06, 3.5161e-01],
         ...,
         [2.4141e+01, 1.2503e-01, 6.9802e-01],
         [2.0498e+01, 1.1217e-01, 7.5139e-01],
         [1.4428e+01, 7.9922e-02, 6.8348e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5108e+01, 8.6822e-02, 9.1856e-02],
         [4.6148e+01, 1.9386e-03, 9.5827e-02],
         ...,
         [2.2880e+01, 1.0724e-01, 6.6454e-01],
         [2.1854e+01, 8.4522e-02, 5.3561e-01],
         [1.6623e+01, 1.1796e-01, 4.8620e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  48%|████▊     | 951/2000 [29:59<30:15,  1.73s/it][A
Train Diffusion:  48%|████▊     | 952/2000 [30:00<30:13,  1.73s/it][A
Train Diffusion:  48%|████▊     | 953/2000 [30:02<30:09,  1.73s/it][A
Train Diffusion:  48%|████▊     | 954/2000 [30:04<30:08,  1.73s/it][A
Train Diffusion:  48%|████▊     | 955/2000 [30:05<30:06,  1.73s/it][A
Train Diffusion:  48%|████▊     | 956/2000 [30:07<30:05,  1.73s/it][A
Train Diffusion:  48%|████▊     | 957/2000 [30:09<30:07,  1.73s/it][A
Train Diffusion:  48%|████▊     | 958/2000 [30:11<30:04,  1.73s/it][A
Train Diffusion:  48%|████▊     | 959/2000 [30:12<30:01,  1.73s/it][A
Train Diffusion:  48%|████▊     | 960/2000 [30:14<30:04,  1.74s/it][A

Moving average ELBO loss at 960 iterations is: -18011.3154296875. Best ELBO loss value is: -18622.783203125.

C_PATH mean = tensor([[36.4789,  0.1549,  0.6916],
        [36.6115,  0.1642,  0.7021]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5858e+01, 1.1688e-01, 1.7013e-01],
         [4.6173e+01, 1.7037e-06, 4.0515e-01],
         ...,
         [2.3355e+01, 9.2218e-02, 7.2826e-01],
         [2.1678e+01, 7.1274e-02, 5.5801e-01],
         [1.4380e+01, 9.9626e-02, 4.5490e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6358e+01, 1.2789e-02, 9.5128e-02],
         [4.6457e+01, 7.1967e-04, 9.5989e-02],
         ...,
         [2.3503e+01, 1.3390e-01, 7.5715e-01],
         [2.0181e+01, 1.2064e-01, 8.1751e-01],
         [1.5686e+01, 9.1472e-02, 8.8882e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  48%|████▊     | 961/2000 [30:16<30:14,  1.75s/it][A
Train Diffusion:  48%|████▊     | 962/2000 [30:18<30:15,  1.75s/it][A
Train Diffusion:  48%|████▊     | 963/2000 [30:19<30:06,  1.74s/it][A
Train Diffusion:  48%|████▊     | 964/2000 [30:21<29:59,  1.74s/it][A
Train Diffusion:  48%|████▊     | 965/2000 [30:23<29:55,  1.73s/it][A
Train Diffusion:  48%|████▊     | 966/2000 [30:25<29:55,  1.74s/it][A
Train Diffusion:  48%|████▊     | 967/2000 [30:26<29:54,  1.74s/it][A
Train Diffusion:  48%|████▊     | 968/2000 [30:28<29:49,  1.73s/it][A
Train Diffusion:  48%|████▊     | 969/2000 [30:30<29:47,  1.73s/it][A
Train Diffusion:  48%|████▊     | 970/2000 [30:32<29:44,  1.73s/it][A

Moving average ELBO loss at 970 iterations is: -18767.25625. Best ELBO loss value is: -19143.076171875.

C_PATH mean = tensor([[36.5133,  0.1620,  0.6618],
        [36.4713,  0.1533,  0.6791]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7187e+01, 2.6255e-01, 8.7676e-02],
         [4.7799e+01, 2.2634e-06, 9.5445e-02],
         ...,
         [2.3951e+01, 1.2164e-01, 8.5096e-01],
         [2.2552e+01, 1.2300e-01, 7.3908e-01],
         [1.5709e+01, 8.3578e-02, 6.0147e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.9413e+01, 4.1759e-02, 1.7886e-01],
         [4.8349e+01, 1.0856e-03, 3.9199e-01],
         ...,
         [2.3409e+01, 1.0739e-01, 6.1247e-01],
         [2.0886e+01, 7.1753e-02, 7.3734e-01],
         [1.6903e+01, 1.0872e-01, 8.0566e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  49%|████▊     | 971/2000 [30:33<29:45,  1.74s/it][A
Train Diffusion:  49%|████▊     | 972/2000 [30:35<29:40,  1.73s/it][A
Train Diffusion:  49%|████▊     | 973/2000 [30:37<29:39,  1.73s/it][A
Train Diffusion:  49%|████▊     | 974/2000 [30:38<29:34,  1.73s/it][A
Train Diffusion:  49%|████▉     | 975/2000 [30:40<29:35,  1.73s/it][A
Train Diffusion:  49%|████▉     | 976/2000 [30:42<29:33,  1.73s/it][A
Train Diffusion:  49%|████▉     | 977/2000 [30:44<29:29,  1.73s/it][A
Train Diffusion:  49%|████▉     | 978/2000 [30:45<29:27,  1.73s/it][A
Train Diffusion:  49%|████▉     | 979/2000 [30:47<29:46,  1.75s/it][A
Train Diffusion:  49%|████▉     | 980/2000 [30:49<29:53,  1.76s/it][A

Moving average ELBO loss at 980 iterations is: -19226.075. Best ELBO loss value is: -19909.65234375.

C_PATH mean = tensor([[36.6731,  0.1552,  0.6576],
        [36.6711,  0.1537,  0.6709]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5066e+01, 2.3474e-01, 8.3855e-02],
         [4.5844e+01, 1.7534e-06, 8.0341e-02],
         ...,
         [2.3947e+01, 1.2996e-01, 8.0741e-01],
         [2.2383e+01, 7.9596e-02, 8.5184e-01],
         [1.5689e+01, 9.7409e-02, 8.7322e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6004e+01, 4.0515e-02, 2.2133e-01],
         [4.5925e+01, 1.0259e-03, 4.1872e-01],
         ...,
         [2.3284e+01, 8.3981e-02, 5.0640e-01],
         [2.0601e+01, 1.2271e-01, 5.1372e-01],
         [1.7018e+01, 9.9049e-02, 4.7749e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  49%|████▉     | 981/2000 [30:51<30:17,  1.78s/it][A
Train Diffusion:  49%|████▉     | 982/2000 [30:53<30:00,  1.77s/it][A
Train Diffusion:  49%|████▉     | 983/2000 [30:54<29:45,  1.76s/it][A
Train Diffusion:  49%|████▉     | 984/2000 [30:56<29:35,  1.75s/it][A
Train Diffusion:  49%|████▉     | 985/2000 [30:58<29:28,  1.74s/it][A
Train Diffusion:  49%|████▉     | 986/2000 [30:59<29:21,  1.74s/it][A
Train Diffusion:  49%|████▉     | 987/2000 [31:01<29:16,  1.73s/it][A
Train Diffusion:  49%|████▉     | 988/2000 [31:03<29:18,  1.74s/it][A
Train Diffusion:  49%|████▉     | 989/2000 [31:05<29:14,  1.74s/it][A
Train Diffusion:  50%|████▉     | 990/2000 [31:06<29:11,  1.73s/it][A

Moving average ELBO loss at 990 iterations is: -19848.7689453125. Best ELBO loss value is: -20991.953125.

C_PATH mean = tensor([[36.5883,  0.1592,  0.7097],
        [36.7063,  0.1523,  0.6912]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6500e+01, 6.4947e-02, 9.8517e-02],
         [4.6386e+01, 8.0301e-04, 2.1375e-01],
         ...,
         [2.3683e+01, 8.1899e-02, 6.8758e-01],
         [2.2496e+01, 1.1515e-01, 6.1958e-01],
         [1.8487e+01, 8.8156e-02, 5.7761e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5731e+01, 1.1201e-01, 1.5443e-01],
         [4.6612e+01, 5.1290e-07, 2.0863e-01],
         ...,
         [2.3455e+01, 1.3198e-01, 5.9780e-01],
         [2.1253e+01, 8.1533e-02, 7.5267e-01],
         [1.5481e+01, 9.7228e-02, 7.7788e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  50%|████▉     | 991/2000 [31:08<29:09,  1.73s/it][A
Train Diffusion:  50%|████▉     | 992/2000 [31:10<29:09,  1.74s/it][A
Train Diffusion:  50%|████▉     | 993/2000 [31:12<29:06,  1.73s/it][A
Train Diffusion:  50%|████▉     | 994/2000 [31:13<29:04,  1.73s/it][A
Train Diffusion:  50%|████▉     | 995/2000 [31:15<29:01,  1.73s/it][A
Train Diffusion:  50%|████▉     | 996/2000 [31:17<29:03,  1.74s/it][A
Train Diffusion:  50%|████▉     | 997/2000 [31:19<29:06,  1.74s/it][A
Train Diffusion:  50%|████▉     | 998/2000 [31:20<29:12,  1.75s/it][A
Train Diffusion:  50%|████▉     | 999/2000 [31:22<29:11,  1.75s/it][A
Train Diffusion:  50%|█████     | 1000/2000 [31:24<29:08,  1.75s/it][A

Moving average ELBO loss at 1000 iterations is: -19659.4599609375. Best ELBO loss value is: -20991.953125.

C_PATH mean = tensor([[36.6803,  0.1579,  0.7036],
        [36.6942,  0.1600,  0.7034]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5493e+01, 3.1016e-02, 8.7593e-02],
         [4.6222e+01, 1.7089e-06, 9.5853e-02],
         ...,
         [2.4650e+01, 1.1720e-01, 5.0083e-01],
         [2.1844e+01, 1.1101e-01, 6.8895e-01],
         [1.8434e+01, 7.8724e-02, 6.4048e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6320e+01, 1.7381e-01, 1.7692e-01],
         [4.6190e+01, 2.6935e-04, 4.2338e-01],
         ...,
         [2.2546e+01, 8.1024e-02, 9.7032e-01],
         [2.2313e+01, 6.1879e-02, 7.8015e-01],
         [1.6218e+01, 9.6800e-02, 7.9339e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  50%|█████     | 1001/2000 [31:26<29:10,  1.75s/it][A
Train Diffusion:  50%|█████     | 1002/2000 [31:27<29:04,  1.75s/it][A
Train Diffusion:  50%|█████     | 1003/2000 [31:29<28:57,  1.74s/it][A
Train Diffusion:  50%|█████     | 1004/2000 [31:31<28:53,  1.74s/it][A
Train Diffusion:  50%|█████     | 1005/2000 [31:33<28:48,  1.74s/it][A
Train Diffusion:  50%|█████     | 1006/2000 [31:34<28:47,  1.74s/it][A
Train Diffusion:  50%|█████     | 1007/2000 [31:36<28:42,  1.73s/it][A
Train Diffusion:  50%|█████     | 1008/2000 [31:38<28:39,  1.73s/it][A
Train Diffusion:  50%|█████     | 1009/2000 [31:39<28:36,  1.73s/it][A
Train Diffusion:  50%|█████     | 1010/2000 [31:41<28:37,  1.73s/it][A

Moving average ELBO loss at 1010 iterations is: -20288.934375. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.6629,  0.1621,  0.6667],
        [36.7900,  0.1594,  0.6896]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4925e+01, 1.5534e-01, 1.6847e-01],
         [4.5919e+01, 3.9746e-07, 2.3780e-01],
         ...,
         [2.3846e+01, 1.3257e-01, 8.2883e-01],
         [2.1678e+01, 8.2011e-02, 7.4328e-01],
         [1.6462e+01, 1.0109e-01, 5.8534e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5581e+01, 4.9135e-02, 1.0930e-01],
         [4.6146e+01, 9.2031e-04, 2.2317e-01],
         ...,
         [2.3512e+01, 8.7458e-02, 6.5547e-01],
         [2.2829e+01, 1.2598e-01, 7.1360e-01],
         [1.9141e+01, 1.0525e-01, 7.5779e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  51%|█████     | 1011/2000 [31:43<28:35,  1.73s/it][A
Train Diffusion:  51%|█████     | 1012/2000 [31:45<28:45,  1.75s/it][A
Train Diffusion:  51%|█████     | 1013/2000 [31:46<28:42,  1.75s/it][A
Train Diffusion:  51%|█████     | 1014/2000 [31:48<28:39,  1.74s/it][A
Train Diffusion:  51%|█████     | 1015/2000 [31:50<28:35,  1.74s/it][A
Train Diffusion:  51%|█████     | 1016/2000 [31:52<29:14,  1.78s/it][A
Train Diffusion:  51%|█████     | 1017/2000 [31:54<29:27,  1.80s/it][A
Train Diffusion:  51%|█████     | 1018/2000 [31:55<29:09,  1.78s/it][A
Train Diffusion:  51%|█████     | 1019/2000 [31:57<28:52,  1.77s/it][A
Train Diffusion:  51%|█████     | 1020/2000 [31:59<28:40,  1.76s/it][A

Moving average ELBO loss at 1020 iterations is: -20397.4744140625. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.7129,  0.1657,  0.6807],
        [36.7072,  0.1593,  0.6561]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6471e+01, 5.2928e-02, 9.7647e-02],
         [4.6376e+01, 7.9214e-04, 1.0324e-01],
         ...,
         [2.4069e+01, 1.5126e-01, 6.9166e-01],
         [2.3391e+01, 1.3801e-01, 7.8627e-01],
         [2.0055e+01, 9.3476e-02, 7.2124e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5769e+01, 1.3851e-01, 1.8186e-01],
         [4.6378e+01, 2.2696e-07, 3.9596e-01],
         ...,
         [2.3231e+01, 9.1417e-02, 6.7564e-01],
         [2.1585e+01, 7.1802e-02, 5.4215e-01],
         [1.6743e+01, 1.0881e-01, 6.6295e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  51%|█████     | 1021/2000 [32:01<28:31,  1.75s/it][A
Train Diffusion:  51%|█████     | 1022/2000 [32:02<28:23,  1.74s/it][A
Train Diffusion:  51%|█████     | 1023/2000 [32:04<28:19,  1.74s/it][A
Train Diffusion:  51%|█████     | 1024/2000 [32:06<28:13,  1.74s/it][A
Train Diffusion:  51%|█████▏    | 1025/2000 [32:07<28:13,  1.74s/it][A
Train Diffusion:  51%|█████▏    | 1026/2000 [32:09<28:11,  1.74s/it][A
Train Diffusion:  51%|█████▏    | 1027/2000 [32:11<28:10,  1.74s/it][A
Train Diffusion:  51%|█████▏    | 1028/2000 [32:13<28:07,  1.74s/it][A
Train Diffusion:  51%|█████▏    | 1029/2000 [32:14<28:03,  1.73s/it][A
Train Diffusion:  52%|█████▏    | 1030/2000 [32:16<28:08,  1.74s/it][A

Moving average ELBO loss at 1030 iterations is: -20839.10703125. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.6741,  0.1602,  0.6800],
        [36.7816,  0.1608,  0.6686]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5783e+01, 1.5553e-01, 1.8570e-01],
         [4.6220e+01, 2.0544e-04, 3.8122e-01],
         ...,
         [2.3624e+01, 1.5184e-01, 8.8088e-01],
         [2.3422e+01, 9.5438e-02, 7.2327e-01],
         [1.7613e+01, 1.1581e-01, 5.7464e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6431e+01, 2.5039e-02, 9.3412e-02],
         [4.6533e+01, 5.2079e-07, 1.0671e-01],
         ...,
         [2.4167e+01, 9.1200e-02, 5.6448e-01],
         [2.2160e+01, 1.0439e-01, 6.9877e-01],
         [1.9909e+01, 8.9131e-02, 8.1758e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  52%|█████▏    | 1031/2000 [32:18<28:06,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1032/2000 [32:20<28:02,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1033/2000 [32:21<27:59,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1034/2000 [32:23<27:56,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1035/2000 [32:25<28:09,  1.75s/it][A
Train Diffusion:  52%|█████▏    | 1036/2000 [32:27<28:08,  1.75s/it][A
Train Diffusion:  52%|█████▏    | 1037/2000 [32:28<28:02,  1.75s/it][A
Train Diffusion:  52%|█████▏    | 1038/2000 [32:30<27:58,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1039/2000 [32:32<27:55,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1040/2000 [32:34<27:55,  1.74s/it][A

Moving average ELBO loss at 1040 iterations is: -20292.980078125. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.7892,  0.1542,  0.6908],
        [36.7625,  0.1522,  0.7021]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6279e+01, 3.7077e-02, 9.9619e-02],
         [4.6394e+01, 5.3538e-04, 1.0813e-01],
         ...,
         [2.3358e+01, 8.2353e-02, 6.3984e-01],
         [2.3639e+01, 1.0989e-01, 5.5007e-01],
         [2.1261e+01, 9.0731e-02, 5.1847e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5630e+01, 1.5866e-01, 1.5984e-01],
         [4.5841e+01, 8.2563e-08, 3.6277e-01],
         ...,
         [2.5091e+01, 1.4273e-01, 7.9004e-01],
         [2.3131e+01, 8.3776e-02, 8.0471e-01],
         [1.8247e+01, 9.8744e-02, 8.8435e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  52%|█████▏    | 1041/2000 [32:35<27:54,  1.75s/it][A
Train Diffusion:  52%|█████▏    | 1042/2000 [32:37<27:49,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1043/2000 [32:39<27:44,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1044/2000 [32:41<27:40,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1045/2000 [32:42<27:37,  1.74s/it][A
Train Diffusion:  52%|█████▏    | 1046/2000 [32:44<27:34,  1.73s/it][A
Train Diffusion:  52%|█████▏    | 1047/2000 [32:46<27:32,  1.73s/it][A
Train Diffusion:  52%|█████▏    | 1048/2000 [32:47<27:28,  1.73s/it][A
Train Diffusion:  52%|█████▏    | 1049/2000 [32:49<27:26,  1.73s/it][A
Train Diffusion:  52%|█████▎    | 1050/2000 [32:51<27:24,  1.73s/it][A

Moving average ELBO loss at 1050 iterations is: -19988.0578125. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.8683,  0.1617,  0.7053],
        [36.7904,  0.1543,  0.6839]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6679e+01, 1.2821e-02, 1.6197e-01],
         [4.6308e+01, 1.7127e-07, 2.3233e-01],
         ...,
         [2.5191e+01, 1.3596e-01, 4.1531e-01],
         [2.3278e+01, 8.3565e-02, 5.9435e-01],
         [2.0782e+01, 9.7927e-02, 6.2880e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6016e+01, 1.1210e-01, 1.0473e-01],
         [4.6225e+01, 1.0145e-04, 2.1066e-01],
         ...,
         [2.3466e+01, 9.5279e-02, 9.4806e-01],
         [2.3601e+01, 1.3284e-01, 6.6302e-01],
         [1.8176e+01, 1.0223e-01, 6.1566e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  53%|█████▎    | 1051/2000 [32:53<27:25,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1052/2000 [32:54<27:21,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1053/2000 [32:56<27:25,  1.74s/it][A
Train Diffusion:  53%|█████▎    | 1054/2000 [32:58<27:35,  1.75s/it][A
Train Diffusion:  53%|█████▎    | 1055/2000 [33:00<27:29,  1.75s/it][A
Train Diffusion:  53%|█████▎    | 1056/2000 [33:01<27:26,  1.74s/it][A
Train Diffusion:  53%|█████▎    | 1057/2000 [33:03<27:25,  1.75s/it][A
Train Diffusion:  53%|█████▎    | 1058/2000 [33:05<27:19,  1.74s/it][A
Train Diffusion:  53%|█████▎    | 1059/2000 [33:07<27:13,  1.74s/it][A
Train Diffusion:  53%|█████▎    | 1060/2000 [33:08<27:08,  1.73s/it][A

Moving average ELBO loss at 1060 iterations is: -20323.136328125. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.8633,  0.1600,  0.6808],
        [36.8550,  0.1609,  0.6737]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5842e+01, 8.5559e-02, 1.6565e-01],
         [4.5814e+01, 4.6752e-08, 2.4141e-01],
         ...,
         [2.4981e+01, 1.3114e-01, 8.1730e-01],
         [2.3340e+01, 8.2614e-02, 6.8532e-01],
         [1.8422e+01, 1.0013e-01, 6.3396e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5313e+01, 1.9056e-02, 1.1190e-01],
         [4.5859e+01, 3.2606e-04, 2.2080e-01],
         ...,
         [2.4076e+01, 1.0055e-01, 5.6485e-01],
         [2.4116e+01, 1.3946e-01, 6.8787e-01],
         [2.1228e+01, 1.0845e-01, 7.8667e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  53%|█████▎    | 1061/2000 [33:10<27:04,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1062/2000 [33:12<27:04,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1063/2000 [33:14<27:01,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1064/2000 [33:15<27:00,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1065/2000 [33:17<26:58,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1066/2000 [33:19<26:55,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1067/2000 [33:20<26:51,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1068/2000 [33:22<26:49,  1.73s/it][A
Train Diffusion:  53%|█████▎    | 1069/2000 [33:24<26:45,  1.72s/it][A
Train Diffusion:  54%|█████▎    | 1070/2000 [33:26<26:48,  1.73s/it][A

Moving average ELBO loss at 1070 iterations is: -20819.4568359375. Best ELBO loss value is: -21335.2890625.

C_PATH mean = tensor([[36.8883,  0.1588,  0.6568],
        [36.9179,  0.1536,  0.6745]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6184e+01, 5.0653e-02, 9.6512e-02],
         [4.5778e+01, 2.8798e-07, 1.0491e-01],
         ...,
         [2.4866e+01, 1.3002e-01, 8.1749e-01],
         [2.4767e+01, 1.0998e-01, 6.2479e-01],
         [2.2963e+01, 8.5877e-02, 7.6279e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5212e+01, 1.9400e-01, 2.2695e-01],
         [4.5672e+01, 1.4206e-04, 4.2243e-01],
         ...,
         [2.4396e+01, 9.3099e-02, 5.4442e-01],
         [2.3401e+01, 8.2851e-02, 7.4834e-01],
         [1.9157e+01, 1.3490e-01, 7.0403e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  54%|█████▎    | 1071/2000 [33:27<26:48,  1.73s/it][A
Train Diffusion:  54%|█████▎    | 1072/2000 [33:29<26:59,  1.74s/it][A
Train Diffusion:  54%|█████▎    | 1073/2000 [33:31<27:00,  1.75s/it][A
Train Diffusion:  54%|█████▎    | 1074/2000 [33:33<26:56,  1.75s/it][A
Train Diffusion:  54%|█████▍    | 1075/2000 [33:34<27:01,  1.75s/it][A
Train Diffusion:  54%|█████▍    | 1076/2000 [33:36<26:53,  1.75s/it][A
Train Diffusion:  54%|█████▍    | 1077/2000 [33:38<26:45,  1.74s/it][A
Train Diffusion:  54%|█████▍    | 1078/2000 [33:40<26:41,  1.74s/it][A
Train Diffusion:  54%|█████▍    | 1079/2000 [33:41<26:36,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1080/2000 [33:43<26:32,  1.73s/it][A

Moving average ELBO loss at 1080 iterations is: -20987.0541015625. Best ELBO loss value is: -21517.107421875.

C_PATH mean = tensor([[36.9022,  0.1561,  0.7139],
        [36.9739,  0.1507,  0.7120]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5674e+01, 9.2570e-03, 1.6933e-01],
         [4.4833e+01, 1.8817e-04, 3.6799e-01],
         ...,
         [2.4735e+01, 1.4772e-01, 4.5703e-01],
         [2.3713e+01, 7.8832e-02, 6.4941e-01],
         [2.2560e+01, 1.0171e-01, 7.9532e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4925e+01, 1.4813e-01, 9.8292e-02],
         [4.5167e+01, 2.0372e-08, 1.0985e-01],
         ...,
         [2.5234e+01, 8.1192e-02, 1.0233e+00],
         [2.5153e+01, 1.1337e-01, 7.1610e-01],
         [2.0376e+01, 8.4329e-02, 6.1376e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  54%|█████▍    | 1081/2000 [33:45<26:29,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1082/2000 [33:46<26:27,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1083/2000 [33:48<26:26,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1084/2000 [33:50<26:23,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1085/2000 [33:52<26:21,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1086/2000 [33:53<26:21,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1087/2000 [33:55<26:20,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1088/2000 [33:57<26:20,  1.73s/it][A
Train Diffusion:  54%|█████▍    | 1089/2000 [33:59<26:17,  1.73s/it][A
Train Diffusion:  55%|█████▍    | 1090/2000 [34:00<26:15,  1.73s/it][A

Moving average ELBO loss at 1090 iterations is: -21444.0333984375. Best ELBO loss value is: -21864.0.

C_PATH mean = tensor([[36.9739,  0.1587,  0.6824],
        [36.9644,  0.1549,  0.7034]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6090e+01, 8.3357e-03, 1.0311e-01],
         [4.5697e+01, 4.5747e-08, 1.8484e-01],
         ...,
         [2.5524e+01, 8.5326e-02, 6.7557e-01],
         [2.4370e+01, 7.3414e-02, 5.3854e-01],
         [2.2949e+01, 1.2406e-01, 5.2605e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5276e+01, 8.4823e-02, 2.0276e-01],
         [4.5400e+01, 6.4976e-05, 2.5355e-01],
         ...,
         [2.4609e+01, 1.3400e-01, 7.7683e-01],
         [2.5083e+01, 1.1206e-01, 7.6410e-01],
         [2.0089e+01, 8.6311e-02, 8.1079e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  55%|█████▍    | 1091/2000 [34:02<26:24,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1092/2000 [34:04<26:29,  1.75s/it][A
Train Diffusion:  55%|█████▍    | 1093/2000 [34:06<26:22,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1094/2000 [34:07<26:17,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1095/2000 [34:09<26:13,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1096/2000 [34:11<26:10,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1097/2000 [34:13<26:07,  1.74s/it][A
Train Diffusion:  55%|█████▍    | 1098/2000 [34:14<26:02,  1.73s/it][A
Train Diffusion:  55%|█████▍    | 1099/2000 [34:16<25:58,  1.73s/it][A
Train Diffusion:  55%|█████▌    | 1100/2000 [34:18<26:01,  1.74s/it][A

Moving average ELBO loss at 1100 iterations is: -22040.707421875. Best ELBO loss value is: -22311.19921875.

C_PATH mean = tensor([[36.9594,  0.1608,  0.6965],
        [37.0203,  0.1626,  0.6829]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4762e+01, 1.8378e-01, 2.0733e-01],
         [4.5243e+01, 8.0599e-05, 4.4105e-01],
         ...,
         [2.4335e+01, 8.6269e-02, 5.0060e-01],
         [2.5087e+01, 7.4212e-02, 4.1858e-01],
         [2.1045e+01, 1.1638e-01, 4.2900e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5382e+01, 1.6630e-02, 1.0161e-01],
         [4.5441e+01, 5.3355e-08, 1.0329e-01],
         ...,
         [2.6021e+01, 1.5209e-01, 8.9760e-01],
         [2.5102e+01, 1.2194e-01, 8.5394e-01],
         [2.4119e+01, 9.5447e-02, 9.4834e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  55%|█████▌    | 1101/2000 [34:19<25:58,  1.73s/it][A
Train Diffusion:  55%|█████▌    | 1102/2000 [34:21<26:01,  1.74s/it][A
Train Diffusion:  55%|█████▌    | 1103/2000 [34:23<25:59,  1.74s/it][A
Train Diffusion:  55%|█████▌    | 1104/2000 [34:25<25:55,  1.74s/it][A
Train Diffusion:  55%|█████▌    | 1105/2000 [34:26<25:54,  1.74s/it][A
Train Diffusion:  55%|█████▌    | 1106/2000 [34:28<25:50,  1.73s/it][A
Train Diffusion:  55%|█████▌    | 1107/2000 [34:30<25:49,  1.74s/it][A
Train Diffusion:  55%|█████▌    | 1108/2000 [34:32<25:44,  1.73s/it][A
Train Diffusion:  55%|█████▌    | 1109/2000 [34:33<25:53,  1.74s/it][A
Train Diffusion:  56%|█████▌    | 1110/2000 [34:35<25:58,  1.75s/it][A

Moving average ELBO loss at 1110 iterations is: -22220.5244140625. Best ELBO loss value is: -22809.26953125.

C_PATH mean = tensor([[36.9981,  0.1599,  0.6646],
        [37.0985,  0.1588,  0.6764]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5318e+01, 2.0914e-01, 1.0705e-01],
         [4.5353e+01, 8.2788e-05, 1.1416e-01],
         ...,
         [2.5098e+01, 8.3676e-02, 4.1990e-01],
         [2.5829e+01, 1.1103e-01, 6.0419e-01],
         [2.1577e+01, 8.8924e-02, 8.1534e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4805e+01, 1.8448e-02, 1.7317e-01],
         [4.5279e+01, 3.8358e-08, 3.8626e-01],
         ...,
         [2.6023e+01, 1.5293e-01, 9.2872e-01],
         [2.5072e+01, 8.6468e-02, 6.6605e-01],
         [2.4744e+01, 1.0493e-01, 6.3520e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  56%|█████▌    | 1111/2000 [34:37<25:53,  1.75s/it][A
Train Diffusion:  56%|█████▌    | 1112/2000 [34:39<25:47,  1.74s/it][A
Train Diffusion:  56%|█████▌    | 1113/2000 [34:40<25:42,  1.74s/it][A
Train Diffusion:  56%|█████▌    | 1114/2000 [34:42<25:36,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1115/2000 [34:44<25:33,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1116/2000 [34:46<25:32,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1117/2000 [34:47<25:30,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1118/2000 [34:49<25:27,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1119/2000 [34:51<25:27,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1120/2000 [34:52<25:22,  1.73s/it][A

Moving average ELBO loss at 1120 iterations is: -22102.7591796875. Best ELBO loss value is: -22809.26953125.

C_PATH mean = tensor([[37.1415,  0.1565,  0.6739],
        [37.0307,  0.1599,  0.6954]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4573e+01, 1.0254e-02, 1.7756e-01],
         [4.4981e+01, 2.1021e-08, 2.5750e-01],
         ...,
         [2.6597e+01, 9.1181e-02, 1.0012e+00],
         [2.5676e+01, 6.9708e-02, 7.2887e-01],
         [2.4939e+01, 1.0351e-01, 6.5750e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5235e+01, 1.3151e-01, 1.0597e-01],
         [4.5112e+01, 4.7241e-05, 2.0516e-01],
         ...,
         [2.4962e+01, 1.5343e-01, 3.7599e-01],
         [2.5857e+01, 1.3719e-01, 5.8689e-01],
         [2.1779e+01, 9.7533e-02, 7.9533e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  56%|█████▌    | 1121/2000 [34:54<25:21,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1122/2000 [34:56<25:19,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1123/2000 [34:58<25:17,  1.73s/it][A
Train Diffusion:  56%|█████▌    | 1124/2000 [34:59<25:16,  1.73s/it][A
Train Diffusion:  56%|█████▋    | 1125/2000 [35:01<25:13,  1.73s/it][A
Train Diffusion:  56%|█████▋    | 1126/2000 [35:03<25:14,  1.73s/it][A
Train Diffusion:  56%|█████▋    | 1127/2000 [35:05<25:11,  1.73s/it][A
Train Diffusion:  56%|█████▋    | 1128/2000 [35:06<25:18,  1.74s/it][A
Train Diffusion:  56%|█████▋    | 1129/2000 [35:08<25:21,  1.75s/it][A
Train Diffusion:  56%|█████▋    | 1130/2000 [35:10<25:13,  1.74s/it][A

Moving average ELBO loss at 1130 iterations is: -22369.7345703125. Best ELBO loss value is: -22809.26953125.

C_PATH mean = tensor([[37.0585,  0.1537,  0.6837],
        [37.1289,  0.1615,  0.6751]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5466e+01, 1.4565e-02, 9.9929e-02],
         [4.5334e+01, 1.5652e-04, 1.0514e-01],
         ...,
         [2.5276e+01, 1.2215e-01, 8.7880e-01],
         [2.6082e+01, 1.2614e-01, 6.8117e-01],
         [2.5487e+01, 8.7127e-02, 6.3994e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6479e+01, 1.0648e-01, 1.9129e-01],
         [4.5543e+01, 4.4255e-09, 4.1534e-01],
         ...,
         [2.6736e+01, 1.1008e-01, 5.8763e-01],
         [2.6193e+01, 7.2319e-02, 6.7102e-01],
         [2.2324e+01, 1.1015e-01, 8.0036e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  57%|█████▋    | 1131/2000 [35:12<25:10,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1132/2000 [35:13<25:09,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1133/2000 [35:15<25:07,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1134/2000 [35:17<25:03,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1135/2000 [35:19<25:01,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1136/2000 [35:20<24:58,  1.73s/it][A
Train Diffusion:  57%|█████▋    | 1137/2000 [35:22<25:05,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1138/2000 [35:24<25:01,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1139/2000 [35:25<24:55,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1140/2000 [35:27<24:57,  1.74s/it][A

Moving average ELBO loss at 1140 iterations is: -22684.306640625. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.1074,  0.1563,  0.6988],
        [37.1307,  0.1539,  0.6931]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5458e+01, 6.2776e-02, 1.7336e-01],
         [4.5489e+01, 1.7543e-09, 3.9119e-01],
         ...,
         [2.7508e+01, 1.4073e-01, 6.0274e-01],
         [2.6660e+01, 7.7208e-02, 6.9180e-01],
         [2.2893e+01, 1.0178e-01, 6.8311e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6293e+01, 7.2224e-03, 1.0699e-01],
         [4.5410e+01, 9.8738e-05, 1.1588e-01],
         ...,
         [2.5190e+01, 9.1363e-02, 8.9151e-01],
         [2.6339e+01, 1.2559e-01, 7.1261e-01],
         [2.6023e+01, 9.3739e-02, 8.6755e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  57%|█████▋    | 1141/2000 [35:29<24:55,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1142/2000 [35:31<24:53,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1143/2000 [35:32<24:48,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1144/2000 [35:34<24:47,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1145/2000 [35:36<24:44,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1146/2000 [35:38<24:40,  1.73s/it][A
Train Diffusion:  57%|█████▋    | 1147/2000 [35:39<24:47,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1148/2000 [35:41<24:46,  1.74s/it][A
Train Diffusion:  57%|█████▋    | 1149/2000 [35:43<24:40,  1.74s/it][A
Train Diffusion:  57%|█████▊    | 1150/2000 [35:45<24:37,  1.74s/it][A

Moving average ELBO loss at 1150 iterations is: -22282.2498046875. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.1696,  0.1537,  0.7090],
        [37.2015,  0.1563,  0.6616]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5561e+01, 1.0445e-01, 1.0587e-01],
         [4.4788e+01, 2.1706e-09, 1.0498e-01],
         ...,
         [2.8017e+01, 1.1928e-01, 7.4303e-01],
         [2.7231e+01, 1.2420e-01, 5.9130e-01],
         [2.3776e+01, 9.3576e-02, 5.2780e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4806e+01, 1.1449e-02, 1.8358e-01],
         [4.5018e+01, 1.4057e-04, 4.1181e-01],
         ...,
         [2.5887e+01, 1.1304e-01, 7.0659e-01],
         [2.7011e+01, 7.3703e-02, 6.6512e-01],
         [2.7067e+01, 1.0647e-01, 7.4146e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  58%|█████▊    | 1151/2000 [35:46<24:36,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1152/2000 [35:48<24:32,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1153/2000 [35:50<24:28,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1154/2000 [35:52<24:26,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1155/2000 [35:53<24:23,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1156/2000 [35:55<24:21,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1157/2000 [35:57<24:21,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1158/2000 [35:58<24:20,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1159/2000 [36:00<24:18,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1160/2000 [36:02<24:16,  1.73s/it][A

Moving average ELBO loss at 1160 iterations is: -22345.3783203125. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.1923,  0.1539,  0.6861],
        [37.3033,  0.1551,  0.6709]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4813e+01, 7.8024e-02, 1.7392e-01],
         [4.4801e+01, 1.8086e-09, 3.8948e-01],
         ...,
         [2.7836e+01, 1.5342e-01, 4.9208e-01],
         [2.8391e+01, 8.6529e-02, 6.3277e-01],
         [2.4130e+01, 1.0388e-01, 7.3643e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5826e+01, 3.4978e-03, 1.0947e-01],
         [4.4810e+01, 7.4497e-05, 1.1054e-01],
         ...,
         [2.6569e+01, 8.4586e-02, 9.8418e-01],
         [2.6116e+01, 1.1096e-01, 7.4342e-01],
         [2.6211e+01, 9.3262e-02, 6.1383e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  58%|█████▊    | 1161/2000 [36:04<24:17,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1162/2000 [36:05<24:15,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1163/2000 [36:07<24:12,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1164/2000 [36:09<24:08,  1.73s/it][A
Train Diffusion:  58%|█████▊    | 1165/2000 [36:11<24:10,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1166/2000 [36:12<24:16,  1.75s/it][A
Train Diffusion:  58%|█████▊    | 1167/2000 [36:14<24:16,  1.75s/it][A
Train Diffusion:  58%|█████▊    | 1168/2000 [36:16<24:11,  1.74s/it][A
Train Diffusion:  58%|█████▊    | 1169/2000 [36:18<24:10,  1.75s/it][A
Train Diffusion:  58%|█████▊    | 1170/2000 [36:19<24:08,  1.75s/it][A

Moving average ELBO loss at 1170 iterations is: -22785.3974609375. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.2682,  0.1564,  0.6735],
        [37.3005,  0.1484,  0.7094]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4027e+01, 1.4618e-01, 1.0885e-01],
         [4.4165e+01, 2.2793e-09, 1.1342e-01],
         ...,
         [2.7579e+01, 1.0194e-01, 8.7890e-01],
         [2.8192e+01, 1.2783e-01, 7.1864e-01],
         [2.4609e+01, 9.2955e-02, 6.2458e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4828e+01, 6.2583e-03, 1.7438e-01],
         [4.4417e+01, 8.6685e-05, 3.7854e-01],
         ...,
         [2.7235e+01, 1.2073e-01, 6.3892e-01],
         [2.6780e+01, 7.3941e-02, 7.5689e-01],
         [2.6564e+01, 1.0285e-01, 8.1876e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  59%|█████▊    | 1171/2000 [36:21<24:04,  1.74s/it][A
Train Diffusion:  59%|█████▊    | 1172/2000 [36:23<24:01,  1.74s/it][A
Train Diffusion:  59%|█████▊    | 1173/2000 [36:25<23:55,  1.74s/it][A
Train Diffusion:  59%|█████▊    | 1174/2000 [36:26<23:59,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1175/2000 [36:28<23:55,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1176/2000 [36:30<23:51,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1177/2000 [36:32<23:49,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1178/2000 [36:33<23:51,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1179/2000 [36:35<23:44,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1180/2000 [36:37<23:41,  1.73s/it][A

Moving average ELBO loss at 1180 iterations is: -22294.1013671875. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.2849,  0.1569,  0.7039],
        [37.2762,  0.1559,  0.7172]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3973e+01, 7.0186e-02, 2.2156e-01],
         [4.4001e+01, 2.5563e-05, 4.2806e-01],
         ...,
         [2.6819e+01, 1.3630e-01, 6.5303e-01],
         [2.7562e+01, 1.3904e-01, 7.9284e-01],
         [2.4212e+01, 9.7167e-02, 7.2188e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4396e+01, 3.8167e-03, 8.7689e-02],
         [4.4238e+01, 2.3260e-09, 9.1534e-02],
         ...,
         [2.8331e+01, 1.0496e-01, 8.9342e-01],
         [2.7655e+01, 7.1052e-02, 7.5186e-01],
         [2.7320e+01, 1.0165e-01, 8.2421e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  59%|█████▉    | 1181/2000 [36:38<23:39,  1.73s/it][A
Train Diffusion:  59%|█████▉    | 1182/2000 [36:40<23:37,  1.73s/it][A
Train Diffusion:  59%|█████▉    | 1183/2000 [36:42<23:33,  1.73s/it][A
Train Diffusion:  59%|█████▉    | 1184/2000 [36:44<23:40,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1185/2000 [36:45<23:45,  1.75s/it][A
Train Diffusion:  59%|█████▉    | 1186/2000 [36:47<23:38,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1187/2000 [36:49<23:31,  1.74s/it][A
Train Diffusion:  59%|█████▉    | 1188/2000 [36:51<23:28,  1.73s/it][A
Train Diffusion:  59%|█████▉    | 1189/2000 [36:52<23:26,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1190/2000 [36:54<23:23,  1.73s/it][A

Moving average ELBO loss at 1190 iterations is: -21471.857421875. Best ELBO loss value is: -23484.353515625.

C_PATH mean = tensor([[37.2786,  0.1597,  0.7018],
        [37.2885,  0.1590,  0.6764]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5431e+01, 1.7625e-01, 1.1018e-01],
         [4.5260e+01, 4.8145e-05, 1.1575e-01],
         ...,
         [2.6789e+01, 1.0468e-01, 6.3401e-01],
         [2.7799e+01, 1.3199e-01, 7.1869e-01],
         [2.5286e+01, 9.5982e-02, 8.0277e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6256e+01, 8.8550e-03, 1.8451e-01],
         [4.5188e+01, 2.9749e-09, 3.9959e-01],
         ...,
         [2.8279e+01, 1.2316e-01, 9.1019e-01],
         [2.8113e+01, 7.5449e-02, 7.4465e-01],
         [2.8353e+01, 1.0598e-01, 6.3264e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  60%|█████▉    | 1191/2000 [36:56<23:19,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1192/2000 [36:58<23:15,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1193/2000 [36:59<23:13,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1194/2000 [37:01<23:10,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1195/2000 [37:03<23:10,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1196/2000 [37:04<23:10,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1197/2000 [37:06<23:07,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1198/2000 [37:08<23:06,  1.73s/it][A
Train Diffusion:  60%|█████▉    | 1199/2000 [37:10<23:05,  1.73s/it][A
Train Diffusion:  60%|██████    | 1200/2000 [37:11<23:07,  1.73s/it][A

Moving average ELBO loss at 1200 iterations is: -22900.7564453125. Best ELBO loss value is: -23831.701171875.

C_PATH mean = tensor([[37.3597,  0.1588,  0.6872],
        [37.3664,  0.1601,  0.6801]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4632e+01, 6.0940e-03, 2.3726e-01],
         [4.4667e+01, 9.0273e-05, 2.9215e-01],
         ...,
         [2.7216e+01, 1.0255e-01, 8.8523e-01],
         [2.8125e+01, 1.2654e-01, 7.2667e-01],
         [2.8272e+01, 9.6909e-02, 6.3619e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5260e+01, 7.0885e-02, 9.0839e-02],
         [4.4428e+01, 7.2149e-10, 1.6210e-01],
         ...,
         [2.8769e+01, 1.2020e-01, 5.6970e-01],
         [2.8307e+01, 7.4920e-02, 7.3132e-01],
         [2.5237e+01, 1.0207e-01, 8.3116e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  60%|██████    | 1201/2000 [37:13<23:06,  1.74s/it][A
Train Diffusion:  60%|██████    | 1202/2000 [37:15<23:04,  1.73s/it][A
Train Diffusion:  60%|██████    | 1203/2000 [37:17<23:16,  1.75s/it][A
Train Diffusion:  60%|██████    | 1204/2000 [37:18<23:13,  1.75s/it][A
Train Diffusion:  60%|██████    | 1205/2000 [37:20<23:07,  1.74s/it][A
Train Diffusion:  60%|██████    | 1206/2000 [37:22<23:01,  1.74s/it][A
Train Diffusion:  60%|██████    | 1207/2000 [37:24<22:58,  1.74s/it][A
Train Diffusion:  60%|██████    | 1208/2000 [37:25<23:30,  1.78s/it][A
Train Diffusion:  60%|██████    | 1209/2000 [37:27<23:37,  1.79s/it][A
Train Diffusion:  60%|██████    | 1210/2000 [37:29<23:21,  1.77s/it][A

Moving average ELBO loss at 1210 iterations is: -23580.1234375. Best ELBO loss value is: -24679.23046875.

C_PATH mean = tensor([[37.3867,  0.1556,  0.7055],
        [37.3695,  0.1593,  0.6887]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5939e+01, 5.1320e-03, 1.8180e-01],
         [4.4757e+01, 7.8694e-05, 2.5082e-01],
         ...,
         [2.7751e+01, 1.1797e-01, 4.1065e-01],
         [2.7737e+01, 8.2373e-02, 6.2080e-01],
         [2.8680e+01, 1.0341e-01, 5.9047e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5168e+01, 1.4169e-01, 1.0010e-01],
         [4.5041e+01, 9.6295e-10, 2.1761e-01],
         ...,
         [2.8405e+01, 1.0871e-01, 1.0560e+00],
         [2.9325e+01, 1.0615e-01, 8.0728e-01],
         [2.6264e+01, 8.6986e-02, 8.4660e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  61%|██████    | 1211/2000 [37:31<23:12,  1.77s/it][A
Train Diffusion:  61%|██████    | 1212/2000 [37:33<23:04,  1.76s/it][A
Train Diffusion:  61%|██████    | 1213/2000 [37:34<22:57,  1.75s/it][A
Train Diffusion:  61%|██████    | 1214/2000 [37:36<22:50,  1.74s/it][A
Train Diffusion:  61%|██████    | 1215/2000 [37:38<22:47,  1.74s/it][A
Train Diffusion:  61%|██████    | 1216/2000 [37:39<22:42,  1.74s/it][A
Train Diffusion:  61%|██████    | 1217/2000 [37:41<22:38,  1.74s/it][A
Train Diffusion:  61%|██████    | 1218/2000 [37:43<22:35,  1.73s/it][A
Train Diffusion:  61%|██████    | 1219/2000 [37:45<22:34,  1.73s/it][A
Train Diffusion:  61%|██████    | 1220/2000 [37:46<22:33,  1.74s/it][A

Moving average ELBO loss at 1220 iterations is: -23184.9966796875. Best ELBO loss value is: -24679.23046875.

C_PATH mean = tensor([[37.4167,  0.1591,  0.6901],
        [37.3982,  0.1556,  0.6849]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5086e+01, 9.2551e-02, 1.0593e-01],
         [4.4838e+01, 4.3546e-05, 2.1933e-01],
         ...,
         [2.8338e+01, 1.3987e-01, 9.6121e-01],
         [2.9198e+01, 1.2976e-01, 7.3102e-01],
         [2.6087e+01, 8.9728e-02, 6.4888e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5922e+01, 4.4763e-03, 1.8958e-01],
         [4.4722e+01, 1.3968e-09, 2.4720e-01],
         ...,
         [2.8525e+01, 9.7708e-02, 4.9567e-01],
         [2.8197e+01, 6.6782e-02, 6.6200e-01],
         [2.8849e+01, 1.0554e-01, 7.6617e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  61%|██████    | 1221/2000 [37:48<22:31,  1.73s/it][A
Train Diffusion:  61%|██████    | 1222/2000 [37:50<22:27,  1.73s/it][A
Train Diffusion:  61%|██████    | 1223/2000 [37:52<22:24,  1.73s/it][A
Train Diffusion:  61%|██████    | 1224/2000 [37:53<22:22,  1.73s/it][A
Train Diffusion:  61%|██████▏   | 1225/2000 [37:55<22:21,  1.73s/it][A
Train Diffusion:  61%|██████▏   | 1226/2000 [37:57<22:19,  1.73s/it][A
Train Diffusion:  61%|██████▏   | 1227/2000 [37:59<22:27,  1.74s/it][A
Train Diffusion:  61%|██████▏   | 1228/2000 [38:00<22:29,  1.75s/it][A
Train Diffusion:  61%|██████▏   | 1229/2000 [38:02<22:34,  1.76s/it][A
Train Diffusion:  62%|██████▏   | 1230/2000 [38:04<22:27,  1.75s/it][A

Moving average ELBO loss at 1230 iterations is: -23026.048828125. Best ELBO loss value is: -24679.23046875.

C_PATH mean = tensor([[37.4729,  0.1589,  0.6738],
        [37.4266,  0.1631,  0.6879]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4156e+01, 6.9440e-03, 1.8208e-01],
         [4.3498e+01, 1.2436e-04, 4.0084e-01],
         ...,
         [2.8714e+01, 1.4341e-01, 6.2473e-01],
         [2.9396e+01, 7.6963e-02, 5.6265e-01],
         [2.9669e+01, 1.0070e-01, 5.7420e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4831e+01, 1.0608e-01, 1.1342e-01],
         [4.5064e+01, 5.2409e-10, 1.1605e-01],
         ...,
         [2.8884e+01, 8.3911e-02, 7.6101e-01],
         [2.8691e+01, 1.0870e-01, 7.6986e-01],
         [2.6405e+01, 8.6345e-02, 8.1950e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  62%|██████▏   | 1231/2000 [38:06<22:23,  1.75s/it][A
Train Diffusion:  62%|██████▏   | 1232/2000 [38:07<22:18,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1233/2000 [38:09<22:16,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1234/2000 [38:11<22:13,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1235/2000 [38:12<22:09,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1236/2000 [38:14<22:05,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1237/2000 [38:16<22:04,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1238/2000 [38:18<22:01,  1.73s/it][A
Train Diffusion:  62%|██████▏   | 1239/2000 [38:19<22:05,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1240/2000 [38:21<22:00,  1.74s/it][A

Moving average ELBO loss at 1240 iterations is: -23375.9986328125. Best ELBO loss value is: -24679.23046875.

C_PATH mean = tensor([[37.5211,  0.1577,  0.6821],
        [37.4875,  0.1544,  0.6950]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3384e+01, 1.0520e-01, 1.1197e-01],
         [4.2904e+01, 4.0765e-05, 2.1884e-01],
         ...,
         [2.8796e+01, 1.3876e-01, 3.9883e-01],
         [2.9619e+01, 1.2520e-01, 4.0129e-01],
         [2.6657e+01, 9.2637e-02, 4.8231e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3955e+01, 4.5586e-03, 1.8575e-01],
         [4.4609e+01, 1.0454e-09, 2.5823e-01],
         ...,
         [2.9609e+01, 9.5917e-02, 1.0321e+00],
         [2.9000e+01, 7.1203e-02, 9.5671e-01],
         [2.9826e+01, 1.0453e-01, 9.4865e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  62%|██████▏   | 1241/2000 [38:23<22:09,  1.75s/it][A
Train Diffusion:  62%|██████▏   | 1242/2000 [38:25<22:02,  1.75s/it][A
Train Diffusion:  62%|██████▏   | 1243/2000 [38:26<21:59,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1244/2000 [38:28<21:55,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1245/2000 [38:30<21:51,  1.74s/it][A
Train Diffusion:  62%|██████▏   | 1246/2000 [38:32<22:11,  1.77s/it][A
Train Diffusion:  62%|██████▏   | 1247/2000 [38:34<22:13,  1.77s/it][A
Train Diffusion:  62%|██████▏   | 1248/2000 [38:35<22:04,  1.76s/it][A
Train Diffusion:  62%|██████▏   | 1249/2000 [38:37<21:55,  1.75s/it][A
Train Diffusion:  62%|██████▎   | 1250/2000 [38:39<21:49,  1.75s/it][A

Moving average ELBO loss at 1250 iterations is: -23608.375390625. Best ELBO loss value is: -24780.7421875.

C_PATH mean = tensor([[37.5639,  0.1470,  0.7054],
        [37.4818,  0.1477,  0.7118]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5279e+01, 7.4228e-02, 2.2608e-01],
         [4.5081e+01, 3.4205e-10, 2.6981e-01],
         ...,
         [3.0408e+01, 1.1282e-01, 9.1209e-01],
         [2.9712e+01, 1.1446e-01, 7.3515e-01],
         [2.6967e+01, 8.8510e-02, 8.1317e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4315e+01, 5.0120e-03, 9.1981e-02],
         [4.3253e+01, 8.9706e-05, 1.7175e-01],
         ...,
         [2.8449e+01, 1.0969e-01, 6.3522e-01],
         [2.9370e+01, 7.3145e-02, 7.0519e-01],
         [2.9965e+01, 1.0264e-01, 6.0586e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  63%|██████▎   | 1251/2000 [38:40<21:44,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1252/2000 [38:42<21:39,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1253/2000 [38:44<21:38,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1254/2000 [38:46<21:37,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1255/2000 [38:47<21:33,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1256/2000 [38:49<21:31,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1257/2000 [38:51<21:30,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1258/2000 [38:53<21:28,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1259/2000 [38:54<21:25,  1.73s/it][A
Train Diffusion:  63%|██████▎   | 1260/2000 [38:56<21:21,  1.73s/it][A

Moving average ELBO loss at 1260 iterations is: -23442.3791015625. Best ELBO loss value is: -24780.7421875.

C_PATH mean = tensor([[37.5659,  0.1548,  0.6816],
        [37.5299,  0.1569,  0.6722]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6989e+01, 3.2500e-03, 9.6890e-02],
         [4.5989e+01, 7.8990e-10, 9.2600e-02],
         ...,
         [3.0207e+01, 1.1283e-01, 7.6779e-01],
         [2.9672e+01, 7.5897e-02, 7.5764e-01],
         [3.0133e+01, 1.0689e-01, 6.8206e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5462e+01, 9.6777e-02, 2.3398e-01],
         [4.4117e+01, 5.1054e-05, 4.5098e-01],
         ...,
         [2.8627e+01, 1.1513e-01, 7.0265e-01],
         [2.9537e+01, 1.1864e-01, 6.0816e-01],
         [2.7078e+01, 9.3108e-02, 7.4255e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  63%|██████▎   | 1261/2000 [38:58<21:21,  1.73s/it][A
Train Diffusion:  63%|██████▎   | 1262/2000 [38:59<21:18,  1.73s/it][A
Train Diffusion:  63%|██████▎   | 1263/2000 [39:01<21:16,  1.73s/it][A
Train Diffusion:  63%|██████▎   | 1264/2000 [39:03<21:23,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1265/2000 [39:05<21:29,  1.75s/it][A
Train Diffusion:  63%|██████▎   | 1266/2000 [39:07<21:21,  1.75s/it][A
Train Diffusion:  63%|██████▎   | 1267/2000 [39:08<21:19,  1.75s/it][A
Train Diffusion:  63%|██████▎   | 1268/2000 [39:10<21:16,  1.74s/it][A
Train Diffusion:  63%|██████▎   | 1269/2000 [39:12<21:11,  1.74s/it][A
Train Diffusion:  64%|██████▎   | 1270/2000 [39:13<21:07,  1.74s/it][A

Moving average ELBO loss at 1270 iterations is: -23419.7861328125. Best ELBO loss value is: -24780.7421875.

C_PATH mean = tensor([[37.5447,  0.1593,  0.6699],
        [37.5548,  0.1577,  0.6667]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5714e+01, 5.6116e-03, 1.2294e-01],
         [4.4601e+01, 1.2721e-04, 2.4244e-01],
         ...,
         [2.8982e+01, 8.7424e-02, 7.8339e-01],
         [2.8823e+01, 1.2455e-01, 6.2708e-01],
         [3.0282e+01, 1.0211e-01, 7.8788e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4656e+01, 1.9410e-01, 1.8439e-01],
         [4.4907e+01, 5.8959e-10, 2.4883e-01],
         ...,
         [3.0091e+01, 1.4031e-01, 6.0151e-01],
         [3.0954e+01, 8.4215e-02, 7.5432e-01],
         [2.8367e+01, 1.0723e-01, 6.7260e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  64%|██████▎   | 1271/2000 [39:15<21:05,  1.74s/it][A
Train Diffusion:  64%|██████▎   | 1272/2000 [39:17<21:03,  1.74s/it][A
Train Diffusion:  64%|██████▎   | 1273/2000 [39:19<21:05,  1.74s/it][A
Train Diffusion:  64%|██████▎   | 1274/2000 [39:20<21:02,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1275/2000 [39:22<20:58,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1276/2000 [39:24<20:57,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1277/2000 [39:26<21:00,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1278/2000 [39:27<21:17,  1.77s/it][A
Train Diffusion:  64%|██████▍   | 1279/2000 [39:29<21:07,  1.76s/it][A
Train Diffusion:  64%|██████▍   | 1280/2000 [39:31<21:00,  1.75s/it][A

Moving average ELBO loss at 1280 iterations is: -23960.1119140625. Best ELBO loss value is: -24780.7421875.

C_PATH mean = tensor([[37.5314,  0.1571,  0.6863],
        [37.5845,  0.1538,  0.6847]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5753e+01, 1.4713e-03, 1.0936e-01],
         [4.5391e+01, 6.9682e-05, 1.1233e-01],
         ...,
         [2.8944e+01, 1.2719e-01, 7.4821e-01],
         [2.9846e+01, 1.2543e-01, 7.6470e-01],
         [3.0230e+01, 9.0685e-02, 6.6670e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4824e+01, 2.4264e-02, 1.9442e-01],
         [4.3661e+01, 1.9353e-10, 4.1629e-01],
         ...,
         [3.0739e+01, 1.0433e-01, 7.3640e-01],
         [3.0191e+01, 6.9009e-02, 6.1669e-01],
         [2.7196e+01, 1.0704e-01, 7.5228e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  64%|██████▍   | 1281/2000 [39:33<20:59,  1.75s/it][A
Train Diffusion:  64%|██████▍   | 1282/2000 [39:34<20:55,  1.75s/it][A
Train Diffusion:  64%|██████▍   | 1283/2000 [39:36<20:59,  1.76s/it][A
Train Diffusion:  64%|██████▍   | 1284/2000 [39:38<20:57,  1.76s/it][A
Train Diffusion:  64%|██████▍   | 1285/2000 [39:40<20:48,  1.75s/it][A
Train Diffusion:  64%|██████▍   | 1286/2000 [39:41<20:42,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1287/2000 [39:43<20:39,  1.74s/it][A
Train Diffusion:  64%|██████▍   | 1288/2000 [39:45<20:34,  1.73s/it][A
Train Diffusion:  64%|██████▍   | 1289/2000 [39:47<20:31,  1.73s/it][A
Train Diffusion:  64%|██████▍   | 1290/2000 [39:48<20:27,  1.73s/it][A

Moving average ELBO loss at 1290 iterations is: -23568.71796875. Best ELBO loss value is: -24780.7421875.

C_PATH mean = tensor([[37.5737,  0.1540,  0.7320],
        [37.5912,  0.1519,  0.7010]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3969e+01, 6.0547e-04, 1.0506e-01],
         [4.3807e+01, 1.9573e-10, 1.0086e-01],
         ...,
         [3.0214e+01, 1.0686e-01, 5.0559e-01],
         [3.0960e+01, 8.5541e-02, 6.7743e-01],
         [2.8024e+01, 1.2677e-01, 6.1152e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4656e+01, 7.4872e-02, 2.2093e-01],
         [4.3794e+01, 7.6503e-05, 4.4053e-01],
         ...,
         [2.9895e+01, 1.2946e-01, 1.0509e+00],
         [2.9641e+01, 1.0521e-01, 8.8355e-01],
         [3.0052e+01, 8.5057e-02, 8.4114e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  65%|██████▍   | 1291/2000 [39:50<20:25,  1.73s/it][A
Train Diffusion:  65%|██████▍   | 1292/2000 [39:52<20:32,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1293/2000 [39:54<20:28,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1294/2000 [39:55<20:26,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1295/2000 [39:57<20:23,  1.73s/it][A
Train Diffusion:  65%|██████▍   | 1296/2000 [39:59<20:24,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1297/2000 [40:00<20:21,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1298/2000 [40:02<20:20,  1.74s/it][A
Train Diffusion:  65%|██████▍   | 1299/2000 [40:04<20:19,  1.74s/it][A
Train Diffusion:  65%|██████▌   | 1300/2000 [40:06<20:20,  1.74s/it][A

Moving average ELBO loss at 1300 iterations is: -24154.3544921875. Best ELBO loss value is: -24866.123046875.

C_PATH mean = tensor([[37.6099,  0.1587,  0.6949],
        [37.5804,  0.1553,  0.6819]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5653e+01, 3.7924e-04, 1.2177e-01],
         [4.4665e+01, 2.9577e-05, 1.2070e-01],
         ...,
         [2.9855e+01, 9.9426e-02, 1.0571e+00],
         [3.0536e+01, 1.1851e-01, 9.8686e-01],
         [2.7783e+01, 9.5930e-02, 9.5891e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4797e+01, 7.3307e-02, 1.8225e-01],
         [4.4750e+01, 3.6406e-10, 4.0845e-01],
         ...,
         [2.9955e+01, 1.3535e-01, 4.4569e-01],
         [2.9921e+01, 8.4932e-02, 4.7088e-01],
         [3.0666e+01, 1.0730e-01, 5.1807e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  65%|██████▌   | 1301/2000 [40:08<20:31,  1.76s/it][A
Train Diffusion:  65%|██████▌   | 1302/2000 [40:09<21:07,  1.82s/it][A
Train Diffusion:  65%|██████▌   | 1303/2000 [40:11<20:54,  1.80s/it][A
Train Diffusion:  65%|██████▌   | 1304/2000 [40:13<20:44,  1.79s/it][A
Train Diffusion:  65%|██████▌   | 1305/2000 [40:15<20:29,  1.77s/it][A
Train Diffusion:  65%|██████▌   | 1306/2000 [40:16<20:20,  1.76s/it][A
Train Diffusion:  65%|██████▌   | 1307/2000 [40:18<20:13,  1.75s/it][A
Train Diffusion:  65%|██████▌   | 1308/2000 [40:20<20:14,  1.75s/it][A
Train Diffusion:  65%|██████▌   | 1309/2000 [40:22<20:08,  1.75s/it][A
Train Diffusion:  66%|██████▌   | 1310/2000 [40:23<20:03,  1.74s/it][A

Moving average ELBO loss at 1310 iterations is: -24557.1267578125. Best ELBO loss value is: -25387.611328125.

C_PATH mean = tensor([[37.6358,  0.1560,  0.6914],
        [37.6942,  0.1517,  0.6999]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4642e+01, 3.5977e-04, 8.3172e-02],
         [4.3987e+01, 1.6421e-10, 1.6153e-01],
         ...,
         [3.0281e+01, 1.3308e-01, 1.0689e+00],
         [3.0049e+01, 7.2345e-02, 8.4223e-01],
         [2.7755e+01, 9.7701e-02, 8.6229e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3919e+01, 9.1876e-02, 2.4569e-01],
         [4.4011e+01, 1.1843e-04, 2.8493e-01],
         ...,
         [3.0025e+01, 9.4735e-02, 4.5054e-01],
         [3.0932e+01, 1.1947e-01, 6.4653e-01],
         [3.1158e+01, 9.3669e-02, 6.2302e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  66%|██████▌   | 1311/2000 [40:25<20:09,  1.75s/it][A
Train Diffusion:  66%|██████▌   | 1312/2000 [40:27<20:16,  1.77s/it][A
Train Diffusion:  66%|██████▌   | 1313/2000 [40:29<20:07,  1.76s/it][A
Train Diffusion:  66%|██████▌   | 1314/2000 [40:30<19:58,  1.75s/it][A
Train Diffusion:  66%|██████▌   | 1315/2000 [40:32<19:53,  1.74s/it][A
Train Diffusion:  66%|██████▌   | 1316/2000 [40:34<19:52,  1.74s/it][A
Train Diffusion:  66%|██████▌   | 1317/2000 [40:36<19:49,  1.74s/it][A
Train Diffusion:  66%|██████▌   | 1318/2000 [40:37<19:50,  1.75s/it][A
Train Diffusion:  66%|██████▌   | 1319/2000 [40:39<20:23,  1.80s/it][A
Train Diffusion:  66%|██████▌   | 1320/2000 [40:41<20:28,  1.81s/it][A

Moving average ELBO loss at 1320 iterations is: -24730.3072265625. Best ELBO loss value is: -25421.9453125.

C_PATH mean = tensor([[37.6277,  0.1624,  0.7150],
        [37.6773,  0.1473,  0.6858]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5324e+01, 1.1039e-03, 9.5594e-02],
         [4.4458e+01, 8.1492e-05, 9.5844e-02],
         ...,
         [2.9654e+01, 9.8057e-02, 6.5813e-01],
         [3.0546e+01, 7.2921e-02, 7.8838e-01],
         [3.1018e+01, 1.1339e-01, 8.2930e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4540e+01, 2.6111e-02, 2.2937e-01],
         [4.4324e+01, 1.7191e-10, 4.4803e-01],
         ...,
         [3.0384e+01, 1.3671e-01, 8.3972e-01],
         [3.0288e+01, 1.2001e-01, 6.5611e-01],
         [2.8007e+01, 9.0009e-02, 6.2014e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  66%|██████▌   | 1321/2000 [40:43<20:23,  1.80s/it][A
Train Diffusion:  66%|██████▌   | 1322/2000 [40:45<20:05,  1.78s/it][A
Train Diffusion:  66%|██████▌   | 1323/2000 [40:46<19:54,  1.76s/it][A
Train Diffusion:  66%|██████▌   | 1324/2000 [40:48<19:45,  1.75s/it][A
Train Diffusion:  66%|██████▋   | 1325/2000 [40:50<19:38,  1.75s/it][A
Train Diffusion:  66%|██████▋   | 1326/2000 [40:52<19:32,  1.74s/it][A
Train Diffusion:  66%|██████▋   | 1327/2000 [40:53<19:28,  1.74s/it][A
Train Diffusion:  66%|██████▋   | 1328/2000 [40:55<19:24,  1.73s/it][A
Train Diffusion:  66%|██████▋   | 1329/2000 [40:57<19:19,  1.73s/it][A
Train Diffusion:  66%|██████▋   | 1330/2000 [40:58<19:16,  1.73s/it][A

Moving average ELBO loss at 1330 iterations is: -24855.36640625. Best ELBO loss value is: -25571.94140625.

C_PATH mean = tensor([[37.6933,  0.1637,  0.6799],
        [37.6549,  0.1530,  0.6837]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5271e+01, 5.5669e-04, 1.0084e-01],
         [4.5165e+01, 2.0715e-10, 1.8194e-01],
         ...,
         [3.0078e+01, 1.2431e-01, 5.7775e-01],
         [3.0080e+01, 8.1055e-02, 5.2125e-01],
         [2.8146e+01, 1.1556e-01, 5.9701e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6288e+01, 2.2849e-01, 2.4040e-01],
         [4.5062e+01, 2.7353e-04, 2.8623e-01],
         ...,
         [2.9778e+01, 1.0965e-01, 8.3935e-01],
         [3.0692e+01, 1.1825e-01, 7.9887e-01],
         [3.1783e+01, 9.4133e-02, 8.2635e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  67%|██████▋   | 1331/2000 [41:00<19:15,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1332/2000 [41:02<19:13,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1333/2000 [41:04<19:14,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1334/2000 [41:05<19:11,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1335/2000 [41:07<19:08,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1336/2000 [41:09<19:07,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1337/2000 [41:11<19:09,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1338/2000 [41:12<19:08,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1339/2000 [41:14<19:05,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1340/2000 [41:16<19:03,  1.73s/it][A

Moving average ELBO loss at 1340 iterations is: -24736.7625. Best ELBO loss value is: -25571.94140625.

C_PATH mean = tensor([[37.6593,  0.1664,  0.6868],
        [37.7164,  0.1558,  0.6888]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5773e+01, 2.5119e-04, 1.2092e-01],
         [4.4686e+01, 3.2448e-05, 2.4684e-01],
         ...,
         [3.0437e+01, 1.0055e-01, 4.2252e-01],
         [3.0295e+01, 1.3900e-01, 6.8135e-01],
         [2.7913e+01, 1.0385e-01, 6.4695e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5047e+01, 9.9841e-02, 1.8151e-01],
         [4.4733e+01, 4.7208e-10, 2.4663e-01],
         ...,
         [2.9826e+01, 1.3367e-01, 1.0099e+00],
         [3.0689e+01, 7.7638e-02, 7.4739e-01],
         [3.1419e+01, 1.0137e-01, 8.2939e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  67%|██████▋   | 1341/2000 [41:18<19:02,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1342/2000 [41:19<18:59,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1343/2000 [41:21<18:55,  1.73s/it][A
Train Diffusion:  67%|██████▋   | 1344/2000 [41:23<19:05,  1.75s/it][A
Train Diffusion:  67%|██████▋   | 1345/2000 [41:25<19:12,  1.76s/it][A
Train Diffusion:  67%|██████▋   | 1346/2000 [41:26<19:03,  1.75s/it][A
Train Diffusion:  67%|██████▋   | 1347/2000 [41:28<18:58,  1.74s/it][A
Train Diffusion:  67%|██████▋   | 1348/2000 [41:30<18:54,  1.74s/it][A
Train Diffusion:  67%|██████▋   | 1349/2000 [41:31<18:49,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1350/2000 [41:33<18:49,  1.74s/it][A

Moving average ELBO loss at 1350 iterations is: -24558.698046875. Best ELBO loss value is: -25628.205078125.

C_PATH mean = tensor([[37.7458,  0.1581,  0.6806],
        [37.6949,  0.1560,  0.6841]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4857e+01, 6.0379e-04, 1.0868e-01],
         [4.4644e+01, 1.8551e-10, 1.1948e-01],
         ...,
         [3.0543e+01, 1.0915e-01, 6.3238e-01],
         [3.0417e+01, 1.2001e-01, 7.4808e-01],
         [2.8721e+01, 9.2506e-02, 7.9044e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5673e+01, 1.3342e-01, 1.9995e-01],
         [4.4597e+01, 2.2217e-04, 4.0976e-01],
         ...,
         [2.9790e+01, 1.1817e-01, 9.0077e-01],
         [3.0727e+01, 7.6454e-02, 7.1390e-01],
         [3.1411e+01, 1.0788e-01, 6.3928e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  68%|██████▊   | 1351/2000 [41:35<18:45,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1352/2000 [41:37<18:42,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1353/2000 [41:38<18:40,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1354/2000 [41:40<18:37,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1355/2000 [41:42<18:35,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1356/2000 [41:44<18:34,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1357/2000 [41:45<18:30,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1358/2000 [41:47<18:28,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1359/2000 [41:49<18:25,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1360/2000 [41:50<18:23,  1.72s/it][A

Moving average ELBO loss at 1360 iterations is: -25051.3427734375. Best ELBO loss value is: -25841.23046875.

C_PATH mean = tensor([[37.7430,  0.1639,  0.6413],
        [37.7788,  0.1613,  0.7324]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4937e+01, 5.4075e-04, 2.1896e-01],
         [4.4620e+01, 4.5771e-05, 4.3996e-01],
         ...,
         [3.0455e+01, 1.0372e-01, 6.4768e-01],
         [3.0284e+01, 7.7762e-02, 5.5534e-01],
         [2.8385e+01, 1.2033e-01, 5.1896e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5980e+01, 1.2622e-01, 1.0941e-01],
         [4.4587e+01, 6.0468e-10, 1.1012e-01],
         ...,
         [3.0234e+01, 1.2370e-01, 9.0680e-01],
         [3.1063e+01, 1.2020e-01, 9.2213e-01],
         [3.1666e+01, 9.4346e-02, 9.2184e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  68%|██████▊   | 1361/2000 [41:52<18:22,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1362/2000 [41:54<18:19,  1.72s/it][A
Train Diffusion:  68%|██████▊   | 1363/2000 [41:56<18:29,  1.74s/it][A
Train Diffusion:  68%|██████▊   | 1364/2000 [41:57<18:28,  1.74s/it][A
Train Diffusion:  68%|██████▊   | 1365/2000 [41:59<18:21,  1.74s/it][A
Train Diffusion:  68%|██████▊   | 1366/2000 [42:01<18:18,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1367/2000 [42:03<18:16,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1368/2000 [42:04<18:16,  1.74s/it][A
Train Diffusion:  68%|██████▊   | 1369/2000 [42:06<18:13,  1.73s/it][A
Train Diffusion:  68%|██████▊   | 1370/2000 [42:08<18:10,  1.73s/it][A

Moving average ELBO loss at 1370 iterations is: -25623.9291015625. Best ELBO loss value is: -26300.033203125.

C_PATH mean = tensor([[37.8334,  0.1509,  0.6552],
        [37.7439,  0.1642,  0.7045]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5103e+01, 2.6091e-03, 1.8372e-01],
         [4.4234e+01, 1.8218e-04, 3.8812e-01],
         ...,
         [3.0399e+01, 1.3958e-01, 8.2296e-01],
         [3.1266e+01, 7.8844e-02, 8.0943e-01],
         [3.1924e+01, 1.0397e-01, 8.1254e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4267e+01, 6.4127e-02, 1.1779e-01],
         [4.4278e+01, 2.6469e-10, 1.2470e-01],
         ...,
         [3.0608e+01, 9.1960e-02, 7.1037e-01],
         [3.0420e+01, 1.2579e-01, 6.2334e-01],
         [2.8679e+01, 9.6550e-02, 5.8998e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  69%|██████▊   | 1371/2000 [42:10<18:08,  1.73s/it][A
Train Diffusion:  69%|██████▊   | 1372/2000 [42:11<18:05,  1.73s/it][A
Train Diffusion:  69%|██████▊   | 1373/2000 [42:13<18:02,  1.73s/it][A
Train Diffusion:  69%|██████▊   | 1374/2000 [42:15<18:00,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1375/2000 [42:16<17:59,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1376/2000 [42:18<17:55,  1.72s/it][A
Train Diffusion:  69%|██████▉   | 1377/2000 [42:20<17:58,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1378/2000 [42:22<17:56,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1379/2000 [42:23<17:54,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1380/2000 [42:25<17:50,  1.73s/it][A

Moving average ELBO loss at 1380 iterations is: -25757.9203125. Best ELBO loss value is: -26300.033203125.

C_PATH mean = tensor([[37.8149,  0.1538,  0.7150],
        [37.7232,  0.1532,  0.7094]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4863e+01, 3.7729e-03, 2.1359e-01],
         [4.4264e+01, 1.9594e-04, 2.7905e-01],
         ...,
         [2.9740e+01, 9.6719e-02, 8.7946e-01],
         [3.0851e+01, 7.3119e-02, 8.4217e-01],
         [3.2368e+01, 1.1618e-01, 8.2655e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5007e+01, 7.6413e-02, 1.0825e-01],
         [4.3839e+01, 2.3437e-10, 2.1989e-01],
         ...,
         [3.1286e+01, 1.2248e-01, 7.4773e-01],
         [3.1077e+01, 1.1613e-01, 6.5012e-01],
         [2.9249e+01, 9.2027e-02, 5.8832e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  69%|██████▉   | 1381/2000 [42:27<17:53,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1382/2000 [42:29<18:00,  1.75s/it][A
Train Diffusion:  69%|██████▉   | 1383/2000 [42:30<18:00,  1.75s/it][A
Train Diffusion:  69%|██████▉   | 1384/2000 [42:32<17:53,  1.74s/it][A
Train Diffusion:  69%|██████▉   | 1385/2000 [42:34<17:50,  1.74s/it][A
Train Diffusion:  69%|██████▉   | 1386/2000 [42:36<17:45,  1.74s/it][A
Train Diffusion:  69%|██████▉   | 1387/2000 [42:37<17:39,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1388/2000 [42:39<17:38,  1.73s/it][A
Train Diffusion:  69%|██████▉   | 1389/2000 [42:41<17:36,  1.73s/it][A
Train Diffusion:  70%|██████▉   | 1390/2000 [42:42<17:33,  1.73s/it][A

Moving average ELBO loss at 1390 iterations is: -25883.12109375. Best ELBO loss value is: -26781.84375.

C_PATH mean = tensor([[37.7450,  0.1520,  0.7023],
        [37.8403,  0.1529,  0.7214]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4166e+01, 1.7818e-03, 2.4043e-01],
         [4.3989e+01, 3.9321e-10, 4.4233e-01],
         ...,
         [3.0697e+01, 1.2703e-01, 9.4745e-01],
         [3.0611e+01, 1.2913e-01, 7.5539e-01],
         [3.1637e+01, 9.7689e-02, 6.1450e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4895e+01, 6.0213e-02, 9.4681e-02],
         [4.3986e+01, 7.7256e-05, 9.5395e-02],
         ...,
         [3.0824e+01, 1.0939e-01, 5.4906e-01],
         [3.1560e+01, 7.1620e-02, 6.7613e-01],
         [2.9273e+01, 1.0881e-01, 7.6713e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  70%|██████▉   | 1391/2000 [42:44<17:31,  1.73s/it][A
Train Diffusion:  70%|██████▉   | 1392/2000 [42:46<17:29,  1.73s/it][A
Train Diffusion:  70%|██████▉   | 1393/2000 [42:48<17:27,  1.73s/it][A
Train Diffusion:  70%|██████▉   | 1394/2000 [42:49<17:24,  1.72s/it][A
Train Diffusion:  70%|██████▉   | 1395/2000 [42:51<17:23,  1.72s/it][A
Train Diffusion:  70%|██████▉   | 1396/2000 [42:53<17:21,  1.72s/it][A
Train Diffusion:  70%|██████▉   | 1397/2000 [42:55<17:19,  1.72s/it][A
Train Diffusion:  70%|██████▉   | 1398/2000 [42:56<17:20,  1.73s/it][A
Train Diffusion:  70%|██████▉   | 1399/2000 [42:58<17:19,  1.73s/it][A
Train Diffusion:  70%|███████   | 1400/2000 [43:00<17:22,  1.74s/it][A

Moving average ELBO loss at 1400 iterations is: -26145.64609375. Best ELBO loss value is: -26965.30078125.

C_PATH mean = tensor([[37.8158,  0.1554,  0.6844],
        [37.8465,  0.1502,  0.7012]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4741e+01, 2.2088e-03, 1.9168e-01],
         [4.4470e+01, 4.8616e-10, 2.5650e-01],
         ...,
         [3.1882e+01, 1.0838e-01, 7.0470e-01],
         [3.1434e+01, 7.0658e-02, 6.1926e-01],
         [3.2158e+01, 1.0654e-01, 7.6334e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5567e+01, 8.5521e-02, 1.1620e-01],
         [4.4461e+01, 1.1408e-04, 2.2788e-01],
         ...,
         [2.9743e+01, 1.2409e-01, 8.3108e-01],
         [3.0774e+01, 1.2597e-01, 8.0654e-01],
         [2.9133e+01, 9.4879e-02, 6.7358e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  70%|███████   | 1401/2000 [43:02<17:27,  1.75s/it][A
Train Diffusion:  70%|███████   | 1402/2000 [43:03<17:25,  1.75s/it][A
Train Diffusion:  70%|███████   | 1403/2000 [43:05<17:19,  1.74s/it][A
Train Diffusion:  70%|███████   | 1404/2000 [43:07<17:13,  1.73s/it][A
Train Diffusion:  70%|███████   | 1405/2000 [43:08<17:09,  1.73s/it][A
Train Diffusion:  70%|███████   | 1406/2000 [43:10<17:06,  1.73s/it][A
Train Diffusion:  70%|███████   | 1407/2000 [43:12<17:03,  1.73s/it][A
Train Diffusion:  70%|███████   | 1408/2000 [43:14<17:02,  1.73s/it][A
Train Diffusion:  70%|███████   | 1409/2000 [43:15<16:59,  1.73s/it][A
Train Diffusion:  70%|███████   | 1410/2000 [43:17<17:01,  1.73s/it][A

Moving average ELBO loss at 1410 iterations is: -26531.4671875. Best ELBO loss value is: -26986.984375.

C_PATH mean = tensor([[37.7894,  0.1490,  0.7200],
        [37.8651,  0.1535,  0.6978]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4217e+01, 6.3105e-02, 9.8933e-02],
         [4.4044e+01, 2.1584e-10, 1.7749e-01],
         ...,
         [3.1620e+01, 1.0763e-01, 9.0764e-01],
         [3.1420e+01, 6.7991e-02, 7.1927e-01],
         [2.9440e+01, 1.0236e-01, 7.6070e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4753e+01, 3.0112e-03, 2.1777e-01],
         [4.3987e+01, 2.0727e-04, 2.2127e-01],
         ...,
         [3.0189e+01, 1.1873e-01, 6.2323e-01],
         [3.1137e+01, 1.1777e-01, 7.4144e-01],
         [3.2481e+01, 9.0796e-02, 6.3158e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  71%|███████   | 1411/2000 [43:19<17:00,  1.73s/it][A
Train Diffusion:  71%|███████   | 1412/2000 [43:21<16:56,  1.73s/it][A
Train Diffusion:  71%|███████   | 1413/2000 [43:22<16:54,  1.73s/it][A
Train Diffusion:  71%|███████   | 1414/2000 [43:24<16:51,  1.73s/it][A
Train Diffusion:  71%|███████   | 1415/2000 [43:26<16:47,  1.72s/it][A
Train Diffusion:  71%|███████   | 1416/2000 [43:27<16:44,  1.72s/it][A
Train Diffusion:  71%|███████   | 1417/2000 [43:29<16:46,  1.73s/it][A
Train Diffusion:  71%|███████   | 1418/2000 [43:31<16:49,  1.74s/it][A
Train Diffusion:  71%|███████   | 1419/2000 [43:33<16:57,  1.75s/it][A
Train Diffusion:  71%|███████   | 1420/2000 [43:34<17:01,  1.76s/it][A

Moving average ELBO loss at 1420 iterations is: -27098.4330078125. Best ELBO loss value is: -27801.4921875.

C_PATH mean = tensor([[37.8533,  0.1525,  0.6931],
        [37.8348,  0.1567,  0.7203]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5628e+01, 9.2411e-02, 9.8479e-02],
         [4.4349e+01, 2.6317e-10, 1.8990e-01],
         ...,
         [3.1008e+01, 9.7700e-02, 1.0730e+00],
         [3.0942e+01, 7.8754e-02, 9.9652e-01],
         [3.0173e+01, 1.2251e-01, 8.1076e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4918e+01, 2.3224e-03, 2.2664e-01],
         [4.4546e+01, 1.6459e-04, 2.6757e-01],
         ...,
         [3.0853e+01, 1.3620e-01, 4.3357e-01],
         [3.1704e+01, 1.1280e-01, 4.5121e-01],
         [3.1810e+01, 8.6451e-02, 7.2075e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  71%|███████   | 1421/2000 [43:36<16:53,  1.75s/it][A
Train Diffusion:  71%|███████   | 1422/2000 [43:38<16:48,  1.75s/it][A
Train Diffusion:  71%|███████   | 1423/2000 [43:40<16:45,  1.74s/it][A
Train Diffusion:  71%|███████   | 1424/2000 [43:41<16:41,  1.74s/it][A
Train Diffusion:  71%|███████▏  | 1425/2000 [43:43<16:36,  1.73s/it][A
Train Diffusion:  71%|███████▏  | 1426/2000 [43:45<16:34,  1.73s/it][A
Train Diffusion:  71%|███████▏  | 1427/2000 [43:47<16:30,  1.73s/it][A
Train Diffusion:  71%|███████▏  | 1428/2000 [43:48<16:28,  1.73s/it][A
Train Diffusion:  71%|███████▏  | 1429/2000 [43:50<16:26,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1430/2000 [43:52<16:24,  1.73s/it][A

Moving average ELBO loss at 1430 iterations is: -27036.3650390625. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.8508,  0.1573,  0.6803],
        [37.8854,  0.1565,  0.6867]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5034e+01, 8.8684e-02, 1.7555e-01],
         [4.4715e+01, 1.6442e-04, 2.3065e-01],
         ...,
         [3.0631e+01, 1.3486e-01, 8.4371e-01],
         [3.1479e+01, 7.6651e-02, 7.0436e-01],
         [2.9614e+01, 1.0055e-01, 6.1437e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5761e+01, 2.5629e-03, 1.1904e-01],
         [4.4675e+01, 4.6499e-10, 2.3727e-01],
         ...,
         [3.1247e+01, 9.5889e-02, 6.3389e-01],
         [3.1136e+01, 1.2631e-01, 7.5201e-01],
         [3.2468e+01, 9.7196e-02, 7.8385e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  72%|███████▏  | 1431/2000 [43:53<16:22,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1432/2000 [43:55<16:26,  1.74s/it][A
Train Diffusion:  72%|███████▏  | 1433/2000 [43:57<16:21,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1434/2000 [43:59<16:18,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1435/2000 [44:00<16:15,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1436/2000 [44:02<16:12,  1.72s/it][A
Train Diffusion:  72%|███████▏  | 1437/2000 [44:04<16:11,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1438/2000 [44:06<16:19,  1.74s/it][A
Train Diffusion:  72%|███████▏  | 1439/2000 [44:07<16:17,  1.74s/it][A
Train Diffusion:  72%|███████▏  | 1440/2000 [44:09<16:12,  1.74s/it][A

Moving average ELBO loss at 1440 iterations is: -26548.884375. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.8061,  0.1569,  0.6936],
        [37.9210,  0.1553,  0.6902]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4866e+01, 1.7863e-03, 2.3172e-01],
         [4.4538e+01, 2.5132e-04, 2.6754e-01],
         ...,
         [3.1108e+01, 1.4008e-01, 7.7293e-01],
         [3.0863e+01, 1.1811e-01, 8.5657e-01],
         [3.1930e+01, 9.0470e-02, 7.0269e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5656e+01, 9.4300e-02, 9.9407e-02],
         [4.4596e+01, 2.8649e-10, 1.9176e-01],
         ...,
         [3.0785e+01, 8.8286e-02, 7.3194e-01],
         [3.1773e+01, 7.4361e-02, 5.7652e-01],
         [3.0067e+01, 1.1827e-01, 7.0118e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  72%|███████▏  | 1441/2000 [44:11<16:09,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1442/2000 [44:13<16:05,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1443/2000 [44:14<16:03,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1444/2000 [44:16<16:02,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1445/2000 [44:18<15:59,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1446/2000 [44:19<15:56,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1447/2000 [44:21<15:59,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1448/2000 [44:23<15:55,  1.73s/it][A
Train Diffusion:  72%|███████▏  | 1449/2000 [44:25<15:53,  1.73s/it][A
Train Diffusion:  72%|███████▎  | 1450/2000 [44:26<15:51,  1.73s/it][A

Moving average ELBO loss at 1450 iterations is: -27541.6875. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.8436,  0.1459,  0.7112],
        [37.9204,  0.1578,  0.7252]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4042e+01, 2.3673e-04, 1.2889e-01],
         [4.3044e+01, 4.7385e-05, 2.3336e-01],
         ...,
         [3.1358e+01, 1.4060e-01, 7.0438e-01],
         [3.0954e+01, 1.4168e-01, 8.4655e-01],
         [2.8545e+01, 1.0398e-01, 9.2057e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4596e+01, 9.3505e-02, 1.7499e-01],
         [4.4523e+01, 4.4211e-10, 2.2560e-01],
         ...,
         [3.1083e+01, 9.9271e-02, 8.4122e-01],
         [3.1862e+01, 6.7361e-02, 6.2155e-01],
         [3.2265e+01, 9.9784e-02, 5.3569e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  73%|███████▎  | 1451/2000 [44:28<15:54,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1452/2000 [44:30<15:50,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1453/2000 [44:32<15:46,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1454/2000 [44:33<15:44,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1455/2000 [44:35<15:42,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1456/2000 [44:37<15:44,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1457/2000 [44:39<15:46,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1458/2000 [44:40<15:42,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1459/2000 [44:42<15:37,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1460/2000 [44:44<15:33,  1.73s/it][A

Moving average ELBO loss at 1460 iterations is: -24250.630078125. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9500,  0.1589,  0.6826],
        [37.8778,  0.1578,  0.6932]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4939e+01, 1.1401e-01, 1.8949e-01],
         [4.4668e+01, 6.2610e-10, 2.4527e-01],
         ...,
         [3.0471e+01, 8.7276e-02, 1.0289e+00],
         [3.1188e+01, 6.5893e-02, 9.5727e-01],
         [3.2055e+01, 1.0494e-01, 9.2456e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5930e+01, 4.0297e-04, 1.1619e-01],
         [4.4742e+01, 9.3334e-05, 2.3369e-01],
         ...,
         [3.1708e+01, 1.4501e-01, 4.8443e-01],
         [3.1323e+01, 1.3582e-01, 5.0079e-01],
         [2.9066e+01, 9.7010e-02, 5.5696e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  73%|███████▎  | 1461/2000 [44:45<15:31,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1462/2000 [44:47<15:30,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1463/2000 [44:49<15:27,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1464/2000 [44:51<15:24,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1465/2000 [44:52<15:22,  1.72s/it][A
Train Diffusion:  73%|███████▎  | 1466/2000 [44:54<15:21,  1.73s/it][A
Train Diffusion:  73%|███████▎  | 1467/2000 [44:56<15:26,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1468/2000 [44:58<15:27,  1.74s/it][A
Train Diffusion:  73%|███████▎  | 1469/2000 [44:59<15:23,  1.74s/it][A
Train Diffusion:  74%|███████▎  | 1470/2000 [45:01<15:20,  1.74s/it][A

Moving average ELBO loss at 1470 iterations is: -25058.186328125. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9293,  0.1554,  0.6966],
        [37.9295,  0.1517,  0.7027]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4109e+01, 3.2339e-02, 1.1211e-01],
         [4.3324e+01, 1.9048e-10, 2.2709e-01],
         ...,
         [3.1625e+01, 1.2141e-01, 9.8001e-01],
         [3.1311e+01, 1.0786e-01, 7.5529e-01],
         [2.9420e+01, 9.0111e-02, 6.3196e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4833e+01, 1.2142e-03, 2.0996e-01],
         [4.5089e+01, 2.2199e-04, 2.7137e-01],
         ...,
         [3.0838e+01, 9.7752e-02, 5.9421e-01],
         [3.1649e+01, 7.4295e-02, 7.5716e-01],
         [3.2292e+01, 1.1445e-01, 7.9756e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  74%|███████▎  | 1471/2000 [45:03<15:19,  1.74s/it][A
Train Diffusion:  74%|███████▎  | 1472/2000 [45:05<15:15,  1.73s/it][A
Train Diffusion:  74%|███████▎  | 1473/2000 [45:06<15:13,  1.73s/it][A
Train Diffusion:  74%|███████▎  | 1474/2000 [45:08<15:09,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1475/2000 [45:10<15:12,  1.74s/it][A
Train Diffusion:  74%|███████▍  | 1476/2000 [45:11<15:12,  1.74s/it][A
Train Diffusion:  74%|███████▍  | 1477/2000 [45:13<15:08,  1.74s/it][A
Train Diffusion:  74%|███████▍  | 1478/2000 [45:15<15:05,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1479/2000 [45:17<15:04,  1.74s/it][A
Train Diffusion:  74%|███████▍  | 1480/2000 [45:18<15:02,  1.74s/it][A

Moving average ELBO loss at 1480 iterations is: -25608.1326171875. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9131,  0.1465,  0.7475],
        [37.8996,  0.1547,  0.7319]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5179e+01, 7.3210e-04, 9.2612e-02],
         [4.5027e+01, 1.5937e-04, 9.5984e-02],
         ...,
         [3.0810e+01, 8.7012e-02, 4.8513e-01],
         [3.0643e+01, 6.7658e-02, 4.7344e-01],
         [3.1783e+01, 1.0435e-01, 5.1608e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4472e+01, 4.0963e-02, 2.2891e-01],
         [4.3257e+01, 2.0885e-10, 4.4032e-01],
         ...,
         [3.1902e+01, 1.3853e-01, 1.1664e+00],
         [3.2515e+01, 1.1477e-01, 1.0575e+00],
         [3.0024e+01, 9.5571e-02, 9.3905e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  74%|███████▍  | 1481/2000 [45:20<15:00,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1482/2000 [45:22<14:55,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1483/2000 [45:24<14:52,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1484/2000 [45:25<14:50,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1485/2000 [45:27<14:47,  1.72s/it][A
Train Diffusion:  74%|███████▍  | 1486/2000 [45:29<14:50,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1487/2000 [45:31<14:47,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1488/2000 [45:32<14:47,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1489/2000 [45:34<14:46,  1.73s/it][A
Train Diffusion:  74%|███████▍  | 1490/2000 [45:36<14:44,  1.73s/it][A

Moving average ELBO loss at 1490 iterations is: -25382.9525390625. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9731,  0.1574,  0.7090],
        [37.9252,  0.1437,  0.7110]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3768e+01, 5.3639e-04, 2.5659e-01],
         [4.3603e+01, 1.5479e-04, 2.9308e-01],
         ...,
         [3.1723e+01, 1.0108e-01, 6.9469e-01],
         [3.1336e+01, 1.1945e-01, 7.7099e-01],
         [3.1470e+01, 9.0690e-02, 6.4047e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4495e+01, 3.2976e-02, 8.6784e-02],
         [4.3621e+01, 2.3808e-10, 1.7117e-01],
         ...,
         [3.1108e+01, 1.3798e-01, 8.9616e-01],
         [3.1682e+01, 8.0238e-02, 7.1856e-01],
         [2.9376e+01, 1.0434e-01, 8.0339e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  75%|███████▍  | 1491/2000 [45:37<14:39,  1.73s/it][A
Train Diffusion:  75%|███████▍  | 1492/2000 [45:39<14:36,  1.73s/it][A
Train Diffusion:  75%|███████▍  | 1493/2000 [45:41<14:35,  1.73s/it][A
Train Diffusion:  75%|███████▍  | 1494/2000 [45:43<14:40,  1.74s/it][A
Train Diffusion:  75%|███████▍  | 1495/2000 [45:44<14:41,  1.74s/it][A
Train Diffusion:  75%|███████▍  | 1496/2000 [45:46<14:35,  1.74s/it][A
Train Diffusion:  75%|███████▍  | 1497/2000 [45:48<14:31,  1.73s/it][A
Train Diffusion:  75%|███████▍  | 1498/2000 [45:50<14:29,  1.73s/it][A
Train Diffusion:  75%|███████▍  | 1499/2000 [45:51<14:26,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1500/2000 [45:53<14:23,  1.73s/it][A

Moving average ELBO loss at 1500 iterations is: -25845.95234375. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9655,  0.1542,  0.6949],
        [37.9599,  0.1558,  0.6872]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4969e+01, 6.5334e-02, 2.4171e-01],
         [4.3884e+01, 4.4575e-10, 2.7199e-01],
         ...,
         [3.2296e+01, 1.3472e-01, 8.1761e-01],
         [3.1717e+01, 1.1396e-01, 8.3281e-01],
         [3.2324e+01, 9.1759e-02, 7.0112e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5770e+01, 3.9020e-04, 9.7270e-02],
         [4.5424e+01, 1.3910e-04, 1.8890e-01],
         ...,
         [3.0218e+01, 1.0233e-01, 6.8781e-01],
         [3.1152e+01, 7.8327e-02, 6.0878e-01],
         [2.9379e+01, 1.1573e-01, 7.8608e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  75%|███████▌  | 1501/2000 [45:55<14:22,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1502/2000 [45:56<14:19,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1503/2000 [45:58<14:16,  1.72s/it][A
Train Diffusion:  75%|███████▌  | 1504/2000 [46:00<14:15,  1.72s/it][A
Train Diffusion:  75%|███████▌  | 1505/2000 [46:02<14:13,  1.72s/it][A
Train Diffusion:  75%|███████▌  | 1506/2000 [46:03<14:13,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1507/2000 [46:05<14:12,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1508/2000 [46:07<14:09,  1.73s/it][A
Train Diffusion:  75%|███████▌  | 1509/2000 [46:09<14:07,  1.73s/it][A
Train Diffusion:  76%|███████▌  | 1510/2000 [46:10<14:06,  1.73s/it][A

Moving average ELBO loss at 1510 iterations is: -26152.36328125. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9466,  0.1549,  0.6976],
        [38.0118,  0.1494,  0.6914]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4506e+01, 1.4461e-03, 9.2276e-02],
         [4.4485e+01, 3.9160e-04, 1.8840e-01],
         ...,
         [3.0344e+01, 1.1960e-01, 7.1689e-01],
         [3.1315e+01, 7.6206e-02, 7.6687e-01],
         [3.2469e+01, 1.1130e-01, 6.5040e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5443e+01, 3.7494e-02, 2.4837e-01],
         [4.4214e+01, 2.6013e-10, 2.7133e-01],
         ...,
         [3.2464e+01, 1.1535e-01, 8.8888e-01],
         [3.1961e+01, 1.2303e-01, 7.5807e-01],
         [2.9841e+01, 9.5570e-02, 8.1384e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  76%|███████▌  | 1511/2000 [46:12<14:04,  1.73s/it][A
Train Diffusion:  76%|███████▌  | 1512/2000 [46:14<14:04,  1.73s/it][A
Train Diffusion:  76%|███████▌  | 1513/2000 [46:16<14:07,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1514/2000 [46:17<14:09,  1.75s/it][A
Train Diffusion:  76%|███████▌  | 1515/2000 [46:19<14:05,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1516/2000 [46:21<14:04,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1517/2000 [46:22<13:59,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1518/2000 [46:24<13:55,  1.73s/it][A
Train Diffusion:  76%|███████▌  | 1519/2000 [46:26<13:51,  1.73s/it][A
Train Diffusion:  76%|███████▌  | 1520/2000 [46:28<13:53,  1.74s/it][A

Moving average ELBO loss at 1520 iterations is: -25830.9484375. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9692,  0.1577,  0.6726],
        [38.0203,  0.1553,  0.6951]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6081e+01, 1.2980e-03, 1.8082e-01],
         [4.4598e+01, 4.8958e-04, 2.2803e-01],
         ...,
         [3.1173e+01, 1.4035e-01, 9.1653e-01],
         [3.1881e+01, 8.0425e-02, 9.2340e-01],
         [3.2517e+01, 1.0444e-01, 7.2627e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4987e+01, 3.7793e-02, 1.2196e-01],
         [4.4860e+01, 2.7794e-10, 2.4872e-01],
         ...,
         [3.1533e+01, 8.6543e-02, 5.7838e-01],
         [3.1287e+01, 1.2164e-01, 5.0328e-01],
         [2.9635e+01, 9.4904e-02, 6.6974e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  76%|███████▌  | 1521/2000 [46:29<13:54,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1522/2000 [46:31<13:50,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1523/2000 [46:33<13:47,  1.74s/it][A
Train Diffusion:  76%|███████▌  | 1524/2000 [46:35<13:44,  1.73s/it][A
Train Diffusion:  76%|███████▋  | 1525/2000 [46:36<13:41,  1.73s/it][A
Train Diffusion:  76%|███████▋  | 1526/2000 [46:38<13:39,  1.73s/it][A
Train Diffusion:  76%|███████▋  | 1527/2000 [46:40<13:36,  1.73s/it][A
Train Diffusion:  76%|███████▋  | 1528/2000 [46:42<13:34,  1.73s/it][A
Train Diffusion:  76%|███████▋  | 1529/2000 [46:43<13:32,  1.72s/it][A
Train Diffusion:  76%|███████▋  | 1530/2000 [46:45<13:29,  1.72s/it][A

Moving average ELBO loss at 1530 iterations is: -26076.40390625. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[38.0413,  0.1584,  0.6949],
        [37.9384,  0.1517,  0.7120]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4748e+01, 4.7841e-02, 8.7477e-02],
         [4.4063e+01, 2.0021e-04, 1.7466e-01],
         ...,
         [3.0900e+01, 1.0970e-01, 6.8038e-01],
         [3.1509e+01, 7.0455e-02, 8.1303e-01],
         [3.0209e+01, 1.0088e-01, 6.3305e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4133e+01, 8.8973e-04, 2.3559e-01],
         [4.4000e+01, 3.1810e-10, 2.3623e-01],
         ...,
         [3.2070e+01, 1.1659e-01, 8.4947e-01],
         [3.2017e+01, 1.1911e-01, 6.8019e-01],
         [3.2498e+01, 9.0720e-02, 7.8991e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  77%|███████▋  | 1531/2000 [46:47<13:33,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1532/2000 [46:48<13:35,  1.74s/it][A
Train Diffusion:  77%|███████▋  | 1533/2000 [46:50<13:30,  1.74s/it][A
Train Diffusion:  77%|███████▋  | 1534/2000 [46:52<13:27,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1535/2000 [46:54<13:28,  1.74s/it][A
Train Diffusion:  77%|███████▋  | 1536/2000 [46:55<13:24,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1537/2000 [46:57<13:20,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1538/2000 [46:59<13:18,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1539/2000 [47:01<13:16,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1540/2000 [47:02<13:13,  1.72s/it][A

Moving average ELBO loss at 1540 iterations is: -26414.9177734375. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[37.9754,  0.1573,  0.6902],
        [38.0359,  0.1562,  0.6970]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4742e+01, 8.7670e-02, 1.0031e-01],
         [4.4638e+01, 5.5818e-10, 9.8664e-02],
         ...,
         [3.1572e+01, 9.2900e-02, 9.1605e-01],
         [3.1421e+01, 7.7129e-02, 9.3828e-01],
         [3.2479e+01, 1.2084e-01, 7.4104e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5624e+01, 6.9137e-04, 2.3701e-01],
         [4.4550e+01, 2.4483e-04, 4.4610e-01],
         ...,
         [3.1323e+01, 1.4157e-01, 6.2346e-01],
         [3.2034e+01, 1.1806e-01, 5.4067e-01],
         [3.0380e+01, 9.2319e-02, 7.0460e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  77%|███████▋  | 1541/2000 [47:04<13:13,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1542/2000 [47:06<13:11,  1.73s/it][A
Train Diffusion:  77%|███████▋  | 1543/2000 [47:07<13:07,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1544/2000 [47:09<13:05,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1545/2000 [47:11<13:04,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1546/2000 [47:13<13:02,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1547/2000 [47:14<13:00,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1548/2000 [47:16<12:58,  1.72s/it][A
Train Diffusion:  77%|███████▋  | 1549/2000 [47:18<12:56,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1550/2000 [47:20<12:59,  1.73s/it][A

Moving average ELBO loss at 1550 iterations is: -27093.828515625. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[38.0813,  0.1601,  0.6951],
        [37.9476,  0.1513,  0.6973]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6042e+01, 1.2875e-03, 2.4073e-01],
         [4.5699e+01, 5.4750e-04, 2.7352e-01],
         ...,
         [3.0639e+01, 1.4162e-01, 4.9110e-01],
         [3.1447e+01, 1.2769e-01, 4.8002e-01],
         [3.2769e+01, 9.8644e-02, 7.2483e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5102e+01, 3.2466e-02, 1.0039e-01],
         [4.3984e+01, 2.3397e-10, 1.9558e-01],
         ...,
         [3.2386e+01, 1.0467e-01, 1.0457e+00],
         [3.2203e+01, 7.6102e-02, 9.9809e-01],
         [3.0150e+01, 1.1635e-01, 7.8970e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  78%|███████▊  | 1551/2000 [47:21<13:00,  1.74s/it][A
Train Diffusion:  78%|███████▊  | 1552/2000 [47:23<12:56,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1553/2000 [47:25<12:53,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1554/2000 [47:26<12:49,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1555/2000 [47:28<12:46,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1556/2000 [47:30<12:46,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1557/2000 [47:32<12:45,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1558/2000 [47:33<12:45,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1559/2000 [47:35<12:42,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1560/2000 [47:37<12:40,  1.73s/it][A

Moving average ELBO loss at 1560 iterations is: -26771.126171875. Best ELBO loss value is: -28550.56640625.

C_PATH mean = tensor([[38.0530,  0.1525,  0.7090],
        [38.0328,  0.1476,  0.7072]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4194e+01, 5.2886e-02, 2.5455e-01],
         [4.3840e+01, 2.3586e-10, 2.8310e-01],
         ...,
         [3.1423e+01, 9.9705e-02, 9.9540e-01],
         [3.2193e+01, 1.1890e-01, 7.7216e-01],
         [3.0574e+01, 9.0711e-02, 6.1480e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4789e+01, 7.6890e-04, 8.6335e-02],
         [4.3951e+01, 3.4529e-04, 1.7204e-01],
         ...,
         [3.2022e+01, 1.4093e-01, 5.2022e-01],
         [3.1857e+01, 8.0588e-02, 7.1777e-01],
         [3.2754e+01, 1.0382e-01, 7.9085e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  78%|███████▊  | 1561/2000 [47:39<12:38,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1562/2000 [47:40<12:35,  1.73s/it][A
Train Diffusion:  78%|███████▊  | 1563/2000 [47:42<12:32,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1564/2000 [47:44<12:31,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1565/2000 [47:45<12:28,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1566/2000 [47:47<12:27,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1567/2000 [47:49<12:25,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1568/2000 [47:51<12:24,  1.72s/it][A
Train Diffusion:  78%|███████▊  | 1569/2000 [47:52<12:29,  1.74s/it][A
Train Diffusion:  78%|███████▊  | 1570/2000 [47:54<12:27,  1.74s/it][A

Moving average ELBO loss at 1570 iterations is: -28362.2765625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.0359,  0.1503,  0.6890],
        [38.0893,  0.1480,  0.7075]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5748e+01, 3.0306e-02, 8.3422e-02],
         [4.4528e+01, 2.1739e-10, 1.6992e-01],
         ...,
         [3.2432e+01, 1.2255e-01, 9.2449e-01],
         [3.2251e+01, 7.1328e-02, 9.0711e-01],
         [3.0260e+01, 1.0056e-01, 7.1759e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4825e+01, 1.1787e-03, 2.5333e-01],
         [4.4500e+01, 5.3713e-04, 2.7747e-01],
         ...,
         [3.0923e+01, 9.9005e-02, 6.2805e-01],
         [3.1706e+01, 1.2480e-01, 5.6320e-01],
         [3.2921e+01, 9.1508e-02, 7.4825e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  79%|███████▊  | 1571/2000 [47:56<12:24,  1.74s/it][A
Train Diffusion:  79%|███████▊  | 1572/2000 [47:58<12:21,  1.73s/it][A
Train Diffusion:  79%|███████▊  | 1573/2000 [47:59<12:17,  1.73s/it][A
Train Diffusion:  79%|███████▊  | 1574/2000 [48:01<12:16,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1575/2000 [48:03<12:16,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1576/2000 [48:05<12:15,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1577/2000 [48:06<12:11,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1578/2000 [48:08<12:08,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1579/2000 [48:10<12:05,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1580/2000 [48:11<12:03,  1.72s/it][A

Moving average ELBO loss at 1580 iterations is: -26371.6671875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.0048,  0.1549,  0.6932],
        [38.1158,  0.1570,  0.7158]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4278e+01, 2.3169e-04, 2.6082e-01],
         [4.4205e+01, 1.5570e-04, 2.9545e-01],
         ...,
         [3.2362e+01, 9.8069e-02, 7.2729e-01],
         [3.2175e+01, 1.1998e-01, 5.8991e-01],
         [3.0165e+01, 9.4430e-02, 7.0934e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4943e+01, 8.2001e-02, 8.6455e-02],
         [4.4214e+01, 4.3219e-10, 1.7280e-01],
         ...,
         [3.1106e+01, 1.4597e-01, 8.8740e-01],
         [3.1929e+01, 8.2303e-02, 9.4223e-01],
         [3.3107e+01, 1.0419e-01, 7.8782e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  79%|███████▉  | 1581/2000 [48:13<12:02,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1582/2000 [48:15<12:00,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1583/2000 [48:17<11:58,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1584/2000 [48:18<11:56,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1585/2000 [48:20<11:54,  1.72s/it][A
Train Diffusion:  79%|███████▉  | 1586/2000 [48:22<11:57,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1587/2000 [48:23<11:55,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1588/2000 [48:25<11:52,  1.73s/it][A
Train Diffusion:  79%|███████▉  | 1589/2000 [48:27<11:49,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1590/2000 [48:29<11:47,  1.73s/it][A

Moving average ELBO loss at 1590 iterations is: -25814.6291015625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.0553,  0.1526,  0.7173],
        [38.0756,  0.1553,  0.6869]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5750e+01, 1.0248e-03, 8.5646e-02],
         [4.4697e+01, 6.0540e-04, 1.6920e-01],
         ...,
         [3.1352e+01, 1.2348e-01, 6.1399e-01],
         [3.2199e+01, 7.2270e-02, 5.2645e-01],
         [3.3445e+01, 1.0067e-01, 4.8009e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4919e+01, 1.8976e-02, 2.5708e-01],
         [4.4725e+01, 1.7387e-10, 2.9003e-01],
         ...,
         [3.2038e+01, 1.0411e-01, 8.1518e-01],
         [3.1894e+01, 1.2525e-01, 8.4315e-01],
         [3.0066e+01, 9.5735e-02, 8.6350e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  80%|███████▉  | 1591/2000 [48:30<11:48,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1592/2000 [48:32<11:46,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1593/2000 [48:34<11:46,  1.74s/it][A
Train Diffusion:  80%|███████▉  | 1594/2000 [48:36<11:43,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1595/2000 [48:37<11:40,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1596/2000 [48:39<11:38,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1597/2000 [48:41<11:36,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1598/2000 [48:43<11:33,  1.73s/it][A
Train Diffusion:  80%|███████▉  | 1599/2000 [48:44<11:36,  1.74s/it][A
Train Diffusion:  80%|████████  | 1600/2000 [48:46<11:36,  1.74s/it][A

Moving average ELBO loss at 1600 iterations is: -25432.843359375. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.0828,  0.1548,  0.7099],
        [38.0906,  0.1495,  0.7126]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5034e+01, 8.8909e-04, 1.8966e-01],
         [4.4095e+01, 5.1468e-04, 2.4576e-01],
         ...,
         [3.1839e+01, 1.1657e-01, 8.4154e-01],
         [3.2574e+01, 6.8893e-02, 7.0341e-01],
         [3.3369e+01, 1.0043e-01, 7.8377e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4349e+01, 2.7401e-02, 1.1991e-01],
         [4.4025e+01, 1.8299e-10, 2.3856e-01],
         ...,
         [3.1964e+01, 1.1756e-01, 7.4618e-01],
         [3.1917e+01, 1.2642e-01, 7.8234e-01],
         [3.0283e+01, 9.7327e-02, 6.4567e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  80%|████████  | 1601/2000 [48:48<11:33,  1.74s/it][A
Train Diffusion:  80%|████████  | 1602/2000 [48:49<11:32,  1.74s/it][A
Train Diffusion:  80%|████████  | 1603/2000 [48:51<11:28,  1.73s/it][A
Train Diffusion:  80%|████████  | 1604/2000 [48:53<11:26,  1.73s/it][A
Train Diffusion:  80%|████████  | 1605/2000 [48:55<11:24,  1.73s/it][A
Train Diffusion:  80%|████████  | 1606/2000 [48:56<11:21,  1.73s/it][A
Train Diffusion:  80%|████████  | 1607/2000 [48:58<11:19,  1.73s/it][A
Train Diffusion:  80%|████████  | 1608/2000 [49:00<11:16,  1.73s/it][A
Train Diffusion:  80%|████████  | 1609/2000 [49:02<11:14,  1.72s/it][A
Train Diffusion:  80%|████████  | 1610/2000 [49:03<11:14,  1.73s/it][A

Moving average ELBO loss at 1610 iterations is: -25495.95625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.1676,  0.1542,  0.7003],
        [38.0457,  0.1540,  0.7051]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4782e+01, 2.8036e-02, 2.4936e-01],
         [4.3648e+01, 2.0360e-10, 4.5505e-01],
         ...,
         [3.2953e+01, 1.3924e-01, 6.6633e-01],
         [3.2565e+01, 1.3230e-01, 7.4942e-01],
         [3.0567e+01, 9.8335e-02, 7.9948e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5683e+01, 1.0595e-03, 9.3287e-02],
         [4.5498e+01, 7.4858e-04, 9.4025e-02],
         ...,
         [3.0860e+01, 9.4125e-02, 9.0641e-01],
         [3.1796e+01, 6.7891e-02, 7.2646e-01],
         [3.3159e+01, 1.0901e-01, 6.1520e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  81%|████████  | 1611/2000 [49:05<11:11,  1.73s/it][A
Train Diffusion:  81%|████████  | 1612/2000 [49:07<11:09,  1.73s/it][A
Train Diffusion:  81%|████████  | 1613/2000 [49:08<11:07,  1.73s/it][A
Train Diffusion:  81%|████████  | 1614/2000 [49:10<11:06,  1.73s/it][A
Train Diffusion:  81%|████████  | 1615/2000 [49:12<11:04,  1.72s/it][A
Train Diffusion:  81%|████████  | 1616/2000 [49:14<11:02,  1.72s/it][A
Train Diffusion:  81%|████████  | 1617/2000 [49:15<11:02,  1.73s/it][A
Train Diffusion:  81%|████████  | 1618/2000 [49:17<11:07,  1.75s/it][A
Train Diffusion:  81%|████████  | 1619/2000 [49:19<11:07,  1.75s/it][A
Train Diffusion:  81%|████████  | 1620/2000 [49:21<11:02,  1.74s/it][A

Moving average ELBO loss at 1620 iterations is: -25173.08828125. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.1034,  0.1564,  0.7167],
        [38.0766,  0.1575,  0.7053]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4643e+01, 1.9728e-04, 1.2702e-01],
         [4.4373e+01, 2.2253e-04, 1.1881e-01],
         ...,
         [3.1678e+01, 1.3731e-01, 7.9547e-01],
         [3.1590e+01, 1.2539e-01, 7.7521e-01],
         [3.0103e+01, 1.0190e-01, 6.6776e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5362e+01, 9.5694e-02, 1.9104e-01],
         [4.4395e+01, 4.9553e-10, 4.1077e-01],
         ...,
         [3.2160e+01, 1.0383e-01, 7.5505e-01],
         [3.2931e+01, 7.6010e-02, 6.2635e-01],
         [3.3681e+01, 1.0814e-01, 7.0561e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  81%|████████  | 1621/2000 [49:22<10:58,  1.74s/it][A
Train Diffusion:  81%|████████  | 1622/2000 [49:24<10:55,  1.73s/it][A
Train Diffusion:  81%|████████  | 1623/2000 [49:26<10:51,  1.73s/it][A
Train Diffusion:  81%|████████  | 1624/2000 [49:28<10:50,  1.73s/it][A
Train Diffusion:  81%|████████▏ | 1625/2000 [49:29<10:49,  1.73s/it][A
Train Diffusion:  81%|████████▏ | 1626/2000 [49:31<10:46,  1.73s/it][A
Train Diffusion:  81%|████████▏ | 1627/2000 [49:33<10:45,  1.73s/it][A
Train Diffusion:  81%|████████▏ | 1628/2000 [49:34<10:42,  1.73s/it][A
Train Diffusion:  81%|████████▏ | 1629/2000 [49:36<10:41,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1630/2000 [49:38<10:37,  1.72s/it][A

Moving average ELBO loss at 1630 iterations is: -25893.7259765625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.0865,  0.1514,  0.7181],
        [38.1766,  0.1509,  0.7305]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4724e+01, 3.4718e-04, 1.7640e-01],
         [4.3926e+01, 2.6640e-04, 3.8530e-01],
         ...,
         [3.1203e+01, 1.4188e-01, 7.4276e-01],
         [3.1960e+01, 7.5849e-02, 6.3521e-01],
         [3.0616e+01, 9.7053e-02, 7.5791e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4094e+01, 6.5897e-02, 1.2268e-01],
         [4.3707e+01, 3.5318e-10, 1.2106e-01],
         ...,
         [3.3039e+01, 8.3867e-02, 8.8173e-01],
         [3.2764e+01, 1.1538e-01, 8.5379e-01],
         [3.3410e+01, 9.1741e-02, 6.8211e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  82%|████████▏ | 1631/2000 [49:40<10:36,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1632/2000 [49:41<10:34,  1.72s/it][A
Train Diffusion:  82%|████████▏ | 1633/2000 [49:43<10:32,  1.72s/it][A
Train Diffusion:  82%|████████▏ | 1634/2000 [49:45<10:30,  1.72s/it][A
Train Diffusion:  82%|████████▏ | 1635/2000 [49:47<10:29,  1.72s/it][A
Train Diffusion:  82%|████████▏ | 1636/2000 [49:48<10:29,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1637/2000 [49:50<10:30,  1.74s/it][A
Train Diffusion:  82%|████████▏ | 1638/2000 [49:52<10:29,  1.74s/it][A
Train Diffusion:  82%|████████▏ | 1639/2000 [49:54<10:26,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1640/2000 [49:55<10:23,  1.73s/it][A

Moving average ELBO loss at 1640 iterations is: -25632.5728515625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.1563,  0.1498,  0.7090],
        [38.1530,  0.1498,  0.7022]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5242e+01, 2.2908e-04, 8.5684e-02],
         [4.4186e+01, 2.5452e-04, 8.8740e-02],
         ...,
         [3.2750e+01, 9.9873e-02, 6.1924e-01],
         [3.2526e+01, 7.0196e-02, 7.1681e-01],
         [3.0521e+01, 1.0232e-01, 7.5183e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4553e+01, 8.1337e-02, 2.3987e-01],
         [4.4189e+01, 4.7080e-10, 4.4671e-01],
         ...,
         [3.1475e+01, 1.3856e-01, 9.2599e-01],
         [3.2148e+01, 1.2370e-01, 7.2223e-01],
         [3.3347e+01, 9.2394e-02, 5.7017e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  82%|████████▏ | 1641/2000 [49:57<10:21,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1642/2000 [49:59<10:19,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1643/2000 [50:00<10:17,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1644/2000 [50:02<10:15,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1645/2000 [50:04<10:14,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1646/2000 [50:06<10:13,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1647/2000 [50:07<10:10,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1648/2000 [50:09<10:08,  1.73s/it][A
Train Diffusion:  82%|████████▏ | 1649/2000 [50:11<10:06,  1.73s/it][A
Train Diffusion:  82%|████████▎ | 1650/2000 [50:13<10:05,  1.73s/it][A

Moving average ELBO loss at 1650 iterations is: -26221.346875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.1582,  0.1515,  0.7020],
        [38.2297,  0.1477,  0.6948]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4949e+01, 9.9735e-02, 8.4449e-02],
         [4.3836e+01, 5.4517e-10, 8.4491e-02],
         ...,
         [3.3207e+01, 1.4355e-01, 6.9312e-01],
         [3.2782e+01, 6.9172e-02, 5.7444e-01],
         [3.3647e+01, 9.1882e-02, 4.9923e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5914e+01, 5.2448e-04, 2.5682e-01],
         [4.5592e+01, 5.5704e-04, 4.6636e-01],
         ...,
         [3.1009e+01, 8.1239e-02, 8.3140e-01],
         [3.1893e+01, 1.0930e-01, 8.8032e-01],
         [3.0809e+01, 8.5252e-02, 8.6785e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  83%|████████▎ | 1651/2000 [50:14<10:03,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1652/2000 [50:16<10:01,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1653/2000 [50:18<09:59,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1654/2000 [50:19<09:57,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1655/2000 [50:21<09:58,  1.74s/it][A
Train Diffusion:  83%|████████▎ | 1656/2000 [50:23<10:03,  1.76s/it][A
Train Diffusion:  83%|████████▎ | 1657/2000 [50:25<09:58,  1.75s/it][A
Train Diffusion:  83%|████████▎ | 1658/2000 [50:26<09:55,  1.74s/it][A
Train Diffusion:  83%|████████▎ | 1659/2000 [50:28<09:51,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1660/2000 [50:30<09:49,  1.73s/it][A

Moving average ELBO loss at 1660 iterations is: -26416.79375. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.2489,  0.1537,  0.6886],
        [38.2024,  0.1553,  0.6690]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5864e+01, 4.7871e-02, 2.2245e-01],
         [4.4648e+01, 4.3714e-10, 4.2893e-01],
         ...,
         [3.2316e+01, 9.3267e-02, 7.0139e-01],
         [3.2023e+01, 7.6794e-02, 5.6052e-01],
         [3.2904e+01, 1.2326e-01, 4.9621e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4855e+01, 3.4320e-04, 1.1362e-01],
         [4.4589e+01, 5.6920e-04, 1.0875e-01],
         ...,
         [3.1995e+01, 1.4563e-01, 7.3116e-01],
         [3.2634e+01, 1.2526e-01, 8.1796e-01],
         [3.1021e+01, 9.7224e-02, 8.6574e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  83%|████████▎ | 1661/2000 [50:32<09:48,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1662/2000 [50:33<09:46,  1.74s/it][A
Train Diffusion:  83%|████████▎ | 1663/2000 [50:35<09:43,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1664/2000 [50:37<09:41,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1665/2000 [50:39<09:39,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1666/2000 [50:40<09:37,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1667/2000 [50:42<09:35,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1668/2000 [50:44<09:33,  1.73s/it][A
Train Diffusion:  83%|████████▎ | 1669/2000 [50:45<09:31,  1.73s/it][A
Train Diffusion:  84%|████████▎ | 1670/2000 [50:47<09:29,  1.73s/it][A

Moving average ELBO loss at 1670 iterations is: -25602.894921875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.2261,  0.1610,  0.6981],
        [38.2208,  0.1527,  0.6736]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4609e+01, 8.3169e-02, 2.0368e-01],
         [4.4186e+01, 1.6913e-03, 2.5393e-01],
         ...,
         [3.2431e+01, 1.2489e-01, 7.0646e-01],
         [3.2220e+01, 8.1766e-02, 5.7083e-01],
         [3.3375e+01, 1.1558e-01, 5.1288e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5091e+01, 6.0311e-04, 1.1410e-01],
         [4.4101e+01, 2.4448e-10, 2.4076e-01],
         ...,
         [3.2101e+01, 1.2118e-01, 7.6741e-01],
         [3.2802e+01, 1.1356e-01, 8.5224e-01],
         [3.1566e+01, 9.3289e-02, 8.7971e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  84%|████████▎ | 1671/2000 [50:49<09:31,  1.74s/it][A
Train Diffusion:  84%|████████▎ | 1672/2000 [50:51<09:28,  1.73s/it][A
Train Diffusion:  84%|████████▎ | 1673/2000 [50:52<09:26,  1.73s/it][A
Train Diffusion:  84%|████████▎ | 1674/2000 [50:54<09:28,  1.74s/it][A
Train Diffusion:  84%|████████▍ | 1675/2000 [50:56<09:27,  1.75s/it][A
Train Diffusion:  84%|████████▍ | 1676/2000 [50:58<09:24,  1.74s/it][A
Train Diffusion:  84%|████████▍ | 1677/2000 [50:59<09:20,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1678/2000 [51:01<09:16,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1679/2000 [51:03<09:15,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1680/2000 [51:05<09:13,  1.73s/it][A

Moving average ELBO loss at 1680 iterations is: -26137.9060546875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.2653,  0.1493,  0.7066],
        [38.2225,  0.1551,  0.6986]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4851e+01, 1.8979e-04, 1.1512e-01],
         [4.3781e+01, 4.5887e-04, 1.1605e-01],
         ...,
         [3.2382e+01, 1.4193e-01, 5.2294e-01],
         [3.2228e+01, 1.1995e-01, 7.3051e-01],
         [3.0438e+01, 9.3514e-02, 6.5128e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6063e+01, 8.2149e-02, 2.1360e-01],
         [4.5388e+01, 4.9350e-10, 4.1879e-01],
         ...,
         [3.2365e+01, 9.0262e-02, 9.3775e-01],
         [3.2801e+01, 7.4217e-02, 7.4744e-01],
         [3.3741e+01, 1.1781e-01, 7.7286e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  84%|████████▍ | 1681/2000 [51:06<09:10,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1682/2000 [51:08<09:09,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1683/2000 [51:10<09:07,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1684/2000 [51:11<09:05,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1685/2000 [51:13<09:03,  1.72s/it][A
Train Diffusion:  84%|████████▍ | 1686/2000 [51:15<09:01,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1687/2000 [51:17<09:00,  1.73s/it][A
Train Diffusion:  84%|████████▍ | 1688/2000 [51:18<08:57,  1.72s/it][A
Train Diffusion:  84%|████████▍ | 1689/2000 [51:20<09:01,  1.74s/it][A
Train Diffusion:  84%|████████▍ | 1690/2000 [51:22<09:07,  1.77s/it][A

Moving average ELBO loss at 1690 iterations is: -26093.87890625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.2753,  0.1486,  0.7024],
        [38.2400,  0.1597,  0.7030]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4481e+01, 1.0975e-04, 1.7835e-01],
         [4.3550e+01, 1.5499e-10, 3.8469e-01],
         ...,
         [3.2785e+01, 1.4097e-01, 8.0110e-01],
         [3.2481e+01, 7.8403e-02, 6.7756e-01],
         [3.0526e+01, 9.9151e-02, 7.4008e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5471e+01, 5.5685e-02, 1.2310e-01],
         [4.5254e+01, 1.5930e-03, 1.1998e-01],
         ...,
         [3.2146e+01, 8.8685e-02, 7.7823e-01],
         [3.2730e+01, 1.1875e-01, 7.8394e-01],
         [3.3702e+01, 9.6693e-02, 6.6873e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  85%|████████▍ | 1691/2000 [51:24<09:16,  1.80s/it][A
Train Diffusion:  85%|████████▍ | 1692/2000 [51:26<09:21,  1.82s/it][A
Train Diffusion:  85%|████████▍ | 1693/2000 [51:28<09:24,  1.84s/it][A
Train Diffusion:  85%|████████▍ | 1694/2000 [51:29<09:26,  1.85s/it][A
Train Diffusion:  85%|████████▍ | 1695/2000 [51:31<09:25,  1.85s/it][A
Train Diffusion:  85%|████████▍ | 1696/2000 [51:33<09:18,  1.84s/it][A
Train Diffusion:  85%|████████▍ | 1697/2000 [51:35<09:12,  1.82s/it][A
Train Diffusion:  85%|████████▍ | 1698/2000 [51:37<09:08,  1.82s/it][A
Train Diffusion:  85%|████████▍ | 1699/2000 [51:38<09:03,  1.81s/it][A
Train Diffusion:  85%|████████▌ | 1700/2000 [51:40<08:55,  1.79s/it][A

Moving average ELBO loss at 1700 iterations is: -26729.534765625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.2934,  0.1486,  0.7126],
        [38.2833,  0.1555,  0.6949]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5156e+01, 9.9228e-04, 1.3950e-01],
         [4.4879e+01, 1.4716e-03, 1.2569e-01],
         ...,
         [3.2561e+01, 1.0243e-01, 6.2859e-01],
         [3.3184e+01, 1.2390e-01, 5.3654e-01],
         [3.4345e+01, 9.6145e-02, 5.1733e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4393e+01, 3.3824e-02, 1.6178e-01],
         [4.3192e+01, 1.7590e-10, 3.0359e-01],
         ...,
         [3.2653e+01, 1.2952e-01, 9.5791e-01],
         [3.2569e+01, 7.5457e-02, 9.1595e-01],
         [3.1412e+01, 9.8931e-02, 8.6976e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  85%|████████▌ | 1701/2000 [51:42<08:49,  1.77s/it][A
Train Diffusion:  85%|████████▌ | 1702/2000 [51:44<08:45,  1.76s/it][A
Train Diffusion:  85%|████████▌ | 1703/2000 [51:45<08:40,  1.75s/it][A
Train Diffusion:  85%|████████▌ | 1704/2000 [51:47<08:37,  1.75s/it][A
Train Diffusion:  85%|████████▌ | 1705/2000 [51:49<08:33,  1.74s/it][A
Train Diffusion:  85%|████████▌ | 1706/2000 [51:51<08:30,  1.74s/it][A
Train Diffusion:  85%|████████▌ | 1707/2000 [51:52<08:27,  1.73s/it][A
Train Diffusion:  85%|████████▌ | 1708/2000 [51:54<08:26,  1.73s/it][A
Train Diffusion:  85%|████████▌ | 1709/2000 [51:56<08:23,  1.73s/it][A
Train Diffusion:  86%|████████▌ | 1710/2000 [51:58<08:21,  1.73s/it][A

Moving average ELBO loss at 1710 iterations is: -26453.946484375. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.3165,  0.1463,  0.6976],
        [38.3225,  0.1504,  0.7030]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5522e+01, 3.1552e-03, 1.1493e-01],
         [4.4134e+01, 5.2509e-10, 2.4021e-01],
         ...,
         [3.2504e+01, 1.4397e-01, 6.1584e-01],
         [3.3119e+01, 1.2446e-01, 7.8179e-01],
         [3.4610e+01, 9.3803e-02, 8.2344e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4430e+01, 6.6023e-02, 2.1461e-01],
         [4.4137e+01, 9.9750e-04, 2.5213e-01],
         ...,
         [3.2943e+01, 8.6349e-02, 8.9026e-01],
         [3.2749e+01, 6.8462e-02, 6.5714e-01],
         [3.1441e+01, 1.1568e-01, 5.5999e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  86%|████████▌ | 1711/2000 [51:59<08:23,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1712/2000 [52:01<08:22,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1713/2000 [52:03<08:19,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1714/2000 [52:05<08:16,  1.73s/it][A
Train Diffusion:  86%|████████▌ | 1715/2000 [52:06<08:13,  1.73s/it][A
Train Diffusion:  86%|████████▌ | 1716/2000 [52:08<08:15,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1717/2000 [52:10<08:13,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1718/2000 [52:11<08:09,  1.74s/it][A
Train Diffusion:  86%|████████▌ | 1719/2000 [52:13<08:06,  1.73s/it][A
Train Diffusion:  86%|████████▌ | 1720/2000 [52:15<08:03,  1.73s/it][A

Moving average ELBO loss at 1720 iterations is: -25963.587109375. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.3244,  0.1440,  0.7000],
        [38.4624,  0.1443,  0.7057]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3978e+01, 1.4190e-03, 2.5334e-01],
         [4.3785e+01, 1.5810e-03, 2.7539e-01],
         ...,
         [3.2557e+01, 9.9718e-02, 9.4355e-01],
         [3.2950e+01, 1.1757e-01, 9.4961e-01],
         [3.3639e+01, 8.5574e-02, 9.0246e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5098e+01, 4.3889e-02, 9.0632e-02],
         [4.4029e+01, 2.2573e-10, 1.8798e-01],
         ...,
         [3.3241e+01, 1.1727e-01, 5.6843e-01],
         [3.2828e+01, 7.0775e-02, 5.1596e-01],
         [3.1241e+01, 1.0810e-01, 5.0694e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  86%|████████▌ | 1721/2000 [52:17<08:01,  1.73s/it][A
Train Diffusion:  86%|████████▌ | 1722/2000 [52:18<07:59,  1.72s/it][A
Train Diffusion:  86%|████████▌ | 1723/2000 [52:20<07:57,  1.72s/it][A
Train Diffusion:  86%|████████▌ | 1724/2000 [52:22<07:57,  1.73s/it][A
Train Diffusion:  86%|████████▋ | 1725/2000 [52:24<07:55,  1.73s/it][A
Train Diffusion:  86%|████████▋ | 1726/2000 [52:25<07:53,  1.73s/it][A
Train Diffusion:  86%|████████▋ | 1727/2000 [52:27<07:53,  1.74s/it][A
Train Diffusion:  86%|████████▋ | 1728/2000 [52:29<07:51,  1.73s/it][A
Train Diffusion:  86%|████████▋ | 1729/2000 [52:31<07:53,  1.75s/it][A
Train Diffusion:  86%|████████▋ | 1730/2000 [52:32<07:52,  1.75s/it][A

Moving average ELBO loss at 1730 iterations is: -25243.23671875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.4098,  0.1623,  0.6544],
        [38.4294,  0.1537,  0.6731]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5335e+01, 9.9777e-02, 1.2429e-01],
         [4.5284e+01, 3.6625e-10, 2.5432e-01],
         ...,
         [3.3951e+01, 1.4529e-01, 8.7284e-01],
         [3.3190e+01, 1.2313e-01, 9.2543e-01],
         [3.1508e+01, 9.7402e-02, 9.0756e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4192e+01, 3.3901e-03, 2.1748e-01],
         [4.3615e+01, 2.9994e-03, 2.4688e-01],
         ...,
         [3.1814e+01, 9.6987e-02, 6.0158e-01],
         [3.2488e+01, 7.8377e-02, 5.5183e-01],
         [3.3757e+01, 1.2372e-01, 5.6633e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  87%|████████▋ | 1731/2000 [52:34<07:51,  1.75s/it][A
Train Diffusion:  87%|████████▋ | 1732/2000 [52:36<07:47,  1.74s/it][A
Train Diffusion:  87%|████████▋ | 1733/2000 [52:37<07:43,  1.74s/it][A
Train Diffusion:  87%|████████▋ | 1734/2000 [52:39<07:40,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1735/2000 [52:41<07:37,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1736/2000 [52:43<07:36,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1737/2000 [52:44<07:33,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1738/2000 [52:46<07:32,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1739/2000 [52:48<07:30,  1.73s/it][A
Train Diffusion:  87%|████████▋ | 1740/2000 [52:50<07:27,  1.72s/it][A

Moving average ELBO loss at 1740 iterations is: -27223.2310546875. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.3687,  0.1594,  0.6864],
        [38.3797,  0.1556,  0.6906]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5912e+01, 5.3762e-02, 9.3361e-02],
         [4.4468e+01, 2.5022e-10, 8.6818e-02],
         ...,
         [3.3201e+01, 1.2766e-01, 7.5667e-01],
         [3.2751e+01, 7.7520e-02, 8.4956e-01],
         [3.1420e+01, 1.0583e-01, 8.6213e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.7093e+01, 1.5616e-03, 2.6231e-01],
         [4.6237e+01, 2.9230e-03, 4.4205e-01],
         ...,
         [3.2391e+01, 9.6640e-02, 7.3616e-01],
         [3.2881e+01, 1.2464e-01, 6.2860e-01],
         [3.3803e+01, 9.3521e-02, 5.6623e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  87%|████████▋ | 1741/2000 [52:51<07:26,  1.72s/it][A
Train Diffusion:  87%|████████▋ | 1742/2000 [52:53<07:23,  1.72s/it][A
Train Diffusion:  87%|████████▋ | 1743/2000 [52:55<07:22,  1.72s/it][A
Train Diffusion:  87%|████████▋ | 1744/2000 [52:56<07:20,  1.72s/it][A
Train Diffusion:  87%|████████▋ | 1745/2000 [52:58<07:18,  1.72s/it][A
Train Diffusion:  87%|████████▋ | 1746/2000 [53:00<07:38,  1.80s/it][A
Train Diffusion:  87%|████████▋ | 1747/2000 [53:02<07:40,  1.82s/it][A
Train Diffusion:  87%|████████▋ | 1748/2000 [53:04<07:34,  1.81s/it][A
Train Diffusion:  87%|████████▋ | 1749/2000 [53:06<08:20,  1.99s/it][A
Train Diffusion:  88%|████████▊ | 1750/2000 [53:08<08:18,  1.99s/it][A

Moving average ELBO loss at 1750 iterations is: -28243.67578125. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.4812,  0.1529,  0.7132],
        [38.3585,  0.1531,  0.6739]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4029e+01, 1.4645e-03, 1.7388e-01],
         [4.4191e+01, 3.4755e-10, 3.4087e-01],
         ...,
         [3.2744e+01, 1.2286e-01, 4.5311e-01],
         [3.2457e+01, 7.2864e-02, 4.5595e-01],
         [3.3580e+01, 1.0189e-01, 7.1974e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3382e+01, 8.7781e-02, 1.4218e-01],
         [4.2713e+01, 9.3476e-04, 1.3388e-01],
         ...,
         [3.3276e+01, 9.7732e-02, 1.0111e+00],
         [3.3655e+01, 1.3187e-01, 9.6754e-01],
         [3.1935e+01, 9.8804e-02, 8.0087e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  88%|████████▊ | 1751/2000 [53:10<08:12,  1.98s/it][A
Train Diffusion:  88%|████████▊ | 1752/2000 [53:12<08:13,  1.99s/it][A
Train Diffusion:  88%|████████▊ | 1753/2000 [53:14<07:58,  1.94s/it][A
Train Diffusion:  88%|████████▊ | 1754/2000 [53:16<07:49,  1.91s/it][A
Train Diffusion:  88%|████████▊ | 1755/2000 [53:18<07:37,  1.87s/it][A
Train Diffusion:  88%|████████▊ | 1756/2000 [53:19<07:28,  1.84s/it][A
Train Diffusion:  88%|████████▊ | 1757/2000 [53:21<07:29,  1.85s/it][A
Train Diffusion:  88%|████████▊ | 1758/2000 [53:23<07:44,  1.92s/it][A
Train Diffusion:  88%|████████▊ | 1759/2000 [53:25<07:46,  1.93s/it][A
Train Diffusion:  88%|████████▊ | 1760/2000 [53:27<07:50,  1.96s/it][A

Moving average ELBO loss at 1760 iterations is: -27065.923828125. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.3884,  0.1474,  0.7172],
        [38.4496,  0.1501,  0.7062]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5218e+01, 1.6889e-03, 2.1957e-01],
         [4.3919e+01, 3.7821e-10, 4.3329e-01],
         ...,
         [3.3606e+01, 1.0016e-01, 5.2206e-01],
         [3.3208e+01, 7.7403e-02, 4.9100e-01],
         [3.4039e+01, 1.2145e-01, 7.0244e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6521e+01, 8.2583e-02, 1.1326e-01],
         [4.5587e+01, 1.3707e-03, 1.1583e-01],
         ...,
         [3.2372e+01, 1.3852e-01, 7.8998e-01],
         [3.2798e+01, 1.1514e-01, 8.0352e-01],
         [3.1814e+01, 8.8320e-02, 6.8164e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  88%|████████▊ | 1761/2000 [53:29<07:49,  1.96s/it][A
Train Diffusion:  88%|████████▊ | 1762/2000 [53:31<08:04,  2.04s/it][A
Train Diffusion:  88%|████████▊ | 1763/2000 [53:33<07:47,  1.97s/it][A
Train Diffusion:  88%|████████▊ | 1764/2000 [53:35<07:33,  1.92s/it][A
Train Diffusion:  88%|████████▊ | 1765/2000 [53:37<07:27,  1.90s/it][A
Train Diffusion:  88%|████████▊ | 1766/2000 [53:39<07:32,  1.93s/it][A
Train Diffusion:  88%|████████▊ | 1767/2000 [53:41<07:27,  1.92s/it][A
Train Diffusion:  88%|████████▊ | 1768/2000 [53:43<07:33,  1.95s/it][A
Train Diffusion:  88%|████████▊ | 1769/2000 [53:45<07:19,  1.90s/it][A
Train Diffusion:  88%|████████▊ | 1770/2000 [53:47<07:16,  1.90s/it][A

Moving average ELBO loss at 1770 iterations is: -28331.3119140625. Best ELBO loss value is: -29323.798828125.

C_PATH mean = tensor([[38.5077,  0.1543,  0.6938],
        [38.3792,  0.1481,  0.6958]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4682e+01, 5.8927e-02, 1.1461e-01],
         [4.4780e+01, 2.1628e-10, 1.2040e-01],
         ...,
         [3.4118e+01, 1.0413e-01, 5.7191e-01],
         [3.3593e+01, 1.1219e-01, 5.2391e-01],
         [3.1874e+01, 8.5317e-02, 5.1850e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3925e+01, 2.1524e-03, 1.9744e-01],
         [4.3183e+01, 2.1258e-03, 4.0192e-01],
         ...,
         [3.2050e+01, 1.2989e-01, 9.3858e-01],
         [3.2733e+01, 7.2786e-02, 9.1958e-01],
         [3.4050e+01, 1.0286e-01, 8.7745e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  89%|████████▊ | 1771/2000 [53:49<07:34,  1.98s/it][A
Train Diffusion:  89%|████████▊ | 1772/2000 [53:51<07:18,  1.92s/it][A
Train Diffusion:  89%|████████▊ | 1773/2000 [53:52<07:07,  1.88s/it][A
Train Diffusion:  89%|████████▊ | 1774/2000 [53:54<06:58,  1.85s/it][A
Train Diffusion:  89%|████████▉ | 1775/2000 [53:56<07:05,  1.89s/it][A
Train Diffusion:  89%|████████▉ | 1776/2000 [53:58<07:31,  2.01s/it][A
Train Diffusion:  89%|████████▉ | 1777/2000 [54:00<07:20,  1.98s/it][A
Train Diffusion:  89%|████████▉ | 1778/2000 [54:02<07:23,  2.00s/it][A
Train Diffusion:  89%|████████▉ | 1779/2000 [54:04<07:22,  2.00s/it][A
Train Diffusion:  89%|████████▉ | 1780/2000 [54:06<07:16,  1.98s/it][A

Moving average ELBO loss at 1780 iterations is: -28535.697265625. Best ELBO loss value is: -29716.169921875.

C_PATH mean = tensor([[38.4727,  0.1446,  0.7172],
        [38.4289,  0.1540,  0.6999]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4781e+01, 1.1768e-03, 2.5964e-01],
         [4.4528e+01, 2.7755e-10, 4.6299e-01],
         ...,
         [3.3875e+01, 1.2393e-01, 7.3374e-01],
         [3.3366e+01, 1.2495e-01, 8.0551e-01],
         [3.4166e+01, 9.4648e-02, 8.1375e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5682e+01, 6.4128e-02, 8.5387e-02],
         [4.4350e+01, 1.3052e-03, 8.9716e-02],
         ...,
         [3.2343e+01, 1.0698e-01, 8.2607e-01],
         [3.3034e+01, 7.0476e-02, 6.8292e-01],
         [3.1976e+01, 1.0298e-01, 5.9745e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  89%|████████▉ | 1781/2000 [54:08<07:13,  1.98s/it][A
Train Diffusion:  89%|████████▉ | 1782/2000 [54:10<07:16,  2.00s/it][A
Train Diffusion:  89%|████████▉ | 1783/2000 [54:13<07:39,  2.12s/it][A
Train Diffusion:  89%|████████▉ | 1784/2000 [54:15<07:46,  2.16s/it][A
Train Diffusion:  89%|████████▉ | 1785/2000 [54:17<07:24,  2.07s/it][A
Train Diffusion:  89%|████████▉ | 1786/2000 [54:19<07:08,  2.00s/it][A
Train Diffusion:  89%|████████▉ | 1787/2000 [54:21<07:11,  2.03s/it][A
Train Diffusion:  89%|████████▉ | 1788/2000 [54:26<10:16,  2.91s/it][A
Train Diffusion:  89%|████████▉ | 1789/2000 [54:32<14:07,  4.02s/it][A
Train Diffusion:  90%|████████▉ | 1790/2000 [54:36<14:11,  4.06s/it][A

Moving average ELBO loss at 1790 iterations is: -27823.091015625. Best ELBO loss value is: -29716.169921875.

C_PATH mean = tensor([[38.4452,  0.1526,  0.6978],
        [38.4902,  0.1482,  0.6946]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5452e+01, 1.0176e-01, 2.5878e-01],
         [4.4343e+01, 2.5300e-10, 2.6714e-01],
         ...,
         [3.2662e+01, 1.2218e-01, 5.6800e-01],
         [3.3205e+01, 1.2188e-01, 5.2638e-01],
         [3.2282e+01, 9.1448e-02, 7.7789e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4591e+01, 1.9371e-03, 8.8355e-02],
         [4.4468e+01, 2.4858e-03, 1.9524e-01],
         ...,
         [3.3635e+01, 1.0607e-01, 9.4801e-01],
         [3.3377e+01, 7.0729e-02, 9.0766e-01],
         [3.4328e+01, 1.0856e-01, 7.2299e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  90%|████████▉ | 1791/2000 [54:39<12:46,  3.67s/it][A
Train Diffusion:  90%|████████▉ | 1792/2000 [54:42<11:34,  3.34s/it][A
Train Diffusion:  90%|████████▉ | 1793/2000 [54:44<10:17,  2.98s/it][A
Train Diffusion:  90%|████████▉ | 1794/2000 [54:46<09:28,  2.76s/it][A
Train Diffusion:  90%|████████▉ | 1795/2000 [54:48<08:47,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 1796/2000 [54:50<08:18,  2.44s/it][A
Train Diffusion:  90%|████████▉ | 1797/2000 [54:53<07:53,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1798/2000 [54:55<07:32,  2.24s/it][A
Train Diffusion:  90%|████████▉ | 1799/2000 [54:57<07:18,  2.18s/it][A
Train Diffusion:  90%|█████████ | 1800/2000 [54:59<07:08,  2.14s/it][A

Moving average ELBO loss at 1800 iterations is: -26004.806640625. Best ELBO loss value is: -29716.169921875.

C_PATH mean = tensor([[38.4242,  0.1514,  0.7080],
        [38.5298,  0.1516,  0.7003]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5157e+01, 7.7305e-02, 1.2998e-01],
         [4.4176e+01, 1.5197e-03, 1.2363e-01],
         ...,
         [3.2903e+01, 1.0964e-01, 7.3908e-01],
         [3.3394e+01, 1.3328e-01, 8.3360e-01],
         [3.2062e+01, 9.9300e-02, 7.2612e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4504e+01, 1.8238e-03, 1.8185e-01],
         [4.4236e+01, 3.0595e-10, 3.8802e-01],
         ...,
         [3.3577e+01, 1.2225e-01, 8.1126e-01],
         [3.3327e+01, 7.3203e-02, 5.9408e-01],
         [3.4370e+01, 1.0170e-01, 6.8340e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  90%|█████████ | 1801/2000 [55:01<07:12,  2.17s/it][A
Train Diffusion:  90%|█████████ | 1802/2000 [55:03<06:55,  2.10s/it][A
Train Diffusion:  90%|█████████ | 1803/2000 [55:05<06:40,  2.03s/it][A
Train Diffusion:  90%|█████████ | 1804/2000 [55:07<06:29,  1.99s/it][A
Train Diffusion:  90%|█████████ | 1805/2000 [55:09<06:36,  2.03s/it][A
Train Diffusion:  90%|█████████ | 1806/2000 [55:11<06:49,  2.11s/it][A
Train Diffusion:  90%|█████████ | 1807/2000 [55:13<06:48,  2.11s/it][A
Train Diffusion:  90%|█████████ | 1808/2000 [55:15<06:33,  2.05s/it][A
Train Diffusion:  90%|█████████ | 1809/2000 [55:17<06:26,  2.02s/it][A
Train Diffusion:  90%|█████████ | 1810/2000 [55:19<06:34,  2.08s/it][A

Moving average ELBO loss at 1810 iterations is: -27224.858984375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5335,  0.1512,  0.6906],
        [38.4612,  0.1548,  0.6899]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5726e+01, 9.3028e-02, 1.3259e-01],
         [4.5447e+01, 2.3864e-10, 1.2928e-01],
         ...,
         [3.3404e+01, 9.9014e-02, 4.3249e-01],
         [3.3860e+01, 1.3595e-01, 4.5680e-01],
         [3.2609e+01, 1.0111e-01, 7.1916e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4948e+01, 1.2403e-03, 1.8760e-01],
         [4.3769e+01, 2.3179e-03, 3.9338e-01],
         ...,
         [3.3075e+01, 1.4533e-01, 1.0333e+00],
         [3.2853e+01, 7.7013e-02, 9.5722e-01],
         [3.3890e+01, 1.0206e-01, 7.2717e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  91%|█████████ | 1811/2000 [55:21<06:31,  2.07s/it][A
Train Diffusion:  91%|█████████ | 1812/2000 [55:23<06:32,  2.09s/it][A
Train Diffusion:  91%|█████████ | 1813/2000 [55:25<06:27,  2.07s/it][A
Train Diffusion:  91%|█████████ | 1814/2000 [55:27<06:17,  2.03s/it][A
Train Diffusion:  91%|█████████ | 1815/2000 [55:30<06:27,  2.10s/it][A
Train Diffusion:  91%|█████████ | 1816/2000 [55:32<06:58,  2.28s/it][A
Train Diffusion:  91%|█████████ | 1817/2000 [55:36<08:10,  2.68s/it][A
Train Diffusion:  91%|█████████ | 1818/2000 [55:39<08:15,  2.72s/it][A
Train Diffusion:  91%|█████████ | 1819/2000 [55:41<07:44,  2.57s/it][A
Train Diffusion:  91%|█████████ | 1820/2000 [55:43<07:27,  2.49s/it][A

Moving average ELBO loss at 1820 iterations is: -28747.1041015625. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5141,  0.1478,  0.7122],
        [38.4896,  0.1507,  0.6970]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4547e+01, 1.3515e-03, 2.5253e-01],
         [4.4308e+01, 2.1214e-03, 2.7463e-01],
         ...,
         [3.3429e+01, 1.2545e-01, 4.9675e-01],
         [3.3204e+01, 1.1747e-01, 4.2228e-01],
         [3.4241e+01, 8.8218e-02, 6.3281e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5246e+01, 1.0702e-01, 1.0039e-01],
         [4.4289e+01, 2.1579e-10, 1.9363e-01],
         ...,
         [3.3194e+01, 1.2927e-01, 8.5005e-01],
         [3.3726e+01, 7.6031e-02, 8.9009e-01],
         [3.2632e+01, 1.1013e-01, 7.0974e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  91%|█████████ | 1821/2000 [55:45<07:05,  2.38s/it][A
Train Diffusion:  91%|█████████ | 1822/2000 [55:48<07:17,  2.46s/it][A
Train Diffusion:  91%|█████████ | 1823/2000 [55:50<06:58,  2.36s/it][A
Train Diffusion:  91%|█████████ | 1824/2000 [55:52<06:42,  2.29s/it][A
Train Diffusion:  91%|█████████▏| 1825/2000 [55:54<06:24,  2.20s/it][A
Train Diffusion:  91%|█████████▏| 1826/2000 [55:56<06:11,  2.14s/it][A
Train Diffusion:  91%|█████████▏| 1827/2000 [55:58<06:04,  2.10s/it][A
Train Diffusion:  91%|█████████▏| 1828/2000 [56:00<05:51,  2.04s/it][A
Train Diffusion:  91%|█████████▏| 1829/2000 [56:03<06:15,  2.20s/it][A
Train Diffusion:  92%|█████████▏| 1830/2000 [56:05<06:08,  2.17s/it][A

Moving average ELBO loss at 1830 iterations is: -28446.65625. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5204,  0.1618,  0.7066],
        [38.4963,  0.1471,  0.7069]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4459e+01, 1.7923e-03, 1.2893e-01],
         [4.4321e+01, 2.4405e-03, 2.6458e-01],
         ...,
         [3.2405e+01, 9.7279e-02, 6.1828e-01],
         [3.3002e+01, 1.3116e-01, 5.3867e-01],
         [3.4393e+01, 9.7000e-02, 4.9596e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5224e+01, 5.2348e-02, 1.8109e-01],
         [4.4334e+01, 1.7607e-10, 2.3239e-01],
         ...,
         [3.4255e+01, 1.4368e-01, 9.3558e-01],
         [3.3851e+01, 7.5509e-02, 9.1406e-01],
         [3.2221e+01, 9.8376e-02, 8.6302e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  92%|█████████▏| 1831/2000 [56:07<05:52,  2.09s/it][A
Train Diffusion:  92%|█████████▏| 1832/2000 [56:09<05:41,  2.03s/it][A
Train Diffusion:  92%|█████████▏| 1833/2000 [56:11<05:41,  2.05s/it][A
Train Diffusion:  92%|█████████▏| 1834/2000 [56:13<05:46,  2.08s/it][A
Train Diffusion:  92%|█████████▏| 1835/2000 [56:15<05:47,  2.11s/it][A
Train Diffusion:  92%|█████████▏| 1836/2000 [56:17<05:47,  2.12s/it][A
Train Diffusion:  92%|█████████▏| 1837/2000 [56:20<06:01,  2.22s/it][A
Train Diffusion:  92%|█████████▏| 1838/2000 [56:22<05:49,  2.16s/it][A
Train Diffusion:  92%|█████████▏| 1839/2000 [56:24<05:47,  2.16s/it][A
Train Diffusion:  92%|█████████▏| 1840/2000 [56:26<05:54,  2.22s/it][A

Moving average ELBO loss at 1840 iterations is: -28450.678515625. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.4553,  0.1488,  0.6943],
        [38.5827,  0.1550,  0.6962]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4372e+01, 1.4308e-03, 1.4079e-01],
         [4.3299e+01, 2.0446e-03, 2.5770e-01],
         ...,
         [3.2378e+01, 1.3262e-01, 7.9912e-01],
         [3.3111e+01, 1.3190e-01, 6.0759e-01],
         [3.4458e+01, 1.0171e-01, 5.2461e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5002e+01, 4.0726e-02, 1.7694e-01],
         [4.4987e+01, 1.5641e-10, 2.1786e-01],
         ...,
         [3.4445e+01, 1.1609e-01, 6.6833e-01],
         [3.3988e+01, 6.9486e-02, 7.7986e-01],
         [3.2273e+01, 9.9721e-02, 8.7587e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  92%|█████████▏| 1841/2000 [56:28<05:40,  2.14s/it][A
Train Diffusion:  92%|█████████▏| 1842/2000 [56:30<05:23,  2.05s/it][A
Train Diffusion:  92%|█████████▏| 1843/2000 [56:32<05:11,  1.99s/it][A
Train Diffusion:  92%|█████████▏| 1844/2000 [56:34<05:04,  1.95s/it][A
Train Diffusion:  92%|█████████▏| 1845/2000 [56:36<05:13,  2.02s/it][A
Train Diffusion:  92%|█████████▏| 1846/2000 [56:38<05:06,  1.99s/it][A
Train Diffusion:  92%|█████████▏| 1847/2000 [56:40<04:57,  1.94s/it][A
Train Diffusion:  92%|█████████▏| 1848/2000 [56:42<05:13,  2.06s/it][A
Train Diffusion:  92%|█████████▏| 1849/2000 [56:44<05:12,  2.07s/it][A
Train Diffusion:  92%|█████████▎| 1850/2000 [56:47<05:44,  2.30s/it][A

Moving average ELBO loss at 1850 iterations is: -28138.8787109375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.4547,  0.1468,  0.7309],
        [38.6187,  0.1494,  0.6905]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4775e+01, 6.8274e-02, 2.6104e-01],
         [4.3897e+01, 1.7343e-10, 2.7627e-01],
         ...,
         [3.3632e+01, 1.4161e-01, 8.8313e-01],
         [3.3409e+01, 1.2888e-01, 6.5642e-01],
         [3.2204e+01, 9.5048e-02, 5.9240e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3999e+01, 1.9563e-03, 8.8876e-02],
         [4.3758e+01, 2.5403e-03, 1.9487e-01],
         ...,
         [3.3284e+01, 1.0571e-01, 7.3581e-01],
         [3.3724e+01, 7.1631e-02, 8.5364e-01],
         [3.4578e+01, 1.0442e-01, 8.4530e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  93%|█████████▎| 1851/2000 [56:49<05:46,  2.32s/it][A
Train Diffusion:  93%|█████████▎| 1852/2000 [56:52<05:44,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1853/2000 [56:54<05:38,  2.30s/it][A
Train Diffusion:  93%|█████████▎| 1854/2000 [56:56<05:20,  2.19s/it][A
Train Diffusion:  93%|█████████▎| 1855/2000 [56:58<05:07,  2.12s/it][A
Train Diffusion:  93%|█████████▎| 1856/2000 [57:00<04:56,  2.06s/it][A
Train Diffusion:  93%|█████████▎| 1857/2000 [57:02<04:47,  2.01s/it][A
Train Diffusion:  93%|█████████▎| 1858/2000 [57:04<05:03,  2.14s/it][A
Train Diffusion:  93%|█████████▎| 1859/2000 [57:06<04:48,  2.05s/it][A
Train Diffusion:  93%|█████████▎| 1860/2000 [57:08<04:36,  1.97s/it][A

Moving average ELBO loss at 1860 iterations is: -27479.055078125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5856,  0.1528,  0.7084],
        [38.5236,  0.1554,  0.6940]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4484e+01, 1.1927e-01, 1.1428e-01],
         [4.4256e+01, 3.3560e-10, 1.1669e-01],
         ...,
         [3.3835e+01, 1.4036e-01, 8.4250e-01],
         [3.3483e+01, 1.2012e-01, 6.6993e-01],
         [3.4188e+01, 9.0747e-02, 5.9111e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5479e+01, 7.1957e-04, 2.1090e-01],
         [4.4345e+01, 1.3833e-03, 4.2226e-01],
         ...,
         [3.2945e+01, 9.3399e-02, 6.4282e-01],
         [3.3361e+01, 7.2607e-02, 7.9554e-01],
         [3.2254e+01, 1.1506e-01, 8.2453e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  93%|█████████▎| 1861/2000 [57:10<04:35,  1.98s/it][A
Train Diffusion:  93%|█████████▎| 1862/2000 [57:11<04:26,  1.93s/it][A
Train Diffusion:  93%|█████████▎| 1863/2000 [57:13<04:19,  1.90s/it][A
Train Diffusion:  93%|█████████▎| 1864/2000 [57:15<04:16,  1.89s/it][A
Train Diffusion:  93%|█████████▎| 1865/2000 [57:17<04:26,  1.98s/it][A
Train Diffusion:  93%|█████████▎| 1866/2000 [57:19<04:18,  1.93s/it][A
Train Diffusion:  93%|█████████▎| 1867/2000 [57:21<04:11,  1.89s/it][A
Train Diffusion:  93%|█████████▎| 1868/2000 [57:23<04:06,  1.87s/it][A
Train Diffusion:  93%|█████████▎| 1869/2000 [57:25<04:27,  2.04s/it][A
Train Diffusion:  94%|█████████▎| 1870/2000 [57:27<04:28,  2.07s/it][A

Moving average ELBO loss at 1870 iterations is: -26212.2115234375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5929,  0.1572,  0.6974],
        [38.5370,  0.1598,  0.6787]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4918e+01, 4.3168e-02, 1.7681e-01],
         [4.4827e+01, 1.7567e-10, 3.5062e-01],
         ...,
         [3.4132e+01, 1.4068e-01, 7.7254e-01],
         [3.3521e+01, 7.4416e-02, 6.4767e-01],
         [3.1873e+01, 1.0458e-01, 6.1858e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5998e+01, 1.3823e-03, 1.3293e-01],
         [4.4729e+01, 3.4695e-03, 1.3383e-01],
         ...,
         [3.2587e+01, 9.9493e-02, 6.5466e-01],
         [3.3154e+01, 1.3017e-01, 8.1522e-01],
         [3.4273e+01, 9.8043e-02, 8.3589e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  94%|█████████▎| 1871/2000 [57:30<04:41,  2.18s/it][A
Train Diffusion:  94%|█████████▎| 1872/2000 [57:32<04:34,  2.14s/it][A
Train Diffusion:  94%|█████████▎| 1873/2000 [57:34<04:26,  2.10s/it][A
Train Diffusion:  94%|█████████▎| 1874/2000 [57:36<04:23,  2.09s/it][A
Train Diffusion:  94%|█████████▍| 1875/2000 [57:38<04:18,  2.07s/it][A
Train Diffusion:  94%|█████████▍| 1876/2000 [57:40<04:10,  2.02s/it][A
Train Diffusion:  94%|█████████▍| 1877/2000 [57:42<04:10,  2.04s/it][A
Train Diffusion:  94%|█████████▍| 1878/2000 [57:44<04:08,  2.04s/it][A
Train Diffusion:  94%|█████████▍| 1879/2000 [57:47<04:33,  2.26s/it][A
Train Diffusion:  94%|█████████▍| 1880/2000 [57:50<04:53,  2.44s/it][A

Moving average ELBO loss at 1880 iterations is: -27339.418359375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.6367,  0.1480,  0.7004],
        [38.5177,  0.1501,  0.6892]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4045e+01, 2.3835e-03, 1.2114e-01],
         [4.3833e+01, 3.1022e-03, 1.2559e-01],
         ...,
         [3.3486e+01, 1.1035e-01, 8.4116e-01],
         [3.3839e+01, 1.1529e-01, 6.6817e-01],
         [3.4876e+01, 9.0461e-02, 5.9875e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4711e+01, 7.8034e-02, 1.9748e-01],
         [4.4275e+01, 1.8219e-10, 4.0973e-01],
         ...,
         [3.3595e+01, 1.3132e-01, 7.2028e-01],
         [3.3484e+01, 7.8248e-02, 7.7103e-01],
         [3.2327e+01, 1.0740e-01, 7.6663e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  94%|█████████▍| 1881/2000 [57:52<04:58,  2.51s/it][A
Train Diffusion:  94%|█████████▍| 1882/2000 [57:54<04:46,  2.43s/it][A
Train Diffusion:  94%|█████████▍| 1883/2000 [57:56<04:26,  2.28s/it][A
Train Diffusion:  94%|█████████▍| 1884/2000 [57:58<04:09,  2.15s/it][A
Train Diffusion:  94%|█████████▍| 1885/2000 [58:00<04:07,  2.15s/it][A
Train Diffusion:  94%|█████████▍| 1886/2000 [58:02<03:55,  2.06s/it][A
Train Diffusion:  94%|█████████▍| 1887/2000 [58:04<03:45,  2.00s/it][A
Train Diffusion:  94%|█████████▍| 1888/2000 [58:06<03:53,  2.09s/it][A
Train Diffusion:  94%|█████████▍| 1889/2000 [58:08<03:51,  2.08s/it][A
Train Diffusion:  94%|█████████▍| 1890/2000 [58:10<03:40,  2.01s/it][A

Moving average ELBO loss at 1890 iterations is: -27470.8693359375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5333,  0.1523,  0.7268],
        [38.6193,  0.1508,  0.7116]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.6234e+01, 4.3692e-02, 1.4614e-01],
         [4.4651e+01, 1.8095e-10, 1.4253e-01],
         ...,
         [3.3527e+01, 1.0924e-01, 9.7377e-01],
         [3.3223e+01, 1.2974e-01, 7.5366e-01],
         [3.1849e+01, 9.9443e-02, 6.2081e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5186e+01, 1.3242e-03, 1.7301e-01],
         [4.4872e+01, 3.3254e-03, 3.3431e-01],
         ...,
         [3.3394e+01, 1.4278e-01, 5.9559e-01],
         [3.3776e+01, 8.2058e-02, 7.7262e-01],
         [3.4384e+01, 1.0572e-01, 8.3493e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  95%|█████████▍| 1891/2000 [58:12<03:38,  2.01s/it][A
Train Diffusion:  95%|█████████▍| 1892/2000 [58:14<03:30,  1.95s/it][A
Train Diffusion:  95%|█████████▍| 1893/2000 [58:16<03:24,  1.91s/it][A
Train Diffusion:  95%|█████████▍| 1894/2000 [58:18<03:19,  1.88s/it][A
Train Diffusion:  95%|█████████▍| 1895/2000 [58:20<03:14,  1.86s/it][A
Train Diffusion:  95%|█████████▍| 1896/2000 [58:22<03:18,  1.91s/it][A
Train Diffusion:  95%|█████████▍| 1897/2000 [58:24<03:25,  2.00s/it][A
Train Diffusion:  95%|█████████▍| 1898/2000 [58:26<03:18,  1.95s/it][A
Train Diffusion:  95%|█████████▍| 1899/2000 [58:28<03:17,  1.96s/it][A
Train Diffusion:  95%|█████████▌| 1900/2000 [58:29<03:11,  1.91s/it][A

Moving average ELBO loss at 1900 iterations is: -28184.0615234375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.5945,  0.1599,  0.7132],
        [38.6057,  0.1601,  0.6911]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4757e+01, 4.4384e-02, 1.2975e-01],
         [4.4018e+01, 1.3100e-03, 2.6459e-01],
         ...,
         [3.3589e+01, 9.8597e-02, 6.9610e-01],
         [3.3309e+01, 1.2428e-01, 5.9179e-01],
         [3.1872e+01, 9.7258e-02, 5.4413e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4099e+01, 2.3349e-03, 1.7584e-01],
         [4.4145e+01, 3.0614e-10, 2.2929e-01],
         ...,
         [3.3464e+01, 1.2075e-01, 8.4362e-01],
         [3.3830e+01, 7.4289e-02, 8.8801e-01],
         [3.4753e+01, 9.7735e-02, 8.7215e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  95%|█████████▌| 1901/2000 [58:31<03:08,  1.90s/it][A
Train Diffusion:  95%|█████████▌| 1902/2000 [58:33<03:05,  1.89s/it][A
Train Diffusion:  95%|█████████▌| 1903/2000 [58:35<03:11,  1.97s/it][A
Train Diffusion:  95%|█████████▌| 1904/2000 [58:37<03:14,  2.02s/it][A
Train Diffusion:  95%|█████████▌| 1905/2000 [58:39<03:06,  1.96s/it][A
Train Diffusion:  95%|█████████▌| 1906/2000 [58:41<03:07,  1.99s/it][A
Train Diffusion:  95%|█████████▌| 1907/2000 [58:43<03:00,  1.94s/it][A
Train Diffusion:  95%|█████████▌| 1908/2000 [58:45<03:00,  1.96s/it][A
Train Diffusion:  95%|█████████▌| 1909/2000 [58:47<02:54,  1.92s/it][A
Train Diffusion:  96%|█████████▌| 1910/2000 [58:49<02:53,  1.93s/it][A

Moving average ELBO loss at 1910 iterations is: -28052.7142578125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.6383,  0.1528,  0.7226],
        [38.5519,  0.1460,  0.7163]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5114e+01, 1.1919e-01, 2.6257e-01],
         [4.4131e+01, 2.8116e-10, 4.8923e-01],
         ...,
         [3.4585e+01, 1.0765e-01, 7.3440e-01],
         [3.4069e+01, 1.1998e-01, 6.0775e-01],
         [3.4556e+01, 9.4450e-02, 7.4377e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4478e+01, 6.7666e-04, 9.5634e-02],
         [4.4148e+01, 1.5671e-03, 9.1603e-02],
         ...,
         [3.2614e+01, 1.2669e-01, 7.7268e-01],
         [3.3174e+01, 7.6956e-02, 8.1010e-01],
         [3.2185e+01, 1.0756e-01, 6.6739e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  96%|█████████▌| 1911/2000 [58:51<02:53,  1.95s/it][A
Train Diffusion:  96%|█████████▌| 1912/2000 [58:53<02:48,  1.91s/it][A
Train Diffusion:  96%|█████████▌| 1913/2000 [58:55<02:51,  1.97s/it][A
Train Diffusion:  96%|█████████▌| 1914/2000 [58:58<03:07,  2.18s/it][A
Train Diffusion:  96%|█████████▌| 1915/2000 [59:01<03:32,  2.50s/it][A
Train Diffusion:  96%|█████████▌| 1916/2000 [59:03<03:23,  2.42s/it][A
Train Diffusion:  96%|█████████▌| 1917/2000 [59:06<03:24,  2.47s/it][A
Train Diffusion:  96%|█████████▌| 1918/2000 [59:09<03:44,  2.73s/it][A
Train Diffusion:  96%|█████████▌| 1919/2000 [59:11<03:37,  2.68s/it][A
Train Diffusion:  96%|█████████▌| 1920/2000 [59:14<03:21,  2.52s/it][A

Moving average ELBO loss at 1920 iterations is: -28424.761328125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.6633,  0.1482,  0.7048],
        [38.5820,  0.1536,  0.7106]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4863e+01, 3.2460e-04, 2.6539e-01],
         [4.3737e+01, 1.3907e-03, 4.7716e-01],
         ...,
         [3.3193e+01, 1.2908e-01, 5.8693e-01],
         [3.3608e+01, 1.2683e-01, 7.6066e-01],
         [3.2123e+01, 9.6967e-02, 6.6746e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5702e+01, 6.6271e-02, 9.3376e-02],
         [4.5373e+01, 2.2672e-10, 9.4360e-02],
         ...,
         [3.3893e+01, 1.1199e-01, 9.5439e-01],
         [3.3593e+01, 7.5020e-02, 7.4028e-01],
         [3.4267e+01, 1.1249e-01, 7.6622e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  96%|█████████▌| 1921/2000 [59:16<03:04,  2.33s/it][A
Train Diffusion:  96%|█████████▌| 1922/2000 [59:17<02:52,  2.21s/it][A
Train Diffusion:  96%|█████████▌| 1923/2000 [59:19<02:43,  2.12s/it][A
Train Diffusion:  96%|█████████▌| 1924/2000 [59:21<02:35,  2.05s/it][A
Train Diffusion:  96%|█████████▋| 1925/2000 [59:23<02:28,  1.98s/it][A
Train Diffusion:  96%|█████████▋| 1926/2000 [59:25<02:23,  1.94s/it][A
Train Diffusion:  96%|█████████▋| 1927/2000 [59:27<02:24,  1.98s/it][A
Train Diffusion:  96%|█████████▋| 1928/2000 [59:29<02:22,  1.97s/it][A
Train Diffusion:  96%|█████████▋| 1929/2000 [59:31<02:17,  1.93s/it][A
Train Diffusion:  96%|█████████▋| 1930/2000 [59:33<02:13,  1.91s/it][A

Moving average ELBO loss at 1930 iterations is: -28579.8064453125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.6440,  0.1564,  0.7074],
        [38.6343,  0.1584,  0.6851]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4938e+01, 1.6965e-01, 1.3484e-01],
         [4.4577e+01, 3.3336e-10, 2.7311e-01],
         ...,
         [3.3678e+01, 1.0684e-01, 9.2219e-01],
         [3.4053e+01, 1.3077e-01, 6.8684e-01],
         [3.5117e+01, 9.7989e-02, 5.9375e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5498e+01, 3.6428e-04, 1.8346e-01],
         [4.4476e+01, 1.6307e-03, 2.2234e-01],
         ...,
         [3.3370e+01, 1.3689e-01, 5.1066e-01],
         [3.3172e+01, 7.9584e-02, 7.2278e-01],
         [3.1859e+01, 1.0475e-01, 7.9477e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  97%|█████████▋| 1931/2000 [59:35<02:11,  1.90s/it][A
Train Diffusion:  97%|█████████▋| 1932/2000 [59:36<02:08,  1.88s/it][A
Train Diffusion:  97%|█████████▋| 1933/2000 [59:38<02:06,  1.88s/it][A
Train Diffusion:  97%|█████████▋| 1934/2000 [59:40<02:09,  1.96s/it][A
Train Diffusion:  97%|█████████▋| 1935/2000 [59:42<02:05,  1.93s/it][A
Train Diffusion:  97%|█████████▋| 1936/2000 [59:44<02:01,  1.90s/it][A
Train Diffusion:  97%|█████████▋| 1937/2000 [59:46<01:59,  1.90s/it][A
Train Diffusion:  97%|█████████▋| 1938/2000 [59:48<02:01,  1.96s/it][A
Train Diffusion:  97%|█████████▋| 1939/2000 [59:50<02:03,  2.03s/it][A
Train Diffusion:  97%|█████████▋| 1940/2000 [59:52<02:01,  2.02s/it][A

Moving average ELBO loss at 1940 iterations is: -29205.355078125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.6365,  0.1476,  0.7294],
        [38.6322,  0.1497,  0.7014]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5227e+01, 1.4176e-04, 2.5818e-01],
         [4.5161e+01, 9.5391e-04, 2.8600e-01],
         ...,
         [3.3405e+01, 1.3094e-01, 9.6326e-01],
         [3.3193e+01, 1.2705e-01, 7.5466e-01],
         [3.1614e+01, 9.4117e-02, 7.6404e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4535e+01, 7.6705e-02, 8.6115e-02],
         [4.3581e+01, 2.0838e-10, 1.8773e-01],
         ...,
         [3.3713e+01, 1.1201e-01, 6.0143e-01],
         [3.4145e+01, 6.9681e-02, 7.5171e-01],
         [3.4968e+01, 1.0293e-01, 6.3519e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  97%|█████████▋| 1941/2000 [59:55<02:04,  2.11s/it][A
Train Diffusion:  97%|█████████▋| 1942/2000 [59:57<02:02,  2.12s/it][A
Train Diffusion:  97%|█████████▋| 1943/2000 [59:59<01:56,  2.05s/it][A
Train Diffusion:  97%|█████████▋| 1944/2000 [1:00:01<01:55,  2.06s/it][A
Train Diffusion:  97%|█████████▋| 1945/2000 [1:00:03<01:59,  2.17s/it][A
Train Diffusion:  97%|█████████▋| 1946/2000 [1:00:05<01:56,  2.16s/it][A
Train Diffusion:  97%|█████████▋| 1947/2000 [1:00:08<01:55,  2.19s/it][A
Train Diffusion:  97%|█████████▋| 1948/2000 [1:00:10<01:58,  2.27s/it][A
Train Diffusion:  97%|█████████▋| 1949/2000 [1:00:12<01:57,  2.31s/it][A
Train Diffusion:  98%|█████████▊| 1950/2000 [1:00:15<01:58,  2.37s/it][A

Moving average ELBO loss at 1950 iterations is: -27738.9568359375. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.7438,  0.1554,  0.7049],
        [38.5370,  0.1534,  0.7275]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4875e+01, 2.9213e-04, 1.3546e-01],
         [4.4934e+01, 1.0688e-03, 1.3172e-01],
         ...,
         [3.2685e+01, 1.0659e-01, 4.6066e-01],
         [3.3383e+01, 1.3365e-01, 4.8456e-01],
         [3.2387e+01, 1.0487e-01, 7.4034e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4199e+01, 5.6624e-02, 1.7174e-01],
         [4.3269e+01, 1.7577e-10, 3.8742e-01],
         ...,
         [3.4675e+01, 1.3582e-01, 1.0410e+00],
         [3.4206e+01, 7.1997e-02, 9.5097e-01],
         [3.4756e+01, 9.1835e-02, 7.1395e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  98%|█████████▊| 1951/2000 [1:00:17<01:57,  2.41s/it][A
Train Diffusion:  98%|█████████▊| 1952/2000 [1:00:20<01:54,  2.39s/it][A
Train Diffusion:  98%|█████████▊| 1953/2000 [1:00:22<01:49,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1954/2000 [1:00:24<01:44,  2.27s/it][A
Train Diffusion:  98%|█████████▊| 1955/2000 [1:00:26<01:37,  2.16s/it][A
Train Diffusion:  98%|█████████▊| 1956/2000 [1:00:28<01:31,  2.08s/it][A
Train Diffusion:  98%|█████████▊| 1957/2000 [1:00:30<01:30,  2.10s/it][A
Train Diffusion:  98%|█████████▊| 1958/2000 [1:00:33<01:35,  2.28s/it][A
Train Diffusion:  98%|█████████▊| 1959/2000 [1:00:35<01:35,  2.32s/it][A
Train Diffusion:  98%|█████████▊| 1960/2000 [1:00:37<01:31,  2.29s/it][A

Moving average ELBO loss at 1960 iterations is: -28163.80078125. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.7212,  0.1528,  0.6969],
        [38.6307,  0.1497,  0.7352]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4828e+01, 2.4570e-04, 2.0049e-01],
         [4.4770e+01, 8.8052e-04, 4.0885e-01],
         ...,
         [3.4506e+01, 1.2155e-01, 6.8952e-01],
         [3.3928e+01, 8.0502e-02, 6.2785e-01],
         [3.2297e+01, 1.1392e-01, 7.0770e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4139e+01, 7.8765e-02, 1.1835e-01],
         [4.3125e+01, 1.9642e-10, 1.2443e-01],
         ...,
         [3.2989e+01, 1.2774e-01, 8.7581e-01],
         [3.3674e+01, 1.1123e-01, 8.5877e-01],
         [3.4803e+01, 8.9425e-02, 7.0212e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  98%|█████████▊| 1961/2000 [1:00:39<01:24,  2.17s/it][A
Train Diffusion:  98%|█████████▊| 1962/2000 [1:00:42<01:25,  2.26s/it][A
Train Diffusion:  98%|█████████▊| 1963/2000 [1:00:44<01:24,  2.29s/it][A
Train Diffusion:  98%|█████████▊| 1964/2000 [1:00:46<01:20,  2.23s/it][A
Train Diffusion:  98%|█████████▊| 1965/2000 [1:00:48<01:18,  2.23s/it][A
Train Diffusion:  98%|█████████▊| 1966/2000 [1:00:51<01:16,  2.24s/it][A
Train Diffusion:  98%|█████████▊| 1967/2000 [1:00:53<01:10,  2.14s/it][A
Train Diffusion:  98%|█████████▊| 1968/2000 [1:00:54<01:05,  2.06s/it][A
Train Diffusion:  98%|█████████▊| 1969/2000 [1:00:57<01:04,  2.08s/it][A
Train Diffusion:  98%|█████████▊| 1970/2000 [1:00:59<01:02,  2.09s/it][A

Moving average ELBO loss at 1970 iterations is: -28430.0013671875. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.7922,  0.1465,  0.7011],
        [38.6735,  0.1464,  0.7094]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4523e+01, 1.5933e-04, 1.3937e-01],
         [4.3773e+01, 1.1921e-10, 1.3053e-01],
         ...,
         [3.4942e+01, 9.5556e-02, 8.5072e-01],
         [3.4183e+01, 1.2213e-01, 8.3576e-01],
         [3.2337e+01, 1.0254e-01, 6.8491e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.3833e+01, 5.1470e-02, 1.7817e-01],
         [4.3818e+01, 2.2651e-03, 3.9912e-01],
         ...,
         [3.2723e+01, 1.4582e-01, 7.4887e-01],
         [3.3507e+01, 8.0800e-02, 6.6506e-01],
         [3.4473e+01, 9.8701e-02, 7.6548e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  99%|█████████▊| 1971/2000 [1:01:02<01:08,  2.36s/it][A
Train Diffusion:  99%|█████████▊| 1972/2000 [1:01:04<01:04,  2.29s/it][A
Train Diffusion:  99%|█████████▊| 1973/2000 [1:01:06<01:00,  2.25s/it][A
Train Diffusion:  99%|█████████▊| 1974/2000 [1:01:08<00:57,  2.21s/it][A
Train Diffusion:  99%|█████████▉| 1975/2000 [1:01:10<00:54,  2.16s/it][A
Train Diffusion:  99%|█████████▉| 1976/2000 [1:01:12<00:53,  2.23s/it][A
Train Diffusion:  99%|█████████▉| 1977/2000 [1:01:14<00:48,  2.11s/it][A
Train Diffusion:  99%|█████████▉| 1978/2000 [1:01:17<00:47,  2.16s/it][A
Train Diffusion:  99%|█████████▉| 1979/2000 [1:01:19<00:45,  2.19s/it][A
Train Diffusion:  99%|█████████▉| 1980/2000 [1:01:21<00:43,  2.17s/it][A

Moving average ELBO loss at 1980 iterations is: -27597.4421875. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.7771,  0.1495,  0.6940],
        [38.7755,  0.1405,  0.7048]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5320e+01, 4.4770e-04, 1.2939e-01],
         [4.4016e+01, 1.6022e-03, 1.2980e-01],
         ...,
         [3.3520e+01, 1.0579e-01, 8.6868e-01],
         [3.3935e+01, 1.2907e-01, 7.5202e-01],
         [3.2524e+01, 9.7689e-02, 6.2848e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5027e+01, 8.5407e-02, 1.8315e-01],
         [4.4505e+01, 2.2501e-10, 3.7656e-01],
         ...,
         [3.3995e+01, 1.3217e-01, 7.3599e-01],
         [3.3516e+01, 7.8371e-02, 8.3766e-01],
         [3.4562e+01, 1.0285e-01, 8.4814e-01]]], grad_fn=<CatBackward>)



Train Diffusion:  99%|█████████▉| 1981/2000 [1:01:23<00:39,  2.08s/it][A
Train Diffusion:  99%|█████████▉| 1982/2000 [1:01:25<00:36,  2.02s/it][A
Train Diffusion:  99%|█████████▉| 1983/2000 [1:01:27<00:33,  1.96s/it][A
Train Diffusion:  99%|█████████▉| 1984/2000 [1:01:28<00:30,  1.92s/it][A
Train Diffusion:  99%|█████████▉| 1985/2000 [1:01:30<00:29,  1.95s/it][A
Train Diffusion:  99%|█████████▉| 1986/2000 [1:01:33<00:28,  2.04s/it][A
Train Diffusion:  99%|█████████▉| 1987/2000 [1:01:35<00:26,  2.02s/it][A
Train Diffusion:  99%|█████████▉| 1988/2000 [1:01:37<00:25,  2.09s/it][A
Train Diffusion:  99%|█████████▉| 1989/2000 [1:01:39<00:24,  2.23s/it][A
Train Diffusion: 100%|█████████▉| 1990/2000 [1:01:41<00:21,  2.17s/it][A

Moving average ELBO loss at 1990 iterations is: -27952.694921875. Best ELBO loss value is: -30623.275390625.

C_PATH mean = tensor([[38.7859,  0.1467,  0.6963],
        [38.7458,  0.1487,  0.6815]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.4754e+01, 6.2614e-02, 1.7618e-01],
         [4.4571e+01, 2.2705e-10, 3.8343e-01],
         ...,
         [3.4502e+01, 1.1753e-01, 6.1767e-01],
         [3.4070e+01, 7.3851e-02, 7.7076e-01],
         [3.4469e+01, 9.8758e-02, 8.0266e-01]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01],
         [4.5679e+01, 3.9978e-04, 1.3691e-01],
         [4.4418e+01, 1.2513e-03, 1.3367e-01],
         ...,
         [3.3017e+01, 1.2303e-01, 8.6218e-01],
         [3.3467e+01, 1.3293e-01, 7.0659e-01],
         [3.2227e+01, 1.0331e-01, 5.7786e-01]]], grad_fn=<CatBackward>)



Train Diffusion: 100%|█████████▉| 1991/2000 [1:01:44<00:19,  2.17s/it][A
Train Diffusion: 100%|█████████▉| 1992/2000 [1:01:46<00:16,  2.10s/it][A
Train Diffusion: 100%|█████████▉| 1993/2000 [1:01:48<00:15,  2.14s/it][A
Train Diffusion: 100%|█████████▉| 1994/2000 [1:01:50<00:12,  2.12s/it][A
Train Diffusion: 100%|█████████▉| 1995/2000 [1:01:52<00:10,  2.03s/it][A
Train Diffusion: 100%|█████████▉| 1996/2000 [1:01:54<00:07,  1.98s/it][A
Train Diffusion: 100%|█████████▉| 1997/2000 [1:01:56<00:06,  2.04s/it][A
Train Diffusion: 100%|█████████▉| 1998/2000 [1:01:58<00:04,  2.22s/it][A
Train Diffusion: 100%|█████████▉| 1999/2000 [1:02:00<00:02,  2.12s/it][A
Train Diffusion: 100%|██████████| 2000/2000 [1:02:02<00:00,  1.86s/it][A


In [62]:
train(devi, l_r, niter, piter, batch_size, obs_model_AWB_noCO2, state_dim_SAWB, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SAWB_C, SAWB_C_params_dict, temp_ref, analytical_steady_state_init_AWB)


Train Diffusion:   0%|          | 0/2000 [00:00<?, ?it/s][A

Moving average norm loss at 0 iterations is: 9000027066.54375. Best norm loss value is: 270665.4375.

C_PATH mean = tensor([[0.6320, 0.7462, 0.6426, 0.5870],
        [0.6330, 0.7444, 0.6428, 0.5859]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1403e+00, 6.2321e-01, 7.2588e-01, 6.5004e-01],
         [6.6174e-01, 6.6133e-01, 6.6457e-01, 6.0769e-01],
         ...,
         [5.6391e-01, 6.0876e-01, 6.0509e-01, 5.1487e-01],
         [6.2496e-01, 7.1293e-01, 4.6782e-01, 4.4324e-01],
         [6.0580e-01, 6.8121e-01, 5.2485e-01, 1.7011e+00]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.9373e-01, 6.4837e-01, 7.1434e-01, 6.4217e-01],
         [7.3071e-01, 3.0127e-01, 7.3029e-01, 6.6940e-01],
         ...,
         [6.5609e-01, 7.3394e-01, 6.8027e-01, 6.3848e-01],
         [5.7086e-01, 6.0173e-01, 5.5484e-01, 5.7558e-01],
         [6.1817e-01, 6.7112e-01, 5.1461e-01, 6.6502e-01]]],
       grad_fn=<CatBackw


Train Diffusion:   0%|          | 1/2000 [00:02<1:27:19,  2.62s/it][A
Train Diffusion:   0%|          | 2/2000 [00:05<1:23:27,  2.51s/it][A
Train Diffusion:   0%|          | 3/2000 [00:07<1:22:33,  2.48s/it][A
Train Diffusion:   0%|          | 4/2000 [00:10<1:28:19,  2.66s/it][A
Train Diffusion:   0%|          | 5/2000 [00:12<1:27:04,  2.62s/it][A
Train Diffusion:   0%|          | 6/2000 [00:15<1:27:21,  2.63s/it][A
Train Diffusion:   0%|          | 7/2000 [00:18<1:32:24,  2.78s/it][A
Train Diffusion:   0%|          | 8/2000 [00:22<1:42:09,  3.08s/it][A
Train Diffusion:   0%|          | 9/2000 [00:25<1:46:57,  3.22s/it][A
Train Diffusion:   0%|          | 10/2000 [00:30<1:55:42,  3.49s/it][A

Moving average norm loss at 10 iterations is: 257370.3703125. Best norm loss value is: 246773.375.

C_PATH mean = tensor([[4.9852, 0.2565, 0.3246, 0.2442],
        [4.7244, 0.2545, 0.3267, 0.2390]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.8175e-01, 4.3417e+00, 3.6923e-01, 2.6660e-01],
         [4.9992e+00, 2.6817e+00, 3.4076e-01, 3.1594e-01],
         ...,
         [7.9261e+00, 3.7483e-01, 3.4608e-01, 1.7128e-01],
         [1.0779e+01, 3.6442e-01, 3.6749e-01, 2.9755e-01],
         [9.2617e+00, 3.7162e-01, 3.3881e-01, 9.1452e-01]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.3471e+00, 1.7419e+00, 2.8974e-01, 3.1252e-01],
         [1.3114e-01, 1.6971e-01, 3.1184e-01, 2.3019e-01],
         ...,
         [2.4854e+00, 2.5222e-01, 2.2388e-01, 1.9537e-01],
         [7.0885e-02, 2.6377e-01, 2.5491e-01, 2.3473e-01],
         [5.0569e-01, 2.8238e-01, 3.0075e-01, 6.0470e-01]]],
       grad_fn=<CatBackwar


Train Diffusion:   1%|          | 11/2000 [00:33<1:59:53,  3.62s/it][A
Train Diffusion:   1%|          | 12/2000 [00:36<1:48:26,  3.27s/it][A
Train Diffusion:   1%|          | 13/2000 [00:39<1:47:23,  3.24s/it][A
Train Diffusion:   1%|          | 14/2000 [00:42<1:42:43,  3.10s/it][A
Train Diffusion:   1%|          | 15/2000 [00:45<1:37:55,  2.96s/it][A
Train Diffusion:   1%|          | 16/2000 [00:47<1:36:57,  2.93s/it][A
Train Diffusion:   1%|          | 17/2000 [00:50<1:32:10,  2.79s/it][A
Train Diffusion:   1%|          | 18/2000 [00:52<1:29:33,  2.71s/it][A
Train Diffusion:   1%|          | 19/2000 [00:56<1:33:57,  2.85s/it][A
Train Diffusion:   1%|          | 20/2000 [00:58<1:30:25,  2.74s/it][A

Moving average norm loss at 20 iterations is: 237155.4484375. Best norm loss value is: 228395.15625.

C_PATH mean = tensor([[8.4282, 0.1636, 0.1582, 0.1301],
        [8.5601, 0.1664, 0.1589, 0.1303]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.9934e-01, 1.5682e+00, 1.7991e-01, 1.3965e-01],
         [4.0901e-02, 2.0576e-01, 1.8280e-01, 1.5030e-01],
         ...,
         [1.4204e+01, 1.4563e-01, 1.2938e-01, 1.1794e-01],
         [1.6944e-03, 1.4960e-01, 1.3433e-01, 1.1667e-01],
         [2.6278e-01, 1.7195e-01, 1.3014e-01, 1.5606e-01]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.3689e+00, 6.5446e+00, 1.6540e-01, 1.4768e-01],
         [7.8589e+00, 3.0505e+00, 1.5773e-01, 1.3885e-01],
         ...,
         [3.6065e+00, 1.5247e-01, 1.3852e-01, 1.0668e-01],
         [1.7726e+01, 1.5610e-01, 1.2904e-01, 1.2394e-01],
         [1.4394e+01, 1.5737e-01, 1.4249e-01, 1.4991e-01]]],
       grad_fn=<CatBackw


Train Diffusion:   1%|          | 21/2000 [01:01<1:28:29,  2.68s/it][A
Train Diffusion:   1%|          | 22/2000 [01:04<1:34:29,  2.87s/it][A
Train Diffusion:   1%|          | 23/2000 [01:06<1:29:54,  2.73s/it][A
Train Diffusion:   1%|          | 24/2000 [01:09<1:32:14,  2.80s/it][A
Train Diffusion:   1%|▏         | 25/2000 [01:12<1:31:51,  2.79s/it][A
Train Diffusion:   1%|▏         | 26/2000 [01:15<1:31:21,  2.78s/it][A
Train Diffusion:   1%|▏         | 27/2000 [01:17<1:29:42,  2.73s/it][A
Train Diffusion:   1%|▏         | 28/2000 [01:20<1:30:52,  2.76s/it][A
Train Diffusion:   1%|▏         | 29/2000 [01:23<1:32:40,  2.82s/it][A
Train Diffusion:   2%|▏         | 30/2000 [01:26<1:32:44,  2.82s/it][A

Moving average norm loss at 30 iterations is: 217434.503125. Best norm loss value is: 208101.421875.

C_PATH mean = tensor([[12.6785,  0.1224,  0.0947,  0.0666],
        [12.3779,  0.1211,  0.0947,  0.0666]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.3816e-01, 7.8579e+00, 1.0598e-01, 7.4882e-02],
         [1.6604e-02, 2.4240e-01, 1.0653e-01, 7.9368e-02],
         ...,
         [5.1913e+00, 1.0182e-01, 7.4453e-02, 5.8262e-02],
         [3.9610e-05, 9.8255e-02, 7.7182e-02, 6.6006e-02],
         [2.0057e+01, 9.8499e-02, 7.5874e-02, 9.6990e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.4853e+00, 2.7582e+00, 1.0506e-01, 8.3087e-02],
         [1.1205e+01, 2.7387e+00, 1.1487e-01, 8.0268e-02],
         ...,
         [2.2441e+01, 1.0227e-01, 7.4908e-02, 5.1353e-02],
         [2.8922e+01, 9.5974e-02, 7.5233e-02, 5.9355e-02],
         [8.3052e-02, 1.0061e-01, 7.5388e-02, 8.9797e-02]]],
       grad_fn=<


Train Diffusion:   2%|▏         | 31/2000 [01:29<1:35:43,  2.92s/it][A
Train Diffusion:   2%|▏         | 32/2000 [01:32<1:37:58,  2.99s/it][A
Train Diffusion:   2%|▏         | 33/2000 [01:35<1:37:47,  2.98s/it][A
Train Diffusion:   2%|▏         | 34/2000 [01:38<1:36:51,  2.96s/it][A
Train Diffusion:   2%|▏         | 35/2000 [01:41<1:37:06,  2.97s/it][A
Train Diffusion:   2%|▏         | 36/2000 [01:44<1:40:12,  3.06s/it][A
Train Diffusion:   2%|▏         | 37/2000 [01:47<1:36:44,  2.96s/it][A
Train Diffusion:   2%|▏         | 38/2000 [01:50<1:35:28,  2.92s/it][A
Train Diffusion:   2%|▏         | 39/2000 [01:53<1:33:55,  2.87s/it][A
Train Diffusion:   2%|▏         | 40/2000 [01:56<1:33:03,  2.85s/it][A

Moving average norm loss at 40 iterations is: 195707.54375. Best norm loss value is: 185210.53125.

C_PATH mean = tensor([[17.1210,  0.1050,  0.0589,  0.0306],
        [17.1324,  0.0988,  0.0588,  0.0305]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.4155e+00, 1.4084e+01, 7.7438e-02, 3.9281e-02],
         [1.5986e+01, 5.3081e+00, 7.9463e-02, 4.1061e-02],
         ...,
         [7.6045e+00, 7.3355e-02, 3.9491e-02, 2.1432e-02],
         [3.9812e+01, 7.3654e-02, 3.9773e-02, 2.4204e-02],
         [3.2174e+01, 7.5839e-02, 4.0774e-02, 4.3542e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.9463e-01, 2.9143e+00, 7.3690e-02, 4.2980e-02],
         [3.2170e-03, 4.9346e-01, 7.8553e-02, 4.0496e-02],
         ...,
         [2.7808e+01, 7.0714e-02, 4.6655e-02, 2.2654e-02],
         [6.2452e-07, 6.9328e-02, 4.6018e-02, 2.7581e-02],
         [8.0206e-02, 6.8313e-02, 4.4802e-02, 4.8379e-02]]],
       grad_fn=<Ca


Train Diffusion:   2%|▏         | 41/2000 [01:58<1:29:53,  2.75s/it][A
Train Diffusion:   2%|▏         | 42/2000 [02:01<1:29:24,  2.74s/it][A
Train Diffusion:   2%|▏         | 43/2000 [02:03<1:29:05,  2.73s/it][A
Train Diffusion:   2%|▏         | 44/2000 [02:06<1:26:36,  2.66s/it][A
Train Diffusion:   2%|▏         | 45/2000 [02:09<1:29:09,  2.74s/it][A
Train Diffusion:   2%|▏         | 46/2000 [02:12<1:31:02,  2.80s/it][A
Train Diffusion:   2%|▏         | 47/2000 [02:14<1:28:56,  2.73s/it][A
Train Diffusion:   2%|▏         | 48/2000 [02:17<1:27:08,  2.68s/it][A
Train Diffusion:   2%|▏         | 49/2000 [02:20<1:30:23,  2.78s/it][A
Train Diffusion:   2%|▎         | 50/2000 [02:23<1:36:40,  2.97s/it][A

Moving average norm loss at 50 iterations is: 172776.615625. Best norm loss value is: 162687.046875.

C_PATH mean = tensor([[2.1238e+01, 1.0039e-01, 4.0160e-02, 1.2793e-02],
        [2.2636e+01, 1.0355e-01, 4.0170e-02, 1.2802e-02]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2800e+00, 3.6597e+00, 6.5057e-02, 2.1444e-02],
         [2.2261e+01, 2.7100e+00, 6.3703e-02, 2.0070e-02],
         ...,
         [8.8408e+00, 6.3014e-02, 1.8397e-02, 6.3008e-03],
         [4.6537e+01, 5.4519e-02, 2.0294e-02, 7.7742e-03],
         [3.2221e-02, 5.9826e-02, 2.2797e-02, 2.1403e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.2889e-01, 1.3457e+01, 6.1343e-02, 2.2799e-02],
         [2.7977e-04, 2.9678e-01, 6.4971e-02, 2.3489e-02],
         ...,
         [3.8877e+01, 5.9788e-02, 2.3003e-02, 7.1110e-03],
         [2.9536e-09, 6.6232e-02, 2.2491e-02, 9.1559e-03],
         [3.7317e+01, 6.5036e-02, 2.1097e-02, 


Train Diffusion:   3%|▎         | 51/2000 [02:26<1:37:12,  2.99s/it][A
Train Diffusion:   3%|▎         | 52/2000 [02:30<1:44:54,  3.23s/it][A
Train Diffusion:   3%|▎         | 53/2000 [02:34<1:45:11,  3.24s/it][A
Train Diffusion:   3%|▎         | 54/2000 [02:36<1:41:34,  3.13s/it][A
Train Diffusion:   3%|▎         | 55/2000 [02:39<1:36:54,  2.99s/it][A
Train Diffusion:   3%|▎         | 56/2000 [02:42<1:36:19,  2.97s/it][A
Train Diffusion:   3%|▎         | 57/2000 [02:45<1:35:45,  2.96s/it][A
Train Diffusion:   3%|▎         | 58/2000 [02:48<1:33:50,  2.90s/it][A
Train Diffusion:   3%|▎         | 59/2000 [02:50<1:29:41,  2.77s/it][A
Train Diffusion:   3%|▎         | 60/2000 [02:53<1:25:57,  2.66s/it][A

Moving average norm loss at 60 iterations is: 155284.5234375. Best norm loss value is: 151742.296875.

C_PATH mean = tensor([[2.6440e+01, 1.4535e-01, 3.1634e-02, 3.6932e-03],
        [2.7258e+01, 1.4217e-01, 3.1652e-02, 3.6933e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [7.6655e+00, 1.5416e+01, 7.1681e-02, 9.1305e-03],
         [2.1510e-05, 3.0461e-01, 7.3448e-02, 8.5408e-03],
         ...,
         [4.6859e+01, 7.0338e-02, 9.9359e-03, 1.0079e-03],
         [1.9604e-10, 6.1737e-02, 1.0067e-02, 1.5401e-03],
         [2.2139e-02, 6.5037e-02, 9.7194e-03, 8.7217e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6870e-01, 4.1261e+00, 6.6862e-02, 9.7532e-03],
         [2.8156e+01, 3.3391e+00, 6.6339e-02, 9.7677e-03],
         ...,
         [1.1551e+01, 6.4562e-02, 8.8909e-03, 1.0715e-03],
         [5.7395e+01, 7.1689e-02, 8.8426e-03, 1.6747e-03],
         [4.4633e+01, 7.1939e-02, 9.2395e-03,


Train Diffusion:   3%|▎         | 61/2000 [02:55<1:26:00,  2.66s/it][A
Train Diffusion:   3%|▎         | 62/2000 [02:58<1:26:48,  2.69s/it][A
Train Diffusion:   3%|▎         | 63/2000 [03:01<1:26:54,  2.69s/it][A
Train Diffusion:   3%|▎         | 64/2000 [03:03<1:26:04,  2.67s/it][A
Train Diffusion:   3%|▎         | 65/2000 [03:07<1:33:57,  2.91s/it][A
Train Diffusion:   3%|▎         | 66/2000 [03:10<1:36:47,  3.00s/it][A
Train Diffusion:   3%|▎         | 67/2000 [03:12<1:31:17,  2.83s/it][A
Train Diffusion:   3%|▎         | 68/2000 [03:15<1:27:02,  2.70s/it][A
Train Diffusion:   3%|▎         | 69/2000 [03:17<1:24:06,  2.61s/it][A
Train Diffusion:   4%|▎         | 70/2000 [03:20<1:23:06,  2.58s/it][A

Moving average norm loss at 70 iterations is: 148944.0875. Best norm loss value is: 145973.859375.

C_PATH mean = tensor([[2.7550e+01, 1.0657e-01, 3.4758e-02, 2.2133e-03],
        [2.7548e+01, 1.1075e-01, 3.4816e-02, 2.2181e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.6857e+00, 4.0005e+00, 6.6795e-02, 7.7390e-03],
         [2.9107e-05, 2.3350e-01, 6.7973e-02, 7.8553e-03],
         ...,
         [4.6862e+01, 7.2038e-02, 7.0808e-03, 2.6045e-04],
         [2.5892e-08, 7.2041e-02, 6.7895e-03, 4.7175e-04],
         [2.5275e-01, 6.7610e-02, 7.0058e-03, 4.6951e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5198e-01, 1.2618e+01, 6.8470e-02, 6.5505e-03],
         [3.2370e+01, 2.3538e+00, 7.1661e-02, 6.6085e-03],
         ...,
         [1.4159e+01, 7.7751e-02, 6.8645e-03, 2.5538e-04],
         [5.4906e+01, 7.6523e-02, 6.8830e-03, 4.6424e-04],
         [4.0614e+01, 7.2025e-02, 6.8345e-03, 4.


Train Diffusion:   4%|▎         | 71/2000 [03:22<1:22:47,  2.58s/it][A
Train Diffusion:   4%|▎         | 72/2000 [03:25<1:26:24,  2.69s/it][A
Train Diffusion:   4%|▎         | 73/2000 [03:28<1:26:50,  2.70s/it][A
Train Diffusion:   4%|▎         | 74/2000 [03:31<1:26:14,  2.69s/it][A
Train Diffusion:   4%|▍         | 75/2000 [03:33<1:23:21,  2.60s/it][A
Train Diffusion:   4%|▍         | 76/2000 [03:35<1:21:15,  2.53s/it][A
Train Diffusion:   4%|▍         | 77/2000 [03:38<1:19:49,  2.49s/it][A
Train Diffusion:   4%|▍         | 78/2000 [03:40<1:21:25,  2.54s/it][A
Train Diffusion:   4%|▍         | 79/2000 [03:43<1:24:48,  2.65s/it][A
Train Diffusion:   4%|▍         | 80/2000 [03:46<1:28:31,  2.77s/it][A

Moving average norm loss at 80 iterations is: 141277.93125. Best norm loss value is: 136839.0625.

C_PATH mean = tensor([[2.9094e+01, 1.1389e-01, 3.3466e-02, 8.6445e-04],
        [2.8428e+01, 1.1568e-01, 3.3332e-02, 8.6603e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3181e+01, 4.1255e+00, 8.3330e-02, 3.7932e-03],
         [1.4180e-04, 2.2403e+00, 8.6206e-02, 4.8947e-03],
         ...,
         [4.7069e+01, 9.6062e-02, 2.5968e-03, 8.3729e-05],
         [1.7097e-05, 9.6932e-02, 2.7851e-03, 1.6220e-04],
         [3.9120e+01, 8.7722e-02, 3.0315e-03, 1.8555e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.1142e-01, 1.1641e+01, 7.8418e-02, 3.5826e-03],
         [3.8756e+01, 1.5267e-01, 7.2818e-02, 2.6861e-03],
         ...,
         [1.7802e+01, 6.7150e-02, 5.6149e-03, 4.1403e-05],
         [5.2698e+01, 7.0386e-02, 6.2321e-03, 8.7226e-05],
         [1.8332e+00, 7.8764e-02, 5.2847e-03, 1.7


Train Diffusion:   4%|▍         | 81/2000 [03:54<2:13:41,  4.18s/it][A
Train Diffusion:   4%|▍         | 82/2000 [03:58<2:09:27,  4.05s/it][A
Train Diffusion:   4%|▍         | 83/2000 [04:01<1:58:36,  3.71s/it][A
Train Diffusion:   4%|▍         | 84/2000 [04:03<1:50:59,  3.48s/it][A
Train Diffusion:   4%|▍         | 85/2000 [04:06<1:40:53,  3.16s/it][A
Train Diffusion:   4%|▍         | 86/2000 [04:08<1:34:48,  2.97s/it][A
Train Diffusion:   4%|▍         | 87/2000 [04:11<1:32:13,  2.89s/it][A
Train Diffusion:   4%|▍         | 88/2000 [04:14<1:28:14,  2.77s/it][A
Train Diffusion:   4%|▍         | 89/2000 [04:16<1:28:54,  2.79s/it][A
Train Diffusion:   4%|▍         | 90/2000 [04:20<1:36:14,  3.02s/it][A

Moving average norm loss at 90 iterations is: 129966.9015625. Best norm loss value is: 123733.515625.

C_PATH mean = tensor([[3.0695e+01, 1.0451e-01, 2.0731e-02, 3.9045e-04],
        [3.1036e+01, 1.0584e-01, 2.0815e-02, 3.7422e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.9759e-01, 3.0911e+00, 6.9517e-02, 1.6376e-03],
         [2.6760e-02, 4.9551e-01, 6.5876e-02, 1.2263e-03],
         ...,
         [2.2019e+01, 4.5973e-02, 1.1127e-03, 2.1194e-05],
         [5.1899e+01, 5.7360e-02, 1.7380e-03, 5.0270e-05],
         [3.4791e+01, 6.7936e-02, 2.2103e-03, 7.0595e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3879e+01, 7.6810e+00, 5.6419e-02, 1.9461e-03],
         [4.0069e+01, 1.2867e-01, 5.9108e-02, 2.2207e-03],
         ...,
         [4.7308e+01, 6.3883e-02, 2.3341e-03, 1.6323e-05],
         [1.2245e-02, 5.7871e-02, 1.7726e-03, 2.7489e-05],
         [4.8849e+00, 5.3924e-02, 1.5634e-03,


Train Diffusion:   5%|▍         | 91/2000 [04:23<1:34:23,  2.97s/it][A
Train Diffusion:   5%|▍         | 92/2000 [04:26<1:31:30,  2.88s/it][A
Train Diffusion:   5%|▍         | 93/2000 [04:28<1:32:26,  2.91s/it][A
Train Diffusion:   5%|▍         | 94/2000 [04:31<1:31:09,  2.87s/it][A
Train Diffusion:   5%|▍         | 95/2000 [04:34<1:29:31,  2.82s/it][A
Train Diffusion:   5%|▍         | 96/2000 [04:37<1:30:05,  2.84s/it][A
Train Diffusion:   5%|▍         | 97/2000 [04:40<1:31:42,  2.89s/it][A
Train Diffusion:   5%|▍         | 98/2000 [04:42<1:28:26,  2.79s/it][A
Train Diffusion:   5%|▍         | 99/2000 [04:47<1:40:48,  3.18s/it][A
Train Diffusion:   5%|▌         | 100/2000 [04:50<1:42:51,  3.25s/it][A

Moving average norm loss at 100 iterations is: 114831.41171875. Best norm loss value is: 107155.8515625.

C_PATH mean = tensor([[3.3671e+01, 6.9051e-02, 7.1362e-03, 2.1736e-04],
        [3.3853e+01, 7.0475e-02, 7.0918e-03, 2.2178e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3707e+00, 4.0054e+00, 2.0879e-02, 6.1400e-04],
         [4.0375e+01, 1.2863e-01, 2.5585e-02, 5.8339e-04],
         ...,
         [4.8001e+01, 2.1230e-02, 7.7682e-04, 1.6520e-05],
         [2.9743e+00, 2.3744e-02, 7.2166e-04, 3.6247e-05],
         [3.3617e+01, 2.0296e-02, 6.5649e-04, 3.4957e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3463e+01, 7.2802e+00, 2.4809e-02, 1.1632e-03],
         [1.6641e+00, 2.8811e-01, 2.1821e-02, 1.0113e-03],
         ...,
         [2.6502e+01, 1.6029e-02, 7.3017e-04, 2.6748e-05],
         [5.1228e+01, 1.6436e-02, 9.0242e-04, 3.4308e-05],
         [8.8176e+00, 2.3252e-02, 1.2029e-


Train Diffusion:   5%|▌         | 101/2000 [04:53<1:43:47,  3.28s/it][A
Train Diffusion:   5%|▌         | 102/2000 [04:56<1:38:46,  3.12s/it][A
Train Diffusion:   5%|▌         | 103/2000 [04:59<1:34:20,  2.98s/it][A
Train Diffusion:   5%|▌         | 104/2000 [05:01<1:29:28,  2.83s/it][A
Train Diffusion:   5%|▌         | 105/2000 [05:04<1:25:01,  2.69s/it][A
Train Diffusion:   5%|▌         | 106/2000 [05:06<1:21:57,  2.60s/it][A
Train Diffusion:   5%|▌         | 107/2000 [05:09<1:22:42,  2.62s/it][A
Train Diffusion:   5%|▌         | 108/2000 [05:11<1:24:09,  2.67s/it][A
Train Diffusion:   5%|▌         | 109/2000 [05:14<1:22:53,  2.63s/it][A
Train Diffusion:   6%|▌         | 110/2000 [05:16<1:20:19,  2.55s/it][A

Moving average norm loss at 110 iterations is: 97276.8671875. Best norm loss value is: 89048.1484375.

C_PATH mean = tensor([[3.7829e+01, 9.4401e-03, 2.0886e-03, 4.2306e-04],
        [3.6729e+01, 8.3154e-03, 2.0927e-03, 4.2620e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.2659e+01, 8.9053e+00, 3.2558e-03, 7.9662e-04],
         [3.9873e+01, 1.6002e-01, 3.3991e-03, 6.8262e-04],
         ...,
         [3.1408e+01, 3.3712e-03, 8.1014e-04, 1.7263e-04],
         [5.1658e+01, 3.9400e-03, 7.3264e-04, 1.6836e-04],
         [1.3429e+01, 3.5639e-03, 7.9391e-04, 5.3506e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.3356e+00, 5.8457e+00, 3.2149e-03, 8.6140e-04],
         [7.1182e+00, 9.3366e-02, 3.2573e-03, 8.3769e-04],
         ...,
         [4.9458e+01, 2.2958e-03, 7.6036e-04, 1.7378e-04],
         [1.1041e+01, 2.1361e-03, 9.4451e-04, 2.5323e-04],
         [3.4942e+01, 2.7528e-03, 9.8372e-04,


Train Diffusion:   6%|▌         | 111/2000 [05:19<1:20:08,  2.55s/it][A
Train Diffusion:   6%|▌         | 112/2000 [05:21<1:19:35,  2.53s/it][A
Train Diffusion:   6%|▌         | 113/2000 [05:24<1:18:02,  2.48s/it][A
Train Diffusion:   6%|▌         | 114/2000 [05:26<1:16:55,  2.45s/it][A
Train Diffusion:   6%|▌         | 115/2000 [05:29<1:18:31,  2.50s/it][A
Train Diffusion:   6%|▌         | 116/2000 [05:31<1:17:40,  2.47s/it][A
Train Diffusion:   6%|▌         | 117/2000 [05:34<1:19:22,  2.53s/it][A
Train Diffusion:   6%|▌         | 118/2000 [05:37<1:23:56,  2.68s/it][A
Train Diffusion:   6%|▌         | 119/2000 [05:40<1:25:18,  2.72s/it][A
Train Diffusion:   6%|▌         | 120/2000 [05:42<1:23:21,  2.66s/it][A

Moving average norm loss at 120 iterations is: 78782.42734375. Best norm loss value is: 70291.078125.

C_PATH mean = tensor([[4.0876e+01, 4.2947e-03, 1.2389e-03, 7.1690e-04],
        [4.0740e+01, 5.3426e-03, 1.2430e-03, 7.1542e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.2327e+01, 7.6960e+00, 5.7471e-04, 7.0357e-04],
         [1.2370e+01, 3.0438e-01, 5.9427e-04, 1.1853e-03],
         ...,
         [3.6199e+01, 8.9554e-04, 1.3283e-03, 1.0189e-03],
         [5.1179e+01, 8.9900e-04, 1.1505e-03, 7.6994e-04],
         [1.8198e+01, 1.0749e-03, 1.2922e-03, 6.9971e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.4332e+00, 1.0557e+01, 1.3131e-03, 7.7285e-04],
         [3.8765e+01, 6.2460e-02, 1.3354e-03, 7.0064e-04],
         ...,
         [5.0411e+01, 8.9965e-04, 3.1232e-04, 2.5105e-04],
         [1.9425e+01, 9.7038e-04, 4.5258e-04, 2.6612e-04],
         [3.5008e+01, 9.9519e-04, 6.5631e-04,


Train Diffusion:   6%|▌         | 121/2000 [05:45<1:21:32,  2.60s/it][A
Train Diffusion:   6%|▌         | 122/2000 [05:47<1:19:50,  2.55s/it][A
Train Diffusion:   6%|▌         | 123/2000 [05:49<1:18:02,  2.49s/it][A
Train Diffusion:   6%|▌         | 124/2000 [05:52<1:18:32,  2.51s/it][A
Train Diffusion:   6%|▋         | 125/2000 [05:55<1:21:40,  2.61s/it][A
Train Diffusion:   6%|▋         | 126/2000 [05:58<1:23:57,  2.69s/it][A
Train Diffusion:   6%|▋         | 127/2000 [06:01<1:27:13,  2.79s/it][A
Train Diffusion:   6%|▋         | 128/2000 [06:04<1:30:43,  2.91s/it][A
Train Diffusion:   6%|▋         | 129/2000 [06:07<1:29:31,  2.87s/it][A
Train Diffusion:   6%|▋         | 130/2000 [06:09<1:25:22,  2.74s/it][A

Moving average norm loss at 130 iterations is: 59712.178515625. Best norm loss value is: 50914.26171875.

C_PATH mean = tensor([[4.4556e+01, 5.5043e-03, 1.2711e-03, 7.5312e-04],
        [4.4450e+01, 4.7332e-03, 1.2589e-03, 7.8341e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3133e+01, 1.0820e+01, 9.8868e-04, 6.3960e-04],
         [3.9671e+01, 8.2012e-02, 1.3521e-03, 9.4885e-05],
         ...,
         [4.0905e+01, 1.1145e-03, 9.7874e-05, 1.4157e-04],
         [2.7818e+01, 2.3326e-04, 1.2890e-03, 1.3539e-04],
         [2.3147e+01, 2.9470e-04, 2.7915e-03, 2.0529e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0884e+00, 8.6341e+00, 2.0443e-04, 2.1859e-04],
         [1.9089e+01, 3.2108e-01, 1.4491e-04, 2.8573e-03],
         ...,
         [5.1234e+01, 2.0294e-04, 1.2045e-03, 1.1424e-03],
         [5.0267e+01, 1.0065e-03, 1.3080e-04, 8.3885e-04],
         [3.4023e+01, 1.2452e-03, 1.2865e-


Train Diffusion:   7%|▋         | 131/2000 [06:12<1:24:03,  2.70s/it][A
Train Diffusion:   7%|▋         | 132/2000 [06:15<1:25:29,  2.75s/it][A
Train Diffusion:   7%|▋         | 133/2000 [06:17<1:22:00,  2.64s/it][A
Train Diffusion:   7%|▋         | 134/2000 [06:19<1:21:02,  2.61s/it][A
Train Diffusion:   7%|▋         | 135/2000 [06:22<1:19:02,  2.54s/it][A
Train Diffusion:   7%|▋         | 136/2000 [06:24<1:17:20,  2.49s/it][A
Train Diffusion:   7%|▋         | 137/2000 [06:27<1:18:40,  2.53s/it][A
Train Diffusion:   7%|▋         | 138/2000 [06:29<1:18:52,  2.54s/it][A
Train Diffusion:   7%|▋         | 139/2000 [06:32<1:21:01,  2.61s/it][A
Train Diffusion:   7%|▋         | 140/2000 [06:35<1:25:54,  2.77s/it][A

Moving average norm loss at 140 iterations is: 39917.5376953125. Best norm loss value is: 30780.560546875.

C_PATH mean = tensor([[4.8405e+01, 5.6564e-03, 1.6241e-03, 1.1134e-03],
        [4.8292e+01, 5.5035e-03, 1.6264e-03, 1.1907e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3856e+01, 1.0285e+01, 9.0700e-04, 5.7334e-04],
         [2.5798e+01, 2.7919e-02, 1.3761e-03, 2.8870e-05],
         ...,
         [4.5791e+01, 3.4306e-05, 1.0420e-03, 2.4141e-03],
         [4.9142e+01, 1.0407e-04, 4.3713e-05, 1.4335e-03],
         [2.7041e+01, 1.6720e-04, 5.5741e-03, 1.6553e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [7.0644e+00, 9.6142e+00, 5.7219e-05, 5.9385e-05],
         [4.0031e+01, 2.9172e-01, 3.7423e-05, 5.8024e-03],
         ...,
         [5.1781e+01, 1.3686e-03, 5.0483e-05, 2.4736e-05],
         [3.7318e+01, 8.7868e-04, 1.3267e-03, 1.5641e-05],
         [3.1999e+01, 1.1892e-03, 4.2040


Train Diffusion:   7%|▋         | 141/2000 [06:38<1:26:10,  2.78s/it][A
Train Diffusion:   7%|▋         | 142/2000 [06:42<1:40:21,  3.24s/it][A
Train Diffusion:   7%|▋         | 143/2000 [06:45<1:38:31,  3.18s/it][A
Train Diffusion:   7%|▋         | 144/2000 [06:49<1:38:32,  3.19s/it][A
Train Diffusion:   7%|▋         | 145/2000 [06:52<1:37:34,  3.16s/it][A
Train Diffusion:   7%|▋         | 146/2000 [06:55<1:37:40,  3.16s/it][A
Train Diffusion:   7%|▋         | 147/2000 [06:57<1:31:05,  2.95s/it][A
Train Diffusion:   7%|▋         | 148/2000 [07:00<1:31:09,  2.95s/it][A
Train Diffusion:   7%|▋         | 149/2000 [07:04<1:36:44,  3.14s/it][A
Train Diffusion:   8%|▊         | 150/2000 [07:07<1:34:36,  3.07s/it][A

Moving average norm loss at 150 iterations is: 19424.2876953125. Best norm loss value is: 10097.546875.

C_PATH mean = tensor([[5.2364e+01, 6.1813e-03, 2.2857e-03, 1.9747e-03],
        [5.2337e+01, 6.5153e-03, 2.3852e-03, 1.9063e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4507e+01, 9.9482e+00, 2.6500e-05, 4.8897e-04],
         [4.0339e+01, 1.7831e-02, 1.2826e-03, 1.3398e-05],
         ...,
         [5.1154e+01, 1.4889e-03, 1.6393e-03, 3.2486e-03],
         [4.7766e+01, 2.3673e-05, 1.6468e-03, 2.6384e-06],
         [3.1159e+01, 1.3514e-03, 1.0126e-05, 2.0674e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.2210e+00, 1.0096e+01, 6.3211e-04, 2.1786e-05],
         [3.3786e+01, 2.7638e-01, 1.2930e-05, 1.0968e-02],
         ...,
         [5.2565e+01, 9.6310e-06, 5.0071e-06, 2.1413e-05],
         [4.5783e+01, 1.0919e-03, 9.7164e-06, 2.2282e-03],
         [3.1025e+01, 5.9820e-05, 1.1581e-0


Train Diffusion:   8%|▊         | 151/2000 [07:10<1:31:52,  2.98s/it][A
Train Diffusion:   8%|▊         | 152/2000 [07:12<1:28:32,  2.87s/it][A
Train Diffusion:   8%|▊         | 153/2000 [07:15<1:28:05,  2.86s/it][A
Train Diffusion:   8%|▊         | 154/2000 [07:18<1:26:03,  2.80s/it][A
Train Diffusion:   8%|▊         | 155/2000 [07:20<1:25:20,  2.78s/it][A
Train Diffusion:   8%|▊         | 156/2000 [07:23<1:25:59,  2.80s/it][A
Train Diffusion:   8%|▊         | 157/2000 [07:26<1:25:21,  2.78s/it][A
Train Diffusion:   8%|▊         | 158/2000 [07:30<1:32:15,  3.01s/it][A
Train Diffusion:   8%|▊         | 159/2000 [07:33<1:34:14,  3.07s/it][A
Train Diffusion:   8%|▊         | 160/2000 [07:35<1:30:44,  2.96s/it][A

Moving average norm loss at 160 iterations is: 9044.863134765625. Best norm loss value is: 6297.17431640625.

C_PATH mean = tensor([[5.4628e+01, 1.2626e-02, 2.8755e-03, 1.0020e-03],
        [5.4631e+01, 1.3298e-02, 2.8312e-03, 1.0079e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1611e+01, 1.3617e+01, 4.0985e-05, 5.1669e-06],
         [3.8346e+01, 1.7474e-01, 1.7723e-05, 2.2193e-06],
         ...,
         [5.3296e+01, 2.2266e-03, 6.2246e-04, 6.7417e-07],
         [4.8939e+01, 3.4062e-05, 3.0692e-06, 6.2885e-04],
         [3.4160e+01, 2.4494e-03, 3.5354e-06, 1.4573e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6038e+01, 1.3339e+01, 1.0033e-03, 2.4446e-04],
         [4.2290e+01, 1.7480e+00, 2.3505e-03, 1.1529e-02],
         ...,
         [5.3401e+01, 1.7597e-05, 3.7132e-06, 9.3721e-04],
         [4.9124e+01, 1.8960e-03, 8.0924e-04, 4.7316e-07],
         [3.4444e+01, 7.5025e-05, 8.45


Train Diffusion:   8%|▊         | 161/2000 [07:38<1:25:20,  2.78s/it][A
Train Diffusion:   8%|▊         | 162/2000 [07:40<1:21:27,  2.66s/it][A
Train Diffusion:   8%|▊         | 163/2000 [07:43<1:18:40,  2.57s/it][A
Train Diffusion:   8%|▊         | 164/2000 [07:45<1:16:45,  2.51s/it][A
Train Diffusion:   8%|▊         | 165/2000 [07:48<1:17:34,  2.54s/it][A
Train Diffusion:   8%|▊         | 166/2000 [07:51<1:23:32,  2.73s/it][A
Train Diffusion:   8%|▊         | 167/2000 [07:55<1:40:24,  3.29s/it][A
Train Diffusion:   8%|▊         | 168/2000 [07:58<1:39:13,  3.25s/it][A
Train Diffusion:   8%|▊         | 169/2000 [08:01<1:31:32,  3.00s/it][A
Train Diffusion:   8%|▊         | 170/2000 [08:03<1:25:58,  2.82s/it][A

Moving average norm loss at 170 iterations is: 8196.779736328124. Best norm loss value is: 5324.08056640625.

C_PATH mean = tensor([[5.3413e+01, 4.6728e-02, 3.0531e-03, 3.6243e-04],
        [5.3395e+01, 4.4821e-02, 3.0030e-03, 3.7945e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1352e+01, 1.6916e+01, 1.2475e-04, 3.3981e-06],
         [3.4965e+01, 4.0977e+00, 5.3541e-05, 2.3602e-06],
         ...,
         [5.2715e+01, 1.1488e-04, 2.5363e-06, 5.3775e-08],
         [4.9060e+01, 3.2819e-03, 4.5475e-04, 1.5749e-04],
         [3.6285e+01, 4.9452e-03, 1.7485e-06, 1.8675e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5988e+01, 1.8472e+01, 2.6441e-03, 1.3179e-04],
         [4.0868e+01, 2.7334e+00, 5.4950e-03, 4.1343e-03],
         ...,
         [5.2341e+01, 3.5974e-03, 2.5953e-04, 1.7748e-04],
         [4.9009e+01, 1.7112e-04, 2.0164e-06, 4.2519e-08],
         [3.7349e+01, 2.5300e-04, 5.02


Train Diffusion:   9%|▊         | 171/2000 [08:06<1:22:00,  2.69s/it][A
Train Diffusion:   9%|▊         | 172/2000 [08:08<1:22:30,  2.71s/it][A
Train Diffusion:   9%|▊         | 173/2000 [08:12<1:28:38,  2.91s/it][A
Train Diffusion:   9%|▊         | 174/2000 [08:14<1:23:42,  2.75s/it][A
Train Diffusion:   9%|▉         | 175/2000 [08:17<1:20:07,  2.63s/it][A
Train Diffusion:   9%|▉         | 176/2000 [08:19<1:18:00,  2.57s/it][A
Train Diffusion:   9%|▉         | 177/2000 [08:21<1:17:38,  2.56s/it][A
Train Diffusion:   9%|▉         | 178/2000 [08:24<1:16:07,  2.51s/it][A
Train Diffusion:   9%|▉         | 179/2000 [08:26<1:14:50,  2.47s/it][A
Train Diffusion:   9%|▉         | 180/2000 [08:29<1:16:07,  2.51s/it][A

Moving average norm loss at 180 iterations is: 5817.160693359375. Best norm loss value is: 5124.72265625.

C_PATH mean = tensor([[5.3414e+01, 2.3571e-02, 2.8634e-03, 7.1421e-04],
        [5.3426e+01, 2.2910e-02, 2.8349e-03, 6.5776e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [8.9887e+00, 2.0145e+01, 6.7418e-05, 1.7878e-04],
         [3.0396e+01, 4.7993e+00, 3.1228e-03, 7.4645e-03],
         ...,
         [5.3877e+01, 3.9244e-05, 2.3909e-06, 5.2256e-04],
         [5.0264e+01, 2.3695e-03, 2.7431e-06, 2.0286e-07],
         [3.8967e+01, 3.1006e-03, 7.0443e-03, 1.7463e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3299e+01, 2.0217e+01, 1.5303e-03, 5.1351e-06],
         [3.6347e+01, 3.0282e+00, 3.2005e-05, 2.7108e-06],
         ...,
         [5.3731e+01, 3.1530e-03, 6.2465e-04, 2.7888e-07],
         [5.0383e+01, 7.9798e-05, 7.7130e-04, 3.8318e-04],
         [3.8989e+01, 1.3801e-04, 2.7825e


Train Diffusion:   9%|▉         | 181/2000 [08:31<1:15:09,  2.48s/it][A
Train Diffusion:   9%|▉         | 182/2000 [08:34<1:15:02,  2.48s/it][A
Train Diffusion:   9%|▉         | 183/2000 [08:37<1:24:24,  2.79s/it][A
Train Diffusion:   9%|▉         | 184/2000 [08:41<1:32:12,  3.05s/it][A
Train Diffusion:   9%|▉         | 185/2000 [08:44<1:29:16,  2.95s/it][A
Train Diffusion:   9%|▉         | 186/2000 [08:46<1:24:51,  2.81s/it][A
Train Diffusion:   9%|▉         | 187/2000 [08:49<1:22:26,  2.73s/it][A
Train Diffusion:   9%|▉         | 188/2000 [08:51<1:21:18,  2.69s/it][A
Train Diffusion:   9%|▉         | 189/2000 [08:54<1:20:36,  2.67s/it][A
Train Diffusion:  10%|▉         | 190/2000 [08:57<1:20:17,  2.66s/it][A

Moving average norm loss at 190 iterations is: 5761.068896484375. Best norm loss value is: 5124.72265625.

C_PATH mean = tensor([[5.3422e+01, 2.5329e-02, 3.2649e-03, 8.0746e-04],
        [5.3423e+01, 2.8604e-02, 3.6184e-03, 8.1415e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3084e+01, 2.0105e+01, 1.5024e-03, 1.9241e-04],
         [3.0630e+01, 3.2241e+00, 2.9761e-05, 2.1101e-06],
         ...,
         [5.3457e+01, 4.1172e-05, 2.0901e-06, 2.2448e-07],
         [4.9848e+01, 9.3352e-05, 7.0177e-04, 3.9375e-04],
         [3.8598e+01, 1.2753e-04, 1.8601e-06, 8.7403e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [8.9477e+00, 2.0260e+01, 7.9196e-05, 4.0989e-06],
         [3.5913e+01, 4.7691e+00, 3.6927e-03, 9.4747e-03],
         ...,
         [5.3506e+01, 3.5364e-03, 6.0337e-04, 5.9651e-04],
         [5.0003e+01, 2.3858e-03, 2.5327e-06, 1.8755e-07],
         [3.8782e+01, 3.8706e-03, 9.6959e


Train Diffusion:  10%|▉         | 191/2000 [08:59<1:20:58,  2.69s/it][A
Train Diffusion:  10%|▉         | 192/2000 [09:02<1:25:41,  2.84s/it][A
Train Diffusion:  10%|▉         | 193/2000 [09:07<1:38:40,  3.28s/it][A
Train Diffusion:  10%|▉         | 194/2000 [09:10<1:41:31,  3.37s/it][A
Train Diffusion:  10%|▉         | 195/2000 [09:14<1:39:53,  3.32s/it][A
Train Diffusion:  10%|▉         | 196/2000 [09:16<1:33:48,  3.12s/it][A
Train Diffusion:  10%|▉         | 197/2000 [09:19<1:32:13,  3.07s/it][A
Train Diffusion:  10%|▉         | 198/2000 [09:22<1:25:51,  2.86s/it][A
Train Diffusion:  10%|▉         | 199/2000 [09:24<1:21:33,  2.72s/it][A
Train Diffusion:  10%|█         | 200/2000 [09:26<1:18:47,  2.63s/it][A

Moving average norm loss at 200 iterations is: 5074.9126953125. Best norm loss value is: 4486.47314453125.

C_PATH mean = tensor([[5.3325e+01, 3.4381e-02, 4.3238e-03, 8.1180e-04],
        [5.3328e+01, 3.2956e-02, 3.8415e-03, 7.9162e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.4105e+00, 1.9852e+01, 9.0134e-05, 1.9125e-04],
         [3.1441e+01, 3.1141e+00, 4.3262e-03, 1.0463e-02],
         ...,
         [5.3041e+01, 4.9171e-03, 5.7025e-04, 5.0781e-04],
         [4.9405e+01, 3.3538e-03, 7.1994e-04, 3.2690e-04],
         [3.8199e+01, 1.7374e-04, 1.0445e-02, 1.8031e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3527e+01, 1.9831e+01, 2.0493e-03, 3.4737e-06],
         [3.6640e+01, 4.7391e+00, 4.0761e-05, 1.8031e-06],
         ...,
         [5.2958e+01, 4.3369e-05, 1.7636e-06, 1.3199e-07],
         [4.9392e+01, 9.9569e-05, 1.9353e-06, 1.2416e-07],
         [3.8187e+01, 4.6498e-03, 1.6005


Train Diffusion:  10%|█         | 201/2000 [09:29<1:20:09,  2.67s/it][A
Train Diffusion:  10%|█         | 202/2000 [09:32<1:21:16,  2.71s/it][A
Train Diffusion:  10%|█         | 203/2000 [09:35<1:20:33,  2.69s/it][A
Train Diffusion:  10%|█         | 204/2000 [09:38<1:29:27,  2.99s/it][A
Train Diffusion:  10%|█         | 205/2000 [09:41<1:27:05,  2.91s/it][A
Train Diffusion:  10%|█         | 206/2000 [09:44<1:29:39,  3.00s/it][A
Train Diffusion:  10%|█         | 207/2000 [09:48<1:33:18,  3.12s/it][A
Train Diffusion:  10%|█         | 208/2000 [09:51<1:33:35,  3.13s/it][A
Train Diffusion:  10%|█         | 209/2000 [09:54<1:38:22,  3.30s/it][A
Train Diffusion:  10%|█         | 210/2000 [09:57<1:32:32,  3.10s/it][A

Moving average norm loss at 210 iterations is: 5139.07509765625. Best norm loss value is: 4486.47314453125.

C_PATH mean = tensor([[5.3267e+01, 3.6694e-02, 4.3634e-03, 7.5545e-04],
        [5.3264e+01, 3.9359e-02, 4.4525e-03, 7.5266e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3830e+01, 1.9601e+01, 1.0353e-04, 1.7714e-04],
         [3.7165e+01, 3.0669e+00, 4.5473e-05, 1.7148e-06],
         ...,
         [5.2910e+01, 5.6985e-03, 1.5252e-06, 4.0708e-04],
         [4.9255e+01, 1.2945e-04, 1.7922e-06, 2.9351e-04],
         [3.8008e+01, 5.4664e-03, 1.5370e-06, 8.6673e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.6427e+00, 1.9595e+01, 2.3255e-03, 3.2998e-06],
         [3.1795e+01, 4.6525e+00, 5.0594e-03, 1.0104e-02],
         ...,
         [5.2902e+01, 4.9664e-05, 5.8340e-04, 1.0426e-07],
         [4.9277e+01, 3.5331e-03, 7.0426e-04, 9.2220e-08],
         [3.7954e+01, 1.9253e-04, 1.026


Train Diffusion:  11%|█         | 211/2000 [09:59<1:26:17,  2.89s/it][A
Train Diffusion:  11%|█         | 212/2000 [10:02<1:22:43,  2.78s/it][A
Train Diffusion:  11%|█         | 213/2000 [10:05<1:23:35,  2.81s/it][A
Train Diffusion:  11%|█         | 214/2000 [10:07<1:21:47,  2.75s/it][A
Train Diffusion:  11%|█         | 215/2000 [10:10<1:22:15,  2.77s/it][A
Train Diffusion:  11%|█         | 216/2000 [10:14<1:29:48,  3.02s/it][A
Train Diffusion:  11%|█         | 217/2000 [10:17<1:29:55,  3.03s/it][A
Train Diffusion:  11%|█         | 218/2000 [10:20<1:26:49,  2.92s/it][A
Train Diffusion:  11%|█         | 219/2000 [10:22<1:23:44,  2.82s/it][A
Train Diffusion:  11%|█         | 220/2000 [10:25<1:26:24,  2.91s/it][A

Moving average norm loss at 220 iterations is: 4900.654736328125. Best norm loss value is: 4291.69189453125.

C_PATH mean = tensor([[5.3429e+01, 3.8754e-02, 4.6702e-03, 8.1455e-04],
        [5.3425e+01, 3.8806e-02, 4.8752e-03, 7.8993e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3907e+01, 1.9194e+01, 1.0660e-04, 1.7387e-04],
         [3.7347e+01, 4.2569e+00, 5.3259e-03, 1.1005e-02],
         ...,
         [5.3022e+01, 5.2699e-05, 1.5436e-06, 4.3081e-04],
         [4.9305e+01, 3.8309e-03, 7.0456e-04, 8.9606e-08],
         [3.7815e+01, 1.9157e-04, 1.4395e-06, 8.9779e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.7557e+00, 1.9196e+01, 2.3979e-03, 3.1181e-06],
         [3.2189e+01, 2.7654e+00, 4.6581e-05, 1.5809e-06],
         ...,
         [5.3037e+01, 5.9240e-03, 5.8757e-04, 9.0977e-08],
         [4.9292e+01, 1.2818e-04, 1.7580e-06, 2.9479e-04],
         [3.7780e+01, 5.7187e-03, 1.06


Train Diffusion:  11%|█         | 221/2000 [10:28<1:26:13,  2.91s/it][A
Train Diffusion:  11%|█         | 222/2000 [10:31<1:22:36,  2.79s/it][A
Train Diffusion:  11%|█         | 223/2000 [10:33<1:20:38,  2.72s/it][A
Train Diffusion:  11%|█         | 224/2000 [10:36<1:23:51,  2.83s/it][A
Train Diffusion:  11%|█▏        | 225/2000 [10:39<1:23:55,  2.84s/it][A
Train Diffusion:  11%|█▏        | 226/2000 [10:42<1:23:32,  2.83s/it][A
Train Diffusion:  11%|█▏        | 227/2000 [10:45<1:23:24,  2.82s/it][A
Train Diffusion:  11%|█▏        | 228/2000 [10:48<1:23:27,  2.83s/it][A
Train Diffusion:  11%|█▏        | 229/2000 [10:50<1:22:57,  2.81s/it][A
Train Diffusion:  12%|█▏        | 230/2000 [10:53<1:20:12,  2.72s/it][A

Moving average norm loss at 230 iterations is: 4989.1384765625. Best norm loss value is: 4291.69189453125.

C_PATH mean = tensor([[5.3528e+01, 4.1105e-02, 4.7796e-03, 7.9494e-04],
        [5.3518e+01, 4.1681e-02, 5.0587e-03, 6.7946e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [9.9988e+00, 1.8675e+01, 2.5891e-03, 1.6432e-04],
         [3.2617e+01, 3.9727e+00, 5.3210e-05, 1.5587e-06],
         ...,
         [5.2997e+01, 6.2021e-03, 1.4288e-06, 3.6730e-04],
         [4.9203e+01, 1.3603e-04, 6.4583e-04, 2.7552e-04],
         [3.7442e+01, 6.0240e-03, 1.0231e-02, 8.8926e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4254e+01, 1.8660e+01, 1.2033e-04, 3.0218e-06],
         [3.8073e+01, 2.4811e+00, 5.7368e-03, 1.0362e-02],
         ...,
         [5.3144e+01, 6.2079e-05, 5.7538e-04, 7.5514e-08],
         [4.9198e+01, 4.2790e-03, 1.8480e-06, 7.1583e-08],
         [3.7468e+01, 2.1903e-04, 1.4344


Train Diffusion:  12%|█▏        | 231/2000 [10:56<1:21:07,  2.75s/it][A
Train Diffusion:  12%|█▏        | 232/2000 [10:59<1:23:21,  2.83s/it][A
Train Diffusion:  12%|█▏        | 233/2000 [11:02<1:27:33,  2.97s/it][A
Train Diffusion:  12%|█▏        | 234/2000 [11:05<1:29:34,  3.04s/it][A
Train Diffusion:  12%|█▏        | 235/2000 [11:08<1:27:38,  2.98s/it][A
Train Diffusion:  12%|█▏        | 236/2000 [11:11<1:27:30,  2.98s/it][A
Train Diffusion:  12%|█▏        | 237/2000 [11:14<1:25:51,  2.92s/it][A
Train Diffusion:  12%|█▏        | 238/2000 [11:17<1:23:26,  2.84s/it][A
Train Diffusion:  12%|█▏        | 239/2000 [11:19<1:23:01,  2.83s/it][A
Train Diffusion:  12%|█▏        | 240/2000 [11:22<1:19:54,  2.72s/it][A

Moving average norm loss at 240 iterations is: 5187.43291015625. Best norm loss value is: 4291.69189453125.

C_PATH mean = tensor([[5.3480e+01, 4.4018e-02, 5.3074e-03, 7.8437e-04],
        [5.3481e+01, 4.5130e-02, 5.4474e-03, 7.3166e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.0279e+01, 1.8036e+01, 1.3499e-04, 1.6278e-04],
         [3.8529e+01, 3.6028e+00, 6.2522e-03, 1.1648e-02],
         ...,
         [5.2850e+01, 6.6992e-05, 1.3797e-06, 6.1136e-08],
         [4.9019e+01, 1.6396e-04, 6.5760e-04, 6.2073e-08],
         [3.7016e+01, 2.4581e-04, 1.1037e-02, 1.6420e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4504e+01, 1.8016e+01, 2.9077e-03, 2.8771e-06],
         [3.3157e+01, 2.1569e+00, 6.0461e-05, 1.3930e-06],
         ...,
         [5.3067e+01, 7.0059e-03, 5.6682e-04, 3.6718e-04],
         [4.9001e+01, 4.4031e-03, 1.6974e-06, 2.6341e-04],
         [3.7006e+01, 6.6384e-03, 1.310


Train Diffusion:  12%|█▏        | 241/2000 [11:24<1:18:48,  2.69s/it][A
Train Diffusion:  12%|█▏        | 242/2000 [11:27<1:16:39,  2.62s/it][A
Train Diffusion:  12%|█▏        | 243/2000 [11:30<1:16:51,  2.62s/it][A
Train Diffusion:  12%|█▏        | 244/2000 [11:33<1:22:49,  2.83s/it][A
Train Diffusion:  12%|█▏        | 245/2000 [11:37<1:30:35,  3.10s/it][A
Train Diffusion:  12%|█▏        | 246/2000 [11:39<1:26:32,  2.96s/it][A
Train Diffusion:  12%|█▏        | 247/2000 [11:42<1:22:09,  2.81s/it][A
Train Diffusion:  12%|█▏        | 248/2000 [11:44<1:18:45,  2.70s/it][A
Train Diffusion:  12%|█▏        | 249/2000 [11:47<1:16:19,  2.62s/it][A
Train Diffusion:  12%|█▎        | 250/2000 [11:49<1:15:44,  2.60s/it][A

Moving average norm loss at 250 iterations is: 4692.88330078125. Best norm loss value is: 4291.69189453125.

C_PATH mean = tensor([[5.3440e+01, 4.6704e-02, 6.1871e-03, 8.6828e-04],
        [5.3444e+01, 4.1857e-02, 6.3574e-03, 1.0499e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.0373e+01, 1.7787e+01, 1.3200e-04, 1.8226e-04],
         [3.3408e+01, 1.9160e+00, 5.6220e-05, 1.3883e-06],
         ...,
         [5.3109e+01, 6.2111e-05, 6.2958e-04, 6.6934e-08],
         [4.9222e+01, 1.5216e-04, 7.6488e-04, 3.1821e-04],
         [3.7047e+01, 2.3595e-04, 1.4448e-02, 1.0116e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4395e+01, 1.7796e+01, 2.9214e-03, 2.9537e-06],
         [3.8104e+01, 3.3184e+00, 6.6275e-03, 1.4123e-02],
         ...,
         [5.3120e+01, 7.3567e-03, 1.4178e-06, 4.5819e-04],
         [4.9234e+01, 4.7019e-03, 1.6815e-06, 6.6750e-08],
         [3.7065e+01, 7.1781e-03, 1.277


Train Diffusion:  13%|█▎        | 251/2000 [11:52<1:16:48,  2.63s/it][A
Train Diffusion:  13%|█▎        | 252/2000 [11:55<1:21:50,  2.81s/it][A
Train Diffusion:  13%|█▎        | 253/2000 [11:58<1:23:46,  2.88s/it][A
Train Diffusion:  13%|█▎        | 254/2000 [12:01<1:22:42,  2.84s/it][A
Train Diffusion:  13%|█▎        | 255/2000 [12:04<1:24:31,  2.91s/it][A
Train Diffusion:  13%|█▎        | 256/2000 [12:07<1:29:19,  3.07s/it][A
Train Diffusion:  13%|█▎        | 257/2000 [12:11<1:37:32,  3.36s/it][A
Train Diffusion:  13%|█▎        | 258/2000 [12:14<1:31:18,  3.15s/it][A
Train Diffusion:  13%|█▎        | 259/2000 [12:17<1:29:03,  3.07s/it][A
Train Diffusion:  13%|█▎        | 260/2000 [12:19<1:24:50,  2.93s/it][A

Moving average norm loss at 260 iterations is: 4514.996142578125. Best norm loss value is: 4290.12109375.

C_PATH mean = tensor([[5.3369e+01, 5.2200e-02, 7.0721e-03, 9.2430e-04],
        [5.3372e+01, 5.0603e-02, 7.0548e-03, 9.5707e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.0808e+01, 1.7017e+01, 1.5444e-04, 2.6027e-06],
         [3.4012e+01, 2.9860e+00, 7.6492e-03, 1.4543e-02],
         ...,
         [5.2873e+01, 7.5731e-05, 1.2220e-06, 4.7042e-08],
         [4.8924e+01, 5.4736e-03, 7.4195e-04, 4.8502e-08],
         [3.6471e+01, 2.7428e-04, 1.5296e-02, 1.6755e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4835e+01, 1.7028e+01, 3.3692e-03, 1.7312e-04],
         [3.8731e+01, 1.6143e+00, 6.6798e-05, 1.2392e-06],
         ...,
         [5.2873e+01, 8.5536e-03, 6.1965e-04, 4.0445e-04],
         [4.8928e+01, 1.8269e-04, 1.4265e-06, 2.8797e-04],
         [3.6470e+01, 8.1017e-03, 1.0711e


Train Diffusion:  13%|█▎        | 261/2000 [12:23<1:29:22,  3.08s/it][A
Train Diffusion:  13%|█▎        | 262/2000 [12:26<1:26:44,  2.99s/it][A
Train Diffusion:  13%|█▎        | 263/2000 [12:28<1:22:01,  2.83s/it][A
Train Diffusion:  13%|█▎        | 264/2000 [12:31<1:21:01,  2.80s/it][A
Train Diffusion:  13%|█▎        | 265/2000 [12:33<1:17:44,  2.69s/it][A
Train Diffusion:  13%|█▎        | 266/2000 [12:36<1:15:07,  2.60s/it][A
Train Diffusion:  13%|█▎        | 267/2000 [12:39<1:17:19,  2.68s/it][A
Train Diffusion:  13%|█▎        | 268/2000 [12:41<1:17:02,  2.67s/it][A
Train Diffusion:  13%|█▎        | 269/2000 [12:44<1:18:15,  2.71s/it][A
Train Diffusion:  14%|█▎        | 270/2000 [12:47<1:18:15,  2.71s/it][A

Moving average norm loss at 270 iterations is: 4433.7291015625. Best norm loss value is: 4269.8662109375.

C_PATH mean = tensor([[5.3399e+01, 5.1047e-02, 8.1281e-03, 1.0488e-03],
        [5.3399e+01, 5.0712e-02, 7.3808e-03, 9.9191e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1038e+01, 1.6523e+01, 3.4211e-03, 1.7703e-04],
         [3.9079e+01, 2.6653e+00, 6.6441e-05, 1.4581e-02],
         ...,
         [5.3040e+01, 8.9111e-03, 6.2028e-04, 4.4633e-04],
         [4.8989e+01, 1.8644e-04, 1.4738e-06, 5.0474e-08],
         [3.6234e+01, 2.8698e-04, 1.7659e-02, 1.0975e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5068e+01, 1.6537e+01, 1.5815e-04, 2.6526e-06],
         [3.4442e+01, 1.3241e+00, 7.9883e-03, 1.2732e-06],
         ...,
         [5.3037e+01, 7.5623e-05, 1.2591e-06, 4.7242e-08],
         [4.8991e+01, 5.5164e-03, 7.7213e-04, 3.0824e-04],
         [3.6237e+01, 8.2405e-03, 1.0459e


Train Diffusion:  14%|█▎        | 271/2000 [12:49<1:16:43,  2.66s/it][A
Train Diffusion:  14%|█▎        | 272/2000 [12:53<1:23:15,  2.89s/it][A
Train Diffusion:  14%|█▎        | 273/2000 [12:55<1:18:57,  2.74s/it][A
Train Diffusion:  14%|█▎        | 274/2000 [12:58<1:15:52,  2.64s/it][A
Train Diffusion:  14%|█▍        | 275/2000 [13:00<1:13:49,  2.57s/it][A
Train Diffusion:  14%|█▍        | 276/2000 [13:02<1:12:07,  2.51s/it][A
Train Diffusion:  14%|█▍        | 277/2000 [13:06<1:18:34,  2.74s/it][A
Train Diffusion:  14%|█▍        | 278/2000 [13:09<1:21:28,  2.84s/it][A
Train Diffusion:  14%|█▍        | 279/2000 [13:12<1:24:34,  2.95s/it][A
Train Diffusion:  14%|█▍        | 280/2000 [13:15<1:22:37,  2.88s/it][A

Moving average norm loss at 280 iterations is: 4493.643017578125. Best norm loss value is: 4199.91552734375.

C_PATH mean = tensor([[5.3466e+01, 5.0064e-02, 8.2241e-03, 1.0767e-03],
        [5.3472e+01, 5.1350e-02, 8.2296e-03, 9.6377e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1358e+01, 1.5784e+01, 3.5612e-03, 2.6242e-06],
         [3.9797e+01, 9.7050e-01, 8.1260e-03, 1.2785e-06],
         ...,
         [5.2950e+01, 9.4507e-03, 6.2963e-04, 4.3665e-08],
         [4.8801e+01, 1.9230e-04, 7.6529e-04, 4.7766e-08],
         [3.5721e+01, 2.9353e-04, 1.0169e-06, 1.1294e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5460e+01, 1.5785e+01, 1.6582e-04, 1.7616e-04],
         [3.5007e+01, 2.3077e+00, 7.1320e-05, 1.3904e-02],
         ...,
         [5.3045e+01, 7.8516e-05, 1.2060e-06, 4.4147e-04],
         [4.8803e+01, 5.7460e-03, 1.4472e-06, 3.0400e-04],
         [3.5721e+01, 8.5899e-03, 1.83


Train Diffusion:  14%|█▍        | 281/2000 [13:17<1:19:12,  2.76s/it][A
Train Diffusion:  14%|█▍        | 282/2000 [13:20<1:22:50,  2.89s/it][A
Train Diffusion:  14%|█▍        | 283/2000 [13:23<1:20:55,  2.83s/it][A
Train Diffusion:  14%|█▍        | 284/2000 [13:25<1:17:02,  2.69s/it][A
Train Diffusion:  14%|█▍        | 285/2000 [13:28<1:14:12,  2.60s/it][A
Train Diffusion:  14%|█▍        | 286/2000 [13:30<1:12:49,  2.55s/it][A
Train Diffusion:  14%|█▍        | 287/2000 [13:33<1:11:39,  2.51s/it][A
Train Diffusion:  14%|█▍        | 288/2000 [13:36<1:16:12,  2.67s/it][A
Train Diffusion:  14%|█▍        | 289/2000 [13:39<1:21:49,  2.87s/it][A
Train Diffusion:  14%|█▍        | 290/2000 [13:42<1:23:49,  2.94s/it][A

Moving average norm loss at 290 iterations is: 4384.218603515625. Best norm loss value is: 4199.91552734375.

C_PATH mean = tensor([[5.3475e+01, 5.0042e-02, 8.6857e-03, 1.2729e-03],
        [5.3475e+01, 5.1392e-02, 1.0040e-02, 9.9370e-04]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.1737e+01, 1.5046e+01, 3.6842e-03, 2.5858e-06],
         [4.0291e+01, 6.8081e-01, 8.6149e-03, 1.4306e-02],
         ...,
         [5.2925e+01, 8.0382e-05, 6.6923e-04, 4.8331e-04],
         [4.8670e+01, 1.9653e-04, 8.1903e-04, 3.4113e-04],
         [3.5245e+01, 8.9409e-03, 9.7705e-07, 1.9481e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5808e+01, 1.5044e+01, 1.7038e-04, 1.8286e-04],
         [3.5803e+01, 1.9367e+00, 7.1535e-05, 1.2898e-06],
         ...,
         [5.2926e+01, 9.8337e-03, 1.2075e-06, 4.2417e-08],
         [4.8666e+01, 6.0308e-03, 1.4350e-06, 4.6199e-08],
         [3.5239e+01, 3.0525e-04, 2.13


Train Diffusion:  15%|█▍        | 291/2000 [13:45<1:20:54,  2.84s/it][A
Train Diffusion:  15%|█▍        | 292/2000 [13:47<1:17:05,  2.71s/it][A
Train Diffusion:  15%|█▍        | 293/2000 [13:49<1:14:21,  2.61s/it][A
Train Diffusion:  15%|█▍        | 294/2000 [13:52<1:12:30,  2.55s/it][A
Train Diffusion:  15%|█▍        | 295/2000 [13:55<1:14:41,  2.63s/it][A
Train Diffusion:  15%|█▍        | 296/2000 [13:58<1:20:01,  2.82s/it][A
Train Diffusion:  15%|█▍        | 297/2000 [14:01<1:22:41,  2.91s/it][A
Train Diffusion:  15%|█▍        | 298/2000 [14:04<1:22:03,  2.89s/it][A
Train Diffusion:  15%|█▍        | 299/2000 [14:06<1:17:33,  2.74s/it][A
Train Diffusion:  15%|█▌        | 300/2000 [14:09<1:14:24,  2.63s/it][A

Moving average norm loss at 300 iterations is: 4371.7767578125. Best norm loss value is: 4163.38427734375.

C_PATH mean = tensor([[5.3459e+01, 5.1853e-02, 1.0598e-02, 1.2455e-03],
        [5.3465e+01, 5.1161e-02, 1.0701e-02, 1.1424e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.2156e+01, 1.4396e+01, 3.8516e-03, 2.5715e-06],
         [3.6546e+01, 4.7765e-01, 7.5661e-05, 1.4306e-02],
         ...,
         [5.2991e+01, 8.3456e-05, 1.2136e-06, 3.9544e-08],
         [4.8627e+01, 6.2899e-03, 8.5757e-04, 3.5011e-04],
         [3.4843e+01, 9.5640e-03, 9.2875e-07, 2.0088e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6272e+01, 1.4401e+01, 1.7935e-04, 1.8660e-04],
         [4.0979e+01, 1.6855e+00, 9.1573e-03, 1.2877e-06],
         ...,
         [5.2997e+01, 1.0446e-02, 6.9180e-04, 5.0939e-04],
         [4.8627e+01, 2.0687e-04, 1.4050e-06, 4.4215e-08],
         [3.4847e+01, 3.1460e-04, 2.5257


Train Diffusion:  15%|█▌        | 301/2000 [14:11<1:14:35,  2.63s/it][A
Train Diffusion:  15%|█▌        | 302/2000 [14:14<1:12:19,  2.56s/it][A
Train Diffusion:  15%|█▌        | 303/2000 [14:16<1:13:05,  2.58s/it][A
Train Diffusion:  15%|█▌        | 304/2000 [14:19<1:12:17,  2.56s/it][A
Train Diffusion:  15%|█▌        | 305/2000 [14:21<1:13:23,  2.60s/it][A
Train Diffusion:  15%|█▌        | 306/2000 [14:24<1:11:51,  2.55s/it][A
Train Diffusion:  15%|█▌        | 307/2000 [14:27<1:13:39,  2.61s/it][A
Train Diffusion:  15%|█▌        | 308/2000 [14:29<1:15:23,  2.67s/it][A
Train Diffusion:  15%|█▌        | 309/2000 [14:33<1:18:51,  2.80s/it][A
Train Diffusion:  16%|█▌        | 310/2000 [14:35<1:15:44,  2.69s/it][A

Moving average norm loss at 310 iterations is: 4460.172265625. Best norm loss value is: 4156.63671875.

C_PATH mean = tensor([[5.3280e+01, 5.6971e-02, 1.3684e-02, 1.1817e-03],
        [5.3282e+01, 6.2812e-02, 1.1752e-02, 1.2424e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.2721e+01, 1.3654e+01, 4.5950e-03, 2.3182e-06],
         [4.1710e+01, 1.5235e+00, 1.0856e-02, 1.3987e-02],
         ...,
         [5.2818e+01, 1.2679e-02, 1.0750e-06, 4.7031e-04],
         [4.8395e+01, 2.5703e-04, 1.2622e-06, 3.0974e-08],
         [3.4267e+01, 3.8531e-04, 2.9205e-02, 1.9067e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6841e+01, 1.3649e+01, 2.1739e-04, 1.8425e-04],
         [3.7497e+01, 3.7087e-01, 9.1950e-05, 1.1542e-06],
         ...,
         [5.2815e+01, 1.0311e-04, 6.8482e-04, 2.7071e-08],
         [4.8394e+01, 7.5169e-03, 8.4834e-04, 3.2808e-04],
         [3.4265e+01, 1.1113e-02, 7.9384e-07


Train Diffusion:  16%|█▌        | 311/2000 [14:38<1:16:13,  2.71s/it][A
Train Diffusion:  16%|█▌        | 312/2000 [14:40<1:15:09,  2.67s/it][A
Train Diffusion:  16%|█▌        | 313/2000 [14:43<1:13:31,  2.62s/it][A
Train Diffusion:  16%|█▌        | 314/2000 [14:45<1:14:00,  2.63s/it][A
Train Diffusion:  16%|█▌        | 315/2000 [14:48<1:11:44,  2.55s/it][A
Train Diffusion:  16%|█▌        | 316/2000 [14:50<1:12:08,  2.57s/it][A
Train Diffusion:  16%|█▌        | 317/2000 [14:53<1:12:09,  2.57s/it][A
Train Diffusion:  16%|█▌        | 318/2000 [14:56<1:11:06,  2.54s/it][A
Train Diffusion:  16%|█▌        | 319/2000 [14:58<1:09:36,  2.48s/it][A
Train Diffusion:  16%|█▌        | 320/2000 [15:01<1:11:47,  2.56s/it][A

Moving average norm loss at 320 iterations is: 4485.083544921875. Best norm loss value is: 4151.43408203125.

C_PATH mean = tensor([[5.3438e+01, 5.2553e-02, 1.4162e-02, 1.3376e-03],
        [5.3440e+01, 5.5331e-02, 1.3276e-02, 1.3028e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6926e+01, 1.3482e+01, 4.2784e-03, 2.0033e-04],
         [4.2054e+01, 2.7419e-01, 1.0325e-02, 1.4136e-02],
         ...,
         [5.3152e+01, 1.2050e-02, 7.4113e-04, 5.5162e-04],
         [4.8580e+01, 2.3215e-04, 9.2442e-04, 3.9007e-08],
         [3.4322e+01, 3.4959e-04, 3.4680e-02, 1.3981e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.2704e+01, 1.3498e+01, 1.9723e-04, 2.5551e-06],
         [3.7526e+01, 1.3339e+00, 8.2504e-05, 1.3279e-06],
         ...,
         [5.3032e+01, 9.3453e-05, 1.1764e-06, 3.5610e-08],
         [4.8583e+01, 7.0891e-03, 1.3833e-06, 3.7774e-04],
         [3.4328e+01, 1.0530e-02, 8.13


Train Diffusion:  16%|█▌        | 321/2000 [15:03<1:11:30,  2.56s/it][A
Train Diffusion:  16%|█▌        | 322/2000 [15:06<1:09:51,  2.50s/it][A
Train Diffusion:  16%|█▌        | 323/2000 [15:08<1:09:29,  2.49s/it][A
Train Diffusion:  16%|█▌        | 324/2000 [15:11<1:11:22,  2.56s/it][A
Train Diffusion:  16%|█▋        | 325/2000 [15:13<1:11:51,  2.57s/it][A
Train Diffusion:  16%|█▋        | 326/2000 [15:16<1:11:21,  2.56s/it][A
Train Diffusion:  16%|█▋        | 327/2000 [15:19<1:13:17,  2.63s/it][A
Train Diffusion:  16%|█▋        | 328/2000 [15:21<1:13:45,  2.65s/it][A
Train Diffusion:  16%|█▋        | 329/2000 [15:24<1:15:40,  2.72s/it][A
Train Diffusion:  16%|█▋        | 330/2000 [15:27<1:14:31,  2.68s/it][A

Moving average norm loss at 330 iterations is: 4746.61015625. Best norm loss value is: 4105.59765625.

C_PATH mean = tensor([[5.3388e+01, 5.7944e-02, 1.6011e-02, 1.3152e-03],
        [5.3387e+01, 5.8299e-02, 1.5717e-02, 1.3400e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3062e+01, 1.2907e+01, 2.2096e-04, 1.9778e-04],
         [4.2868e+01, 2.1017e-01, 1.1420e-02, 1.2901e-06],
         ...,
         [5.2865e+01, 1.3564e-02, 1.1064e-06, 5.4601e-04],
         [4.8365e+01, 2.6230e-04, 9.1072e-04, 3.7953e-04],
         [3.3851e+01, 3.8359e-04, 4.1531e-02, 2.0880e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7404e+01, 1.2906e+01, 4.7064e-03, 2.4668e-06],
         [3.8093e+01, 1.2068e+00, 9.2789e-05, 1.3710e-02],
         ...,
         [5.3152e+01, 1.0125e-04, 7.3936e-04, 3.0255e-08],
         [4.8366e+01, 7.6647e-03, 1.3247e-06, 3.4890e-08],
         [3.3853e+01, 1.1741e-02, 7.3612e-07,


Train Diffusion:  17%|█▋        | 331/2000 [15:30<1:22:45,  2.97s/it][A
Train Diffusion:  17%|█▋        | 332/2000 [15:33<1:22:47,  2.98s/it][A
Train Diffusion:  17%|█▋        | 333/2000 [15:36<1:17:44,  2.80s/it][A
Train Diffusion:  17%|█▋        | 334/2000 [15:38<1:14:15,  2.67s/it][A
Train Diffusion:  17%|█▋        | 335/2000 [15:41<1:15:02,  2.70s/it][A
Train Diffusion:  17%|█▋        | 336/2000 [15:44<1:14:22,  2.68s/it][A
Train Diffusion:  17%|█▋        | 337/2000 [15:46<1:13:41,  2.66s/it][A
Train Diffusion:  17%|█▋        | 338/2000 [15:49<1:12:40,  2.62s/it][A
Train Diffusion:  17%|█▋        | 339/2000 [15:51<1:10:36,  2.55s/it][A
Train Diffusion:  17%|█▋        | 340/2000 [15:54<1:15:37,  2.73s/it][A

Moving average norm loss at 340 iterations is: 4571.32451171875. Best norm loss value is: 4105.59765625.

C_PATH mean = tensor([[5.3294e+01, 5.8627e-02, 2.2529e-02, 1.5396e-03],
        [5.3297e+01, 5.8031e-02, 1.6444e-02, 1.6571e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3178e+01, 1.2759e+01, 4.7519e-03, 2.1573e-04],
         [4.2907e+01, 1.1225e+00, 1.2081e-02, 1.3403e-06],
         ...,
         [5.2874e+01, 9.6250e-05, 1.1373e-06, 3.4719e-08],
         [4.8368e+01, 2.4787e-04, 1.0351e-03, 4.5368e-04],
         [3.3781e+01, 3.6720e-04, 7.0365e-07, 2.1769e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7457e+01, 1.2737e+01, 2.1279e-04, 2.5721e-06],
         [3.8395e+01, 1.8146e-01, 8.6082e-05, 1.4917e-02],
         ...,
         [5.2974e+01, 1.4188e-02, 8.3673e-04, 6.8768e-04],
         [4.8370e+01, 7.9240e-03, 1.3659e-06, 3.6770e-08],
         [3.3779e+01, 1.2142e-02, 5.5049e-


Train Diffusion:  17%|█▋        | 341/2000 [15:57<1:18:44,  2.85s/it][A
Train Diffusion:  17%|█▋        | 342/2000 [16:00<1:19:15,  2.87s/it][A
Train Diffusion:  17%|█▋        | 343/2000 [16:03<1:16:37,  2.77s/it][A
Train Diffusion:  17%|█▋        | 344/2000 [16:06<1:20:32,  2.92s/it][A
Train Diffusion:  17%|█▋        | 345/2000 [16:09<1:22:16,  2.98s/it][A
Train Diffusion:  17%|█▋        | 346/2000 [16:12<1:19:37,  2.89s/it][A
Train Diffusion:  17%|█▋        | 347/2000 [16:15<1:20:22,  2.92s/it][A
Train Diffusion:  17%|█▋        | 348/2000 [16:19<1:26:59,  3.16s/it][A
Train Diffusion:  17%|█▋        | 349/2000 [16:22<1:27:09,  3.17s/it][A
Train Diffusion:  18%|█▊        | 350/2000 [16:25<1:23:59,  3.05s/it][A

Moving average norm loss at 350 iterations is: 4478.07607421875. Best norm loss value is: 4105.59765625.

C_PATH mean = tensor([[5.3454e+01, 5.5096e-02, 2.3249e-02, 1.9963e-03],
        [5.3453e+01, 5.3949e-02, 2.4563e-02, 1.8914e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7331e+01, 1.2699e+01, 1.8214e-04, 2.9316e-06],
         [3.8457e+01, 1.0040e+00, 1.1795e-02, 1.5390e-06],
         ...,
         [5.3151e+01, 1.4280e-02, 9.5732e-04, 4.9959e-08],
         [4.8621e+01, 7.8698e-03, 1.2247e-03, 5.9604e-04],
         [3.3942e+01, 1.2248e-02, 7.9351e-02, 2.4679e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3095e+01, 1.2700e+01, 4.5163e-03, 2.4990e-04],
         [4.2813e+01, 1.3687e-01, 7.1921e-05, 1.7012e-02],
         ...,
         [5.3147e+01, 7.9939e-05, 1.3484e-06, 9.4414e-04],
         [4.8621e+01, 2.0659e-04, 1.5486e-06, 5.3147e-08],
         [3.3943e+01, 3.1764e-04, 7.0727e-


Train Diffusion:  18%|█▊        | 351/2000 [16:27<1:20:50,  2.94s/it][A
Train Diffusion:  18%|█▊        | 352/2000 [16:30<1:16:43,  2.79s/it][A
Train Diffusion:  18%|█▊        | 353/2000 [16:32<1:13:57,  2.69s/it][A
Train Diffusion:  18%|█▊        | 354/2000 [16:35<1:11:14,  2.60s/it][A
Train Diffusion:  18%|█▊        | 355/2000 [16:37<1:09:11,  2.52s/it][A
Train Diffusion:  18%|█▊        | 356/2000 [16:39<1:07:52,  2.48s/it][A
Train Diffusion:  18%|█▊        | 357/2000 [16:42<1:09:41,  2.54s/it][A
Train Diffusion:  18%|█▊        | 358/2000 [16:45<1:13:01,  2.67s/it][A
Train Diffusion:  18%|█▊        | 359/2000 [16:48<1:12:20,  2.64s/it][A
Train Diffusion:  18%|█▊        | 360/2000 [16:50<1:09:55,  2.56s/it][A

Moving average norm loss at 360 iterations is: 4657.205419921875. Best norm loss value is: 4091.41650390625.

C_PATH mean = tensor([[5.3358e+01, 5.9754e-02, 3.1209e-02, 2.0809e-03],
        [5.3358e+01, 5.9513e-02, 2.9731e-02, 2.2423e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3381e+01, 1.2437e+01, 1.9232e-04, 2.8355e-06],
         [4.3049e+01, 1.1462e-01, 1.3484e-02, 1.7984e-02],
         ...,
         [5.3255e+01, 1.6647e-02, 1.0695e-03, 4.4732e-08],
         [4.8672e+01, 2.3086e-04, 1.3659e-03, 6.6650e-04],
         [3.3804e+01, 3.4321e-04, 6.2241e-07, 2.4871e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7574e+01, 1.2440e+01, 4.9906e-03, 2.7114e-04],
         [3.8943e+01, 9.4951e-01, 7.5257e-05, 1.5637e-06],
         ...,
         [5.3255e+01, 8.3177e-05, 1.2378e-06, 1.0616e-03],
         [4.8670e+01, 8.6433e-03, 1.5048e-06, 4.6180e-08],
         [3.3804e+01, 1.3641e-02, 1.15


Train Diffusion:  18%|█▊        | 361/2000 [16:52<1:08:26,  2.51s/it][A
Train Diffusion:  18%|█▊        | 362/2000 [16:55<1:11:19,  2.61s/it][A
Train Diffusion:  18%|█▊        | 363/2000 [16:58<1:11:55,  2.64s/it][A
Train Diffusion:  18%|█▊        | 364/2000 [17:00<1:10:46,  2.60s/it][A
Train Diffusion:  18%|█▊        | 365/2000 [17:04<1:16:45,  2.82s/it][A
Train Diffusion:  18%|█▊        | 366/2000 [17:07<1:20:03,  2.94s/it][A
Train Diffusion:  18%|█▊        | 367/2000 [17:10<1:17:50,  2.86s/it][A
Train Diffusion:  18%|█▊        | 368/2000 [17:12<1:13:56,  2.72s/it][A
Train Diffusion:  18%|█▊        | 369/2000 [17:14<1:10:58,  2.61s/it][A
Train Diffusion:  18%|█▊        | 370/2000 [17:17<1:13:52,  2.72s/it][A

Moving average norm loss at 370 iterations is: 4415.39541015625. Best norm loss value is: 4091.41650390625.

C_PATH mean = tensor([[5.3390e+01, 6.1722e-02, 3.7527e-02, 2.4477e-03],
        [5.3390e+01, 6.1052e-02, 3.8746e-02, 2.5610e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3678e+01, 1.2024e+01, 1.9576e-04, 2.8220e-06],
         [4.3231e+01, 8.7324e-01, 7.3752e-05, 1.5189e-06],
         ...,
         [5.3047e+01, 1.8264e-02, 1.1879e-06, 1.3194e-03],
         [4.8431e+01, 9.5180e-03, 1.4818e-03, 8.1223e-04],
         [3.3441e+01, 1.4771e-02, 5.2348e-07, 2.7210e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7740e+01, 1.2004e+01, 5.2660e-03, 2.8759e-04],
         [3.9579e+01, 8.6013e-02, 1.4882e-02, 1.9379e-02],
         ...,
         [5.3043e+01, 8.3774e-05, 1.2087e-03, 4.4009e-08],
         [4.8431e+01, 2.2657e-04, 1.4957e-06, 4.4160e-08],
         [3.3433e+01, 3.4385e-04, 1.738


Train Diffusion:  19%|█▊        | 371/2000 [17:20<1:12:05,  2.66s/it][A
Train Diffusion:  19%|█▊        | 372/2000 [17:22<1:10:55,  2.61s/it][A
Train Diffusion:  19%|█▊        | 373/2000 [17:25<1:10:38,  2.61s/it][A
Train Diffusion:  19%|█▊        | 374/2000 [17:27<1:09:45,  2.57s/it][A
Train Diffusion:  19%|█▉        | 375/2000 [17:30<1:08:24,  2.53s/it][A
Train Diffusion:  19%|█▉        | 376/2000 [17:32<1:07:19,  2.49s/it][A
Train Diffusion:  19%|█▉        | 377/2000 [17:35<1:06:29,  2.46s/it][A
Train Diffusion:  19%|█▉        | 378/2000 [17:37<1:06:03,  2.44s/it][A
Train Diffusion:  19%|█▉        | 379/2000 [17:40<1:12:18,  2.68s/it][A
Train Diffusion:  19%|█▉        | 380/2000 [17:43<1:15:11,  2.78s/it][A

Moving average norm loss at 380 iterations is: 4143.790380859375. Best norm loss value is: 4017.898193359375.

C_PATH mean = tensor([[5.3383e+01, 5.9128e-02, 4.4643e-02, 2.8425e-03],
        [5.3383e+01, 5.6684e-02, 4.2465e-02, 2.5855e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3800e+01, 1.1794e+01, 1.9129e-04, 2.9840e-06],
         [4.3411e+01, 7.0378e-02, 1.5063e-02, 1.9693e-02],
         ...,
         [5.3001e+01, 8.0923e-05, 1.2537e-06, 1.5529e-03],
         [4.8387e+01, 9.3991e-03, 1.5173e-06, 9.3871e-04],
         [3.3306e+01, 3.3652e-04, 2.4260e-01, 2.6020e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.7850e+01, 1.1820e+01, 5.1979e-03, 3.0809e-04],
         [3.9707e+01, 8.2040e-01, 7.2112e-05, 1.6257e-06],
         ...,
         [5.3002e+01, 1.8789e-02, 1.2693e-03, 4.9316e-08],
         [4.8387e+01, 2.2238e-04, 1.6400e-03, 4.9484e-08],
         [3.3315e+01, 1.4754e-02, 4.7


Train Diffusion:  19%|█▉        | 381/2000 [17:46<1:16:27,  2.83s/it][A
Train Diffusion:  19%|█▉        | 382/2000 [17:49<1:13:43,  2.73s/it][A
Train Diffusion:  19%|█▉        | 383/2000 [17:52<1:14:20,  2.76s/it][A
Train Diffusion:  19%|█▉        | 384/2000 [17:54<1:12:58,  2.71s/it][A
Train Diffusion:  19%|█▉        | 385/2000 [17:57<1:14:54,  2.78s/it][A
Train Diffusion:  19%|█▉        | 386/2000 [18:00<1:16:11,  2.83s/it][A
Train Diffusion:  19%|█▉        | 387/2000 [18:02<1:12:33,  2.70s/it][A
Train Diffusion:  19%|█▉        | 388/2000 [18:05<1:11:33,  2.66s/it][A
Train Diffusion:  19%|█▉        | 389/2000 [18:08<1:14:00,  2.76s/it][A
Train Diffusion:  20%|█▉        | 390/2000 [18:11<1:12:47,  2.71s/it][A

Moving average norm loss at 390 iterations is: 4458.0968017578125. Best norm loss value is: 3907.433349609375.

C_PATH mean = tensor([[5.3407e+01, 6.1599e-02, 4.5270e-02, 2.6893e-03],
        [5.3419e+01, 6.0907e-02, 5.2684e-02, 2.5420e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.3916e+01, 1.1707e+01, 5.6224e-03, 3.1652e-04],
         [4.3727e+01, 6.3336e-02, 7.9089e-05, 1.6784e-06],
         ...,
         [5.3209e+01, 8.7196e-05, 1.3487e-03, 4.6582e-08],
         [4.8576e+01, 2.4361e-04, 1.7410e-03, 4.6171e-08],
         [3.3357e+01, 1.6050e-02, 3.1783e-01, 2.8736e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8064e+01, 1.1710e+01, 2.1032e-04, 3.0125e-06],
         [3.9688e+01, 8.1734e-01, 1.6459e-02, 1.9324e-02],
         ...,
         [5.3352e+01, 2.0972e-02, 1.2776e-06, 1.5635e-03],
         [4.8577e+01, 1.0365e-02, 1.5212e-06, 9.4619e-04],
         [3.3358e+01, 3.7016e-04, 4.


Train Diffusion:  20%|█▉        | 391/2000 [18:13<1:11:56,  2.68s/it][A
Train Diffusion:  20%|█▉        | 392/2000 [18:16<1:10:15,  2.62s/it][A
Train Diffusion:  20%|█▉        | 393/2000 [18:18<1:07:55,  2.54s/it][A
Train Diffusion:  20%|█▉        | 394/2000 [18:21<1:08:27,  2.56s/it][A
Train Diffusion:  20%|█▉        | 395/2000 [18:24<1:12:42,  2.72s/it][A
Train Diffusion:  20%|█▉        | 396/2000 [18:27<1:14:32,  2.79s/it][A
Train Diffusion:  20%|█▉        | 397/2000 [18:30<1:18:00,  2.92s/it][A
Train Diffusion:  20%|█▉        | 398/2000 [18:33<1:15:39,  2.83s/it][A
Train Diffusion:  20%|█▉        | 399/2000 [18:35<1:15:08,  2.82s/it][A
Train Diffusion:  20%|██        | 400/2000 [18:38<1:13:08,  2.74s/it][A

Moving average norm loss at 400 iterations is: 4166.387133789062. Best norm loss value is: 3907.433349609375.

C_PATH mean = tensor([[5.3489e+01, 5.6020e-02, 5.3476e-02, 3.1598e-03],
        [5.3490e+01, 5.7453e-02, 6.2040e-02, 2.9998e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4012e+01, 1.1507e+01, 5.4032e-03, 3.1906e-06],
         [4.3619e+01, 5.0522e-02, 1.6898e-02, 2.0495e-02],
         ...,
         [5.3125e+01, 7.2237e-05, 1.3426e-06, 2.1634e-03],
         [4.8498e+01, 2.1251e-04, 1.6136e-06, 5.7716e-08],
         [3.3226e+01, 3.2204e-04, 4.9184e-01, 3.2476e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8007e+01, 1.1505e+01, 1.8313e-04, 3.5413e-04],
         [4.0045e+01, 7.3943e-01, 6.5595e-05, 1.8129e-06],
         ...,
         [5.3126e+01, 2.2202e-02, 1.5093e-03, 5.9833e-08],
         [4.8499e+01, 1.0289e-02, 1.9736e-03, 1.2369e-03],
         [3.3230e+01, 1.6285e-02, 3.5


Train Diffusion:  20%|██        | 401/2000 [18:41<1:13:17,  2.75s/it][A
Train Diffusion:  20%|██        | 402/2000 [18:43<1:13:25,  2.76s/it][A
Train Diffusion:  20%|██        | 403/2000 [18:46<1:15:15,  2.83s/it][A
Train Diffusion:  20%|██        | 404/2000 [18:49<1:14:45,  2.81s/it][A
Train Diffusion:  20%|██        | 405/2000 [18:52<1:14:41,  2.81s/it][A
Train Diffusion:  20%|██        | 406/2000 [18:55<1:14:10,  2.79s/it][A
Train Diffusion:  20%|██        | 407/2000 [18:57<1:13:48,  2.78s/it][A
Train Diffusion:  20%|██        | 408/2000 [19:00<1:10:19,  2.65s/it][A
Train Diffusion:  20%|██        | 409/2000 [19:02<1:07:50,  2.56s/it][A
Train Diffusion:  20%|██        | 410/2000 [19:05<1:08:54,  2.60s/it][A

Moving average norm loss at 410 iterations is: 4187.68369140625. Best norm loss value is: 3907.433349609375.

C_PATH mean = tensor([[5.3340e+01, 6.2258e-02, 6.5897e-02, 2.9214e-03],
        [5.3342e+01, 6.4352e-02, 6.5939e-02, 3.0170e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4297e+01, 1.1179e+01, 2.1253e-04, 3.0821e-06],
         [4.4050e+01, 4.6346e-02, 1.9583e-02, 1.9665e-02],
         ...,
         [5.3000e+01, 2.5669e-02, 1.2957e-06, 4.6794e-08],
         [4.8384e+01, 1.1580e-02, 1.5449e-06, 4.6014e-08],
         [3.2964e+01, 3.6365e-04, 6.6556e-01, 3.2059e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8365e+01, 1.1221e+01, 6.1702e-03, 3.5707e-04],
         [4.0293e+01, 7.4481e-01, 7.5967e-05, 1.7915e-06],
         ...,
         [5.2995e+01, 8.5445e-05, 1.5721e-03, 2.1758e-03],
         [4.8396e+01, 2.5487e-04, 2.0977e-03, 1.2349e-03],
         [3.2991e+01, 1.9065e-02, 2.82


Train Diffusion:  21%|██        | 411/2000 [19:07<1:08:29,  2.59s/it][A
Train Diffusion:  21%|██        | 412/2000 [19:10<1:07:02,  2.53s/it][A
Train Diffusion:  21%|██        | 413/2000 [19:12<1:06:46,  2.52s/it][A
Train Diffusion:  21%|██        | 414/2000 [19:15<1:05:17,  2.47s/it][A
Train Diffusion:  21%|██        | 415/2000 [19:17<1:05:51,  2.49s/it][A
Train Diffusion:  21%|██        | 416/2000 [19:20<1:05:01,  2.46s/it][A
Train Diffusion:  21%|██        | 417/2000 [19:22<1:04:03,  2.43s/it][A
Train Diffusion:  21%|██        | 418/2000 [19:24<1:03:17,  2.40s/it][A
Train Diffusion:  21%|██        | 419/2000 [19:27<1:02:47,  2.38s/it][A
Train Diffusion:  21%|██        | 420/2000 [19:29<1:04:22,  2.44s/it][A

Moving average norm loss at 420 iterations is: 4063.3503173828126. Best norm loss value is: 3905.809326171875.

C_PATH mean = tensor([[5.3418e+01, 6.0588e-02, 7.8879e-02, 3.1063e-03],
        [5.3418e+01, 6.2468e-02, 6.9712e-02, 3.2415e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4298e+01, 1.1197e+01, 6.0624e-03, 3.9595e-04],
         [4.3841e+01, 3.8604e-02, 6.8029e-05, 2.0308e-02],
         ...,
         [5.3261e+01, 7.5342e-05, 1.4016e-06, 5.7801e-08],
         [4.8657e+01, 1.1928e-02, 1.6709e-06, 1.4462e-03],
         [3.3159e+01, 3.4248e-04, 2.8854e-07, 3.4862e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8295e+01, 1.1225e+01, 1.9828e-04, 3.2209e-06],
         [4.0282e+01, 7.0713e-01, 2.0579e-02, 1.9403e-06],
         ...,
         [5.3259e+01, 2.8058e-02, 1.7351e-03, 2.6034e-03],
         [4.8666e+01, 2.2982e-04, 2.3293e-03, 5.3938e-08],
         [3.3176e+01, 1.9545e-02, 7.


Train Diffusion:  21%|██        | 421/2000 [19:32<1:05:17,  2.48s/it][A
Train Diffusion:  21%|██        | 422/2000 [19:34<1:06:50,  2.54s/it][A
Train Diffusion:  21%|██        | 423/2000 [19:37<1:07:23,  2.56s/it][A
Train Diffusion:  21%|██        | 424/2000 [19:40<1:12:33,  2.76s/it][A
Train Diffusion:  21%|██▏       | 425/2000 [19:43<1:11:46,  2.73s/it][A
Train Diffusion:  21%|██▏       | 426/2000 [19:46<1:17:43,  2.96s/it][A
Train Diffusion:  21%|██▏       | 427/2000 [19:49<1:14:48,  2.85s/it][A
Train Diffusion:  21%|██▏       | 428/2000 [19:52<1:11:42,  2.74s/it][A
Train Diffusion:  21%|██▏       | 429/2000 [19:54<1:10:38,  2.70s/it][A
Train Diffusion:  22%|██▏       | 430/2000 [19:57<1:12:03,  2.75s/it][A

Moving average norm loss at 430 iterations is: 4043.8619140625. Best norm loss value is: 3830.623291015625.

C_PATH mean = tensor([[5.3490e+01, 5.8947e-02, 7.8289e-02, 3.3551e-03],
        [5.3491e+01, 6.0055e-02, 8.1603e-02, 3.1264e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8476e+01, 1.0809e+01, 1.9839e-04, 3.3276e-06],
         [4.0615e+01, 6.3077e-01, 2.1883e-02, 2.0449e-06],
         ...,
         [5.3229e+01, 7.2893e-05, 1.8597e-03, 2.9876e-03],
         [4.8612e+01, 2.3065e-04, 2.4804e-03, 1.6433e-03],
         [3.2915e+01, 3.3882e-04, 7.2935e-01, 3.8001e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4513e+01, 1.0853e+01, 6.1208e-03, 4.1103e-04],
         [4.4138e+01, 2.7614e-02, 6.5869e-05, 1.9791e-02],
         ...,
         [5.3234e+01, 3.0098e-02, 1.4059e-06, 6.0458e-08],
         [4.8603e+01, 1.2225e-02, 1.6972e-06, 5.7375e-08],
         [3.2920e+01, 2.0085e-02, 3.187


Train Diffusion:  22%|██▏       | 431/2000 [20:01<1:19:00,  3.02s/it][A
Train Diffusion:  22%|██▏       | 432/2000 [20:04<1:22:01,  3.14s/it][A
Train Diffusion:  22%|██▏       | 433/2000 [20:09<1:32:53,  3.56s/it][A
Train Diffusion:  22%|██▏       | 434/2000 [20:13<1:36:30,  3.70s/it][A
Train Diffusion:  22%|██▏       | 435/2000 [20:16<1:36:54,  3.72s/it][A
Train Diffusion:  22%|██▏       | 436/2000 [20:19<1:27:25,  3.35s/it][A
Train Diffusion:  22%|██▏       | 437/2000 [20:22<1:26:50,  3.33s/it][A
Train Diffusion:  22%|██▏       | 438/2000 [20:25<1:20:14,  3.08s/it][A
Train Diffusion:  22%|██▏       | 439/2000 [20:27<1:15:10,  2.89s/it][A
Train Diffusion:  22%|██▏       | 440/2000 [20:30<1:15:29,  2.90s/it][A

Moving average norm loss at 440 iterations is: 3937.4370849609377. Best norm loss value is: 3742.967041015625.

C_PATH mean = tensor([[5.3353e+01, 6.3674e-02, 8.8377e-02, 3.1024e-03],
        [5.3351e+01, 6.7772e-02, 8.8666e-02, 3.2095e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8858e+01, 1.0425e+01, 2.2889e-04, 4.0607e-04],
         [4.1170e+01, 2.2307e-02, 7.6574e-05, 1.9534e-06],
         ...,
         [5.3129e+01, 3.5958e-02, 1.9254e-03, 4.6654e-08],
         [4.8428e+01, 1.3993e-02, 1.5825e-06, 1.6497e-03],
         [3.2437e+01, 3.8236e-04, 6.7631e-01, 3.5307e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4930e+01, 1.0262e+01, 7.0069e-03, 3.1974e-06],
         [4.4538e+01, 6.1676e-01, 2.5527e-02, 1.9152e-02],
         ...,
         [5.3098e+01, 8.4052e-05, 1.2966e-06, 3.0258e-03],
         [4.8429e+01, 2.6906e-04, 2.5551e-03, 4.4336e-08],
         [3.2442e+01, 2.3440e-02, 3.


Train Diffusion:  22%|██▏       | 441/2000 [20:33<1:14:07,  2.85s/it][A
Train Diffusion:  22%|██▏       | 442/2000 [20:35<1:11:08,  2.74s/it][A
Train Diffusion:  22%|██▏       | 443/2000 [20:38<1:09:55,  2.69s/it][A
Train Diffusion:  22%|██▏       | 444/2000 [20:40<1:08:01,  2.62s/it][A
Train Diffusion:  22%|██▏       | 445/2000 [20:43<1:06:22,  2.56s/it][A
Train Diffusion:  22%|██▏       | 446/2000 [20:45<1:06:01,  2.55s/it][A
Train Diffusion:  22%|██▏       | 447/2000 [20:48<1:05:05,  2.52s/it][A
Train Diffusion:  22%|██▏       | 448/2000 [20:50<1:04:15,  2.48s/it][A
Train Diffusion:  22%|██▏       | 449/2000 [20:53<1:03:44,  2.47s/it][A
Train Diffusion:  22%|██▎       | 450/2000 [20:55<1:04:04,  2.48s/it][A

Moving average norm loss at 450 iterations is: 3999.63232421875. Best norm loss value is: 3742.967041015625.

C_PATH mean = tensor([[5.3479e+01, 6.2370e-02, 9.5922e-02, 3.5530e-03],
        [5.3479e+01, 5.8124e-02, 9.6663e-02, 3.4728e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4814e+01, 1.0560e+01, 6.5119e-03, 4.5322e-04],
         [4.4271e+01, 5.7871e-01, 6.2733e-05, 2.1958e-06],
         ...,
         [5.3313e+01, 3.7902e-02, 1.4544e-06, 6.4882e-08],
         [4.8710e+01, 2.2840e-04, 2.9369e-03, 2.0703e-03],
         [3.2776e+01, 2.3025e-02, 3.5447e-07, 3.8381e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8679e+01, 1.0433e+01, 1.9819e-04, 3.4825e-06],
         [4.1033e+01, 2.0216e-02, 2.6070e-02, 1.9482e-02],
         ...,
         [5.3313e+01, 6.7588e-05, 2.1360e-03, 3.9356e-03],
         [4.8707e+01, 1.3641e-02, 1.7072e-06, 6.0613e-08],
         [3.2768e+01, 3.3223e-04, 7.28


Train Diffusion:  23%|██▎       | 451/2000 [20:58<1:04:29,  2.50s/it][A
Train Diffusion:  23%|██▎       | 452/2000 [21:00<1:05:12,  2.53s/it][A
Train Diffusion:  23%|██▎       | 453/2000 [21:03<1:07:00,  2.60s/it][A
Train Diffusion:  23%|██▎       | 454/2000 [21:06<1:11:48,  2.79s/it][A
Train Diffusion:  23%|██▎       | 455/2000 [21:10<1:19:24,  3.08s/it][A
Train Diffusion:  23%|██▎       | 456/2000 [21:12<1:14:35,  2.90s/it][A
Train Diffusion:  23%|██▎       | 457/2000 [21:15<1:11:48,  2.79s/it][A
Train Diffusion:  23%|██▎       | 458/2000 [21:17<1:09:12,  2.69s/it][A
Train Diffusion:  23%|██▎       | 459/2000 [21:21<1:15:10,  2.93s/it][A
Train Diffusion:  23%|██▎       | 460/2000 [21:24<1:17:25,  3.02s/it][A

Moving average norm loss at 460 iterations is: 3774.076123046875. Best norm loss value is: 3648.6787109375.

C_PATH mean = tensor([[5.3436e+01, 6.3703e-02, 1.0433e-01, 3.3071e-03],
        [5.3436e+01, 6.2094e-02, 1.0282e-01, 3.5965e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.4921e+01, 1.0470e+01, 2.0539e-04, 4.6449e-04],
         [4.4417e+01, 5.6131e-01, 2.9239e-02, 2.3015e-06],
         ...,
         [5.3423e+01, 6.7631e-05, 1.4624e-06, 4.1388e-03],
         [4.8836e+01, 1.4878e-02, 1.7612e-06, 2.1663e-03],
         [3.2803e+01, 3.4059e-04, 6.9595e-01, 4.5572e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.8846e+01, 1.0346e+01, 6.8737e-03, 3.5755e-06],
         [4.1069e+01, 1.8821e-02, 6.3153e-05, 1.8789e-02],
         ...,
         [5.3428e+01, 4.3870e-02, 2.2544e-03, 6.4079e-08],
         [4.8843e+01, 2.3406e-04, 3.0620e-03, 5.7886e-08],
         [3.2800e+01, 2.5340e-02, 3.775


Train Diffusion:  23%|██▎       | 461/2000 [21:27<1:12:57,  2.84s/it][A
Train Diffusion:  23%|██▎       | 462/2000 [21:30<1:14:38,  2.91s/it][A
Train Diffusion:  23%|██▎       | 463/2000 [21:32<1:11:48,  2.80s/it][A
Train Diffusion:  23%|██▎       | 464/2000 [21:35<1:13:38,  2.88s/it][A
Train Diffusion:  23%|██▎       | 465/2000 [21:38<1:15:43,  2.96s/it][A
Train Diffusion:  23%|██▎       | 466/2000 [21:41<1:15:20,  2.95s/it][A
Train Diffusion:  23%|██▎       | 467/2000 [21:45<1:19:49,  3.12s/it][A
Train Diffusion:  23%|██▎       | 468/2000 [21:48<1:20:40,  3.16s/it][A
Train Diffusion:  23%|██▎       | 469/2000 [21:52<1:23:12,  3.26s/it][A
Train Diffusion:  24%|██▎       | 470/2000 [21:54<1:17:33,  3.04s/it][A

Moving average norm loss at 470 iterations is: 3995.2879638671875. Best norm loss value is: 3590.313232421875.

C_PATH mean = tensor([[5.3309e+01, 6.4794e-02, 1.1751e-01, 3.6281e-03],
        [5.3309e+01, 6.7867e-02, 1.1655e-01, 3.7214e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.9176e+01, 1.0014e+01, 7.5244e-03, 4.9097e-04],
         [4.1525e+01, 5.4787e-01, 6.5960e-05, 2.2785e-06],
         ...,
         [5.3428e+01, 6.9073e-05, 1.4457e-06, 5.7248e-08],
         [4.8835e+01, 1.6748e-02, 3.4694e-03, 2.4729e-03],
         [3.2598e+01, 2.8948e-02, 7.3228e-01, 3.7483e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5292e+01, 1.0235e+01, 2.2438e-04, 3.5258e-06],
         [4.4661e+01, 1.5158e-02, 3.4464e-02, 1.9085e-02],
         ...,
         [5.3426e+01, 5.3519e-02, 2.4562e-03, 4.8903e-03],
         [4.8832e+01, 2.5585e-04, 1.6815e-06, 5.0305e-08],
         [3.2587e+01, 3.6569e-04, 3.


Train Diffusion:  24%|██▎       | 471/2000 [21:57<1:13:01,  2.87s/it][A
Train Diffusion:  24%|██▎       | 472/2000 [22:00<1:13:30,  2.89s/it][A
Train Diffusion:  24%|██▎       | 473/2000 [22:02<1:11:57,  2.83s/it][A
Train Diffusion:  24%|██▎       | 474/2000 [22:05<1:14:10,  2.92s/it][A
Train Diffusion:  24%|██▍       | 475/2000 [22:09<1:21:47,  3.22s/it][A
Train Diffusion:  24%|██▍       | 476/2000 [22:14<1:30:00,  3.54s/it][A
Train Diffusion:  24%|██▍       | 477/2000 [22:17<1:28:33,  3.49s/it][A
Train Diffusion:  24%|██▍       | 478/2000 [22:21<1:32:54,  3.66s/it][A
Train Diffusion:  24%|██▍       | 479/2000 [22:26<1:42:00,  4.02s/it][A
Train Diffusion:  24%|██▍       | 480/2000 [22:29<1:34:34,  3.73s/it][A

Moving average norm loss at 480 iterations is: 4119.270654296875. Best norm loss value is: 3590.313232421875.

C_PATH mean = tensor([[5.3488e+01, 5.8501e-02, 1.2034e-01, 3.3676e-03],
        [5.3481e+01, 6.3447e-02, 1.0323e-01, 3.3999e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5535e+01, 9.7209e+00, 2.2102e-04, 3.7207e-06],
         [4.5418e+01, 5.0202e-01, 6.4376e-05, 2.4521e-06],
         ...,
         [5.3666e+01, 6.7082e-05, 1.5305e-06, 5.0404e-03],
         [4.8813e+01, 1.6623e-02, 3.5221e-03, 2.5823e-03],
         [3.2403e+01, 3.4856e-04, 4.3533e-07, 4.0076e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.9589e+01, 1.0128e+01, 7.3101e-03, 4.8858e-04],
         [4.1742e+01, 1.1800e-02, 3.5484e-02, 1.6608e-02],
         ...,
         [5.3448e+01, 5.5941e-02, 2.4599e-03, 6.3426e-08],
         [4.8815e+01, 2.4557e-04, 1.7820e-06, 5.7127e-08],
         [3.2417e+01, 2.9076e-02, 6.4


Train Diffusion:  24%|██▍       | 481/2000 [22:32<1:30:15,  3.56s/it][A
Train Diffusion:  24%|██▍       | 482/2000 [22:35<1:27:38,  3.46s/it][A
Train Diffusion:  24%|██▍       | 483/2000 [22:39<1:32:05,  3.64s/it][A
Train Diffusion:  24%|██▍       | 484/2000 [22:43<1:34:18,  3.73s/it][A
Train Diffusion:  24%|██▍       | 485/2000 [22:47<1:35:59,  3.80s/it][A
Train Diffusion:  24%|██▍       | 486/2000 [22:51<1:34:14,  3.73s/it][A
Train Diffusion:  24%|██▍       | 487/2000 [22:54<1:28:30,  3.51s/it][A
Train Diffusion:  24%|██▍       | 488/2000 [22:57<1:22:26,  3.27s/it][A
Train Diffusion:  24%|██▍       | 489/2000 [22:59<1:17:22,  3.07s/it][A
Train Diffusion:  24%|██▍       | 490/2000 [23:02<1:17:04,  3.06s/it][A

Moving average norm loss at 490 iterations is: 3916.4578857421875. Best norm loss value is: 3577.6298828125.

C_PATH mean = tensor([[5.3566e+01, 6.2103e-02, 1.1672e-01, 3.4452e-03],
        [5.3559e+01, 5.4646e-02, 1.1680e-01, 3.5738e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.9867e+01, 9.9844e+00, 2.0804e-04, 3.9819e-06],
         [4.1946e+01, 4.6377e-01, 3.8523e-02, 1.6050e-02],
         ...,
         [5.3755e+01, 6.2837e-02, 1.6569e-06, 7.5913e-08],
         [4.8827e+01, 1.7156e-02, 1.9192e-06, 2.9726e-03],
         [3.2333e+01, 3.0458e-02, 4.7432e-07, 4.3167e-05]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.5717e+01, 9.5749e+00, 7.2527e-03, 5.1619e-04],
         [4.5830e+01, 9.9177e-03, 5.8020e-05, 2.6596e-06],
         ...,
         [5.3478e+01, 5.8993e-05, 2.6228e-03, 5.8638e-03],
         [4.8833e+01, 2.3306e-04, 3.8132e-03, 6.5209e-08],
         [3.2330e+01, 3.2638e-04, 6.51


Train Diffusion:  25%|██▍       | 491/2000 [23:05<1:15:20,  3.00s/it][A
Train Diffusion:  25%|██▍       | 492/2000 [23:09<1:20:58,  3.22s/it][A
Train Diffusion:  25%|██▍       | 493/2000 [23:12<1:20:13,  3.19s/it][A
Train Diffusion:  25%|██▍       | 494/2000 [23:15<1:19:14,  3.16s/it][A
Train Diffusion:  25%|██▍       | 495/2000 [23:18<1:17:22,  3.08s/it][A
Train Diffusion:  25%|██▍       | 496/2000 [23:21<1:15:11,  3.00s/it][A
Train Diffusion:  25%|██▍       | 497/2000 [23:24<1:14:55,  2.99s/it][A
Train Diffusion:  25%|██▍       | 498/2000 [23:27<1:18:16,  3.13s/it][A
Train Diffusion:  25%|██▍       | 499/2000 [23:30<1:17:00,  3.08s/it][A
Train Diffusion:  25%|██▌       | 500/2000 [23:33<1:15:58,  3.04s/it][A

Moving average norm loss at 500 iterations is: 3845.7297607421874. Best norm loss value is: 3505.509033203125.

C_PATH mean = tensor([[5.3374e+01, 6.9367e-02, 1.3321e-01, 3.6550e-03],
        [5.3374e+01, 6.1486e-02, 1.3559e-01, 3.7857e-03]],
       grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [1.6154e+01, 9.5556e+00, 2.2561e-04, 3.7386e-06],
         [4.5976e+01, 7.7644e-03, 4.7546e-02, 2.5836e-06],
         ...,
         [5.3439e+01, 7.9972e-02, 1.5046e-06, 6.2343e-08],
         [4.8768e+01, 2.0068e-02, 1.7546e-06, 3.3267e-03],
         [3.2022e+01, 3.6375e-02, 7.1381e-01, 6.0389e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.0189e+01, 9.1385e+00, 8.2000e-03, 5.4007e-04],
         [4.2649e+01, 4.3318e-01, 5.9860e-05, 1.6641e-02],
         ...,
         [5.3445e+01, 6.0238e-05, 2.8995e-03, 6.7688e-03],
         [4.8763e+01, 2.4970e-04, 4.2823e-03, 5.3902e-08],
         [3.2026e+01, 3.4717e-04, 4.


Train Diffusion:  25%|██▌       | 501/2000 [23:36<1:15:35,  3.03s/it][A
Train Diffusion:  25%|██▌       | 502/2000 [23:39<1:15:17,  3.02s/it][A
Train Diffusion:  25%|██▌       | 503/2000 [23:42<1:17:54,  3.12s/it][A
Train Diffusion:  25%|██▌       | 504/2000 [23:45<1:15:06,  3.01s/it][A
Train Diffusion:  25%|██▌       | 505/2000 [23:48<1:14:12,  2.98s/it][A
Train Diffusion:  25%|██▌       | 506/2000 [23:51<1:12:57,  2.93s/it][A
Train Diffusion:  25%|██▌       | 507/2000 [23:54<1:12:57,  2.93s/it][A
Train Diffusion:  25%|██▌       | 508/2000 [23:57<1:12:44,  2.93s/it][A
Train Diffusion:  25%|██▌       | 509/2000 [24:00<1:12:42,  2.93s/it][A
Train Diffusion:  26%|██▌       | 510/2000 [24:02<1:11:19,  2.87s/it][A

Moving average ELBO loss at 510 iterations is: 147911.9734375. Best ELBO loss value is: 42540.53125.

C_PATH mean = tensor([[5.1654e+01, 7.0482e-02, 1.0427e-01, 3.9834e-03],
        [5.1769e+01, 7.2254e-02, 1.0304e-01, 4.0888e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.1624e+01, 3.9106e-04, 1.4840e-02, 7.8336e-06],
         [4.9765e+01, 6.8891e-02, 5.9349e-02, 5.6542e-06],
         ...,
         [4.9929e+01, 3.2032e-04, 3.5714e-06, 7.5048e-08],
         [3.7359e+01, 9.6311e-04, 4.0345e-06, 1.3068e-07],
         [1.2701e+01, 4.7258e-02, 1.1452e-06, 2.3824e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.1624e+01, 1.8315e+00, 9.1032e-04, 7.5813e-04],
         [4.9756e+01, 1.2775e-07, 3.0704e-04, 1.7569e-02],
         ...,
         [5.2232e+01, 8.7876e-02, 3.6520e-03, 8.4124e-03],
         [4.6580e+01, 2.9709e-02, 5.2934e-03, 4.9385e-03],
         [2.7790e+01, 1.2730e-03, 5.7544e-01,


Train Diffusion:  26%|██▌       | 511/2000 [24:05<1:09:22,  2.80s/it][A
Train Diffusion:  26%|██▌       | 512/2000 [24:08<1:07:44,  2.73s/it][A
Train Diffusion:  26%|██▌       | 513/2000 [24:10<1:08:31,  2.76s/it][A
Train Diffusion:  26%|██▌       | 514/2000 [24:13<1:07:28,  2.72s/it][A
Train Diffusion:  26%|██▌       | 515/2000 [24:16<1:07:46,  2.74s/it][A
Train Diffusion:  26%|██▌       | 516/2000 [24:19<1:10:53,  2.87s/it][A
Train Diffusion:  26%|██▌       | 517/2000 [24:22<1:11:12,  2.88s/it][A
Train Diffusion:  26%|██▌       | 518/2000 [24:25<1:15:20,  3.05s/it][A
Train Diffusion:  26%|██▌       | 519/2000 [24:28<1:15:57,  3.08s/it][A
Train Diffusion:  26%|██▌       | 520/2000 [24:32<1:20:09,  3.25s/it][A

Moving average ELBO loss at 520 iterations is: 54305.026171875. Best ELBO loss value is: 25856.21875.

C_PATH mean = tensor([[5.1506e+01, 5.0326e-02, 1.0693e-01, 5.3589e-03],
        [5.1519e+01, 4.7712e-02, 1.1529e-01, 5.1384e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.0274e+01, 2.2134e-01, 3.3190e-04, 1.0830e-03],
         [4.8224e+01, 7.9872e-06, 8.9355e-05, 1.9215e-02],
         ...,
         [4.9580e+01, 9.4470e-05, 5.1143e-03, 5.3091e-07],
         [4.2673e+01, 3.6025e-04, 7.2449e-03, 6.9753e-03],
         [1.7583e+01, 4.1097e-02, 1.9637e-06, 1.8702e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.2358e+01, 1.2404e-02, 9.3168e-03, 1.2998e-05],
         [5.1475e+01, 1.3569e-02, 5.0814e-02, 9.5500e-06],
         ...,
         [5.0223e+01, 7.6910e-02, 5.4512e-06, 1.2806e-02],
         [4.0804e+01, 2.2641e-02, 6.3896e-06, 5.0896e-07],
         [1.9661e+01, 4.9273e-04, 5.6116e-01


Train Diffusion:  26%|██▌       | 521/2000 [24:35<1:19:09,  3.21s/it][A
Train Diffusion:  26%|██▌       | 522/2000 [24:40<1:27:46,  3.56s/it][A
Train Diffusion:  26%|██▌       | 523/2000 [24:43<1:24:49,  3.45s/it][A
Train Diffusion:  26%|██▌       | 524/2000 [24:46<1:20:06,  3.26s/it][A
Train Diffusion:  26%|██▋       | 525/2000 [24:48<1:13:39,  3.00s/it][A
Train Diffusion:  26%|██▋       | 526/2000 [24:50<1:08:37,  2.79s/it][A
Train Diffusion:  26%|██▋       | 527/2000 [24:53<1:05:06,  2.65s/it][A
Train Diffusion:  26%|██▋       | 528/2000 [24:55<1:02:33,  2.55s/it][A
Train Diffusion:  26%|██▋       | 529/2000 [24:57<1:00:50,  2.48s/it][A
Train Diffusion:  26%|██▋       | 530/2000 [25:00<59:34,  2.43s/it]  [A

Moving average ELBO loss at 530 iterations is: 48903.0548828125. Best ELBO loss value is: 22380.32421875.

C_PATH mean = tensor([[5.1753e+01, 1.7080e-02, 4.7689e-02, 3.9572e-03],
        [5.1752e+01, 1.7103e-02, 4.9721e-02, 4.0878e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.1334e+01, 1.8259e-01, 1.8293e-04, 1.1145e-03],
         [4.7990e+01, 9.9233e-03, 2.3239e-02, 1.2351e-02],
         ...,
         [5.0475e+01, 2.8170e-02, 9.9745e-06, 1.2421e-02],
         [4.0763e+01, 1.1671e-02, 1.0406e-05, 2.8077e-06],
         [1.8846e+01, 2.8704e-04, 3.9839e-06, 3.2792e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.4047e+01, 1.8910e-02, 5.1058e-03, 2.3746e-05],
         [5.4057e+01, 1.2805e-05, 5.8688e-05, 1.7379e-05],
         ...,
         [5.1560e+01, 7.0562e-05, 4.6180e-03, 2.7358e-06],
         [4.3374e+01, 1.9614e-04, 6.5831e-03, 7.2800e-03],
         [1.7662e+01, 1.9046e-02, 2.9317


Train Diffusion:  27%|██▋       | 531/2000 [25:02<59:03,  2.41s/it][A
Train Diffusion:  27%|██▋       | 532/2000 [25:04<58:15,  2.38s/it][A
Train Diffusion:  27%|██▋       | 533/2000 [25:07<57:38,  2.36s/it][A
Train Diffusion:  27%|██▋       | 534/2000 [25:09<57:15,  2.34s/it][A
Train Diffusion:  27%|██▋       | 535/2000 [25:11<56:57,  2.33s/it][A
Train Diffusion:  27%|██▋       | 536/2000 [25:14<56:46,  2.33s/it][A
Train Diffusion:  27%|██▋       | 537/2000 [25:16<56:37,  2.32s/it][A
Train Diffusion:  27%|██▋       | 538/2000 [25:18<56:26,  2.32s/it][A
Train Diffusion:  27%|██▋       | 539/2000 [25:20<56:21,  2.31s/it][A
Train Diffusion:  27%|██▋       | 540/2000 [25:23<56:18,  2.31s/it][A

Moving average ELBO loss at 540 iterations is: 36706.4595703125. Best ELBO loss value is: 17043.05859375.

C_PATH mean = tensor([[5.0329e+01, 2.6358e-02, 7.4091e-02, 7.2720e-03],
        [5.0336e+01, 2.6178e-02, 8.1253e-02, 8.0002e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.5037e+01, 1.7034e-01, 8.0116e-03, 2.2321e-03],
         [5.2310e+01, 2.7590e-05, 8.1625e-05, 3.1930e-05],
         ...,
         [4.9368e+01, 4.4251e-02, 1.6608e-05, 2.2807e-02],
         [4.1289e+01, 2.0009e-02, 1.1849e-02, 1.7065e-02],
         [1.7842e+01, 4.8416e-04, 7.4282e-06, 3.1940e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.0869e+01, 2.1194e-02, 3.1996e-04, 4.1560e-05],
         [5.8018e+01, 1.1737e-02, 4.1630e-02, 2.1510e-02],
         ...,
         [4.9635e+01, 1.0667e-04, 1.0118e-02, 6.1660e-06],
         [4.1181e+01, 3.1613e-04, 2.2789e-05, 4.3379e-06],
         [1.7618e+01, 3.2491e-02, 4.8110


Train Diffusion:  27%|██▋       | 541/2000 [25:25<56:30,  2.32s/it][A
Train Diffusion:  27%|██▋       | 542/2000 [25:27<56:22,  2.32s/it][A
Train Diffusion:  27%|██▋       | 543/2000 [25:30<56:22,  2.32s/it][A
Train Diffusion:  27%|██▋       | 544/2000 [25:32<56:21,  2.32s/it][A
Train Diffusion:  27%|██▋       | 545/2000 [25:34<56:22,  2.32s/it][A
Train Diffusion:  27%|██▋       | 546/2000 [25:37<56:35,  2.34s/it][A
Train Diffusion:  27%|██▋       | 547/2000 [25:39<56:30,  2.33s/it][A
Train Diffusion:  27%|██▋       | 548/2000 [25:41<56:23,  2.33s/it][A
Train Diffusion:  27%|██▋       | 549/2000 [25:44<56:13,  2.33s/it][A
Train Diffusion:  28%|██▊       | 550/2000 [25:46<56:09,  2.32s/it][A

Moving average ELBO loss at 550 iterations is: 15401.1630859375. Best ELBO loss value is: 12319.7861328125.

C_PATH mean = tensor([[5.0336e+01, 3.1932e-02, 6.2331e-02, 5.1908e-03],
        [5.0338e+01, 3.1117e-02, 6.2009e-02, 5.3821e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.4303e+01, 4.5754e-01, 1.2035e-02, 2.9394e-03],
         [5.1075e+01, 9.6927e-05, 5.1394e-02, 4.8837e-05],
         ...,
         [4.9166e+01, 4.5591e-02, 9.3079e-03, 4.6775e-06],
         [4.0538e+01, 2.4901e-02, 1.3281e-02, 3.8804e-06],
         [1.9321e+01, 4.5426e-02, 4.4712e-01, 4.3194e-04]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [2.9535e+01, 5.8610e-02, 4.0397e-04, 3.6837e-05],
         [5.4844e+01, 2.5102e-02, 1.1830e-04, 1.5213e-02],
         ...,
         [4.9744e+01, 2.0930e-04, 2.1482e-05, 1.6740e-02],
         [4.2059e+01, 4.5430e-04, 1.9005e-05, 1.3041e-02],
         [1.7982e+01, 4.9748e-04, 7.36


Train Diffusion:  28%|██▊       | 551/2000 [25:48<56:23,  2.33s/it][A
Train Diffusion:  28%|██▊       | 552/2000 [25:51<56:10,  2.33s/it][A
Train Diffusion:  28%|██▊       | 553/2000 [25:53<56:03,  2.32s/it][A
Train Diffusion:  28%|██▊       | 554/2000 [25:55<56:02,  2.33s/it][A
Train Diffusion:  28%|██▊       | 555/2000 [25:58<55:57,  2.32s/it][A
Train Diffusion:  28%|██▊       | 556/2000 [26:00<56:09,  2.33s/it][A
Train Diffusion:  28%|██▊       | 557/2000 [26:02<55:58,  2.33s/it][A
Train Diffusion:  28%|██▊       | 558/2000 [26:05<55:49,  2.32s/it][A
Train Diffusion:  28%|██▊       | 559/2000 [26:07<55:43,  2.32s/it][A
Train Diffusion:  28%|██▊       | 560/2000 [26:09<55:37,  2.32s/it][A

Moving average ELBO loss at 560 iterations is: 11305.0142578125. Best ELBO loss value is: 9062.3515625.

C_PATH mean = tensor([[4.9535e+01, 3.5287e-02, 5.3336e-02, 6.7565e-03],
        [4.9559e+01, 3.5981e-02, 5.3167e-02, 6.2587e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.4844e+01, 1.0541e-01, 9.9088e-04, 5.4885e-03],
         [6.1593e+01, 2.7430e-04, 2.3533e-04, 1.4671e-04],
         ...,
         [4.7871e+01, 2.8748e-04, 1.2888e-02, 1.6373e-02],
         [3.9542e+01, 2.0211e-02, 1.8769e-02, 1.5350e-05],
         [1.9267e+01, 5.1062e-02, 4.2218e-01, 1.7219e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.8373e+01, 6.1969e-01, 1.1186e-02, 6.5775e-05],
         [6.0422e+01, 4.6120e-02, 5.7778e-02, 1.3340e-02],
         ...,
         [4.8770e+01, 5.5687e-02, 5.7947e-05, 2.6432e-05],
         [4.0635e+01, 1.1276e-03, 3.9498e-05, 1.3157e-02],
         [1.7904e+01, 8.9566e-04, 1.5348e-


Train Diffusion:  28%|██▊       | 561/2000 [26:12<58:26,  2.44s/it][A
Train Diffusion:  28%|██▊       | 562/2000 [26:15<59:15,  2.47s/it][A
Train Diffusion:  28%|██▊       | 563/2000 [26:17<59:02,  2.47s/it][A
Train Diffusion:  28%|██▊       | 564/2000 [26:20<59:18,  2.48s/it][A
Train Diffusion:  28%|██▊       | 565/2000 [26:22<1:00:48,  2.54s/it][A
Train Diffusion:  28%|██▊       | 566/2000 [26:25<1:01:01,  2.55s/it][A
Train Diffusion:  28%|██▊       | 567/2000 [26:27<59:25,  2.49s/it]  [A
Train Diffusion:  28%|██▊       | 568/2000 [26:30<59:12,  2.48s/it][A
Train Diffusion:  28%|██▊       | 569/2000 [26:32<1:00:21,  2.53s/it][A
Train Diffusion:  28%|██▊       | 570/2000 [26:35<1:01:37,  2.59s/it][A

Moving average ELBO loss at 570 iterations is: 6019.024560546875. Best ELBO loss value is: 2800.32080078125.

C_PATH mean = tensor([[4.8529e+01, 3.4623e-02, 4.7705e-02, 8.2416e-03],
        [4.8520e+01, 3.4521e-02, 4.9733e-02, 8.7089e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.2449e+01, 2.9554e-01, 1.4858e-02, 2.5602e-04],
         [5.2971e+01, 5.1321e-04, 4.1019e-04, 1.7428e-02],
         ...,
         [4.6325e+01, 1.0007e-03, 2.4370e-02, 1.7932e-02],
         [3.8150e+01, 1.3529e-03, 3.0919e-04, 7.3161e-05],
         [1.8825e+01, 1.2421e-03, 9.7259e-05, 1.2224e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [3.2034e+01, 6.8924e-01, 1.7060e-03, 9.5140e-03],
         [5.7761e+01, 3.7146e-02, 6.0121e-02, 5.1806e-04],
         ...,
         [4.7262e+01, 4.3525e-02, 1.3958e-04, 1.4238e-04],
         [3.9005e+01, 3.4078e-02, 1.9197e-02, 1.6463e-02],
         [1.8518e+01, 5.7751e-02, 3.4


Train Diffusion:  29%|██▊       | 571/2000 [26:38<1:03:13,  2.65s/it][A
Train Diffusion:  29%|██▊       | 572/2000 [26:40<1:00:36,  2.55s/it][A
Train Diffusion:  29%|██▊       | 573/2000 [26:42<58:48,  2.47s/it]  [A
Train Diffusion:  29%|██▊       | 574/2000 [26:45<58:01,  2.44s/it][A
Train Diffusion:  29%|██▉       | 575/2000 [26:47<56:55,  2.40s/it][A
Train Diffusion:  29%|██▉       | 576/2000 [26:49<56:08,  2.37s/it][A
Train Diffusion:  29%|██▉       | 577/2000 [26:52<55:27,  2.34s/it][A
Train Diffusion:  29%|██▉       | 578/2000 [26:54<55:10,  2.33s/it][A
Train Diffusion:  29%|██▉       | 579/2000 [26:56<54:43,  2.31s/it][A
Train Diffusion:  29%|██▉       | 580/2000 [26:58<54:28,  2.30s/it][A

Moving average ELBO loss at 580 iterations is: 80.492578125. Best ELBO loss value is: -1687.5234375.

C_PATH mean = tensor([[4.7144e+01, 5.8761e-02, 4.8401e-02, 8.5907e-03],
        [4.7122e+01, 5.6684e-02, 4.9028e-02, 8.6530e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6596e+01, 5.7526e-02, 2.3203e-02, 8.6451e-03],
         [6.5094e+01, 1.7044e-02, 1.0644e-03, 6.8481e-04],
         ...,
         [4.4790e+01, 5.7225e-02, 1.8915e-02, 2.5416e-02],
         [3.7085e+01, 2.9884e-03, 2.8872e-02, 3.3290e-04],
         [1.8109e+01, 3.5639e-03, 2.9237e-04, 1.6434e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.2269e+01, 1.0475e-01, 4.6034e-03, 8.6584e-04],
         [6.1201e+01, 2.0384e-04, 7.4309e-02, 2.1805e-02],
         ...,
         [4.4612e+01, 1.6168e-03, 8.8771e-04, 2.1840e-04],
         [3.6172e+01, 4.2808e-02, 5.8560e-04, 1.3854e-02],
         [1.4902e+01, 9.0795e-02, 2.5631e-01,


Train Diffusion:  29%|██▉       | 581/2000 [27:01<54:17,  2.30s/it][A
Train Diffusion:  29%|██▉       | 582/2000 [27:03<54:14,  2.30s/it][A
Train Diffusion:  29%|██▉       | 583/2000 [27:05<54:01,  2.29s/it][A
Train Diffusion:  29%|██▉       | 584/2000 [27:08<53:56,  2.29s/it][A
Train Diffusion:  29%|██▉       | 585/2000 [27:10<53:48,  2.28s/it][A
Train Diffusion:  29%|██▉       | 586/2000 [27:12<53:43,  2.28s/it][A
Train Diffusion:  29%|██▉       | 587/2000 [27:14<53:50,  2.29s/it][A
Train Diffusion:  29%|██▉       | 588/2000 [27:17<53:45,  2.28s/it][A
Train Diffusion:  29%|██▉       | 589/2000 [27:19<53:36,  2.28s/it][A
Train Diffusion:  30%|██▉       | 590/2000 [27:21<53:30,  2.28s/it][A

Moving average ELBO loss at 590 iterations is: -1047.3279296875. Best ELBO loss value is: -3052.9931640625.

C_PATH mean = tensor([[4.7271e+01, 4.5498e-02, 5.1727e-02, 9.3691e-03],
        [4.7236e+01, 4.4534e-02, 5.1609e-02, 9.3657e-03]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9720e+01, 5.6431e-02, 3.9323e-03, 2.2413e-02],
         [5.9147e+01, 4.2266e-04, 1.8495e-03, 3.2740e-03],
         ...,
         [4.4351e+01, 4.3928e-03, 2.5963e-02, 1.8364e-02],
         [3.5673e+01, 4.5446e-03, 3.4529e-02, 2.0850e-02],
         [1.5116e+01, 6.9805e-02, 2.6998e-01, 2.4703e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.8103e+01, 4.8409e-01, 3.1596e-02, 7.5803e-04],
         [6.2466e+01, 2.5457e-02, 8.9461e-02, 2.0364e-02],
         ...,
         [4.4798e+01, 5.8539e-02, 1.8858e-03, 9.6655e-04],
         [3.7563e+01, 5.7138e-02, 1.3642e-03, 4.7285e-04],
         [2.0178e+01, 6.2560e-03, 2.97


Train Diffusion:  30%|██▉       | 591/2000 [27:24<53:28,  2.28s/it][A
Train Diffusion:  30%|██▉       | 592/2000 [27:26<53:24,  2.28s/it][A
Train Diffusion:  30%|██▉       | 593/2000 [27:28<53:21,  2.28s/it][A
Train Diffusion:  30%|██▉       | 594/2000 [27:30<53:30,  2.28s/it][A
Train Diffusion:  30%|██▉       | 595/2000 [27:33<53:26,  2.28s/it][A
Train Diffusion:  30%|██▉       | 596/2000 [27:35<53:27,  2.28s/it][A
Train Diffusion:  30%|██▉       | 597/2000 [27:37<53:46,  2.30s/it][A
Train Diffusion:  30%|██▉       | 598/2000 [27:40<53:37,  2.29s/it][A
Train Diffusion:  30%|██▉       | 599/2000 [27:42<53:24,  2.29s/it][A
Train Diffusion:  30%|███       | 600/2000 [27:44<53:21,  2.29s/it][A

Moving average ELBO loss at 600 iterations is: -1820.085009765625. Best ELBO loss value is: -3321.3095703125.

C_PATH mean = tensor([[4.6431e+01, 4.7918e-02, 5.6171e-02, 9.9607e-03],
        [4.6452e+01, 4.7606e-02, 5.8805e-02, 1.0867e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.5895e+01, 7.8983e-03, 6.4728e-03, 1.2771e-03],
         [5.5979e+01, 1.8973e-04, 7.7416e-02, 2.0501e-02],
         ...,
         [4.2512e+01, 6.3138e-02, 3.1346e-02, 2.1393e-03],
         [3.3713e+01, 7.9402e-02, 1.6701e-03, 9.7745e-04],
         [1.3014e+01, 1.2119e-02, 7.0454e-04, 2.1064e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.8814e+01, 5.7960e-02, 5.2265e-02, 3.8976e-02],
         [5.9240e+01, 1.6877e-02, 4.6529e-03, 6.0662e-03],
         ...,
         [4.3375e+01, 4.9596e-03, 4.0990e-03, 1.3966e-02],
         [3.5484e+01, 7.0495e-03, 3.8861e-02, 2.0663e-02],
         [1.6560e+01, 7.3301e-02, 3.


Train Diffusion:  30%|███       | 601/2000 [27:46<53:19,  2.29s/it][A
Train Diffusion:  30%|███       | 602/2000 [27:49<53:13,  2.28s/it][A
Train Diffusion:  30%|███       | 603/2000 [27:51<53:07,  2.28s/it][A
Train Diffusion:  30%|███       | 604/2000 [27:53<53:05,  2.28s/it][A
Train Diffusion:  30%|███       | 605/2000 [27:56<53:01,  2.28s/it][A
Train Diffusion:  30%|███       | 606/2000 [27:58<52:59,  2.28s/it][A
Train Diffusion:  30%|███       | 607/2000 [28:00<52:59,  2.28s/it][A
Train Diffusion:  30%|███       | 608/2000 [28:02<52:58,  2.28s/it][A
Train Diffusion:  30%|███       | 609/2000 [28:05<52:54,  2.28s/it][A
Train Diffusion:  30%|███       | 610/2000 [28:07<52:50,  2.28s/it][A

Moving average ELBO loss at 610 iterations is: -5057.582055664063. Best ELBO loss value is: -7683.31689453125.

C_PATH mean = tensor([[4.6299e+01, 5.9105e-02, 7.4537e-02, 1.1181e-02],
        [4.6288e+01, 5.7323e-02, 6.9683e-02, 1.0271e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2339e+01, 8.5580e-02, 4.5350e-02, 4.4403e-02],
         [5.0933e+01, 1.7831e-02, 4.7138e-03, 6.9555e-02],
         ...,
         [4.2530e+01, 3.6857e-03, 3.4089e-03, 1.8029e-02],
         [3.4340e+01, 5.7909e-02, 4.8970e-02, 1.1030e-03],
         [1.6928e+01, 9.0979e-03, 2.6945e-01, 1.4647e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.1659e+01, 4.1647e-02, 1.4649e-02, 2.5088e-03],
         [5.4104e+01, 1.9118e-04, 1.0689e-01, 2.0303e-03],
         ...,
         [4.2425e+01, 1.5787e-01, 8.2676e-02, 1.4110e-03],
         [3.5853e+01, 2.9840e-02, 7.1777e-03, 1.9716e-02],
         [1.6355e+01, 1.1126e-01, 2


Train Diffusion:  31%|███       | 611/2000 [28:09<52:51,  2.28s/it][A
Train Diffusion:  31%|███       | 612/2000 [28:11<52:47,  2.28s/it][A
Train Diffusion:  31%|███       | 613/2000 [28:14<52:41,  2.28s/it][A
Train Diffusion:  31%|███       | 614/2000 [28:16<52:39,  2.28s/it][A
Train Diffusion:  31%|███       | 615/2000 [28:18<52:36,  2.28s/it][A
Train Diffusion:  31%|███       | 616/2000 [28:21<52:33,  2.28s/it][A
Train Diffusion:  31%|███       | 617/2000 [28:23<52:40,  2.29s/it][A
Train Diffusion:  31%|███       | 618/2000 [28:25<52:46,  2.29s/it][A
Train Diffusion:  31%|███       | 619/2000 [28:27<52:36,  2.29s/it][A
Train Diffusion:  31%|███       | 620/2000 [28:30<52:41,  2.29s/it][A

Moving average ELBO loss at 620 iterations is: -8463.642236328125. Best ELBO loss value is: -10743.6171875.

C_PATH mean = tensor([[4.5783e+01, 6.6406e-02, 1.0894e-01, 1.2417e-02],
        [4.5763e+01, 6.4054e-02, 1.1013e-01, 1.2166e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2666e+01, 4.6848e-01, 1.0771e-01, 2.6254e-02],
         [5.5606e+01, 3.6864e-02, 1.4392e-01, 4.7581e-03],
         ...,
         [4.1995e+01, 2.4215e-02, 6.3032e-03, 1.5691e-03],
         [3.6237e+01, 8.6549e-02, 5.9600e-02, 2.7986e-02],
         [2.0377e+01, 1.4068e-01, 1.5792e-03, 5.4411e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6317e+01, 1.1136e-02, 1.3758e-02, 2.6539e-02],
         [5.4467e+01, 2.9784e-05, 3.6675e-03, 1.4637e-01],
         ...,
         [4.0380e+01, 1.4622e-01, 1.1796e-01, 2.6693e-02],
         [3.3395e+01, 4.4270e-02, 4.1501e-02, 2.3422e-03],
         [1.6440e+01, 3.8136e-02, 1.98


Train Diffusion:  31%|███       | 621/2000 [28:32<52:35,  2.29s/it][A
Train Diffusion:  31%|███       | 622/2000 [28:34<52:33,  2.29s/it][A
Train Diffusion:  31%|███       | 623/2000 [28:37<52:26,  2.28s/it][A
Train Diffusion:  31%|███       | 624/2000 [28:39<54:37,  2.38s/it][A
Train Diffusion:  31%|███▏      | 625/2000 [28:42<57:53,  2.53s/it][A
Train Diffusion:  31%|███▏      | 626/2000 [28:44<56:39,  2.47s/it][A
Train Diffusion:  31%|███▏      | 627/2000 [28:47<55:40,  2.43s/it][A
Train Diffusion:  31%|███▏      | 628/2000 [28:49<54:35,  2.39s/it][A
Train Diffusion:  31%|███▏      | 629/2000 [28:51<53:47,  2.35s/it][A
Train Diffusion:  32%|███▏      | 630/2000 [28:54<53:14,  2.33s/it][A

Moving average ELBO loss at 630 iterations is: -10854.82060546875. Best ELBO loss value is: -11477.5380859375.

C_PATH mean = tensor([[4.5226e+01, 8.6468e-02, 1.3324e-01, 1.3123e-02],
        [4.5196e+01, 8.5123e-02, 1.4623e-01, 1.2465e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.8632e+01, 9.1285e-03, 8.7829e-02, 1.5738e-01],
         [4.7779e+01, 1.3582e-03, 2.6906e-02, 8.2922e-02],
         ...,
         [4.0119e+01, 1.0852e-01, 8.1920e-02, 2.7655e-02],
         [3.4164e+01, 2.0573e-01, 4.4684e-02, 7.5120e-03],
         [1.7710e+01, 1.1146e-01, 3.0158e-01, 5.6940e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1476e+01, 1.4121e-01, 5.0580e-02, 9.0538e-03],
         [5.7478e+01, 4.7141e-04, 1.3499e-01, 3.5963e-02],
         ...,
         [4.0648e+01, 5.0266e-02, 1.2569e-01, 1.2260e-03],
         [3.6027e+01, 4.6381e-02, 1.9608e-01, 1.8885e-02],
         [2.1896e+01, 1.3062e-01, 2


Train Diffusion:  32%|███▏      | 631/2000 [28:56<52:49,  2.32s/it][A
Train Diffusion:  32%|███▏      | 632/2000 [28:58<52:33,  2.31s/it][A
Train Diffusion:  32%|███▏      | 633/2000 [29:00<52:28,  2.30s/it][A
Train Diffusion:  32%|███▏      | 634/2000 [29:03<52:12,  2.29s/it][A
Train Diffusion:  32%|███▏      | 635/2000 [29:05<51:59,  2.29s/it][A
Train Diffusion:  32%|███▏      | 636/2000 [29:07<51:53,  2.28s/it][A
Train Diffusion:  32%|███▏      | 637/2000 [29:10<52:29,  2.31s/it][A
Train Diffusion:  32%|███▏      | 638/2000 [29:12<54:13,  2.39s/it][A
Train Diffusion:  32%|███▏      | 639/2000 [29:15<56:02,  2.47s/it][A
Train Diffusion:  32%|███▏      | 640/2000 [29:17<55:38,  2.45s/it][A

Moving average ELBO loss at 640 iterations is: -11062.5404296875. Best ELBO loss value is: -11706.6435546875.

C_PATH mean = tensor([[4.4918e+01, 9.8074e-02, 1.9091e-01, 1.4937e-02],
        [4.4882e+01, 9.7896e-02, 1.9129e-01, 1.4914e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5054e+01, 4.8720e-02, 3.5241e-02, 1.3889e-01],
         [5.6387e+01, 7.8691e-05, 1.0863e-01, 2.1021e-01],
         ...,
         [3.9657e+01, 1.3987e-01, 2.5659e-02, 2.0323e-02],
         [3.4913e+01, 7.6504e-02, 1.1429e-02, 4.6791e-03],
         [2.0758e+01, 6.6357e-02, 2.2349e-01, 1.2545e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.7916e+01, 1.9073e-01, 1.8153e-01, 2.2733e-02],
         [4.8465e+01, 5.2368e-03, 7.8215e-02, 1.0523e-02],
         ...,
         [3.9299e+01, 3.5167e-02, 3.8920e-01, 7.6524e-03],
         [3.4910e+01, 1.3918e-01, 3.6505e-01, 3.0501e-02],
         [2.2339e+01, 1.8691e-01, 6.


Train Diffusion:  32%|███▏      | 641/2000 [29:20<55:26,  2.45s/it][A
Train Diffusion:  32%|███▏      | 642/2000 [29:22<55:20,  2.45s/it][A
Train Diffusion:  32%|███▏      | 643/2000 [29:25<54:54,  2.43s/it][A
Train Diffusion:  32%|███▏      | 644/2000 [29:27<54:42,  2.42s/it][A
Train Diffusion:  32%|███▏      | 645/2000 [29:30<57:13,  2.53s/it][A
Train Diffusion:  32%|███▏      | 646/2000 [29:32<56:18,  2.49s/it][A
Train Diffusion:  32%|███▏      | 647/2000 [29:35<55:47,  2.47s/it][A
Train Diffusion:  32%|███▏      | 648/2000 [29:37<55:45,  2.47s/it][A
Train Diffusion:  32%|███▏      | 649/2000 [29:39<55:11,  2.45s/it][A
Train Diffusion:  32%|███▎      | 650/2000 [29:42<54:43,  2.43s/it][A

Moving average ELBO loss at 650 iterations is: -12920.2154296875. Best ELBO loss value is: -14961.013671875.

C_PATH mean = tensor([[4.4991e+01, 8.3979e-02, 2.1771e-01, 2.1268e-02],
        [4.4952e+01, 8.5035e-02, 2.0995e-01, 2.1412e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3237e+01, 9.1270e-02, 1.2622e-01, 1.7176e-01],
         [5.0105e+01, 5.6649e-03, 1.8558e-01, 1.2710e-01],
         ...,
         [3.9302e+01, 1.1039e-01, 2.9497e-01, 9.3220e-03],
         [3.6677e+01, 1.7641e-01, 2.4964e-01, 3.9945e-02],
         [2.3810e+01, 2.2655e-01, 2.4024e-01, 5.7944e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5421e+01, 1.3538e-01, 7.4661e-02, 1.6177e-02],
         [5.1510e+01, 6.6127e-05, 2.1061e-02, 3.8049e-02],
         ...,
         [3.8393e+01, 6.1312e-02, 1.7284e-01, 3.9959e-02],
         [3.3919e+01, 5.3014e-02, 1.1078e-01, 2.3558e-02],
         [2.4858e+01, 3.5091e-02, 2.6


Train Diffusion:  33%|███▎      | 651/2000 [29:44<54:28,  2.42s/it][A
Train Diffusion:  33%|███▎      | 652/2000 [29:47<54:16,  2.42s/it][A
Train Diffusion:  33%|███▎      | 653/2000 [29:49<54:04,  2.41s/it][A
Train Diffusion:  33%|███▎      | 654/2000 [29:51<53:53,  2.40s/it][A
Train Diffusion:  33%|███▎      | 655/2000 [29:54<55:05,  2.46s/it][A
Train Diffusion:  33%|███▎      | 656/2000 [29:56<54:46,  2.45s/it][A
Train Diffusion:  33%|███▎      | 657/2000 [29:59<54:12,  2.42s/it][A
Train Diffusion:  33%|███▎      | 658/2000 [30:01<53:26,  2.39s/it][A
Train Diffusion:  33%|███▎      | 659/2000 [30:03<52:54,  2.37s/it][A
Train Diffusion:  33%|███▎      | 660/2000 [30:06<52:25,  2.35s/it][A

Moving average ELBO loss at 660 iterations is: -14982.35751953125. Best ELBO loss value is: -16340.880859375.

C_PATH mean = tensor([[4.4705e+01, 1.1047e-01, 2.5845e-01, 2.1334e-02],
        [4.4729e+01, 1.1157e-01, 2.4624e-01, 2.1277e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6764e+01, 2.5672e-02, 2.6238e-01, 1.9259e-02],
         [5.0538e+01, 2.5268e-04, 2.7145e-01, 3.3218e-02],
         ...,
         [3.7938e+01, 1.1004e-01, 1.8647e-01, 1.5290e-02],
         [3.6314e+01, 1.7735e-01, 1.2350e-01, 4.5014e-02],
         [2.8204e+01, 2.2784e-01, 3.3320e-02, 7.5679e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4001e+01, 3.5182e-03, 2.9142e-02, 1.6865e-01],
         [5.6145e+01, 7.4719e-07, 1.5236e-02, 1.3748e-01],
         ...,
         [3.8564e+01, 1.1780e-01, 3.0296e-01, 3.1719e-02],
         [3.5184e+01, 7.8951e-02, 1.9443e-01, 1.6482e-02],
         [2.4224e+01, 4.8050e-02, 1.


Train Diffusion:  33%|███▎      | 661/2000 [30:08<52:14,  2.34s/it][A
Train Diffusion:  33%|███▎      | 662/2000 [30:10<51:55,  2.33s/it][A
Train Diffusion:  33%|███▎      | 663/2000 [30:13<51:54,  2.33s/it][A
Train Diffusion:  33%|███▎      | 664/2000 [30:15<51:49,  2.33s/it][A
Train Diffusion:  33%|███▎      | 665/2000 [30:17<51:34,  2.32s/it][A
Train Diffusion:  33%|███▎      | 666/2000 [30:20<51:23,  2.31s/it][A
Train Diffusion:  33%|███▎      | 667/2000 [30:22<51:17,  2.31s/it][A
Train Diffusion:  33%|███▎      | 668/2000 [30:24<51:33,  2.32s/it][A
Train Diffusion:  33%|███▎      | 669/2000 [30:27<51:23,  2.32s/it][A
Train Diffusion:  34%|███▎      | 670/2000 [30:29<51:19,  2.32s/it][A

Moving average ELBO loss at 670 iterations is: -15824.2982421875. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4739e+01, 1.1561e-01, 2.7555e-01, 2.4344e-02],
        [4.4720e+01, 1.1887e-01, 2.5900e-01, 2.4235e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.7317e+01, 5.3596e-02, 3.5723e-02, 3.4667e-02],
         [4.7332e+01, 1.9840e-06, 1.0003e-01, 3.9270e-02],
         ...,
         [3.8415e+01, 3.1449e-02, 2.8438e-01, 3.2452e-02],
         [3.5101e+01, 3.9710e-02, 3.6765e-01, 1.7743e-02],
         [3.1335e+01, 4.0221e-02, 1.1637e-01, 9.4336e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9454e+01, 7.0481e-02, 2.3040e-01, 1.5110e-01],
         [5.2080e+01, 2.6018e-04, 1.0203e-01, 1.3300e-01],
         ...,
         [3.8330e+01, 1.7091e-01, 2.1692e-01, 1.6355e-02],
         [3.7118e+01, 1.9746e-01, 5.5821e-02, 4.8364e-02],
         [2.8078e+01, 2.1798e-01, 1.3


Train Diffusion:  34%|███▎      | 671/2000 [30:31<51:19,  2.32s/it][A
Train Diffusion:  34%|███▎      | 672/2000 [30:34<51:19,  2.32s/it][A
Train Diffusion:  34%|███▎      | 673/2000 [30:36<51:07,  2.31s/it][A
Train Diffusion:  34%|███▎      | 674/2000 [30:38<50:58,  2.31s/it][A
Train Diffusion:  34%|███▍      | 675/2000 [30:40<50:51,  2.30s/it][A
Train Diffusion:  34%|███▍      | 676/2000 [30:43<50:52,  2.31s/it][A
Train Diffusion:  34%|███▍      | 677/2000 [30:45<50:47,  2.30s/it][A
Train Diffusion:  34%|███▍      | 678/2000 [30:47<50:41,  2.30s/it][A
Train Diffusion:  34%|███▍      | 679/2000 [30:50<50:37,  2.30s/it][A
Train Diffusion:  34%|███▍      | 680/2000 [30:52<50:35,  2.30s/it][A

Moving average ELBO loss at 680 iterations is: -14657.29658203125. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4714e+01, 1.2579e-01, 2.7402e-01, 2.2971e-02],
        [4.4638e+01, 1.2361e-01, 2.8347e-01, 2.3049e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3354e+01, 1.1446e-01, 1.4370e-01, 3.2576e-02],
         [5.3142e+01, 1.9230e-04, 2.1064e-01, 1.4273e-02],
         ...,
         [3.7507e+01, 1.1429e-01, 2.7241e-01, 3.7696e-02],
         [3.7063e+01, 1.5185e-01, 3.5585e-01, 6.0577e-02],
         [3.6357e+01, 2.1262e-01, 1.2772e-01, 1.3010e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4971e+01, 1.5067e-02, 9.2878e-02, 1.1710e-01],
         [5.0834e+01, 6.5652e-08, 3.7771e-02, 1.8390e-01],
         ...,
         [3.8301e+01, 7.9003e-02, 2.4012e-01, 9.5826e-03],
         [3.5481e+01, 6.5880e-02, 6.0666e-02, 1.3000e-02],
         [3.0963e+01, 4.7624e-02, 1.


Train Diffusion:  34%|███▍      | 681/2000 [30:54<50:34,  2.30s/it][A
Train Diffusion:  34%|███▍      | 682/2000 [30:57<50:31,  2.30s/it][A
Train Diffusion:  34%|███▍      | 683/2000 [30:59<50:35,  2.30s/it][A
Train Diffusion:  34%|███▍      | 684/2000 [31:01<50:50,  2.32s/it][A
Train Diffusion:  34%|███▍      | 685/2000 [31:04<50:52,  2.32s/it][A
Train Diffusion:  34%|███▍      | 686/2000 [31:06<51:24,  2.35s/it][A
Train Diffusion:  34%|███▍      | 687/2000 [31:08<51:18,  2.34s/it][A
Train Diffusion:  34%|███▍      | 688/2000 [31:11<51:35,  2.36s/it][A
Train Diffusion:  34%|███▍      | 689/2000 [31:13<51:23,  2.35s/it][A
Train Diffusion:  34%|███▍      | 690/2000 [31:15<51:12,  2.35s/it][A

Moving average ELBO loss at 690 iterations is: -14935.92939453125. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4214e+01, 1.5344e-01, 3.5121e-01, 3.0399e-02],
        [4.4182e+01, 1.6029e-01, 3.4121e-01, 3.1226e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9926e+01, 1.2429e-01, 2.2620e-01, 2.1182e-01],
         [5.1503e+01, 5.4156e-06, 1.4011e-01, 2.2488e-01],
         ...,
         [3.7607e+01, 1.1801e-01, 4.0641e-01, 1.4221e-02],
         [3.7503e+01, 1.5517e-01, 2.4092e-01, 3.3661e-02],
         [3.3990e+01, 1.7650e-01, 2.7857e-01, 1.4017e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.7951e+01, 3.0468e-03, 6.1029e-02, 2.8404e-02],
         [4.8408e+01, 7.6378e-09, 1.3357e-01, 1.3121e-02],
         ...,
         [3.7963e+01, 1.4201e-01, 2.1237e-01, 4.9415e-02],
         [3.5368e+01, 1.1181e-01, 2.9341e-01, 2.8488e-02],
         [3.8433e+01, 1.2438e-01, 7.


Train Diffusion:  35%|███▍      | 691/2000 [31:18<51:03,  2.34s/it][A
Train Diffusion:  35%|███▍      | 692/2000 [31:20<50:51,  2.33s/it][A
Train Diffusion:  35%|███▍      | 693/2000 [31:22<50:44,  2.33s/it][A
Train Diffusion:  35%|███▍      | 694/2000 [31:25<50:49,  2.33s/it][A
Train Diffusion:  35%|███▍      | 695/2000 [31:27<50:34,  2.33s/it][A
Train Diffusion:  35%|███▍      | 696/2000 [31:29<50:39,  2.33s/it][A
Train Diffusion:  35%|███▍      | 697/2000 [31:32<50:42,  2.33s/it][A
Train Diffusion:  35%|███▍      | 698/2000 [31:34<50:46,  2.34s/it][A
Train Diffusion:  35%|███▍      | 699/2000 [31:36<50:38,  2.34s/it][A
Train Diffusion:  35%|███▌      | 700/2000 [31:39<50:31,  2.33s/it][A

Moving average ELBO loss at 700 iterations is: -13752.1296875. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4283e+01, 1.4836e-01, 2.7560e-01, 2.5484e-02],
        [4.4253e+01, 1.4914e-01, 2.8293e-01, 2.5893e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2034e+01, 4.0618e-04, 2.2880e-01, 1.1992e-01],
         [5.2809e+01, 2.0990e-06, 1.2719e-01, 9.2040e-02],
         ...,
         [3.8437e+01, 1.4691e-01, 7.8162e-02, 1.2360e-02],
         [3.8844e+01, 1.9305e-01, 1.4699e-01, 3.1356e-02],
         [3.2758e+01, 2.1264e-01, 1.6000e-01, 1.3224e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4301e+01, 1.5994e-02, 4.6384e-02, 2.6400e-02],
         [5.1717e+01, 4.4110e-09, 1.3001e-01, 3.9363e-02],
         ...,
         [3.7798e+01, 1.0301e-01, 4.5602e-01, 3.6715e-02],
         [3.6099e+01, 8.3519e-02, 2.1917e-01, 1.9287e-02],
         [3.8454e+01, 6.3971e-02, 7.8228


Train Diffusion:  35%|███▌      | 701/2000 [31:41<50:40,  2.34s/it][A
Train Diffusion:  35%|███▌      | 702/2000 [31:43<50:43,  2.35s/it][A
Train Diffusion:  35%|███▌      | 703/2000 [31:46<50:44,  2.35s/it][A
Train Diffusion:  35%|███▌      | 704/2000 [31:48<50:34,  2.34s/it][A
Train Diffusion:  35%|███▌      | 705/2000 [31:50<50:32,  2.34s/it][A
Train Diffusion:  35%|███▌      | 706/2000 [31:53<50:26,  2.34s/it][A
Train Diffusion:  35%|███▌      | 707/2000 [31:55<50:18,  2.33s/it][A
Train Diffusion:  35%|███▌      | 708/2000 [31:57<50:10,  2.33s/it][A
Train Diffusion:  35%|███▌      | 709/2000 [32:00<50:25,  2.34s/it][A
Train Diffusion:  36%|███▌      | 710/2000 [32:02<50:17,  2.34s/it][A

Moving average ELBO loss at 710 iterations is: -14217.25947265625. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4159e+01, 1.4000e-01, 3.1674e-01, 2.7668e-02],
        [4.4149e+01, 1.3842e-01, 3.1064e-01, 2.6675e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2013e+01, 1.0868e-01, 1.0999e-01, 2.9061e-02],
         [5.1273e+01, 1.7177e-06, 5.9598e-02, 3.3757e-02],
         ...,
         [3.9490e+01, 6.4201e-02, 2.9245e-01, 9.8268e-03],
         [3.6742e+01, 1.0923e-01, 2.3714e-01, 2.4481e-02],
         [3.3876e+01, 7.5964e-02, 8.8354e-02, 1.3725e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3188e+01, 1.0486e-02, 1.3744e-01, 1.5680e-01],
         [5.2732e+01, 1.7096e-09, 2.1906e-01, 1.2060e-01],
         ...,
         [3.8038e+01, 1.5222e-01, 2.2489e-01, 4.2989e-02],
         [3.7955e+01, 1.2130e-01, 1.7032e-01, 2.3776e-02],
         [4.0761e+01, 1.6664e-01, 2.


Train Diffusion:  36%|███▌      | 711/2000 [32:04<50:47,  2.36s/it][A
Train Diffusion:  36%|███▌      | 712/2000 [32:07<50:28,  2.35s/it][A
Train Diffusion:  36%|███▌      | 713/2000 [32:09<50:29,  2.35s/it][A
Train Diffusion:  36%|███▌      | 714/2000 [32:11<50:07,  2.34s/it][A
Train Diffusion:  36%|███▌      | 715/2000 [32:14<50:08,  2.34s/it][A
Train Diffusion:  36%|███▌      | 716/2000 [32:16<49:57,  2.33s/it][A
Train Diffusion:  36%|███▌      | 717/2000 [32:18<49:47,  2.33s/it][A
Train Diffusion:  36%|███▌      | 718/2000 [32:21<49:32,  2.32s/it][A
Train Diffusion:  36%|███▌      | 719/2000 [32:23<49:32,  2.32s/it][A
Train Diffusion:  36%|███▌      | 720/2000 [32:25<49:30,  2.32s/it][A

Moving average ELBO loss at 720 iterations is: -15296.1533203125. Best ELBO loss value is: -16919.419921875.

C_PATH mean = tensor([[4.4135e+01, 1.3545e-01, 3.1982e-01, 3.1888e-02],
        [4.4126e+01, 1.3453e-01, 3.1725e-01, 3.0705e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3736e+01, 3.1382e-05, 1.0448e-01, 3.6341e-02],
         [5.5730e+01, 3.4282e-10, 2.0682e-01, 1.8270e-02],
         ...,
         [3.8923e+01, 8.8877e-02, 3.9080e-01, 8.6880e-03],
         [3.9281e+01, 7.1459e-02, 4.3204e-01, 2.4114e-02],
         [3.3354e+01, 1.2269e-01, 1.6192e-01, 9.2302e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4348e+01, 1.6871e-03, 1.4190e-01, 1.1001e-01],
         [4.6577e+01, 1.6646e-08, 8.6239e-02, 1.4911e-01],
         ...,
         [3.8361e+01, 1.4795e-01, 1.6479e-01, 5.6181e-02],
         [3.7301e+01, 2.0049e-01, 6.1393e-02, 2.4369e-02],
         [3.8512e+01, 1.2068e-01, 1.0


Train Diffusion:  36%|███▌      | 721/2000 [32:28<49:20,  2.31s/it][A
Train Diffusion:  36%|███▌      | 722/2000 [32:30<49:13,  2.31s/it][A
Train Diffusion:  36%|███▌      | 723/2000 [32:32<49:05,  2.31s/it][A
Train Diffusion:  36%|███▌      | 724/2000 [32:35<48:59,  2.30s/it][A
Train Diffusion:  36%|███▋      | 725/2000 [32:37<48:56,  2.30s/it][A
Train Diffusion:  36%|███▋      | 726/2000 [32:39<49:00,  2.31s/it][A
Train Diffusion:  36%|███▋      | 727/2000 [32:41<48:54,  2.31s/it][A
Train Diffusion:  36%|███▋      | 728/2000 [32:44<48:51,  2.30s/it][A
Train Diffusion:  36%|███▋      | 729/2000 [32:46<49:04,  2.32s/it][A
Train Diffusion:  36%|███▋      | 730/2000 [32:48<49:07,  2.32s/it][A

Moving average ELBO loss at 730 iterations is: -15888.67373046875. Best ELBO loss value is: -17124.53125.

C_PATH mean = tensor([[4.4116e+01, 1.4798e-01, 3.2993e-01, 3.0374e-02],
        [4.4091e+01, 1.4748e-01, 3.3289e-01, 2.9962e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.8674e+01, 1.3360e-02, 6.6173e-02, 3.9653e-02],
         [5.3425e+01, 6.4267e-08, 3.4279e-02, 1.5937e-02],
         ...,
         [3.9161e+01, 1.0965e-01, 4.8005e-02, 7.4112e-03],
         [3.7599e+01, 9.1833e-02, 1.4371e-01, 2.3141e-02],
         [3.8900e+01, 8.8184e-02, 4.5608e-02, 1.5546e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9252e+01, 4.9040e-04, 1.7286e-01, 1.0337e-01],
         [4.6388e+01, 6.8165e-10, 2.7421e-01, 1.6863e-01],
         ...,
         [3.9238e+01, 8.6169e-02, 6.5803e-01, 5.5277e-02],
         [3.9112e+01, 1.2803e-01, 3.1714e-01, 2.3241e-02],
         [3.4319e+01, 1.5165e-01, 2.4258


Train Diffusion:  37%|███▋      | 731/2000 [32:51<48:59,  2.32s/it][A
Train Diffusion:  37%|███▋      | 732/2000 [32:53<48:51,  2.31s/it][A
Train Diffusion:  37%|███▋      | 733/2000 [32:55<48:47,  2.31s/it][A
Train Diffusion:  37%|███▋      | 734/2000 [32:58<48:38,  2.31s/it][A
Train Diffusion:  37%|███▋      | 735/2000 [33:00<48:30,  2.30s/it][A
Train Diffusion:  37%|███▋      | 736/2000 [33:02<48:29,  2.30s/it][A
Train Diffusion:  37%|███▋      | 737/2000 [33:05<48:25,  2.30s/it][A
Train Diffusion:  37%|███▋      | 738/2000 [33:07<48:18,  2.30s/it][A
Train Diffusion:  37%|███▋      | 739/2000 [33:09<48:14,  2.30s/it][A
Train Diffusion:  37%|███▋      | 740/2000 [33:11<48:13,  2.30s/it][A

Moving average ELBO loss at 740 iterations is: -16631.45078125. Best ELBO loss value is: -19419.875.

C_PATH mean = tensor([[4.4138e+01, 1.4740e-01, 3.4645e-01, 3.0522e-02],
        [4.4082e+01, 1.4853e-01, 3.3464e-01, 2.9131e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3046e+01, 2.3446e-03, 1.9204e-01, 1.7250e-01],
         [4.8204e+01, 2.1773e-07, 2.4212e-01, 1.5334e-01],
         ...,
         [3.8800e+01, 3.5910e-02, 5.6144e-01, 3.3832e-02],
         [3.7338e+01, 1.2791e-01, 3.1480e-01, 2.1166e-02],
         [4.1003e+01, 1.6316e-01, 2.3425e-01, 1.5727e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1421e+01, 1.0769e-01, 6.8236e-02, 2.1673e-02],
         [5.3487e+01, 1.0960e-09, 3.4370e-02, 1.5395e-02],
         ...,
         [3.9797e+01, 1.8630e-01, 1.0807e-01, 1.3365e-02],
         [3.9827e+01, 1.4126e-01, 1.9175e-01, 2.5311e-02],
         [3.7284e+01, 1.0173e-01, 4.8641e-02,


Train Diffusion:  37%|███▋      | 741/2000 [33:14<48:11,  2.30s/it][A
Train Diffusion:  37%|███▋      | 742/2000 [33:16<48:04,  2.29s/it][A
Train Diffusion:  37%|███▋      | 743/2000 [33:18<48:11,  2.30s/it][A
Train Diffusion:  37%|███▋      | 744/2000 [33:21<48:11,  2.30s/it][A
Train Diffusion:  37%|███▋      | 745/2000 [33:23<47:59,  2.29s/it][A
Train Diffusion:  37%|███▋      | 746/2000 [33:25<47:54,  2.29s/it][A
Train Diffusion:  37%|███▋      | 747/2000 [33:27<47:45,  2.29s/it][A
Train Diffusion:  37%|███▋      | 748/2000 [33:30<47:38,  2.28s/it][A
Train Diffusion:  37%|███▋      | 749/2000 [33:32<47:39,  2.29s/it][A
Train Diffusion:  38%|███▊      | 750/2000 [33:34<47:31,  2.28s/it][A

Moving average ELBO loss at 750 iterations is: -19843.45078125. Best ELBO loss value is: -20292.87109375.

C_PATH mean = tensor([[4.4056e+01, 1.5509e-01, 3.5591e-01, 3.3828e-02],
        [4.4015e+01, 1.5993e-01, 3.7891e-01, 3.2920e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1908e+01, 5.6333e-04, 7.4357e-02, 2.1425e-02],
         [4.9450e+01, 1.0584e-10, 1.3705e-01, 1.5418e-02],
         ...,
         [3.9141e+01, 1.7319e-01, 1.7092e-01, 4.4991e-02],
         [3.9145e+01, 1.3959e-01, 1.4690e-01, 2.0749e-02],
         [3.5226e+01, 1.1273e-01, 4.7147e-02, 1.2580e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9551e+01, 2.8526e-05, 2.1126e-01, 2.2827e-01],
         [5.1853e+01, 7.8765e-09, 1.4787e-01, 1.9443e-01],
         ...,
         [3.9356e+01, 4.8904e-02, 5.0056e-01, 1.3327e-02],
         [3.7761e+01, 1.3701e-01, 3.4909e-01, 2.6516e-02],
         [3.9058e+01, 1.7804e-01, 3.5385


Train Diffusion:  38%|███▊      | 751/2000 [33:37<47:33,  2.28s/it][A
Train Diffusion:  38%|███▊      | 752/2000 [33:39<47:29,  2.28s/it][A
Train Diffusion:  38%|███▊      | 753/2000 [33:41<47:25,  2.28s/it][A
Train Diffusion:  38%|███▊      | 754/2000 [33:43<47:20,  2.28s/it][A
Train Diffusion:  38%|███▊      | 755/2000 [33:46<49:22,  2.38s/it][A
Train Diffusion:  38%|███▊      | 756/2000 [33:48<48:38,  2.35s/it][A
Train Diffusion:  38%|███▊      | 757/2000 [33:51<48:30,  2.34s/it][A
Train Diffusion:  38%|███▊      | 758/2000 [33:53<48:14,  2.33s/it][A
Train Diffusion:  38%|███▊      | 759/2000 [33:55<47:50,  2.31s/it][A
Train Diffusion:  38%|███▊      | 760/2000 [33:58<47:32,  2.30s/it][A

Moving average ELBO loss at 760 iterations is: -19026.3484375. Best ELBO loss value is: -20912.583984375.

C_PATH mean = tensor([[4.4052e+01, 1.4793e-01, 3.6496e-01, 3.4008e-02],
        [4.4102e+01, 1.5178e-01, 3.6927e-01, 3.3239e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1478e+01, 1.8446e-04, 1.5064e-01, 3.7264e-02],
         [5.0198e+01, 5.2259e-08, 9.5257e-02, 1.8125e-02],
         ...,
         [3.9119e+01, 1.6249e-01, 1.2944e-01, 3.5525e-02],
         [3.7717e+01, 1.2778e-01, 9.6964e-02, 2.1132e-02],
         [4.0291e+01, 1.0675e-01, 3.5590e-02, 1.0409e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3230e+01, 4.5350e-03, 1.0656e-01, 1.4162e-01],
         [5.2188e+01, 1.2254e-10, 1.8960e-01, 1.6466e-01],
         ...,
         [3.9882e+01, 4.9501e-02, 6.0881e-01, 2.2163e-02],
         [4.0248e+01, 1.3490e-01, 4.2213e-01, 3.0917e-02],
         [3.7302e+01, 1.6823e-01, 2.6644


Train Diffusion:  38%|███▊      | 761/2000 [34:00<47:21,  2.29s/it][A
Train Diffusion:  38%|███▊      | 762/2000 [34:02<47:15,  2.29s/it][A
Train Diffusion:  38%|███▊      | 763/2000 [34:04<47:04,  2.28s/it][A
Train Diffusion:  38%|███▊      | 764/2000 [34:07<47:00,  2.28s/it][A
Train Diffusion:  38%|███▊      | 765/2000 [34:09<46:59,  2.28s/it][A
Train Diffusion:  38%|███▊      | 766/2000 [34:11<46:52,  2.28s/it][A
Train Diffusion:  38%|███▊      | 767/2000 [34:13<46:48,  2.28s/it][A
Train Diffusion:  38%|███▊      | 768/2000 [34:16<46:44,  2.28s/it][A
Train Diffusion:  38%|███▊      | 769/2000 [34:18<46:41,  2.28s/it][A
Train Diffusion:  38%|███▊      | 770/2000 [34:20<46:39,  2.28s/it][A

Moving average ELBO loss at 770 iterations is: -21780.73515625. Best ELBO loss value is: -22991.4140625.

C_PATH mean = tensor([[4.4098e+01, 1.5595e-01, 3.7301e-01, 3.5643e-02],
        [4.4108e+01, 1.5784e-01, 3.6788e-01, 3.6531e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1692e+01, 4.6936e-03, 1.1696e-01, 1.2906e-01],
         [5.1760e+01, 6.5058e-07, 2.0423e-01, 1.6382e-01],
         ...,
         [3.9770e+01, 7.8751e-02, 2.9308e-01, 1.4500e-02],
         [3.8604e+01, 1.2067e-01, 3.1144e-01, 3.0513e-02],
         [4.2002e+01, 1.3944e-01, 2.4295e-01, 1.2320e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0517e+01, 1.1782e-01, 1.5512e-01, 3.8901e-02],
         [5.0118e+01, 4.2328e-10, 9.8696e-02, 1.9400e-02],
         ...,
         [3.9720e+01, 1.6315e-01, 4.3509e-01, 4.6437e-02],
         [4.0478e+01, 1.3265e-01, 2.5474e-01, 2.9298e-02],
         [3.8514e+01, 1.0981e-01, 6.8292e


Train Diffusion:  39%|███▊      | 771/2000 [34:23<46:43,  2.28s/it][A
Train Diffusion:  39%|███▊      | 772/2000 [34:25<47:10,  2.31s/it][A
Train Diffusion:  39%|███▊      | 773/2000 [34:27<46:54,  2.29s/it][A
Train Diffusion:  39%|███▊      | 774/2000 [34:29<46:50,  2.29s/it][A
Train Diffusion:  39%|███▉      | 775/2000 [34:32<46:44,  2.29s/it][A
Train Diffusion:  39%|███▉      | 776/2000 [34:34<46:39,  2.29s/it][A
Train Diffusion:  39%|███▉      | 777/2000 [34:36<46:33,  2.28s/it][A
Train Diffusion:  39%|███▉      | 778/2000 [34:39<46:26,  2.28s/it][A
Train Diffusion:  39%|███▉      | 779/2000 [34:41<46:23,  2.28s/it][A
Train Diffusion:  39%|███▉      | 780/2000 [34:43<46:17,  2.28s/it][A

Moving average ELBO loss at 780 iterations is: -22239.5935546875. Best ELBO loss value is: -23667.927734375.

C_PATH mean = tensor([[4.4094e+01, 1.6778e-01, 3.8362e-01, 3.4558e-02],
        [4.4117e+01, 1.6965e-01, 3.7668e-01, 3.4707e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0924e+01, 1.9393e-03, 1.5791e-01, 2.3183e-02],
         [5.1569e+01, 2.7162e-10, 1.0397e-01, 3.6716e-02],
         ...,
         [3.9628e+01, 1.7728e-01, 4.8670e-01, 3.2504e-02],
         [3.8700e+01, 1.4354e-01, 3.2695e-01, 2.4101e-02],
         [3.6938e+01, 1.9646e-01, 7.4917e-02, 1.4450e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0240e+01, 1.7843e-01, 1.3941e-01, 1.5977e-01],
         [4.9552e+01, 4.6581e-07, 2.0228e-01, 1.0146e-01],
         ...,
         [4.0121e+01, 5.8038e-02, 1.7171e-01, 1.6829e-02],
         [4.0806e+01, 1.0821e-01, 2.3812e-01, 3.1209e-02],
         [4.2277e+01, 8.9676e-02, 2.0


Train Diffusion:  39%|███▉      | 781/2000 [34:45<46:18,  2.28s/it][A
Train Diffusion:  39%|███▉      | 782/2000 [34:48<46:16,  2.28s/it][A
Train Diffusion:  39%|███▉      | 783/2000 [34:50<46:15,  2.28s/it][A
Train Diffusion:  39%|███▉      | 784/2000 [34:52<46:11,  2.28s/it][A
Train Diffusion:  39%|███▉      | 785/2000 [34:55<46:05,  2.28s/it][A
Train Diffusion:  39%|███▉      | 786/2000 [34:57<46:29,  2.30s/it][A
Train Diffusion:  39%|███▉      | 787/2000 [34:59<46:23,  2.29s/it][A
Train Diffusion:  39%|███▉      | 788/2000 [35:01<46:18,  2.29s/it][A
Train Diffusion:  39%|███▉      | 789/2000 [35:04<46:15,  2.29s/it][A
Train Diffusion:  40%|███▉      | 790/2000 [35:06<46:07,  2.29s/it][A

Moving average ELBO loss at 790 iterations is: -20911.9609375. Best ELBO loss value is: -23667.927734375.

C_PATH mean = tensor([[4.4149e+01, 1.5486e-01, 3.8613e-01, 3.6582e-02],
        [4.4106e+01, 1.5561e-01, 4.0261e-01, 3.6827e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3280e+01, 3.7248e-02, 1.9341e-01, 3.1497e-02],
         [5.0535e+01, 2.2872e-10, 2.6710e-01, 1.8224e-02],
         ...,
         [3.9614e+01, 4.0368e-02, 1.6642e-01, 1.6228e-02],
         [3.8639e+01, 1.2707e-01, 8.8222e-02, 2.7445e-02],
         [3.6997e+01, 1.5694e-01, 3.8001e-02, 8.4343e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2521e+01, 1.8901e-02, 7.2775e-02, 1.4500e-01],
         [5.1390e+01, 5.4377e-07, 4.1881e-02, 1.6815e-01],
         ...,
         [4.0548e+01, 1.9223e-01, 5.5219e-01, 3.9664e-02],
         [4.1260e+01, 1.4144e-01, 5.3495e-01, 2.1222e-02],
         [4.2743e+01, 1.1102e-01, 3.2473


Train Diffusion:  40%|███▉      | 791/2000 [35:08<46:07,  2.29s/it][A
Train Diffusion:  40%|███▉      | 792/2000 [35:11<46:02,  2.29s/it][A
Train Diffusion:  40%|███▉      | 793/2000 [35:13<45:55,  2.28s/it][A
Train Diffusion:  40%|███▉      | 794/2000 [35:15<45:53,  2.28s/it][A
Train Diffusion:  40%|███▉      | 795/2000 [35:17<45:48,  2.28s/it][A
Train Diffusion:  40%|███▉      | 796/2000 [35:20<45:46,  2.28s/it][A
Train Diffusion:  40%|███▉      | 797/2000 [35:22<45:53,  2.29s/it][A
Train Diffusion:  40%|███▉      | 798/2000 [35:24<46:44,  2.33s/it][A
Train Diffusion:  40%|███▉      | 799/2000 [35:27<46:25,  2.32s/it][A
Train Diffusion:  40%|████      | 800/2000 [35:29<46:10,  2.31s/it][A

Moving average ELBO loss at 800 iterations is: -22126.68828125. Best ELBO loss value is: -23667.927734375.

C_PATH mean = tensor([[4.4136e+01, 1.6090e-01, 4.0406e-01, 3.7203e-02],
        [4.4120e+01, 1.6376e-01, 4.1317e-01, 3.7033e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1756e+01, 2.9260e-03, 8.8212e-02, 1.5805e-01],
         [5.0085e+01, 1.8149e-10, 4.6590e-02, 1.1763e-01],
         ...,
         [3.9898e+01, 1.4758e-01, 2.5040e-01, 2.0399e-02],
         [3.8881e+01, 1.8346e-01, 2.8850e-01, 1.3294e-02],
         [3.7618e+01, 1.4392e-01, 2.0511e-01, 8.2846e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1198e+01, 2.6125e-01, 1.7833e-01, 2.5527e-02],
         [5.0819e+01, 4.5195e-07, 2.8239e-01, 3.6867e-02],
         ...,
         [4.0547e+01, 9.2071e-02, 5.5713e-01, 4.0650e-02],
         [4.1100e+01, 7.5770e-02, 3.3789e-01, 3.9371e-02],
         [4.2633e+01, 1.1424e-01, 9.163


Train Diffusion:  40%|████      | 801/2000 [35:31<46:03,  2.30s/it][A
Train Diffusion:  40%|████      | 802/2000 [35:34<45:58,  2.30s/it][A
Train Diffusion:  40%|████      | 803/2000 [35:36<45:55,  2.30s/it][A
Train Diffusion:  40%|████      | 804/2000 [35:38<45:47,  2.30s/it][A
Train Diffusion:  40%|████      | 805/2000 [35:40<45:39,  2.29s/it][A
Train Diffusion:  40%|████      | 806/2000 [35:43<45:30,  2.29s/it][A
Train Diffusion:  40%|████      | 807/2000 [35:45<45:24,  2.28s/it][A
Train Diffusion:  40%|████      | 808/2000 [35:47<45:15,  2.28s/it][A
Train Diffusion:  40%|████      | 809/2000 [35:50<45:12,  2.28s/it][A
Train Diffusion:  40%|████      | 810/2000 [35:52<45:08,  2.28s/it][A

Moving average ELBO loss at 810 iterations is: -24534.9130859375. Best ELBO loss value is: -25244.251953125.

C_PATH mean = tensor([[4.4129e+01, 1.6780e-01, 4.3643e-01, 3.5977e-02],
        [4.4128e+01, 1.6843e-01, 4.2992e-01, 3.5988e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1949e+01, 5.6488e-03, 1.3893e-01, 3.7708e-02],
         [5.1439e+01, 1.5551e-07, 2.2144e-01, 2.6120e-02],
         ...,
         [4.0605e+01, 1.5412e-01, 2.0720e-01, 1.2261e-02],
         [4.1199e+01, 1.9261e-01, 1.9264e-01, 2.4562e-02],
         [4.2162e+01, 2.1182e-01, 6.6331e-02, 1.4955e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2583e+01, 1.2966e-02, 1.2960e-01, 1.7919e-01],
         [5.0639e+01, 1.3946e-10, 9.4249e-02, 1.2656e-01],
         ...,
         [3.9937e+01, 9.8315e-02, 5.2278e-01, 4.6213e-02],
         [3.8998e+01, 7.6353e-02, 4.2377e-01, 2.5139e-02],
         [3.7295e+01, 6.2575e-02, 3.8


Train Diffusion:  41%|████      | 811/2000 [35:54<45:30,  2.30s/it][A
Train Diffusion:  41%|████      | 812/2000 [35:57<45:27,  2.30s/it][A
Train Diffusion:  41%|████      | 813/2000 [35:59<45:46,  2.31s/it][A
Train Diffusion:  41%|████      | 814/2000 [36:01<46:08,  2.33s/it][A
Train Diffusion:  41%|████      | 815/2000 [36:04<45:45,  2.32s/it][A
Train Diffusion:  41%|████      | 816/2000 [36:06<45:28,  2.30s/it][A
Train Diffusion:  41%|████      | 817/2000 [36:08<45:16,  2.30s/it][A
Train Diffusion:  41%|████      | 818/2000 [36:10<45:07,  2.29s/it][A
Train Diffusion:  41%|████      | 819/2000 [36:13<44:59,  2.29s/it][A
Train Diffusion:  41%|████      | 820/2000 [36:15<44:57,  2.29s/it][A

Moving average ELBO loss at 820 iterations is: -23495.369140625. Best ELBO loss value is: -25244.251953125.

C_PATH mean = tensor([[4.4034e+01, 1.7041e-01, 4.6291e-01, 3.4858e-02],
        [4.4097e+01, 1.7832e-01, 4.6971e-01, 3.5671e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2549e+01, 9.1194e-05, 2.6321e-01, 4.5064e-02],
         [5.0054e+01, 9.2194e-09, 2.2880e-01, 3.6688e-02],
         ...,
         [4.0033e+01, 2.1732e-01, 7.4303e-01, 3.0516e-02],
         [3.8965e+01, 1.5526e-01, 4.6629e-01, 3.2775e-02],
         [3.7256e+01, 1.8248e-01, 1.7454e-01, 1.1131e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3307e+01, 1.0497e-02, 5.6049e-02, 1.2651e-01],
         [5.2283e+01, 1.0789e-10, 1.1269e-01, 9.9131e-02],
         ...,
         [4.0528e+01, 4.1781e-02, 1.9050e-01, 2.3468e-02],
         [4.1176e+01, 9.1721e-02, 3.3407e-01, 1.6485e-02],
         [4.2203e+01, 7.6206e-02, 2.20


Train Diffusion:  41%|████      | 821/2000 [36:17<44:51,  2.28s/it][A
Train Diffusion:  41%|████      | 822/2000 [36:19<44:45,  2.28s/it][A
Train Diffusion:  41%|████      | 823/2000 [36:22<44:41,  2.28s/it][A
Train Diffusion:  41%|████      | 824/2000 [36:24<44:45,  2.28s/it][A
Train Diffusion:  41%|████▏     | 825/2000 [36:26<45:03,  2.30s/it][A
Train Diffusion:  41%|████▏     | 826/2000 [36:29<45:07,  2.31s/it][A
Train Diffusion:  41%|████▏     | 827/2000 [36:31<45:07,  2.31s/it][A
Train Diffusion:  41%|████▏     | 828/2000 [36:33<45:00,  2.30s/it][A
Train Diffusion:  41%|████▏     | 829/2000 [36:36<44:49,  2.30s/it][A
Train Diffusion:  42%|████▏     | 830/2000 [36:38<44:47,  2.30s/it][A

Moving average ELBO loss at 830 iterations is: -24708.77421875. Best ELBO loss value is: -25760.83984375.

C_PATH mean = tensor([[4.4126e+01, 1.6504e-01, 4.4460e-01, 4.0702e-02],
        [4.4119e+01, 1.6809e-01, 4.3816e-01, 3.8479e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2523e+01, 1.5902e-02, 1.0845e-01, 5.8214e-02],
         [5.0393e+01, 3.4562e-08, 1.9503e-01, 2.9372e-02],
         ...,
         [4.0851e+01, 9.5633e-02, 3.9399e-01, 3.4088e-02],
         [3.9699e+01, 1.3035e-01, 4.4799e-01, 3.9451e-02],
         [4.1780e+01, 1.3936e-01, 1.9007e-01, 1.6909e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1823e+01, 1.1117e-03, 1.5299e-01, 1.0855e-01],
         [5.1089e+01, 1.0503e-10, 1.1852e-01, 1.0316e-01],
         ...,
         [4.0045e+01, 1.4770e-01, 4.4578e-01, 3.1650e-02],
         [4.0822e+01, 1.0929e-01, 3.0008e-01, 2.3315e-02],
         [3.8929e+01, 9.3217e-02, 2.0871


Train Diffusion:  42%|████▏     | 831/2000 [36:40<44:40,  2.29s/it][A
Train Diffusion:  42%|████▏     | 832/2000 [36:42<44:35,  2.29s/it][A
Train Diffusion:  42%|████▏     | 833/2000 [36:45<44:31,  2.29s/it][A
Train Diffusion:  42%|████▏     | 834/2000 [36:47<44:26,  2.29s/it][A
Train Diffusion:  42%|████▏     | 835/2000 [36:49<44:25,  2.29s/it][A
Train Diffusion:  42%|████▏     | 836/2000 [36:52<44:23,  2.29s/it][A
Train Diffusion:  42%|████▏     | 837/2000 [36:54<44:16,  2.28s/it][A
Train Diffusion:  42%|████▏     | 838/2000 [36:56<44:16,  2.29s/it][A
Train Diffusion:  42%|████▏     | 839/2000 [36:58<44:17,  2.29s/it][A
Train Diffusion:  42%|████▏     | 840/2000 [37:01<44:15,  2.29s/it][A

Moving average ELBO loss at 840 iterations is: -26129.5427734375. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4127e+01, 1.6517e-01, 4.5983e-01, 4.1843e-02],
        [4.4149e+01, 1.6084e-01, 4.5389e-01, 4.2693e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1654e+01, 1.2152e-01, 7.5632e-02, 1.3799e-01],
         [4.9661e+01, 3.4535e-10, 4.6960e-02, 1.5503e-01],
         ...,
         [4.0418e+01, 2.0936e-01, 5.9089e-01, 6.4060e-02],
         [4.1028e+01, 1.4568e-01, 5.6995e-01, 4.6722e-02],
         [4.2537e+01, 1.0916e-01, 3.2671e-01, 1.3399e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0925e+01, 1.6157e-03, 2.0050e-01, 3.7495e-02],
         [5.0450e+01, 8.5941e-08, 2.9120e-01, 2.0762e-02],
         ...,
         [4.0711e+01, 4.7521e-02, 3.6883e-01, 9.8694e-03],
         [3.9696e+01, 1.3371e-01, 2.4020e-01, 9.1811e-03],
         [3.8228e+01, 1.6289e-01, 6.6


Train Diffusion:  42%|████▏     | 841/2000 [37:03<44:09,  2.29s/it][A
Train Diffusion:  42%|████▏     | 842/2000 [37:05<44:07,  2.29s/it][A
Train Diffusion:  42%|████▏     | 843/2000 [37:08<44:05,  2.29s/it][A
Train Diffusion:  42%|████▏     | 844/2000 [37:10<44:06,  2.29s/it][A
Train Diffusion:  42%|████▏     | 845/2000 [37:12<44:01,  2.29s/it][A
Train Diffusion:  42%|████▏     | 846/2000 [37:14<43:57,  2.29s/it][A
Train Diffusion:  42%|████▏     | 847/2000 [37:17<43:52,  2.28s/it][A
Train Diffusion:  42%|████▏     | 848/2000 [37:19<43:52,  2.28s/it][A
Train Diffusion:  42%|████▏     | 849/2000 [37:21<43:47,  2.28s/it][A
Train Diffusion:  42%|████▎     | 850/2000 [37:24<43:46,  2.28s/it][A

Moving average ELBO loss at 850 iterations is: -26459.9865234375. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4115e+01, 1.9363e-01, 4.6690e-01, 3.6190e-02],
        [4.4138e+01, 1.9477e-01, 4.6512e-01, 3.4049e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2375e+01, 7.4984e-04, 1.3021e-01, 9.5944e-02],
         [5.0426e+01, 5.1576e-08, 2.1571e-01, 1.2379e-01],
         ...,
         [4.0118e+01, 7.5149e-02, 6.1083e-01, 2.8185e-02],
         [3.9296e+01, 1.6029e-01, 4.1200e-01, 3.1171e-02],
         [3.7505e+01, 1.9083e-01, 2.1327e-01, 1.8148e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1573e+01, 5.8776e-02, 1.6518e-01, 5.0543e-02],
         [5.1228e+01, 3.3143e-10, 1.3917e-01, 2.0916e-02],
         ...,
         [4.0831e+01, 1.6695e-01, 2.8757e-01, 2.1503e-02],
         [4.1641e+01, 1.3261e-01, 3.0002e-01, 1.7233e-02],
         [4.2278e+01, 1.0980e-01, 1.2


Train Diffusion:  43%|████▎     | 851/2000 [37:26<43:43,  2.28s/it][A
Train Diffusion:  43%|████▎     | 852/2000 [37:28<43:56,  2.30s/it][A
Train Diffusion:  43%|████▎     | 853/2000 [37:31<44:45,  2.34s/it][A
Train Diffusion:  43%|████▎     | 854/2000 [37:33<44:21,  2.32s/it][A
Train Diffusion:  43%|████▎     | 855/2000 [37:35<44:01,  2.31s/it][A
Train Diffusion:  43%|████▎     | 856/2000 [37:37<43:57,  2.31s/it][A
Train Diffusion:  43%|████▎     | 857/2000 [37:40<43:45,  2.30s/it][A
Train Diffusion:  43%|████▎     | 858/2000 [37:42<43:33,  2.29s/it][A
Train Diffusion:  43%|████▎     | 859/2000 [37:44<43:29,  2.29s/it][A
Train Diffusion:  43%|████▎     | 860/2000 [37:47<43:24,  2.29s/it][A

Moving average ELBO loss at 860 iterations is: -25062.148046875. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4320e+01, 1.5909e-01, 4.3916e-01, 3.9342e-02],
        [4.4281e+01, 1.5949e-01, 4.2837e-01, 3.8538e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0779e+01, 3.8006e-02, 9.4091e-02, 9.3722e-02],
         [4.9594e+01, 4.2031e-07, 1.7013e-01, 1.1257e-01],
         ...,
         [4.1318e+01, 1.2426e-01, 5.8618e-01, 2.7516e-02],
         [4.0586e+01, 9.4571e-02, 4.2796e-01, 3.2536e-02],
         [4.2692e+01, 1.2994e-01, 2.1826e-01, 1.3564e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0170e+01, 3.5510e-01, 1.4351e-01, 6.7194e-02],
         [5.1182e+01, 6.7612e-10, 1.1597e-01, 3.0833e-02],
         ...,
         [4.0376e+01, 1.3497e-01, 2.9697e-01, 3.5382e-02],
         [4.1394e+01, 1.6407e-01, 2.1388e-01, 1.9238e-02],
         [3.9981e+01, 1.2010e-01, 9.16


Train Diffusion:  43%|████▎     | 861/2000 [37:49<43:35,  2.30s/it][A
Train Diffusion:  43%|████▎     | 862/2000 [37:51<43:26,  2.29s/it][A
Train Diffusion:  43%|████▎     | 863/2000 [37:53<43:20,  2.29s/it][A
Train Diffusion:  43%|████▎     | 864/2000 [37:56<43:12,  2.28s/it][A
Train Diffusion:  43%|████▎     | 865/2000 [37:58<43:09,  2.28s/it][A
Train Diffusion:  43%|████▎     | 866/2000 [38:00<43:25,  2.30s/it][A
Train Diffusion:  43%|████▎     | 867/2000 [38:03<43:23,  2.30s/it][A
Train Diffusion:  43%|████▎     | 868/2000 [38:05<43:16,  2.29s/it][A
Train Diffusion:  43%|████▎     | 869/2000 [38:07<43:10,  2.29s/it][A
Train Diffusion:  44%|████▎     | 870/2000 [38:10<43:03,  2.29s/it][A

Moving average ELBO loss at 870 iterations is: -25592.762890625. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4317e+01, 1.6774e-01, 4.2956e-01, 3.7573e-02],
        [4.4281e+01, 1.6975e-01, 4.2743e-01, 3.8190e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1825e+01, 9.0451e-02, 6.6097e-02, 1.1526e-01],
         [5.1327e+01, 3.6372e-10, 4.5057e-02, 8.2918e-02],
         ...,
         [4.0864e+01, 1.8487e-01, 1.9360e-01, 4.5955e-02],
         [4.1734e+01, 1.3635e-01, 2.8926e-01, 2.5069e-02],
         [3.9971e+01, 1.0908e-01, 1.9554e-01, 1.5939e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2314e+01, 7.4719e-03, 2.1114e-01, 2.8538e-02],
         [5.0182e+01, 2.6247e-07, 3.1366e-01, 3.5470e-02],
         ...,
         [4.0930e+01, 5.6642e-02, 6.1210e-01, 1.3376e-02],
         [4.0240e+01, 1.0030e-01, 3.2005e-01, 2.4939e-02],
         [4.2160e+01, 1.3159e-01, 9.65


Train Diffusion:  44%|████▎     | 871/2000 [38:12<42:58,  2.28s/it][A
Train Diffusion:  44%|████▎     | 872/2000 [38:14<42:51,  2.28s/it][A
Train Diffusion:  44%|████▎     | 873/2000 [38:16<42:48,  2.28s/it][A
Train Diffusion:  44%|████▎     | 874/2000 [38:19<42:42,  2.28s/it][A
Train Diffusion:  44%|████▍     | 875/2000 [38:21<42:40,  2.28s/it][A
Train Diffusion:  44%|████▍     | 876/2000 [38:23<42:40,  2.28s/it][A
Train Diffusion:  44%|████▍     | 877/2000 [38:25<42:48,  2.29s/it][A
Train Diffusion:  44%|████▍     | 878/2000 [38:28<42:52,  2.29s/it][A
Train Diffusion:  44%|████▍     | 879/2000 [38:30<42:56,  2.30s/it][A
Train Diffusion:  44%|████▍     | 880/2000 [38:32<42:57,  2.30s/it][A

Moving average ELBO loss at 880 iterations is: -25507.0345703125. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4199e+01, 1.8103e-01, 4.8689e-01, 3.8272e-02],
        [4.4197e+01, 1.8460e-01, 4.7883e-01, 3.7996e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2437e+01, 1.1869e-02, 1.5601e-01, 4.2578e-02],
         [5.1437e+01, 1.2705e-10, 1.3840e-01, 1.8630e-02],
         ...,
         [4.0521e+01, 1.0119e-01, 6.2068e-01, 1.7183e-02],
         [4.1394e+01, 1.3380e-01, 3.9146e-01, 2.4978e-02],
         [3.9741e+01, 1.2034e-01, 1.0387e-01, 1.3306e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3428e+01, 1.3517e-03, 9.7097e-02, 1.1173e-01],
         [5.0992e+01, 4.4401e-08, 1.6717e-01, 1.4276e-01],
         ...,
         [4.0994e+01, 1.3919e-01, 3.2549e-01, 4.3984e-02],
         [4.0126e+01, 1.0072e-01, 3.6351e-01, 2.2840e-02],
         [4.2006e+01, 1.3110e-01, 2.3


Train Diffusion:  44%|████▍     | 881/2000 [38:35<43:00,  2.31s/it][A
Train Diffusion:  44%|████▍     | 882/2000 [38:37<42:48,  2.30s/it][A
Train Diffusion:  44%|████▍     | 883/2000 [38:39<42:45,  2.30s/it][A
Train Diffusion:  44%|████▍     | 884/2000 [38:42<42:37,  2.29s/it][A
Train Diffusion:  44%|████▍     | 885/2000 [38:44<42:34,  2.29s/it][A
Train Diffusion:  44%|████▍     | 886/2000 [38:46<42:33,  2.29s/it][A
Train Diffusion:  44%|████▍     | 887/2000 [38:48<42:30,  2.29s/it][A
Train Diffusion:  44%|████▍     | 888/2000 [38:51<42:24,  2.29s/it][A
Train Diffusion:  44%|████▍     | 889/2000 [38:53<42:27,  2.29s/it][A
Train Diffusion:  44%|████▍     | 890/2000 [38:55<42:20,  2.29s/it][A

Moving average ELBO loss at 890 iterations is: -25553.7498046875. Best ELBO loss value is: -27015.587890625.

C_PATH mean = tensor([[4.4238e+01, 1.7245e-01, 4.8095e-01, 4.0109e-02],
        [4.4263e+01, 1.7547e-01, 4.8478e-01, 3.9486e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2672e+01, 1.1605e-02, 2.1247e-01, 3.6314e-02],
         [5.0599e+01, 4.4804e-08, 1.8970e-01, 2.0009e-02],
         ...,
         [4.0842e+01, 1.3636e-01, 3.4410e-01, 2.6508e-02],
         [3.9971e+01, 1.4400e-01, 2.2681e-01, 1.7937e-02],
         [3.8794e+01, 1.4942e-01, 6.6495e-02, 1.5006e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3303e+01, 8.8879e-03, 6.7991e-02, 1.3452e-01],
         [5.2330e+01, 3.0558e-10, 1.3948e-01, 1.3584e-01],
         ...,
         [4.0912e+01, 1.4298e-01, 5.9351e-01, 2.8323e-02],
         [4.1698e+01, 1.2687e-01, 5.6567e-01, 2.8950e-02],
         [4.3003e+01, 1.0703e-01, 3.2


Train Diffusion:  45%|████▍     | 891/2000 [38:58<42:13,  2.28s/it][A
Train Diffusion:  45%|████▍     | 892/2000 [39:00<42:07,  2.28s/it][A
Train Diffusion:  45%|████▍     | 893/2000 [39:02<42:07,  2.28s/it][A
Train Diffusion:  45%|████▍     | 894/2000 [39:04<42:15,  2.29s/it][A
Train Diffusion:  45%|████▍     | 895/2000 [39:07<42:19,  2.30s/it][A
Train Diffusion:  45%|████▍     | 896/2000 [39:09<42:11,  2.29s/it][A
Train Diffusion:  45%|████▍     | 897/2000 [39:11<42:04,  2.29s/it][A
Train Diffusion:  45%|████▍     | 898/2000 [39:14<41:59,  2.29s/it][A
Train Diffusion:  45%|████▍     | 899/2000 [39:16<41:58,  2.29s/it][A
Train Diffusion:  45%|████▌     | 900/2000 [39:18<41:52,  2.28s/it][A

Moving average ELBO loss at 900 iterations is: -26507.123046875. Best ELBO loss value is: -27231.37109375.

C_PATH mean = tensor([[4.4250e+01, 1.7731e-01, 4.8964e-01, 4.0891e-02],
        [4.4228e+01, 1.7670e-01, 4.9454e-01, 4.1067e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1558e+01, 1.9609e-03, 5.6474e-02, 1.5975e-01],
         [5.0443e+01, 1.9796e-10, 1.3529e-01, 9.4832e-02],
         ...,
         [4.1151e+01, 1.3766e-01, 7.7210e-01, 1.9733e-02],
         [4.0289e+01, 1.0062e-01, 5.8398e-01, 1.6343e-02],
         [4.1303e+01, 7.8088e-02, 3.0424e-01, 1.0045e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2382e+01, 1.9103e-02, 2.4588e-01, 3.5886e-02],
         [5.1945e+01, 6.9190e-08, 2.2497e-01, 4.3812e-02],
         ...,
         [4.0721e+01, 1.0075e-01, 1.7743e-01, 3.7073e-02],
         [4.1445e+01, 1.3534e-01, 1.8125e-01, 3.5089e-02],
         [3.9389e+01, 1.6250e-01, 8.824


Train Diffusion:  45%|████▌     | 901/2000 [39:20<41:48,  2.28s/it][A
Train Diffusion:  45%|████▌     | 902/2000 [39:23<41:47,  2.28s/it][A
Train Diffusion:  45%|████▌     | 903/2000 [39:25<41:46,  2.29s/it][A
Train Diffusion:  45%|████▌     | 904/2000 [39:27<41:41,  2.28s/it][A
Train Diffusion:  45%|████▌     | 905/2000 [39:30<41:40,  2.28s/it][A
Train Diffusion:  45%|████▌     | 906/2000 [39:32<41:38,  2.28s/it][A
Train Diffusion:  45%|████▌     | 907/2000 [39:34<41:35,  2.28s/it][A
Train Diffusion:  45%|████▌     | 908/2000 [39:36<41:35,  2.29s/it][A
Train Diffusion:  45%|████▌     | 909/2000 [39:39<42:06,  2.32s/it][A
Train Diffusion:  46%|████▌     | 910/2000 [39:41<41:53,  2.31s/it][A

Moving average ELBO loss at 910 iterations is: -26656.430078125. Best ELBO loss value is: -27231.37109375.

C_PATH mean = tensor([[4.4368e+01, 1.7788e-01, 4.7208e-01, 3.6978e-02],
        [4.4374e+01, 1.7717e-01, 4.8785e-01, 3.7528e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2320e+01, 8.4520e-03, 1.3097e-01, 5.1362e-02],
         [5.0879e+01, 1.7130e-07, 1.0789e-01, 2.0156e-02],
         ...,
         [4.1089e+01, 6.1888e-02, 3.5245e-01, 2.0888e-02],
         [4.0345e+01, 8.0922e-02, 3.8561e-01, 1.3175e-02],
         [4.2350e+01, 1.8120e-01, 1.4248e-01, 1.1912e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1577e+01, 8.5428e-02, 1.7040e-01, 9.1958e-02],
         [5.1354e+01, 2.1555e-10, 2.9570e-01, 1.1490e-01],
         ...,
         [4.1155e+01, 1.6759e-01, 5.8749e-01, 3.2933e-02],
         [4.2067e+01, 1.6942e-01, 3.4905e-01, 3.1715e-02],
         [4.0594e+01, 1.2690e-01, 2.178


Train Diffusion:  46%|████▌     | 911/2000 [39:43<41:44,  2.30s/it][A
Train Diffusion:  46%|████▌     | 912/2000 [39:46<41:41,  2.30s/it][A
Train Diffusion:  46%|████▌     | 913/2000 [39:48<41:32,  2.29s/it][A
Train Diffusion:  46%|████▌     | 914/2000 [39:50<41:23,  2.29s/it][A
Train Diffusion:  46%|████▌     | 915/2000 [39:53<41:19,  2.29s/it][A
Train Diffusion:  46%|████▌     | 916/2000 [39:55<41:14,  2.28s/it][A
Train Diffusion:  46%|████▌     | 917/2000 [39:57<41:14,  2.28s/it][A
Train Diffusion:  46%|████▌     | 918/2000 [39:59<41:07,  2.28s/it][A
Train Diffusion:  46%|████▌     | 919/2000 [40:02<41:09,  2.28s/it][A
Train Diffusion:  46%|████▌     | 920/2000 [40:04<41:06,  2.28s/it][A

Moving average ELBO loss at 920 iterations is: -26690.0017578125. Best ELBO loss value is: -27283.4765625.

C_PATH mean = tensor([[4.4212e+01, 1.7200e-01, 5.7176e-01, 3.7777e-02],
        [4.4147e+01, 1.6288e-01, 5.7792e-01, 3.6715e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5161e+01, 2.1390e-05, 1.5038e-01, 2.0260e-01],
         [5.2980e+01, 2.6489e-09, 2.7840e-01, 1.1483e-01],
         ...,
         [4.0891e+01, 1.4463e-01, 6.2886e-01, 4.8757e-02],
         [4.1289e+01, 1.3271e-01, 4.8067e-01, 2.3392e-02],
         [4.0149e+01, 1.1910e-01, 3.8245e-01, 1.1998e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5132e+01, 2.1354e-05, 1.2270e-01, 3.5054e-02],
         [5.1611e+01, 1.0041e-10, 9.7067e-02, 2.3703e-02],
         ...,
         [4.0749e+01, 1.0441e-01, 5.3060e-01, 1.5769e-02],
         [4.0011e+01, 1.1837e-01, 4.3369e-01, 2.0842e-02],
         [3.6780e+01, 1.2422e-01, 1.632


Train Diffusion:  46%|████▌     | 921/2000 [40:06<41:03,  2.28s/it][A
Train Diffusion:  46%|████▌     | 922/2000 [40:08<40:58,  2.28s/it][A
Train Diffusion:  46%|████▌     | 923/2000 [40:11<41:19,  2.30s/it][A
Train Diffusion:  46%|████▌     | 924/2000 [40:13<41:12,  2.30s/it][A
Train Diffusion:  46%|████▋     | 925/2000 [40:15<41:06,  2.29s/it][A
Train Diffusion:  46%|████▋     | 926/2000 [40:18<41:00,  2.29s/it][A
Train Diffusion:  46%|████▋     | 927/2000 [40:20<40:51,  2.28s/it][A
Train Diffusion:  46%|████▋     | 928/2000 [40:22<40:49,  2.29s/it][A
Train Diffusion:  46%|████▋     | 929/2000 [40:25<40:57,  2.29s/it][A
Train Diffusion:  46%|████▋     | 930/2000 [40:27<40:58,  2.30s/it][A

Moving average ELBO loss at 930 iterations is: -27664.966796875. Best ELBO loss value is: -28568.43359375.

C_PATH mean = tensor([[4.4524e+01, 1.7726e-01, 4.3168e-01, 4.0812e-02],
        [4.4460e+01, 1.7039e-01, 4.2894e-01, 4.0779e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.8349e+01, 5.8935e-03, 1.2702e-01, 5.7770e-02],
         [4.8734e+01, 5.4311e-07, 1.0809e-01, 2.0011e-02],
         ...,
         [4.1398e+01, 1.8412e-01, 6.1016e-01, 3.2391e-02],
         [4.2479e+01, 1.2472e-01, 4.6800e-01, 3.2341e-02],
         [4.0209e+01, 1.7760e-01, 1.6359e-01, 1.5385e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0040e+01, 2.7919e-02, 1.6091e-01, 8.3278e-02],
         [5.0171e+01, 5.4709e-10, 2.9729e-01, 1.3397e-01],
         ...,
         [4.1336e+01, 5.3123e-02, 1.7747e-01, 2.5582e-02],
         [4.1022e+01, 1.0673e-01, 1.6672e-01, 1.5270e-02],
         [4.2165e+01, 8.4887e-02, 1.999


Train Diffusion:  47%|████▋     | 931/2000 [40:29<40:55,  2.30s/it][A
Train Diffusion:  47%|████▋     | 932/2000 [40:32<43:04,  2.42s/it][A
Train Diffusion:  47%|████▋     | 933/2000 [40:35<45:59,  2.59s/it][A
Train Diffusion:  47%|████▋     | 934/2000 [40:38<47:11,  2.66s/it][A
Train Diffusion:  47%|████▋     | 935/2000 [40:40<46:23,  2.61s/it][A
Train Diffusion:  47%|████▋     | 936/2000 [40:43<45:20,  2.56s/it][A
Train Diffusion:  47%|████▋     | 937/2000 [40:45<44:18,  2.50s/it][A
Train Diffusion:  47%|████▋     | 938/2000 [40:47<43:29,  2.46s/it][A
Train Diffusion:  47%|████▋     | 939/2000 [40:50<42:59,  2.43s/it][A
Train Diffusion:  47%|████▋     | 940/2000 [40:52<42:36,  2.41s/it][A

Moving average ELBO loss at 940 iterations is: -28136.14375. Best ELBO loss value is: -28744.513671875.

C_PATH mean = tensor([[4.4525e+01, 1.8540e-01, 4.4905e-01, 3.3886e-02],
        [4.4629e+01, 1.7636e-01, 4.4371e-01, 3.4685e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9747e+01, 1.5368e-02, 1.4515e-01, 9.6695e-02],
         [4.9445e+01, 4.9562e-07, 1.0193e-01, 9.5315e-02],
         ...,
         [4.1597e+01, 1.4521e-01, 6.6910e-01, 2.3590e-02],
         [4.0900e+01, 1.6373e-01, 5.2259e-01, 2.6392e-02],
         [4.3474e+01, 1.6874e-01, 2.4878e-01, 1.6684e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1092e+01, 1.2921e-01, 1.6217e-01, 4.9595e-02],
         [5.1730e+01, 3.7043e-10, 2.7383e-01, 2.3086e-02],
         ...,
         [4.1011e+01, 7.5185e-02, 1.1489e-01, 2.1773e-02],
         [4.2207e+01, 8.3012e-02, 1.2512e-01, 1.3332e-02],
         [4.1231e+01, 8.7007e-02, 7.2690e-


Train Diffusion:  47%|████▋     | 941/2000 [40:54<42:32,  2.41s/it][A
Train Diffusion:  47%|████▋     | 942/2000 [40:57<42:15,  2.40s/it][A
Train Diffusion:  47%|████▋     | 943/2000 [40:59<42:03,  2.39s/it][A
Train Diffusion:  47%|████▋     | 944/2000 [41:02<41:59,  2.39s/it][A
Train Diffusion:  47%|████▋     | 945/2000 [41:04<42:21,  2.41s/it][A
Train Diffusion:  47%|████▋     | 946/2000 [41:07<43:02,  2.45s/it][A
Train Diffusion:  47%|████▋     | 947/2000 [41:09<42:31,  2.42s/it][A
Train Diffusion:  47%|████▋     | 948/2000 [41:11<42:42,  2.44s/it][A
Train Diffusion:  47%|████▋     | 949/2000 [41:14<44:01,  2.51s/it][A
Train Diffusion:  48%|████▊     | 950/2000 [41:17<44:17,  2.53s/it][A

Moving average ELBO loss at 950 iterations is: -28205.128515625. Best ELBO loss value is: -28744.513671875.

C_PATH mean = tensor([[4.4566e+01, 1.5932e-01, 5.0857e-01, 3.5695e-02],
        [4.4538e+01, 1.5998e-01, 4.9036e-01, 3.7141e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3729e+01, 7.4680e-02, 5.9768e-02, 5.5851e-02],
         [5.1574e+01, 7.5022e-07, 3.8324e-02, 1.9353e-02],
         ...,
         [4.1529e+01, 1.2012e-01, 6.0782e-01, 1.9678e-02],
         [4.2335e+01, 1.0572e-01, 5.3526e-01, 1.4413e-02],
         [4.3619e+01, 1.0196e-01, 2.0509e-01, 8.7062e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2428e+01, 1.6718e-03, 1.9620e-01, 6.8377e-02],
         [5.2319e+01, 2.0075e-10, 3.3050e-01, 1.0346e-01],
         ...,
         [4.0940e+01, 1.2686e-01, 4.1725e-01, 3.5656e-02],
         [4.0363e+01, 1.3096e-01, 3.4329e-01, 3.7554e-02],
         [3.9510e+01, 1.3753e-01, 1.80


Train Diffusion:  48%|████▊     | 951/2000 [41:19<45:16,  2.59s/it][A
Train Diffusion:  48%|████▊     | 952/2000 [41:22<46:45,  2.68s/it][A
Train Diffusion:  48%|████▊     | 953/2000 [41:25<47:24,  2.72s/it][A
Train Diffusion:  48%|████▊     | 954/2000 [41:28<46:49,  2.69s/it][A
Train Diffusion:  48%|████▊     | 955/2000 [41:30<46:54,  2.69s/it][A
Train Diffusion:  48%|████▊     | 956/2000 [41:33<47:25,  2.73s/it][A
Train Diffusion:  48%|████▊     | 957/2000 [41:36<49:08,  2.83s/it][A
Train Diffusion:  48%|████▊     | 958/2000 [41:39<48:55,  2.82s/it][A
Train Diffusion:  48%|████▊     | 959/2000 [41:43<54:02,  3.11s/it][A
Train Diffusion:  48%|████▊     | 960/2000 [41:46<55:22,  3.19s/it][A

Moving average ELBO loss at 960 iterations is: -28463.6025390625. Best ELBO loss value is: -29500.390625.

C_PATH mean = tensor([[4.4523e+01, 1.7360e-01, 5.2741e-01, 3.7884e-02],
        [4.4505e+01, 1.7138e-01, 5.2193e-01, 3.8625e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2103e+01, 2.8421e-02, 1.3900e-01, 4.3792e-02],
         [5.0542e+01, 5.7221e-10, 2.1140e-01, 4.2059e-02],
         ...,
         [4.1024e+01, 9.6368e-02, 5.7871e-01, 1.3544e-02],
         [4.1902e+01, 1.2875e-01, 4.5747e-01, 2.1541e-02],
         [4.2757e+01, 1.6208e-01, 1.8366e-01, 1.2301e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3418e+01, 6.8145e-04, 1.3871e-01, 1.1419e-01],
         [5.2524e+01, 3.8728e-07, 1.2853e-01, 7.9755e-02],
         ...,
         [4.1287e+01, 9.3535e-02, 4.0794e-01, 4.2815e-02],
         [4.0356e+01, 7.4858e-02, 3.4145e-01, 2.2411e-02],
         [3.9031e+01, 6.1756e-02, 1.8291


Train Diffusion:  48%|████▊     | 961/2000 [41:49<55:00,  3.18s/it][A
Train Diffusion:  48%|████▊     | 962/2000 [41:59<1:26:35,  5.01s/it][A
Train Diffusion:  48%|████▊     | 963/2000 [42:02<1:16:36,  4.43s/it][A
Train Diffusion:  48%|████▊     | 964/2000 [42:04<1:06:58,  3.88s/it][A
Train Diffusion:  48%|████▊     | 965/2000 [42:07<1:00:22,  3.50s/it][A
Train Diffusion:  48%|████▊     | 966/2000 [42:10<55:43,  3.23s/it]  [A
Train Diffusion:  48%|████▊     | 967/2000 [42:12<51:12,  2.97s/it][A
Train Diffusion:  48%|████▊     | 968/2000 [42:14<48:07,  2.80s/it][A
Train Diffusion:  48%|████▊     | 969/2000 [42:17<45:50,  2.67s/it][A
Train Diffusion:  48%|████▊     | 970/2000 [42:19<44:12,  2.58s/it][A

Moving average ELBO loss at 970 iterations is: -29092.6306640625. Best ELBO loss value is: -29500.390625.

C_PATH mean = tensor([[4.4656e+01, 1.7034e-01, 5.2780e-01, 3.5596e-02],
        [4.4555e+01, 1.6504e-01, 5.0079e-01, 3.6107e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4589e+01, 1.9881e-02, 1.6041e-01, 4.9082e-02],
         [5.2811e+01, 2.2506e-06, 1.3459e-01, 2.4382e-02],
         ...,
         [4.0926e+01, 1.1831e-01, 7.2064e-01, 1.8061e-02],
         [4.1812e+01, 1.7457e-01, 5.2552e-01, 2.3309e-02],
         [4.2224e+01, 1.3253e-01, 1.7382e-01, 1.1339e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5045e+01, 7.1987e-04, 1.1187e-01, 1.0140e-01],
         [5.2067e+01, 5.2433e-10, 2.2076e-01, 8.2148e-02],
         ...,
         [4.1454e+01, 1.4439e-01, 2.9353e-01, 3.7566e-02],
         [4.0700e+01, 9.4703e-02, 3.1893e-01, 2.2486e-02],
         [3.8910e+01, 1.3515e-01, 2.7080


Train Diffusion:  49%|████▊     | 971/2000 [42:22<43:27,  2.53s/it][A
Train Diffusion:  49%|████▊     | 972/2000 [42:24<42:34,  2.48s/it][A
Train Diffusion:  49%|████▊     | 973/2000 [42:26<42:21,  2.47s/it][A
Train Diffusion:  49%|████▊     | 974/2000 [42:29<41:51,  2.45s/it][A
Train Diffusion:  49%|████▉     | 975/2000 [42:31<41:30,  2.43s/it][A
Train Diffusion:  49%|████▉     | 976/2000 [42:34<41:14,  2.42s/it][A
Train Diffusion:  49%|████▉     | 977/2000 [42:36<41:04,  2.41s/it][A
Train Diffusion:  49%|████▉     | 978/2000 [42:38<40:57,  2.40s/it][A
Train Diffusion:  49%|████▉     | 979/2000 [42:41<40:41,  2.39s/it][A
Train Diffusion:  49%|████▉     | 980/2000 [42:43<40:34,  2.39s/it][A

Moving average ELBO loss at 980 iterations is: -28602.4845703125. Best ELBO loss value is: -29865.53515625.

C_PATH mean = tensor([[4.4651e+01, 1.6598e-01, 5.2067e-01, 3.7728e-02],
        [4.4608e+01, 1.6400e-01, 5.1140e-01, 3.7332e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4024e+01, 5.8286e-03, 7.6692e-02, 7.6412e-02],
         [5.2728e+01, 7.5558e-07, 5.0184e-02, 6.9058e-02],
         ...,
         [4.1000e+01, 1.0770e-01, 4.9632e-01, 2.3356e-02],
         [4.1932e+01, 1.2354e-01, 3.1028e-01, 2.6145e-02],
         [4.2187e+01, 1.0386e-01, 1.9794e-01, 1.4486e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3487e+01, 2.6982e-04, 2.4549e-01, 6.6906e-02],
         [5.0659e+01, 2.5519e-10, 4.1680e-01, 2.8589e-02],
         ...,
         [4.1468e+01, 1.1301e-01, 4.7311e-01, 3.4969e-02],
         [4.0833e+01, 9.1164e-02, 5.5611e-01, 2.0416e-02],
         [3.8952e+01, 1.4211e-01, 2.43


Train Diffusion:  49%|████▉     | 981/2000 [42:45<40:46,  2.40s/it][A
Train Diffusion:  49%|████▉     | 982/2000 [42:48<40:32,  2.39s/it][A
Train Diffusion:  49%|████▉     | 983/2000 [42:50<40:26,  2.39s/it][A
Train Diffusion:  49%|████▉     | 984/2000 [42:53<40:22,  2.38s/it][A
Train Diffusion:  49%|████▉     | 985/2000 [42:55<40:12,  2.38s/it][A
Train Diffusion:  49%|████▉     | 986/2000 [42:57<40:10,  2.38s/it][A
Train Diffusion:  49%|████▉     | 987/2000 [43:00<40:05,  2.37s/it][A
Train Diffusion:  49%|████▉     | 988/2000 [43:02<40:06,  2.38s/it][A
Train Diffusion:  49%|████▉     | 989/2000 [43:04<39:59,  2.37s/it][A
Train Diffusion:  50%|████▉     | 990/2000 [43:07<39:50,  2.37s/it][A

Moving average ELBO loss at 990 iterations is: -25836.56953125. Best ELBO loss value is: -29865.53515625.

C_PATH mean = tensor([[4.4592e+01, 1.7343e-01, 5.3158e-01, 3.9807e-02],
        [4.4585e+01, 1.7777e-01, 5.0092e-01, 3.9824e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2238e+01, 1.0259e-02, 1.4681e-01, 3.3964e-02],
         [5.0908e+01, 3.3230e-10, 1.2867e-01, 3.2707e-02],
         ...,
         [4.1316e+01, 1.2157e-01, 6.6001e-01, 4.3698e-02],
         [4.2043e+01, 9.8037e-02, 4.2319e-01, 2.5483e-02],
         [4.0696e+01, 7.6197e-02, 1.4324e-01, 1.6020e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0978e+01, 1.2038e-03, 1.4112e-01, 1.2177e-01],
         [5.1324e+01, 7.7875e-07, 2.2590e-01, 8.6885e-02],
         ...,
         [4.1044e+01, 1.1387e-01, 3.8059e-01, 1.8248e-02],
         [4.0407e+01, 1.3432e-01, 4.1111e-01, 2.5274e-02],
         [4.1953e+01, 1.6389e-01, 2.3716


Train Diffusion:  50%|████▉     | 991/2000 [43:09<40:03,  2.38s/it][A
Train Diffusion:  50%|████▉     | 992/2000 [43:12<39:52,  2.37s/it][A
Train Diffusion:  50%|████▉     | 993/2000 [43:14<39:44,  2.37s/it][A
Train Diffusion:  50%|████▉     | 994/2000 [43:16<39:40,  2.37s/it][A
Train Diffusion:  50%|████▉     | 995/2000 [43:19<39:35,  2.36s/it][A
Train Diffusion:  50%|████▉     | 996/2000 [43:21<39:34,  2.37s/it][A
Train Diffusion:  50%|████▉     | 997/2000 [43:23<39:32,  2.37s/it][A
Train Diffusion:  50%|████▉     | 998/2000 [43:26<39:29,  2.36s/it][A
Train Diffusion:  50%|████▉     | 999/2000 [43:28<39:43,  2.38s/it][A
Train Diffusion:  50%|█████     | 1000/2000 [43:31<39:45,  2.39s/it][A

Moving average ELBO loss at 1000 iterations is: -29101.6416015625. Best ELBO loss value is: -29877.7578125.

C_PATH mean = tensor([[4.4660e+01, 1.6897e-01, 5.1635e-01, 4.1308e-02],
        [4.4608e+01, 1.7085e-01, 5.0271e-01, 4.0535e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1429e+01, 1.6571e-03, 1.6008e-01, 4.6165e-02],
         [5.1660e+01, 2.6962e-10, 3.0028e-01, 3.1630e-02],
         ...,
         [4.1443e+01, 8.9431e-02, 7.1682e-01, 3.7328e-02],
         [4.0711e+01, 1.2131e-01, 5.2675e-01, 3.4193e-02],
         [4.0134e+01, 9.8019e-02, 1.5688e-01, 1.3749e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2410e+01, 4.2453e-02, 1.3094e-01, 9.6404e-02],
         [5.1058e+01, 1.2713e-06, 1.0906e-01, 7.7673e-02],
         ...,
         [4.0979e+01, 1.6034e-01, 2.8444e-01, 2.0377e-02],
         [4.1729e+01, 1.3321e-01, 3.1787e-01, 1.4160e-02],
         [4.3200e+01, 1.6474e-01, 2.06


Train Diffusion:  50%|█████     | 1001/2000 [43:33<39:48,  2.39s/it][A
Train Diffusion:  50%|█████     | 1002/2000 [43:35<39:40,  2.39s/it][A
Train Diffusion:  50%|█████     | 1003/2000 [43:38<39:35,  2.38s/it][A
Train Diffusion:  50%|█████     | 1004/2000 [43:40<39:37,  2.39s/it][A
Train Diffusion:  50%|█████     | 1005/2000 [43:42<39:30,  2.38s/it][A
Train Diffusion:  50%|█████     | 1006/2000 [43:45<39:31,  2.39s/it][A
Train Diffusion:  50%|█████     | 1007/2000 [43:47<39:23,  2.38s/it][A
Train Diffusion:  50%|█████     | 1008/2000 [43:50<39:20,  2.38s/it][A
Train Diffusion:  50%|█████     | 1009/2000 [43:52<39:17,  2.38s/it][A
Train Diffusion:  50%|█████     | 1010/2000 [43:54<39:20,  2.38s/it][A

Moving average ELBO loss at 1010 iterations is: -29744.821875. Best ELBO loss value is: -30773.47265625.

C_PATH mean = tensor([[4.4638e+01, 1.8883e-01, 5.1345e-01, 3.7972e-02],
        [4.4654e+01, 1.7957e-01, 4.9977e-01, 3.7325e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1905e+01, 5.4188e-02, 8.1075e-02, 5.5124e-02],
         [5.1890e+01, 1.7684e-06, 1.4126e-01, 1.8367e-02],
         ...,
         [4.1123e+01, 5.2759e-02, 6.9005e-01, 2.3445e-02],
         [4.1877e+01, 1.4146e-01, 5.2066e-01, 1.4868e-02],
         [4.0849e+01, 1.2508e-01, 1.9718e-01, 1.0428e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2402e+01, 9.1644e-03, 2.7522e-01, 1.0304e-01],
         [5.1240e+01, 1.9617e-09, 2.5737e-01, 1.2302e-01],
         ...,
         [4.1421e+01, 2.1910e-01, 2.7562e-01, 3.0646e-02],
         [4.0706e+01, 1.5777e-01, 3.0074e-01, 3.2501e-02],
         [4.2166e+01, 1.7414e-01, 2.6281e


Train Diffusion:  51%|█████     | 1011/2000 [43:57<39:25,  2.39s/it][A
Train Diffusion:  51%|█████     | 1012/2000 [43:59<39:17,  2.39s/it][A
Train Diffusion:  51%|█████     | 1013/2000 [44:02<39:13,  2.38s/it][A
Train Diffusion:  51%|█████     | 1014/2000 [44:04<39:03,  2.38s/it][A
Train Diffusion:  51%|█████     | 1015/2000 [44:06<38:54,  2.37s/it][A
Train Diffusion:  51%|█████     | 1016/2000 [44:09<38:48,  2.37s/it][A
Train Diffusion:  51%|█████     | 1017/2000 [44:11<38:42,  2.36s/it][A
Train Diffusion:  51%|█████     | 1018/2000 [44:13<38:39,  2.36s/it][A
Train Diffusion:  51%|█████     | 1019/2000 [44:16<38:38,  2.36s/it][A
Train Diffusion:  51%|█████     | 1020/2000 [44:18<38:32,  2.36s/it][A

Moving average ELBO loss at 1020 iterations is: -30350.871484375. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4700e+01, 1.7205e-01, 5.1734e-01, 4.2519e-02],
        [4.4647e+01, 1.7177e-01, 5.2136e-01, 4.2700e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2011e+01, 6.8819e-03, 1.6268e-01, 1.0347e-01],
         [5.1695e+01, 3.2610e-06, 3.0621e-01, 7.3513e-02],
         ...,
         [4.1560e+01, 5.1182e-02, 4.3297e-01, 2.4858e-02],
         [4.0811e+01, 1.0491e-01, 3.3663e-01, 1.7301e-02],
         [4.0082e+01, 9.2107e-02, 1.8408e-01, 1.0878e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2808e+01, 1.6075e-01, 1.2871e-01, 4.9614e-02],
         [5.1085e+01, 7.2840e-09, 1.0819e-01, 4.4113e-02],
         ...,
         [4.1079e+01, 1.9700e-01, 6.0585e-01, 3.2137e-02],
         [4.1721e+01, 1.3785e-01, 5.0454e-01, 2.9111e-02],
         [4.2880e+01, 1.6848e-01, 2.04


Train Diffusion:  51%|█████     | 1021/2000 [44:20<38:41,  2.37s/it][A
Train Diffusion:  51%|█████     | 1022/2000 [44:23<38:36,  2.37s/it][A
Train Diffusion:  51%|█████     | 1023/2000 [44:25<38:49,  2.38s/it][A
Train Diffusion:  51%|█████     | 1024/2000 [44:28<38:51,  2.39s/it][A
Train Diffusion:  51%|█████▏    | 1025/2000 [44:30<38:50,  2.39s/it][A
Train Diffusion:  51%|█████▏    | 1026/2000 [44:32<38:41,  2.38s/it][A
Train Diffusion:  51%|█████▏    | 1027/2000 [44:35<38:33,  2.38s/it][A
Train Diffusion:  51%|█████▏    | 1028/2000 [44:37<38:28,  2.37s/it][A
Train Diffusion:  51%|█████▏    | 1029/2000 [44:40<38:28,  2.38s/it][A
Train Diffusion:  52%|█████▏    | 1030/2000 [44:42<38:24,  2.38s/it][A

Moving average ELBO loss at 1030 iterations is: -27374.854296875. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4689e+01, 1.8175e-01, 5.1572e-01, 4.1269e-02],
        [4.4676e+01, 1.8264e-01, 5.3578e-01, 3.8039e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1653e+01, 7.0386e-03, 2.5961e-01, 3.5959e-02],
         [5.0564e+01, 2.2104e-09, 2.5886e-01, 2.9123e-02],
         ...,
         [4.1462e+01, 1.1189e-01, 7.1207e-01, 2.4709e-02],
         [4.0729e+01, 9.2410e-02, 5.7376e-01, 2.9358e-02],
         [3.9770e+01, 1.4235e-01, 1.9000e-01, 1.3500e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2522e+01, 1.4408e-01, 7.9702e-02, 1.3779e-01],
         [5.2409e+01, 8.9010e-06, 1.3972e-01, 9.5996e-02],
         ...,
         [4.1191e+01, 1.4380e-01, 3.3257e-01, 2.3826e-02],
         [4.1720e+01, 1.5474e-01, 2.4765e-01, 1.5393e-02],
         [4.2449e+01, 1.1604e-01, 2.19


Train Diffusion:  52%|█████▏    | 1031/2000 [44:44<38:35,  2.39s/it][A
Train Diffusion:  52%|█████▏    | 1032/2000 [44:47<38:27,  2.38s/it][A
Train Diffusion:  52%|█████▏    | 1033/2000 [44:49<38:18,  2.38s/it][A
Train Diffusion:  52%|█████▏    | 1034/2000 [44:51<38:11,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1035/2000 [44:54<38:07,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1036/2000 [44:56<38:00,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1037/2000 [44:58<37:57,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1038/2000 [45:01<37:57,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1039/2000 [45:03<37:53,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1040/2000 [45:06<37:47,  2.36s/it][A

Moving average ELBO loss at 1040 iterations is: -26959.998046875. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4692e+01, 1.7036e-01, 5.2754e-01, 3.9921e-02],
        [4.4719e+01, 1.7564e-01, 5.4030e-01, 3.9584e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2122e+01, 1.7231e-01, 1.9115e-01, 1.1561e-01],
         [5.0324e+01, 1.0772e-05, 3.5410e-01, 7.6143e-02],
         ...,
         [4.1604e+01, 6.4171e-02, 4.1139e-01, 3.5693e-02],
         [4.2353e+01, 8.4551e-02, 4.1385e-01, 3.1892e-02],
         [4.3386e+01, 9.8242e-02, 2.3347e-01, 1.3858e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3073e+01, 8.6277e-03, 1.3783e-01, 3.4937e-02],
         [5.2028e+01, 2.4665e-09, 1.0665e-01, 3.4752e-02],
         ...,
         [4.1319e+01, 1.9192e-01, 6.5468e-01, 1.7741e-02],
         [4.0488e+01, 1.7880e-01, 4.8716e-01, 1.1756e-02],
         [4.0204e+01, 1.7556e-01, 1.55


Train Diffusion:  52%|█████▏    | 1041/2000 [45:08<37:55,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1042/2000 [45:10<37:46,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1043/2000 [45:13<37:44,  2.37s/it][A
Train Diffusion:  52%|█████▏    | 1044/2000 [45:15<37:39,  2.36s/it][A
Train Diffusion:  52%|█████▏    | 1045/2000 [45:17<37:35,  2.36s/it][A
Train Diffusion:  52%|█████▏    | 1046/2000 [45:20<37:30,  2.36s/it][A
Train Diffusion:  52%|█████▏    | 1047/2000 [45:22<37:29,  2.36s/it][A
Train Diffusion:  52%|█████▏    | 1048/2000 [45:25<38:07,  2.40s/it][A
Train Diffusion:  52%|█████▏    | 1049/2000 [45:27<37:41,  2.38s/it][A
Train Diffusion:  52%|█████▎    | 1050/2000 [45:29<37:20,  2.36s/it][A

Moving average ELBO loss at 1050 iterations is: -25748.013671875. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4735e+01, 1.7004e-01, 5.2408e-01, 3.8337e-02],
        [4.4736e+01, 1.7041e-01, 5.3827e-01, 3.8302e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3349e+01, 3.2383e-02, 1.1660e-01, 7.5495e-02],
         [5.2318e+01, 3.8600e-09, 1.7891e-01, 8.4813e-02],
         ...,
         [4.1553e+01, 1.4339e-01, 5.2128e-01, 3.8640e-02],
         [4.0814e+01, 1.3973e-01, 4.3382e-01, 2.1206e-02],
         [4.1659e+01, 1.4656e-01, 1.9400e-01, 1.0351e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3910e+01, 7.8643e-03, 1.6172e-01, 8.3205e-02],
         [5.2085e+01, 4.2322e-06, 1.5411e-01, 2.6435e-02],
         ...,
         [4.1305e+01, 1.2367e-01, 5.5488e-01, 1.4406e-02],
         [4.2190e+01, 1.1703e-01, 4.5033e-01, 2.1467e-02],
         [4.0808e+01, 1.0690e-01, 2.28


Train Diffusion:  53%|█████▎    | 1051/2000 [45:32<37:07,  2.35s/it][A
Train Diffusion:  53%|█████▎    | 1052/2000 [45:34<37:07,  2.35s/it][A
Train Diffusion:  53%|█████▎    | 1053/2000 [45:36<36:53,  2.34s/it][A
Train Diffusion:  53%|█████▎    | 1054/2000 [45:39<36:43,  2.33s/it][A
Train Diffusion:  53%|█████▎    | 1055/2000 [45:41<36:42,  2.33s/it][A
Train Diffusion:  53%|█████▎    | 1056/2000 [45:43<36:36,  2.33s/it][A
Train Diffusion:  53%|█████▎    | 1057/2000 [45:46<36:32,  2.33s/it][A
Train Diffusion:  53%|█████▎    | 1058/2000 [45:48<36:27,  2.32s/it][A
Train Diffusion:  53%|█████▎    | 1059/2000 [45:50<36:23,  2.32s/it][A
Train Diffusion:  53%|█████▎    | 1060/2000 [45:52<36:26,  2.33s/it][A

Moving average ELBO loss at 1060 iterations is: -27282.533984375. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4746e+01, 1.6557e-01, 5.4507e-01, 4.2010e-02],
        [4.4768e+01, 1.6156e-01, 5.3994e-01, 4.0210e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2466e+01, 8.7376e-03, 2.2712e-01, 5.0997e-02],
         [5.1828e+01, 2.5058e-09, 2.3270e-01, 1.9218e-02],
         ...,
         [4.1172e+01, 1.6428e-01, 7.9881e-01, 1.7764e-02],
         [4.1919e+01, 1.5908e-01, 5.7780e-01, 1.2204e-02],
         [4.0811e+01, 1.5759e-01, 1.7307e-01, 1.2415e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3248e+01, 3.1617e-02, 6.3728e-02, 8.1884e-02],
         [5.1762e+01, 6.0977e-06, 1.4259e-01, 1.0122e-01],
         ...,
         [4.1807e+01, 5.9308e-02, 3.5563e-01, 4.2093e-02],
         [4.1124e+01, 7.5721e-02, 3.6305e-01, 3.2881e-02],
         [4.1889e+01, 9.2619e-02, 2.03


Train Diffusion:  53%|█████▎    | 1061/2000 [45:55<36:24,  2.33s/it][A
Train Diffusion:  53%|█████▎    | 1062/2000 [45:57<36:50,  2.36s/it][A
Train Diffusion:  53%|█████▎    | 1063/2000 [46:00<38:15,  2.45s/it][A
Train Diffusion:  53%|█████▎    | 1064/2000 [46:02<37:46,  2.42s/it][A
Train Diffusion:  53%|█████▎    | 1065/2000 [46:05<37:22,  2.40s/it][A
Train Diffusion:  53%|█████▎    | 1066/2000 [46:07<37:04,  2.38s/it][A
Train Diffusion:  53%|█████▎    | 1067/2000 [46:09<37:49,  2.43s/it][A
Train Diffusion:  53%|█████▎    | 1068/2000 [46:12<37:16,  2.40s/it][A
Train Diffusion:  53%|█████▎    | 1069/2000 [46:14<36:51,  2.38s/it][A
Train Diffusion:  54%|█████▎    | 1070/2000 [46:16<36:35,  2.36s/it][A

Moving average ELBO loss at 1070 iterations is: -29312.4974609375. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4786e+01, 1.7595e-01, 5.4120e-01, 4.0291e-02],
        [4.4782e+01, 1.7651e-01, 5.3647e-01, 3.7980e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2762e+01, 1.6400e-02, 6.6218e-02, 1.2480e-01],
         [5.2459e+01, 2.1075e-09, 1.3996e-01, 1.1949e-01],
         ...,
         [4.1350e+01, 2.0621e-01, 6.3659e-01, 3.1671e-02],
         [4.0618e+01, 1.4441e-01, 5.1623e-01, 3.1076e-02],
         [3.9777e+01, 1.0650e-01, 3.6887e-01, 1.6807e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1959e+01, 1.6591e-02, 2.7338e-01, 4.2396e-02],
         [5.0990e+01, 6.9144e-06, 2.3454e-01, 1.8097e-02],
         ...,
         [4.1693e+01, 5.6457e-02, 4.5996e-01, 1.9340e-02],
         [4.2381e+01, 1.4579e-01, 3.7466e-01, 1.6058e-02],
         [4.2691e+01, 1.9455e-01, 1.2


Train Diffusion:  54%|█████▎    | 1071/2000 [46:19<36:24,  2.35s/it][A
Train Diffusion:  54%|█████▎    | 1072/2000 [46:21<36:12,  2.34s/it][A
Train Diffusion:  54%|█████▎    | 1073/2000 [46:23<36:06,  2.34s/it][A
Train Diffusion:  54%|█████▎    | 1074/2000 [46:26<36:08,  2.34s/it][A
Train Diffusion:  54%|█████▍    | 1075/2000 [46:28<36:06,  2.34s/it][A
Train Diffusion:  54%|█████▍    | 1076/2000 [46:31<36:10,  2.35s/it][A
Train Diffusion:  54%|█████▍    | 1077/2000 [46:33<36:09,  2.35s/it][A
Train Diffusion:  54%|█████▍    | 1078/2000 [46:35<36:15,  2.36s/it][A
Train Diffusion:  54%|█████▍    | 1079/2000 [46:38<36:00,  2.35s/it][A
Train Diffusion:  54%|█████▍    | 1080/2000 [46:40<35:56,  2.34s/it][A

Moving average ELBO loss at 1080 iterations is: -29707.8689453125. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4855e+01, 1.6202e-01, 5.2974e-01, 4.0612e-02],
        [4.4861e+01, 1.6017e-01, 5.3156e-01, 4.0254e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3546e+01, 1.2275e-02, 1.3872e-01, 5.4107e-02],
         [5.1170e+01, 4.5335e-06, 1.0385e-01, 4.1484e-02],
         ...,
         [4.1838e+01, 5.6785e-02, 6.5749e-01, 2.8181e-02],
         [4.2787e+01, 1.3496e-01, 5.5234e-01, 3.2040e-02],
         [4.1558e+01, 1.8357e-01, 2.5309e-01, 1.5368e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4232e+01, 4.3199e-02, 1.3909e-01, 1.0200e-01],
         [5.2817e+01, 5.2676e-09, 2.7784e-01, 7.2288e-02],
         ...,
         [4.1414e+01, 1.6915e-01, 4.9232e-01, 3.2867e-02],
         [4.0547e+01, 1.2021e-01, 3.8436e-01, 1.8918e-02],
         [4.2365e+01, 9.2213e-02, 1.8


Train Diffusion:  54%|█████▍    | 1081/2000 [46:42<35:51,  2.34s/it][A
Train Diffusion:  54%|█████▍    | 1082/2000 [46:45<35:45,  2.34s/it][A
Train Diffusion:  54%|█████▍    | 1083/2000 [46:47<35:40,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1084/2000 [46:49<35:34,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1085/2000 [46:52<35:33,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1086/2000 [46:54<35:29,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1087/2000 [46:56<35:30,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1088/2000 [46:59<35:25,  2.33s/it][A
Train Diffusion:  54%|█████▍    | 1089/2000 [47:01<35:24,  2.33s/it][A
Train Diffusion:  55%|█████▍    | 1090/2000 [47:03<35:21,  2.33s/it][A

Moving average ELBO loss at 1090 iterations is: -28990.471875. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4785e+01, 1.7844e-01, 5.5339e-01, 3.7160e-02],
        [4.4854e+01, 1.7458e-01, 5.4923e-01, 3.6352e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3436e+01, 4.2355e-03, 2.3191e-01, 6.7361e-02],
         [5.2937e+01, 3.5452e-06, 2.3207e-01, 9.0240e-02],
         ...,
         [4.1869e+01, 1.5258e-01, 5.1115e-01, 2.4133e-02],
         [4.1296e+01, 1.4763e-01, 4.5002e-01, 1.6352e-02],
         [4.0362e+01, 1.5661e-01, 2.2462e-01, 1.2950e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2743e+01, 6.2249e-02, 7.2532e-02, 6.2868e-02],
         [5.1579e+01, 5.9940e-09, 1.5311e-01, 1.9559e-02],
         ...,
         [4.1291e+01, 7.3543e-02, 6.5315e-01, 2.8959e-02],
         [4.2028e+01, 9.1084e-02, 5.9951e-01, 3.1722e-02],
         [4.2746e+01, 1.0607e-01, 2.4742e


Train Diffusion:  55%|█████▍    | 1091/2000 [47:06<35:26,  2.34s/it][A
Train Diffusion:  55%|█████▍    | 1092/2000 [47:08<35:25,  2.34s/it][A
Train Diffusion:  55%|█████▍    | 1093/2000 [47:10<35:20,  2.34s/it][A
Train Diffusion:  55%|█████▍    | 1094/2000 [47:13<35:12,  2.33s/it][A
Train Diffusion:  55%|█████▍    | 1095/2000 [47:15<35:19,  2.34s/it][A
Train Diffusion:  55%|█████▍    | 1096/2000 [47:17<35:26,  2.35s/it][A
Train Diffusion:  55%|█████▍    | 1097/2000 [47:20<36:43,  2.44s/it][A
Train Diffusion:  55%|█████▍    | 1098/2000 [47:22<36:07,  2.40s/it][A
Train Diffusion:  55%|█████▍    | 1099/2000 [47:25<35:47,  2.38s/it][A
Train Diffusion:  55%|█████▌    | 1100/2000 [47:27<35:26,  2.36s/it][A

Moving average ELBO loss at 1100 iterations is: -29891.7837890625. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4886e+01, 1.6538e-01, 5.5309e-01, 4.1638e-02],
        [4.4799e+01, 1.6496e-01, 5.3651e-01, 4.0431e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3368e+01, 2.8020e-02, 7.3790e-02, 7.6479e-02],
         [5.2122e+01, 6.8014e-09, 1.3425e-01, 8.6909e-02],
         ...,
         [4.1740e+01, 5.7409e-02, 4.8793e-01, 1.4641e-02],
         [4.1008e+01, 1.0368e-01, 3.9464e-01, 2.3719e-02],
         [4.1763e+01, 9.7505e-02, 1.9741e-01, 1.5016e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2478e+01, 8.7751e-03, 2.5384e-01, 8.4330e-02],
         [5.0773e+01, 7.2648e-06, 2.5036e-01, 2.6784e-02],
         ...,
         [4.1465e+01, 1.7292e-01, 6.5633e-01, 4.0605e-02],
         [4.2313e+01, 1.2586e-01, 5.5739e-01, 2.3711e-02],
         [4.1103e+01, 1.4734e-01, 2.6


Train Diffusion:  55%|█████▌    | 1101/2000 [47:29<35:14,  2.35s/it][A
Train Diffusion:  55%|█████▌    | 1102/2000 [47:32<35:04,  2.34s/it][A
Train Diffusion:  55%|█████▌    | 1103/2000 [47:34<34:54,  2.34s/it][A
Train Diffusion:  55%|█████▌    | 1104/2000 [47:36<34:48,  2.33s/it][A
Train Diffusion:  55%|█████▌    | 1105/2000 [47:39<34:54,  2.34s/it][A
Train Diffusion:  55%|█████▌    | 1106/2000 [47:41<34:56,  2.34s/it][A
Train Diffusion:  55%|█████▌    | 1107/2000 [47:43<34:46,  2.34s/it][A
Train Diffusion:  55%|█████▌    | 1108/2000 [47:46<34:41,  2.33s/it][A
Train Diffusion:  55%|█████▌    | 1109/2000 [47:48<34:34,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1110/2000 [47:50<34:31,  2.33s/it][A

Moving average ELBO loss at 1110 iterations is: -29860.6326171875. Best ELBO loss value is: -30949.27734375.

C_PATH mean = tensor([[4.4867e+01, 1.8395e-01, 5.3304e-01, 3.6375e-02],
        [4.4872e+01, 1.8125e-01, 5.5392e-01, 3.8630e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2309e+01, 7.3246e-03, 8.7354e-02, 5.4896e-02],
         [5.1225e+01, 3.7567e-09, 1.3331e-01, 3.3092e-02],
         ...,
         [4.1897e+01, 7.9620e-02, 5.9054e-01, 3.2277e-02],
         [4.1190e+01, 1.5671e-01, 5.5374e-01, 2.9045e-02],
         [4.0547e+01, 1.4066e-01, 3.0644e-01, 1.6228e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3131e+01, 1.1451e-01, 2.6830e-01, 1.0932e-01],
         [5.2936e+01, 1.7395e-05, 2.8037e-01, 7.2796e-02],
         ...,
         [4.1342e+01, 1.4623e-01, 5.2439e-01, 1.5576e-02],
         [4.2038e+01, 1.1171e-01, 3.9641e-01, 1.6256e-02],
         [4.3017e+01, 1.4778e-01, 2.1


Train Diffusion:  56%|█████▌    | 1111/2000 [47:53<34:27,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1112/2000 [47:55<34:24,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1113/2000 [47:57<34:20,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1114/2000 [47:59<34:18,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1115/2000 [48:02<34:15,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1116/2000 [48:04<34:15,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1117/2000 [48:06<34:10,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1118/2000 [48:09<34:08,  2.32s/it][A
Train Diffusion:  56%|█████▌    | 1119/2000 [48:11<34:15,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1120/2000 [48:13<34:14,  2.33s/it][A

Moving average ELBO loss at 1120 iterations is: -30624.096875. Best ELBO loss value is: -31416.962890625.

C_PATH mean = tensor([[4.4847e+01, 1.7612e-01, 5.3296e-01, 4.0072e-02],
        [4.4909e+01, 1.6863e-01, 5.6206e-01, 3.9979e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3644e+01, 5.4279e-03, 2.6485e-01, 8.3091e-02],
         [5.2142e+01, 3.8452e-09, 2.6793e-01, 2.6480e-02],
         ...,
         [4.1193e+01, 1.7690e-01, 6.4573e-01, 1.7469e-02],
         [4.0586e+01, 1.2997e-01, 5.7323e-01, 1.2553e-02],
         [4.0116e+01, 1.5206e-01, 2.6778e-01, 9.4599e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2831e+01, 1.0341e-01, 7.7088e-02, 7.8552e-02],
         [5.2325e+01, 2.3053e-05, 1.3746e-01, 8.6379e-02],
         ...,
         [4.2129e+01, 6.0600e-02, 5.0496e-01, 3.6492e-02],
         [4.2808e+01, 1.0718e-01, 3.8389e-01, 3.1173e-02],
         [4.3308e+01, 1.0160e-01, 1.9633


Train Diffusion:  56%|█████▌    | 1121/2000 [48:16<34:13,  2.34s/it][A
Train Diffusion:  56%|█████▌    | 1122/2000 [48:18<34:06,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1123/2000 [48:20<34:00,  2.33s/it][A
Train Diffusion:  56%|█████▌    | 1124/2000 [48:23<33:54,  2.32s/it][A
Train Diffusion:  56%|█████▋    | 1125/2000 [48:25<33:55,  2.33s/it][A
Train Diffusion:  56%|█████▋    | 1126/2000 [48:27<33:55,  2.33s/it][A
Train Diffusion:  56%|█████▋    | 1127/2000 [48:30<33:51,  2.33s/it][A
Train Diffusion:  56%|█████▋    | 1128/2000 [48:32<33:48,  2.33s/it][A
Train Diffusion:  56%|█████▋    | 1129/2000 [48:34<33:41,  2.32s/it][A
Train Diffusion:  56%|█████▋    | 1130/2000 [48:37<33:36,  2.32s/it][A

Moving average ELBO loss at 1130 iterations is: -31061.5814453125. Best ELBO loss value is: -31729.662109375.

C_PATH mean = tensor([[4.4893e+01, 1.8006e-01, 5.4615e-01, 4.0568e-02],
        [4.4889e+01, 1.7305e-01, 5.6152e-01, 3.9215e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2081e+01, 1.3111e-02, 2.3882e-01, 3.5477e-02],
         [5.1823e+01, 1.2367e-05, 2.5811e-01, 3.3802e-02],
         ...,
         [4.1700e+01, 1.5698e-01, 3.9537e-01, 2.0074e-02],
         [4.2395e+01, 1.7257e-01, 3.9374e-01, 1.2203e-02],
         [4.1411e+01, 1.6795e-01, 1.5327e-01, 1.2819e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2840e+01, 4.6661e-02, 6.9636e-02, 1.3684e-01],
         [5.1909e+01, 8.6865e-09, 1.5558e-01, 7.1281e-02],
         ...,
         [4.1659e+01, 7.8807e-02, 8.0719e-01, 3.3050e-02],
         [4.0987e+01, 7.3679e-02, 6.0685e-01, 2.9935e-02],
         [4.1988e+01, 8.8499e-02, 3.


Train Diffusion:  57%|█████▋    | 1131/2000 [48:39<33:35,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1132/2000 [48:41<33:35,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1133/2000 [48:44<33:48,  2.34s/it][A
Train Diffusion:  57%|█████▋    | 1134/2000 [48:46<33:44,  2.34s/it][A
Train Diffusion:  57%|█████▋    | 1135/2000 [48:48<33:36,  2.33s/it][A
Train Diffusion:  57%|█████▋    | 1136/2000 [48:51<33:34,  2.33s/it][A
Train Diffusion:  57%|█████▋    | 1137/2000 [48:53<33:26,  2.33s/it][A
Train Diffusion:  57%|█████▋    | 1138/2000 [48:55<33:22,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1139/2000 [48:58<33:18,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1140/2000 [49:00<33:17,  2.32s/it][A

Moving average ELBO loss at 1140 iterations is: -30463.3294921875. Best ELBO loss value is: -31729.662109375.

C_PATH mean = tensor([[4.4894e+01, 1.7624e-01, 5.5711e-01, 3.9965e-02],
        [4.4911e+01, 1.7267e-01, 5.6176e-01, 4.1068e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2289e+01, 5.9861e-02, 7.0829e-02, 4.7584e-02],
         [5.2005e+01, 1.6177e-05, 5.1872e-02, 1.7201e-02],
         ...,
         [4.1605e+01, 1.3531e-01, 8.0830e-01, 3.4269e-02],
         [4.2373e+01, 1.3528e-01, 6.1917e-01, 2.1183e-02],
         [4.1344e+01, 1.0347e-01, 2.4440e-01, 1.1853e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3074e+01, 1.2816e-02, 2.5760e-01, 1.3889e-01],
         [5.1998e+01, 1.1201e-08, 4.6793e-01, 1.2886e-01],
         ...,
         [4.1771e+01, 8.8712e-02, 3.6238e-01, 2.0189e-02],
         [4.1032e+01, 7.6480e-02, 3.7918e-01, 3.0815e-02],
         [4.2075e+01, 1.2587e-01, 2.


Train Diffusion:  57%|█████▋    | 1141/2000 [49:02<33:15,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1142/2000 [49:05<33:09,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1143/2000 [49:07<33:05,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1144/2000 [49:09<33:04,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1145/2000 [49:12<32:59,  2.32s/it][A
Train Diffusion:  57%|█████▋    | 1146/2000 [49:14<32:56,  2.31s/it][A
Train Diffusion:  57%|█████▋    | 1147/2000 [49:16<33:07,  2.33s/it][A
Train Diffusion:  57%|█████▋    | 1148/2000 [49:19<33:04,  2.33s/it][A
Train Diffusion:  57%|█████▋    | 1149/2000 [49:21<32:58,  2.32s/it][A
Train Diffusion:  57%|█████▊    | 1150/2000 [49:23<32:54,  2.32s/it][A

Moving average ELBO loss at 1150 iterations is: -30539.356640625. Best ELBO loss value is: -31729.662109375.

C_PATH mean = tensor([[4.4909e+01, 1.7572e-01, 5.6566e-01, 4.0987e-02],
        [4.4918e+01, 1.7565e-01, 5.5583e-01, 4.0254e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2468e+01, 3.6898e-02, 1.0415e-01, 6.9392e-02],
         [5.2023e+01, 9.3637e-06, 2.1106e-01, 2.2918e-02],
         ...,
         [4.1288e+01, 6.1416e-02, 6.0770e-01, 4.2652e-02],
         [4.1979e+01, 1.0061e-01, 5.0977e-01, 2.4655e-02],
         [4.1047e+01, 1.2643e-01, 2.1087e-01, 1.0042e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3123e+01, 1.1188e-02, 1.3051e-01, 8.0102e-02],
         [5.1999e+01, 6.5543e-09, 1.3453e-01, 9.1219e-02],
         ...,
         [4.2098e+01, 1.4510e-01, 5.4223e-01, 1.9937e-02],
         [4.1585e+01, 8.8967e-02, 4.4845e-01, 2.2756e-02],
         [4.2187e+01, 7.2947e-02, 2.1


Train Diffusion:  58%|█████▊    | 1151/2000 [49:26<32:51,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1152/2000 [49:28<32:51,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1153/2000 [49:30<32:49,  2.33s/it][A
Train Diffusion:  58%|█████▊    | 1154/2000 [49:32<32:44,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1155/2000 [49:35<32:40,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1156/2000 [49:37<32:33,  2.31s/it][A
Train Diffusion:  58%|█████▊    | 1157/2000 [49:39<32:30,  2.31s/it][A
Train Diffusion:  58%|█████▊    | 1158/2000 [49:42<32:30,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1159/2000 [49:44<32:26,  2.32s/it][A
Train Diffusion:  58%|█████▊    | 1160/2000 [49:47<33:07,  2.37s/it][A

Moving average ELBO loss at 1160 iterations is: -31017.6673828125. Best ELBO loss value is: -31731.525390625.

C_PATH mean = tensor([[4.4931e+01, 1.7026e-01, 5.5972e-01, 3.9888e-02],
        [4.4998e+01, 1.7229e-01, 5.5597e-01, 4.0617e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2998e+01, 3.5631e-02, 2.7049e-01, 7.2236e-02],
         [5.2497e+01, 4.2722e-05, 2.8203e-01, 5.5983e-02],
         ...,
         [4.1628e+01, 1.2990e-01, 5.1096e-01, 3.9609e-02],
         [4.2422e+01, 8.7986e-02, 4.4192e-01, 2.3348e-02],
         [4.3064e+01, 1.3556e-01, 2.2269e-01, 1.4181e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2711e+01, 3.7965e-02, 7.7368e-02, 5.8738e-02],
         [5.1200e+01, 1.1012e-08, 1.3071e-01, 3.2050e-02],
         ...,
         [4.1880e+01, 1.2786e-01, 6.6342e-01, 1.2695e-02],
         [4.1203e+01, 1.7020e-01, 6.1179e-01, 2.2079e-02],
         [4.0586e+01, 1.3517e-01, 2.


Train Diffusion:  58%|█████▊    | 1161/2000 [49:49<34:16,  2.45s/it][A
Train Diffusion:  58%|█████▊    | 1162/2000 [49:52<34:00,  2.43s/it][A
Train Diffusion:  58%|█████▊    | 1163/2000 [49:54<33:50,  2.43s/it][A
Train Diffusion:  58%|█████▊    | 1164/2000 [49:56<33:37,  2.41s/it][A
Train Diffusion:  58%|█████▊    | 1165/2000 [49:59<33:22,  2.40s/it][A
Train Diffusion:  58%|█████▊    | 1166/2000 [50:01<33:43,  2.43s/it][A
Train Diffusion:  58%|█████▊    | 1167/2000 [50:04<33:34,  2.42s/it][A
Train Diffusion:  58%|█████▊    | 1168/2000 [50:06<33:11,  2.39s/it][A
Train Diffusion:  58%|█████▊    | 1169/2000 [50:08<32:52,  2.37s/it][A
Train Diffusion:  58%|█████▊    | 1170/2000 [50:11<32:39,  2.36s/it][A

Moving average ELBO loss at 1170 iterations is: -31832.9462890625. Best ELBO loss value is: -32420.66015625.

C_PATH mean = tensor([[4.5014e+01, 1.7962e-01, 5.6191e-01, 3.9415e-02],
        [4.4954e+01, 1.8315e-01, 5.6466e-01, 3.8443e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2960e+01, 3.2677e-02, 7.6644e-02, 3.4938e-02],
         [5.2456e+01, 3.9045e-05, 1.3684e-01, 3.1708e-02],
         ...,
         [4.1649e+01, 1.8697e-01, 8.4042e-01, 4.8863e-02],
         [4.2461e+01, 1.7294e-01, 5.9991e-01, 3.8013e-02],
         [4.3178e+01, 1.2161e-01, 1.9606e-01, 1.8202e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3827e+01, 3.5189e-02, 2.7497e-01, 1.0787e-01],
         [5.2217e+01, 7.8656e-09, 2.9637e-01, 7.1740e-02],
         ...,
         [4.1871e+01, 7.3488e-02, 3.6175e-01, 1.1114e-02],
         [4.1160e+01, 7.6877e-02, 3.7491e-01, 1.1118e-02],
         [4.0650e+01, 1.4028e-01, 2.1


Train Diffusion:  59%|█████▊    | 1171/2000 [50:13<32:28,  2.35s/it][A
Train Diffusion:  59%|█████▊    | 1172/2000 [50:15<32:19,  2.34s/it][A
Train Diffusion:  59%|█████▊    | 1173/2000 [50:18<32:16,  2.34s/it][A
Train Diffusion:  59%|█████▊    | 1174/2000 [50:20<32:22,  2.35s/it][A
Train Diffusion:  59%|█████▉    | 1175/2000 [50:22<32:26,  2.36s/it][A
Train Diffusion:  59%|█████▉    | 1176/2000 [50:25<32:22,  2.36s/it][A
Train Diffusion:  59%|█████▉    | 1177/2000 [50:27<32:31,  2.37s/it][A
Train Diffusion:  59%|█████▉    | 1178/2000 [50:29<32:26,  2.37s/it][A
Train Diffusion:  59%|█████▉    | 1179/2000 [50:32<32:23,  2.37s/it][A
Train Diffusion:  59%|█████▉    | 1180/2000 [50:34<32:12,  2.36s/it][A

Moving average ELBO loss at 1180 iterations is: -32494.8828125. Best ELBO loss value is: -33064.5.

C_PATH mean = tensor([[4.5031e+01, 1.7781e-01, 5.5003e-01, 4.2152e-02],
        [4.4991e+01, 1.7298e-01, 5.6919e-01, 3.9206e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2446e+01, 1.0517e-02, 8.6423e-02, 1.0240e-01],
         [5.2123e+01, 1.3440e-08, 1.4267e-01, 6.7578e-02],
         ...,
         [4.1958e+01, 8.2995e-02, 5.0152e-01, 2.6731e-02],
         [4.1370e+01, 1.5486e-01, 4.4867e-01, 1.5337e-02],
         [4.2028e+01, 1.4045e-01, 2.3249e-01, 9.4587e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3418e+01, 4.9446e-02, 2.7735e-01, 4.5708e-02],
         [5.2022e+01, 2.2443e-05, 2.8103e-01, 3.3266e-02],
         ...,
         [4.1649e+01, 1.5565e-01, 6.6435e-01, 2.4130e-02],
         [4.2324e+01, 1.1661e-01, 6.2268e-01, 2.9054e-02],
         [4.1270e+01, 1.5181e-01, 2.6572e-01, 1


Train Diffusion:  59%|█████▉    | 1181/2000 [50:37<32:23,  2.37s/it][A
Train Diffusion:  59%|█████▉    | 1182/2000 [50:39<32:52,  2.41s/it][A
Train Diffusion:  59%|█████▉    | 1183/2000 [50:41<32:36,  2.39s/it][A
Train Diffusion:  59%|█████▉    | 1184/2000 [50:44<32:20,  2.38s/it][A
Train Diffusion:  59%|█████▉    | 1185/2000 [50:46<32:06,  2.36s/it][A
Train Diffusion:  59%|█████▉    | 1186/2000 [50:48<31:58,  2.36s/it][A
Train Diffusion:  59%|█████▉    | 1187/2000 [50:51<31:51,  2.35s/it][A
Train Diffusion:  59%|█████▉    | 1188/2000 [50:53<32:01,  2.37s/it][A
Train Diffusion:  59%|█████▉    | 1189/2000 [50:56<31:59,  2.37s/it][A
Train Diffusion:  60%|█████▉    | 1190/2000 [50:58<31:49,  2.36s/it][A

Moving average ELBO loss at 1190 iterations is: -32392.9802734375. Best ELBO loss value is: -33127.74609375.

C_PATH mean = tensor([[4.5006e+01, 1.7944e-01, 5.6385e-01, 4.0650e-02],
        [4.5037e+01, 1.7816e-01, 5.5274e-01, 4.0508e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3217e+01, 2.5903e-02, 8.5314e-02, 9.2775e-02],
         [5.2850e+01, 6.6401e-09, 1.4432e-01, 9.0772e-02],
         ...,
         [4.1693e+01, 5.4538e-02, 4.2037e-01, 3.9089e-02],
         [4.1118e+01, 1.4348e-01, 4.0040e-01, 2.3526e-02],
         [4.0607e+01, 1.3388e-01, 2.0482e-01, 1.5237e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2392e+01, 2.8521e-02, 2.7980e-01, 4.8252e-02],
         [5.1209e+01, 4.1217e-05, 2.8444e-01, 2.1147e-02],
         ...,
         [4.1977e+01, 2.1978e-01, 7.7467e-01, 1.5557e-02],
         [4.2607e+01, 1.5334e-01, 6.3020e-01, 2.1587e-02],
         [4.3192e+01, 1.6496e-01, 2.4


Train Diffusion:  60%|█████▉    | 1191/2000 [51:00<31:47,  2.36s/it][A
Train Diffusion:  60%|█████▉    | 1192/2000 [51:03<31:39,  2.35s/it][A
Train Diffusion:  60%|█████▉    | 1193/2000 [51:05<31:44,  2.36s/it][A
Train Diffusion:  60%|█████▉    | 1194/2000 [51:07<31:39,  2.36s/it][A
Train Diffusion:  60%|█████▉    | 1195/2000 [51:10<31:33,  2.35s/it][A
Train Diffusion:  60%|█████▉    | 1196/2000 [51:12<31:27,  2.35s/it][A
Train Diffusion:  60%|█████▉    | 1197/2000 [51:14<31:22,  2.34s/it][A
Train Diffusion:  60%|█████▉    | 1198/2000 [51:17<31:26,  2.35s/it][A
Train Diffusion:  60%|█████▉    | 1199/2000 [51:19<31:21,  2.35s/it][A
Train Diffusion:  60%|██████    | 1200/2000 [51:21<31:18,  2.35s/it][A

Moving average ELBO loss at 1200 iterations is: -31641.05078125. Best ELBO loss value is: -33127.74609375.

C_PATH mean = tensor([[4.5085e+01, 1.7598e-01, 5.5178e-01, 4.0821e-02],
        [4.5021e+01, 1.8000e-01, 5.4399e-01, 4.0370e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2302e+01, 1.1171e-01, 2.8017e-01, 4.1601e-02],
         [5.2226e+01, 3.2211e-08, 2.8156e-01, 1.6026e-02],
         ...,
         [4.2042e+01, 1.5305e-01, 7.5707e-01, 1.3106e-02],
         [4.1298e+01, 1.1875e-01, 5.7642e-01, 1.0971e-02],
         [4.2361e+01, 1.5853e-01, 2.2614e-01, 1.4028e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3032e+01, 2.5844e-02, 8.4881e-02, 1.1819e-01],
         [5.1902e+01, 3.8296e-05, 1.4654e-01, 1.1635e-01],
         ...,
         [4.1655e+01, 8.7321e-02, 4.5275e-01, 3.4446e-02],
         [4.2498e+01, 1.5450e-01, 3.6116e-01, 3.2607e-02],
         [4.1738e+01, 1.3263e-01, 2.511


Train Diffusion:  60%|██████    | 1201/2000 [51:24<31:15,  2.35s/it][A
Train Diffusion:  60%|██████    | 1202/2000 [51:26<31:25,  2.36s/it][A
Train Diffusion:  60%|██████    | 1203/2000 [51:28<31:24,  2.36s/it][A
Train Diffusion:  60%|██████    | 1204/2000 [51:31<31:19,  2.36s/it][A
Train Diffusion:  60%|██████    | 1205/2000 [51:33<31:13,  2.36s/it][A
Train Diffusion:  60%|██████    | 1206/2000 [51:36<31:05,  2.35s/it][A
Train Diffusion:  60%|██████    | 1207/2000 [51:38<31:04,  2.35s/it][A
Train Diffusion:  60%|██████    | 1208/2000 [51:40<30:59,  2.35s/it][A
Train Diffusion:  60%|██████    | 1209/2000 [51:43<30:53,  2.34s/it][A
Train Diffusion:  60%|██████    | 1210/2000 [51:45<30:47,  2.34s/it][A

Moving average ELBO loss at 1210 iterations is: -31658.8564453125. Best ELBO loss value is: -33163.9609375.

C_PATH mean = tensor([[4.5047e+01, 1.8139e-01, 5.4102e-01, 4.0186e-02],
        [4.5087e+01, 1.7776e-01, 5.6265e-01, 3.9446e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2255e+01, 1.0876e-01, 2.7420e-01, 6.6838e-02],
         [5.2490e+01, 2.4300e-08, 2.8090e-01, 8.1169e-02],
         ...,
         [4.1753e+01, 2.2406e-01, 5.0662e-01, 3.3617e-02],
         [4.1246e+01, 1.4791e-01, 4.9514e-01, 1.8523e-02],
         [4.2323e+01, 1.6330e-01, 2.1454e-01, 1.0381e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2873e+01, 3.1381e-02, 8.1398e-02, 6.5727e-02],
         [5.2163e+01, 3.0235e-05, 1.4366e-01, 2.4175e-02],
         ...,
         [4.2006e+01, 4.6839e-02, 7.0829e-01, 2.0998e-02],
         [4.2584e+01, 1.4048e-01, 5.2632e-01, 2.6873e-02],
         [4.1777e+01, 1.2591e-01, 2.07


Train Diffusion:  61%|██████    | 1211/2000 [51:47<30:41,  2.33s/it][A
Train Diffusion:  61%|██████    | 1212/2000 [51:50<30:36,  2.33s/it][A
Train Diffusion:  61%|██████    | 1213/2000 [51:52<30:29,  2.32s/it][A
Train Diffusion:  61%|██████    | 1214/2000 [51:54<30:27,  2.32s/it][A
Train Diffusion:  61%|██████    | 1215/2000 [51:57<30:28,  2.33s/it][A
Train Diffusion:  61%|██████    | 1216/2000 [51:59<30:36,  2.34s/it][A
Train Diffusion:  61%|██████    | 1217/2000 [52:01<30:30,  2.34s/it][A
Train Diffusion:  61%|██████    | 1218/2000 [52:04<30:24,  2.33s/it][A
Train Diffusion:  61%|██████    | 1219/2000 [52:06<30:20,  2.33s/it][A
Train Diffusion:  61%|██████    | 1220/2000 [52:08<30:17,  2.33s/it][A

Moving average ELBO loss at 1220 iterations is: -33374.015234375. Best ELBO loss value is: -33950.6640625.

C_PATH mean = tensor([[4.5054e+01, 1.7673e-01, 5.6553e-01, 3.9921e-02],
        [4.5083e+01, 1.7753e-01, 5.5189e-01, 4.0016e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3281e+01, 1.2987e-02, 2.0160e-01, 4.6259e-02],
         [5.2113e+01, 3.6905e-05, 4.0372e-01, 3.1005e-02],
         ...,
         [4.2304e+01, 1.0371e-01, 8.5528e-01, 2.0872e-02],
         [4.1549e+01, 8.4068e-02, 6.3553e-01, 1.4846e-02],
         [4.1229e+01, 8.8735e-02, 2.6501e-01, 1.2590e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2764e+01, 1.6108e-01, 1.3440e-01, 1.3818e-01],
         [5.2263e+01, 3.9621e-08, 1.1838e-01, 8.1544e-02],
         ...,
         [4.1473e+01, 1.5086e-01, 3.8749e-01, 3.0085e-02],
         [4.2360e+01, 1.6905e-01, 4.0408e-01, 2.7543e-02],
         [4.3082e+01, 1.7126e-01, 2.986


Train Diffusion:  61%|██████    | 1221/2000 [52:11<30:15,  2.33s/it][A
Train Diffusion:  61%|██████    | 1222/2000 [52:13<30:08,  2.32s/it][A
Train Diffusion:  61%|██████    | 1223/2000 [52:15<30:07,  2.33s/it][A
Train Diffusion:  61%|██████    | 1224/2000 [52:17<30:06,  2.33s/it][A
Train Diffusion:  61%|██████▏   | 1225/2000 [52:20<29:59,  2.32s/it][A
Train Diffusion:  61%|██████▏   | 1226/2000 [52:22<29:56,  2.32s/it][A
Train Diffusion:  61%|██████▏   | 1227/2000 [52:24<29:56,  2.32s/it][A
Train Diffusion:  61%|██████▏   | 1228/2000 [52:27<30:05,  2.34s/it][A
Train Diffusion:  61%|██████▏   | 1229/2000 [52:29<30:09,  2.35s/it][A
Train Diffusion:  62%|██████▏   | 1230/2000 [52:32<30:31,  2.38s/it][A

Moving average ELBO loss at 1230 iterations is: -33219.737109375. Best ELBO loss value is: -33950.6640625.

C_PATH mean = tensor([[4.5109e+01, 1.7468e-01, 5.6484e-01, 3.9469e-02],
        [4.5095e+01, 1.6738e-01, 5.5372e-01, 3.8560e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3135e+01, 1.9193e-01, 2.5988e-01, 6.8761e-02],
         [5.2969e+01, 4.7942e-08, 2.7808e-01, 2.4784e-02],
         ...,
         [4.2412e+01, 1.5065e-01, 7.7560e-01, 2.1618e-02],
         [4.2861e+01, 1.1519e-01, 6.2521e-01, 1.5379e-02],
         [4.3201e+01, 1.5096e-01, 2.4489e-01, 9.3589e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2563e+01, 1.3360e-02, 8.0430e-02, 7.0587e-02],
         [5.1362e+01, 4.2953e-05, 1.4647e-01, 7.6269e-02],
         ...,
         [4.1402e+01, 9.5373e-02, 4.5205e-01, 3.0922e-02],
         [4.1009e+01, 1.3667e-01, 4.3305e-01, 2.7730e-02],
         [4.0727e+01, 1.1251e-01, 2.188


Train Diffusion:  62%|██████▏   | 1231/2000 [52:34<30:15,  2.36s/it][A
Train Diffusion:  62%|██████▏   | 1232/2000 [52:36<30:04,  2.35s/it][A
Train Diffusion:  62%|██████▏   | 1233/2000 [52:39<29:54,  2.34s/it][A
Train Diffusion:  62%|██████▏   | 1234/2000 [52:41<29:50,  2.34s/it][A
Train Diffusion:  62%|██████▏   | 1235/2000 [52:43<29:42,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1236/2000 [52:46<29:38,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1237/2000 [52:48<29:41,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1238/2000 [52:50<29:35,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1239/2000 [52:53<29:34,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1240/2000 [52:55<29:29,  2.33s/it][A

Moving average ELBO loss at 1240 iterations is: -33081.0484375. Best ELBO loss value is: -33950.6640625.

C_PATH mean = tensor([[4.5077e+01, 1.7491e-01, 5.7424e-01, 3.8185e-02],
        [4.5112e+01, 1.7581e-01, 5.6630e-01, 3.8585e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3414e+01, 7.9753e-03, 2.4829e-01, 1.2068e-01],
         [5.2186e+01, 2.9642e-05, 2.4602e-01, 1.0548e-01],
         ...,
         [4.1347e+01, 1.3225e-01, 3.8555e-01, 2.9396e-02],
         [4.0998e+01, 1.6408e-01, 5.1226e-01, 3.1813e-02],
         [4.1612e+01, 1.8312e-01, 2.8919e-01, 1.3549e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2728e+01, 4.7729e-02, 5.7798e-02, 4.4602e-02],
         [5.2283e+01, 8.9214e-09, 1.4673e-01, 1.7783e-02],
         ...,
         [4.2531e+01, 1.1638e-01, 8.4627e-01, 2.3061e-02],
         [4.2848e+01, 8.9122e-02, 5.5828e-01, 1.5747e-02],
         [4.1696e+01, 7.8918e-02, 2.3174e


Train Diffusion:  62%|██████▏   | 1241/2000 [52:57<29:26,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1242/2000 [53:00<29:23,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1243/2000 [53:02<29:29,  2.34s/it][A
Train Diffusion:  62%|██████▏   | 1244/2000 [53:04<29:33,  2.35s/it][A
Train Diffusion:  62%|██████▏   | 1245/2000 [53:07<29:25,  2.34s/it][A
Train Diffusion:  62%|██████▏   | 1246/2000 [53:09<29:22,  2.34s/it][A
Train Diffusion:  62%|██████▏   | 1247/2000 [53:11<29:14,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1248/2000 [53:14<29:09,  2.33s/it][A
Train Diffusion:  62%|██████▏   | 1249/2000 [53:16<29:07,  2.33s/it][A
Train Diffusion:  62%|██████▎   | 1250/2000 [53:18<29:04,  2.33s/it][A

Moving average ELBO loss at 1250 iterations is: -31800.38828125. Best ELBO loss value is: -33950.6640625.

C_PATH mean = tensor([[4.5137e+01, 1.7581e-01, 5.7942e-01, 3.7864e-02],
        [4.5084e+01, 1.7465e-01, 5.6280e-01, 3.8416e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3310e+01, 1.0613e-01, 1.2595e-01, 9.2505e-02],
         [5.2173e+01, 4.3322e-05, 2.2513e-01, 8.6191e-02],
         ...,
         [4.1858e+01, 1.7518e-01, 3.8631e-01, 3.9200e-02],
         [4.2574e+01, 1.6326e-01, 5.1951e-01, 2.2843e-02],
         [4.3188e+01, 1.5219e-01, 2.9435e-01, 1.5333e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2533e+01, 7.6982e-03, 2.3044e-01, 6.1284e-02],
         [5.2335e+01, 6.3783e-09, 1.8821e-01, 2.2483e-02],
         ...,
         [4.2069e+01, 6.3393e-02, 7.9681e-01, 1.9664e-02],
         [4.1314e+01, 8.2132e-02, 5.4962e-01, 2.5775e-02],
         [4.1039e+01, 1.1319e-01, 2.1755


Train Diffusion:  63%|██████▎   | 1251/2000 [53:21<29:06,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1252/2000 [53:23<29:03,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1253/2000 [53:25<29:01,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1254/2000 [53:28<28:57,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1255/2000 [53:30<28:55,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1256/2000 [53:32<28:53,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1257/2000 [53:35<28:55,  2.34s/it][A
Train Diffusion:  63%|██████▎   | 1258/2000 [53:37<28:57,  2.34s/it][A
Train Diffusion:  63%|██████▎   | 1259/2000 [53:39<28:51,  2.34s/it][A
Train Diffusion:  63%|██████▎   | 1260/2000 [53:42<28:50,  2.34s/it][A

Moving average ELBO loss at 1260 iterations is: -32579.2603515625. Best ELBO loss value is: -33950.6640625.

C_PATH mean = tensor([[4.5183e+01, 1.7650e-01, 5.5513e-01, 3.9545e-02],
        [4.5081e+01, 1.7926e-01, 5.5625e-01, 4.0084e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3257e+01, 1.8175e-01, 1.3084e-01, 1.0656e-01],
         [5.3251e+01, 7.7739e-05, 2.2370e-01, 6.5486e-02],
         ...,
         [4.1877e+01, 1.9073e-01, 4.9442e-01, 2.5713e-02],
         [4.2650e+01, 1.7123e-01, 4.7666e-01, 1.8137e-02],
         [4.3406e+01, 1.6348e-01, 2.4232e-01, 1.0180e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2487e+01, 1.2077e-02, 2.4010e-01, 4.5003e-02],
         [5.1523e+01, 1.3274e-08, 2.0068e-01, 3.4700e-02],
         ...,
         [4.2039e+01, 6.7074e-02, 7.2390e-01, 2.5601e-02],
         [4.1316e+01, 9.3519e-02, 5.6883e-01, 2.6562e-02],
         [4.1168e+01, 1.1697e-01, 2.58


Train Diffusion:  63%|██████▎   | 1261/2000 [53:44<28:46,  2.34s/it][A
Train Diffusion:  63%|██████▎   | 1262/2000 [53:46<28:41,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1263/2000 [53:49<28:38,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1264/2000 [53:51<28:34,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1265/2000 [53:53<28:39,  2.34s/it][A
Train Diffusion:  63%|██████▎   | 1266/2000 [53:56<28:33,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1267/2000 [53:58<28:28,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1268/2000 [54:00<28:26,  2.33s/it][A
Train Diffusion:  63%|██████▎   | 1269/2000 [54:03<28:22,  2.33s/it][A
Train Diffusion:  64%|██████▎   | 1270/2000 [54:05<28:20,  2.33s/it][A

Moving average ELBO loss at 1270 iterations is: -33610.78359375. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5131e+01, 1.7928e-01, 5.6148e-01, 3.9647e-02],
        [4.5168e+01, 1.8298e-01, 5.4490e-01, 4.0183e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2201e+01, 1.6286e-02, 7.6629e-02, 9.5259e-02],
         [5.1333e+01, 5.0663e-05, 1.6108e-01, 9.0642e-02],
         ...,
         [4.1672e+01, 9.8811e-02, 5.5455e-01, 2.2520e-02],
         [4.1162e+01, 9.4703e-02, 4.5546e-01, 2.6447e-02],
         [4.0974e+01, 1.0194e-01, 2.2014e-01, 1.3089e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2897e+01, 2.2883e-01, 2.6248e-01, 5.5603e-02],
         [5.3042e+01, 5.3092e-08, 2.8362e-01, 2.1821e-02],
         ...,
         [4.2246e+01, 1.5820e-01, 5.7652e-01, 3.1896e-02],
         [4.2920e+01, 1.5955e-01, 4.9572e-01, 1.9923e-02],
         [4.3394e+01, 1.6142e-01, 2.3840e


Train Diffusion:  64%|██████▎   | 1271/2000 [54:07<28:26,  2.34s/it][A
Train Diffusion:  64%|██████▎   | 1272/2000 [54:10<28:24,  2.34s/it][A
Train Diffusion:  64%|██████▎   | 1273/2000 [54:12<28:18,  2.34s/it][A
Train Diffusion:  64%|██████▎   | 1274/2000 [54:14<28:14,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1275/2000 [54:17<28:10,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1276/2000 [54:19<28:08,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1277/2000 [54:21<28:05,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1278/2000 [54:24<28:02,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1279/2000 [54:26<27:59,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1280/2000 [54:28<28:04,  2.34s/it][A

Moving average ELBO loss at 1280 iterations is: -31944.112109375. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5135e+01, 1.7856e-01, 5.8353e-01, 4.1088e-02],
        [4.5148e+01, 1.8123e-01, 5.5198e-01, 4.1756e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3109e+01, 7.9755e-03, 2.5271e-01, 1.3282e-01],
         [5.2049e+01, 3.6180e-05, 4.5280e-01, 8.0373e-02],
         ...,
         [4.2087e+01, 1.8181e-01, 2.9465e-01, 8.7174e-03],
         [4.1428e+01, 1.7912e-01, 2.9686e-01, 9.5663e-03],
         [4.0780e+01, 1.6467e-01, 2.6803e-01, 9.7611e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2353e+01, 1.0682e-01, 7.4747e-02, 4.5912e-02],
         [5.2262e+01, 3.4633e-08, 5.7361e-02, 3.1857e-02],
         ...,
         [4.1844e+01, 8.2993e-02, 8.9076e-01, 5.5693e-02],
         [4.2637e+01, 7.8459e-02, 7.1002e-01, 4.0136e-02],
         [4.3003e+01, 9.5207e-02, 2.7740


Train Diffusion:  64%|██████▍   | 1281/2000 [54:31<28:03,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1282/2000 [54:33<27:57,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1283/2000 [54:35<27:54,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1284/2000 [54:38<27:50,  2.33s/it][A
Train Diffusion:  64%|██████▍   | 1285/2000 [54:40<27:56,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1286/2000 [54:42<27:56,  2.35s/it][A
Train Diffusion:  64%|██████▍   | 1287/2000 [54:45<27:51,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1288/2000 [54:47<27:43,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1289/2000 [54:49<27:40,  2.34s/it][A
Train Diffusion:  64%|██████▍   | 1290/2000 [54:52<27:37,  2.33s/it][A

Moving average ELBO loss at 1290 iterations is: -30039.3880859375. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5177e+01, 1.8860e-01, 5.6657e-01, 3.9162e-02],
        [4.5160e+01, 1.8307e-01, 5.6681e-01, 3.9135e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2055e+01, 3.8485e-02, 2.7203e-01, 1.3865e-01],
         [5.2016e+01, 2.2791e-05, 2.8487e-01, 6.5877e-02],
         ...,
         [4.1810e+01, 1.5122e-01, 7.0562e-01, 1.4238e-02],
         [4.1362e+01, 1.0799e-01, 6.7786e-01, 1.0496e-02],
         [4.0911e+01, 1.4906e-01, 3.8144e-01, 1.0215e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2849e+01, 4.0970e-02, 8.0199e-02, 3.0817e-02],
         [5.1880e+01, 1.4907e-08, 1.5206e-01, 3.6720e-02],
         ...,
         [4.2258e+01, 1.0523e-01, 4.8709e-01, 3.8522e-02],
         [4.2692e+01, 1.4836e-01, 3.3836e-01, 3.3533e-02],
         [4.3219e+01, 1.1913e-01, 1.201


Train Diffusion:  65%|██████▍   | 1291/2000 [54:54<27:35,  2.34s/it][A
Train Diffusion:  65%|██████▍   | 1292/2000 [54:56<27:32,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1293/2000 [54:59<27:27,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1294/2000 [55:01<27:25,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1295/2000 [55:03<27:23,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1296/2000 [55:06<27:21,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1297/2000 [55:08<27:17,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1298/2000 [55:10<27:15,  2.33s/it][A
Train Diffusion:  65%|██████▍   | 1299/2000 [55:13<27:23,  2.34s/it][A
Train Diffusion:  65%|██████▌   | 1300/2000 [55:15<27:20,  2.34s/it][A

Moving average ELBO loss at 1300 iterations is: -30191.0681640625. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5175e+01, 1.6975e-01, 5.7313e-01, 4.0450e-02],
        [4.5202e+01, 1.7832e-01, 5.6708e-01, 4.0364e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2537e+01, 2.2003e-02, 8.7415e-02, 4.1580e-02],
         [5.2561e+01, 2.8936e-08, 1.5055e-01, 1.8663e-02],
         ...,
         [4.1528e+01, 8.2504e-02, 7.6791e-01, 4.3852e-02],
         [4.1160e+01, 1.5688e-01, 5.2570e-01, 2.3360e-02],
         [4.2189e+01, 1.4140e-01, 2.0781e-01, 1.1776e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3337e+01, 8.7025e-02, 2.7915e-01, 1.0327e-01],
         [5.2285e+01, 4.9555e-05, 2.8713e-01, 9.6226e-02],
         ...,
         [4.2502e+01, 1.5295e-01, 4.6729e-01, 1.4092e-02],
         [4.2955e+01, 1.1525e-01, 4.7487e-01, 2.3037e-02],
         [4.2046e+01, 1.5246e-01, 2.465


Train Diffusion:  65%|██████▌   | 1301/2000 [55:17<27:17,  2.34s/it][A
Train Diffusion:  65%|██████▌   | 1302/2000 [55:20<27:11,  2.34s/it][A
Train Diffusion:  65%|██████▌   | 1303/2000 [55:22<27:05,  2.33s/it][A
Train Diffusion:  65%|██████▌   | 1304/2000 [55:24<27:03,  2.33s/it][A
Train Diffusion:  65%|██████▌   | 1305/2000 [55:27<27:00,  2.33s/it][A
Train Diffusion:  65%|██████▌   | 1306/2000 [55:29<26:59,  2.33s/it][A
Train Diffusion:  65%|██████▌   | 1307/2000 [55:31<26:59,  2.34s/it][A
Train Diffusion:  65%|██████▌   | 1308/2000 [55:34<26:55,  2.34s/it][A
Train Diffusion:  65%|██████▌   | 1309/2000 [55:36<26:52,  2.33s/it][A
Train Diffusion:  66%|██████▌   | 1310/2000 [55:38<26:51,  2.34s/it][A

Moving average ELBO loss at 1310 iterations is: -31591.6693359375. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5173e+01, 1.7833e-01, 5.7103e-01, 3.8344e-02],
        [4.5240e+01, 1.7729e-01, 5.7815e-01, 3.8206e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3027e+01, 8.2921e-02, 2.8679e-01, 7.8182e-02],
         [5.1917e+01, 4.8384e-05, 3.0712e-01, 8.3747e-02],
         ...,
         [4.1785e+01, 9.4832e-02, 4.0342e-01, 4.6297e-02],
         [4.2396e+01, 7.9508e-02, 4.0509e-01, 3.5012e-02],
         [4.1948e+01, 1.4591e-01, 2.2539e-01, 1.6024e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3616e+01, 1.9889e-02, 7.4204e-02, 5.9104e-02],
         [5.3516e+01, 2.2344e-08, 1.3556e-01, 2.0363e-02],
         ...,
         [4.2295e+01, 1.4158e-01, 8.6886e-01, 1.2342e-02],
         [4.1709e+01, 1.4928e-01, 6.4502e-01, 1.2159e-02],
         [4.2590e+01, 1.1022e-01, 2.351


Train Diffusion:  66%|██████▌   | 1311/2000 [55:41<26:48,  2.33s/it][A
Train Diffusion:  66%|██████▌   | 1312/2000 [55:43<26:50,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1313/2000 [55:45<27:00,  2.36s/it][A
Train Diffusion:  66%|██████▌   | 1314/2000 [55:48<26:54,  2.35s/it][A
Train Diffusion:  66%|██████▌   | 1315/2000 [55:50<26:47,  2.35s/it][A
Train Diffusion:  66%|██████▌   | 1316/2000 [55:52<26:40,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1317/2000 [55:55<26:34,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1318/2000 [55:57<26:29,  2.33s/it][A
Train Diffusion:  66%|██████▌   | 1319/2000 [55:59<26:28,  2.33s/it][A
Train Diffusion:  66%|██████▌   | 1320/2000 [56:02<26:26,  2.33s/it][A

Moving average ELBO loss at 1320 iterations is: -31957.230078125. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5215e+01, 1.7738e-01, 5.7897e-01, 3.8502e-02],
        [4.5215e+01, 1.7351e-01, 5.7981e-01, 3.9467e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3498e+01, 1.0024e-01, 2.4505e-01, 1.0327e-01],
         [5.3119e+01, 7.1182e-05, 2.8313e-01, 6.4582e-02],
         ...,
         [4.2138e+01, 1.7003e-01, 5.3263e-01, 1.9060e-02],
         [4.2687e+01, 1.6488e-01, 4.7405e-01, 2.7049e-02],
         [4.3247e+01, 1.5349e-01, 2.5987e-01, 1.2400e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2721e+01, 9.4770e-03, 7.2092e-02, 4.7934e-02],
         [5.1572e+01, 1.0974e-08, 1.7063e-01, 3.4069e-02],
         ...,
         [4.2004e+01, 5.6576e-02, 7.1151e-01, 3.4827e-02],
         [4.1597e+01, 5.3696e-02, 6.5464e-01, 1.8784e-02],
         [4.1285e+01, 7.9433e-02, 2.8620


Train Diffusion:  66%|██████▌   | 1321/2000 [56:04<26:26,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1322/2000 [56:06<26:23,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1323/2000 [56:09<26:20,  2.34s/it][A
Train Diffusion:  66%|██████▌   | 1324/2000 [56:11<26:17,  2.33s/it][A
Train Diffusion:  66%|██████▋   | 1325/2000 [56:13<26:14,  2.33s/it][A
Train Diffusion:  66%|██████▋   | 1326/2000 [56:16<26:13,  2.33s/it][A
Train Diffusion:  66%|██████▋   | 1327/2000 [56:18<26:19,  2.35s/it][A
Train Diffusion:  66%|██████▋   | 1328/2000 [56:20<26:14,  2.34s/it][A
Train Diffusion:  66%|██████▋   | 1329/2000 [56:23<26:09,  2.34s/it][A
Train Diffusion:  66%|██████▋   | 1330/2000 [56:25<26:05,  2.34s/it][A

Moving average ELBO loss at 1330 iterations is: -32337.5798828125. Best ELBO loss value is: -34262.734375.

C_PATH mean = tensor([[4.5289e+01, 1.8246e-01, 5.7942e-01, 3.8018e-02],
        [4.5172e+01, 1.7503e-01, 5.6915e-01, 3.8008e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3219e+01, 6.7006e-02, 1.0438e-01, 1.3209e-01],
         [5.2013e+01, 2.5478e-08, 1.6502e-01, 7.4967e-02],
         ...,
         [4.2320e+01, 1.3825e-01, 3.8799e-01, 9.2115e-03],
         [4.1635e+01, 1.7870e-01, 4.2664e-01, 2.1832e-02],
         [4.2428e+01, 1.7301e-01, 3.0765e-01, 1.5713e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2435e+01, 2.0099e-02, 1.8431e-01, 4.5982e-02],
         [5.2285e+01, 4.8787e-05, 1.9095e-01, 3.0849e-02],
         ...,
         [4.1850e+01, 1.3143e-01, 8.6207e-01, 4.5794e-02],
         [4.2700e+01, 9.3905e-02, 6.2534e-01, 2.8708e-02],
         [4.2063e+01, 9.4543e-02, 2.633


Train Diffusion:  67%|██████▋   | 1331/2000 [56:27<26:06,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1332/2000 [56:30<26:07,  2.35s/it][A
Train Diffusion:  67%|██████▋   | 1333/2000 [56:32<26:06,  2.35s/it][A
Train Diffusion:  67%|██████▋   | 1334/2000 [56:35<26:00,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1335/2000 [56:37<25:56,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1336/2000 [56:39<25:50,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1337/2000 [56:41<25:47,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1338/2000 [56:44<25:46,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1339/2000 [56:46<25:42,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1340/2000 [56:49<25:44,  2.34s/it][A

Moving average ELBO loss at 1340 iterations is: -33761.56875. Best ELBO loss value is: -34755.31640625.

C_PATH mean = tensor([[4.5234e+01, 1.6776e-01, 5.7021e-01, 4.0339e-02],
        [4.5292e+01, 1.6966e-01, 5.6044e-01, 4.0752e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3224e+01, 1.7866e-01, 1.6813e-01, 1.3429e-01],
         [5.3114e+01, 1.0868e-04, 3.4642e-01, 7.5878e-02],
         ...,
         [4.2233e+01, 1.0899e-01, 3.9883e-01, 3.3443e-02],
         [4.2766e+01, 1.5736e-01, 4.3088e-01, 3.2796e-02],
         [4.3289e+01, 1.2282e-01, 3.1290e-01, 1.6149e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2565e+01, 1.3253e-02, 1.2276e-01, 4.5582e-02],
         [5.1288e+01, 1.5107e-08, 1.2272e-01, 3.2845e-02],
         ...,
         [4.2082e+01, 1.4965e-01, 8.4785e-01, 2.2240e-02],
         [4.1646e+01, 9.9785e-02, 6.2073e-01, 1.5514e-02],
         [4.1169e+01, 1.4964e-01, 2.5503e-


Train Diffusion:  67%|██████▋   | 1341/2000 [56:51<25:46,  2.35s/it][A
Train Diffusion:  67%|██████▋   | 1342/2000 [56:53<25:42,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1343/2000 [56:56<25:35,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1344/2000 [56:58<25:30,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1345/2000 [57:00<25:30,  2.34s/it][A
Train Diffusion:  67%|██████▋   | 1346/2000 [57:03<25:25,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1347/2000 [57:05<25:22,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1348/2000 [57:07<25:20,  2.33s/it][A
Train Diffusion:  67%|██████▋   | 1349/2000 [57:10<25:16,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1350/2000 [57:12<25:14,  2.33s/it][A

Moving average ELBO loss at 1350 iterations is: -34425.298046875. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5179e+01, 1.8218e-01, 5.9260e-01, 3.8378e-02],
        [4.5221e+01, 1.8234e-01, 5.9485e-01, 3.9541e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2771e+01, 3.8267e-03, 7.9879e-02, 5.1356e-02],
         [5.2812e+01, 4.2001e-09, 1.3448e-01, 3.5833e-02],
         ...,
         [4.2194e+01, 1.7793e-01, 6.8625e-01, 1.3743e-02],
         [4.1463e+01, 1.7148e-01, 5.6791e-01, 2.1042e-02],
         [4.1052e+01, 1.2590e-01, 2.6801e-01, 1.1819e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3494e+01, 7.8098e-02, 2.9628e-01, 1.0255e-01],
         [5.2552e+01, 6.9221e-05, 3.3916e-01, 6.9791e-02],
         ...,
         [4.2016e+01, 6.6430e-02, 5.8481e-01, 3.7537e-02],
         [4.2769e+01, 7.3979e-02, 4.9476e-01, 2.4834e-02],
         [4.3287e+01, 1.3983e-01, 2.359


Train Diffusion:  68%|██████▊   | 1351/2000 [57:14<25:11,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1352/2000 [57:17<25:10,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1353/2000 [57:19<25:07,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1354/2000 [57:21<25:10,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1355/2000 [57:24<25:20,  2.36s/it][A
Train Diffusion:  68%|██████▊   | 1356/2000 [57:26<25:12,  2.35s/it][A
Train Diffusion:  68%|██████▊   | 1357/2000 [57:28<25:05,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1358/2000 [57:31<25:02,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1359/2000 [57:33<24:56,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1360/2000 [57:35<24:52,  2.33s/it][A

Moving average ELBO loss at 1360 iterations is: -33230.6509765625. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5214e+01, 1.7203e-01, 6.0094e-01, 4.0349e-02],
        [4.5224e+01, 1.7537e-01, 5.7973e-01, 4.2402e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3034e+01, 3.4383e-02, 7.2958e-02, 3.9361e-02],
         [5.2381e+01, 7.7609e-09, 1.3649e-01, 3.1526e-02],
         ...,
         [4.2160e+01, 7.9813e-02, 9.3592e-01, 3.0563e-02],
         [4.1978e+01, 1.3032e-01, 6.8812e-01, 3.2617e-02],
         [4.2388e+01, 1.1157e-01, 2.4658e-01, 1.5727e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2467e+01, 3.7278e-03, 2.6050e-01, 9.6568e-02],
         [5.2390e+01, 1.4148e-05, 2.9458e-01, 7.2000e-02],
         ...,
         [4.2127e+01, 1.3998e-01, 3.8606e-01, 2.5791e-02],
         [4.2327e+01, 1.0513e-01, 4.0041e-01, 1.7177e-02],
         [4.1587e+01, 1.3888e-01, 2.14


Train Diffusion:  68%|██████▊   | 1361/2000 [57:38<24:50,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1362/2000 [57:40<24:47,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1363/2000 [57:42<24:46,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1364/2000 [57:45<24:43,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1365/2000 [57:47<24:41,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1366/2000 [57:49<24:39,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1367/2000 [57:52<24:37,  2.33s/it][A
Train Diffusion:  68%|██████▊   | 1368/2000 [57:54<24:40,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1369/2000 [57:56<24:39,  2.34s/it][A
Train Diffusion:  68%|██████▊   | 1370/2000 [57:59<24:32,  2.34s/it][A

Moving average ELBO loss at 1370 iterations is: -34109.037109375. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5248e+01, 1.8116e-01, 5.8189e-01, 4.0429e-02],
        [4.5278e+01, 1.8238e-01, 5.8785e-01, 4.0021e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2941e+01, 9.9278e-03, 2.7536e-01, 1.0546e-01],
         [5.2719e+01, 1.1734e-08, 3.1021e-01, 7.1491e-02],
         ...,
         [4.1815e+01, 1.0010e-01, 4.8471e-01, 2.2169e-02],
         [4.1405e+01, 1.4205e-01, 4.1370e-01, 1.5803e-02],
         [4.1232e+01, 1.5962e-01, 2.3465e-01, 9.3498e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2335e+01, 2.2146e-01, 6.3133e-02, 4.8699e-02],
         [5.0933e+01, 1.3893e-04, 1.3615e-01, 3.6618e-02],
         ...,
         [4.2589e+01, 1.6158e-01, 8.2405e-01, 2.9769e-02],
         [4.3152e+01, 1.1482e-01, 6.4129e-01, 2.9866e-02],
         [4.3671e+01, 1.0860e-01, 2.678


Train Diffusion:  69%|██████▊   | 1371/2000 [58:01<24:31,  2.34s/it][A
Train Diffusion:  69%|██████▊   | 1372/2000 [58:03<24:28,  2.34s/it][A
Train Diffusion:  69%|██████▊   | 1373/2000 [58:06<24:24,  2.34s/it][A
Train Diffusion:  69%|██████▊   | 1374/2000 [58:08<24:19,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1375/2000 [58:10<24:15,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1376/2000 [58:13<24:10,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1377/2000 [58:15<24:10,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1378/2000 [58:17<24:10,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1379/2000 [58:20<24:06,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1380/2000 [58:22<24:02,  2.33s/it][A

Moving average ELBO loss at 1380 iterations is: -33875.8822265625. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5252e+01, 1.7139e-01, 5.8502e-01, 4.2221e-02],
        [4.5304e+01, 1.7331e-01, 5.6001e-01, 4.1157e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3204e+01, 5.8570e-02, 2.4624e-01, 4.3146e-02],
         [5.3301e+01, 1.3242e-08, 2.9047e-01, 1.8391e-02],
         ...,
         [4.2028e+01, 1.2231e-01, 4.0842e-01, 1.8435e-02],
         [4.2789e+01, 1.3088e-01, 4.1243e-01, 1.2793e-02],
         [4.2903e+01, 1.4825e-01, 1.5412e-01, 9.0596e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2694e+01, 3.5101e-03, 5.6334e-02, 1.0524e-01],
         [5.1608e+01, 2.1992e-05, 1.2700e-01, 9.0628e-02],
         ...,
         [4.2472e+01, 1.0938e-01, 8.9290e-01, 4.8975e-02],
         [4.1835e+01, 8.5813e-02, 6.8038e-01, 3.9162e-02],
         [4.1102e+01, 8.4589e-02, 3.77


Train Diffusion:  69%|██████▉   | 1381/2000 [58:24<24:00,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1382/2000 [58:27<24:06,  2.34s/it][A
Train Diffusion:  69%|██████▉   | 1383/2000 [58:29<24:10,  2.35s/it][A
Train Diffusion:  69%|██████▉   | 1384/2000 [58:31<24:08,  2.35s/it][A
Train Diffusion:  69%|██████▉   | 1385/2000 [58:34<24:01,  2.34s/it][A
Train Diffusion:  69%|██████▉   | 1386/2000 [58:36<23:55,  2.34s/it][A
Train Diffusion:  69%|██████▉   | 1387/2000 [58:38<23:50,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1388/2000 [58:41<23:46,  2.33s/it][A
Train Diffusion:  69%|██████▉   | 1389/2000 [58:43<23:45,  2.33s/it][A
Train Diffusion:  70%|██████▉   | 1390/2000 [58:45<23:45,  2.34s/it][A

Moving average ELBO loss at 1390 iterations is: -31128.2255859375. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[45.4071,  0.1465,  0.5341,  0.0472],
        [45.3631,  0.1450,  0.5343,  0.0466]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3994e+01, 1.4362e-01, 4.9165e-02, 5.5208e-02],
         [5.0709e+01, 4.9130e-05, 1.1780e-01, 2.2483e-02],
         ...,
         [4.2236e+01, 1.3264e-01, 7.0152e-01, 3.7920e-02],
         [4.3004e+01, 8.8979e-02, 6.3457e-01, 3.4080e-02],
         [4.3551e+01, 7.5091e-02, 2.3172e-01, 1.1893e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3994e+01, 9.0306e-03, 2.3805e-01, 6.8745e-02],
         [5.2422e+01, 6.8506e-09, 2.4999e-01, 8.0212e-02],
         ...,
         [4.2632e+01, 7.2386e-02, 5.5196e-01, 2.7836e-02],
         [4.2149e+01, 1.1074e-01, 4.6331e-01, 1.6518e-02],
         [4.1554e+01, 1.3844e-01, 2.3930e-01, 1.3204e-02]]],
       g


Train Diffusion:  70%|██████▉   | 1391/2000 [58:48<23:45,  2.34s/it][A
Train Diffusion:  70%|██████▉   | 1392/2000 [58:50<23:45,  2.35s/it][A
Train Diffusion:  70%|██████▉   | 1393/2000 [58:52<23:38,  2.34s/it][A
Train Diffusion:  70%|██████▉   | 1394/2000 [58:55<23:33,  2.33s/it][A
Train Diffusion:  70%|██████▉   | 1395/2000 [58:57<23:29,  2.33s/it][A
Train Diffusion:  70%|██████▉   | 1396/2000 [58:59<23:27,  2.33s/it][A
Train Diffusion:  70%|██████▉   | 1397/2000 [59:02<23:25,  2.33s/it][A
Train Diffusion:  70%|██████▉   | 1398/2000 [59:04<23:19,  2.32s/it][A
Train Diffusion:  70%|██████▉   | 1399/2000 [59:06<23:17,  2.32s/it][A
Train Diffusion:  70%|███████   | 1400/2000 [59:09<23:14,  2.32s/it][A

Moving average ELBO loss at 1400 iterations is: -28557.87265625. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5385e+01, 1.5699e-01, 5.5836e-01, 3.8990e-02],
        [4.5361e+01, 1.5449e-01, 5.4194e-01, 4.1704e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.7063e+01, 7.6815e-02, 1.1511e-01, 7.4290e-02],
         [5.1144e+01, 1.9662e-08, 2.3132e-01, 8.7510e-02],
         ...,
         [4.2329e+01, 1.4818e-01, 4.2744e-01, 3.4845e-02],
         [4.3217e+01, 1.0229e-01, 3.4402e-01, 3.7469e-02],
         [4.3815e+01, 1.3586e-01, 2.1591e-01, 1.7572e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.7422e+01, 2.4993e-02, 1.3427e-01, 4.8192e-02],
         [5.0729e+01, 3.0612e-05, 1.3910e-01, 1.6717e-02],
         ...,
         [4.2823e+01, 1.1446e-01, 7.6984e-01, 2.7820e-02],
         [4.1860e+01, 1.4268e-01, 5.8414e-01, 2.1807e-02],
         [4.1261e+01, 1.1347e-01, 2.1242


Train Diffusion:  70%|███████   | 1401/2000 [59:11<23:11,  2.32s/it][A
Train Diffusion:  70%|███████   | 1402/2000 [59:13<23:09,  2.32s/it][A
Train Diffusion:  70%|███████   | 1403/2000 [59:16<23:06,  2.32s/it][A
Train Diffusion:  70%|███████   | 1404/2000 [59:18<23:15,  2.34s/it][A
Train Diffusion:  70%|███████   | 1405/2000 [59:20<23:26,  2.36s/it][A
Train Diffusion:  70%|███████   | 1406/2000 [59:23<23:16,  2.35s/it][A
Train Diffusion:  70%|███████   | 1407/2000 [59:25<23:11,  2.35s/it][A
Train Diffusion:  70%|███████   | 1408/2000 [59:27<23:11,  2.35s/it][A
Train Diffusion:  70%|███████   | 1409/2000 [59:30<23:10,  2.35s/it][A
Train Diffusion:  70%|███████   | 1410/2000 [59:32<23:26,  2.38s/it][A

Moving average ELBO loss at 1410 iterations is: -32122.9263671875. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5395e+01, 1.6778e-01, 5.4652e-01, 4.1117e-02],
        [4.5314e+01, 1.6694e-01, 5.6398e-01, 4.2329e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.7006e+01, 2.1450e-02, 6.7559e-02, 2.5866e-02],
         [5.0922e+01, 3.7138e-05, 4.1325e-02, 3.0746e-02],
         ...,
         [4.2857e+01, 1.8549e-01, 3.8237e-01, 2.1934e-02],
         [4.3481e+01, 1.7959e-01, 3.1822e-01, 2.3648e-02],
         [4.1613e+01, 1.2674e-01, 1.2331e-01, 1.0743e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6099e+01, 2.6892e-02, 3.0279e-01, 1.2282e-01],
         [5.1232e+01, 1.6741e-08, 4.9865e-01, 6.3306e-02],
         ...,
         [4.2139e+01, 6.1725e-02, 8.3526e-01, 4.3789e-02],
         [4.1902e+01, 7.0322e-02, 7.0741e-01, 2.1582e-02],
         [4.2138e+01, 1.6024e-01, 3.31


Train Diffusion:  71%|███████   | 1411/2000 [59:35<23:29,  2.39s/it][A
Train Diffusion:  71%|███████   | 1412/2000 [59:37<23:30,  2.40s/it][A
Train Diffusion:  71%|███████   | 1413/2000 [59:39<23:21,  2.39s/it][A
Train Diffusion:  71%|███████   | 1414/2000 [59:42<23:11,  2.37s/it][A
Train Diffusion:  71%|███████   | 1415/2000 [59:44<23:06,  2.37s/it][A
Train Diffusion:  71%|███████   | 1416/2000 [59:46<22:57,  2.36s/it][A
Train Diffusion:  71%|███████   | 1417/2000 [59:49<22:51,  2.35s/it][A
Train Diffusion:  71%|███████   | 1418/2000 [59:51<22:48,  2.35s/it][A
Train Diffusion:  71%|███████   | 1419/2000 [59:53<22:45,  2.35s/it][A
Train Diffusion:  71%|███████   | 1420/2000 [59:56<22:39,  2.34s/it][A

Moving average ELBO loss at 1420 iterations is: -33027.186328125. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5373e+01, 1.6965e-01, 5.7527e-01, 3.9743e-02],
        [4.5384e+01, 1.6921e-01, 5.6019e-01, 4.0799e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6345e+01, 3.9768e-02, 1.2106e-01, 5.5926e-02],
         [5.2175e+01, 8.2078e-05, 1.1781e-01, 3.5388e-02],
         ...,
         [4.2754e+01, 1.2483e-01, 8.5571e-01, 4.5657e-02],
         [4.2363e+01, 9.2707e-02, 5.9848e-01, 3.2012e-02],
         [4.3180e+01, 1.4293e-01, 2.1834e-01, 1.1347e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.8651e+01, 2.2571e-02, 1.5998e-01, 7.7015e-02],
         [5.0841e+01, 8.4163e-09, 3.2789e-01, 5.0982e-02],
         ...,
         [4.2104e+01, 1.3760e-01, 4.5547e-01, 2.1123e-02],
         [4.3152e+01, 1.7812e-01, 4.8792e-01, 1.2798e-02],
         [4.2142e+01, 1.3687e-01, 2.406


Train Diffusion:  71%|███████   | 1421/2000 [59:58<22:36,  2.34s/it][A
Train Diffusion:  71%|███████   | 1422/2000 [1:00:00<22:36,  2.35s/it][A
Train Diffusion:  71%|███████   | 1423/2000 [1:00:03<22:37,  2.35s/it][A
Train Diffusion:  71%|███████   | 1424/2000 [1:00:05<22:31,  2.35s/it][A
Train Diffusion:  71%|███████▏  | 1425/2000 [1:00:08<22:29,  2.35s/it][A
Train Diffusion:  71%|███████▏  | 1426/2000 [1:00:10<22:25,  2.34s/it][A
Train Diffusion:  71%|███████▏  | 1427/2000 [1:00:12<22:18,  2.34s/it][A
Train Diffusion:  71%|███████▏  | 1428/2000 [1:00:15<22:15,  2.34s/it][A
Train Diffusion:  71%|███████▏  | 1429/2000 [1:00:17<22:20,  2.35s/it][A
Train Diffusion:  72%|███████▏  | 1430/2000 [1:00:19<22:21,  2.35s/it][A

Moving average ELBO loss at 1430 iterations is: -31449.0568359375. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5391e+01, 1.7099e-01, 5.7167e-01, 3.8734e-02],
        [4.5400e+01, 1.6750e-01, 5.7424e-01, 4.0494e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.6115e+01, 3.6804e-02, 2.2980e-01, 7.4386e-02],
         [5.2401e+01, 9.6025e-09, 2.6930e-01, 8.1072e-02],
         ...,
         [4.2332e+01, 1.4290e-01, 4.7979e-01, 1.5121e-02],
         [4.3210e+01, 1.5451e-01, 3.8604e-01, 1.2534e-02],
         [4.2057e+01, 1.5310e-01, 2.3331e-01, 1.0795e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [4.9949e+01, 1.0919e-02, 6.8404e-02, 5.3020e-02],
         [5.2896e+01, 6.9450e-05, 1.4330e-01, 1.7318e-02],
         ...,
         [4.2652e+01, 8.5583e-02, 7.8377e-01, 4.6577e-02],
         [4.2011e+01, 7.0234e-02, 6.3827e-01, 3.7172e-02],
         [4.2834e+01, 8.4376e-02, 2.51


Train Diffusion:  72%|███████▏  | 1431/2000 [1:00:22<22:26,  2.37s/it][A
Train Diffusion:  72%|███████▏  | 1432/2000 [1:00:24<22:25,  2.37s/it][A
Train Diffusion:  72%|███████▏  | 1433/2000 [1:00:26<22:20,  2.36s/it][A
Train Diffusion:  72%|███████▏  | 1434/2000 [1:00:29<22:30,  2.39s/it][A
Train Diffusion:  72%|███████▏  | 1435/2000 [1:00:31<22:33,  2.40s/it][A
Train Diffusion:  72%|███████▏  | 1436/2000 [1:00:34<22:35,  2.40s/it][A
Train Diffusion:  72%|███████▏  | 1437/2000 [1:00:36<22:34,  2.41s/it][A
Train Diffusion:  72%|███████▏  | 1438/2000 [1:00:39<22:41,  2.42s/it][A
Train Diffusion:  72%|███████▏  | 1439/2000 [1:00:41<22:37,  2.42s/it][A
Train Diffusion:  72%|███████▏  | 1440/2000 [1:00:43<22:28,  2.41s/it][A

Moving average ELBO loss at 1440 iterations is: -31773.7841796875. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5380e+01, 1.7610e-01, 5.5513e-01, 4.1282e-02],
        [4.5435e+01, 1.7803e-01, 5.7180e-01, 4.0920e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.5099e+01, 1.8449e-02, 1.6560e-01, 9.4802e-02],
         [5.2775e+01, 2.7845e-08, 1.4992e-01, 7.2190e-02],
         ...,
         [4.2703e+01, 1.3847e-01, 8.8185e-01, 4.3802e-02],
         [4.3109e+01, 1.7220e-01, 6.3739e-01, 2.3152e-02],
         [4.3293e+01, 1.3556e-01, 3.2447e-01, 1.5711e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0167e+01, 1.3876e-02, 1.2191e-01, 6.0643e-02],
         [5.1139e+01, 7.5013e-05, 2.4683e-01, 2.4203e-02],
         ...,
         [4.2324e+01, 1.2704e-01, 4.1006e-01, 1.4629e-02],
         [4.1861e+01, 9.5222e-02, 4.4545e-01, 2.6188e-02],
         [4.0723e+01, 1.4540e-01, 1.92


Train Diffusion:  72%|███████▏  | 1441/2000 [1:00:46<22:18,  2.39s/it][A
Train Diffusion:  72%|███████▏  | 1442/2000 [1:00:48<22:11,  2.39s/it][A
Train Diffusion:  72%|███████▏  | 1443/2000 [1:00:50<22:13,  2.39s/it][A
Train Diffusion:  72%|███████▏  | 1444/2000 [1:00:53<22:07,  2.39s/it][A
Train Diffusion:  72%|███████▏  | 1445/2000 [1:00:55<21:58,  2.38s/it][A
Train Diffusion:  72%|███████▏  | 1446/2000 [1:00:57<21:45,  2.36s/it][A
Train Diffusion:  72%|███████▏  | 1447/2000 [1:01:00<21:43,  2.36s/it][A
Train Diffusion:  72%|███████▏  | 1448/2000 [1:01:02<21:35,  2.35s/it][A
Train Diffusion:  72%|███████▏  | 1449/2000 [1:01:04<21:29,  2.34s/it][A
Train Diffusion:  72%|███████▎  | 1450/2000 [1:01:07<21:23,  2.33s/it][A

Moving average ELBO loss at 1450 iterations is: -31722.867578125. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5489e+01, 1.7501e-01, 5.8319e-01, 3.9178e-02],
        [4.5402e+01, 1.7837e-01, 5.6827e-01, 3.9434e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4951e+01, 1.3852e-02, 7.1534e-02, 1.2217e-01],
         [5.1939e+01, 2.9772e-08, 5.2567e-02, 8.8487e-02],
         ...,
         [4.2491e+01, 7.9383e-02, 7.3637e-01, 2.9060e-02],
         [4.1759e+01, 7.3001e-02, 6.4972e-01, 1.5351e-02],
         [4.3192e+01, 8.6299e-02, 3.4482e-01, 8.5601e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0868e+01, 3.8178e-02, 2.3308e-01, 3.1870e-02],
         [5.2675e+01, 9.6844e-05, 4.3655e-01, 1.7001e-02],
         ...,
         [4.2707e+01, 1.6121e-01, 5.3512e-01, 2.2826e-02],
         [4.3189e+01, 1.6254e-01, 4.7557e-01, 2.4482e-02],
         [4.2181e+01, 1.5813e-01, 1.666


Train Diffusion:  73%|███████▎  | 1451/2000 [1:01:09<21:18,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1452/2000 [1:01:11<21:15,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1453/2000 [1:01:14<21:11,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1454/2000 [1:01:16<21:07,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1455/2000 [1:01:18<21:03,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1456/2000 [1:01:21<21:11,  2.34s/it][A
Train Diffusion:  73%|███████▎  | 1457/2000 [1:01:23<21:08,  2.34s/it][A
Train Diffusion:  73%|███████▎  | 1458/2000 [1:01:25<21:03,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1459/2000 [1:01:28<20:59,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1460/2000 [1:01:30<20:59,  2.33s/it][A

Moving average ELBO loss at 1460 iterations is: -33411.3884765625. Best ELBO loss value is: -35913.1171875.

C_PATH mean = tensor([[4.5477e+01, 1.8016e-01, 5.7582e-01, 3.9128e-02],
        [4.5440e+01, 1.7802e-01, 5.6965e-01, 3.8698e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4257e+01, 5.2421e-02, 8.8427e-02, 3.1289e-02],
         [5.1984e+01, 3.4403e-08, 1.3774e-01, 3.2742e-02],
         ...,
         [4.2860e+01, 6.4457e-02, 4.7546e-01, 3.2081e-02],
         [4.2150e+01, 1.5265e-01, 4.6796e-01, 1.7820e-02],
         [4.3060e+01, 1.3866e-01, 1.8632e-01, 1.0414e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.0918e+01, 1.6527e-02, 2.6435e-01, 1.2972e-01],
         [5.2402e+01, 9.9786e-05, 2.9075e-01, 5.8538e-02],
         ...,
         [4.2242e+01, 1.8772e-01, 7.9844e-01, 2.2359e-02],
         [4.2819e+01, 1.3009e-01, 5.9943e-01, 2.6888e-02],
         [4.2171e+01, 1.5594e-01, 3.39


Train Diffusion:  73%|███████▎  | 1461/2000 [1:01:32<20:55,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1462/2000 [1:01:35<20:50,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1463/2000 [1:01:37<20:49,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1464/2000 [1:01:39<20:44,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1465/2000 [1:01:42<20:40,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1466/2000 [1:01:44<20:42,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1467/2000 [1:01:46<20:39,  2.32s/it][A
Train Diffusion:  73%|███████▎  | 1468/2000 [1:01:49<20:37,  2.33s/it][A
Train Diffusion:  73%|███████▎  | 1469/2000 [1:01:51<20:34,  2.32s/it][A
Train Diffusion:  74%|███████▎  | 1470/2000 [1:01:53<20:40,  2.34s/it][A

Moving average ELBO loss at 1470 iterations is: -35724.784375. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5481e+01, 1.7037e-01, 5.8212e-01, 3.9989e-02],
        [4.5454e+01, 1.7712e-01, 5.7101e-01, 4.1015e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4382e+01, 5.1638e-02, 1.5988e-01, 1.0530e-01],
         [5.3369e+01, 8.8885e-05, 1.5611e-01, 6.3104e-02],
         ...,
         [4.2497e+01, 1.3813e-01, 5.3184e-01, 3.1942e-02],
         [4.3118e+01, 1.7085e-01, 4.8774e-01, 1.7119e-02],
         [4.2244e+01, 1.3224e-01, 2.7320e-01, 1.3048e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1394e+01, 1.1384e-02, 1.1858e-01, 4.3050e-02],
         [5.1449e+01, 2.1811e-08, 2.5289e-01, 3.1543e-02],
         ...,
         [4.2587e+01, 9.9009e-02, 7.7036e-01, 2.4268e-02],
         [4.1893e+01, 7.5047e-02, 6.5885e-01, 2.6955e-02],
         [4.3055e+01, 1.2984e-01, 2.7538e-


Train Diffusion:  74%|███████▎  | 1471/2000 [1:01:56<20:34,  2.33s/it][A
Train Diffusion:  74%|███████▎  | 1472/2000 [1:01:58<20:28,  2.33s/it][A
Train Diffusion:  74%|███████▎  | 1473/2000 [1:02:00<20:25,  2.33s/it][A
Train Diffusion:  74%|███████▎  | 1474/2000 [1:02:03<20:22,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1475/2000 [1:02:05<20:17,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1476/2000 [1:02:07<20:14,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1477/2000 [1:02:10<20:11,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1478/2000 [1:02:12<20:08,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1479/2000 [1:02:14<20:06,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1480/2000 [1:02:17<20:03,  2.31s/it][A

Moving average ELBO loss at 1480 iterations is: -35124.9515625. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5428e+01, 1.7628e-01, 5.7475e-01, 4.1252e-02],
        [4.5480e+01, 1.7260e-01, 5.8259e-01, 4.1810e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3842e+01, 1.7515e-02, 6.1138e-02, 6.8436e-02],
         [5.2951e+01, 1.1431e-08, 4.5985e-02, 3.8410e-02],
         ...,
         [4.2532e+01, 1.2934e-01, 8.4629e-01, 3.3978e-02],
         [4.2982e+01, 1.3915e-01, 7.8026e-01, 2.0579e-02],
         [4.2328e+01, 9.8279e-02, 3.3230e-01, 1.6725e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1363e+01, 6.7114e-02, 2.7434e-01, 7.6347e-02],
         [5.1307e+01, 1.4219e-04, 4.7866e-01, 5.1908e-02],
         ...,
         [4.2540e+01, 1.1721e-01, 5.3989e-01, 2.6702e-02],
         [4.2054e+01, 8.9750e-02, 3.6261e-01, 3.0755e-02],
         [4.2971e+01, 1.4371e-01, 2.2699e


Train Diffusion:  74%|███████▍  | 1481/2000 [1:02:19<20:01,  2.31s/it][A
Train Diffusion:  74%|███████▍  | 1482/2000 [1:02:21<19:58,  2.31s/it][A
Train Diffusion:  74%|███████▍  | 1483/2000 [1:02:23<19:59,  2.32s/it][A
Train Diffusion:  74%|███████▍  | 1484/2000 [1:02:26<20:03,  2.33s/it][A
Train Diffusion:  74%|███████▍  | 1485/2000 [1:02:28<19:59,  2.33s/it][A
Train Diffusion:  74%|███████▍  | 1486/2000 [1:02:31<20:02,  2.34s/it][A
Train Diffusion:  74%|███████▍  | 1487/2000 [1:02:33<19:56,  2.33s/it][A
Train Diffusion:  74%|███████▍  | 1488/2000 [1:02:35<19:52,  2.33s/it][A
Train Diffusion:  74%|███████▍  | 1489/2000 [1:02:37<19:48,  2.33s/it][A
Train Diffusion:  74%|███████▍  | 1490/2000 [1:02:40<19:46,  2.33s/it][A

Moving average ELBO loss at 1490 iterations is: -33136.951171875. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5528e+01, 1.6714e-01, 5.6075e-01, 3.8725e-02],
        [4.5538e+01, 1.7329e-01, 5.6525e-01, 3.8638e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4622e+01, 4.3101e-02, 1.6843e-01, 4.8529e-02],
         [5.2455e+01, 3.1902e-08, 1.6672e-01, 3.7894e-02],
         ...,
         [4.3003e+01, 1.1235e-01, 7.7687e-01, 3.1409e-02],
         [4.2340e+01, 8.4215e-02, 6.3733e-01, 3.1410e-02],
         [4.1560e+01, 7.3699e-02, 2.7265e-01, 1.2645e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2641e+01, 2.4101e-02, 1.2052e-01, 8.1858e-02],
         [5.3053e+01, 2.3168e-04, 2.0405e-01, 5.3485e-02],
         ...,
         [4.2259e+01, 1.4011e-01, 5.4349e-01, 2.9957e-02],
         [4.3025e+01, 1.5563e-01, 4.2921e-01, 1.8072e-02],
         [4.3458e+01, 1.6160e-01, 1.953


Train Diffusion:  75%|███████▍  | 1491/2000 [1:02:42<19:43,  2.33s/it][A
Train Diffusion:  75%|███████▍  | 1492/2000 [1:02:44<19:42,  2.33s/it][A
Train Diffusion:  75%|███████▍  | 1493/2000 [1:02:47<19:39,  2.33s/it][A
Train Diffusion:  75%|███████▍  | 1494/2000 [1:02:49<19:36,  2.32s/it][A
Train Diffusion:  75%|███████▍  | 1495/2000 [1:02:51<19:33,  2.32s/it][A
Train Diffusion:  75%|███████▍  | 1496/2000 [1:02:54<19:30,  2.32s/it][A
Train Diffusion:  75%|███████▍  | 1497/2000 [1:02:56<19:31,  2.33s/it][A
Train Diffusion:  75%|███████▍  | 1498/2000 [1:02:58<19:33,  2.34s/it][A
Train Diffusion:  75%|███████▍  | 1499/2000 [1:03:01<19:30,  2.34s/it][A
Train Diffusion:  75%|███████▌  | 1500/2000 [1:03:03<19:26,  2.33s/it][A

Moving average ELBO loss at 1500 iterations is: -30700.386328125. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5481e+01, 1.8258e-01, 5.7450e-01, 4.0823e-02],
        [4.5506e+01, 1.7393e-01, 5.7206e-01, 4.0274e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2066e+01, 3.8918e-02, 1.6930e-01, 5.7109e-02],
         [5.1341e+01, 1.3258e-04, 3.5510e-01, 2.1571e-02],
         ...,
         [4.2132e+01, 1.6674e-01, 7.8656e-01, 3.1788e-02],
         [4.2845e+01, 1.3300e-01, 6.3815e-01, 3.1058e-02],
         [4.1994e+01, 1.1285e-01, 2.6854e-01, 1.3879e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3967e+01, 1.1642e-02, 1.3412e-01, 7.7785e-02],
         [5.3179e+01, 3.5181e-08, 1.1769e-01, 7.4360e-02],
         ...,
         [4.3006e+01, 1.0387e-01, 5.3403e-01, 2.4172e-02],
         [4.2514e+01, 1.3333e-01, 4.9121e-01, 1.5557e-02],
         [4.2957e+01, 1.6006e-01, 2.641


Train Diffusion:  75%|███████▌  | 1501/2000 [1:03:05<19:23,  2.33s/it][A
Train Diffusion:  75%|███████▌  | 1502/2000 [1:03:08<19:19,  2.33s/it][A
Train Diffusion:  75%|███████▌  | 1503/2000 [1:03:10<19:14,  2.32s/it][A
Train Diffusion:  75%|███████▌  | 1504/2000 [1:03:12<19:12,  2.32s/it][A
Train Diffusion:  75%|███████▌  | 1505/2000 [1:03:15<19:10,  2.32s/it][A
Train Diffusion:  75%|███████▌  | 1506/2000 [1:03:17<19:07,  2.32s/it][A
Train Diffusion:  75%|███████▌  | 1507/2000 [1:03:19<19:04,  2.32s/it][A
Train Diffusion:  75%|███████▌  | 1508/2000 [1:03:22<19:16,  2.35s/it][A
Train Diffusion:  75%|███████▌  | 1509/2000 [1:03:24<19:17,  2.36s/it][A
Train Diffusion:  76%|███████▌  | 1510/2000 [1:03:26<19:08,  2.34s/it][A

Moving average ELBO loss at 1510 iterations is: -31467.223828125. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5521e+01, 1.7908e-01, 5.8359e-01, 3.9297e-02],
        [4.5483e+01, 1.7537e-01, 5.8848e-01, 4.0075e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1974e+01, 6.3503e-02, 7.8327e-02, 1.2507e-01],
         [5.2659e+01, 1.2108e-04, 4.4368e-02, 7.1197e-02],
         ...,
         [4.2054e+01, 1.1707e-01, 3.4558e-01, 2.2056e-02],
         [4.2935e+01, 1.6485e-01, 3.5460e-01, 1.6155e-02],
         [4.2522e+01, 1.3762e-01, 2.8618e-01, 1.3209e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3626e+01, 1.0488e-02, 2.6688e-01, 4.1590e-02],
         [5.2224e+01, 2.4094e-08, 5.0313e-01, 2.8803e-02],
         ...,
         [4.3089e+01, 1.6603e-01, 9.8310e-01, 3.2825e-02],
         [4.2280e+01, 1.1085e-01, 7.6143e-01, 2.9867e-02],
         [4.3159e+01, 1.4523e-01, 2.924


Train Diffusion:  76%|███████▌  | 1511/2000 [1:03:29<19:09,  2.35s/it][A
Train Diffusion:  76%|███████▌  | 1512/2000 [1:03:31<19:10,  2.36s/it][A
Train Diffusion:  76%|███████▌  | 1513/2000 [1:03:34<19:03,  2.35s/it][A
Train Diffusion:  76%|███████▌  | 1514/2000 [1:03:36<18:57,  2.34s/it][A
Train Diffusion:  76%|███████▌  | 1515/2000 [1:03:38<18:51,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1516/2000 [1:03:41<18:47,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1517/2000 [1:03:43<18:44,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1518/2000 [1:03:45<18:44,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1519/2000 [1:03:47<18:39,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1520/2000 [1:03:50<18:36,  2.33s/it][A

Moving average ELBO loss at 1520 iterations is: -30631.5431640625. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5564e+01, 1.7482e-01, 5.7829e-01, 4.2458e-02],
        [4.5501e+01, 1.7533e-01, 5.6735e-01, 4.0176e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3649e+01, 8.1464e-03, 1.6450e-01, 1.0360e-01],
         [5.2307e+01, 2.3535e-08, 3.4493e-01, 6.0783e-02],
         ...,
         [4.2597e+01, 1.5069e-01, 4.8287e-01, 1.0929e-02],
         [4.2283e+01, 1.6012e-01, 4.2010e-01, 1.0842e-02],
         [4.1919e+01, 1.1404e-01, 2.5420e-01, 1.5448e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2276e+01, 1.3320e-01, 1.1577e-01, 4.1768e-02],
         [5.2524e+01, 2.5593e-04, 1.2277e-01, 3.3845e-02],
         ...,
         [4.2666e+01, 9.9353e-02, 8.5464e-01, 5.7354e-02],
         [4.3175e+01, 8.2020e-02, 7.4057e-01, 4.5797e-02],
         [4.3915e+01, 1.5118e-01, 2.99


Train Diffusion:  76%|███████▌  | 1521/2000 [1:03:52<18:33,  2.32s/it][A
Train Diffusion:  76%|███████▌  | 1522/2000 [1:03:54<18:31,  2.33s/it][A
Train Diffusion:  76%|███████▌  | 1523/2000 [1:03:57<18:27,  2.32s/it][A
Train Diffusion:  76%|███████▌  | 1524/2000 [1:03:59<18:25,  2.32s/it][A
Train Diffusion:  76%|███████▋  | 1525/2000 [1:04:01<18:28,  2.33s/it][A
Train Diffusion:  76%|███████▋  | 1526/2000 [1:04:04<18:27,  2.34s/it][A
Train Diffusion:  76%|███████▋  | 1527/2000 [1:04:06<18:23,  2.33s/it][A
Train Diffusion:  76%|███████▋  | 1528/2000 [1:04:08<18:19,  2.33s/it][A
Train Diffusion:  76%|███████▋  | 1529/2000 [1:04:11<18:16,  2.33s/it][A
Train Diffusion:  76%|███████▋  | 1530/2000 [1:04:13<18:12,  2.32s/it][A

Moving average ELBO loss at 1530 iterations is: -30373.2875. Best ELBO loss value is: -36300.0546875.

C_PATH mean = tensor([[4.5589e+01, 1.6748e-01, 5.9074e-01, 3.9071e-02],
        [4.5567e+01, 1.6830e-01, 5.7604e-01, 3.9635e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3886e+01, 5.9916e-02, 1.9646e-01, 5.7123e-02],
         [5.2309e+01, 1.4837e-04, 3.6373e-01, 3.6009e-02],
         ...,
         [4.2703e+01, 6.7375e-02, 9.4172e-01, 3.1421e-02],
         [4.3347e+01, 7.4217e-02, 7.1906e-01, 1.7451e-02],
         [4.2704e+01, 9.1255e-02, 2.4416e-01, 1.2124e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2673e+01, 1.0697e-02, 1.2522e-01, 7.2578e-02],
         [5.2585e+01, 4.3845e-08, 1.1068e-01, 4.7526e-02],
         ...,
         [4.2741e+01, 1.6450e-01, 4.6009e-01, 2.4157e-02],
         [4.2201e+01, 1.5499e-01, 4.2724e-01, 2.4594e-02],
         [4.3154e+01, 1.4893e-01, 2.3136e-01


Train Diffusion:  77%|███████▋  | 1531/2000 [1:04:15<18:11,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1532/2000 [1:04:18<18:08,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1533/2000 [1:04:20<18:05,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1534/2000 [1:04:22<18:02,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1535/2000 [1:04:25<18:00,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1536/2000 [1:04:27<17:56,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1537/2000 [1:04:29<17:53,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1538/2000 [1:04:32<17:59,  2.34s/it][A
Train Diffusion:  77%|███████▋  | 1539/2000 [1:04:34<18:01,  2.35s/it][A
Train Diffusion:  77%|███████▋  | 1540/2000 [1:04:36<17:59,  2.35s/it][A

Moving average ELBO loss at 1540 iterations is: -34114.8361328125. Best ELBO loss value is: -36760.96484375.

C_PATH mean = tensor([[4.5573e+01, 1.7230e-01, 5.8019e-01, 4.1376e-02],
        [4.5591e+01, 1.7651e-01, 5.8037e-01, 4.0176e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3422e+01, 3.8593e-02, 1.2454e-01, 3.9613e-02],
         [5.2389e+01, 2.7123e-08, 2.2865e-01, 1.6273e-02],
         ...,
         [4.2720e+01, 1.9877e-01, 8.9666e-01, 2.1287e-02],
         [4.2205e+01, 1.3783e-01, 6.9385e-01, 1.6799e-02],
         [4.1821e+01, 1.4960e-01, 2.3991e-01, 1.3358e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2358e+01, 2.3414e-02, 2.1111e-01, 9.7046e-02],
         [5.2491e+01, 2.3756e-04, 1.7956e-01, 9.2386e-02],
         ...,
         [4.2752e+01, 6.2891e-02, 4.5629e-01, 2.9523e-02],
         [4.3341e+01, 1.4262e-01, 4.2752e-01, 3.3312e-02],
         [4.3917e+01, 1.4768e-01, 2.3


Train Diffusion:  77%|███████▋  | 1541/2000 [1:04:39<17:54,  2.34s/it][A
Train Diffusion:  77%|███████▋  | 1542/2000 [1:04:41<17:49,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1543/2000 [1:04:43<17:44,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1544/2000 [1:04:46<17:43,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1545/2000 [1:04:48<17:39,  2.33s/it][A
Train Diffusion:  77%|███████▋  | 1546/2000 [1:04:50<17:34,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1547/2000 [1:04:53<17:31,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1548/2000 [1:04:55<17:26,  2.32s/it][A
Train Diffusion:  77%|███████▋  | 1549/2000 [1:04:57<17:25,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1550/2000 [1:05:00<17:22,  2.32s/it][A

Moving average ELBO loss at 1550 iterations is: -35508.671875. Best ELBO loss value is: -37413.39453125.

C_PATH mean = tensor([[4.5600e+01, 1.7859e-01, 5.9860e-01, 4.0335e-02],
        [4.5543e+01, 1.7323e-01, 5.8346e-01, 4.0394e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2581e+01, 9.3789e-03, 1.2572e-01, 4.3810e-02],
         [5.2469e+01, 2.9002e-08, 1.1819e-01, 3.6268e-02],
         ...,
         [4.3112e+01, 1.6755e-01, 4.9612e-01, 5.3860e-02],
         [4.2401e+01, 1.5404e-01, 4.9610e-01, 3.3666e-02],
         [4.2907e+01, 1.5132e-01, 1.9471e-01, 1.4127e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3472e+01, 5.8396e-02, 2.0593e-01, 9.6232e-02],
         [5.2602e+01, 1.4313e-04, 4.2314e-01, 4.3899e-02],
         ...,
         [4.2217e+01, 4.9965e-02, 8.2462e-01, 1.0231e-02],
         [4.2944e+01, 5.7600e-02, 6.3384e-01, 2.3923e-02],
         [4.2405e+01, 7.8706e-02, 3.4292e


Train Diffusion:  78%|███████▊  | 1551/2000 [1:05:02<17:23,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1552/2000 [1:05:04<17:20,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1553/2000 [1:05:07<17:22,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1554/2000 [1:05:09<17:21,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1555/2000 [1:05:11<17:16,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1556/2000 [1:05:14<17:12,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1557/2000 [1:05:16<17:09,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1558/2000 [1:05:18<17:06,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1559/2000 [1:05:21<17:10,  2.34s/it][A
Train Diffusion:  78%|███████▊  | 1560/2000 [1:05:23<17:06,  2.33s/it][A

Moving average ELBO loss at 1560 iterations is: -36556.272265625. Best ELBO loss value is: -37537.15625.

C_PATH mean = tensor([[4.5589e+01, 1.8147e-01, 5.8840e-01, 3.9966e-02],
        [4.5561e+01, 1.8020e-01, 5.8921e-01, 4.0919e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3594e+01, 7.2936e-02, 6.5527e-02, 4.8007e-02],
         [5.3285e+01, 2.5970e-08, 1.2292e-01, 3.6777e-02],
         ...,
         [4.2643e+01, 9.6844e-02, 8.2098e-01, 2.8613e-02],
         [4.3183e+01, 1.0408e-01, 6.3604e-01, 1.8753e-02],
         [4.2609e+01, 1.1638e-01, 2.7684e-01, 1.0619e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2715e+01, 1.1755e-02, 2.8504e-01, 8.9506e-02],
         [5.1747e+01, 2.4030e-04, 3.1459e-01, 6.0949e-02],
         ...,
         [4.2696e+01, 1.5492e-01, 5.3850e-01, 2.9814e-02],
         [4.2144e+01, 1.3434e-01, 4.5314e-01, 3.1052e-02],
         [4.2971e+01, 1.5247e-01, 2.2007e


Train Diffusion:  78%|███████▊  | 1561/2000 [1:05:25<17:02,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1562/2000 [1:05:28<16:58,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1563/2000 [1:05:30<16:55,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1564/2000 [1:05:32<16:53,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1565/2000 [1:05:35<16:49,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1566/2000 [1:05:37<16:47,  2.32s/it][A
Train Diffusion:  78%|███████▊  | 1567/2000 [1:05:39<16:49,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1568/2000 [1:05:42<16:47,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1569/2000 [1:05:44<16:43,  2.33s/it][A
Train Diffusion:  78%|███████▊  | 1570/2000 [1:05:46<16:42,  2.33s/it][A

Moving average ELBO loss at 1570 iterations is: -37503.56953125. Best ELBO loss value is: -38153.6328125.

C_PATH mean = tensor([[4.5608e+01, 1.7117e-01, 5.8959e-01, 4.1443e-02],
        [4.5608e+01, 1.7144e-01, 5.8585e-01, 3.9204e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2478e+01, 9.9706e-02, 5.8498e-02, 9.2541e-02],
         [5.1681e+01, 2.2576e-04, 1.3372e-01, 7.6942e-02],
         ...,
         [4.2799e+01, 1.0717e-01, 4.0346e-01, 3.4079e-02],
         [4.3322e+01, 8.7191e-02, 5.0389e-01, 3.1240e-02],
         [4.2750e+01, 8.8636e-02, 3.1828e-01, 1.1909e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3347e+01, 1.7594e-02, 2.5876e-01, 6.1878e-02],
         [5.3328e+01, 5.4638e-08, 2.8402e-01, 2.2779e-02],
         ...,
         [4.2673e+01, 1.0249e-01, 9.8678e-01, 2.1677e-02],
         [4.2234e+01, 1.2572e-01, 7.0006e-01, 1.5971e-02],
         [4.2900e+01, 1.5903e-01, 2.6887


Train Diffusion:  79%|███████▊  | 1571/2000 [1:05:49<16:38,  2.33s/it][A
Train Diffusion:  79%|███████▊  | 1572/2000 [1:05:51<16:34,  2.32s/it][A
Train Diffusion:  79%|███████▊  | 1573/2000 [1:05:53<16:31,  2.32s/it][A
Train Diffusion:  79%|███████▊  | 1574/2000 [1:05:55<16:28,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1575/2000 [1:05:58<16:26,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1576/2000 [1:06:00<16:25,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1577/2000 [1:06:02<16:22,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1578/2000 [1:06:05<16:24,  2.33s/it][A
Train Diffusion:  79%|███████▉  | 1579/2000 [1:06:07<16:20,  2.33s/it][A
Train Diffusion:  79%|███████▉  | 1580/2000 [1:06:09<16:16,  2.33s/it][A

Moving average ELBO loss at 1580 iterations is: -37088.36875. Best ELBO loss value is: -38153.6328125.

C_PATH mean = tensor([[4.5630e+01, 1.7738e-01, 5.8388e-01, 3.9308e-02],
        [4.5622e+01, 1.6888e-01, 5.8180e-01, 4.1631e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3859e+01, 1.5039e-02, 5.6018e-02, 4.4422e-02],
         [5.3591e+01, 1.6327e-04, 1.5771e-01, 3.5226e-02],
         ...,
         [4.3577e+01, 1.0095e-01, 9.9112e-01, 4.0729e-02],
         [4.4048e+01, 7.1444e-02, 7.0138e-01, 3.9129e-02],
         [4.3084e+01, 6.6842e-02, 2.7499e-01, 1.4562e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2977e+01, 5.5834e-02, 2.5565e-01, 1.1869e-01],
         [5.1747e+01, 4.5784e-08, 2.8010e-01, 6.3793e-02],
         ...,
         [4.1922e+01, 1.2026e-01, 4.0131e-01, 2.3089e-02],
         [4.1554e+01, 1.4909e-01, 4.9735e-01, 1.5909e-02],
         [4.2540e+01, 1.6613e-01, 3.1624e-0


Train Diffusion:  79%|███████▉  | 1581/2000 [1:06:12<16:19,  2.34s/it][A
Train Diffusion:  79%|███████▉  | 1582/2000 [1:06:14<16:17,  2.34s/it][A
Train Diffusion:  79%|███████▉  | 1583/2000 [1:06:16<16:12,  2.33s/it][A
Train Diffusion:  79%|███████▉  | 1584/2000 [1:06:19<16:12,  2.34s/it][A
Train Diffusion:  79%|███████▉  | 1585/2000 [1:06:21<16:07,  2.33s/it][A
Train Diffusion:  79%|███████▉  | 1586/2000 [1:06:23<16:02,  2.33s/it][A
Train Diffusion:  79%|███████▉  | 1587/2000 [1:06:26<15:59,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1588/2000 [1:06:28<15:56,  2.32s/it][A
Train Diffusion:  79%|███████▉  | 1589/2000 [1:06:30<16:01,  2.34s/it][A
Train Diffusion:  80%|███████▉  | 1590/2000 [1:06:33<15:58,  2.34s/it][A

Moving average ELBO loss at 1590 iterations is: -36629.89453125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5643e+01, 1.7146e-01, 5.8828e-01, 4.0836e-02],
        [4.5597e+01, 1.7449e-01, 5.7931e-01, 4.0958e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2457e+01, 7.2507e-02, 2.4051e-01, 4.4215e-02],
         [5.2556e+01, 1.8780e-04, 2.8385e-01, 3.4940e-02],
         ...,
         [4.2274e+01, 2.0048e-01, 9.1426e-01, 2.8061e-02],
         [4.3066e+01, 1.7344e-01, 6.2586e-01, 3.1172e-02],
         [4.2404e+01, 1.6045e-01, 2.5164e-01, 1.3259e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3369e+01, 1.2619e-02, 8.3391e-02, 1.1798e-01],
         [5.2373e+01, 3.7493e-08, 1.7062e-01, 6.5138e-02],
         ...,
         [4.3260e+01, 6.6613e-02, 4.2868e-01, 3.2574e-02],
         [4.2404e+01, 9.0063e-02, 5.4950e-01, 2.0568e-02],
         [4.2943e+01, 1.1429e-01, 3.290


Train Diffusion:  80%|███████▉  | 1591/2000 [1:06:35<15:56,  2.34s/it][A
Train Diffusion:  80%|███████▉  | 1592/2000 [1:06:37<15:51,  2.33s/it][A
Train Diffusion:  80%|███████▉  | 1593/2000 [1:06:40<15:48,  2.33s/it][A
Train Diffusion:  80%|███████▉  | 1594/2000 [1:06:42<15:43,  2.32s/it][A
Train Diffusion:  80%|███████▉  | 1595/2000 [1:06:44<15:51,  2.35s/it][A
Train Diffusion:  80%|███████▉  | 1596/2000 [1:06:47<15:47,  2.35s/it][A
Train Diffusion:  80%|███████▉  | 1597/2000 [1:06:49<15:41,  2.34s/it][A
Train Diffusion:  80%|███████▉  | 1598/2000 [1:06:51<15:38,  2.33s/it][A
Train Diffusion:  80%|███████▉  | 1599/2000 [1:06:54<15:34,  2.33s/it][A
Train Diffusion:  80%|████████  | 1600/2000 [1:06:56<15:30,  2.33s/it][A

Moving average ELBO loss at 1600 iterations is: -36237.91328125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5617e+01, 1.7026e-01, 5.6059e-01, 4.1690e-02],
        [4.5722e+01, 1.7257e-01, 5.7511e-01, 3.9880e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2562e+01, 4.3437e-02, 2.6489e-01, 8.1699e-02],
         [5.2532e+01, 7.1707e-08, 4.4794e-01, 7.5235e-02],
         ...,
         [4.2687e+01, 1.1534e-01, 3.8889e-01, 2.7589e-02],
         [4.3394e+01, 1.4176e-01, 4.9678e-01, 1.8124e-02],
         [4.3748e+01, 1.6612e-01, 2.8768e-01, 9.4159e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3327e+01, 9.5050e-02, 6.1263e-02, 5.6692e-02],
         [5.2236e+01, 2.5337e-04, 5.3371e-02, 2.3303e-02],
         ...,
         [4.2953e+01, 1.1696e-01, 7.8298e-01, 2.7801e-02],
         [4.2315e+01, 8.3783e-02, 4.7662e-01, 2.7842e-02],
         [4.1991e+01, 7.9479e-02, 1.942


Train Diffusion:  80%|████████  | 1601/2000 [1:06:58<15:27,  2.32s/it][A
Train Diffusion:  80%|████████  | 1602/2000 [1:07:01<15:24,  2.32s/it][A
Train Diffusion:  80%|████████  | 1603/2000 [1:07:03<15:21,  2.32s/it][A
Train Diffusion:  80%|████████  | 1604/2000 [1:07:05<15:18,  2.32s/it][A
Train Diffusion:  80%|████████  | 1605/2000 [1:07:08<15:16,  2.32s/it][A
Train Diffusion:  80%|████████  | 1606/2000 [1:07:10<15:14,  2.32s/it][A
Train Diffusion:  80%|████████  | 1607/2000 [1:07:12<15:13,  2.32s/it][A
Train Diffusion:  80%|████████  | 1608/2000 [1:07:15<15:10,  2.32s/it][A
Train Diffusion:  80%|████████  | 1609/2000 [1:07:17<15:16,  2.34s/it][A
Train Diffusion:  80%|████████  | 1610/2000 [1:07:19<15:13,  2.34s/it][A

Moving average ELBO loss at 1610 iterations is: -36626.7375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5584e+01, 1.6863e-01, 5.9595e-01, 3.9008e-02],
        [4.5679e+01, 1.7456e-01, 6.0172e-01, 4.0454e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3676e+01, 9.6466e-03, 7.5479e-02, 4.6529e-02],
         [5.2712e+01, 3.1239e-08, 1.1058e-01, 2.1891e-02],
         ...,
         [4.3205e+01, 1.4244e-01, 5.1180e-01, 3.3869e-02],
         [4.2387e+01, 1.8301e-01, 5.1864e-01, 2.3201e-02],
         [4.3041e+01, 1.4263e-01, 2.1008e-01, 1.3563e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2663e+01, 5.3650e-02, 2.9858e-01, 1.0762e-01],
         [5.3044e+01, 1.7877e-04, 3.5581e-01, 7.0331e-02],
         ...,
         [4.2299e+01, 1.0026e-01, 8.8010e-01, 2.0947e-02],
         [4.3143e+01, 8.2472e-02, 6.6108e-01, 2.2986e-02],
         [4.2474e+01, 1.4263e-01, 3.7197e-0


Train Diffusion:  81%|████████  | 1611/2000 [1:07:22<15:08,  2.34s/it][A
Train Diffusion:  81%|████████  | 1612/2000 [1:07:24<15:04,  2.33s/it][A
Train Diffusion:  81%|████████  | 1613/2000 [1:07:26<15:01,  2.33s/it][A
Train Diffusion:  81%|████████  | 1614/2000 [1:07:29<14:57,  2.33s/it][A
Train Diffusion:  81%|████████  | 1615/2000 [1:07:31<15:04,  2.35s/it][A
Train Diffusion:  81%|████████  | 1616/2000 [1:07:33<14:58,  2.34s/it][A
Train Diffusion:  81%|████████  | 1617/2000 [1:07:36<14:53,  2.33s/it][A
Train Diffusion:  81%|████████  | 1618/2000 [1:07:38<14:49,  2.33s/it][A
Train Diffusion:  81%|████████  | 1619/2000 [1:07:40<14:46,  2.33s/it][A
Train Diffusion:  81%|████████  | 1620/2000 [1:07:43<14:42,  2.32s/it][A

Moving average ELBO loss at 1620 iterations is: -36518.351171875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5691e+01, 1.7324e-01, 5.6328e-01, 4.1769e-02],
        [4.5680e+01, 1.7267e-01, 5.7770e-01, 4.1630e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3346e+01, 8.4108e-02, 2.3922e-01, 1.1124e-01],
         [5.2954e+01, 4.2787e-08, 1.9207e-01, 8.3538e-02],
         ...,
         [4.3101e+01, 1.5431e-01, 9.8493e-01, 2.6931e-02],
         [4.3765e+01, 1.0744e-01, 7.4581e-01, 1.7091e-02],
         [4.2888e+01, 1.5941e-01, 3.5048e-01, 1.4474e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2496e+01, 1.7352e-02, 9.9498e-02, 3.9424e-02],
         [5.1221e+01, 3.1331e-04, 1.0851e-01, 1.9544e-02],
         ...,
         [4.2542e+01, 1.2155e-01, 3.7082e-01, 2.4675e-02],
         [4.2078e+01, 1.7669e-01, 4.0461e-01, 3.0031e-02],
         [4.2844e+01, 1.4144e-01, 1.84


Train Diffusion:  81%|████████  | 1621/2000 [1:07:45<14:42,  2.33s/it][A
Train Diffusion:  81%|████████  | 1622/2000 [1:07:47<14:37,  2.32s/it][A
Train Diffusion:  81%|████████  | 1623/2000 [1:07:50<14:50,  2.36s/it][A
Train Diffusion:  81%|████████  | 1624/2000 [1:07:52<14:43,  2.35s/it][A
Train Diffusion:  81%|████████▏ | 1625/2000 [1:07:54<14:38,  2.34s/it][A
Train Diffusion:  81%|████████▏ | 1626/2000 [1:07:57<14:32,  2.33s/it][A
Train Diffusion:  81%|████████▏ | 1627/2000 [1:07:59<14:27,  2.32s/it][A
Train Diffusion:  81%|████████▏ | 1628/2000 [1:08:01<14:25,  2.33s/it][A
Train Diffusion:  81%|████████▏ | 1629/2000 [1:08:04<14:24,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1630/2000 [1:08:06<14:21,  2.33s/it][A

Moving average ELBO loss at 1630 iterations is: -36466.54140625. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5662e+01, 1.7014e-01, 5.8852e-01, 3.9763e-02],
        [4.5702e+01, 1.7115e-01, 5.8740e-01, 3.9175e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3847e+01, 5.3075e-02, 1.5055e-01, 8.4519e-02],
         [5.3666e+01, 2.6753e-04, 3.5805e-01, 8.1730e-02],
         ...,
         [4.2686e+01, 1.9781e-01, 5.1955e-01, 1.4939e-02],
         [4.2213e+01, 1.2559e-01, 5.3712e-01, 2.2278e-02],
         [4.2802e+01, 8.8378e-02, 2.7058e-01, 9.2974e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2935e+01, 1.2919e-02, 1.6360e-01, 5.0135e-02],
         [5.1775e+01, 2.7078e-08, 1.2807e-01, 1.8006e-02],
         ...,
         [4.2925e+01, 5.3899e-02, 9.1477e-01, 3.0571e-02],
         [4.3486e+01, 1.3937e-01, 6.3583e-01, 1.8393e-02],
         [4.2753e+01, 2.2041e-01, 2.479


Train Diffusion:  82%|████████▏ | 1631/2000 [1:08:08<14:18,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1632/2000 [1:08:11<14:15,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1633/2000 [1:08:13<14:13,  2.32s/it][A
Train Diffusion:  82%|████████▏ | 1634/2000 [1:08:15<14:09,  2.32s/it][A
Train Diffusion:  82%|████████▏ | 1635/2000 [1:08:18<14:06,  2.32s/it][A
Train Diffusion:  82%|████████▏ | 1636/2000 [1:08:20<14:06,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1637/2000 [1:08:22<14:07,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1638/2000 [1:08:25<14:03,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1639/2000 [1:08:27<13:59,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1640/2000 [1:08:29<13:56,  2.32s/it][A

Moving average ELBO loss at 1640 iterations is: -35500.554296875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5669e+01, 1.7315e-01, 5.7744e-01, 3.7777e-02],
        [4.5721e+01, 1.7080e-01, 5.9112e-01, 4.2202e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3543e+01, 7.2513e-03, 2.5655e-01, 5.3824e-02],
         [5.3239e+01, 8.9390e-05, 4.9092e-01, 3.8719e-02],
         ...,
         [4.3130e+01, 1.7520e-01, 9.6494e-01, 1.8112e-02],
         [4.2501e+01, 1.2696e-01, 7.0670e-01, 1.3133e-02],
         [4.2110e+01, 1.3991e-01, 2.2714e-01, 1.2148e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2770e+01, 8.9874e-02, 8.3350e-02, 6.7047e-02],
         [5.1466e+01, 2.3088e-08, 5.0672e-02, 4.4968e-02],
         ...,
         [4.2560e+01, 8.8077e-02, 4.6874e-01, 3.6147e-02],
         [4.3289e+01, 1.3578e-01, 4.5469e-01, 3.5417e-02],
         [4.3796e+01, 1.2990e-01, 2.24


Train Diffusion:  82%|████████▏ | 1641/2000 [1:08:32<13:59,  2.34s/it][A
Train Diffusion:  82%|████████▏ | 1642/2000 [1:08:34<13:55,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1643/2000 [1:08:36<13:53,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1644/2000 [1:08:39<13:51,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1645/2000 [1:08:41<13:46,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1646/2000 [1:08:43<13:42,  2.32s/it][A
Train Diffusion:  82%|████████▏ | 1647/2000 [1:08:46<13:43,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1648/2000 [1:08:48<13:39,  2.33s/it][A
Train Diffusion:  82%|████████▏ | 1649/2000 [1:08:50<13:35,  2.32s/it][A
Train Diffusion:  82%|████████▎ | 1650/2000 [1:08:53<13:35,  2.33s/it][A

Moving average ELBO loss at 1650 iterations is: -35561.75546875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5640e+01, 1.7408e-01, 5.7849e-01, 3.9772e-02],
        [4.5742e+01, 1.7665e-01, 5.8034e-01, 4.0408e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3934e+01, 5.8443e-03, 2.6138e-01, 3.6780e-02],
         [5.2884e+01, 1.1758e-04, 2.9203e-01, 1.8942e-02],
         ...,
         [4.2860e+01, 1.4272e-01, 4.2432e-01, 3.7074e-02],
         [4.2278e+01, 1.6043e-01, 4.2666e-01, 3.3824e-02],
         [4.2019e+01, 1.7058e-01, 1.7966e-01, 1.3673e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3110e+01, 1.0147e-01, 5.9055e-02, 1.0590e-01],
         [5.3183e+01, 2.7182e-08, 1.5509e-01, 8.0159e-02],
         ...,
         [4.2765e+01, 1.1194e-01, 9.4753e-01, 1.8893e-02],
         [4.3445e+01, 8.8785e-02, 7.1239e-01, 1.2925e-02],
         [4.3829e+01, 7.6788e-02, 3.149


Train Diffusion:  83%|████████▎ | 1651/2000 [1:08:55<13:37,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1652/2000 [1:08:57<13:33,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1653/2000 [1:09:00<13:30,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1654/2000 [1:09:02<13:27,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1655/2000 [1:09:04<13:22,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1656/2000 [1:09:07<13:19,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1657/2000 [1:09:09<13:17,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1658/2000 [1:09:11<13:13,  2.32s/it][A
Train Diffusion:  83%|████████▎ | 1659/2000 [1:09:14<13:12,  2.32s/it][A
Train Diffusion:  83%|████████▎ | 1660/2000 [1:09:16<13:10,  2.33s/it][A

Moving average ELBO loss at 1660 iterations is: -36104.659765625. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5668e+01, 1.7472e-01, 6.0146e-01, 4.1746e-02],
        [4.5700e+01, 1.7386e-01, 5.8244e-01, 4.2416e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2320e+01, 7.8698e-03, 3.6024e-01, 5.3474e-02],
         [5.2665e+01, 1.3706e-04, 3.6494e-01, 3.9783e-02],
         ...,
         [4.2367e+01, 9.6598e-02, 6.3832e-01, 4.9232e-02],
         [4.3044e+01, 1.1614e-01, 5.0551e-01, 4.3518e-02],
         [4.2242e+01, 1.8600e-01, 2.1085e-01, 1.6806e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3147e+01, 4.8616e-02, 6.0471e-02, 7.2383e-02],
         [5.2547e+01, 3.4681e-08, 1.2516e-01, 4.5565e-02],
         ...,
         [4.3291e+01, 1.6395e-01, 4.9411e-01, 1.4281e-02],
         [4.2653e+01, 1.4024e-01, 4.2144e-01, 1.2289e-02],
         [4.2830e+01, 1.0650e-01, 2.43


Train Diffusion:  83%|████████▎ | 1661/2000 [1:09:18<13:08,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1662/2000 [1:09:21<13:05,  2.32s/it][A
Train Diffusion:  83%|████████▎ | 1663/2000 [1:09:23<13:03,  2.32s/it][A
Train Diffusion:  83%|████████▎ | 1664/2000 [1:09:25<13:03,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1665/2000 [1:09:28<13:03,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1666/2000 [1:09:30<13:00,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1667/2000 [1:09:32<12:58,  2.34s/it][A
Train Diffusion:  83%|████████▎ | 1668/2000 [1:09:35<12:54,  2.33s/it][A
Train Diffusion:  83%|████████▎ | 1669/2000 [1:09:37<12:50,  2.33s/it][A
Train Diffusion:  84%|████████▎ | 1670/2000 [1:09:39<12:47,  2.33s/it][A

Moving average ELBO loss at 1670 iterations is: -35997.73359375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5756e+01, 1.7504e-01, 5.9533e-01, 3.9686e-02],
        [4.5735e+01, 1.7490e-01, 5.7230e-01, 3.9526e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2944e+01, 5.1845e-03, 1.5839e-01, 5.5232e-02],
         [5.2602e+01, 1.9323e-08, 1.3644e-01, 1.9920e-02],
         ...,
         [4.2432e+01, 9.2646e-02, 9.0603e-01, 2.8063e-02],
         [4.1957e+01, 1.3495e-01, 6.9598e-01, 3.0476e-02],
         [4.1759e+01, 1.9185e-01, 2.6710e-01, 1.4803e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3690e+01, 1.1335e-01, 1.5159e-01, 8.5947e-02],
         [5.2208e+01, 2.8275e-04, 3.6499e-01, 7.6140e-02],
         ...,
         [4.3385e+01, 1.6706e-01, 4.4502e-01, 2.6841e-02],
         [4.3974e+01, 1.2686e-01, 4.6945e-01, 1.5956e-02],
         [4.4236e+01, 9.2317e-02, 3.188


Train Diffusion:  84%|████████▎ | 1671/2000 [1:09:42<12:45,  2.33s/it][A
Train Diffusion:  84%|████████▎ | 1672/2000 [1:09:44<12:42,  2.32s/it][A
Train Diffusion:  84%|████████▎ | 1673/2000 [1:09:46<12:40,  2.32s/it][A
Train Diffusion:  84%|████████▎ | 1674/2000 [1:09:49<12:39,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1675/2000 [1:09:51<12:35,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1676/2000 [1:09:53<12:33,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1677/2000 [1:09:55<12:31,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1678/2000 [1:09:58<12:31,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1679/2000 [1:10:00<12:30,  2.34s/it][A
Train Diffusion:  84%|████████▍ | 1680/2000 [1:10:03<12:26,  2.33s/it][A

Moving average ELBO loss at 1680 iterations is: -35916.02109375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5782e+01, 1.7502e-01, 5.7976e-01, 3.8633e-02],
        [4.5758e+01, 1.7143e-01, 5.9474e-01, 4.1107e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3304e+01, 5.5895e-02, 8.5518e-02, 9.5977e-02],
         [5.2323e+01, 3.9055e-08, 1.5253e-01, 9.1020e-02],
         ...,
         [4.3259e+01, 4.1902e-02, 4.9018e-01, 1.9907e-02],
         [4.2663e+01, 1.1037e-01, 5.1532e-01, 2.4862e-02],
         [4.2349e+01, 1.2107e-01, 2.7873e-01, 1.5419e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2588e+01, 2.5845e-02, 2.4574e-01, 4.3139e-02],
         [5.2766e+01, 3.1868e-04, 2.8175e-01, 1.6344e-02],
         ...,
         [4.2534e+01, 1.7326e-01, 8.8116e-01, 3.9283e-02],
         [4.3201e+01, 1.1451e-01, 6.4308e-01, 2.3482e-02],
         [4.3816e+01, 1.3289e-01, 2.507


Train Diffusion:  84%|████████▍ | 1681/2000 [1:10:05<12:22,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1682/2000 [1:10:07<12:19,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1683/2000 [1:10:09<12:17,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1684/2000 [1:10:12<12:14,  2.32s/it][A
Train Diffusion:  84%|████████▍ | 1685/2000 [1:10:14<12:11,  2.32s/it][A
Train Diffusion:  84%|████████▍ | 1686/2000 [1:10:16<12:09,  2.32s/it][A
Train Diffusion:  84%|████████▍ | 1687/2000 [1:10:19<12:07,  2.32s/it][A
Train Diffusion:  84%|████████▍ | 1688/2000 [1:10:21<12:04,  2.32s/it][A
Train Diffusion:  84%|████████▍ | 1689/2000 [1:10:23<12:03,  2.33s/it][A
Train Diffusion:  84%|████████▍ | 1690/2000 [1:10:26<12:00,  2.32s/it][A

Moving average ELBO loss at 1690 iterations is: -34448.237890625. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5783e+01, 1.7289e-01, 5.7214e-01, 3.9548e-02],
        [4.5800e+01, 1.7262e-01, 5.9236e-01, 3.8850e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3698e+01, 6.2204e-03, 9.3300e-02, 4.3400e-02],
         [5.2490e+01, 1.8570e-04, 8.7044e-02, 1.7003e-02],
         ...,
         [4.2902e+01, 1.4465e-01, 1.0026e+00, 2.5324e-02],
         [4.2406e+01, 9.8753e-02, 7.0018e-01, 2.5657e-02],
         [4.1957e+01, 1.1661e-01, 2.8570e-01, 1.6609e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3034e+01, 1.1731e-01, 1.8341e-01, 1.1565e-01],
         [5.2930e+01, 8.2761e-08, 3.9210e-01, 9.1383e-02],
         ...,
         [4.2955e+01, 6.6132e-02, 3.7528e-01, 3.0055e-02],
         [4.3511e+01, 1.1771e-01, 5.0633e-01, 2.2521e-02],
         [4.3980e+01, 1.0804e-01, 3.28


Train Diffusion:  85%|████████▍ | 1691/2000 [1:10:28<11:57,  2.32s/it][A
Train Diffusion:  85%|████████▍ | 1692/2000 [1:10:30<12:03,  2.35s/it][A
Train Diffusion:  85%|████████▍ | 1693/2000 [1:10:33<12:04,  2.36s/it][A
Train Diffusion:  85%|████████▍ | 1694/2000 [1:10:35<11:59,  2.35s/it][A
Train Diffusion:  85%|████████▍ | 1695/2000 [1:10:38<11:54,  2.34s/it][A
Train Diffusion:  85%|████████▍ | 1696/2000 [1:10:40<11:50,  2.34s/it][A
Train Diffusion:  85%|████████▍ | 1697/2000 [1:10:42<11:46,  2.33s/it][A
Train Diffusion:  85%|████████▍ | 1698/2000 [1:10:44<11:43,  2.33s/it][A
Train Diffusion:  85%|████████▍ | 1699/2000 [1:10:47<11:41,  2.33s/it][A
Train Diffusion:  85%|████████▌ | 1700/2000 [1:10:49<11:38,  2.33s/it][A

Moving average ELBO loss at 1700 iterations is: -35109.89765625. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5819e+01, 1.7372e-01, 5.5576e-01, 4.0737e-02],
        [4.5792e+01, 1.7531e-01, 5.7991e-01, 4.1425e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3332e+01, 1.1033e-01, 2.8629e-01, 1.1811e-01],
         [5.2399e+01, 8.2901e-08, 5.1268e-01, 7.9149e-02],
         ...,
         [4.3114e+01, 1.2683e-01, 9.3245e-01, 3.6786e-02],
         [4.3664e+01, 8.5339e-02, 7.0867e-01, 2.3198e-02],
         [4.3877e+01, 1.4135e-01, 3.9419e-01, 1.4933e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2577e+01, 4.9069e-03, 7.7033e-02, 4.0243e-02],
         [5.2804e+01, 1.7009e-04, 4.8506e-02, 1.9705e-02],
         ...,
         [4.2850e+01, 1.2101e-01, 4.9236e-01, 2.2364e-02],
         [4.2290e+01, 1.6764e-01, 4.7604e-01, 2.9150e-02],
         [4.1672e+01, 1.3691e-01, 1.959


Train Diffusion:  85%|████████▌ | 1701/2000 [1:10:51<11:36,  2.33s/it][A
Train Diffusion:  85%|████████▌ | 1702/2000 [1:10:54<11:32,  2.32s/it][A
Train Diffusion:  85%|████████▌ | 1703/2000 [1:10:56<11:30,  2.32s/it][A
Train Diffusion:  85%|████████▌ | 1704/2000 [1:10:58<11:28,  2.33s/it][A
Train Diffusion:  85%|████████▌ | 1705/2000 [1:11:01<11:26,  2.33s/it][A
Train Diffusion:  85%|████████▌ | 1706/2000 [1:11:03<11:27,  2.34s/it][A
Train Diffusion:  85%|████████▌ | 1707/2000 [1:11:05<11:25,  2.34s/it][A
Train Diffusion:  85%|████████▌ | 1708/2000 [1:11:08<11:21,  2.33s/it][A
Train Diffusion:  85%|████████▌ | 1709/2000 [1:11:10<11:18,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1710/2000 [1:11:12<11:15,  2.33s/it][A

Moving average ELBO loss at 1710 iterations is: -37508.2484375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5861e+01, 1.7316e-01, 5.6998e-01, 3.9719e-02],
        [4.5765e+01, 1.7465e-01, 5.8491e-01, 4.0045e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2368e+01, 7.0671e-03, 2.4918e-01, 4.3068e-02],
         [5.2662e+01, 2.8846e-08, 2.9837e-01, 3.2118e-02],
         ...,
         [4.1941e+01, 1.3865e-01, 9.2718e-01, 2.0051e-02],
         [4.1686e+01, 1.6057e-01, 7.0596e-01, 1.4751e-02],
         [4.1574e+01, 1.5931e-01, 2.7872e-01, 1.2478e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3007e+01, 1.7928e-01, 7.7721e-02, 1.2026e-01],
         [5.2441e+01, 3.9643e-04, 1.7904e-01, 6.3085e-02],
         ...,
         [4.3998e+01, 1.2289e-01, 4.2827e-01, 3.0925e-02],
         [4.4246e+01, 9.1928e-02, 4.6528e-01, 3.0331e-02],
         [4.4337e+01, 1.0030e-01, 3.3330


Train Diffusion:  86%|████████▌ | 1711/2000 [1:11:15<11:12,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1712/2000 [1:11:17<11:09,  2.32s/it][A
Train Diffusion:  86%|████████▌ | 1713/2000 [1:11:19<11:06,  2.32s/it][A
Train Diffusion:  86%|████████▌ | 1714/2000 [1:11:22<11:05,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1715/2000 [1:11:24<11:03,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1716/2000 [1:11:26<11:00,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1717/2000 [1:11:29<10:57,  2.32s/it][A
Train Diffusion:  86%|████████▌ | 1718/2000 [1:11:31<10:58,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1719/2000 [1:11:33<10:55,  2.33s/it][A
Train Diffusion:  86%|████████▌ | 1720/2000 [1:11:36<10:56,  2.34s/it][A

Moving average ELBO loss at 1720 iterations is: -36075.6921875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5792e+01, 1.7071e-01, 5.7871e-01, 3.8260e-02],
        [4.5827e+01, 1.6888e-01, 5.8773e-01, 3.9508e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3725e+01, 2.4926e-02, 5.8228e-02, 5.9575e-02],
         [5.2810e+01, 3.5359e-04, 1.6314e-01, 2.2308e-02],
         ...,
         [4.2520e+01, 6.3303e-02, 4.2326e-01, 3.0991e-02],
         [4.3176e+01, 7.0752e-02, 5.4344e-01, 2.8615e-02],
         [4.3785e+01, 7.5572e-02, 2.5137e-01, 1.3889e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3192e+01, 6.4366e-02, 2.5646e-01, 1.0197e-01],
         [5.2822e+01, 4.1752e-08, 2.7395e-01, 7.0463e-02],
         ...,
         [4.3415e+01, 1.7468e-01, 9.8505e-01, 2.4674e-02],
         [4.2895e+01, 1.7348e-01, 6.9587e-01, 1.6343e-02],
         [4.2520e+01, 1.7003e-01, 4.0832


Train Diffusion:  86%|████████▌ | 1721/2000 [1:11:38<10:54,  2.34s/it][A
Train Diffusion:  86%|████████▌ | 1722/2000 [1:11:40<10:50,  2.34s/it][A
Train Diffusion:  86%|████████▌ | 1723/2000 [1:11:43<10:47,  2.34s/it][A
Train Diffusion:  86%|████████▌ | 1724/2000 [1:11:45<10:43,  2.33s/it][A
Train Diffusion:  86%|████████▋ | 1725/2000 [1:11:47<10:41,  2.33s/it][A
Train Diffusion:  86%|████████▋ | 1726/2000 [1:11:50<10:38,  2.33s/it][A
Train Diffusion:  86%|████████▋ | 1727/2000 [1:11:52<10:35,  2.33s/it][A
Train Diffusion:  86%|████████▋ | 1728/2000 [1:11:54<10:31,  2.32s/it][A
Train Diffusion:  86%|████████▋ | 1729/2000 [1:11:57<10:29,  2.32s/it][A
Train Diffusion:  86%|████████▋ | 1730/2000 [1:11:59<10:26,  2.32s/it][A

Moving average ELBO loss at 1730 iterations is: -34386.80703125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5813e+01, 1.7515e-01, 5.9194e-01, 3.9901e-02],
        [4.5776e+01, 1.7482e-01, 5.8104e-01, 4.2564e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2701e+01, 8.6351e-03, 7.9513e-02, 6.7198e-02],
         [5.2660e+01, 2.3810e-04, 1.7445e-01, 4.1756e-02],
         ...,
         [4.2912e+01, 9.1009e-02, 8.5306e-01, 2.1980e-02],
         [4.2287e+01, 8.0746e-02, 6.9029e-01, 2.8259e-02],
         [4.2062e+01, 1.0210e-01, 3.0577e-01, 1.6930e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3407e+01, 1.6168e-01, 2.4636e-01, 7.4063e-02],
         [5.2635e+01, 6.9433e-08, 2.9108e-01, 4.4480e-02],
         ...,
         [4.2944e+01, 1.6518e-01, 6.5112e-01, 3.7478e-02],
         [4.3589e+01, 1.6693e-01, 5.1306e-01, 2.4810e-02],
         [4.4052e+01, 1.5177e-01, 2.523


Train Diffusion:  87%|████████▋ | 1731/2000 [1:12:01<10:24,  2.32s/it][A
Train Diffusion:  87%|████████▋ | 1732/2000 [1:12:04<10:22,  2.32s/it][A
Train Diffusion:  87%|████████▋ | 1733/2000 [1:12:06<10:19,  2.32s/it][A
Train Diffusion:  87%|████████▋ | 1734/2000 [1:12:08<10:22,  2.34s/it][A
Train Diffusion:  87%|████████▋ | 1735/2000 [1:12:11<10:20,  2.34s/it][A
Train Diffusion:  87%|████████▋ | 1736/2000 [1:12:13<10:16,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1737/2000 [1:12:15<10:13,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1738/2000 [1:12:18<10:11,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1739/2000 [1:12:20<10:08,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1740/2000 [1:12:22<10:04,  2.33s/it][A

Moving average ELBO loss at 1740 iterations is: -32960.5908203125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5804e+01, 1.7478e-01, 5.9096e-01, 3.9329e-02],
        [4.5806e+01, 1.7199e-01, 5.9124e-01, 3.9020e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2778e+01, 8.0359e-03, 7.6082e-02, 6.3887e-02],
         [5.2891e+01, 2.9700e-08, 1.5096e-01, 3.9106e-02],
         ...,
         [4.2796e+01, 5.2494e-02, 6.2269e-01, 1.0593e-02],
         [4.2399e+01, 1.0084e-01, 5.0960e-01, 2.2189e-02],
         [4.2003e+01, 1.0514e-01, 2.4040e-01, 1.5998e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3474e+01, 1.1317e-01, 2.5309e-01, 7.1853e-02],
         [5.2785e+01, 3.9154e-04, 2.9226e-01, 4.3495e-02],
         ...,
         [4.3071e+01, 1.6716e-01, 8.3188e-01, 4.3735e-02],
         [4.3544e+01, 1.0695e-01, 6.5962e-01, 2.4362e-02],
         [4.3747e+01, 1.2894e-01, 2.9


Train Diffusion:  87%|████████▋ | 1741/2000 [1:12:25<10:02,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1742/2000 [1:12:27<09:59,  2.32s/it][A
Train Diffusion:  87%|████████▋ | 1743/2000 [1:12:29<09:56,  2.32s/it][A
Train Diffusion:  87%|████████▋ | 1744/2000 [1:12:32<09:57,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1745/2000 [1:12:34<09:54,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1746/2000 [1:12:36<09:51,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1747/2000 [1:12:39<09:48,  2.33s/it][A
Train Diffusion:  87%|████████▋ | 1748/2000 [1:12:41<09:50,  2.34s/it][A
Train Diffusion:  87%|████████▋ | 1749/2000 [1:12:43<09:47,  2.34s/it][A
Train Diffusion:  88%|████████▊ | 1750/2000 [1:12:46<09:44,  2.34s/it][A

Moving average ELBO loss at 1750 iterations is: -32872.30859375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5822e+01, 1.7181e-01, 5.8811e-01, 4.0578e-02],
        [4.5792e+01, 1.7449e-01, 5.8974e-01, 3.9945e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3691e+01, 1.8592e-02, 8.1453e-02, 5.7403e-02],
         [5.3472e+01, 4.0130e-08, 5.4323e-02, 3.6157e-02],
         ...,
         [4.3150e+01, 8.5508e-02, 9.2025e-01, 2.5508e-02],
         [4.3561e+01, 1.3641e-01, 6.8652e-01, 2.6581e-02],
         [4.3776e+01, 1.2570e-01, 2.6922e-01, 1.3449e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2981e+01, 2.6250e-02, 2.6253e-01, 8.7197e-02],
         [5.1936e+01, 1.9872e-04, 4.9712e-01, 4.5817e-02],
         ...,
         [4.2720e+01, 1.9205e-01, 4.6529e-01, 2.8671e-02],
         [4.2437e+01, 1.3468e-01, 5.0329e-01, 1.7419e-02],
         [4.1962e+01, 1.4976e-01, 3.402


Train Diffusion:  88%|████████▊ | 1751/2000 [1:12:48<09:41,  2.34s/it][A
Train Diffusion:  88%|████████▊ | 1752/2000 [1:12:50<09:38,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1753/2000 [1:12:53<09:35,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1754/2000 [1:12:55<09:32,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1755/2000 [1:12:57<09:29,  2.32s/it][A
Train Diffusion:  88%|████████▊ | 1756/2000 [1:13:00<09:26,  2.32s/it][A
Train Diffusion:  88%|████████▊ | 1757/2000 [1:13:02<09:25,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1758/2000 [1:13:04<09:29,  2.35s/it][A
Train Diffusion:  88%|████████▊ | 1759/2000 [1:13:07<09:24,  2.34s/it][A
Train Diffusion:  88%|████████▊ | 1760/2000 [1:13:09<09:20,  2.34s/it][A

Moving average ELBO loss at 1760 iterations is: -33651.7623046875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5825e+01, 1.7126e-01, 6.0528e-01, 4.1466e-02],
        [4.5800e+01, 1.6949e-01, 5.8730e-01, 4.0673e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2668e+01, 7.5710e-03, 2.8375e-01, 3.1727e-02],
         [5.1743e+01, 2.3165e-04, 4.8089e-01, 3.6575e-02],
         ...,
         [4.3087e+01, 1.3691e-01, 5.1828e-01, 2.9379e-02],
         [4.2569e+01, 1.7480e-01, 4.4987e-01, 2.9126e-02],
         [4.2311e+01, 1.9458e-01, 1.7509e-01, 1.1726e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3533e+01, 9.2933e-02, 6.8288e-02, 1.1634e-01],
         [5.3369e+01, 5.1398e-08, 6.0754e-02, 5.2691e-02],
         ...,
         [4.2801e+01, 1.2670e-01, 8.8324e-01, 2.6486e-02],
         [4.3401e+01, 9.5057e-02, 7.1706e-01, 1.6192e-02],
         [4.3919e+01, 8.5798e-02, 3.6


Train Diffusion:  88%|████████▊ | 1761/2000 [1:13:11<09:18,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1762/2000 [1:13:14<09:19,  2.35s/it][A
Train Diffusion:  88%|████████▊ | 1763/2000 [1:13:16<09:15,  2.34s/it][A
Train Diffusion:  88%|████████▊ | 1764/2000 [1:13:18<09:10,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1765/2000 [1:13:21<09:06,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1766/2000 [1:13:23<09:04,  2.33s/it][A
Train Diffusion:  88%|████████▊ | 1767/2000 [1:13:25<09:01,  2.32s/it][A
Train Diffusion:  88%|████████▊ | 1768/2000 [1:13:28<08:58,  2.32s/it][A
Train Diffusion:  88%|████████▊ | 1769/2000 [1:13:30<08:56,  2.32s/it][A
Train Diffusion:  88%|████████▊ | 1770/2000 [1:13:32<08:55,  2.33s/it][A

Moving average ELBO loss at 1770 iterations is: -32188.5845703125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5812e+01, 1.7054e-01, 5.8557e-01, 4.0051e-02],
        [4.5841e+01, 1.7250e-01, 5.9436e-01, 4.0500e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3673e+01, 6.4909e-03, 2.4405e-01, 8.5347e-02],
         [5.3352e+01, 2.4469e-08, 2.0749e-01, 3.7202e-02],
         ...,
         [4.2847e+01, 6.8840e-02, 8.4762e-01, 1.1802e-02],
         [4.2498e+01, 8.8313e-02, 6.9736e-01, 2.5359e-02],
         [4.2276e+01, 1.1440e-01, 3.5545e-01, 1.6209e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2892e+01, 9.2774e-02, 1.2262e-01, 4.7973e-02],
         [5.1729e+01, 3.5911e-04, 2.3271e-01, 4.2183e-02],
         ...,
         [4.3153e+01, 1.8644e-01, 5.1477e-01, 5.0116e-02],
         [4.3610e+01, 1.6359e-01, 4.4205e-01, 2.9672e-02],
         [4.4054e+01, 1.5396e-01, 1.6


Train Diffusion:  89%|████████▊ | 1771/2000 [1:13:35<08:51,  2.32s/it][A
Train Diffusion:  89%|████████▊ | 1772/2000 [1:13:37<08:49,  2.32s/it][A
Train Diffusion:  89%|████████▊ | 1773/2000 [1:13:39<08:46,  2.32s/it][A
Train Diffusion:  89%|████████▊ | 1774/2000 [1:13:42<08:45,  2.32s/it][A
Train Diffusion:  89%|████████▉ | 1775/2000 [1:13:44<08:43,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1776/2000 [1:13:46<08:46,  2.35s/it][A
Train Diffusion:  89%|████████▉ | 1777/2000 [1:13:49<08:41,  2.34s/it][A
Train Diffusion:  89%|████████▉ | 1778/2000 [1:13:51<08:39,  2.34s/it][A
Train Diffusion:  89%|████████▉ | 1779/2000 [1:13:53<08:35,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1780/2000 [1:13:56<08:31,  2.33s/it][A

Moving average ELBO loss at 1780 iterations is: -33831.349609375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5890e+01, 1.7658e-01, 5.8709e-01, 4.0954e-02],
        [4.5791e+01, 1.7499e-01, 5.9641e-01, 4.1446e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3143e+01, 1.4739e-02, 2.7789e-01, 6.9620e-02],
         [5.3326e+01, 2.0994e-04, 5.0139e-01, 4.0940e-02],
         ...,
         [4.3055e+01, 6.4385e-02, 6.0379e-01, 4.7451e-02],
         [4.3614e+01, 6.9047e-02, 5.1381e-01, 2.6503e-02],
         [4.2826e+01, 1.5109e-01, 2.5694e-01, 1.5150e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2139e+01, 5.4381e-02, 6.7634e-02, 7.2169e-02],
         [5.1612e+01, 4.6345e-08, 4.8185e-02, 4.3379e-02],
         ...,
         [4.2989e+01, 1.5373e-01, 8.2275e-01, 1.3100e-02],
         [4.2372e+01, 1.3900e-01, 6.3993e-01, 2.6802e-02],
         [4.2927e+01, 1.0817e-01, 2.88


Train Diffusion:  89%|████████▉ | 1781/2000 [1:13:58<08:28,  2.32s/it][A
Train Diffusion:  89%|████████▉ | 1782/2000 [1:14:00<08:27,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1783/2000 [1:14:03<08:24,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1784/2000 [1:14:05<08:22,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1785/2000 [1:14:07<08:20,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1786/2000 [1:14:10<08:17,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1787/2000 [1:14:12<08:15,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1788/2000 [1:14:14<08:13,  2.33s/it][A
Train Diffusion:  89%|████████▉ | 1789/2000 [1:14:17<08:11,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1790/2000 [1:14:19<08:10,  2.34s/it][A

Moving average ELBO loss at 1790 iterations is: -34900.498046875. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5834e+01, 1.7957e-01, 5.8642e-01, 3.8376e-02],
        [4.5894e+01, 1.7808e-01, 5.9188e-01, 3.8331e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3742e+01, 1.3053e-02, 1.1642e-01, 4.6836e-02],
         [5.2841e+01, 4.2584e-08, 2.1662e-01, 1.9382e-02],
         ...,
         [4.3089e+01, 1.8784e-01, 5.7089e-01, 2.1955e-02],
         [4.2476e+01, 1.7083e-01, 5.4906e-01, 2.6941e-02],
         [4.2361e+01, 1.5024e-01, 2.0330e-01, 1.5848e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2813e+01, 1.7260e-01, 2.3598e-01, 9.0111e-02],
         [5.3184e+01, 5.3444e-04, 2.3290e-01, 6.8899e-02],
         ...,
         [4.2946e+01, 8.2404e-02, 8.0216e-01, 3.3045e-02],
         [4.3649e+01, 8.0493e-02, 6.1016e-01, 2.2485e-02],
         [4.4149e+01, 1.0501e-01, 3.79


Train Diffusion:  90%|████████▉ | 1791/2000 [1:14:21<08:07,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1792/2000 [1:14:24<08:05,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1793/2000 [1:14:26<08:02,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1794/2000 [1:14:28<07:59,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1795/2000 [1:14:31<07:57,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1796/2000 [1:14:33<07:57,  2.34s/it][A
Train Diffusion:  90%|████████▉ | 1797/2000 [1:14:35<07:53,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1798/2000 [1:14:38<07:50,  2.33s/it][A
Train Diffusion:  90%|████████▉ | 1799/2000 [1:14:40<07:48,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1800/2000 [1:14:42<07:45,  2.33s/it][A

Moving average ELBO loss at 1800 iterations is: -35318.858984375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5865e+01, 1.7618e-01, 5.9937e-01, 3.7602e-02],
        [4.5926e+01, 1.8071e-01, 5.8767e-01, 3.7687e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3459e+01, 9.9056e-02, 6.7297e-02, 5.5627e-02],
         [5.3310e+01, 7.6261e-08, 1.7671e-01, 3.7741e-02],
         ...,
         [4.2934e+01, 1.3590e-01, 7.5216e-01, 3.8208e-02],
         [4.2435e+01, 9.1913e-02, 5.7940e-01, 3.4266e-02],
         [4.3397e+01, 8.3346e-02, 2.7169e-01, 1.6289e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2630e+01, 2.6894e-02, 2.7912e-01, 7.9325e-02],
         [5.1548e+01, 3.5119e-04, 2.8942e-01, 4.8526e-02],
         ...,
         [4.3244e+01, 1.2587e-01, 6.3142e-01, 1.3357e-02],
         [4.3756e+01, 1.6582e-01, 5.6089e-01, 1.0861e-02],
         [4.3297e+01, 1.8550e-01, 2.67


Train Diffusion:  90%|█████████ | 1801/2000 [1:14:45<07:43,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1802/2000 [1:14:47<07:40,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1803/2000 [1:14:49<07:40,  2.34s/it][A
Train Diffusion:  90%|█████████ | 1804/2000 [1:14:52<07:38,  2.34s/it][A
Train Diffusion:  90%|█████████ | 1805/2000 [1:14:54<07:35,  2.34s/it][A
Train Diffusion:  90%|█████████ | 1806/2000 [1:14:56<07:31,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1807/2000 [1:14:59<07:29,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1808/2000 [1:15:01<07:27,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1809/2000 [1:15:03<07:24,  2.33s/it][A
Train Diffusion:  90%|█████████ | 1810/2000 [1:15:05<07:21,  2.32s/it][A

Moving average ELBO loss at 1810 iterations is: -35173.951953125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5850e+01, 1.7304e-01, 6.0403e-01, 3.9373e-02],
        [4.5915e+01, 1.7601e-01, 6.0012e-01, 3.9074e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3484e+01, 1.3954e-02, 7.0109e-02, 3.3101e-02],
         [5.3268e+01, 2.3745e-04, 1.6307e-01, 3.4448e-02],
         ...,
         [4.3023e+01, 1.2972e-01, 5.4770e-01, 2.4229e-02],
         [4.3590e+01, 1.1051e-01, 5.0268e-01, 1.7328e-02],
         [4.2997e+01, 1.0410e-01, 1.9768e-01, 9.4130e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2601e+01, 5.9164e-02, 3.2526e-01, 1.2675e-01],
         [5.1560e+01, 5.5003e-08, 3.3019e-01, 5.4007e-02],
         ...,
         [4.3129e+01, 1.1311e-01, 9.5857e-01, 2.6457e-02],
         [4.2617e+01, 1.2606e-01, 7.3450e-01, 2.6353e-02],
         [4.3246e+01, 1.8051e-01, 3.99


Train Diffusion:  91%|█████████ | 1811/2000 [1:15:08<07:19,  2.33s/it][A
Train Diffusion:  91%|█████████ | 1812/2000 [1:15:10<07:17,  2.32s/it][A
Train Diffusion:  91%|█████████ | 1813/2000 [1:15:12<07:14,  2.32s/it][A
Train Diffusion:  91%|█████████ | 1814/2000 [1:15:15<07:12,  2.32s/it][A
Train Diffusion:  91%|█████████ | 1815/2000 [1:15:17<07:10,  2.32s/it][A
Train Diffusion:  91%|█████████ | 1816/2000 [1:15:19<07:07,  2.32s/it][A
Train Diffusion:  91%|█████████ | 1817/2000 [1:15:22<07:06,  2.33s/it][A
Train Diffusion:  91%|█████████ | 1818/2000 [1:15:24<07:04,  2.33s/it][A
Train Diffusion:  91%|█████████ | 1819/2000 [1:15:26<07:02,  2.33s/it][A
Train Diffusion:  91%|█████████ | 1820/2000 [1:15:29<06:59,  2.33s/it][A

Moving average ELBO loss at 1820 iterations is: -35728.44453125. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5881e+01, 1.8138e-01, 5.9723e-01, 3.9743e-02],
        [4.5901e+01, 1.7321e-01, 5.9336e-01, 3.8315e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3832e+01, 9.4964e-02, 1.1759e-01, 5.5490e-02],
         [5.3831e+01, 5.4625e-08, 1.1878e-01, 2.0984e-02],
         ...,
         [4.2402e+01, 1.7940e-01, 7.5604e-01, 3.1531e-02],
         [4.3319e+01, 1.7635e-01, 6.3750e-01, 1.5222e-02],
         [4.2832e+01, 1.5118e-01, 2.7816e-01, 8.9861e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2898e+01, 2.3399e-02, 2.4679e-01, 8.5513e-02],
         [5.1987e+01, 5.9253e-04, 4.8674e-01, 7.1509e-02],
         ...,
         [4.3697e+01, 8.9778e-02, 5.9718e-01, 1.4751e-02],
         [4.3028e+01, 8.2578e-02, 5.5737e-01, 2.0761e-02],
         [4.3455e+01, 1.1254e-01, 3.179


Train Diffusion:  91%|█████████ | 1821/2000 [1:15:31<06:58,  2.34s/it][A
Train Diffusion:  91%|█████████ | 1822/2000 [1:15:33<06:55,  2.34s/it][A
Train Diffusion:  91%|█████████ | 1823/2000 [1:15:36<06:52,  2.33s/it][A
Train Diffusion:  91%|█████████ | 1824/2000 [1:15:38<06:50,  2.33s/it][A
Train Diffusion:  91%|█████████▏| 1825/2000 [1:15:40<06:47,  2.33s/it][A
Train Diffusion:  91%|█████████▏| 1826/2000 [1:15:43<06:45,  2.33s/it][A
Train Diffusion:  91%|█████████▏| 1827/2000 [1:15:45<06:43,  2.33s/it][A
Train Diffusion:  91%|█████████▏| 1828/2000 [1:15:47<06:40,  2.33s/it][A
Train Diffusion:  91%|█████████▏| 1829/2000 [1:15:50<06:38,  2.33s/it][A
Train Diffusion:  92%|█████████▏| 1830/2000 [1:15:52<06:38,  2.34s/it][A

Moving average ELBO loss at 1830 iterations is: -37076.427734375. Best ELBO loss value is: -38359.63671875.

C_PATH mean = tensor([[4.5888e+01, 1.7419e-01, 5.8581e-01, 4.0530e-02],
        [4.5917e+01, 1.7175e-01, 5.9356e-01, 4.1142e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3247e+01, 1.8587e-02, 8.9752e-02, 6.1911e-02],
         [5.2466e+01, 5.0106e-04, 1.8225e-01, 2.2533e-02],
         ...,
         [4.2948e+01, 7.1294e-02, 8.0451e-01, 9.9155e-03],
         [4.2371e+01, 9.1773e-02, 6.5556e-01, 2.4466e-02],
         [4.3104e+01, 1.1728e-01, 2.8750e-01, 1.2825e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2401e+01, 9.6280e-02, 2.5804e-01, 7.9505e-02],
         [5.2815e+01, 4.4272e-08, 3.0329e-01, 6.7686e-02],
         ...,
         [4.3217e+01, 1.9462e-01, 5.9712e-01, 5.3083e-02],
         [4.3864e+01, 1.7030e-01, 5.6753e-01, 3.0211e-02],
         [4.3221e+01, 1.6208e-01, 3.20


Train Diffusion:  92%|█████████▏| 1831/2000 [1:15:54<06:36,  2.35s/it][A
Train Diffusion:  92%|█████████▏| 1832/2000 [1:15:57<06:32,  2.34s/it][A
Train Diffusion:  92%|█████████▏| 1833/2000 [1:15:59<06:30,  2.34s/it][A
Train Diffusion:  92%|█████████▏| 1834/2000 [1:16:01<06:28,  2.34s/it][A
Train Diffusion:  92%|█████████▏| 1835/2000 [1:16:04<06:24,  2.33s/it][A
Train Diffusion:  92%|█████████▏| 1836/2000 [1:16:06<06:21,  2.33s/it][A
Train Diffusion:  92%|█████████▏| 1837/2000 [1:16:08<06:19,  2.33s/it][A
Train Diffusion:  92%|█████████▏| 1838/2000 [1:16:11<06:16,  2.32s/it][A
Train Diffusion:  92%|█████████▏| 1839/2000 [1:16:13<06:14,  2.32s/it][A
Train Diffusion:  92%|█████████▏| 1840/2000 [1:16:16<06:28,  2.43s/it][A

Moving average ELBO loss at 1840 iterations is: -38010.537109375. Best ELBO loss value is: -38876.78515625.

C_PATH mean = tensor([[4.5965e+01, 1.7418e-01, 5.7460e-01, 4.0555e-02],
        [4.5875e+01, 1.7417e-01, 6.1097e-01, 3.9512e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2796e+01, 3.5651e-02, 9.0846e-02, 4.1807e-02],
         [5.2968e+01, 5.2906e-04, 8.5454e-02, 3.7241e-02],
         ...,
         [4.3148e+01, 1.0044e-01, 8.4229e-01, 2.4082e-02],
         [4.3813e+01, 1.3931e-01, 6.3477e-01, 2.3258e-02],
         [4.4283e+01, 1.3619e-01, 2.6152e-01, 1.0895e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3604e+01, 6.2501e-02, 2.6989e-01, 9.3309e-02],
         [5.2517e+01, 4.7143e-08, 4.3658e-01, 5.0176e-02],
         ...,
         [4.3091e+01, 1.4601e-01, 5.8190e-01, 3.0997e-02],
         [4.2453e+01, 1.1110e-01, 5.6787e-01, 1.7962e-02],
         [4.2361e+01, 1.4651e-01, 2.86


Train Diffusion:  92%|█████████▏| 1841/2000 [1:16:18<06:23,  2.41s/it][A
Train Diffusion:  92%|█████████▏| 1842/2000 [1:16:20<06:16,  2.38s/it][A
Train Diffusion:  92%|█████████▏| 1843/2000 [1:16:23<06:11,  2.37s/it][A
Train Diffusion:  92%|█████████▏| 1844/2000 [1:16:25<06:06,  2.35s/it][A
Train Diffusion:  92%|█████████▏| 1845/2000 [1:16:27<06:02,  2.34s/it][A
Train Diffusion:  92%|█████████▏| 1846/2000 [1:16:30<05:59,  2.33s/it][A
Train Diffusion:  92%|█████████▏| 1847/2000 [1:16:32<05:59,  2.35s/it][A
Train Diffusion:  92%|█████████▏| 1848/2000 [1:16:34<05:57,  2.35s/it][A
Train Diffusion:  92%|█████████▏| 1849/2000 [1:16:37<05:53,  2.34s/it][A
Train Diffusion:  92%|█████████▎| 1850/2000 [1:16:39<05:50,  2.34s/it][A

Moving average ELBO loss at 1850 iterations is: -37608.665625. Best ELBO loss value is: -38934.35546875.

C_PATH mean = tensor([[4.5931e+01, 1.6986e-01, 6.0519e-01, 4.0222e-02],
        [4.5906e+01, 1.7213e-01, 6.0334e-01, 3.9183e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3817e+01, 2.2995e-02, 8.6912e-02, 7.9238e-02],
         [5.2735e+01, 7.7975e-08, 1.4708e-01, 4.2985e-02],
         ...,
         [4.3404e+01, 9.0441e-02, 6.4841e-01, 4.6406e-02],
         [4.3711e+01, 8.0304e-02, 5.0355e-01, 2.5162e-02],
         [4.3984e+01, 1.0796e-01, 2.9522e-01, 1.1975e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2956e+01, 3.1717e-02, 2.4868e-01, 7.1487e-02],
         [5.3168e+01, 2.0118e-04, 3.4401e-01, 4.2650e-02],
         ...,
         [4.2849e+01, 1.5337e-01, 7.7427e-01, 1.2163e-02],
         [4.2647e+01, 1.6448e-01, 6.7288e-01, 2.6647e-02],
         [4.2245e+01, 1.5231e-01, 3.3326e


Train Diffusion:  93%|█████████▎| 1851/2000 [1:16:41<05:47,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1852/2000 [1:16:44<05:44,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1853/2000 [1:16:46<05:41,  2.32s/it][A
Train Diffusion:  93%|█████████▎| 1854/2000 [1:16:48<05:40,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1855/2000 [1:16:51<05:38,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1856/2000 [1:16:53<05:36,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1857/2000 [1:16:55<05:33,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1858/2000 [1:16:58<05:30,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1859/2000 [1:17:00<05:28,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1860/2000 [1:17:02<05:25,  2.33s/it][A

Moving average ELBO loss at 1860 iterations is: -37354.982421875. Best ELBO loss value is: -38934.35546875.

C_PATH mean = tensor([[4.5876e+01, 1.8084e-01, 6.0507e-01, 3.9714e-02],
        [4.6006e+01, 1.6831e-01, 5.8572e-01, 3.9138e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2673e+01, 7.9298e-03, 7.4529e-02, 7.0763e-02],
         [5.1493e+01, 4.2058e-08, 1.4733e-01, 2.2703e-02],
         ...,
         [4.2664e+01, 1.4292e-01, 8.3574e-01, 3.9253e-02],
         [4.2354e+01, 1.2651e-01, 6.6942e-01, 2.3380e-02],
         [4.2146e+01, 1.1761e-01, 2.9799e-01, 1.2201e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3453e+01, 1.1527e-01, 3.3993e-01, 7.1630e-02],
         [5.3397e+01, 5.4988e-04, 3.6190e-01, 6.3181e-02],
         ...,
         [4.3619e+01, 8.3087e-02, 6.6964e-01, 1.5019e-02],
         [4.4057e+01, 9.4807e-02, 5.3125e-01, 2.4635e-02],
         [4.4217e+01, 1.7100e-01, 2.71


Train Diffusion:  93%|█████████▎| 1861/2000 [1:17:05<05:23,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1862/2000 [1:17:07<05:21,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1863/2000 [1:17:09<05:18,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1864/2000 [1:17:12<05:16,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1865/2000 [1:17:14<05:13,  2.32s/it][A
Train Diffusion:  93%|█████████▎| 1866/2000 [1:17:16<05:11,  2.33s/it][A
Train Diffusion:  93%|█████████▎| 1867/2000 [1:17:19<05:09,  2.32s/it][A
Train Diffusion:  93%|█████████▎| 1868/2000 [1:17:21<05:06,  2.32s/it][A
Train Diffusion:  93%|█████████▎| 1869/2000 [1:17:23<05:04,  2.32s/it][A
Train Diffusion:  94%|█████████▎| 1870/2000 [1:17:26<05:01,  2.32s/it][A

Moving average ELBO loss at 1870 iterations is: -39554.608203125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5991e+01, 1.7171e-01, 5.9017e-01, 4.1102e-02],
        [4.5935e+01, 1.7420e-01, 5.9784e-01, 3.8863e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3582e+01, 8.4219e-03, 2.6171e-01, 6.8066e-02],
         [5.3749e+01, 3.3299e-04, 2.9897e-01, 3.8934e-02],
         ...,
         [4.3516e+01, 1.6659e-01, 6.5742e-01, 2.1008e-02],
         [4.2967e+01, 1.5394e-01, 5.6973e-01, 2.6407e-02],
         [4.2467e+01, 1.6547e-01, 2.7785e-01, 1.3417e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2688e+01, 9.4509e-02, 5.9408e-02, 7.4410e-02],
         [5.1813e+01, 8.3029e-08, 1.4454e-01, 4.4861e-02],
         ...,
         [4.2803e+01, 1.0567e-01, 7.5476e-01, 3.2448e-02],
         [4.3488e+01, 8.5720e-02, 6.0005e-01, 1.7344e-02],
         [4.3843e+01, 8.9107e-02, 2.7711e-


Train Diffusion:  94%|█████████▎| 1871/2000 [1:17:28<04:59,  2.32s/it][A
Train Diffusion:  94%|█████████▎| 1872/2000 [1:17:30<04:57,  2.33s/it][A
Train Diffusion:  94%|█████████▎| 1873/2000 [1:17:33<04:55,  2.32s/it][A
Train Diffusion:  94%|█████████▎| 1874/2000 [1:17:35<04:53,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1875/2000 [1:17:37<04:50,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1876/2000 [1:17:40<04:48,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1877/2000 [1:17:42<04:45,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1878/2000 [1:17:44<04:43,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1879/2000 [1:17:47<04:41,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1880/2000 [1:17:49<04:39,  2.33s/it][A

Moving average ELBO loss at 1880 iterations is: -38757.7859375. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5975e+01, 1.7277e-01, 5.9558e-01, 4.1194e-02],
        [4.5969e+01, 1.7701e-01, 5.8929e-01, 4.0097e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3630e+01, 3.8529e-02, 1.2541e-01, 7.8550e-02],
         [5.2592e+01, 7.0964e-04, 1.3602e-01, 6.8134e-02],
         ...,
         [4.3124e+01, 8.7570e-02, 6.5537e-01, 1.2423e-02],
         [4.3905e+01, 1.2110e-01, 5.1421e-01, 2.5818e-02],
         [4.4256e+01, 1.2301e-01, 3.0071e-01, 1.3685e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2780e+01, 5.4937e-02, 1.9706e-01, 7.2203e-02],
         [5.3066e+01, 6.7087e-08, 1.6720e-01, 2.2987e-02],
         ...,
         [4.3217e+01, 1.7707e-01, 7.8493e-01, 4.6307e-02],
         [4.2580e+01, 1.3211e-01, 6.9586e-01, 2.4461e-02],
         [4.2247e+01, 1.4741e-01, 3.4464e-01


Train Diffusion:  94%|█████████▍| 1881/2000 [1:17:51<04:37,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1882/2000 [1:17:54<04:36,  2.34s/it][A
Train Diffusion:  94%|█████████▍| 1883/2000 [1:17:56<04:33,  2.34s/it][A
Train Diffusion:  94%|█████████▍| 1884/2000 [1:17:58<04:30,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1885/2000 [1:18:01<04:27,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1886/2000 [1:18:03<04:25,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1887/2000 [1:18:05<04:22,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1888/2000 [1:18:08<04:20,  2.33s/it][A
Train Diffusion:  94%|█████████▍| 1889/2000 [1:18:10<04:17,  2.32s/it][A
Train Diffusion:  94%|█████████▍| 1890/2000 [1:18:12<04:15,  2.32s/it][A

Moving average ELBO loss at 1890 iterations is: -38743.1484375. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5931e+01, 1.7289e-01, 5.9216e-01, 4.1475e-02],
        [4.6010e+01, 1.7341e-01, 5.9677e-01, 4.1869e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2682e+01, 1.8159e-02, 2.4458e-01, 5.2653e-02],
         [5.1721e+01, 3.9356e-04, 2.7372e-01, 1.8731e-02],
         ...,
         [4.2631e+01, 2.1075e-01, 7.6244e-01, 2.3291e-02],
         [4.3357e+01, 1.7288e-01, 7.0397e-01, 3.0426e-02],
         [4.2980e+01, 1.5394e-01, 3.4693e-01, 1.6082e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3559e+01, 9.9105e-02, 9.4602e-02, 1.0427e-01],
         [5.3560e+01, 1.2386e-07, 1.9222e-01, 8.7774e-02],
         ...,
         [4.3727e+01, 4.4560e-02, 6.6098e-01, 3.2854e-02],
         [4.3094e+01, 7.1189e-02, 5.1224e-01, 1.7866e-02],
         [4.3590e+01, 1.1206e-01, 3.0583e-01


Train Diffusion:  95%|█████████▍| 1891/2000 [1:18:14<04:13,  2.32s/it][A
Train Diffusion:  95%|█████████▍| 1892/2000 [1:18:17<04:10,  2.32s/it][A
Train Diffusion:  95%|█████████▍| 1893/2000 [1:18:19<04:08,  2.33s/it][A
Train Diffusion:  95%|█████████▍| 1894/2000 [1:18:21<04:06,  2.33s/it][A
Train Diffusion:  95%|█████████▍| 1895/2000 [1:18:24<04:04,  2.33s/it][A
Train Diffusion:  95%|█████████▍| 1896/2000 [1:18:26<04:03,  2.34s/it][A
Train Diffusion:  95%|█████████▍| 1897/2000 [1:18:28<04:00,  2.33s/it][A
Train Diffusion:  95%|█████████▍| 1898/2000 [1:18:31<03:57,  2.33s/it][A
Train Diffusion:  95%|█████████▍| 1899/2000 [1:18:33<03:56,  2.34s/it][A
Train Diffusion:  95%|█████████▌| 1900/2000 [1:18:36<03:53,  2.34s/it][A

Moving average ELBO loss at 1900 iterations is: -37718.751171875. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5951e+01, 1.7664e-01, 6.1342e-01, 3.9485e-02],
        [4.5968e+01, 1.7281e-01, 5.8459e-01, 4.3309e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3525e+01, 2.9794e-02, 6.5444e-02, 9.4949e-02],
         [5.3455e+01, 5.6165e-04, 1.5955e-01, 5.0555e-02],
         ...,
         [4.3340e+01, 8.2347e-02, 6.5790e-01, 2.2768e-02],
         [4.3715e+01, 8.7762e-02, 5.1466e-01, 2.9719e-02],
         [4.4162e+01, 9.6445e-02, 3.0226e-01, 1.5782e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2690e+01, 5.0218e-02, 2.5817e-01, 5.8718e-02],
         [5.1644e+01, 6.4759e-08, 2.9426e-01, 3.7360e-02],
         ...,
         [4.2998e+01, 1.4402e-01, 7.6919e-01, 3.2639e-02],
         [4.2683e+01, 1.2586e-01, 6.9129e-01, 1.7846e-02],
         [4.2458e+01, 1.4105e-01, 3.4633e-


Train Diffusion:  95%|█████████▌| 1901/2000 [1:18:38<03:50,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1902/2000 [1:18:40<03:48,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1903/2000 [1:18:42<03:45,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1904/2000 [1:18:45<03:43,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1905/2000 [1:18:47<03:40,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1906/2000 [1:18:49<03:38,  2.33s/it][A
Train Diffusion:  95%|█████████▌| 1907/2000 [1:18:52<03:36,  2.32s/it][A
Train Diffusion:  95%|█████████▌| 1908/2000 [1:18:54<03:33,  2.32s/it][A
Train Diffusion:  95%|█████████▌| 1909/2000 [1:18:56<03:31,  2.33s/it][A
Train Diffusion:  96%|█████████▌| 1910/2000 [1:18:59<03:30,  2.34s/it][A

Moving average ELBO loss at 1910 iterations is: -37611.50859375. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5965e+01, 1.7674e-01, 5.8543e-01, 4.0934e-02],
        [4.5997e+01, 1.8196e-01, 6.1137e-01, 3.9920e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3512e+01, 1.5827e-01, 1.3150e-01, 9.1175e-02],
         [5.2505e+01, 8.6796e-04, 1.3311e-01, 5.2835e-02],
         ...,
         [4.2719e+01, 2.0103e-01, 6.4704e-01, 1.6623e-02],
         [4.3533e+01, 1.7417e-01, 5.7512e-01, 2.3199e-02],
         [4.4069e+01, 1.6327e-01, 2.8740e-01, 1.4407e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2702e+01, 1.1783e-02, 2.5746e-01, 5.0236e-02],
         [5.2876e+01, 7.2744e-08, 4.5891e-01, 3.8305e-02],
         ...,
         [4.3634e+01, 6.2679e-02, 7.6720e-01, 3.6424e-02],
         [4.2910e+01, 8.5933e-02, 6.0407e-01, 2.2213e-02],
         [4.2467e+01, 1.1784e-01, 2.7992e-0


Train Diffusion:  96%|█████████▌| 1911/2000 [1:19:01<03:27,  2.33s/it][A
Train Diffusion:  96%|█████████▌| 1912/2000 [1:19:03<03:24,  2.33s/it][A
Train Diffusion:  96%|█████████▌| 1913/2000 [1:19:06<03:22,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1914/2000 [1:19:08<03:19,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1915/2000 [1:19:10<03:17,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1916/2000 [1:19:13<03:15,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1917/2000 [1:19:15<03:12,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1918/2000 [1:19:17<03:10,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1919/2000 [1:19:20<03:07,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1920/2000 [1:19:22<03:05,  2.32s/it][A

Moving average ELBO loss at 1920 iterations is: -37725.667578125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5989e+01, 1.7090e-01, 6.0907e-01, 4.0144e-02],
        [4.5990e+01, 1.7552e-01, 5.9524e-01, 4.0798e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2717e+01, 9.3760e-02, 1.4083e-01, 7.3625e-02],
         [5.1683e+01, 1.0518e-07, 2.9326e-01, 7.0683e-02],
         ...,
         [4.3177e+01, 5.1786e-02, 5.0241e-01, 3.4634e-02],
         [4.3741e+01, 6.7059e-02, 5.2425e-01, 3.2217e-02],
         [4.4035e+01, 1.6752e-01, 2.9226e-01, 1.3049e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3567e+01, 6.6277e-03, 1.4676e-01, 6.3307e-02],
         [5.3421e+01, 3.7692e-04, 1.6153e-01, 2.0800e-02],
         ...,
         [4.3234e+01, 1.8087e-01, 1.0482e+00, 2.1227e-02],
         [4.2782e+01, 1.5412e-01, 7.7678e-01, 1.2821e-02],
         [4.2340e+01, 1.0997e-01, 2.9269e-


Train Diffusion:  96%|█████████▌| 1921/2000 [1:19:24<03:03,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1922/2000 [1:19:27<03:01,  2.32s/it][A
Train Diffusion:  96%|█████████▌| 1923/2000 [1:19:29<02:59,  2.33s/it][A
Train Diffusion:  96%|█████████▌| 1924/2000 [1:19:31<02:57,  2.34s/it][A
Train Diffusion:  96%|█████████▋| 1925/2000 [1:19:34<02:55,  2.34s/it][A
Train Diffusion:  96%|█████████▋| 1926/2000 [1:19:36<02:52,  2.33s/it][A
Train Diffusion:  96%|█████████▋| 1927/2000 [1:19:38<02:49,  2.33s/it][A
Train Diffusion:  96%|█████████▋| 1928/2000 [1:19:41<02:47,  2.32s/it][A
Train Diffusion:  96%|█████████▋| 1929/2000 [1:19:43<02:44,  2.32s/it][A
Train Diffusion:  96%|█████████▋| 1930/2000 [1:19:45<02:42,  2.32s/it][A

Moving average ELBO loss at 1930 iterations is: -35381.270703125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5985e+01, 1.6827e-01, 6.1120e-01, 3.9701e-02],
        [4.5966e+01, 1.7231e-01, 6.3248e-01, 3.9419e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3138e+01, 1.2564e-02, 2.4731e-01, 4.5696e-02],
         [5.2906e+01, 5.9509e-08, 4.9239e-01, 3.5231e-02],
         ...,
         [4.3039e+01, 1.6876e-01, 1.0053e+00, 2.2587e-02],
         [4.3774e+01, 1.5027e-01, 6.8196e-01, 1.6319e-02],
         [4.4037e+01, 1.4551e-01, 2.9192e-01, 9.7298e-03]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3942e+01, 2.1341e-02, 8.0592e-02, 1.0779e-01],
         [5.2888e+01, 3.0923e-04, 6.2270e-02, 5.4162e-02],
         ...,
         [4.3351e+01, 5.8352e-02, 5.0866e-01, 3.2695e-02],
         [4.2711e+01, 6.7611e-02, 6.2569e-01, 2.9629e-02],
         [4.2215e+01, 9.2512e-02, 3.5384e-


Train Diffusion:  97%|█████████▋| 1931/2000 [1:19:48<02:40,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1932/2000 [1:19:50<02:38,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1933/2000 [1:19:52<02:35,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1934/2000 [1:19:55<02:33,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1935/2000 [1:19:57<02:31,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1936/2000 [1:19:59<02:28,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1937/2000 [1:20:02<02:27,  2.34s/it][A
Train Diffusion:  97%|█████████▋| 1938/2000 [1:20:04<02:24,  2.34s/it][A
Train Diffusion:  97%|█████████▋| 1939/2000 [1:20:06<02:22,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1940/2000 [1:20:09<02:19,  2.33s/it][A

Moving average ELBO loss at 1940 iterations is: -35083.548046875. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.6023e+01, 1.7496e-01, 5.9706e-01, 3.9747e-02],
        [4.6041e+01, 1.7231e-01, 5.9131e-01, 3.8445e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.4156e+01, 5.4101e-02, 1.7833e-01, 1.1183e-01],
         [5.3057e+01, 1.0988e-07, 1.5834e-01, 4.2094e-02],
         ...,
         [4.3466e+01, 1.6644e-01, 8.1085e-01, 2.4131e-02],
         [4.2999e+01, 1.3655e-01, 6.3767e-01, 2.8305e-02],
         [4.2536e+01, 1.1574e-01, 4.3285e-01, 1.4973e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3371e+01, 3.2601e-02, 1.3596e-01, 4.2602e-02],
         [5.3297e+01, 8.5120e-04, 2.5677e-01, 3.6616e-02],
         ...,
         [4.2978e+01, 1.0340e-01, 6.6083e-01, 3.4163e-02],
         [4.3632e+01, 1.2269e-01, 5.4898e-01, 2.0685e-02],
         [4.4176e+01, 1.5846e-01, 1.9210e-


Train Diffusion:  97%|█████████▋| 1941/2000 [1:20:11<02:17,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1942/2000 [1:20:13<02:15,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1943/2000 [1:20:16<02:12,  2.33s/it][A
Train Diffusion:  97%|█████████▋| 1944/2000 [1:20:18<02:10,  2.32s/it][A
Train Diffusion:  97%|█████████▋| 1945/2000 [1:20:20<02:07,  2.32s/it][A
Train Diffusion:  97%|█████████▋| 1946/2000 [1:20:22<02:05,  2.32s/it][A
Train Diffusion:  97%|█████████▋| 1947/2000 [1:20:25<02:03,  2.32s/it][A
Train Diffusion:  97%|█████████▋| 1948/2000 [1:20:27<02:00,  2.32s/it][A
Train Diffusion:  97%|█████████▋| 1949/2000 [1:20:29<01:58,  2.32s/it][A
Train Diffusion:  98%|█████████▊| 1950/2000 [1:20:32<01:56,  2.33s/it][A

Moving average ELBO loss at 1950 iterations is: -36521.305078125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.5971e+01, 1.8555e-01, 6.1686e-01, 4.0915e-02],
        [4.6017e+01, 1.7539e-01, 6.0867e-01, 4.0127e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.1969e+01, 1.2855e-01, 2.4017e-01, 1.0231e-01],
         [5.1867e+01, 7.9562e-04, 4.3927e-01, 4.4281e-02],
         ...,
         [4.3233e+01, 1.1608e-01, 9.5924e-01, 1.1132e-02],
         [4.3735e+01, 8.9201e-02, 7.3338e-01, 2.2415e-02],
         [4.4137e+01, 1.0326e-01, 4.2810e-01, 1.5597e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3007e+01, 9.0666e-03, 1.3114e-01, 5.3732e-02],
         [5.3486e+01, 5.1075e-08, 1.4600e-01, 3.7455e-02],
         ...,
         [4.3171e+01, 1.3633e-01, 6.1217e-01, 4.9650e-02],
         [4.2722e+01, 1.5838e-01, 5.8883e-01, 3.0374e-02],
         [4.2298e+01, 1.6036e-01, 2.4322e-


Train Diffusion:  98%|█████████▊| 1951/2000 [1:20:34<01:55,  2.36s/it][A
Train Diffusion:  98%|█████████▊| 1952/2000 [1:20:37<01:53,  2.36s/it][A
Train Diffusion:  98%|█████████▊| 1953/2000 [1:20:39<01:50,  2.34s/it][A
Train Diffusion:  98%|█████████▊| 1954/2000 [1:20:41<01:47,  2.34s/it][A
Train Diffusion:  98%|█████████▊| 1955/2000 [1:20:44<01:44,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1956/2000 [1:20:46<01:42,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1957/2000 [1:20:48<01:40,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1958/2000 [1:20:51<01:37,  2.32s/it][A
Train Diffusion:  98%|█████████▊| 1959/2000 [1:20:53<01:35,  2.32s/it][A
Train Diffusion:  98%|█████████▊| 1960/2000 [1:20:55<01:32,  2.32s/it][A

Moving average ELBO loss at 1960 iterations is: -37239.71953125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.6044e+01, 1.8103e-01, 6.1134e-01, 4.0984e-02],
        [4.6017e+01, 1.7303e-01, 5.8427e-01, 3.8767e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2798e+01, 5.3913e-03, 1.4950e-01, 6.7280e-02],
         [5.1637e+01, 2.2689e-04, 2.5013e-01, 2.2286e-02],
         ...,
         [4.3405e+01, 1.2330e-01, 8.8994e-01, 2.3746e-02],
         [4.2883e+01, 1.6739e-01, 6.7336e-01, 1.7044e-02],
         [4.2505e+01, 1.9476e-01, 2.8881e-01, 1.2820e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3605e+01, 7.8101e-02, 2.0046e-01, 6.9277e-02],
         [5.3222e+01, 5.2222e-08, 2.1057e-01, 6.4481e-02],
         ...,
         [4.3057e+01, 1.0164e-01, 6.3184e-01, 2.9036e-02],
         [4.3815e+01, 7.9811e-02, 5.5584e-01, 2.6098e-02],
         [4.4173e+01, 8.5689e-02, 2.6906e-0


Train Diffusion:  98%|█████████▊| 1961/2000 [1:20:57<01:30,  2.32s/it][A
Train Diffusion:  98%|█████████▊| 1962/2000 [1:21:00<01:28,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1963/2000 [1:21:02<01:26,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1964/2000 [1:21:04<01:23,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1965/2000 [1:21:07<01:21,  2.34s/it][A
Train Diffusion:  98%|█████████▊| 1966/2000 [1:21:09<01:19,  2.34s/it][A
Train Diffusion:  98%|█████████▊| 1967/2000 [1:21:11<01:16,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1968/2000 [1:21:14<01:14,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1969/2000 [1:21:16<01:12,  2.33s/it][A
Train Diffusion:  98%|█████████▊| 1970/2000 [1:21:18<01:09,  2.33s/it][A

Moving average ELBO loss at 1970 iterations is: -37037.2078125. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.6061e+01, 1.7057e-01, 6.2994e-01, 3.9462e-02],
        [4.6049e+01, 1.6580e-01, 6.0314e-01, 3.9273e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3058e+01, 2.0893e-02, 2.4967e-01, 4.3488e-02],
         [5.1788e+01, 8.2252e-04, 2.9349e-01, 3.6096e-02],
         ...,
         [4.2999e+01, 1.7298e-01, 4.5741e-01, 3.6174e-02],
         [4.3612e+01, 1.6460e-01, 5.7031e-01, 3.2712e-02],
         [4.4089e+01, 1.4622e-01, 2.8825e-01, 1.1867e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3882e+01, 3.6169e-02, 8.5598e-02, 1.3388e-01],
         [5.3563e+01, 9.0022e-08, 1.9764e-01, 5.3609e-02],
         ...,
         [4.3587e+01, 8.6239e-02, 1.0294e+00, 1.5953e-02],
         [4.3065e+01, 7.7411e-02, 7.4124e-01, 1.0386e-02],
         [4.2583e+01, 1.0309e-01, 4.5020e-01


Train Diffusion:  99%|█████████▊| 1971/2000 [1:21:21<01:07,  2.33s/it][A
Train Diffusion:  99%|█████████▊| 1972/2000 [1:21:23<01:05,  2.33s/it][A
Train Diffusion:  99%|█████████▊| 1973/2000 [1:21:25<01:02,  2.33s/it][A
Train Diffusion:  99%|█████████▊| 1974/2000 [1:21:28<01:00,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1975/2000 [1:21:30<00:58,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1976/2000 [1:21:32<00:56,  2.34s/it][A
Train Diffusion:  99%|█████████▉| 1977/2000 [1:21:35<00:53,  2.34s/it][A
Train Diffusion:  99%|█████████▉| 1978/2000 [1:21:37<00:51,  2.34s/it][A
Train Diffusion:  99%|█████████▉| 1979/2000 [1:21:40<00:49,  2.35s/it][A
Train Diffusion:  99%|█████████▉| 1980/2000 [1:21:42<00:46,  2.35s/it][A

Moving average ELBO loss at 1980 iterations is: -38054.115234375. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.6052e+01, 1.7260e-01, 6.0330e-01, 3.9218e-02],
        [4.6103e+01, 1.7147e-01, 6.0236e-01, 4.0596e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3298e+01, 7.9496e-02, 1.4686e-01, 6.4324e-02],
         [5.3575e+01, 1.0196e-03, 2.9491e-01, 2.0902e-02],
         ...,
         [4.3444e+01, 1.2326e-01, 9.2671e-01, 3.9390e-02],
         [4.2765e+01, 9.8227e-02, 7.2593e-01, 2.3130e-02],
         [4.3291e+01, 1.6876e-01, 3.0211e-01, 1.6666e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2521e+01, 1.7052e-02, 1.7742e-01, 7.4939e-02],
         [5.1692e+01, 1.0457e-07, 1.6565e-01, 6.8741e-02],
         ...,
         [4.3205e+01, 1.6025e-01, 5.3313e-01, 1.7565e-02],
         [4.3855e+01, 1.8939e-01, 4.8554e-01, 2.9519e-02],
         [4.3209e+01, 1.3556e-01, 3.0807e-


Train Diffusion:  99%|█████████▉| 1981/2000 [1:21:44<00:44,  2.34s/it][A
Train Diffusion:  99%|█████████▉| 1982/2000 [1:21:47<00:42,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1983/2000 [1:21:49<00:39,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1984/2000 [1:21:51<00:37,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1985/2000 [1:21:54<00:34,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1986/2000 [1:21:56<00:32,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1987/2000 [1:21:58<00:30,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1988/2000 [1:22:00<00:27,  2.33s/it][A
Train Diffusion:  99%|█████████▉| 1989/2000 [1:22:03<00:25,  2.33s/it][A
Train Diffusion: 100%|█████████▉| 1990/2000 [1:22:05<00:23,  2.33s/it][A

Moving average ELBO loss at 1990 iterations is: -37088.16484375. Best ELBO loss value is: -40296.0625.

C_PATH mean = tensor([[4.6040e+01, 1.7404e-01, 6.0496e-01, 4.3309e-02],
        [4.6062e+01, 1.6575e-01, 6.1232e-01, 4.0637e-02]],
       grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.2643e+01, 5.6473e-03, 2.8989e-01, 7.6037e-02],
         [5.1680e+01, 5.3497e-08, 5.3359e-01, 6.8434e-02],
         ...,
         [4.2443e+01, 6.7230e-02, 5.5932e-01, 3.7296e-02],
         [4.2203e+01, 1.1954e-01, 5.1705e-01, 2.0203e-02],
         [4.2084e+01, 1.6354e-01, 3.0953e-01, 1.0330e-02]],

        [[5.3602e+01, 1.1421e-01, 6.7073e-01, 1.3415e-02],
         [5.3474e+01, 1.2385e-01, 6.1474e-02, 6.5767e-02],
         [5.3464e+01, 9.0037e-04, 4.7412e-02, 2.0792e-02],
         ...,
         [4.4131e+01, 1.8610e-01, 9.8890e-01, 1.8507e-02],
         [4.4488e+01, 1.2485e-01, 7.7854e-01, 2.4775e-02],
         [4.4730e+01, 9.7900e-02, 3.3061e-0


Train Diffusion: 100%|█████████▉| 1991/2000 [1:22:07<00:20,  2.33s/it][A
Train Diffusion: 100%|█████████▉| 1992/2000 [1:22:10<00:18,  2.32s/it][A
Train Diffusion: 100%|█████████▉| 1993/2000 [1:22:12<00:16,  2.34s/it][A
Train Diffusion: 100%|█████████▉| 1994/2000 [1:22:14<00:14,  2.34s/it][A
Train Diffusion: 100%|█████████▉| 1995/2000 [1:22:17<00:11,  2.33s/it][A
Train Diffusion: 100%|█████████▉| 1996/2000 [1:22:19<00:09,  2.33s/it][A
Train Diffusion: 100%|█████████▉| 1997/2000 [1:22:21<00:06,  2.33s/it][A
Train Diffusion: 100%|█████████▉| 1998/2000 [1:22:24<00:04,  2.32s/it][A
Train Diffusion: 100%|█████████▉| 1999/2000 [1:22:26<00:02,  2.32s/it][A
Train Diffusion: 100%|██████████| 2000/2000 [1:22:28<00:00,  2.47s/it][A
