In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [2]:
#Fix the global PyTorch and NumPy RNG seeds so that re-running the notebook from a fresh kernel draws the same random numbers.
#NOTE: keep np.random.seed(0) as the last statement; torch.manual_seed returns a Generator object, which the notebook would display if it were the final expression in the cell.
torch.manual_seed(0)
np.random.seed(0)

In [3]:
#Temperature forcing settings.
#NOTE(review): temp_ref is presumably in Kelvin (about 10 degrees Celsius) -- confirm against temp_gen.
temp_ref = 283
temp_rise = 5 #High estimate of 5 celsius temperature rise by 2100.

#Deterministic CON model: system parameters.
u_M = 0.002
a_SD = a_DS = a_M = 0.33
a_MSC = 0.5
k_S_ref = 2.5e-5
k_D_ref = 5e-3
k_M_ref = 2e-4
Ea_S = 75
Ea_D = Ea_M = 50

#SCON diffusion matrix parameters: c_* entries go into the "C" dict, s_* entries into the "SS" dict.
c_SOC = 0.1
c_DOC = 1e-4
c_MBC = 1e-3
s_SOC = s_DOC = s_MBC = 1e-3

#Both dicts share the same drift parameters (same key order) and differ only in the trailing diffusion entries.
SCON_C_params_dict = dict(
    u_M = u_M, a_SD = a_SD, a_DS = a_DS, a_M = a_M, a_MSC = a_MSC,
    k_S_ref = k_S_ref, k_D_ref = k_D_ref, k_M_ref = k_M_ref,
    Ea_S = Ea_S, Ea_D = Ea_D, Ea_M = Ea_M,
    c_SOC = c_SOC, c_DOC = c_DOC, c_MBC = c_MBC,
)
SCON_SS_params_dict = dict(
    u_M = u_M, a_SD = a_SD, a_DS = a_DS, a_M = a_M, a_MSC = a_MSC,
    k_S_ref = k_S_ref, k_D_ref = k_D_ref, k_M_ref = k_M_ref,
    Ea_S = Ea_S, Ea_D = Ea_D, Ea_M = Ea_M,
    s_SOC = s_SOC, s_DOC = s_DOC, s_MBC = s_MBC,
)

#Deterministic AWB model: system parameters.
u_Q_ref = 0.2
Q = 2e-3
a_MSA = 0.5
K_D = 200
K_U = 1
V_D_ref = 0.4
V_U_ref = 0.02
Ea_V_D = 75
Ea_V_U = 50
r_M = 4e-4
r_E = 1e-5
r_L = 5e-4

#SAWB diffusion matrix parameters (these rebind the c_*/s_* names used for SCON earlier in this cell).
c_SOC = 2
c_DOC = 0.05
c_MBC = 0.1
c_EEC = 0.01
s_SOC = s_DOC = s_MBC = s_EEC = 0.1

#Both dicts share the same drift parameters (same key order) and differ only in the trailing diffusion entries.
SAWB_C_params_dict = dict(
    u_Q_ref = u_Q_ref, Q = Q, a_MSA = a_MSA, K_D = K_D, K_U = K_U,
    V_D_ref = V_D_ref, V_U_ref = V_U_ref, Ea_V_D = Ea_V_D, Ea_V_U = Ea_V_U,
    r_M = r_M, r_E = r_E, r_L = r_L,
    c_SOC = c_SOC, c_DOC = c_DOC, c_MBC = c_MBC, c_EEC = c_EEC,
)
SAWB_SS_params_dict = dict(
    u_Q_ref = u_Q_ref, Q = Q, a_MSA = a_MSA, K_D = K_D, K_U = K_U,
    V_D_ref = V_D_ref, V_U_ref = V_U_ref, Ea_V_D = Ea_V_D, Ea_V_U = Ea_V_U,
    r_M = r_M, r_E = r_E, r_L = r_L,
    s_SOC = s_SOC, s_DOC = s_DOC, s_MBC = s_MBC, s_EEC = s_EEC,
)

#Deterministic AWB-ECA model: system parameters.
u_Q_ref = 0.2
Q = 2e-3
a_MSA = 0.5
K_DE = 200
K_UE = 1
V_DE_ref = 0.4
V_UE_ref = 0.02
Ea_V_DE = 75
Ea_V_UE = 50
r_M = 4e-4
r_E = 1e-5
r_L = 5e-4

#SAWB-ECA diffusion matrix parameters (these rebind the c_*/s_* names once more; values match the SAWB ones).
c_SOC = 2
c_DOC = 0.05
c_MBC = 0.1
c_EEC = 0.01
s_SOC = s_DOC = s_MBC = s_EEC = 0.1

#Both dicts share the same drift parameters (same key order) and differ only in the trailing diffusion entries.
SAWB_ECA_C_params_dict = dict(
    u_Q_ref = u_Q_ref, Q = Q, a_MSA = a_MSA, K_DE = K_DE, K_UE = K_UE,
    V_DE_ref = V_DE_ref, V_UE_ref = V_UE_ref, Ea_V_DE = Ea_V_DE, Ea_V_UE = Ea_V_UE,
    r_M = r_M, r_E = r_E, r_L = r_L,
    c_SOC = c_SOC, c_DOC = c_DOC, c_MBC = c_MBC, c_EEC = c_EEC,
)
SAWB_ECA_SS_params_dict = dict(
    u_Q_ref = u_Q_ref, Q = Q, a_MSA = a_MSA, K_DE = K_DE, K_UE = K_UE,
    V_DE_ref = V_DE_ref, V_UE_ref = V_UE_ref, Ea_V_DE = Ea_V_DE, Ea_V_UE = Ea_V_UE,
    r_M = r_M, r_E = r_E, r_L = r_L,
    s_SOC = s_SOC, s_DOC = s_DOC, s_MBC = s_MBC, s_EEC = s_EEC,
)

In [4]:
#Set flow NN parameters.

#`cuda_id` is never assigned in this notebook, so on a CUDA machine the original expression raised NameError
#unless a wildcard import happened to supply the name. Reuse that value when present, otherwise default to GPU 0.
cuda_id = globals().get('cuda_id', 0)
devi = torch.device(f'cuda:{cuda_id}' if torch.cuda.is_available() else 'cpu')
dt_flow = 0.2 #Time step of the flow discretization.
t = 1000 #End time of the simulated span (the span starts at 0).
n_flow = int(t / dt_flow) + 1 #Number of grid points, counting both endpoints.
t_span = np.linspace(0, t, n_flow)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n_flow, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
l_r = 1e-4 #Adam learning rate passed to train().
niter = 5001 #Total number of training iterations.
piter = 101 #Pretraining threshold: train() uses the norm loss while the iteration index is <= piter.
batch_size = 3 #Number of sets of observation outputs to sample per set of parameters.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
obs_error_scale = 0.1 #Proportion of the mean of observation error standard deviation.

x0_SCON = [19, 0.08, 0.8] #Initial condition means for SCON

In [5]:
#Build the temperature forcing over the flow time grid and show it.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)
print(temp_tensor)

#Build the exogenous litter inputs consumed by the flow SDE functions, then show both (SOC first, DOC second).
i_s_tensor = i_s(t_span_tensor) #SOC pool input
i_d_tensor = i_d(t_span_tensor) #DOC pool input
print(i_s_tensor, i_d_tensor, sep = '\n')

tensor([[[283.0000],
         [283.5248],
         [284.0482],
         ...,
         [281.4880],
         [281.1925],
         [280.9200]]])
tensor([[[0.0010],
         [0.0010],
         [0.0010],
         ...,
         [0.0013],
         [0.0013],
         [0.0013]]])
tensor([[[1.0000e-04],
         [1.0001e-04],
         [1.0001e-04],
         ...,
         [1.3286e-04],
         [1.3286e-04],
         [1.3287e-04]]])


In [6]:
def train(DEVICE, L_R, NITER, PRETRAIN_ITER, BATCH_SIZE, SDEFLOW, ObsModel, csv_to_obs_df, DATA_CSV, OBS_ERROR_SCALE, STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, C0, DRIFT_DIFFUSION, PARAMS_DICT):
    """Train an SDE flow network: norm-loss pretraining followed by negative-ELBO minimisation.

    Iterations with index ``i <= PRETRAIN_ITER`` minimise the summed absolute difference
    between the sampled paths and ``obs_model.mu`` averaged over its last axis. All later
    iterations minimise the negative ELBO assembled from the initial-condition prior, the
    SDE path log-likelihood (``calc_log_lik``), the observation model and the flow's own
    ``log_prob``.

    Parameters
    ----------
    DEVICE : torch device the network and tensors are moved to.
    L_R : learning rate for the Adam optimizer.
    NITER : total number of iterations.
    PRETRAIN_ITER : last iteration index that uses the pretraining norm loss; must be < NITER.
    BATCH_SIZE : number of paths sampled from the flow per iteration.
    SDEFLOW : flow network class; instantiated here and called as ``net(BATCH_SIZE)``.
    ObsModel : observation model class; instantiated here from the CSV observations.
    csv_to_obs_df : callable returning ``(obs_times, obs_means, obs_error)`` for DATA_CSV.
    DATA_CSV : path of the observation CSV handed to ``csv_to_obs_df``.
    OBS_ERROR_SCALE : passed to ``csv_to_obs_df``; also scales the X0 prior standard deviation.
    STATE_DIM, T, DT, N : state dimension, time span, step size and number of grid points,
        forwarded to the observation model / flow constructors.
    T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR : time grid and forcing tensors
        forwarded to ``calc_log_lik`` (the two input tensors also go to the flow constructor).
    TEMP_REF : reference temperature forwarded to ``calc_log_lik``.
    C0 : list of initial-condition means, used as the location of the X0 prior.
    DRIFT_DIFFUSION : drift/diffusion callable forwarded to ``calc_log_lik``.
    PARAMS_DICT : dict mapping parameter name to scalar value.

    Returns
    -------
    (net, ELBO_losses, norm_losses) : the trained network and the per-iteration loss
    histories (Python floats) of the ELBO phase and the pretraining phase.

    Raises
    ------
    ValueError (a subclass of Exception) if ``PRETRAIN_ITER >= NITER``.
    """
    #Validate before doing any expensive setup (CSV read, network construction).
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")
    #Read-in observation information.
    obs_times, obs_means, obs_error = csv_to_obs_df(DATA_CSV, STATE_DIM, T, OBS_ERROR_SCALE)
    obs_means = LowerBound.apply(obs_means, 1e-6)
    #Pass observation information to `ObsModel`.
    obs_model = ObsModel(DEVICE, obs_times, DT, obs_means, obs_error)
    #Use the class handed in by the caller; the original ignored SDEFLOW and used the global `SDEFlow`.
    net = SDEFLOW(DEVICE, obs_model, STATE_DIM, T, DT, N, I_S_TENSOR, I_D_TENSOR, cond_inputs = 3, num_layers = 6).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = L_R)
    #Best losses are tracked as plain floats so no tensors (or autograd references) are kept across iterations.
    best_loss_norm = 1e15
    best_loss_ELBO = 1e15
    norm_losses = []
    ELBO_losses = []
    C0_tensor = torch.tensor(C0).to(DEVICE) #Convert initial conditions from list to tensor for X0 prior object.
    #Parameter tensors live on DEVICE, consistent with C0_tensor and the network.
    PARAMS_DICT_TENSOR = {k: torch.tensor(v).expand(BATCH_SIZE).to(DEVICE) for k, v in PARAMS_DICT.items()}
    X0_prior = D.normal.Normal(loc = C0_tensor, scale = OBS_ERROR_SCALE * C0_tensor) #Setting prior noise = observation noise for now.
    #Loop-invariant device transfers, hoisted out of the training loop.
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)
    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for i in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #For obs_and_flow.py
            if i <= PRETRAIN_ITER:
                #Pretraining: pull the sampled paths towards the mean of the observations.
                l1_norm_element = C_PATH - torch.mean(obs_model.mu, -1)
                l1_norm = torch.sum(torch.abs(l1_norm_element)).mean()
                l1_value = l1_norm.item()
                best_loss_norm = min(best_loss_norm, l1_value)
                norm_losses.append(l1_value)
                if i % 10 == 0:
                    recent_norm = norm_losses[-10:]
                    ma_norm_loss = sum(recent_norm) / len(recent_norm)
                    print(f"\nMoving average norm loss at {i} iterations is: {ma_norm_loss}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()
            else:
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev, temp_dev, TEMP_REF, DRIFT_DIFFUSION, PARAMS_DICT)
                neg_ELBO = -X0_prior.log_prob(C_PATH[:, 0, :]).sum(-1).mean() - log_lik.mean() - obs_model(C_PATH, PARAMS_DICT_TENSOR) + log_prob.mean()
                elbo_value = neg_ELBO.item()
                best_loss_ELBO = min(best_loss_ELBO, elbo_value)
                ELBO_losses.append(elbo_value)
                if i % 10 == 0:
                    recent_elbo = ELBO_losses[-10:]
                    ma_elbo_loss = sum(recent_elbo) / len(recent_elbo)
                    print(f"\nMoving average ELBO loss at {i} iterations is: {ma_elbo_loss}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                #Without this call the gradients stay cleared by zero_grad() and the ELBO is never optimised.
                neg_ELBO.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            optimizer.step()
            #Step decay: divide the learning rate by 10 every 100000 iterations.
            if i % 100000 == 0 and i > 0:
                optimizer.param_groups[0]['lr'] *= 0.1
            tq.update()
    return net, ELBO_losses, norm_losses

In [7]:
#Fit the SCON flow; every argument is passed by keyword so the mapping onto train()'s long signature is explicit.
net, ELBO_losses, norm_losses = train(
    DEVICE = devi,
    L_R = l_r,
    NITER = niter,
    PRETRAIN_ITER = piter,
    BATCH_SIZE = batch_size,
    SDEFLOW = SDEFlow,
    ObsModel = ObsModel,
    csv_to_obs_df = csv_to_obs_df,
    DATA_CSV = 'y_from_x_t_1000_dt_0-01.csv',
    OBS_ERROR_SCALE = obs_error_scale,
    STATE_DIM = state_dim_SCON,
    T = t,
    DT = dt_flow,
    N = n_flow,
    T_SPAN_TENSOR = t_span_tensor,
    I_S_TENSOR = i_s_tensor,
    I_D_TENSOR = i_d_tensor,
    TEMP_TENSOR = temp_tensor,
    TEMP_REF = temp_ref,
    C0 = x0_SCON,
    DRIFT_DIFFUSION = drift_diffusion_SCON_C,
    PARAMS_DICT = SCON_C_params_dict,
)


Train Diffusion:   0%|          | 0/5001 [00:00<?, ?it/s][A


Moving average norm loss at <built-in function iter> iterations is: 487776.46875. Best norm loss value is: 487776.46875.

C_PATH mean = tensor([[0.8412, 0.8549, 0.8230],
        [0.8406, 0.8506, 0.8430],
        [0.8338, 0.8452, 0.8374]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7979, 0.4989, 0.8650],
         [1.0883, 0.3769, 2.7311],
         [0.3203, 0.7267, 1.9741],
         ...,
         [0.5538, 0.7271, 0.4697],
         [0.6251, 1.0342, 0.5232],
         [0.5830, 1.1936, 0.6196]],

        [[0.7932, 0.9848, 0.8966],
         [0.2745, 1.7503, 1.1032],
         [0.2097, 0.5167, 1.5775],
         ...,
         [0.7713, 1.1644, 0.7867],
         [0.6278, 0.5162, 0.7906],
         [0.8528, 0.6983, 0.7032]],

        [[0.5078, 1.8464, 0.8937],
         [1.3542, 0.9721, 0.2880],
         [3.1418, 1.7542, 0.5738],
         ...,
         [1.0607, 0.4409, 0.7835],
         [1.4567, 0.4050, 0.8300],
         [1.0807, 0.6015, 0.7105]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 1/5001 [00:08<11:45:22,  8.46s/it][A
Train Diffusion:   0%|          | 2/5001 [00:17<12:12:07,  8.79s/it][A
Train Diffusion:   0%|          | 3/5001 [00:25<11:44:31,  8.46s/it][A
Train Diffusion:   0%|          | 4/5001 [00:33<11:34:55,  8.34s/it][A
Train Diffusion:   0%|          | 5/5001 [00:44<12:54:25,  9.30s/it][A
Train Diffusion:   0%|          | 6/5001 [00:55<13:28:31,  9.71s/it][A
Train Diffusion:   0%|          | 7/5001 [01:04<13:16:58,  9.58s/it][A
Train Diffusion:   0%|          | 8/5001 [01:14<13:17:51,  9.59s/it][A
Train Diffusion:   0%|          | 9/5001 [01:22<12:36:55,  9.10s/it][A
Train Diffusion:   0%|          | 10/5001 [01:29<11:54:39,  8.59s/it][A


Moving average norm loss at <built-in function iter> iterations is: 486311.496875. Best norm loss value is: 485398.75.

C_PATH mean = tensor([[0.9002, 0.8340, 0.8882],
        [0.8929, 0.8188, 0.8916],
        [0.8892, 0.8296, 0.8777]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.9859, 1.3254, 1.4777],
         [0.4882, 1.3655, 1.1026],
         [1.6296, 0.5975, 1.0450],
         ...,
         [0.6678, 1.7428, 1.4339],
         [0.3964, 1.8624, 1.2646],
         [2.9912, 1.0947, 1.4510]],

        [[0.6973, 0.7329, 0.6936],
         [0.7490, 0.6953, 1.5204],
         [1.1318, 0.8571, 1.4014],
         ...,
         [0.5059, 0.8771, 0.6433],
         [0.9966, 0.6296, 0.6261],
         [0.7703, 0.8904, 1.1772]],

        [[0.6312, 0.7424, 0.7368],
         [1.1397, 0.7034, 0.4623],
         [0.2655, 1.1055, 0.7856],
         ...,
         [2.2622, 0.2000, 0.6261],
         [2.1634, 0.3482, 0.7702],
         [0.5345, 0.7446, 0.2625]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 11/5001 [01:36<11:17:52,  8.15s/it][A
Train Diffusion:   0%|          | 12/5001 [01:45<11:21:48,  8.20s/it][A
Train Diffusion:   0%|          | 13/5001 [01:52<11:05:33,  8.01s/it][A
Train Diffusion:   0%|          | 14/5001 [02:00<11:03:27,  7.98s/it][A
Train Diffusion:   0%|          | 15/5001 [02:09<11:33:20,  8.34s/it][A
Train Diffusion:   0%|          | 16/5001 [02:18<11:43:29,  8.47s/it][A
Train Diffusion:   0%|          | 17/5001 [02:25<11:02:42,  7.98s/it][A
Train Diffusion:   0%|          | 18/5001 [02:32<10:32:01,  7.61s/it][A
Train Diffusion:   0%|          | 19/5001 [02:45<13:05:05,  9.46s/it][A
Train Diffusion:   0%|          | 20/5001 [02:53<12:16:49,  8.88s/it][A


Moving average norm loss at <built-in function iter> iterations is: 484515.73125. Best norm loss value is: 483648.5625.

C_PATH mean = tensor([[0.9963, 0.8249, 0.9231],
        [0.9767, 0.8207, 0.9275],
        [0.9850, 0.8235, 0.9228]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7660, 1.1288, 0.8395],
         [0.8597, 1.1590, 0.8577],
         [1.2461, 1.0417, 1.1242],
         ...,
         [3.0011, 0.7007, 0.9277],
         [2.5244, 0.1713, 0.7467],
         [1.3721, 0.7511, 0.0964]],

        [[1.1354, 0.7318, 1.3357],
         [0.6916, 0.6794, 0.8766],
         [0.8672, 0.9784, 0.9082],
         ...,
         [0.8699, 0.6208, 0.9658],
         [0.6905, 1.3308, 0.9722],
         [2.5634, 0.6138, 1.5269]],

        [[0.6679, 1.1703, 0.8279],
         [1.4027, 0.8691, 2.1529],
         [1.5869, 0.4969, 1.2615],
         ...,
         [0.5815, 1.5146, 0.4791],
         [0.9398, 0.7106, 0.9429],
         [0.6237, 0.8467, 2.7502]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 21/5001 [03:01<11:50:24,  8.56s/it][A
Train Diffusion:   0%|          | 22/5001 [03:07<11:05:41,  8.02s/it][A
Train Diffusion:   0%|          | 23/5001 [03:14<10:33:11,  7.63s/it][A
Train Diffusion:   0%|          | 24/5001 [03:21<10:11:47,  7.38s/it][A
Train Diffusion:   0%|          | 25/5001 [03:28<9:55:20,  7.18s/it] [A
Train Diffusion:   1%|          | 26/5001 [03:34<9:44:22,  7.05s/it][A
Train Diffusion:   1%|          | 27/5001 [03:41<9:37:06,  6.96s/it][A
Train Diffusion:   1%|          | 28/5001 [03:48<9:31:06,  6.89s/it][A
Train Diffusion:   1%|          | 29/5001 [03:55<9:27:11,  6.84s/it][A
Train Diffusion:   1%|          | 30/5001 [04:01<9:24:16,  6.81s/it][A


Moving average norm loss at <built-in function iter> iterations is: 482109.1125. Best norm loss value is: 480447.5.

C_PATH mean = tensor([[1.1859, 0.8189, 0.9912],
        [1.1687, 0.8088, 0.9927],
        [1.1856, 0.8136, 0.9864]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.9500, 0.7416, 0.8634],
         [1.0217, 0.9315, 0.8391],
         [0.9851, 1.1146, 0.9497],
         ...,
         [0.6021, 0.9880, 0.6393],
         [1.2333, 0.8784, 1.0950],
         [0.8355, 0.9349, 4.3520]],

        [[0.6398, 1.1210, 1.2533],
         [1.9608, 1.2871, 1.4494],
         [1.4476, 1.6178, 1.4632],
         ...,
         [1.7996, 0.3913, 0.7791],
         [0.3594, 1.0528, 0.5972],
         [1.0173, 0.9628, 0.1817]],

        [[0.7052, 1.0874, 1.0098],
         [0.7863, 0.9781, 1.0572],
         [1.5946, 0.8895, 0.9649],
         ...,
         [1.6656, 0.7329, 0.9731],
         [1.2449, 0.5304, 1.1378],
         [1.7007, 1.0226, 0.3532]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 31/5001 [04:08<9:22:32,  6.79s/it][A
Train Diffusion:   1%|          | 32/5001 [04:15<9:20:36,  6.77s/it][A
Train Diffusion:   1%|          | 33/5001 [04:22<9:19:08,  6.75s/it][A
Train Diffusion:   1%|          | 34/5001 [04:28<9:19:38,  6.76s/it][A
Train Diffusion:   1%|          | 35/5001 [04:35<9:18:55,  6.75s/it][A
Train Diffusion:   1%|          | 36/5001 [04:42<9:18:18,  6.75s/it][A
Train Diffusion:   1%|          | 37/5001 [04:49<9:17:13,  6.74s/it][A
Train Diffusion:   1%|          | 38/5001 [04:55<9:15:44,  6.72s/it][A
Train Diffusion:   1%|          | 39/5001 [05:02<9:28:55,  6.88s/it][A
Train Diffusion:   1%|          | 40/5001 [05:09<9:24:45,  6.83s/it][A


Moving average norm loss at <built-in function iter> iterations is: 476849.84375. Best norm loss value is: 473056.9375.

C_PATH mean = tensor([[1.7251, 0.8018, 1.1075],
        [1.7194, 0.8022, 1.1019],
        [1.6943, 0.8026, 1.1048]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7292, 1.1713, 1.4880],
         [1.8950, 0.6697, 0.9883],
         [1.1136, 0.9329, 0.9913],
         ...,
         [0.9479, 1.0965, 1.0996],
         [1.0250, 1.1217, 0.8083],
         [0.7204, 1.2336, 0.4496]],

        [[0.9144, 0.7320, 1.0933],
         [1.0540, 0.9052, 1.5284],
         [6.6473, 0.3971, 0.6207],
         ...,
         [0.6265, 1.7656, 1.6876],
         [6.5558, 0.0781, 0.9474],
         [1.1117, 1.0597, 7.0374]],

        [[0.5867, 1.1407, 0.9294],
         [1.1459, 0.9625, 0.9082],
         [1.0759, 1.0023, 0.8868],
         ...,
         [3.6167, 0.4340, 1.2177],
         [0.9921, 0.7952, 1.6757],
         [1.8351, 1.3294, 0.1545]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 41/5001 [05:16<9:22:00,  6.80s/it][A
Train Diffusion:   1%|          | 42/5001 [05:23<9:19:02,  6.76s/it][A
Train Diffusion:   1%|          | 43/5001 [05:29<9:17:49,  6.75s/it][A
Train Diffusion:   1%|          | 44/5001 [05:36<9:15:42,  6.73s/it][A
Train Diffusion:   1%|          | 45/5001 [05:43<9:15:42,  6.73s/it][A
Train Diffusion:   1%|          | 46/5001 [05:49<9:14:25,  6.71s/it][A
Train Diffusion:   1%|          | 47/5001 [05:56<9:13:39,  6.71s/it][A
Train Diffusion:   1%|          | 48/5001 [06:03<9:17:25,  6.75s/it][A
Train Diffusion:   1%|          | 49/5001 [06:10<9:14:39,  6.72s/it][A
Train Diffusion:   1%|          | 50/5001 [06:16<9:12:16,  6.69s/it][A


Moving average norm loss at <built-in function iter> iterations is: 462727.634375. Best norm loss value is: 451902.34375.

C_PATH mean = tensor([[3.0580, 0.7049, 1.1371],
        [3.0223, 0.7251, 1.1405],
        [2.9964, 0.7222, 1.1486]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7338,  1.1427,  0.8748],
         [ 0.7622,  0.9075,  0.5855],
         [11.8893,  0.1336,  1.0422],
         ...,
         [ 1.1556,  0.6999,  0.9628],
         [ 1.1121,  0.8576,  0.9680],
         [ 1.0233,  1.3000,  0.3875]],

        [[ 0.9833,  0.8754,  1.9197],
         [ 0.9378,  1.0165,  1.1370],
         [ 0.8424,  0.8033,  0.5806],
         ...,
         [12.2370,  0.0802,  1.3078],
         [ 1.2752,  0.5366,  0.8029],
         [ 7.8211,  1.1692,  0.9377]],

        [[ 0.8322,  1.0485,  0.8338],
         [ 3.9898,  0.9127,  1.0040],
         [ 0.9949,  1.0868,  0.7349],
         ...,
         [ 0.7540,  2.1123,  0.8774],
         [11.5654,  0.0514,  1.3723],
         [ 1.0511,  0.8817,  0.8


Train Diffusion:   1%|          | 51/5001 [06:23<9:11:12,  6.68s/it][A
Train Diffusion:   1%|          | 52/5001 [06:30<9:11:53,  6.69s/it][A
Train Diffusion:   1%|          | 53/5001 [06:36<9:10:39,  6.68s/it][A
Train Diffusion:   1%|          | 54/5001 [06:43<9:09:46,  6.67s/it][A
Train Diffusion:   1%|          | 55/5001 [06:49<9:08:26,  6.65s/it][A
Train Diffusion:   1%|          | 56/5001 [06:56<9:07:38,  6.64s/it][A
Train Diffusion:   1%|          | 57/5001 [07:03<9:13:39,  6.72s/it][A
Train Diffusion:   1%|          | 58/5001 [07:10<9:14:12,  6.73s/it][A
Train Diffusion:   1%|          | 59/5001 [07:16<9:11:19,  6.69s/it][A
Train Diffusion:   1%|          | 60/5001 [07:23<9:09:58,  6.68s/it][A


Moving average norm loss at <built-in function iter> iterations is: 434894.271875. Best norm loss value is: 420364.71875.

C_PATH mean = tensor([[4.9550, 0.6419, 1.1737],
        [5.0529, 0.6245, 1.1342],
        [5.1220, 0.6183, 1.1420]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8146,  0.8445,  0.4899],
         [ 4.8153,  0.6654,  0.8839],
         [ 0.9416,  0.6863,  0.4901],
         ...,
         [ 1.5444,  0.3329,  0.7146],
         [ 0.7080,  0.9810,  0.9336],
         [ 1.3213,  1.7640,  1.7194]],

        [[ 0.8146,  0.8162,  0.5522],
         [ 2.9493,  0.7552,  0.8781],
         [ 0.7194,  0.2575,  0.9534],
         ...,
         [ 0.7728,  1.4910,  0.7833],
         [21.5755,  0.1129,  1.8640],
         [ 1.9885,  0.4544,  3.9287]],

        [[ 0.8146,  0.5281,  1.0897],
         [ 1.0257,  0.4312,  0.5821],
         [ 7.2507,  3.0697,  0.8555],
         ...,
         [23.7667,  0.0552,  1.6759],
         [ 2.1822,  0.1836,  0.6664],
         [ 9.8141,  1.5398,  2.4


Train Diffusion:   1%|          | 61/5001 [07:30<9:08:27,  6.66s/it][A
Train Diffusion:   1%|          | 62/5001 [07:36<9:07:57,  6.66s/it][A
Train Diffusion:   1%|▏         | 63/5001 [07:43<9:07:49,  6.66s/it][A
Train Diffusion:   1%|▏         | 64/5001 [07:50<9:06:53,  6.65s/it][A
Train Diffusion:   1%|▏         | 65/5001 [07:56<9:08:28,  6.67s/it][A
Train Diffusion:   1%|▏         | 66/5001 [08:03<9:07:11,  6.65s/it][A
Train Diffusion:   1%|▏         | 67/5001 [08:09<9:06:27,  6.65s/it][A
Train Diffusion:   1%|▏         | 68/5001 [08:16<9:05:53,  6.64s/it][A
Train Diffusion:   1%|▏         | 69/5001 [08:23<9:05:28,  6.64s/it][A
Train Diffusion:   1%|▏         | 70/5001 [08:29<9:04:40,  6.63s/it][A


Moving average norm loss at <built-in function iter> iterations is: 408846.715625. Best norm loss value is: 399300.0625.

C_PATH mean = tensor([[6.5458, 0.5834, 1.1602],
        [6.5616, 0.5873, 1.1469],
        [6.5464, 0.5631, 1.1545]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7724,  0.7923,  0.4483],
         [ 1.7520,  0.7751,  0.7319],
         [ 0.5714,  0.6413,  0.6477],
         ...,
         [ 5.0346,  0.6080,  0.1962],
         [ 2.1290,  0.1274,  5.9137],
         [ 0.9329,  1.6845,  2.5257]],

        [[ 0.7724,  0.7880,  0.4692],
         [ 7.4629,  0.6086,  0.9328],
         [ 1.2088,  0.4637,  0.4086],
         ...,
         [ 1.9915,  0.2327,  1.2458],
         [ 6.5110,  0.7453,  0.9201],
         [ 9.1240,  1.7082,  0.4805]],

        [[ 0.7724,  0.5672,  0.7808],
         [ 1.2452,  0.3356,  0.7991],
         [23.0311,  0.1598,  1.0833],
         ...,
         [ 5.8042,  0.6030,  1.5345],
         [ 0.7478,  0.6643,  1.1272],
         [ 0.4023,  1.2021,  4.53


Train Diffusion:   1%|▏         | 71/5001 [08:36<9:04:56,  6.63s/it][A
Train Diffusion:   1%|▏         | 72/5001 [08:43<9:05:58,  6.65s/it][A
Train Diffusion:   1%|▏         | 73/5001 [08:49<9:04:43,  6.63s/it][A
Train Diffusion:   1%|▏         | 74/5001 [08:56<9:04:26,  6.63s/it][A
Train Diffusion:   1%|▏         | 75/5001 [09:03<9:05:55,  6.65s/it][A
Train Diffusion:   2%|▏         | 76/5001 [09:10<9:12:52,  6.74s/it][A
Train Diffusion:   2%|▏         | 77/5001 [09:16<9:10:13,  6.70s/it][A
Train Diffusion:   2%|▏         | 78/5001 [09:23<9:07:42,  6.68s/it][A
Train Diffusion:   2%|▏         | 79/5001 [09:29<9:05:56,  6.66s/it][A
Train Diffusion:   2%|▏         | 80/5001 [09:36<9:04:45,  6.64s/it][A


Moving average norm loss at <built-in function iter> iterations is: 387452.584375. Best norm loss value is: 379623.0625.

C_PATH mean = tensor([[8.0627, 0.4703, 1.1132],
        [7.9736, 0.4967, 1.1057],
        [8.0437, 0.4890, 1.1424]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8016,  0.6321,  0.9513],
         [ 1.2118,  0.4146,  0.5915],
         [ 1.5875,  0.0941,  1.0879],
         ...,
         [ 0.6484,  0.7683,  1.0043],
         [32.3663,  0.3043,  1.1288],
         [ 2.9570,  0.4364,  3.8154]],

        [[ 0.8016,  0.8088,  0.7574],
         [13.6773,  0.5387,  1.0812],
         [ 1.6824,  0.3687,  0.4047],
         ...,
         [ 2.2453,  0.2281,  0.7176],
         [ 0.6146,  0.7343,  0.7498],
         [ 0.7193,  1.1898,  1.8336]],

        [[ 0.8016,  0.8096,  0.6913],
         [ 0.5340,  0.6747,  0.4654],
         [12.4579,  0.5844,  1.9822],
         ...,
         [33.8723,  0.1263,  1.4518],
         [ 3.2518,  0.1305,  0.7875],
         [24.9355,  1.0948,  1.84


Train Diffusion:   2%|▏         | 81/5001 [09:43<9:05:12,  6.65s/it][A
Train Diffusion:   2%|▏         | 82/5001 [09:49<9:04:35,  6.64s/it][A
Train Diffusion:   2%|▏         | 83/5001 [09:56<9:03:24,  6.63s/it][A
Train Diffusion:   2%|▏         | 84/5001 [10:02<9:02:45,  6.62s/it][A
Train Diffusion:   2%|▏         | 85/5001 [10:09<9:02:08,  6.62s/it][A
Train Diffusion:   2%|▏         | 86/5001 [10:16<9:03:34,  6.64s/it][A
Train Diffusion:   2%|▏         | 87/5001 [10:22<9:02:08,  6.62s/it][A
Train Diffusion:   2%|▏         | 88/5001 [10:29<9:01:42,  6.62s/it][A
Train Diffusion:   2%|▏         | 89/5001 [10:36<9:01:17,  6.61s/it][A
Train Diffusion:   2%|▏         | 90/5001 [10:42<9:03:12,  6.64s/it][A


Moving average norm loss at <built-in function iter> iterations is: 372616.215625. Best norm loss value is: 367386.5.

C_PATH mean = tensor([[8.7813, 0.4537, 1.1706],
        [9.1015, 0.4455, 1.1909],
        [8.6959, 0.4659, 1.1419]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.3537e-01, 6.5333e-01, 1.1835e+00],
         [1.4178e+00, 3.5037e-01, 6.5631e-01],
         [2.5629e-01, 1.1211e+00, 1.9839e+00],
         ...,
         [1.2383e+00, 7.2576e-01, 2.4764e-01],
         [7.3265e+00, 6.4814e-01, 1.3844e-01],
         [2.7588e+00, 3.6715e-01, 1.0153e+00]],

        [[7.4374e-01, 8.4212e-01, 6.9410e-01],
         [5.4643e-01, 7.0416e-01, 5.3313e-01],
         [2.1427e+01, 2.6928e-01, 1.5861e+00],
         ...,
         [2.9439e+00, 1.3715e-01, 9.9336e-01],
         [2.2871e-02, 7.4257e-01, 1.7603e+00],
         [2.0230e+01, 1.2862e+00, 2.0609e+00]],

        [[7.1709e-01, 8.5184e-01, 8.1929e-01],
         [1.5534e+01, 4.4500e-01, 1.1476e+00],
         [2.1170e+00, 2.9946e-01, 5.83


Train Diffusion:   2%|▏         | 91/5001 [10:49<9:03:40,  6.64s/it][A
Train Diffusion:   2%|▏         | 92/5001 [10:56<9:02:45,  6.63s/it][A
Train Diffusion:   2%|▏         | 93/5001 [11:02<9:02:57,  6.64s/it][A
Train Diffusion:   2%|▏         | 94/5001 [11:09<9:08:59,  6.71s/it][A
Train Diffusion:   2%|▏         | 95/5001 [11:16<9:06:06,  6.68s/it][A
Train Diffusion:   2%|▏         | 96/5001 [11:22<9:04:11,  6.66s/it][A
Train Diffusion:   2%|▏         | 97/5001 [11:29<9:02:25,  6.64s/it][A
Train Diffusion:   2%|▏         | 98/5001 [11:36<9:02:27,  6.64s/it][A
Train Diffusion:   2%|▏         | 99/5001 [11:42<9:02:01,  6.63s/it][A
Train Diffusion:   2%|▏         | 100/5001 [11:49<9:01:56,  6.63s/it][A


Moving average norm loss at <built-in function iter> iterations is: 363749.096875. Best norm loss value is: 360414.875.

C_PATH mean = tensor([[9.3022, 0.4463, 1.1438],
        [9.4446, 0.4402, 1.1373],
        [9.4818, 0.4276, 1.1283]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1102,  0.6316,  1.9055],
         [ 1.2017,  0.4384,  0.6947],
         [ 0.4738,  1.1167,  1.2724],
         ...,
         [25.2683,  0.1701,  1.2421],
         [ 3.7202,  0.1288,  0.7097],
         [ 1.0808,  1.2685,  2.5872]],

        [[ 0.8107,  0.8188,  0.8506],
         [11.1481,  0.4805,  1.1241],
         [ 2.0590,  0.4022,  0.6189],
         ...,
         [ 0.9320,  0.5411,  2.0928],
         [28.7424,  0.1665,  1.5549],
         [ 4.3309,  0.4349,  1.7610]],

        [[ 0.5910,  0.8234,  0.8612],
         [ 0.4186,  0.7027,  0.9954],
         [29.9603,  0.2242,  1.1596],
         ...,
         [ 3.9555,  0.0907,  0.8258],
         [ 0.5742,  0.6048,  0.7216],
         [27.7041,  0.9616,  2.620


Train Diffusion:   2%|▏         | 101/5001 [11:55<9:01:29,  6.63s/it][A
Train Diffusion:   2%|▏         | 102/5001 [12:02<9:00:45,  6.62s/it][A
Train Diffusion:   2%|▏         | 103/5001 [12:04<7:16:43,  5.35s/it][A
Train Diffusion:   2%|▏         | 104/5001 [12:07<6:06:07,  4.49s/it][A
Train Diffusion:   2%|▏         | 105/5001 [12:10<5:24:43,  3.98s/it][A
Train Diffusion:   2%|▏         | 106/5001 [12:12<4:51:16,  3.57s/it][A
Train Diffusion:   2%|▏         | 107/5001 [12:15<4:27:20,  3.28s/it][A
Train Diffusion:   2%|▏         | 108/5001 [12:17<4:10:57,  3.08s/it][A
Train Diffusion:   2%|▏         | 109/5001 [12:20<3:59:15,  2.93s/it][A
Train Diffusion:   2%|▏         | 110/5001 [12:23<3:51:10,  2.84s/it][A
Train Diffusion:   2%|▏         | 111/5001 [12:25<3:45:37,  2.77s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 248199182.2222222. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.6242, 0.4475, 1.1794],
        [9.3138, 0.4516, 1.1702],
        [9.4012, 0.4500, 1.1616]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5843,  0.8585,  0.8546],
         [ 0.4752,  0.6905,  1.5620],
         [31.4495,  0.1172,  1.1273],
         ...,
         [22.0102,  0.1349,  1.2867],
         [ 4.6904,  0.0761,  1.1358],
         [21.3334,  1.4821,  2.2934]],

        [[ 1.3808,  0.6913,  3.0483],
         [ 1.1749,  0.5292,  0.7242],
         [ 0.6647,  0.5806,  0.7088],
         ...,
         [ 5.2320,  2.2825,  0.8523],
         [ 1.0211,  1.5912,  0.4956],
         [ 0.4855,  1.9408,  1.0816]],

        [[ 0.7027,  0.8665,  0.8470],
         [12.6964,  0.4662,  1.0994],
         [ 2.1101,  0.3690,  1.1410],
         ...,
         [ 4.4183,  0.0975,  1.0910],
         [40.2490,  0.0860,  1.3592],
         [ 5.1791,  0.3509,  


Train Diffusion:   2%|▏         | 112/5001 [12:28<3:41:23,  2.72s/it][A
Train Diffusion:   2%|▏         | 113/5001 [12:30<3:38:33,  2.68s/it][A
Train Diffusion:   2%|▏         | 114/5001 [12:33<3:36:50,  2.66s/it][A
Train Diffusion:   2%|▏         | 115/5001 [12:36<3:35:42,  2.65s/it][A
Train Diffusion:   2%|▏         | 116/5001 [12:38<3:34:14,  2.63s/it][A
Train Diffusion:   2%|▏         | 117/5001 [12:41<3:33:17,  2.62s/it][A
Train Diffusion:   2%|▏         | 118/5001 [12:44<3:33:24,  2.62s/it][A
Train Diffusion:   2%|▏         | 119/5001 [12:46<3:33:46,  2.63s/it][A
Train Diffusion:   2%|▏         | 120/5001 [12:49<3:35:14,  2.65s/it][A
Train Diffusion:   2%|▏         | 121/5001 [12:52<3:37:31,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 283415958.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9829, 0.4834, 1.2315],
        [9.0250, 0.4734, 1.2852],
        [8.9727, 0.4980, 1.2284]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4940,  0.9238,  0.8308],
         [11.7178,  0.6720,  1.2695],
         [ 1.5536,  0.5189,  0.8865],
         ...,
         [31.0735,  0.2411,  0.9097],
         [ 3.8930,  0.3059,  0.7220],
         [ 0.7966,  1.4596,  0.8519]],

        [[ 0.8876,  0.9267,  0.7461],
         [ 0.8322,  0.5342,  1.6951],
         [27.4324,  0.2785,  1.1506],
         ...,
         [ 0.5275,  0.3170,  1.7048],
         [ 0.5001,  0.4262,  0.7685],
         [26.3966,  1.1887,  2.1529]],

        [[ 1.4473,  0.7971,  5.6435],
         [ 0.7591,  0.8750,  1.3870],
         [ 0.6603,  0.8032,  0.9033],
         ...,
         [ 3.3806,  0.1181,  1.0905],
         [38.0343,  0.1179,  1.3109],
         [ 5.1359,  0.5656,  1.3349


Train Diffusion:   2%|▏         | 122/5001 [12:54<3:37:04,  2.67s/it][A
Train Diffusion:   2%|▏         | 123/5001 [12:57<3:35:51,  2.65s/it][A
Train Diffusion:   2%|▏         | 124/5001 [12:59<3:34:41,  2.64s/it][A
Train Diffusion:   2%|▏         | 125/5001 [13:02<3:34:32,  2.64s/it][A
Train Diffusion:   3%|▎         | 126/5001 [13:05<3:33:52,  2.63s/it][A
Train Diffusion:   3%|▎         | 127/5001 [13:07<3:33:39,  2.63s/it][A
Train Diffusion:   3%|▎         | 128/5001 [13:10<3:39:00,  2.70s/it][A
Train Diffusion:   3%|▎         | 129/5001 [13:13<3:40:19,  2.71s/it][A
Train Diffusion:   3%|▎         | 130/5001 [13:16<3:37:47,  2.68s/it][A
Train Diffusion:   3%|▎         | 131/5001 [13:18<3:35:50,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 300813174.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7368, 0.4950, 1.2991],
        [9.0946, 0.4608, 1.2823],
        [8.8461, 0.4711, 1.2741]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6501,  0.9440,  0.8734],
         [ 0.5853,  0.7517,  1.8325],
         [28.6082,  0.1847,  1.1830],
         ...,
         [ 0.4770,  0.7971,  0.7956],
         [36.3731,  0.0887,  1.4076],
         [ 6.2376,  0.2286,  0.8654]],

        [[ 0.6383,  0.9434,  0.8877],
         [12.2553,  0.6101,  0.9809],
         [ 2.0123,  0.4318,  0.7135],
         ...,
         [32.3857,  0.0806,  1.2588],
         [ 5.6274,  0.1015,  0.5544],
         [ 3.1811,  1.8087,  8.1315]],

        [[ 1.5892,  0.7985,  3.8956],
         [ 1.1915,  0.5525,  0.8894],
         [ 0.7034,  0.6837,  1.3287],
         ...,
         [ 4.1645,  0.1412,  0.7765],
         [ 0.5391,  0.6436,  0.7398],
         [ 5.7380,  1.5541,  2.4066


Train Diffusion:   3%|▎         | 132/5001 [13:21<3:34:21,  2.64s/it][A
Train Diffusion:   3%|▎         | 133/5001 [13:23<3:33:26,  2.63s/it][A
Train Diffusion:   3%|▎         | 134/5001 [13:26<3:32:41,  2.62s/it][A
Train Diffusion:   3%|▎         | 135/5001 [13:29<3:32:18,  2.62s/it][A
Train Diffusion:   3%|▎         | 136/5001 [13:31<3:31:42,  2.61s/it][A
Train Diffusion:   3%|▎         | 137/5001 [13:34<3:31:44,  2.61s/it][A
Train Diffusion:   3%|▎         | 138/5001 [13:36<3:31:16,  2.61s/it][A
Train Diffusion:   3%|▎         | 139/5001 [13:39<3:31:02,  2.60s/it][A
Train Diffusion:   3%|▎         | 140/5001 [13:42<3:30:59,  2.60s/it][A
Train Diffusion:   3%|▎         | 141/5001 [13:44<3:31:32,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329216249.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8551, 0.5106, 1.2872],
        [8.9232, 0.4972, 1.3161],
        [8.4280, 0.5035, 1.2900]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7417,  0.9552,  1.1238],
         [13.1955,  0.3920,  1.2545],
         [ 2.6642,  0.3665,  0.6141],
         ...,
         [ 0.7048,  0.1158,  0.5925],
         [ 5.1653,  0.0857,  0.8013],
         [19.9108,  1.5623,  2.0196]],

        [[ 0.5690,  0.9473,  0.9298],
         [ 0.5647,  0.8257,  1.6253],
         [26.9092,  0.1224,  1.1818],
         ...,
         [ 0.2086,  0.5073,  1.0133],
         [38.1192,  0.0636,  1.4230],
         [ 6.3720,  0.2470,  0.8810]],

        [[ 1.5836,  0.8034,  1.4859],
         [ 1.5890,  0.4043,  0.7481],
         [ 0.6826,  0.7542,  1.3515],
         ...,
         [ 0.4870,  0.6532,  2.4999],
         [ 0.2945,  0.7283,  0.5473],
         [ 0.7946,  2.2056,  0.9879


Train Diffusion:   3%|▎         | 142/5001 [13:47<3:31:18,  2.61s/it][A
Train Diffusion:   3%|▎         | 143/5001 [13:49<3:30:52,  2.60s/it][A
Train Diffusion:   3%|▎         | 144/5001 [13:52<3:30:23,  2.60s/it][A
Train Diffusion:   3%|▎         | 145/5001 [13:55<3:30:34,  2.60s/it][A
Train Diffusion:   3%|▎         | 146/5001 [13:57<3:30:24,  2.60s/it][A
Train Diffusion:   3%|▎         | 147/5001 [14:00<3:30:45,  2.61s/it][A
Train Diffusion:   3%|▎         | 148/5001 [14:02<3:30:26,  2.60s/it][A
Train Diffusion:   3%|▎         | 149/5001 [14:05<3:30:26,  2.60s/it][A
Train Diffusion:   3%|▎         | 150/5001 [14:08<3:30:13,  2.60s/it][A
Train Diffusion:   3%|▎         | 151/5001 [14:10<3:30:31,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328949136.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8724, 0.4939, 1.2835],
        [8.8031, 0.4678, 1.2702],
        [8.8438, 0.4914, 1.2403]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5841,  0.9515,  0.9364],
         [ 0.5650,  0.8266,  1.6996],
         [28.2986,  0.0852,  1.2027],
         ...,
         [24.4008,  0.4862,  0.3022],
         [ 1.5285,  0.9110,  0.2461],
         [ 3.6468,  0.2879,  1.0078]],

        [[ 1.5938,  0.8058,  2.1746],
         [ 1.4445,  0.4912,  0.7725],
         [ 0.7268,  0.6948,  0.8535],
         ...,
         [ 2.2792,  0.2217,  1.0255],
         [11.5720,  0.3942,  1.1285],
         [ 0.8218,  2.6156,  2.2910]],

        [[ 0.7218,  0.9583,  1.0667],
         [12.3687,  0.4664,  1.2094],
         [ 2.2990,  0.4594,  0.8125],
         ...,
         [ 1.0893,  0.1173,  1.1580],
         [ 2.7593,  0.1530,  0.3951],
         [19.3510,  2.7676,  2.8429


Train Diffusion:   3%|▎         | 152/5001 [14:13<3:30:03,  2.60s/it][A
Train Diffusion:   3%|▎         | 153/5001 [14:15<3:29:46,  2.60s/it][A
Train Diffusion:   3%|▎         | 154/5001 [14:18<3:30:07,  2.60s/it][A
Train Diffusion:   3%|▎         | 155/5001 [14:21<3:29:50,  2.60s/it][A
Train Diffusion:   3%|▎         | 156/5001 [14:23<3:29:34,  2.60s/it][A
Train Diffusion:   3%|▎         | 157/5001 [14:26<3:29:41,  2.60s/it][A
Train Diffusion:   3%|▎         | 158/5001 [14:28<3:29:33,  2.60s/it][A
Train Diffusion:   3%|▎         | 159/5001 [14:31<3:29:51,  2.60s/it][A
Train Diffusion:   3%|▎         | 160/5001 [14:34<3:29:51,  2.60s/it][A
Train Diffusion:   3%|▎         | 161/5001 [14:36<3:29:57,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325044848.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9717, 0.4943, 1.3277],
        [8.5174, 0.5252, 1.2899],
        [8.8210, 0.4934, 1.2923]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8426,  0.9619,  0.7892],
         [ 0.7871,  0.6194,  1.9031],
         [27.9654,  0.1740,  1.2022],
         ...,
         [30.1986,  0.3582,  0.7006],
         [ 4.4325,  0.1415,  0.7259],
         [ 0.6405,  1.7002,  9.3196]],

        [[ 1.5433,  0.8255,  5.3024],
         [ 0.8021,  0.8064,  1.3373],
         [ 0.6481,  0.7200,  1.1765],
         ...,
         [ 0.5856,  0.0977,  1.9393],
         [ 0.2603,  0.5998,  1.0573],
         [ 5.0999,  1.9114,  2.0811]],

        [[ 0.5189,  0.9521,  0.8393],
         [11.6162,  0.7371,  0.8217],
         [ 1.7548,  0.4310,  0.7690],
         ...,
         [ 2.4852,  0.2083,  1.1412],
         [38.0366,  0.0745,  1.3906],
         [ 5.9769,  0.3330,  1.1013


Train Diffusion:   3%|▎         | 162/5001 [14:39<3:29:42,  2.60s/it][A
Train Diffusion:   3%|▎         | 163/5001 [14:41<3:29:35,  2.60s/it][A
Train Diffusion:   3%|▎         | 164/5001 [14:44<3:29:52,  2.60s/it][A
Train Diffusion:   3%|▎         | 165/5001 [14:47<3:29:39,  2.60s/it][A
Train Diffusion:   3%|▎         | 166/5001 [14:49<3:29:24,  2.60s/it][A
Train Diffusion:   3%|▎         | 167/5001 [14:52<3:29:27,  2.60s/it][A
Train Diffusion:   3%|▎         | 168/5001 [14:54<3:29:00,  2.59s/it][A
Train Diffusion:   3%|▎         | 169/5001 [14:57<3:29:06,  2.60s/it][A
Train Diffusion:   3%|▎         | 170/5001 [15:00<3:29:12,  2.60s/it][A
Train Diffusion:   3%|▎         | 171/5001 [15:02<3:31:06,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341811715.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6459, 0.5079, 1.2963],
        [8.8842, 0.5011, 1.3061],
        [8.8197, 0.4848, 1.3082]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5991,  0.8061,  2.4801],
         [ 1.3933,  0.5218,  0.7852],
         [ 0.7279,  0.7077,  1.3304],
         ...,
         [ 4.6042,  0.1034,  1.2085],
         [35.1372,  0.1361,  1.2757],
         [ 5.1037,  0.6396,  1.3031]],

        [[ 0.7098,  0.9575,  1.0336],
         [12.4542,  0.4851,  1.1888],
         [ 2.2544,  0.4314,  0.6959],
         ...,
         [30.5184,  0.2057,  1.1106],
         [ 4.1782,  0.2547,  0.8386],
         [ 0.4504,  1.8003,  0.7841]],

        [[ 0.5936,  0.9519,  0.9324],
         [ 0.5662,  0.8259,  1.7414],
         [27.6041,  0.1860,  1.2034],
         ...,
         [ 0.1986,  0.1647,  1.5985],
         [ 0.3412,  0.4815,  1.5269],
         [19.5265,  1.7476,  2.0536


Train Diffusion:   3%|▎         | 172/5001 [15:05<3:31:07,  2.62s/it][A
Train Diffusion:   3%|▎         | 173/5001 [15:08<3:30:27,  2.62s/it][A
Train Diffusion:   3%|▎         | 174/5001 [15:10<3:32:59,  2.65s/it][A
Train Diffusion:   3%|▎         | 175/5001 [15:13<3:34:03,  2.66s/it][A
Train Diffusion:   4%|▎         | 176/5001 [15:16<3:35:08,  2.68s/it][A
Train Diffusion:   4%|▎         | 177/5001 [15:18<3:32:54,  2.65s/it][A
Train Diffusion:   4%|▎         | 178/5001 [15:21<3:31:39,  2.63s/it][A
Train Diffusion:   4%|▎         | 179/5001 [15:23<3:31:05,  2.63s/it][A
Train Diffusion:   4%|▎         | 180/5001 [15:26<3:29:59,  2.61s/it][A
Train Diffusion:   4%|▎         | 181/5001 [15:29<3:29:27,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323987664.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8898, 0.4967, 1.2911],
        [8.6280, 0.4888, 1.2769],
        [8.9282, 0.4658, 1.2862]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5933e+00, 8.0788e-01, 1.8859e+00],
         [1.4851e+00, 4.5684e-01, 7.6749e-01],
         [7.0277e-01, 7.5912e-01, 9.2533e-01],
         ...,
         [2.0164e-01, 7.6132e-01, 4.7489e-01],
         [9.2654e+00, 8.5520e-01, 8.0484e-01],
         [1.9978e+00, 9.9214e-01, 1.3609e+00]],

        [[7.3575e-01, 9.6091e-01, 1.0989e+00],
         [1.2484e+01, 4.4308e-01, 1.2283e+00],
         [2.4188e+00, 4.6611e-01, 1.0598e+00],
         ...,
         [3.0845e+00, 1.3754e-01, 8.2797e-01],
         [2.1259e+01, 1.8807e-01, 6.9610e-01],
         [3.9216e+00, 1.3116e+00, 6.7755e-01]],

        [[5.7469e-01, 9.5303e-01, 9.4450e-01],
         [5.6487e-01, 8.2420e-01, 1.6563e+00],
         [2.7493e+01, 1.7688e-01, 1.1


Train Diffusion:   4%|▎         | 182/5001 [15:31<3:29:31,  2.61s/it][A
Train Diffusion:   4%|▎         | 183/5001 [15:34<3:29:09,  2.60s/it][A
Train Diffusion:   4%|▎         | 184/5001 [15:36<3:28:59,  2.60s/it][A
Train Diffusion:   4%|▎         | 185/5001 [15:39<3:29:00,  2.60s/it][A
Train Diffusion:   4%|▎         | 186/5001 [15:42<3:29:06,  2.61s/it][A
Train Diffusion:   4%|▎         | 187/5001 [15:44<3:29:23,  2.61s/it][A
Train Diffusion:   4%|▍         | 188/5001 [15:47<3:29:07,  2.61s/it][A
Train Diffusion:   4%|▍         | 189/5001 [15:49<3:29:01,  2.61s/it][A
Train Diffusion:   4%|▍         | 190/5001 [15:52<3:28:31,  2.60s/it][A
Train Diffusion:   4%|▍         | 191/5001 [15:55<3:28:17,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315128932.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6383, 0.4812, 1.2796],
        [8.9644, 0.5010, 1.3036],
        [8.7602, 0.5286, 1.2989]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1303,  0.9191,  2.4366],
         [ 2.0387,  0.8009,  1.5570],
         [ 0.8628,  0.9216,  0.7655],
         ...,
         [20.5779,  0.2772,  0.9663],
         [ 3.6878,  0.2412,  0.7766],
         [ 0.5975,  1.8791,  0.9240]],

        [[ 0.4473,  0.9546,  0.9073],
         [ 8.8197,  0.7367,  2.1087],
         [ 1.2927,  0.6835,  1.1622],
         ...,
         [ 1.0486,  0.7403,  7.9906],
         [15.8138,  0.0807,  1.2913],
         [ 5.9263,  0.2005,  4.0659]],

        [[ 1.3346,  0.8721,  0.7100],
         [ 1.5618,  0.3662,  1.5659],
         [25.4456,  0.2850,  1.1698],
         ...,
         [ 0.2810,  1.1642,  2.9068],
         [ 0.2081,  0.5811,  0.7729],
         [ 0.2201,  1.4135,  2.5389


Train Diffusion:   4%|▍         | 192/5001 [15:57<3:27:58,  2.59s/it][A
Train Diffusion:   4%|▍         | 193/5001 [16:00<3:28:03,  2.60s/it][A
Train Diffusion:   4%|▍         | 194/5001 [16:02<3:28:37,  2.60s/it][A
Train Diffusion:   4%|▍         | 195/5001 [16:05<3:28:16,  2.60s/it][A
Train Diffusion:   4%|▍         | 196/5001 [16:08<3:27:42,  2.59s/it][A
Train Diffusion:   4%|▍         | 197/5001 [16:10<3:28:02,  2.60s/it][A
Train Diffusion:   4%|▍         | 198/5001 [16:13<3:28:18,  2.60s/it][A
Train Diffusion:   4%|▍         | 199/5001 [16:15<3:28:14,  2.60s/it][A
Train Diffusion:   4%|▍         | 200/5001 [16:18<3:27:38,  2.59s/it][A
Train Diffusion:   4%|▍         | 201/5001 [16:21<3:27:58,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331970848.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7733, 0.4891, 1.2957],
        [8.6738, 0.5040, 1.2860],
        [8.6662, 0.4968, 1.3121]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5936,  0.9507,  0.8791],
         [12.0426,  0.6449,  0.8898],
         [ 1.9501,  0.4465,  0.7415],
         ...,
         [ 5.6222,  2.0045,  1.1626],
         [ 0.9837,  1.2938,  0.7806],
         [ 0.3369,  1.6514, 11.6418]],

        [[ 0.7100,  0.9562,  0.8453],
         [ 0.6536,  0.6947,  1.9193],
         [28.2443,  0.1878,  1.1975],
         ...,
         [17.9777,  0.5063,  0.6082],
         [ 4.5322,  0.0395,  7.6003],
         [ 1.7198,  1.2219,  1.2473]],

        [[ 1.5988,  0.8164,  4.4988],
         [ 0.9784,  0.6836,  1.0492],
         [ 0.6898,  0.6921,  1.2947],
         ...,
         [ 4.3609,  0.0971,  1.0543],
         [24.9087,  0.5052,  1.4255],
         [ 0.2683,  4.8546,  1.3605


Train Diffusion:   4%|▍         | 202/5001 [16:23<3:27:49,  2.60s/it][A
Train Diffusion:   4%|▍         | 203/5001 [16:26<3:27:49,  2.60s/it][A
Train Diffusion:   4%|▍         | 204/5001 [16:28<3:27:37,  2.60s/it][A
Train Diffusion:   4%|▍         | 205/5001 [16:31<3:27:31,  2.60s/it][A
Train Diffusion:   4%|▍         | 206/5001 [16:34<3:27:55,  2.60s/it][A
Train Diffusion:   4%|▍         | 207/5001 [16:36<3:27:29,  2.60s/it][A
Train Diffusion:   4%|▍         | 208/5001 [16:39<3:27:36,  2.60s/it][A
Train Diffusion:   4%|▍         | 209/5001 [16:41<3:27:32,  2.60s/it][A
Train Diffusion:   4%|▍         | 210/5001 [16:44<3:27:45,  2.60s/it][A
Train Diffusion:   4%|▍         | 211/5001 [16:47<3:28:12,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341317974.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6542, 0.5130, 1.3002],
        [8.7863, 0.4950, 1.3143],
        [8.8422, 0.5050, 1.2966]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0257,  0.9409,  0.8261],
         [ 1.0074,  0.5259,  1.8672],
         [26.7946,  0.2649,  1.1977],
         ...,
         [25.2630,  0.4348,  0.9921],
         [ 3.0217,  0.5959,  0.7359],
         [ 0.5033,  1.6008,  6.5861]],

        [[ 0.4629,  0.9543,  0.8804],
         [11.1305,  0.7251,  1.0230],
         [ 1.5838,  0.4714,  0.7980],
         ...,
         [ 3.7645,  0.1224,  0.9928],
         [39.1857,  0.0866,  1.2283],
         [ 5.6063,  0.4584,  1.1314]],

        [[ 1.4226,  0.8483,  5.0485],
         [ 0.8060,  0.8807,  1.4539],
         [ 0.7161,  0.7766,  1.0773],
         ...,
         [ 1.3143,  0.6886,  1.8236],
         [ 1.0516,  0.3175,  1.0975],
         [18.3125,  1.5462,  1.9630


Train Diffusion:   4%|▍         | 212/5001 [16:49<3:27:28,  2.60s/it][A
Train Diffusion:   4%|▍         | 213/5001 [16:52<3:27:15,  2.60s/it][A
Train Diffusion:   4%|▍         | 214/5001 [16:54<3:27:24,  2.60s/it][A
Train Diffusion:   4%|▍         | 215/5001 [16:57<3:27:01,  2.60s/it][A
Train Diffusion:   4%|▍         | 216/5001 [17:00<3:26:37,  2.59s/it][A
Train Diffusion:   4%|▍         | 217/5001 [17:02<3:27:17,  2.60s/it][A
Train Diffusion:   4%|▍         | 218/5001 [17:05<3:27:47,  2.61s/it][A
Train Diffusion:   4%|▍         | 219/5001 [17:07<3:27:34,  2.60s/it][A
Train Diffusion:   4%|▍         | 220/5001 [17:10<3:28:21,  2.61s/it][A
Train Diffusion:   4%|▍         | 221/5001 [17:13<3:30:57,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330181456.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6253, 0.5366, 1.3263],
        [8.6295, 0.5207, 1.3114],
        [8.7240, 0.5005, 1.3072]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.1040e-01, 9.6119e-01, 7.9529e-01],
         [7.3721e-01, 6.5928e-01, 1.8959e+00],
         [2.6580e+01, 2.6832e-01, 1.2038e+00],
         ...,
         [2.8816e+01, 7.4346e-02, 1.2833e+00],
         [5.6160e+00, 6.3665e-02, 1.1231e+01],
         [2.8019e+00, 2.4453e+00, 2.0231e+00]],

        [[5.3238e-01, 9.5115e-01, 8.3594e-01],
         [1.2060e+01, 7.3696e-01, 9.7573e-01],
         [1.8121e+00, 4.1977e-01, 7.8935e-01],
         ...,
         [4.2074e-01, 6.0133e-01, 7.8687e-01],
         [4.9575e-05, 1.4489e+00, 1.5771e+00],
         [2.5827e-01, 1.6873e+00, 7.8444e-01]],

        [[1.5614e+00, 8.2257e-01, 5.0046e+00],
         [8.9038e-01, 7.8554e-01, 1.2723e+00],
         [6.0410e-01, 7.8904e-01, 1.0


Train Diffusion:   4%|▍         | 222/5001 [17:16<3:43:07,  2.80s/it][A
Train Diffusion:   4%|▍         | 223/5001 [17:19<3:46:03,  2.84s/it][A
Train Diffusion:   4%|▍         | 224/5001 [17:22<3:42:14,  2.79s/it][A
Train Diffusion:   4%|▍         | 225/5001 [17:24<3:37:40,  2.73s/it][A
Train Diffusion:   5%|▍         | 226/5001 [17:27<3:34:06,  2.69s/it][A
Train Diffusion:   5%|▍         | 227/5001 [17:29<3:32:00,  2.66s/it][A
Train Diffusion:   5%|▍         | 228/5001 [17:32<3:30:28,  2.65s/it][A
Train Diffusion:   5%|▍         | 229/5001 [17:35<3:29:03,  2.63s/it][A
Train Diffusion:   5%|▍         | 230/5001 [17:37<3:28:16,  2.62s/it][A
Train Diffusion:   5%|▍         | 231/5001 [17:40<3:27:29,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325632406.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8856, 0.5042, 1.3304],
        [8.8232, 0.4944, 1.2665],
        [8.6189, 0.4912, 1.2858]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4489e-01, 9.5470e-01, 1.0043e+00],
         [9.0000e+00, 5.5269e-01, 2.7853e+00],
         [2.4664e+00, 1.1588e-01, 4.3499e-01],
         ...,
         [4.6989e-01, 5.8467e-01, 1.1507e+00],
         [3.4095e+01, 5.2711e-02, 1.4877e+00],
         [6.4654e+00, 2.4627e-01, 7.8618e-01]],

        [[1.1533e+00, 9.1353e-01, 2.3823e+00],
         [1.7404e+00, 9.2377e-01, 1.3931e+00],
         [7.5560e-01, 6.9445e-01, 1.2645e+00],
         ...,
         [3.2514e+01, 1.0860e-01, 1.2234e+00],
         [5.2380e+00, 1.3096e-01, 6.4546e-01],
         [4.4265e+00, 1.8740e+00, 3.2379e+00]],

        [[1.3143e+00, 8.7840e-01, 6.5791e-01],
         [1.5362e+00, 3.8199e-01, 3.3915e-01],
         [1.4842e-03, 1.8352e+00, 3.8


Train Diffusion:   5%|▍         | 232/5001 [17:42<3:27:00,  2.60s/it][A
Train Diffusion:   5%|▍         | 233/5001 [17:45<3:27:17,  2.61s/it][A
Train Diffusion:   5%|▍         | 234/5001 [17:48<3:27:26,  2.61s/it][A
Train Diffusion:   5%|▍         | 235/5001 [17:50<3:27:41,  2.61s/it][A
Train Diffusion:   5%|▍         | 236/5001 [17:53<3:27:16,  2.61s/it][A
Train Diffusion:   5%|▍         | 237/5001 [17:55<3:27:01,  2.61s/it][A
Train Diffusion:   5%|▍         | 238/5001 [17:58<3:26:58,  2.61s/it][A
Train Diffusion:   5%|▍         | 239/5001 [18:01<3:26:40,  2.60s/it][A
Train Diffusion:   5%|▍         | 240/5001 [18:03<3:26:45,  2.61s/it][A
Train Diffusion:   5%|▍         | 241/5001 [18:06<3:26:47,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320005926.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6030, 0.4806, 1.2937],
        [8.5783, 0.4813, 1.2884],
        [8.9886, 0.4991, 1.2895]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4799,  0.9530,  0.9543],
         [12.6711,  0.5624,  0.9795],
         [ 2.0264,  0.3973,  0.7523],
         ...,
         [ 0.2157,  0.3304,  2.0842],
         [ 0.2576,  0.6496,  0.7715],
         [25.6043,  1.2474,  2.0408]],

        [[ 1.4762,  0.8385,  4.4900],
         [ 0.7705,  0.9268,  1.3351],
         [ 0.6127,  0.7618,  1.1999],
         ...,
         [25.4518,  0.4309,  0.5565],
         [ 4.5173,  0.1124,  0.7792],
         [ 0.6736,  1.7216,  0.8149]],

        [[ 0.9521,  0.9523,  0.7907],
         [ 0.9097,  0.5772,  1.9860],
         [26.8124,  0.2558,  1.2051],
         ...,
         [ 4.4843,  0.1120,  1.1209],
         [37.5537,  0.1000,  1.3162],
         [ 5.6255,  0.4383,  1.1298


Train Diffusion:   5%|▍         | 242/5001 [18:08<3:26:46,  2.61s/it][A
Train Diffusion:   5%|▍         | 243/5001 [18:11<3:27:06,  2.61s/it][A
Train Diffusion:   5%|▍         | 244/5001 [18:14<3:26:39,  2.61s/it][A
Train Diffusion:   5%|▍         | 245/5001 [18:16<3:26:32,  2.61s/it][A
Train Diffusion:   5%|▍         | 246/5001 [18:19<3:26:43,  2.61s/it][A
Train Diffusion:   5%|▍         | 247/5001 [18:21<3:26:41,  2.61s/it][A
Train Diffusion:   5%|▍         | 248/5001 [18:24<3:26:03,  2.60s/it][A
Train Diffusion:   5%|▍         | 249/5001 [18:27<3:25:42,  2.60s/it][A
Train Diffusion:   5%|▍         | 250/5001 [18:29<3:25:48,  2.60s/it][A
Train Diffusion:   5%|▌         | 251/5001 [18:32<3:25:48,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336479046.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7144, 0.4651, 1.3073],
        [8.7451, 0.5289, 1.3321],
        [8.6652, 0.5092, 1.3341]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1470e-01, 9.5252e-01, 9.1861e-01],
         [5.7064e-01, 7.6187e-01, 1.8379e+00],
         [2.3830e+01, 3.0746e-01, 1.2297e+00],
         ...,
         [4.2686e-01, 7.8260e+00, 5.2478e-01],
         [7.9811e-06, 7.7105e-01, 3.3720e+00],
         [1.3548e-01, 1.3241e+00, 1.1426e+01]],

        [[6.8401e-01, 9.5546e-01, 1.0110e+00],
         [1.2760e+01, 4.8401e-01, 1.1712e+00],
         [2.2459e+00, 3.8013e-01, 8.0579e-01],
         ...,
         [1.0798e-04, 3.5609e-01, 3.6441e+00],
         [1.4466e-01, 1.3932e+00, 1.0534e+01],
         [5.7559e-01, 3.3884e+00, 1.6973e+00]],

        [[1.6038e+00, 8.0908e-01, 2.7993e+00],
         [1.2375e+00, 5.9912e-01, 8.1968e-01],
         [7.1319e-01, 8.0750e-01, 1.0


Train Diffusion:   5%|▌         | 252/5001 [18:34<3:26:26,  2.61s/it][A
Train Diffusion:   5%|▌         | 253/5001 [18:37<3:25:58,  2.60s/it][A
Train Diffusion:   5%|▌         | 254/5001 [18:40<3:25:37,  2.60s/it][A
Train Diffusion:   5%|▌         | 255/5001 [18:42<3:25:33,  2.60s/it][A
Train Diffusion:   5%|▌         | 256/5001 [18:45<3:25:58,  2.60s/it][A
Train Diffusion:   5%|▌         | 257/5001 [18:47<3:25:55,  2.60s/it][A
Train Diffusion:   5%|▌         | 258/5001 [18:50<3:25:31,  2.60s/it][A
Train Diffusion:   5%|▌         | 259/5001 [18:53<3:25:14,  2.60s/it][A
Train Diffusion:   5%|▌         | 260/5001 [18:55<3:25:10,  2.60s/it][A
Train Diffusion:   5%|▌         | 261/5001 [18:58<3:25:28,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326323318.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9089, 0.5080, 1.3051],
        [8.6498, 0.4914, 1.2867],
        [8.6316, 0.4915, 1.2797]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5654,  0.9501,  0.9315],
         [ 0.5442,  0.8424,  1.6345],
         [25.9188,  0.2341,  1.2005],
         ...,
         [ 2.7257,  0.1868,  1.0369],
         [41.5033,  0.0817,  1.3028],
         [ 5.9328,  0.2968,  0.9811]],

        [[ 1.5838,  0.8058,  1.3196],
         [ 1.6532,  0.3880,  0.7493],
         [ 0.6807,  0.7498,  1.4885],
         ...,
         [ 1.4073,  0.1518,  1.3406],
         [ 3.5597,  0.1361,  1.2734],
         [ 0.1074,  1.7398,  2.9287]],

        [[ 0.7528,  0.9592,  1.1359],
         [13.1914,  0.3955,  1.2597],
         [ 2.6543,  0.3765,  0.6310],
         ...,
         [14.7958,  0.6222,  0.9007],
         [ 2.3782,  0.5589,  0.7577],
         [ 0.5971,  1.6071,  9.5330


Train Diffusion:   5%|▌         | 262/5001 [19:00<3:25:19,  2.60s/it][A
Train Diffusion:   5%|▌         | 263/5001 [19:03<3:25:55,  2.61s/it][A
Train Diffusion:   5%|▌         | 264/5001 [19:06<3:28:40,  2.64s/it][A
Train Diffusion:   5%|▌         | 265/5001 [19:08<3:28:26,  2.64s/it][A
Train Diffusion:   5%|▌         | 266/5001 [19:11<3:27:23,  2.63s/it][A
Train Diffusion:   5%|▌         | 267/5001 [19:14<3:29:23,  2.65s/it][A
Train Diffusion:   5%|▌         | 268/5001 [19:16<3:28:31,  2.64s/it][A
Train Diffusion:   5%|▌         | 269/5001 [19:19<3:31:24,  2.68s/it][A
Train Diffusion:   5%|▌         | 270/5001 [19:22<3:31:15,  2.68s/it][A
Train Diffusion:   5%|▌         | 271/5001 [19:24<3:29:15,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318812678.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6956, 0.4763, 1.2709],
        [8.8083, 0.5059, 1.3098],
        [8.8930, 0.4807, 1.2914]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6560,  0.9549,  0.9614],
         [12.5212,  0.5330,  1.1308],
         [ 2.1454,  0.4081,  0.7770],
         ...,
         [ 8.8684,  0.1857,  1.1864],
         [ 4.8067,  0.0827,  0.7109],
         [ 0.2139,  2.2682,  1.5699]],

        [[ 0.6389,  0.9542,  0.8987],
         [ 0.5888,  0.7601,  1.8510],
         [26.5618,  0.2305,  1.2039],
         ...,
         [ 3.7073,  0.1341,  1.0611],
         [40.6071,  0.0510,  1.3613],
         [ 6.4244,  0.2120,  1.7283]],

        [[ 1.6076,  0.8093,  3.3225],
         [ 1.2110,  0.6120,  0.8604],
         [ 0.7108,  0.7966,  1.0960],
         ...,
         [ 3.9718,  1.5223,  1.1918],
         [ 0.7929,  1.1120,  0.4245],
         [18.7932,  1.7369,  1.9264


Train Diffusion:   5%|▌         | 272/5001 [19:27<3:27:39,  2.63s/it][A
Train Diffusion:   5%|▌         | 273/5001 [19:30<3:26:27,  2.62s/it][A
Train Diffusion:   5%|▌         | 274/5001 [19:32<3:26:46,  2.62s/it][A
Train Diffusion:   5%|▌         | 275/5001 [19:35<3:25:52,  2.61s/it][A
Train Diffusion:   6%|▌         | 276/5001 [19:37<3:25:16,  2.61s/it][A
Train Diffusion:   6%|▌         | 277/5001 [19:40<3:24:57,  2.60s/it][A
Train Diffusion:   6%|▌         | 278/5001 [19:43<3:24:41,  2.60s/it][A
Train Diffusion:   6%|▌         | 279/5001 [19:45<3:25:18,  2.61s/it][A
Train Diffusion:   6%|▌         | 280/5001 [19:48<3:25:20,  2.61s/it][A
Train Diffusion:   6%|▌         | 281/5001 [19:50<3:24:56,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332864064.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4978, 0.5124, 1.2797],
        [8.7244, 0.5194, 1.3314],
        [8.8169, 0.4859, 1.3078]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5971,  0.9517,  0.8463],
         [11.6868,  0.7090,  0.8732],
         [ 1.8509,  0.4420,  0.7450],
         ...,
         [30.6444,  0.0569,  1.2966],
         [ 5.9826,  0.0720,  0.2606],
         [ 0.0611,  3.1201,  0.4859]],

        [[ 0.7056,  0.9567,  0.8480],
         [ 0.6405,  0.7224,  1.8748],
         [28.0650,  0.1604,  1.2023],
         ...,
         [ 0.2855,  7.9751,  0.5818],
         [25.3122,  0.2947,  0.8727],
         [ 5.8705,  0.1639,  1.7538]],

        [[ 1.6004,  0.8129,  4.5387],
         [ 1.0738,  0.6016,  0.9918],
         [ 0.7139,  0.7090,  1.1872],
         ...,
         [ 4.0808,  0.1659,  0.5816],
         [ 0.3386,  0.9955,  1.0593],
         [12.0956,  2.1228,  2.4388


Train Diffusion:   6%|▌         | 282/5001 [19:53<3:25:01,  2.61s/it][A
Train Diffusion:   6%|▌         | 283/5001 [19:56<3:24:24,  2.60s/it][A
Train Diffusion:   6%|▌         | 284/5001 [19:58<3:24:06,  2.60s/it][A
Train Diffusion:   6%|▌         | 285/5001 [20:01<3:24:12,  2.60s/it][A
Train Diffusion:   6%|▌         | 286/5001 [20:03<3:23:53,  2.59s/it][A
Train Diffusion:   6%|▌         | 287/5001 [20:06<3:23:55,  2.60s/it][A
Train Diffusion:   6%|▌         | 288/5001 [20:09<3:23:20,  2.59s/it][A
Train Diffusion:   6%|▌         | 289/5001 [20:11<3:23:38,  2.59s/it][A
Train Diffusion:   6%|▌         | 290/5001 [20:14<3:23:42,  2.59s/it][A
Train Diffusion:   6%|▌         | 291/5001 [20:16<3:23:54,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322470496.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6491, 0.4818, 1.3399],
        [8.9074, 0.5059, 1.3246],
        [8.6242, 0.4874, 1.2830]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7275,  0.9580,  1.0889],
         [12.5996,  0.4477,  1.2252],
         [ 2.3673,  0.4252,  0.9517],
         ...,
         [ 3.4898,  0.2387,  0.9020],
         [ 0.4794,  0.5901,  0.7973],
         [ 3.2250,  1.5178,  2.5986]],

        [[ 0.5808,  0.9509,  0.9356],
         [ 0.5719,  0.8323,  1.6944],
         [28.1176,  0.0844,  1.2051],
         ...,
         [33.6209,  0.1039,  1.2115],
         [ 5.3423,  0.1281,  0.6753],
         [ 0.7414,  1.5106,  9.2538]],

        [[ 1.5952,  0.8056,  1.9431],
         [ 1.4953,  0.4584,  0.7708],
         [ 0.7278,  0.7014,  0.8171],
         ...,
         [ 0.5244,  0.5636,  1.0146],
         [36.7797,  0.0718,  1.4063],
         [ 6.1390,  0.3026,  0.8279


Train Diffusion:   6%|▌         | 292/5001 [20:19<3:23:37,  2.59s/it][A
Train Diffusion:   6%|▌         | 293/5001 [20:22<3:23:26,  2.59s/it][A
Train Diffusion:   6%|▌         | 294/5001 [20:24<3:23:32,  2.59s/it][A
Train Diffusion:   6%|▌         | 295/5001 [20:27<3:23:54,  2.60s/it][A
Train Diffusion:   6%|▌         | 296/5001 [20:29<3:23:43,  2.60s/it][A
Train Diffusion:   6%|▌         | 297/5001 [20:32<3:23:35,  2.60s/it][A
Train Diffusion:   6%|▌         | 298/5001 [20:35<3:23:39,  2.60s/it][A
Train Diffusion:   6%|▌         | 299/5001 [20:37<3:23:35,  2.60s/it][A
Train Diffusion:   6%|▌         | 300/5001 [20:40<3:23:21,  2.60s/it][A
Train Diffusion:   6%|▌         | 301/5001 [20:42<3:23:39,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326203590.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5400, 0.5291, 1.3277],
        [8.6955, 0.4993, 1.3050],
        [8.8549, 0.4906, 1.3024]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1877e-01, 9.5181e-01, 7.5297e-01],
         [1.0645e-01, 1.3120e+00, 4.8848e-01],
         [2.0011e+01, 3.3014e-01, 1.1578e+00],
         ...,
         [3.1656e+01, 1.0892e-01, 1.1815e+00],
         [5.1777e+00, 3.7265e-02, 7.2383e+00],
         [2.7633e+00, 9.3887e-01, 1.3259e+00]],

        [[6.8105e-01, 9.5445e-01, 6.5248e-01],
         [8.8840e+00, 1.0919e+00, 4.4140e+00],
         [8.1248e-01, 8.2938e-01, 1.0080e+00],
         ...,
         [4.1662e-01, 4.5868e-01, 1.4243e+00],
         [1.3795e-03, 5.4763e-01, 3.0709e+00],
         [1.8273e-01, 1.4681e+00, 1.1592e+01]],

        [[1.6012e+00, 7.8752e-01, 4.4149e+00],
         [1.7393e+00, 5.4962e-02, 1.0988e+00],
         [6.5994e-01, 7.2395e-01, 7.3


Train Diffusion:   6%|▌         | 302/5001 [20:45<3:23:32,  2.60s/it][A
Train Diffusion:   6%|▌         | 303/5001 [20:48<3:24:17,  2.61s/it][A
Train Diffusion:   6%|▌         | 304/5001 [20:50<3:24:00,  2.61s/it][A
Train Diffusion:   6%|▌         | 305/5001 [20:53<3:23:30,  2.60s/it][A
Train Diffusion:   6%|▌         | 306/5001 [20:55<3:23:18,  2.60s/it][A
Train Diffusion:   6%|▌         | 307/5001 [20:58<3:24:13,  2.61s/it][A
Train Diffusion:   6%|▌         | 308/5001 [21:01<3:23:37,  2.60s/it][A
Train Diffusion:   6%|▌         | 309/5001 [21:03<3:23:45,  2.61s/it][A
Train Diffusion:   6%|▌         | 310/5001 [21:06<3:24:22,  2.61s/it][A
Train Diffusion:   6%|▌         | 311/5001 [21:08<3:24:13,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329400028.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7496, 0.5080, 1.2970],
        [8.5970, 0.4883, 1.2833],
        [8.9943, 0.4789, 1.2978]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3428e+00, 8.6981e-01, 3.5633e+00],
         [1.4427e+00, 9.7495e-01, 1.1115e+00],
         [1.3239e+00, 1.0545e+01, 1.0053e+00],
         ...,
         [6.3795e-01, 4.4533e-01, 1.1025e+00],
         [3.6173e+01, 1.0309e-01, 1.3352e+00],
         [5.5490e+00, 2.6999e-01, 5.0771e+00]],

        [[4.4833e-01, 9.5526e-01, 1.0885e+00],
         [5.0421e-01, 6.5729e-01, 2.2290e-01],
         [2.3634e-05, 2.4065e+00, 2.9427e-01],
         ...,
         [1.1120e+00, 2.6337e-01, 2.3264e+00],
         [2.3713e-01, 5.8724e-01, 2.0658e+00],
         [1.7025e-02, 2.1853e+00, 3.0483e+00]],

        [[1.1208e+00, 9.2191e-01, 1.8491e+00],
         [4.4996e+00, 7.7344e-01, 1.3838e+00],
         [1.4587e+00, 6.0936e+00, 6.7


Train Diffusion:   6%|▌         | 312/5001 [21:11<3:23:50,  2.61s/it][A
Train Diffusion:   6%|▋         | 313/5001 [21:14<3:23:27,  2.60s/it][A
Train Diffusion:   6%|▋         | 314/5001 [21:16<3:23:27,  2.60s/it][A
Train Diffusion:   6%|▋         | 315/5001 [21:19<3:25:36,  2.63s/it][A
Train Diffusion:   6%|▋         | 316/5001 [21:22<3:26:45,  2.65s/it][A
Train Diffusion:   6%|▋         | 317/5001 [21:24<3:27:43,  2.66s/it][A
Train Diffusion:   6%|▋         | 318/5001 [21:27<3:26:11,  2.64s/it][A
Train Diffusion:   6%|▋         | 319/5001 [21:29<3:25:33,  2.63s/it][A
Train Diffusion:   6%|▋         | 320/5001 [21:32<3:24:55,  2.63s/it][A
Train Diffusion:   6%|▋         | 321/5001 [21:35<3:24:49,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332736080.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6930, 0.5141, 1.3025],
        [8.7174, 0.4934, 1.2878],
        [8.8110, 0.4905, 1.2740]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4906,  0.8363,  5.4016],
         [ 0.9800,  1.1520,  1.2790],
         [ 0.5137,  5.8617,  1.8261],
         ...,
         [ 4.0405,  0.1588,  0.7967],
         [ 0.5248,  1.0883,  1.3334],
         [ 0.4085,  3.9081,  0.9726]],

        [[ 0.9340,  0.9541,  0.7889],
         [ 0.9634,  0.5187,  0.2074],
         [16.1852,  1.4584, 13.4684],
         ...,
         [ 0.5019,  0.9462,  0.8423],
         [24.7148,  0.1243,  1.2601],
         [ 5.1272,  0.2596,  3.9125]],

        [[ 0.4843,  0.9525,  0.9881],
         [ 9.5554,  0.5175,  1.6137],
         [ 1.0502,  3.5267,  2.6504],
         ...,
         [32.2707,  0.0820,  1.2524],
         [ 5.6301,  0.1095,  0.7047],
         [17.1995,  1.4237,  1.8826


Train Diffusion:   6%|▋         | 322/5001 [21:37<3:23:54,  2.61s/it][A
Train Diffusion:   6%|▋         | 323/5001 [21:40<3:23:09,  2.61s/it][A
Train Diffusion:   6%|▋         | 324/5001 [21:42<3:22:58,  2.60s/it][A
Train Diffusion:   6%|▋         | 325/5001 [21:45<3:22:58,  2.60s/it][A
Train Diffusion:   7%|▋         | 326/5001 [21:48<3:23:18,  2.61s/it][A
Train Diffusion:   7%|▋         | 327/5001 [21:50<3:23:03,  2.61s/it][A
Train Diffusion:   7%|▋         | 328/5001 [21:53<3:22:46,  2.60s/it][A
Train Diffusion:   7%|▋         | 329/5001 [21:56<3:22:48,  2.60s/it][A
Train Diffusion:   7%|▋         | 330/5001 [21:58<3:22:09,  2.60s/it][A
Train Diffusion:   7%|▋         | 331/5001 [22:01<3:22:16,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334837305.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7702, 0.4984, 1.2852],
        [8.7012, 0.5344, 1.3091],
        [8.6816, 0.5040, 1.3226]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6022,  0.9523,  0.8823],
         [12.6559,  0.6414,  0.9276],
         [ 2.0802,  0.4050,  0.7391],
         ...,
         [13.5351,  0.3740,  1.1810],
         [ 2.9243,  0.2255,  0.7291],
         [ 0.9025,  1.4732,  0.8657]],

        [[ 1.6015,  0.8114,  4.1919],
         [ 1.1276,  0.6527,  1.0382],
         [ 0.6194,  0.7309,  1.3145],
         ...,
         [ 0.5551,  0.3870,  1.9944],
         [ 2.0021,  0.2915,  0.8100],
         [27.2847,  1.1047,  2.0264]],

        [[ 0.7000,  0.9568,  0.8532],
         [ 0.6338,  0.7376,  1.9507],
         [27.4783,  0.2269,  1.2009],
         ...,
         [ 1.0434,  0.3392,  0.9972],
         [37.9950,  0.1429,  1.2287],
         [ 5.3156,  0.4588,  1.1974


Train Diffusion:   7%|▋         | 332/5001 [22:03<3:21:55,  2.59s/it][A
Train Diffusion:   7%|▋         | 333/5001 [22:06<3:21:39,  2.59s/it][A
Train Diffusion:   7%|▋         | 334/5001 [22:08<3:21:49,  2.59s/it][A
Train Diffusion:   7%|▋         | 335/5001 [22:11<3:21:44,  2.59s/it][A
Train Diffusion:   7%|▋         | 336/5001 [22:14<3:22:06,  2.60s/it][A
Train Diffusion:   7%|▋         | 337/5001 [22:16<3:21:35,  2.59s/it][A
Train Diffusion:   7%|▋         | 338/5001 [22:19<3:21:34,  2.59s/it][A
Train Diffusion:   7%|▋         | 339/5001 [22:21<3:21:30,  2.59s/it][A
Train Diffusion:   7%|▋         | 340/5001 [22:24<3:21:16,  2.59s/it][A
Train Diffusion:   7%|▋         | 341/5001 [22:27<3:22:00,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319127424.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7391, 0.4937, 1.2798],
        [8.7839, 0.5004, 1.3253],
        [8.5765, 0.5356, 1.2801]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5939e+00, 8.2400e-01, 4.6268e+00],
         [8.2043e-01, 8.0151e-01, 1.0474e+00],
         [8.2943e-01, 6.9879e-01, 1.3634e+00],
         ...,
         [4.3696e-01, 6.1058e-01, 1.0068e+00],
         [1.1670e-06, 4.7357e-01, 3.5144e+00],
         [1.3672e-01, 1.3401e+00, 8.5572e+00]],

        [[7.2311e-01, 9.5753e-01, 8.3843e-01],
         [6.9423e-01, 6.4676e-01, 1.8982e+00],
         [2.6372e+01, 2.5478e-01, 1.2026e+00],
         ...,
         [3.2628e+01, 1.7233e-01, 1.1335e+00],
         [4.6151e+00, 1.1770e-01, 1.1967e+01],
         [7.7756e-01, 2.7805e+00, 1.9914e+00]],

        [[5.8522e-01, 9.5080e-01, 9.1938e-01],
         [1.1293e+01, 5.7487e-01, 9.8936e-01],
         [1.8736e+00, 4.6108e-01, 6.6


Train Diffusion:   7%|▋         | 342/5001 [22:29<3:21:44,  2.60s/it][A
Train Diffusion:   7%|▋         | 343/5001 [22:32<3:21:58,  2.60s/it][A
Train Diffusion:   7%|▋         | 344/5001 [22:34<3:22:19,  2.61s/it][A
Train Diffusion:   7%|▋         | 345/5001 [22:37<3:22:04,  2.60s/it][A
Train Diffusion:   7%|▋         | 346/5001 [22:40<3:21:59,  2.60s/it][A
Train Diffusion:   7%|▋         | 347/5001 [22:42<3:21:41,  2.60s/it][A
Train Diffusion:   7%|▋         | 348/5001 [22:45<3:21:44,  2.60s/it][A
Train Diffusion:   7%|▋         | 349/5001 [22:47<3:21:22,  2.60s/it][A
Train Diffusion:   7%|▋         | 350/5001 [22:50<3:21:27,  2.60s/it][A
Train Diffusion:   7%|▋         | 351/5001 [22:53<3:21:30,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327274019.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6080, 0.4881, 1.3173],
        [8.7440, 0.5002, 1.3557],
        [8.9272, 0.4939, 1.2998]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.6287e-01, 9.5352e-01, 9.4000e-01],
         [5.3822e-01, 8.2832e-01, 1.5851e+00],
         [2.6748e+01, 2.1865e-01, 1.1864e+00],
         ...,
         [1.6899e+00, 4.1811e-02, 6.7989e-01],
         [2.2035e+00, 2.4929e-01, 6.2282e-01],
         [7.8966e+00, 1.9260e+00, 5.4949e+00]],

        [[1.5882e+00, 8.0685e-01, 1.2674e+00],
         [1.6404e+00, 3.9414e-01, 7.3168e-01],
         [5.8555e-01, 8.0265e-01, 1.0989e+00],
         ...,
         [2.4268e-01, 5.9649e-01, 9.8922e-01],
         [4.3316e+01, 1.0994e-01, 1.3579e+00],
         [6.4646e+00, 2.1435e-01, 7.6720e-01]],

        [[7.5408e-01, 9.6273e-01, 1.1452e+00],
         [1.3064e+01, 3.8669e-01, 1.2675e+00],
         [2.7027e+00, 3.8948e-01, 1.1


Train Diffusion:   7%|▋         | 352/5001 [22:55<3:21:12,  2.60s/it][A
Train Diffusion:   7%|▋         | 353/5001 [22:58<3:21:12,  2.60s/it][A
Train Diffusion:   7%|▋         | 354/5001 [23:00<3:21:33,  2.60s/it][A
Train Diffusion:   7%|▋         | 355/5001 [23:03<3:21:25,  2.60s/it][A
Train Diffusion:   7%|▋         | 356/5001 [23:06<3:22:22,  2.61s/it][A
Train Diffusion:   7%|▋         | 357/5001 [23:08<3:21:53,  2.61s/it][A
Train Diffusion:   7%|▋         | 358/5001 [23:11<3:22:39,  2.62s/it][A
Train Diffusion:   7%|▋         | 359/5001 [23:14<3:22:23,  2.62s/it][A
Train Diffusion:   7%|▋         | 360/5001 [23:16<3:21:34,  2.61s/it][A
Train Diffusion:   7%|▋         | 361/5001 [23:19<3:26:30,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318597219.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6105, 0.4938, 1.2903],
        [8.7395, 0.4957, 1.2946],
        [8.8153, 0.4974, 1.2923]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.3695e-01, 9.5391e-01, 8.8932e-01],
         [1.2592e+01, 6.3478e-01, 9.8728e-01],
         [2.1217e+00, 3.9757e-01, 7.2864e-01],
         ...,
         [2.6299e+01, 2.6997e-01, 7.9505e-01],
         [5.2016e+00, 3.3724e-02, 6.1382e+00],
         [1.6097e+00, 1.1636e+00, 1.3840e+00]],

        [[1.6063e+00, 8.0802e-01, 3.9263e+00],
         [1.2124e+00, 6.1590e-01, 9.7878e-01],
         [6.3366e-01, 7.4426e-01, 1.2824e+00],
         ...,
         [3.2835e-01, 1.7424e+01, 6.0094e-01],
         [6.9187e+00, 1.0327e+00, 9.0148e-01],
         [2.4357e+00, 9.4995e-01, 7.7558e-01]],

        [[6.5881e-01, 9.5491e-01, 8.8432e-01],
         [6.0052e-01, 7.7165e-01, 1.9184e+00],
         [2.6932e+01, 2.4455e-01, 1.2


Train Diffusion:   7%|▋         | 362/5001 [23:22<3:24:58,  2.65s/it][A
Train Diffusion:   7%|▋         | 363/5001 [23:24<3:25:29,  2.66s/it][A
Train Diffusion:   7%|▋         | 364/5001 [23:27<3:27:58,  2.69s/it][A
Train Diffusion:   7%|▋         | 365/5001 [23:30<3:25:24,  2.66s/it][A
Train Diffusion:   7%|▋         | 366/5001 [23:32<3:24:19,  2.64s/it][A
Train Diffusion:   7%|▋         | 367/5001 [23:35<3:22:47,  2.63s/it][A
Train Diffusion:   7%|▋         | 368/5001 [23:37<3:21:54,  2.61s/it][A
Train Diffusion:   7%|▋         | 369/5001 [23:40<3:21:25,  2.61s/it][A
Train Diffusion:   7%|▋         | 370/5001 [23:43<3:21:13,  2.61s/it][A
Train Diffusion:   7%|▋         | 371/5001 [23:45<3:20:52,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323718963.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6475, 0.4921, 1.2858],
        [8.8082, 0.5052, 1.3248],
        [8.5773, 0.4926, 1.2803]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7642e-01, 9.5098e-01, 8.2691e-01],
         [1.2243e+01, 7.3945e-01, 8.1622e-01],
         [1.9255e+00, 4.1113e-01, 7.2712e-01],
         ...,
         [8.2551e-01, 4.5098e-01, 1.0280e+00],
         [3.4215e+01, 2.5098e-01, 7.2046e-01],
         [4.1019e+00, 1.4133e+00, 8.2927e+00]],

        [[1.5885e+00, 8.1357e-01, 4.6515e+00],
         [1.0783e+00, 6.6572e-01, 1.1346e+00],
         [5.9628e-01, 7.2929e-01, 1.2972e+00],
         ...,
         [3.5678e+01, 1.1843e-01, 1.1498e+00],
         [5.1583e+00, 1.5556e-01, 7.8120e-01],
         [5.1277e-01, 3.0940e+00, 1.3039e+00]],

        [[7.3681e-01, 9.5878e-01, 8.2950e-01],
         [6.5802e-01, 7.1811e-01, 1.9152e+00],
         [2.7206e+01, 2.4337e-01, 1.2


Train Diffusion:   7%|▋         | 372/5001 [23:48<3:20:46,  2.60s/it][A
Train Diffusion:   7%|▋         | 373/5001 [23:50<3:20:53,  2.60s/it][A
Train Diffusion:   7%|▋         | 374/5001 [23:53<3:20:28,  2.60s/it][A
Train Diffusion:   7%|▋         | 375/5001 [23:56<3:20:18,  2.60s/it][A
Train Diffusion:   8%|▊         | 376/5001 [23:58<3:20:14,  2.60s/it][A
Train Diffusion:   8%|▊         | 377/5001 [24:01<3:20:31,  2.60s/it][A
Train Diffusion:   8%|▊         | 378/5001 [24:03<3:22:16,  2.63s/it][A
Train Diffusion:   8%|▊         | 379/5001 [24:06<3:21:34,  2.62s/it][A
Train Diffusion:   8%|▊         | 380/5001 [24:09<3:20:46,  2.61s/it][A
Train Diffusion:   8%|▊         | 381/5001 [24:11<3:22:21,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334086345.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6408, 0.4890, 1.3054],
        [8.7441, 0.4872, 1.2806],
        [8.8472, 0.5018, 1.2970]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4361,  0.8460,  0.6179],
         [ 1.7379,  0.3384,  1.4491],
         [23.6797,  0.3082,  1.1837],
         ...,
         [ 1.6759,  0.2456,  0.7745],
         [ 0.5036,  2.6263,  2.0474],
         [11.0248,  2.4474,  1.9224]],

        [[ 1.0080,  0.9433,  1.6393],
         [ 3.7422,  0.7282,  1.4957],
         [ 1.0086,  0.9246,  0.6996],
         ...,
         [33.4072,  0.1286,  1.1553],
         [ 5.4170,  0.1429,  0.6386],
         [ 0.4535,  2.0781,  0.7887]],

        [[ 0.4657,  0.9541,  0.9402],
         [ 6.5190,  0.6950,  2.3207],
         [ 1.1752,  0.6407,  0.9840],
         ...,
         [ 1.1768,  0.3705,  0.9016],
         [33.7059,  0.1734,  1.2915],
         [ 5.1247,  0.4169,  4.0166


Train Diffusion:   8%|▊         | 382/5001 [24:14<3:21:14,  2.61s/it][A
Train Diffusion:   8%|▊         | 383/5001 [24:16<3:20:32,  2.61s/it][A
Train Diffusion:   8%|▊         | 384/5001 [24:19<3:20:14,  2.60s/it][A
Train Diffusion:   8%|▊         | 385/5001 [24:22<3:19:57,  2.60s/it][A
Train Diffusion:   8%|▊         | 386/5001 [24:24<3:20:17,  2.60s/it][A
Train Diffusion:   8%|▊         | 387/5001 [24:27<3:19:50,  2.60s/it][A
Train Diffusion:   8%|▊         | 388/5001 [24:29<3:19:25,  2.59s/it][A
Train Diffusion:   8%|▊         | 389/5001 [24:32<3:20:18,  2.61s/it][A
Train Diffusion:   8%|▊         | 390/5001 [24:35<3:19:45,  2.60s/it][A
Train Diffusion:   8%|▊         | 391/5001 [24:37<3:19:27,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341988905.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7448, 0.4710, 1.2969],
        [8.8846, 0.4881, 1.2735],
        [8.6688, 0.5016, 1.3123]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5792,  0.9517,  0.9351],
         [ 0.5764,  0.8372,  1.7325],
         [26.6194,  0.2323,  1.2052],
         ...,
         [ 3.6900,  0.2747,  1.1675],
         [ 0.3177,  0.7956,  6.4682],
         [ 1.7368,  1.8006,  1.6956]],

        [[ 0.7298,  0.9591,  1.0924],
         [13.1868,  0.4310,  1.2287],
         [ 2.5195,  0.3887,  0.6783],
         ...,
         [ 0.4362,  0.6331,  1.1943],
         [ 0.2285,  0.6819,  2.1743],
         [ 0.5023,  1.4688,  0.6927]],

        [[ 1.5935,  0.8057,  1.9015],
         [ 1.5144,  0.4545,  0.7914],
         [ 0.6794,  0.7366,  1.4290],
         ...,
         [27.5728,  0.1201,  1.2921],
         [ 5.2043,  0.1514,  0.5674],
         [16.9712,  1.7703,  1.8208


Train Diffusion:   8%|▊         | 392/5001 [24:40<3:19:17,  2.59s/it][A
Train Diffusion:   8%|▊         | 393/5001 [24:42<3:19:36,  2.60s/it][A
Train Diffusion:   8%|▊         | 394/5001 [24:45<3:19:32,  2.60s/it][A
Train Diffusion:   8%|▊         | 395/5001 [24:48<3:19:58,  2.60s/it][A
Train Diffusion:   8%|▊         | 396/5001 [24:50<3:19:11,  2.60s/it][A
Train Diffusion:   8%|▊         | 397/5001 [24:53<3:19:04,  2.59s/it][A
Train Diffusion:   8%|▊         | 398/5001 [24:55<3:18:49,  2.59s/it][A
Train Diffusion:   8%|▊         | 399/5001 [24:58<3:18:47,  2.59s/it][A
Train Diffusion:   8%|▊         | 400/5001 [25:01<3:18:45,  2.59s/it][A
Train Diffusion:   8%|▊         | 401/5001 [25:03<3:18:55,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320228432.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7280, 0.5023, 1.2993],
        [8.7820, 0.5018, 1.2884],
        [8.9543, 0.4664, 1.2888]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6697,  0.9569,  0.8765],
         [ 0.6107,  0.7581,  1.8835],
         [26.8161,  0.2559,  1.2011],
         ...,
         [ 0.5421,  0.9239,  2.0933],
         [ 0.2338,  1.3658,  0.6554],
         [ 1.7904,  2.2848,  1.0933]],

        [[ 0.6258,  0.9549,  0.8794],
         [12.2335,  0.6507,  1.0099],
         [ 2.0051,  0.4159,  0.7568],
         ...,
         [ 0.1995,  0.5957,  0.7419],
         [36.6025,  0.1481,  1.0194],
         [ 6.2152,  0.2260,  0.7899]],

        [[ 1.6069,  0.8101,  4.0773],
         [ 1.1738,  0.6070,  0.9551],
         [ 0.6749,  0.7678,  1.1749],
         ...,
         [ 4.5638,  0.4535,  0.1548],
         [ 4.8154,  0.1077,  0.8265],
         [18.8831,  1.4124,  1.9942


Train Diffusion:   8%|▊         | 402/5001 [25:06<3:19:24,  2.60s/it][A
Train Diffusion:   8%|▊         | 403/5001 [25:09<3:25:06,  2.68s/it][A
Train Diffusion:   8%|▊         | 404/5001 [25:12<3:28:58,  2.73s/it][A
Train Diffusion:   8%|▊         | 405/5001 [25:14<3:28:03,  2.72s/it][A
Train Diffusion:   8%|▊         | 406/5001 [25:17<3:27:18,  2.71s/it][A
Train Diffusion:   8%|▊         | 407/5001 [25:20<3:33:25,  2.79s/it][A
Train Diffusion:   8%|▊         | 408/5001 [25:23<3:38:19,  2.85s/it][A
Train Diffusion:   8%|▊         | 409/5001 [25:25<3:32:14,  2.77s/it][A
Train Diffusion:   8%|▊         | 410/5001 [25:28<3:34:47,  2.81s/it][A
Train Diffusion:   8%|▊         | 411/5001 [25:31<3:30:08,  2.75s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339474550.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6520, 0.5071, 1.3000],
        [8.8244, 0.5042, 1.2649],
        [8.7482, 0.5011, 1.3197]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.1892e-01, 9.5849e-01, 1.0552e+00],
         [1.2167e+01, 4.7963e-01, 1.1989e+00],
         [2.2389e+00, 4.7393e-01, 1.2089e+00],
         ...,
         [1.6394e+01, 3.7267e-01, 6.2820e-01],
         [4.5751e+00, 1.3196e-01, 6.3033e-01],
         [9.7098e-01, 2.2096e+00, 9.5379e-01]],

        [[5.8691e-01, 9.5197e-01, 9.3622e-01],
         [5.6662e-01, 8.3105e-01, 1.7062e+00],
         [2.8260e+01, 1.6296e-01, 1.1967e+00],
         ...,
         [2.3136e-01, 5.3736e-01, 1.2102e+00],
         [3.5939e+01, 5.9465e-02, 1.4260e+00],
         [6.2854e+00, 1.3436e-01, 8.0530e+00]],

        [[1.5962e+00, 8.0613e-01, 2.3065e+00],
         [1.4270e+00, 4.9846e-01, 7.7454e-01],
         [7.4137e-01, 6.8753e-01, 8.5


Train Diffusion:   8%|▊         | 412/5001 [25:34<3:27:01,  2.71s/it][A
Train Diffusion:   8%|▊         | 413/5001 [25:36<3:25:20,  2.69s/it][A
Train Diffusion:   8%|▊         | 414/5001 [25:39<3:23:15,  2.66s/it][A
Train Diffusion:   8%|▊         | 415/5001 [25:42<3:25:35,  2.69s/it][A
Train Diffusion:   8%|▊         | 416/5001 [25:44<3:24:20,  2.67s/it][A
Train Diffusion:   8%|▊         | 417/5001 [25:47<3:24:58,  2.68s/it][A
Train Diffusion:   8%|▊         | 418/5001 [25:50<3:23:51,  2.67s/it][A
Train Diffusion:   8%|▊         | 419/5001 [25:52<3:24:02,  2.67s/it][A
Train Diffusion:   8%|▊         | 420/5001 [25:55<3:23:01,  2.66s/it][A
Train Diffusion:   8%|▊         | 421/5001 [25:58<3:23:23,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320970158.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6587, 0.5186, 1.3013],
        [8.9482, 0.4803, 1.2886],
        [8.6938, 0.4929, 1.2745]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6706,  0.9562,  0.8748],
         [ 0.6128,  0.7505,  1.9515],
         [26.7921,  0.2475,  1.2060],
         ...,
         [ 0.5442,  0.6526,  1.0526],
         [33.4166,  0.1751,  1.1972],
         [ 4.5956,  0.9725,  0.7612]],

        [[ 1.6081,  0.8106,  3.8712],
         [ 1.1673,  0.6472,  0.9920],
         [ 0.6321,  0.7477,  1.2785],
         ...,
         [ 3.9909,  0.1572,  0.7987],
         [ 0.9116,  0.3702,  0.5185],
         [24.2937,  1.7932,  1.7933]],

        [[ 0.6244,  0.9542,  0.9079],
         [12.8437,  0.6048,  1.0156],
         [ 2.1676,  0.3942,  0.7278],
         ...,
         [30.8010,  0.2369,  1.0489],
         [ 4.0876,  0.2432,  3.9126],
         [ 1.2071,  1.8389,  1.6253


Train Diffusion:   8%|▊         | 422/5001 [26:00<3:23:02,  2.66s/it][A
Train Diffusion:   8%|▊         | 423/5001 [26:03<3:23:08,  2.66s/it][A
Train Diffusion:   8%|▊         | 424/5001 [26:05<3:22:31,  2.65s/it][A
Train Diffusion:   8%|▊         | 425/5001 [26:08<3:21:55,  2.65s/it][A
Train Diffusion:   9%|▊         | 426/5001 [26:11<3:22:06,  2.65s/it][A
Train Diffusion:   9%|▊         | 427/5001 [26:13<3:21:47,  2.65s/it][A
Train Diffusion:   9%|▊         | 428/5001 [26:16<3:21:20,  2.64s/it][A
Train Diffusion:   9%|▊         | 429/5001 [26:19<3:22:03,  2.65s/it][A
Train Diffusion:   9%|▊         | 430/5001 [26:21<3:22:25,  2.66s/it][A
Train Diffusion:   9%|▊         | 431/5001 [26:24<3:23:16,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336634604.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8369, 0.4828, 1.3233],
        [8.8360, 0.4750, 1.2937],
        [8.6128, 0.5033, 1.2612]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5803,  0.8086,  1.4006],
         [ 1.6441,  0.3878,  0.7396],
         [ 0.5837,  0.8458,  1.0418],
         ...,
         [ 3.9975,  0.1616,  0.7939],
         [ 0.2825,  3.3120,  1.2100],
         [ 0.8013,  2.7031,  0.6470]],

        [[ 0.7696,  0.9624,  1.1434],
         [12.6344,  0.3912,  1.2657],
         [ 2.6689,  0.4088,  1.0421],
         ...,
         [ 0.4654,  0.6551,  0.5691],
         [32.8168,  0.4457,  0.9179],
         [ 5.7233,  0.1185,  7.4763]],

        [[ 0.5542,  0.9526,  0.9413],
         [ 0.5267,  0.8579,  1.5050],
         [26.1244,  0.2451,  1.1659],
         ...,
         [30.0073,  0.0855,  1.2769],
         [ 5.5345,  0.1042,  2.3661],
         [ 0.0879,  1.8833,  3.0901


Train Diffusion:   9%|▊         | 432/5001 [26:27<3:23:20,  2.67s/it][A
Train Diffusion:   9%|▊         | 433/5001 [26:29<3:24:03,  2.68s/it][A
Train Diffusion:   9%|▊         | 434/5001 [26:32<3:23:43,  2.68s/it][A
Train Diffusion:   9%|▊         | 435/5001 [26:35<3:22:21,  2.66s/it][A
Train Diffusion:   9%|▊         | 436/5001 [26:37<3:22:15,  2.66s/it][A
Train Diffusion:   9%|▊         | 437/5001 [26:40<3:22:10,  2.66s/it][A
Train Diffusion:   9%|▉         | 438/5001 [26:43<3:21:35,  2.65s/it][A
Train Diffusion:   9%|▉         | 439/5001 [26:45<3:21:41,  2.65s/it][A
Train Diffusion:   9%|▉         | 440/5001 [26:48<3:21:42,  2.65s/it][A
Train Diffusion:   9%|▉         | 441/5001 [26:51<3:21:37,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335513132.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8370, 0.4629, 1.2875],
        [8.7556, 0.5071, 1.3044],
        [8.6856, 0.5120, 1.2774]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.9795e-01, 9.5682e-01, 8.5670e-01],
         [7.1258e-01, 5.9706e-01, 2.0733e-01],
         [3.9437e-03, 2.3500e+00, 2.2815e-01],
         ...,
         [1.0106e+00, 4.9653e-01, 5.5227e-01],
         [2.7447e+00, 1.3875e+00, 1.7218e+00],
         [2.7721e-01, 2.5638e+00, 6.3613e-01]],

        [[6.0356e-01, 9.5256e-01, 9.1587e-01],
         [9.8379e+00, 5.4178e-01, 1.5422e+00],
         [2.1842e+00, 6.4652e-01, 6.5509e-01],
         ...,
         [3.1704e-02, 1.1125e+00, 2.3565e+00],
         [2.7803e-01, 5.2928e-01, 1.2676e+00],
         [4.9315e+00, 1.6894e+00, 2.4702e+00]],

        [[1.6014e+00, 8.4212e-01, 5.7145e+00],
         [8.4452e-01, 1.1415e+00, 1.2603e+00],
         [4.8693e-01, 8.2678e+00, 1.9


Train Diffusion:   9%|▉         | 442/5001 [26:54<3:26:57,  2.72s/it][A
Train Diffusion:   9%|▉         | 443/5001 [26:56<3:27:10,  2.73s/it][A
Train Diffusion:   9%|▉         | 444/5001 [26:59<3:27:39,  2.73s/it][A
Train Diffusion:   9%|▉         | 445/5001 [27:02<3:28:43,  2.75s/it][A
Train Diffusion:   9%|▉         | 446/5001 [27:05<3:28:46,  2.75s/it][A
Train Diffusion:   9%|▉         | 447/5001 [27:07<3:29:44,  2.76s/it][A
Train Diffusion:   9%|▉         | 448/5001 [27:10<3:32:12,  2.80s/it][A
Train Diffusion:   9%|▉         | 449/5001 [27:13<3:31:24,  2.79s/it][A
Train Diffusion:   9%|▉         | 450/5001 [27:16<3:30:55,  2.78s/it][A
Train Diffusion:   9%|▉         | 451/5001 [27:19<3:30:43,  2.78s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329816507.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5265, 0.5147, 1.3138],
        [8.6304, 0.5022, 1.3031],
        [9.0033, 0.4707, 1.2850]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2474e-01, 9.5228e-01, 9.2697e-01],
         [9.7836e-01, 7.2865e-01, 2.3161e+00],
         [1.0782e+01, 3.8214e-01, 9.5262e-01],
         ...,
         [3.8204e-01, 6.5764e-01, 6.7244e-01],
         [1.0661e-05, 7.2848e-01, 2.1598e+00],
         [1.9300e-01, 1.3109e+00, 1.3145e+01]],

        [[8.2746e-01, 9.6230e-01, 1.1940e+00],
         [1.0335e+01, 3.7959e-01, 1.2706e+00],
         [2.4696e+00, 3.1514e-01, 6.8098e-01],
         ...,
         [3.0816e+00, 1.8472e-01, 7.5501e-01],
         [1.2615e+01, 1.6112e+00, 5.2022e+00],
         [6.2862e-01, 3.8945e+00, 2.1512e+00]],

        [[1.5536e+00, 8.1718e-01, 5.8194e-01],
         [1.8091e+00, 3.5397e-01, 6.2125e-01],
         [1.1594e-03, 1.0388e+00, 8.6


Train Diffusion:   9%|▉         | 452/5001 [27:21<3:30:00,  2.77s/it][A
Train Diffusion:   9%|▉         | 453/5001 [27:24<3:30:55,  2.78s/it][A
Train Diffusion:   9%|▉         | 454/5001 [27:27<3:36:57,  2.86s/it][A
Train Diffusion:   9%|▉         | 455/5001 [27:30<3:40:51,  2.92s/it][A
Train Diffusion:   9%|▉         | 456/5001 [27:33<3:34:55,  2.84s/it][A
Train Diffusion:   9%|▉         | 457/5001 [27:35<3:29:59,  2.77s/it][A
Train Diffusion:   9%|▉         | 458/5001 [27:38<3:27:39,  2.74s/it][A
Train Diffusion:   9%|▉         | 459/5001 [27:41<3:25:21,  2.71s/it][A
Train Diffusion:   9%|▉         | 460/5001 [27:43<3:25:14,  2.71s/it][A
Train Diffusion:   9%|▉         | 461/5001 [27:46<3:26:35,  2.73s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334621331.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9611, 0.5227, 1.2870],
        [8.6952, 0.5148, 1.2924],
        [8.6280, 0.4875, 1.3114]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6488,  0.9542,  0.8873],
         [ 0.6029,  0.7293,  1.9278],
         [26.5611,  0.2589,  1.2024],
         ...,
         [ 0.4364,  0.6814,  0.7756],
         [32.2546,  0.2238,  0.8836],
         [ 4.7822,  0.8637,  0.7171]],

        [[ 1.6096,  0.8126,  3.4335],
         [ 1.1131,  0.6673,  0.9184],
         [ 0.6851,  0.7874,  1.1200],
         ...,
         [ 3.4412,  0.1471,  0.8876],
         [ 0.4404,  0.7948,  0.5333],
         [10.2233,  2.5905,  1.1467]],

        [[ 0.6450,  0.9541,  0.9629],
         [12.8785,  0.5185,  1.1342],
         [ 2.2072,  0.3967,  0.7772],
         ...,
         [34.5382,  0.1094,  1.1721],
         [ 5.2457,  0.1224,  2.3708],
         [ 1.0189,  0.3408,  1.1958


Train Diffusion:   9%|▉         | 462/5001 [27:49<3:25:05,  2.71s/it][A
Train Diffusion:   9%|▉         | 463/5001 [27:52<3:26:21,  2.73s/it][A
Train Diffusion:   9%|▉         | 464/5001 [27:55<3:28:20,  2.76s/it][A
Train Diffusion:   9%|▉         | 465/5001 [27:57<3:31:10,  2.79s/it][A
Train Diffusion:   9%|▉         | 466/5001 [28:00<3:36:04,  2.86s/it][A
Train Diffusion:   9%|▉         | 467/5001 [28:03<3:34:23,  2.84s/it][A
Train Diffusion:   9%|▉         | 468/5001 [28:06<3:36:53,  2.87s/it][A
Train Diffusion:   9%|▉         | 469/5001 [28:09<3:36:03,  2.86s/it][A
Train Diffusion:   9%|▉         | 470/5001 [28:12<3:35:36,  2.86s/it][A
Train Diffusion:   9%|▉         | 471/5001 [28:15<3:36:26,  2.87s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337320716.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9333, 0.4761, 1.3112],
        [8.6615, 0.5141, 1.2826],
        [8.6920, 0.4692, 1.3261]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4615e-01, 9.5510e-01, 8.1270e-01],
         [8.9333e+00, 9.6897e-01, 2.2039e+00],
         [1.0213e+00, 7.0436e-01, 9.9736e-01],
         ...,
         [2.0636e+00, 7.4293e-01, 1.3003e+00],
         [3.7222e-01, 2.0111e+00, 7.9832e+00],
         [2.3656e-01, 3.6192e+00, 2.4807e+00]],

        [[1.3246e+00, 8.7567e-01, 4.9358e+00],
         [1.2002e+00, 7.8054e-01, 1.5215e+00],
         [9.8506e-01, 8.2798e-01, 7.5336e-01],
         ...,
         [2.6624e-01, 6.1012e-01, 9.5309e-01],
         [1.7599e-06, 5.5878e-01, 2.9605e+00],
         [1.5409e-01, 1.2633e+00, 1.1047e+01]],

        [[1.1417e+00, 9.1724e-01, 9.3048e-01],
         [1.2137e+00, 4.4374e-01, 1.5093e+00],
         [2.3980e+01, 3.0128e-01, 1.1


Train Diffusion:   9%|▉         | 472/5001 [28:18<3:36:10,  2.86s/it][A
Train Diffusion:   9%|▉         | 473/5001 [28:20<3:31:02,  2.80s/it][A
Train Diffusion:   9%|▉         | 474/5001 [28:23<3:27:53,  2.76s/it][A
Train Diffusion:   9%|▉         | 475/5001 [28:26<3:25:13,  2.72s/it][A
Train Diffusion:  10%|▉         | 476/5001 [28:28<3:23:37,  2.70s/it][A
Train Diffusion:  10%|▉         | 477/5001 [28:31<3:22:30,  2.69s/it][A
Train Diffusion:  10%|▉         | 478/5001 [28:33<3:22:06,  2.68s/it][A
Train Diffusion:  10%|▉         | 479/5001 [28:36<3:21:06,  2.67s/it][A
Train Diffusion:  10%|▉         | 480/5001 [28:39<3:20:35,  2.66s/it][A
Train Diffusion:  10%|▉         | 481/5001 [28:41<3:20:25,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324165712.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7373, 0.5099, 1.3181],
        [8.7408, 0.4945, 1.2606],
        [8.7779, 0.4930, 1.3223]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8788,  0.9596,  1.3121],
         [ 9.2118,  0.4807,  1.3071],
         [ 2.0816,  0.6626,  0.6757],
         ...,
         [ 4.1611,  0.1272,  0.6560],
         [ 0.1558,  5.0498,  1.2717],
         [20.9327,  1.3947,  2.0318]],

        [[ 0.5039,  0.9516,  0.9402],
         [ 1.4964,  0.7884,  0.8326],
         [ 0.7438,  0.7261,  1.0271],
         ...,
         [ 0.2028,  8.9279,  0.7057],
         [24.3364,  0.6947,  0.5573],
         [ 5.0383,  0.3245,  1.1206]],

        [[ 1.5226,  0.8282,  0.7367],
         [ 1.8062,  0.3357,  1.1885],
         [21.2682,  0.3271,  1.2713],
         ...,
         [28.3213,  0.0742,  1.3017],
         [ 5.7733,  0.0749,  0.6868],
         [ 0.5980,  1.9487,  0.7759


Train Diffusion:  10%|▉         | 482/5001 [28:44<3:20:11,  2.66s/it][A
Train Diffusion:  10%|▉         | 483/5001 [28:47<3:20:16,  2.66s/it][A
Train Diffusion:  10%|▉         | 484/5001 [28:49<3:19:54,  2.66s/it][A
Train Diffusion:  10%|▉         | 485/5001 [28:52<3:20:14,  2.66s/it][A
Train Diffusion:  10%|▉         | 486/5001 [28:55<3:20:02,  2.66s/it][A
Train Diffusion:  10%|▉         | 487/5001 [28:57<3:19:56,  2.66s/it][A
Train Diffusion:  10%|▉         | 488/5001 [29:00<3:21:19,  2.68s/it][A
Train Diffusion:  10%|▉         | 489/5001 [29:03<3:23:12,  2.70s/it][A
Train Diffusion:  10%|▉         | 490/5001 [29:05<3:21:39,  2.68s/it][A
Train Diffusion:  10%|▉         | 491/5001 [29:08<3:21:29,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 313947689.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9841, 0.4679, 1.2740],
        [8.8128, 0.4841, 1.2877],
        [8.6578, 0.4941, 1.3058]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8110,  0.9632,  1.1989],
         [12.2707,  0.3347,  1.2819],
         [ 2.9082,  0.3737,  0.8066],
         ...,
         [31.3395,  0.1189,  1.2173],
         [ 5.4407,  0.1277,  0.6546],
         [ 1.2066,  0.7158,  1.1805]],

        [[ 1.5617,  0.8140,  0.7567],
         [ 1.7981,  0.3489,  0.8717],
         [ 5.1869,  0.5534,  1.2162],
         ...,
         [ 0.6108,  0.5982,  0.9250],
         [35.8769,  0.1726,  0.8777],
         [ 5.0218,  0.8569,  0.7652]],

        [[ 0.5324,  0.9533,  0.9206],
         [ 0.5939,  0.7932,  0.8470],
         [ 9.2965,  0.5050,  0.8606],
         ...,
         [ 3.2817,  0.1784,  0.8652],
         [ 0.4830,  0.6152,  0.3100],
         [ 0.6250,  3.3993,  0.6295


Train Diffusion:  10%|▉         | 492/5001 [29:11<3:21:12,  2.68s/it][A
Train Diffusion:  10%|▉         | 493/5001 [29:14<3:21:40,  2.68s/it][A
Train Diffusion:  10%|▉         | 494/5001 [29:16<3:19:53,  2.66s/it][A
Train Diffusion:  10%|▉         | 495/5001 [29:19<3:18:30,  2.64s/it][A
Train Diffusion:  10%|▉         | 496/5001 [29:21<3:16:59,  2.62s/it][A
Train Diffusion:  10%|▉         | 497/5001 [29:24<3:16:33,  2.62s/it][A
Train Diffusion:  10%|▉         | 498/5001 [29:27<3:15:48,  2.61s/it][A
Train Diffusion:  10%|▉         | 499/5001 [29:29<3:17:24,  2.63s/it][A
Train Diffusion:  10%|▉         | 500/5001 [29:32<3:18:43,  2.65s/it][A
Train Diffusion:  10%|█         | 501/5001 [29:35<3:20:47,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325859331.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9460, 0.4739, 1.2951],
        [8.8910, 0.4807, 1.3032],
        [8.6892, 0.4980, 1.3124]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5997,  0.8167,  4.6024],
         [ 1.0535,  0.6881,  1.1150],
         [ 0.6141,  0.7340,  1.2809],
         ...,
         [ 0.2886,  2.6474,  6.4863],
         [ 0.3689,  2.3909,  1.1942],
         [ 0.2912,  1.4121,  9.4005]],

        [[ 0.7204,  0.9605,  0.8417],
         [ 0.6529,  0.7143,  1.9518],
         [27.6443,  0.2287,  1.2003],
         ...,
         [ 0.6710,  2.4251,  1.0474],
         [ 0.4174,  1.1579,  9.6733],
         [ 0.1057,  2.5858,  3.0784]],

        [[ 0.5851,  0.9538,  0.8497],
         [12.3063,  0.7030,  0.8761],
         [ 1.9664,  0.4090,  0.7696],
         ...,
         [ 2.6495,  2.1983,  2.0785],
         [ 7.7819,  0.0904,  1.2238],
         [ 4.5363,  0.5837,  1.0629


Train Diffusion:  10%|█         | 502/5001 [29:37<3:19:15,  2.66s/it][A
Train Diffusion:  10%|█         | 503/5001 [29:40<3:17:31,  2.63s/it][A
Train Diffusion:  10%|█         | 504/5001 [29:42<3:16:28,  2.62s/it][A
Train Diffusion:  10%|█         | 505/5001 [29:45<3:15:37,  2.61s/it][A
Train Diffusion:  10%|█         | 506/5001 [29:48<3:15:02,  2.60s/it][A
Train Diffusion:  10%|█         | 507/5001 [29:50<3:15:00,  2.60s/it][A
Train Diffusion:  10%|█         | 508/5001 [29:53<3:14:37,  2.60s/it][A
Train Diffusion:  10%|█         | 509/5001 [29:55<3:14:08,  2.59s/it][A
Train Diffusion:  10%|█         | 510/5001 [29:58<3:14:13,  2.59s/it][A
Train Diffusion:  10%|█         | 511/5001 [30:01<3:15:02,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324635849.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8277, 0.5095, 1.3030],
        [8.6252, 0.4965, 1.3087],
        [8.8555, 0.4741, 1.3090]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.5145e-01, 9.5152e-01, 9.2543e-01],
         [5.1442e-01, 8.3958e-01, 1.2401e+00],
         [1.9161e+01, 3.3035e-01, 1.0487e+00],
         ...,
         [4.1891e-01, 4.3792e+00, 1.8973e+00],
         [1.9778e+00, 3.7398e-01, 5.6040e-02],
         [5.1116e+00, 2.7263e-01, 9.0122e-01]],

        [[1.5789e+00, 8.0783e-01, 1.1134e+00],
         [1.7367e+00, 3.6116e-01, 7.0649e-01],
         [1.3592e+00, 7.5516e-01, 1.0124e+00],
         ...,
         [2.1025e+01, 9.0834e-02, 1.3105e+00],
         [4.6577e+00, 9.1496e-02, 7.8621e-01],
         [3.5101e-01, 2.4753e+00, 1.6229e+00]],

        [[7.7371e-01, 9.6135e-01, 1.1584e+00],
         [1.3105e+01, 3.3408e-01, 1.2761e+00],
         [2.9565e+00, 3.5729e-01, 8.8


Train Diffusion:  10%|█         | 512/5001 [30:03<3:14:53,  2.60s/it][A
Train Diffusion:  10%|█         | 513/5001 [30:06<3:15:08,  2.61s/it][A
Train Diffusion:  10%|█         | 514/5001 [30:08<3:14:50,  2.61s/it][A
Train Diffusion:  10%|█         | 515/5001 [30:11<3:14:47,  2.61s/it][A
Train Diffusion:  10%|█         | 516/5001 [30:14<3:14:14,  2.60s/it][A
Train Diffusion:  10%|█         | 517/5001 [30:16<3:13:50,  2.59s/it][A
Train Diffusion:  10%|█         | 518/5001 [30:19<3:13:38,  2.59s/it][A
Train Diffusion:  10%|█         | 519/5001 [30:21<3:14:01,  2.60s/it][A
Train Diffusion:  10%|█         | 520/5001 [30:24<3:15:02,  2.61s/it][A
Train Diffusion:  10%|█         | 521/5001 [30:27<3:14:38,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316189619.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8367, 0.4932, 1.2896],
        [8.6391, 0.5029, 1.2857],
        [8.6508, 0.4965, 1.3088]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.1541e-01, 9.5692e-01, 1.0632e+00],
         [1.2084e+01, 4.7462e-01, 1.2095e+00],
         [2.2061e+00, 5.0543e-01, 1.1493e+00],
         ...,
         [1.3226e+00, 3.7853e-01, 7.2594e-01],
         [1.2310e-01, 1.3222e+00, 1.3541e+00],
         [1.9404e+01, 1.4380e+00, 2.0420e+00]],

        [[5.9027e-01, 9.5078e-01, 9.3929e-01],
         [5.5355e-01, 7.5809e-01, 1.7710e+00],
         [2.8279e+01, 1.6895e-01, 1.1936e+00],
         ...,
         [2.8565e-02, 4.3650e+00, 2.0969e+00],
         [2.3240e+01, 5.7241e-01, 6.0931e-01],
         [5.5273e+00, 2.4350e-01, 1.0572e+00]],

        [[1.5956e+00, 8.1012e-01, 2.2396e+00],
         [1.2791e+00, 5.4339e-01, 7.7729e-01],
         [7.5431e-01, 6.9931e-01, 9.2


Train Diffusion:  10%|█         | 522/5001 [30:29<3:14:30,  2.61s/it][A
Train Diffusion:  10%|█         | 523/5001 [30:32<3:14:15,  2.60s/it][A
Train Diffusion:  10%|█         | 524/5001 [30:34<3:14:02,  2.60s/it][A
Train Diffusion:  10%|█         | 525/5001 [30:37<3:14:06,  2.60s/it][A
Train Diffusion:  11%|█         | 526/5001 [30:40<3:14:23,  2.61s/it][A
Train Diffusion:  11%|█         | 527/5001 [30:42<3:14:26,  2.61s/it][A
Train Diffusion:  11%|█         | 528/5001 [30:45<3:14:05,  2.60s/it][A
Train Diffusion:  11%|█         | 529/5001 [30:47<3:13:39,  2.60s/it][A
Train Diffusion:  11%|█         | 530/5001 [30:50<3:14:31,  2.61s/it][A
Train Diffusion:  11%|█         | 531/5001 [30:53<3:16:28,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319303056.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7178, 0.4937, 1.3166],
        [8.5660, 0.5188, 1.3054],
        [8.9734, 0.4785, 1.2897]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.7565e-01, 9.5253e-01, 7.3226e-01],
         [8.4831e+00, 1.1670e+00, 2.6999e+00],
         [7.6691e-01, 8.5248e-01, 8.9435e-01],
         ...,
         [2.5839e+01, 1.0305e-01, 1.3368e+00],
         [5.3329e+00, 1.2805e-01, 4.8851e-01],
         [9.9567e+00, 1.9077e+00, 4.3196e+00]],

        [[9.6815e-01, 9.4903e-01, 7.9604e-01],
         [9.2142e-01, 5.4126e-01, 1.2213e+00],
         [1.6834e+01, 3.5268e-01, 1.1801e+00],
         ...,
         [2.3247e+00, 3.8607e-01, 9.5093e-01],
         [4.3473e-01, 6.0717e-01, 7.3723e-01],
         [2.8727e-02, 1.6040e+00, 2.6974e+00]],

        [[1.4643e+00, 8.4008e-01, 5.5966e+00],
         [8.6573e-01, 2.7306e-01, 1.1683e+00],
         [1.3471e+00, 6.4520e-01, 7.0


Train Diffusion:  11%|█         | 532/5001 [30:55<3:15:19,  2.62s/it][A
Train Diffusion:  11%|█         | 533/5001 [30:58<3:14:34,  2.61s/it][A
Train Diffusion:  11%|█         | 534/5001 [31:01<3:14:05,  2.61s/it][A
Train Diffusion:  11%|█         | 535/5001 [31:03<3:13:57,  2.61s/it][A
Train Diffusion:  11%|█         | 536/5001 [31:06<3:13:39,  2.60s/it][A
Train Diffusion:  11%|█         | 537/5001 [31:08<3:14:33,  2.62s/it][A
Train Diffusion:  11%|█         | 538/5001 [31:11<3:13:45,  2.60s/it][A
Train Diffusion:  11%|█         | 539/5001 [31:14<3:13:50,  2.61s/it][A
Train Diffusion:  11%|█         | 540/5001 [31:16<3:13:20,  2.60s/it][A
Train Diffusion:  11%|█         | 541/5001 [31:19<3:13:04,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337168121.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6650, 0.5005, 1.2721],
        [8.8104, 0.5037, 1.3296],
        [8.6278, 0.5189, 1.2989]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.1909e-01, 9.6120e-01, 1.1940e+00],
         [1.1748e+01, 3.4447e-01, 1.2789e+00],
         [2.7866e+00, 3.1645e-01, 5.8023e-01],
         ...,
         [3.0814e+01, 3.3387e-01, 1.1343e+00],
         [3.5391e+00, 1.2210e-01, 5.7869e-01],
         [4.6106e+00, 2.0297e-01, 3.3629e+00]],

        [[5.2924e-01, 9.5124e-01, 9.2225e-01],
         [6.8369e-01, 7.8252e-01, 1.4634e+00],
         [1.7702e+01, 1.4112e-01, 1.0668e+00],
         ...,
         [2.8275e+00, 4.3726e-01, 1.0349e+00],
         [4.7593e-01, 4.7734e-01, 8.2347e-01],
         [7.5755e-04, 1.2607e+00, 3.2249e+00]],

        [[1.5557e+00, 8.1535e-01, 6.6981e-01],
         [1.8120e+00, 3.4678e-01, 6.7482e-01],
         [7.9407e-03, 7.8567e-01, 1.2


Train Diffusion:  11%|█         | 542/5001 [31:21<3:13:07,  2.60s/it][A
Train Diffusion:  11%|█         | 543/5001 [31:24<3:12:46,  2.59s/it][A
Train Diffusion:  11%|█         | 544/5001 [31:27<3:12:59,  2.60s/it][A
Train Diffusion:  11%|█         | 545/5001 [31:29<3:12:47,  2.60s/it][A
Train Diffusion:  11%|█         | 546/5001 [31:32<3:14:10,  2.62s/it][A
Train Diffusion:  11%|█         | 547/5001 [31:35<3:19:37,  2.69s/it][A
Train Diffusion:  11%|█         | 548/5001 [31:37<3:19:57,  2.69s/it][A
Train Diffusion:  11%|█         | 549/5001 [31:40<3:17:41,  2.66s/it][A
Train Diffusion:  11%|█         | 550/5001 [31:43<3:16:07,  2.64s/it][A
Train Diffusion:  11%|█         | 551/5001 [31:45<3:15:14,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336706275.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7613, 0.4817, 1.2881],
        [8.9588, 0.5163, 1.2704],
        [8.5717, 0.5046, 1.2469]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8211,  0.9620,  0.7949],
         [ 0.7755,  0.6193,  1.8910],
         [28.2686,  0.1845,  1.1972],
         ...,
         [ 0.5384,  1.0849,  1.0047],
         [ 0.5705,  0.5239,  0.7814],
         [ 5.4370,  1.1832,  2.4912]],

        [[ 0.5274,  0.9520,  0.8375],
         [11.2784,  0.7375,  0.7509],
         [ 1.7191,  0.4527,  0.7510],
         ...,
         [31.9633,  0.1839,  1.0279],
         [ 4.4115,  0.0954,  0.3518],
         [ 2.8463,  0.5964,  0.8402]],

        [[ 1.5561,  0.8261,  5.4277],
         [ 0.8044,  0.7956,  1.3145],
         [ 0.6977,  0.6920,  1.2612],
         ...,
         [ 2.1502,  0.2336,  1.0558],
         [21.1281,  0.2891,  1.1219],
         [ 3.7194,  0.6403,  6.7312


Train Diffusion:  11%|█         | 552/5001 [31:48<3:14:15,  2.62s/it][A
Train Diffusion:  11%|█         | 553/5001 [31:50<3:13:59,  2.62s/it][A
Train Diffusion:  11%|█         | 554/5001 [31:53<3:13:35,  2.61s/it][A
Train Diffusion:  11%|█         | 555/5001 [31:56<3:13:10,  2.61s/it][A
Train Diffusion:  11%|█         | 556/5001 [31:58<3:12:33,  2.60s/it][A
Train Diffusion:  11%|█         | 557/5001 [32:01<3:12:38,  2.60s/it][A
Train Diffusion:  11%|█         | 558/5001 [32:03<3:12:21,  2.60s/it][A
Train Diffusion:  11%|█         | 559/5001 [32:06<3:12:38,  2.60s/it][A
Train Diffusion:  11%|█         | 560/5001 [32:09<3:12:32,  2.60s/it][A
Train Diffusion:  11%|█         | 561/5001 [32:11<3:11:59,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325796204.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8940, 0.4585, 1.2961],
        [8.7076, 0.4866, 1.3284],
        [8.8256, 0.4897, 1.3113]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2787e-01, 9.5378e-01, 7.4972e-01],
         [1.0572e+01, 9.7451e-01, 1.3036e+00],
         [1.3414e+00, 5.7362e-01, 8.4949e-01],
         ...,
         [2.4129e-04, 3.9508e-01, 3.5358e+00],
         [1.8029e-01, 9.9937e-01, 5.0017e-01],
         [2.2082e+01, 1.5436e+00, 2.0218e+00]],

        [[8.1920e-01, 9.6361e-01, 7.9535e-01],
         [7.6318e-01, 6.1936e-01, 1.7152e+00],
         [2.6136e+01, 2.8454e-01, 1.1917e+00],
         ...,
         [4.5914e+00, 2.8895e-02, 9.8666e-01],
         [4.4110e+00, 8.3361e-02, 8.5166e-01],
         [4.8034e-01, 2.2032e+00, 6.5798e-01]],

        [[1.5580e+00, 8.2685e-01, 5.7926e+00],
         [8.0973e-01, 6.9957e-01, 1.3021e+00],
         [7.5116e-01, 8.1509e-01, 9.2


Train Diffusion:  11%|█         | 562/5001 [32:14<3:12:23,  2.60s/it][A
Train Diffusion:  11%|█▏        | 563/5001 [32:16<3:12:14,  2.60s/it][A
Train Diffusion:  11%|█▏        | 564/5001 [32:19<3:11:57,  2.60s/it][A
Train Diffusion:  11%|█▏        | 565/5001 [32:22<3:11:48,  2.59s/it][A
Train Diffusion:  11%|█▏        | 566/5001 [32:24<3:12:11,  2.60s/it][A
Train Diffusion:  11%|█▏        | 567/5001 [32:27<3:11:56,  2.60s/it][A
Train Diffusion:  11%|█▏        | 568/5001 [32:29<3:11:34,  2.59s/it][A
Train Diffusion:  11%|█▏        | 569/5001 [32:32<3:11:55,  2.60s/it][A
Train Diffusion:  11%|█▏        | 570/5001 [32:35<3:11:41,  2.60s/it][A
Train Diffusion:  11%|█▏        | 571/5001 [32:37<3:11:56,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327854275.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6478, 0.4954, 1.2933],
        [8.7875, 0.4923, 1.3156],
        [8.6777, 0.4953, 1.2969]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0789,  0.9277,  1.8594],
         [ 2.5008,  0.8115,  1.4515],
         [ 0.8384,  0.8541,  0.9713],
         ...,
         [ 1.0059,  0.9603,  0.9258],
         [ 1.9058,  0.2193,  1.2820],
         [21.8500,  1.5195,  2.0518]],

        [[ 1.3754,  0.8578,  0.6107],
         [ 1.6412,  0.3560,  1.8279],
         [27.1349,  0.2621,  1.1840],
         ...,
         [19.4209,  1.0556,  2.0994],
         [ 1.2979,  1.1526,  1.8571],
         [ 0.3207,  1.9439,  0.8080]],

        [[ 0.4544,  0.9530,  0.9614],
         [ 8.3261,  0.6163,  1.2619],
         [ 1.5469,  0.5267,  0.8790],
         ...,
         [ 0.2961,  0.5125,  0.6864],
         [34.2016,  0.2120,  0.8883],
         [ 5.0851,  0.4551,  1.1735


Train Diffusion:  11%|█▏        | 572/5001 [32:40<3:11:26,  2.59s/it][A
Train Diffusion:  11%|█▏        | 573/5001 [32:42<3:11:17,  2.59s/it][A
Train Diffusion:  11%|█▏        | 574/5001 [32:45<3:11:35,  2.60s/it][A
Train Diffusion:  11%|█▏        | 575/5001 [32:47<3:11:26,  2.60s/it][A
Train Diffusion:  12%|█▏        | 576/5001 [32:50<3:11:22,  2.59s/it][A
Train Diffusion:  12%|█▏        | 577/5001 [32:53<3:11:31,  2.60s/it][A
Train Diffusion:  12%|█▏        | 578/5001 [32:55<3:11:06,  2.59s/it][A
Train Diffusion:  12%|█▏        | 579/5001 [32:58<3:11:05,  2.59s/it][A
Train Diffusion:  12%|█▏        | 580/5001 [33:00<3:10:53,  2.59s/it][A
Train Diffusion:  12%|█▏        | 581/5001 [33:03<3:11:15,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327444636.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7643, 0.4869, 1.2766],
        [8.8722, 0.4691, 1.2825],
        [8.7311, 0.4929, 1.3153]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6048e+00, 8.1611e-01, 4.1542e+00],
         [1.0543e+00, 6.6757e-01, 9.2694e-01],
         [7.4548e-01, 7.8296e-01, 6.3825e-01],
         ...,
         [2.8564e+00, 1.7156e-01, 7.9791e-01],
         [1.7299e-01, 7.0235e+00, 1.0423e+00],
         [2.0132e+01, 1.4621e+00, 1.8559e+00]],

        [[6.9185e-01, 9.5872e-01, 8.5861e-01],
         [6.3624e-01, 7.1840e-01, 1.8515e+00],
         [2.8356e+01, 8.8526e-02, 1.2011e+00],
         ...,
         [5.8162e-01, 6.6733e-01, 5.6013e-01],
         [1.5072e+01, 9.0835e-01, 8.6878e-01],
         [1.7738e+00, 1.6785e+00, 1.4176e+00]],

        [[6.0710e-01, 9.5490e-01, 9.0309e-01],
         [1.1922e+01, 6.1892e-01, 1.0676e+00],
         [1.9174e+00, 4.5536e-01, 8.8


Train Diffusion:  12%|█▏        | 582/5001 [33:06<3:11:07,  2.60s/it][A
Train Diffusion:  12%|█▏        | 583/5001 [33:08<3:12:30,  2.61s/it][A
Train Diffusion:  12%|█▏        | 584/5001 [33:11<3:12:52,  2.62s/it][A
Train Diffusion:  12%|█▏        | 585/5001 [33:14<3:12:12,  2.61s/it][A
Train Diffusion:  12%|█▏        | 586/5001 [33:16<3:11:41,  2.61s/it][A
Train Diffusion:  12%|█▏        | 587/5001 [33:19<3:11:48,  2.61s/it][A
Train Diffusion:  12%|█▏        | 588/5001 [33:21<3:11:46,  2.61s/it][A
Train Diffusion:  12%|█▏        | 589/5001 [33:24<3:11:21,  2.60s/it][A
Train Diffusion:  12%|█▏        | 590/5001 [33:27<3:10:56,  2.60s/it][A
Train Diffusion:  12%|█▏        | 591/5001 [33:29<3:11:20,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332631724.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6097, 0.5281, 1.3204],
        [8.7327, 0.5105, 1.3129],
        [8.7754, 0.4928, 1.3336]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.6446e-01, 9.5316e-01, 9.3126e-01],
         [5.6449e+00, 7.4081e-01, 1.6040e+00],
         [1.0193e+00, 7.1593e-01, 9.4805e-01],
         ...,
         [4.2947e+00, 1.3994e-01, 6.6376e-01],
         [7.4584e-01, 8.3850e-01, 5.0090e+00],
         [4.1126e-01, 2.6198e+00, 2.9386e+00]],

        [[1.4285e+00, 8.4632e-01, 6.4752e-01],
         [1.7232e+00, 3.3872e-01, 1.5075e+00],
         [2.4120e+01, 3.0123e-01, 1.1889e+00],
         ...,
         [3.0574e+01, 7.4727e-02, 1.2814e+00],
         [5.7506e+00, 2.9426e-02, 4.6856e-01],
         [2.3235e+00, 7.0087e-01, 8.9890e-01]],

        [[1.0178e+00, 9.4057e-01, 1.6989e+00],
         [4.8229e+00, 6.4228e-01, 1.4131e+00],
         [1.3354e+00, 8.2094e-01, 7.2


Train Diffusion:  12%|█▏        | 592/5001 [33:32<3:11:05,  2.60s/it][A
Train Diffusion:  12%|█▏        | 593/5001 [33:34<3:11:52,  2.61s/it][A
Train Diffusion:  12%|█▏        | 594/5001 [33:37<3:15:36,  2.66s/it][A
Train Diffusion:  12%|█▏        | 595/5001 [33:40<3:16:32,  2.68s/it][A
Train Diffusion:  12%|█▏        | 596/5001 [33:42<3:14:27,  2.65s/it][A
Train Diffusion:  12%|█▏        | 597/5001 [33:45<3:12:54,  2.63s/it][A
Train Diffusion:  12%|█▏        | 598/5001 [33:48<3:12:27,  2.62s/it][A
Train Diffusion:  12%|█▏        | 599/5001 [33:50<3:13:34,  2.64s/it][A
Train Diffusion:  12%|█▏        | 600/5001 [33:53<3:12:30,  2.62s/it][A
Train Diffusion:  12%|█▏        | 601/5001 [33:55<3:11:59,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341361804.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6235, 0.5008, 1.3027],
        [8.7501, 0.5200, 1.2979],
        [8.8254, 0.4832, 1.2958]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2507e-01, 9.5172e-01, 9.2511e-01],
         [7.0357e-01, 7.6203e-01, 6.4954e-01],
         [7.3282e+00, 4.6975e-01, 7.4333e-01],
         ...,
         [1.7202e-01, 2.6882e+00, 6.3501e+00],
         [3.3403e-01, 2.7218e+00, 1.1907e+00],
         [2.6349e-01, 1.4257e+00, 1.1092e+01]],

        [[1.5505e+00, 8.1785e-01, 5.8731e-01],
         [1.8119e+00, 3.5037e-01, 7.7692e-01],
         [1.3359e-04, 6.7459e-01, 1.7730e+00],
         ...,
         [1.7887e+00, 2.1964e+00, 1.3421e+00],
         [2.7903e+00, 5.8542e-02, 1.3021e+00],
         [3.4238e+00, 1.2053e+00, 1.0660e+00]],

        [[8.2783e-01, 9.6169e-01, 1.1892e+00],
         [1.1512e+01, 3.6045e-01, 1.2686e+00],
         [2.7525e+00, 3.4969e-01, 1.0


Train Diffusion:  12%|█▏        | 602/5001 [33:58<3:11:35,  2.61s/it][A
Train Diffusion:  12%|█▏        | 603/5001 [34:01<3:11:07,  2.61s/it][A
Train Diffusion:  12%|█▏        | 604/5001 [34:03<3:11:43,  2.62s/it][A
Train Diffusion:  12%|█▏        | 605/5001 [34:06<3:11:01,  2.61s/it][A
Train Diffusion:  12%|█▏        | 606/5001 [34:09<3:10:34,  2.60s/it][A
Train Diffusion:  12%|█▏        | 607/5001 [34:11<3:10:19,  2.60s/it][A
Train Diffusion:  12%|█▏        | 608/5001 [34:14<3:10:05,  2.60s/it][A
Train Diffusion:  12%|█▏        | 609/5001 [34:16<3:09:43,  2.59s/it][A
Train Diffusion:  12%|█▏        | 610/5001 [34:19<3:09:36,  2.59s/it][A
Train Diffusion:  12%|█▏        | 611/5001 [34:21<3:09:50,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318385811.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7410, 0.4815, 1.3145],
        [8.7633, 0.5063, 1.3181],
        [8.8872, 0.5094, 1.3011]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8502,  0.9638,  1.2182],
         [ 9.2049,  0.4792,  1.2789],
         [ 2.0490,  0.5786,  0.6318],
         ...,
         [ 0.3993,  0.5966,  0.8133],
         [36.7606,  0.1484,  1.0722],
         [ 5.8107,  0.3671,  0.8933]],

        [[ 0.5143,  0.9540,  0.9368],
         [ 1.2165,  0.6988,  1.1091],
         [ 1.5582,  0.1259,  0.5663],
         ...,
         [29.5441,  0.1007,  1.2800],
         [ 5.3833,  0.1391,  0.5862],
         [ 0.6270,  1.5217,  8.1235]],

        [[ 1.5435,  0.8232,  0.5370],
         [ 1.7957,  0.3588,  0.8769],
         [ 6.2776,  0.4883,  1.1475],
         ...,
         [ 3.2885,  0.2595,  0.8197],
         [ 0.4692,  1.5099,  0.6896],
         [13.9155,  1.5916,  1.9642


Train Diffusion:  12%|█▏        | 612/5001 [34:24<3:10:54,  2.61s/it][A
Train Diffusion:  12%|█▏        | 613/5001 [34:27<3:10:59,  2.61s/it][A
Train Diffusion:  12%|█▏        | 614/5001 [34:29<3:10:27,  2.60s/it][A
Train Diffusion:  12%|█▏        | 615/5001 [34:32<3:10:29,  2.61s/it][A
Train Diffusion:  12%|█▏        | 616/5001 [34:35<3:10:15,  2.60s/it][A
Train Diffusion:  12%|█▏        | 617/5001 [34:37<3:10:48,  2.61s/it][A
Train Diffusion:  12%|█▏        | 618/5001 [34:40<3:10:14,  2.60s/it][A
Train Diffusion:  12%|█▏        | 619/5001 [34:42<3:10:42,  2.61s/it][A
Train Diffusion:  12%|█▏        | 620/5001 [34:45<3:10:13,  2.61s/it][A
Train Diffusion:  12%|█▏        | 621/5001 [34:48<3:10:35,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331740633.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6771, 0.4918, 1.2952],
        [8.5625, 0.5055, 1.2946],
        [8.8514, 0.5134, 1.3135]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.3854e-01, 9.5417e-01, 1.3665e+00],
         [5.9092e+00, 6.3715e-01, 1.3559e+00],
         [1.3876e+00, 7.0018e-01, 1.1101e+00],
         ...,
         [1.5267e+00, 3.7052e-01, 1.4159e+00],
         [3.0382e-01, 6.9316e-01, 7.3983e-01],
         [8.7207e+00, 1.6483e+00, 2.1276e+00]],

        [[4.8322e-01, 9.5295e-01, 9.5428e-01],
         [3.4417e+00, 6.5415e-01, 2.5479e+00],
         [1.7435e+00, 3.0026e-01, 5.0634e-01],
         ...,
         [2.8908e+01, 1.1193e-01, 1.2589e+00],
         [5.2870e+00, 1.2119e-01, 5.7643e-01],
         [1.7053e+00, 1.6325e+00, 6.5448e+00]],

        [[1.4868e+00, 8.3713e-01, 5.1642e-01],
         [1.7748e+00, 3.4906e-01, 8.6778e-01],
         [6.8324e-04, 5.5351e-01, 1.9


Train Diffusion:  12%|█▏        | 622/5001 [34:50<3:10:52,  2.62s/it][A
Train Diffusion:  12%|█▏        | 623/5001 [34:53<3:10:09,  2.61s/it][A
Train Diffusion:  12%|█▏        | 624/5001 [34:55<3:09:53,  2.60s/it][A
Train Diffusion:  12%|█▏        | 625/5001 [34:58<3:09:14,  2.59s/it][A
Train Diffusion:  13%|█▎        | 626/5001 [35:01<3:09:24,  2.60s/it][A
Train Diffusion:  13%|█▎        | 627/5001 [35:03<3:09:28,  2.60s/it][A
Train Diffusion:  13%|█▎        | 628/5001 [35:06<3:08:55,  2.59s/it][A
Train Diffusion:  13%|█▎        | 629/5001 [35:08<3:09:45,  2.60s/it][A
Train Diffusion:  13%|█▎        | 630/5001 [35:11<3:09:33,  2.60s/it][A
Train Diffusion:  13%|█▎        | 631/5001 [35:14<3:09:31,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320818904.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7801, 0.5023, 1.3212],
        [8.8838, 0.4819, 1.2992],
        [8.6164, 0.4976, 1.2671]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3676,  0.8628,  5.4754],
         [ 1.2550,  0.5656,  1.3719],
         [ 1.3377,  0.7083,  0.7028],
         ...,
         [ 0.2305,  8.4044,  0.6277],
         [25.6606,  0.5764,  0.4712],
         [ 5.4089,  0.3078,  0.8245]],

        [[ 1.0940,  0.9266,  0.8773],
         [ 1.1133,  0.4664,  1.2029],
         [19.2758,  0.3375,  1.1893],
         ...,
         [29.7015,  0.0718,  1.2927],
         [ 5.7383,  0.0886,  0.9442],
         [17.4444,  1.4237,  1.9342]],

        [[ 0.4505,  0.9544,  0.7485],
         [ 7.7738,  1.1836,  2.6891],
         [ 0.7645,  0.7958,  0.9431],
         ...,
         [ 4.3721,  0.1283,  0.6870],
         [ 0.3218,  1.1646,  1.2341],
         [ 0.3455,  1.8306,  4.9968


Train Diffusion:  13%|█▎        | 632/5001 [35:16<3:09:24,  2.60s/it][A
Train Diffusion:  13%|█▎        | 633/5001 [35:19<3:09:00,  2.60s/it][A
Train Diffusion:  13%|█▎        | 634/5001 [35:21<3:08:57,  2.60s/it][A
Train Diffusion:  13%|█▎        | 635/5001 [35:24<3:08:49,  2.59s/it][A
Train Diffusion:  13%|█▎        | 636/5001 [35:27<3:08:52,  2.60s/it][A
Train Diffusion:  13%|█▎        | 637/5001 [35:29<3:08:45,  2.60s/it][A
Train Diffusion:  13%|█▎        | 638/5001 [35:32<3:09:36,  2.61s/it][A
Train Diffusion:  13%|█▎        | 639/5001 [35:34<3:09:44,  2.61s/it][A
Train Diffusion:  13%|█▎        | 640/5001 [35:37<3:09:37,  2.61s/it][A
Train Diffusion:  13%|█▎        | 641/5001 [35:40<3:17:52,  2.72s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335093388.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6740, 0.5072, 1.2760],
        [8.8759, 0.5007, 1.3060],
        [8.6300, 0.5013, 1.3257]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.1591e-01, 9.5652e-01, 1.0686e+00],
         [1.2541e+01, 4.6179e-01, 1.2117e+00],
         [2.3202e+00, 4.6995e-01, 6.4004e-01],
         ...,
         [3.1775e+01, 6.5235e-02, 1.2651e+00],
         [5.8129e+00, 2.5474e-02, 1.0334e+00],
         [2.6779e+00, 5.0139e-01, 1.6684e+00]],

        [[5.9039e-01, 9.5039e-01, 9.3964e-01],
         [5.5324e-01, 7.5654e-01, 1.7909e+00],
         [2.8169e+01, 8.7700e-02, 1.2048e+00],
         ...,
         [4.2289e+00, 1.3795e-01, 7.4243e-01],
         [5.9171e-01, 6.4454e-01, 1.1924e+00],
         [9.2431e-03, 2.0992e+00, 3.1871e+00]],

        [[1.5957e+00, 8.0967e-01, 2.2230e+00],
         [1.2818e+00, 5.4750e-01, 7.7353e-01],
         [7.1952e-01, 7.1627e-01, 1.0


Train Diffusion:  13%|█▎        | 642/5001 [35:43<3:16:28,  2.70s/it][A
Train Diffusion:  13%|█▎        | 643/5001 [35:45<3:14:03,  2.67s/it][A
Train Diffusion:  13%|█▎        | 644/5001 [35:48<3:12:42,  2.65s/it][A
Train Diffusion:  13%|█▎        | 645/5001 [35:50<3:11:56,  2.64s/it][A
Train Diffusion:  13%|█▎        | 646/5001 [35:53<3:11:52,  2.64s/it][A
Train Diffusion:  13%|█▎        | 647/5001 [35:56<3:10:43,  2.63s/it][A
Train Diffusion:  13%|█▎        | 648/5001 [35:58<3:10:32,  2.63s/it][A
Train Diffusion:  13%|█▎        | 649/5001 [36:01<3:11:42,  2.64s/it][A
Train Diffusion:  13%|█▎        | 650/5001 [36:04<3:11:16,  2.64s/it][A
Train Diffusion:  13%|█▎        | 651/5001 [36:06<3:10:16,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326370203.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7904, 0.4757, 1.2955],
        [8.7227, 0.4796, 1.2690],
        [9.0361, 0.4674, 1.2847]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5574,  0.8343,  5.1001],
         [ 0.9830,  0.9459,  1.1734],
         [ 1.2703,  0.5952,  0.8201],
         ...,
         [ 3.5058,  2.1811,  1.3826],
         [ 1.5831,  1.7399,  0.4767],
         [ 0.9382,  1.5329,  0.7437]],

        [[ 0.5262,  0.9541,  0.9975],
         [ 0.5652,  0.6472,  1.8528],
         [29.1176,  0.0850,  1.1905],
         ...,
         [14.2673,  0.2599,  0.5182],
         [ 5.0740,  0.0703,  0.7967],
         [24.6171,  1.3185,  1.9419]],

        [[ 0.8224,  0.9639,  0.9190],
         [ 7.0125,  0.6932,  1.0678],
         [ 1.4678,  0.5641,  0.7928],
         ...,
         [ 4.2837,  0.1077,  0.9311],
         [31.3011,  0.0938,  1.3788],
         [ 4.7512,  0.7073,  1.3891


Train Diffusion:  13%|█▎        | 652/5001 [36:09<3:09:27,  2.61s/it][A
Train Diffusion:  13%|█▎        | 653/5001 [36:11<3:09:10,  2.61s/it][A
Train Diffusion:  13%|█▎        | 654/5001 [36:14<3:08:36,  2.60s/it][A
Train Diffusion:  13%|█▎        | 655/5001 [36:17<3:08:25,  2.60s/it][A
Train Diffusion:  13%|█▎        | 656/5001 [36:19<3:08:02,  2.60s/it][A
Train Diffusion:  13%|█▎        | 657/5001 [36:22<3:08:03,  2.60s/it][A
Train Diffusion:  13%|█▎        | 658/5001 [36:24<3:07:45,  2.59s/it][A
Train Diffusion:  13%|█▎        | 659/5001 [36:27<3:07:39,  2.59s/it][A
Train Diffusion:  13%|█▎        | 660/5001 [36:30<3:07:41,  2.59s/it][A
Train Diffusion:  13%|█▎        | 661/5001 [36:32<3:07:54,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337984438.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6310, 0.5060, 1.3044],
        [8.7735, 0.5253, 1.3088],
        [8.7042, 0.4995, 1.3100]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0966e+00, 9.2508e-01, 8.8060e-01],
         [1.1624e+00, 4.6784e-01, 2.0009e-01],
         [1.2359e+01, 1.6787e+00, 3.9569e-01],
         ...,
         [2.5860e+01, 7.0654e-02, 1.3078e+00],
         [5.6428e+00, 3.8806e-02, 1.5802e+01],
         [8.1124e-02, 2.8311e+00, 3.4224e+00]],

        [[4.5153e-01, 9.5345e-01, 1.0050e+00],
         [8.3046e+00, 5.4723e-01, 1.7367e+00],
         [1.5289e+00, 5.8922e-01, 7.3215e-01],
         ...,
         [1.7645e-01, 1.2146e+01, 4.1046e-01],
         [5.5892e-04, 1.7090e+00, 1.5429e+00],
         [2.2684e-01, 1.2929e+00, 5.3512e+00]],

        [[1.3638e+00, 8.6248e-01, 4.6522e+00],
         [1.3264e+00, 1.0976e+00, 1.3079e+00],
         [2.6218e-01, 7.0653e+00, 2.8


Train Diffusion:  13%|█▎        | 662/5001 [36:35<3:08:07,  2.60s/it][A
Train Diffusion:  13%|█▎        | 663/5001 [36:37<3:07:43,  2.60s/it][A
Train Diffusion:  13%|█▎        | 664/5001 [36:40<3:07:56,  2.60s/it][A
Train Diffusion:  13%|█▎        | 665/5001 [36:43<3:08:00,  2.60s/it][A
Train Diffusion:  13%|█▎        | 666/5001 [36:45<3:08:03,  2.60s/it][A
Train Diffusion:  13%|█▎        | 667/5001 [36:48<3:07:56,  2.60s/it][A
Train Diffusion:  13%|█▎        | 668/5001 [36:50<3:08:00,  2.60s/it][A
Train Diffusion:  13%|█▎        | 669/5001 [36:53<3:07:35,  2.60s/it][A
Train Diffusion:  13%|█▎        | 670/5001 [36:56<3:07:47,  2.60s/it][A
Train Diffusion:  13%|█▎        | 671/5001 [36:58<3:07:29,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333713526.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7630, 0.4952, 1.3147],
        [8.9076, 0.5002, 1.2748],
        [8.7875, 0.5097, 1.3129]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.8569e-01, 9.6282e-01, 1.1740e+00],
         [1.2657e+01, 3.4404e-01, 1.2796e+00],
         [2.8097e+00, 2.7975e-01, 1.1546e+00],
         ...,
         [2.5115e+01, 3.6717e-01, 4.0756e-01],
         [5.1890e+00, 2.6095e-02, 7.6503e-03],
         [3.3698e+00, 2.7618e-01, 5.8642e+00]],

        [[5.4416e-01, 9.5290e-01, 9.4332e-01],
         [5.7274e-01, 7.7195e-01, 1.0661e+00],
         [2.3197e-03, 1.0636e+00, 4.5556e+00],
         ...,
         [1.0292e-01, 9.5204e+00, 1.7546e+00],
         [2.8735e-01, 6.5145e-01, 1.6870e+00],
         [3.6472e-01, 1.9858e+00, 2.3596e+00]],

        [[1.5763e+00, 8.0982e-01, 7.0071e-01],
         [1.7112e+00, 3.8431e-01, 5.0634e-01],
         [1.2420e+01, 8.0366e-01, 6.1


Train Diffusion:  13%|█▎        | 672/5001 [37:01<3:07:34,  2.60s/it][A
Train Diffusion:  13%|█▎        | 673/5001 [37:03<3:07:19,  2.60s/it][A
Train Diffusion:  13%|█▎        | 674/5001 [37:06<3:07:17,  2.60s/it][A
Train Diffusion:  13%|█▎        | 675/5001 [37:09<3:10:22,  2.64s/it][A
Train Diffusion:  14%|█▎        | 676/5001 [37:11<3:09:28,  2.63s/it][A
Train Diffusion:  14%|█▎        | 677/5001 [37:14<3:08:38,  2.62s/it][A
Train Diffusion:  14%|█▎        | 678/5001 [37:17<3:08:19,  2.61s/it][A
Train Diffusion:  14%|█▎        | 679/5001 [37:19<3:08:02,  2.61s/it][A
Train Diffusion:  14%|█▎        | 680/5001 [37:22<3:07:46,  2.61s/it][A
Train Diffusion:  14%|█▎        | 681/5001 [37:24<3:07:27,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319302368.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7883, 0.5021, 1.2943],
        [8.5699, 0.4939, 1.3261],
        [8.8402, 0.5103, 1.3191]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.7729e-01, 9.5229e-01, 7.5912e-01],
         [1.0292e+01, 1.0351e+00, 2.8187e+00],
         [1.0351e+00, 8.0493e-01, 1.2706e+00],
         ...,
         [4.9515e+00, 9.7092e-01, 8.4408e-01],
         [5.3855e+00, 7.3157e-01, 2.1147e-01],
         [4.2598e+00, 5.1019e-01, 9.9215e-01]],

        [[9.6304e-01, 9.4998e-01, 7.9348e-01],
         [9.1963e-01, 5.4310e-01, 1.3687e+00],
         [2.4103e+01, 2.8494e-01, 1.1724e+00],
         ...,
         [3.5343e+00, 9.9657e-02, 8.3135e-01],
         [1.8862e-03, 5.2720e-01, 1.9759e+00],
         [2.2566e+01, 1.3658e+00, 2.0336e+00]],

        [[1.4689e+00, 8.3904e-01, 5.6178e+00],
         [8.0574e-01, 6.3242e-01, 1.4125e+00],
         [8.2748e-01, 8.3003e-01, 7.4


Train Diffusion:  14%|█▎        | 682/5001 [37:27<3:07:07,  2.60s/it][A
Train Diffusion:  14%|█▎        | 683/5001 [37:29<3:07:09,  2.60s/it][A
Train Diffusion:  14%|█▎        | 684/5001 [37:32<3:07:11,  2.60s/it][A
Train Diffusion:  14%|█▎        | 685/5001 [37:35<3:07:22,  2.60s/it][A
Train Diffusion:  14%|█▎        | 686/5001 [37:37<3:07:05,  2.60s/it][A
Train Diffusion:  14%|█▎        | 687/5001 [37:40<3:07:45,  2.61s/it][A
Train Diffusion:  14%|█▍        | 688/5001 [37:43<3:23:07,  2.83s/it][A
Train Diffusion:  14%|█▍        | 689/5001 [37:46<3:19:10,  2.77s/it][A
Train Diffusion:  14%|█▍        | 690/5001 [37:49<3:15:18,  2.72s/it][A
Train Diffusion:  14%|█▍        | 691/5001 [37:51<3:13:03,  2.69s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339997920.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7780, 0.5067, 1.3195],
        [8.7845, 0.4783, 1.2690],
        [8.8124, 0.4844, 1.3170]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6017,  0.8130,  4.5134],
         [ 1.0990,  0.6316,  1.0169],
         [ 0.6743,  0.7626,  1.1606],
         ...,
         [20.6328,  0.2537,  1.0880],
         [ 4.5683,  0.1616,  0.8593],
         [22.7817,  1.3351,  2.0016]],

        [[ 0.5965,  0.9522,  0.8448],
         [11.9106,  0.7133,  0.9459],
         [ 1.8757,  0.4250,  0.7629],
         ...,
         [ 0.4426,  0.6603,  0.8213],
         [42.0076,  0.1408,  1.1339],
         [ 5.9468,  0.2797,  1.3899]],

        [[ 0.7056,  0.9573,  0.8488],
         [ 0.6390,  0.7304,  1.8770],
         [27.0716,  0.2290,  1.2018],
         ...,
         [ 1.7908,  0.0614,  0.7084],
         [ 0.9544,  0.5154,  0.7544],
         [ 0.4852,  2.0236,  0.9807


Train Diffusion:  14%|█▍        | 692/5001 [37:54<3:10:46,  2.66s/it][A
Train Diffusion:  14%|█▍        | 693/5001 [37:56<3:09:09,  2.63s/it][A
Train Diffusion:  14%|█▍        | 694/5001 [37:59<3:08:23,  2.62s/it][A
Train Diffusion:  14%|█▍        | 695/5001 [38:02<3:08:04,  2.62s/it][A
Train Diffusion:  14%|█▍        | 696/5001 [38:04<3:07:04,  2.61s/it][A
Train Diffusion:  14%|█▍        | 697/5001 [38:07<3:06:43,  2.60s/it][A
Train Diffusion:  14%|█▍        | 698/5001 [38:09<3:06:35,  2.60s/it][A
Train Diffusion:  14%|█▍        | 699/5001 [38:12<3:06:41,  2.60s/it][A
Train Diffusion:  14%|█▍        | 700/5001 [38:14<3:06:19,  2.60s/it][A
Train Diffusion:  14%|█▍        | 701/5001 [38:17<3:06:06,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327372924.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8105, 0.4877, 1.2993],
        [8.7627, 0.4864, 1.3003],
        [8.8782, 0.4820, 1.2749]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5770,  0.9516,  0.7369],
         [11.0228,  0.9627,  1.0185],
         [ 1.5129,  0.4891,  0.8426],
         ...,
         [28.9183,  0.1469,  1.0953],
         [ 5.5709,  0.0731,  0.6368],
         [ 0.4885,  3.2336,  1.4318]],

        [[ 0.7330,  0.9592,  0.8327],
         [ 0.6586,  0.6936,  1.8055],
         [27.1780,  0.2175,  1.2006],
         ...,
         [ 1.7103,  1.8183,  1.7806],
         [ 0.2582,  0.7488,  0.5131],
         [18.9170,  2.2308,  1.7931]],

        [[ 1.5925,  0.8156,  5.2710],
         [ 1.0293,  0.5774,  1.1354],
         [ 0.6894,  0.7733,  1.0227],
         ...,
         [ 0.2680,  0.6112,  0.8928],
         [32.3072,  0.0813,  1.0022],
         [ 6.0851,  0.2776,  0.7922


Train Diffusion:  14%|█▍        | 702/5001 [38:20<3:06:11,  2.60s/it][A
Train Diffusion:  14%|█▍        | 703/5001 [38:22<3:06:13,  2.60s/it][A
Train Diffusion:  14%|█▍        | 704/5001 [38:25<3:06:06,  2.60s/it][A
Train Diffusion:  14%|█▍        | 705/5001 [38:27<3:05:52,  2.60s/it][A
Train Diffusion:  14%|█▍        | 706/5001 [38:30<3:05:44,  2.59s/it][A
Train Diffusion:  14%|█▍        | 707/5001 [38:33<3:06:09,  2.60s/it][A
Train Diffusion:  14%|█▍        | 708/5001 [38:35<3:05:35,  2.59s/it][A
Train Diffusion:  14%|█▍        | 709/5001 [38:38<3:06:24,  2.61s/it][A
Train Diffusion:  14%|█▍        | 710/5001 [38:40<3:06:41,  2.61s/it][A
Train Diffusion:  14%|█▍        | 711/5001 [38:43<3:06:48,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318086304.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7044, 0.4930, 1.3017],
        [8.7017, 0.4973, 1.3003],
        [8.9362, 0.4941, 1.3170]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8047,  0.9632,  0.7984],
         [ 0.7515,  0.6477,  1.9461],
         [27.1681,  0.2403,  1.1986],
         ...,
         [ 4.4086,  0.1151,  0.9197],
         [30.9598,  0.2798,  1.5727],
         [ 3.2295,  1.8263,  1.5092]],

        [[ 1.5650,  0.8258,  4.2976],
         [ 0.8378,  0.8559,  1.1376],
         [ 0.6469,  0.8041,  1.0760],
         ...,
         [ 0.5032,  8.2930,  1.0873],
         [ 0.0465,  1.5983,  0.6831],
         [ 0.4172,  1.1165, 12.2052]],

        [[ 0.5359,  0.9534,  0.9470],
         [12.8532,  0.5451,  1.0933],
         [ 2.0718,  0.4115,  0.8616],
         ...,
         [22.4382,  0.2984,  0.7248],
         [ 5.3074,  0.0489,  3.2924],
         [ 0.3590,  2.5840,  1.7956


Train Diffusion:  14%|█▍        | 712/5001 [38:46<3:06:14,  2.61s/it][A
Train Diffusion:  14%|█▍        | 713/5001 [38:48<3:05:42,  2.60s/it][A
Train Diffusion:  14%|█▍        | 714/5001 [38:51<3:06:05,  2.60s/it][A
Train Diffusion:  14%|█▍        | 715/5001 [38:53<3:05:55,  2.60s/it][A
Train Diffusion:  14%|█▍        | 716/5001 [38:56<3:06:04,  2.61s/it][A
Train Diffusion:  14%|█▍        | 717/5001 [38:59<3:05:45,  2.60s/it][A
Train Diffusion:  14%|█▍        | 718/5001 [39:01<3:05:27,  2.60s/it][A
Train Diffusion:  14%|█▍        | 719/5001 [39:04<3:05:37,  2.60s/it][A
Train Diffusion:  14%|█▍        | 720/5001 [39:06<3:05:16,  2.60s/it][A
Train Diffusion:  14%|█▍        | 721/5001 [39:09<3:06:08,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317189126.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6885, 0.4851, 1.2959],
        [8.8686, 0.4707, 1.2614],
        [8.8149, 0.4981, 1.2873]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5836,  0.8069,  1.1043],
         [ 1.6848,  0.3799,  0.7263],
         [ 0.5439,  0.7971,  1.3710],
         ...,
         [ 4.6471,  0.3994,  1.6768],
         [12.1889,  0.6461,  0.1051],
         [ 4.9666,  0.2803,  0.7274]],

        [[ 0.7587,  0.9614,  1.1459],
         [13.2784,  0.3803,  1.2676],
         [ 2.7480,  0.3714,  0.6148],
         ...,
         [ 3.4348,  0.1384,  0.8867],
         [ 3.3938,  0.4633,  1.5700],
         [ 0.6870,  2.7693,  1.5391]],

        [[ 0.5609,  0.9518,  0.9337],
         [ 0.5301,  0.8205,  1.5999],
         [25.8759,  0.1750,  1.1941],
         ...,
         [ 4.4272,  0.1692,  0.3103],
         [ 4.9950,  0.0689,  0.4248],
         [19.7936,  2.0199,  2.3783


Train Diffusion:  14%|█▍        | 722/5001 [39:12<3:06:07,  2.61s/it][A
Train Diffusion:  14%|█▍        | 723/5001 [39:14<3:06:09,  2.61s/it][A
Train Diffusion:  14%|█▍        | 724/5001 [39:17<3:05:43,  2.61s/it][A
Train Diffusion:  14%|█▍        | 725/5001 [39:20<3:05:35,  2.60s/it][A
Train Diffusion:  15%|█▍        | 726/5001 [39:22<3:05:19,  2.60s/it][A
Train Diffusion:  15%|█▍        | 727/5001 [39:25<3:05:23,  2.60s/it][A
Train Diffusion:  15%|█▍        | 728/5001 [39:27<3:05:17,  2.60s/it][A
Train Diffusion:  15%|█▍        | 729/5001 [39:30<3:05:14,  2.60s/it][A
Train Diffusion:  15%|█▍        | 730/5001 [39:33<3:05:13,  2.60s/it][A
Train Diffusion:  15%|█▍        | 731/5001 [39:35<3:05:13,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324258768.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6190, 0.5146, 1.2893],
        [8.7671, 0.4851, 1.2688],
        [8.9131, 0.4999, 1.3128]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5147,  0.8314,  0.9072],
         [ 1.7658,  0.3379,  1.1238],
         [17.9138,  0.3449,  1.1838],
         ...,
         [ 3.1908,  0.3510,  1.1814],
         [ 0.3398,  0.6268,  0.4480],
         [ 0.2746,  2.2349,  1.1601]],

        [[ 0.4960,  0.9528,  0.9472],
         [ 0.8419,  0.8734,  0.5700],
         [ 1.2633,  0.9034,  0.9922],
         ...,
         [30.9002,  0.1215,  1.2100],
         [ 5.0281,  0.1401,  0.7743],
         [18.5576,  1.4361,  2.0257]],

        [[ 0.8972,  0.9593,  1.3758],
         [10.5227,  0.3800,  1.2959],
         [ 2.6733,  0.4687,  0.7108],
         ...,
         [ 0.4935,  0.5241,  1.8032],
         [30.0882,  0.0393,  1.5685],
         [ 6.6043,  0.1942,  0.9472


Train Diffusion:  15%|█▍        | 732/5001 [39:38<3:05:12,  2.60s/it][A
Train Diffusion:  15%|█▍        | 733/5001 [39:40<3:04:54,  2.60s/it][A
Train Diffusion:  15%|█▍        | 734/5001 [39:43<3:05:05,  2.60s/it][A
Train Diffusion:  15%|█▍        | 735/5001 [39:46<3:13:35,  2.72s/it][A
Train Diffusion:  15%|█▍        | 736/5001 [39:49<3:14:46,  2.74s/it][A
Train Diffusion:  15%|█▍        | 737/5001 [39:51<3:12:16,  2.71s/it][A
Train Diffusion:  15%|█▍        | 738/5001 [39:54<3:10:25,  2.68s/it][A
Train Diffusion:  15%|█▍        | 739/5001 [39:57<3:08:25,  2.65s/it][A
Train Diffusion:  15%|█▍        | 740/5001 [39:59<3:07:16,  2.64s/it][A
Train Diffusion:  15%|█▍        | 741/5001 [40:02<3:07:00,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321877753.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7750, 0.5031, 1.3105],
        [8.6559, 0.5183, 1.3223],
        [8.6894, 0.5024, 1.3182]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.9138e-01, 9.5152e-01, 9.3349e-01],
         [5.6653e-01, 8.2034e-01, 1.7392e+00],
         [2.7033e+01, 1.8951e-01, 1.2055e+00],
         ...,
         [2.8860e+01, 5.5798e-02, 1.3000e+00],
         [5.8691e+00, 7.6561e-02, 1.0141e+00],
         [9.0063e-03, 1.7844e+00, 2.8098e+00]],

        [[7.1378e-01, 9.5748e-01, 1.0682e+00],
         [1.2680e+01, 4.5419e-01, 1.2110e+00],
         [2.3353e+00, 3.9311e-01, 7.1547e-01],
         ...,
         [2.4972e-01, 3.9440e+00, 5.3571e-01],
         [2.5848e+01, 7.3565e-01, 2.3423e-01],
         [5.2968e+00, 2.1301e-01, 2.6384e+00]],

        [[1.5978e+00, 8.0532e-01, 2.1685e+00],
         [1.4376e+00, 4.9612e-01, 7.7516e-01],
         [7.2423e-01, 7.4619e-01, 1.2


Train Diffusion:  15%|█▍        | 742/5001 [40:04<3:06:28,  2.63s/it][A
Train Diffusion:  15%|█▍        | 743/5001 [40:07<3:05:56,  2.62s/it][A
Train Diffusion:  15%|█▍        | 744/5001 [40:10<3:05:11,  2.61s/it][A
Train Diffusion:  15%|█▍        | 745/5001 [40:12<3:05:09,  2.61s/it][A
Train Diffusion:  15%|█▍        | 746/5001 [40:15<3:04:59,  2.61s/it][A
Train Diffusion:  15%|█▍        | 747/5001 [40:17<3:05:04,  2.61s/it][A
Train Diffusion:  15%|█▍        | 748/5001 [40:20<3:05:21,  2.61s/it][A
Train Diffusion:  15%|█▍        | 749/5001 [40:23<3:05:19,  2.62s/it][A
Train Diffusion:  15%|█▍        | 750/5001 [40:25<3:05:09,  2.61s/it][A
Train Diffusion:  15%|█▌        | 751/5001 [40:28<3:04:58,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332301958.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8164, 0.4891, 1.3072],
        [8.8169, 0.4899, 1.2921],
        [8.7189, 0.4920, 1.3044]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7169e-01, 9.5295e-01, 7.4640e-01],
         [1.1479e+01, 9.2684e-01, 6.7230e-01],
         [1.6944e+00, 4.3895e-01, 7.6694e-01],
         ...,
         [4.9728e-01, 6.4736e-01, 9.2219e-01],
         [1.7023e-05, 6.1100e-01, 2.9931e+00],
         [1.5635e-01, 1.4263e+00, 1.1558e+01]],

        [[7.4057e-01, 9.6117e-01, 8.2990e-01],
         [6.7063e-01, 6.8068e-01, 1.8875e+00],
         [2.8002e+01, 2.1383e-01, 1.2012e+00],
         ...,
         [3.8177e+00, 1.8641e-01, 8.9221e-01],
         [4.4872e-01, 2.0184e+00, 1.8430e+00],
         [8.3468e-01, 3.5384e+00, 9.4037e-01]],

        [[1.5927e+00, 8.1853e-01, 5.3451e+00],
         [9.8330e-01, 6.5384e-01, 1.2767e+00],
         [5.9470e-01, 7.0544e-01, 1.2


Train Diffusion:  15%|█▌        | 752/5001 [40:30<3:04:30,  2.61s/it][A
Train Diffusion:  15%|█▌        | 753/5001 [40:33<3:05:03,  2.61s/it][A
Train Diffusion:  15%|█▌        | 754/5001 [40:36<3:04:36,  2.61s/it][A
Train Diffusion:  15%|█▌        | 755/5001 [40:38<3:04:27,  2.61s/it][A
Train Diffusion:  15%|█▌        | 756/5001 [40:41<3:04:32,  2.61s/it][A
Train Diffusion:  15%|█▌        | 757/5001 [40:44<3:04:17,  2.61s/it][A
Train Diffusion:  15%|█▌        | 758/5001 [40:46<3:04:11,  2.60s/it][A
Train Diffusion:  15%|█▌        | 759/5001 [40:49<3:04:34,  2.61s/it][A
Train Diffusion:  15%|█▌        | 760/5001 [40:51<3:04:48,  2.61s/it][A
Train Diffusion:  15%|█▌        | 761/5001 [40:54<3:04:16,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322345257.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5168, 0.4899, 1.3119],
        [8.7905, 0.5031, 1.3397],
        [8.7099, 0.5049, 1.2851]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5622,  0.9508,  0.9366],
         [ 0.5492,  0.8574,  1.5469],
         [24.3749,  0.2779,  1.1945],
         ...,
         [ 0.7979,  2.0864,  1.7629],
         [ 0.2273,  0.8713,  0.5768],
         [20.9909,  2.7710,  3.3104]],

        [[ 0.7570,  0.9602,  1.1217],
         [12.7208,  0.4039,  1.2553],
         [ 2.6144,  0.4299,  1.0080],
         ...,
         [ 1.5056,  0.2773,  0.8034],
         [29.2318,  0.2469,  1.7933],
         [ 3.4500,  2.0484,  1.8333]],

        [[ 1.5831,  0.8072,  1.5263],
         [ 1.6194,  0.3983,  0.7559],
         [ 0.6744,  0.7620,  0.7751],
         ...,
         [29.6478,  0.3269,  0.6482],
         [ 5.2271,  0.0578,  0.4613],
         [ 1.2586,  0.9319,  0.9176


Train Diffusion:  15%|█▌        | 762/5001 [40:57<3:03:59,  2.60s/it][A
Train Diffusion:  15%|█▌        | 763/5001 [40:59<3:04:06,  2.61s/it][A
Train Diffusion:  15%|█▌        | 764/5001 [41:02<3:04:16,  2.61s/it][A
Train Diffusion:  15%|█▌        | 765/5001 [41:04<3:03:57,  2.61s/it][A
Train Diffusion:  15%|█▌        | 766/5001 [41:07<3:03:51,  2.60s/it][A
Train Diffusion:  15%|█▌        | 767/5001 [41:10<3:04:13,  2.61s/it][A
Train Diffusion:  15%|█▌        | 768/5001 [41:12<3:04:15,  2.61s/it][A
Train Diffusion:  15%|█▌        | 769/5001 [41:15<3:03:42,  2.60s/it][A
Train Diffusion:  15%|█▌        | 770/5001 [41:17<3:03:39,  2.60s/it][A
Train Diffusion:  15%|█▌        | 771/5001 [41:20<3:03:50,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320560953.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9555, 0.4734, 1.2957],
        [8.6213, 0.5233, 1.2755],
        [8.8076, 0.4766, 1.2890]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0450e-01, 9.5357e-01, 9.8077e-01],
         [1.0894e+01, 4.9975e-01, 1.3048e+00],
         [1.7593e+00, 5.8673e-01, 6.7591e-01],
         ...,
         [3.9555e-01, 1.6041e+00, 8.6325e-01],
         [4.6886e-01, 3.4041e-01, 1.0817e+00],
         [2.1373e+00, 1.2751e+00, 1.1811e+00]],

        [[8.7608e-01, 9.6172e-01, 7.8838e-01],
         [8.8665e-01, 5.4804e-01, 2.1635e-01],
         [1.6819e-05, 2.3609e+00, 2.2439e-01],
         ...,
         [3.1667e+01, 1.1185e+00, 6.2217e-02],
         [1.3434e+00, 5.6286e+00, 1.4805e-01],
         [4.2182e-01, 1.5206e+00, 5.7351e+00]],

        [[1.5263e+00, 8.2925e-01, 5.1813e+00],
         [8.2593e-01, 1.1290e+00, 1.2518e+00],
         [1.3149e+00, 8.4320e-01, 5.4


Train Diffusion:  15%|█▌        | 772/5001 [41:23<3:03:42,  2.61s/it][A
Train Diffusion:  15%|█▌        | 773/5001 [41:25<3:03:22,  2.60s/it][A
Train Diffusion:  15%|█▌        | 774/5001 [41:28<3:03:25,  2.60s/it][A
Train Diffusion:  15%|█▌        | 775/5001 [41:30<3:03:26,  2.60s/it][A
Train Diffusion:  16%|█▌        | 776/5001 [41:33<3:03:39,  2.61s/it][A
Train Diffusion:  16%|█▌        | 777/5001 [41:36<3:03:31,  2.61s/it][A
Train Diffusion:  16%|█▌        | 778/5001 [41:38<3:03:31,  2.61s/it][A
Train Diffusion:  16%|█▌        | 779/5001 [41:41<3:03:34,  2.61s/it][A
Train Diffusion:  16%|█▌        | 780/5001 [41:43<3:03:26,  2.61s/it][A
Train Diffusion:  16%|█▌        | 781/5001 [41:46<3:03:18,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339626332.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7094, 0.4843, 1.2665],
        [8.7406, 0.4826, 1.3010],
        [8.8539, 0.4755, 1.2556]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5276e-01, 9.5460e-01, 1.0836e+00],
         [5.0340e-01, 6.6265e-01, 2.3030e-01],
         [1.7863e-05, 2.3879e+00, 3.0425e-01],
         ...,
         [3.9643e+00, 1.6068e-01, 7.2830e-01],
         [4.1509e+00, 5.0914e+00, 2.7315e+00],
         [4.7224e+00, 3.6395e+00, 1.7340e+00]],

        [[1.3677e+00, 8.6166e-01, 4.5929e+00],
         [2.3997e+00, 8.7764e-01, 1.3175e+00],
         [1.6229e+00, 1.0453e+01, 6.4732e-01],
         ...,
         [3.1648e+01, 5.3112e-02, 1.2837e+00],
         [6.0208e+00, 3.3813e-02, 7.9378e+00],
         [1.2920e+00, 1.5650e+00, 1.4781e+00]],

        [[1.0893e+00, 9.2696e-01, 1.3262e+00],
         [2.3707e+00, 8.3313e-01, 9.8553e-01],
         [1.2104e+00, 5.9344e+00, 9.3


Train Diffusion:  16%|█▌        | 782/5001 [41:51<3:55:10,  3.34s/it][A
Train Diffusion:  16%|█▌        | 783/5001 [41:54<3:41:23,  3.15s/it][A
Train Diffusion:  16%|█▌        | 784/5001 [41:56<3:29:36,  2.98s/it][A
Train Diffusion:  16%|█▌        | 785/5001 [41:59<3:20:35,  2.85s/it][A
Train Diffusion:  16%|█▌        | 786/5001 [42:02<3:15:03,  2.78s/it][A
Train Diffusion:  16%|█▌        | 787/5001 [42:04<3:10:34,  2.71s/it][A
Train Diffusion:  16%|█▌        | 788/5001 [42:07<3:07:19,  2.67s/it][A
Train Diffusion:  16%|█▌        | 789/5001 [42:09<3:05:11,  2.64s/it][A
Train Diffusion:  16%|█▌        | 790/5001 [42:12<3:03:35,  2.62s/it][A
Train Diffusion:  16%|█▌        | 791/5001 [42:14<3:02:36,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328260313.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5644, 0.5048, 1.2790],
        [8.8259, 0.4919, 1.3051],
        [8.7146, 0.5102, 1.3121]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5683,  0.9504,  0.9360],
         [ 0.5429,  0.8014,  1.6001],
         [24.2271,  0.1174,  1.1295],
         ...,
         [ 0.2860,  0.5502,  0.9591],
         [38.3853,  0.0883,  1.2316],
         [ 5.9833,  0.3398,  1.0061]],

        [[ 1.5878,  0.8061,  1.0785],
         [ 1.6406,  0.3954,  0.6880],
         [ 0.5619,  0.9518,  0.7489],
         ...,
         [ 1.1665,  1.0077,  1.7609],
         [ 0.3099,  0.7061,  0.7198],
         [22.9533,  1.4121,  2.0284]],

        [[ 0.7471,  0.9591,  1.1468],
         [13.4264,  0.3534,  1.2718],
         [ 2.8041,  0.2880,  1.1147],
         ...,
         [27.7810,  0.1948,  0.9002],
         [ 5.2489,  0.0873,  0.6724],
         [ 0.5736,  1.9144,  0.9253


Train Diffusion:  16%|█▌        | 792/5001 [42:17<3:02:06,  2.60s/it][A
Train Diffusion:  16%|█▌        | 793/5001 [42:20<3:01:13,  2.58s/it][A
Train Diffusion:  16%|█▌        | 794/5001 [42:22<3:00:58,  2.58s/it][A
Train Diffusion:  16%|█▌        | 795/5001 [42:25<3:01:02,  2.58s/it][A
Train Diffusion:  16%|█▌        | 796/5001 [42:27<3:00:38,  2.58s/it][A
Train Diffusion:  16%|█▌        | 797/5001 [42:30<3:00:26,  2.58s/it][A
Train Diffusion:  16%|█▌        | 798/5001 [42:32<3:00:39,  2.58s/it][A
Train Diffusion:  16%|█▌        | 799/5001 [42:35<3:00:20,  2.58s/it][A
Train Diffusion:  16%|█▌        | 800/5001 [42:38<3:00:26,  2.58s/it][A
Train Diffusion:  16%|█▌        | 801/5001 [42:40<3:00:23,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327326553.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7051, 0.4856, 1.2924],
        [8.7904, 0.5017, 1.2982],
        [8.8288, 0.4933, 1.2829]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5156e+00, 8.3121e-01, 5.5017e+00],
         [7.7219e-01, 8.6836e-01, 1.5162e+00],
         [5.9617e-01, 7.1646e-01, 1.3171e+00],
         ...,
         [2.3830e+00, 2.6132e-01, 5.9107e-01],
         [8.7334e-03, 3.0454e+00, 2.0998e+00],
         [2.0590e+01, 1.7147e+00, 1.8584e+00]],

        [[8.9312e-01, 9.5967e-01, 7.8422e-01],
         [8.5118e-01, 5.8446e-01, 1.9225e+00],
         [2.7504e+01, 2.4138e-01, 1.2012e+00],
         ...,
         [3.9471e-01, 1.7629e+00, 3.3279e-01],
         [7.2629e+00, 1.9831e+00, 4.4209e-01],
         [7.7088e-01, 2.2315e+00, 1.4174e+00]],

        [[4.9811e-01, 9.5294e-01, 8.3932e-01],
         [1.1708e+01, 7.5165e-01, 7.3502e-01],
         [1.7513e+00, 4.2994e-01, 7.2


Train Diffusion:  16%|█▌        | 802/5001 [42:43<3:00:35,  2.58s/it][A
Train Diffusion:  16%|█▌        | 803/5001 [42:45<3:00:14,  2.58s/it][A
Train Diffusion:  16%|█▌        | 804/5001 [42:48<2:59:49,  2.57s/it][A
Train Diffusion:  16%|█▌        | 805/5001 [42:50<2:59:39,  2.57s/it][A
Train Diffusion:  16%|█▌        | 806/5001 [42:53<2:59:56,  2.57s/it][A
Train Diffusion:  16%|█▌        | 807/5001 [42:56<3:00:53,  2.59s/it][A
Train Diffusion:  16%|█▌        | 808/5001 [42:58<3:00:24,  2.58s/it][A
Train Diffusion:  16%|█▌        | 809/5001 [43:01<3:00:13,  2.58s/it][A
Train Diffusion:  16%|█▌        | 810/5001 [43:03<3:00:13,  2.58s/it][A
Train Diffusion:  16%|█▌        | 811/5001 [43:06<3:00:07,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328775942.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6692, 0.5148, 1.3212],
        [8.7392, 0.4874, 1.3291],
        [8.8375, 0.5061, 1.2976]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5893,  0.8210,  4.2008],
         [ 0.9569,  0.7647,  1.0829],
         [ 0.6395,  0.7907,  0.7034],
         ...,
         [20.4878,  0.3755,  0.6121],
         [ 4.4403,  0.0830,  3.7165],
         [ 1.4858,  1.1402,  1.4208]],

        [[ 0.7502,  0.9613,  0.8231],
         [ 0.6883,  0.6861,  1.9770],
         [24.4824,  0.1580,  1.2442],
         ...,
         [ 0.2028,  0.5511,  1.2373],
         [26.9795,  0.2013,  1.2197],
         [ 4.4714,  0.6908,  7.2669]],

        [[ 0.5655,  0.9524,  0.9272],
         [12.8858,  0.5721,  1.0520],
         [ 2.1004,  0.3862,  0.7971],
         ...,
         [ 0.7715,  0.3216,  2.9042],
         [ 0.2026,  0.6461,  0.6939],
         [21.0996,  1.4231,  1.7617


Train Diffusion:  16%|█▌        | 812/5001 [43:09<2:59:56,  2.58s/it][A
Train Diffusion:  16%|█▋        | 813/5001 [43:11<3:01:08,  2.60s/it][A
Train Diffusion:  16%|█▋        | 814/5001 [43:14<3:01:24,  2.60s/it][A
Train Diffusion:  16%|█▋        | 815/5001 [43:16<3:00:40,  2.59s/it][A
Train Diffusion:  16%|█▋        | 816/5001 [43:19<2:59:55,  2.58s/it][A
Train Diffusion:  16%|█▋        | 817/5001 [43:21<2:59:49,  2.58s/it][A
Train Diffusion:  16%|█▋        | 818/5001 [43:24<2:59:34,  2.58s/it][A
Train Diffusion:  16%|█▋        | 819/5001 [43:27<2:59:44,  2.58s/it][A
Train Diffusion:  16%|█▋        | 820/5001 [43:29<2:59:34,  2.58s/it][A
Train Diffusion:  16%|█▋        | 821/5001 [43:32<2:59:48,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330507548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8570, 0.4780, 1.2886],
        [8.5854, 0.5025, 1.2906],
        [8.7245, 0.5087, 1.3192]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4439,  0.9526,  0.9136],
         [ 7.3868,  0.7738,  0.8284],
         [ 1.2897,  0.5082,  0.7869],
         ...,
         [ 1.1847,  0.6100,  0.8560],
         [ 0.4800,  0.6109,  1.0346],
         [26.3644,  1.1133,  1.9974]],

        [[ 1.2441,  0.8974,  0.8269],
         [ 1.4230,  0.3913,  1.8694],
         [28.5303,  0.1507,  1.1901],
         ...,
         [35.8769,  0.1191,  1.1237],
         [ 5.1064,  0.0811,  0.3175],
         [ 2.3375,  0.5948,  1.0185]],

        [[ 1.2217,  0.9020,  3.7212],
         [ 2.2502,  0.8009,  1.5066],
         [ 1.0547,  0.6841,  0.8236],
         ...,
         [ 2.6721,  0.1826,  1.0056],
         [20.4247,  0.3406,  1.0700],
         [ 3.3448,  0.9228,  1.1398


Train Diffusion:  16%|█▋        | 822/5001 [43:34<2:59:43,  2.58s/it][A
Train Diffusion:  16%|█▋        | 823/5001 [43:37<2:59:22,  2.58s/it][A
Train Diffusion:  16%|█▋        | 824/5001 [43:40<2:59:04,  2.57s/it][A
Train Diffusion:  16%|█▋        | 825/5001 [43:42<2:59:08,  2.57s/it][A
Train Diffusion:  17%|█▋        | 826/5001 [43:45<2:59:02,  2.57s/it][A
Train Diffusion:  17%|█▋        | 827/5001 [43:47<2:59:29,  2.58s/it][A
Train Diffusion:  17%|█▋        | 828/5001 [43:50<2:59:10,  2.58s/it][A
Train Diffusion:  17%|█▋        | 829/5001 [43:52<3:00:52,  2.60s/it][A
Train Diffusion:  17%|█▋        | 830/5001 [43:56<3:13:42,  2.79s/it][A
Train Diffusion:  17%|█▋        | 831/5001 [43:58<3:09:11,  2.72s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337246144.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7695, 0.4785, 1.3137],
        [8.8876, 0.5246, 1.2751],
        [8.7656, 0.4935, 1.2781]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5934e+00, 8.0655e-01, 1.6962e+00],
         [1.5262e+00, 4.3974e-01, 7.6407e-01],
         [6.9233e-01, 7.5860e-01, 1.2755e+00],
         ...,
         [2.1412e-01, 5.1606e-01, 8.8520e-01],
         [5.4831e-06, 4.3788e-01, 2.8651e+00],
         [1.7868e-01, 1.1745e+00, 1.1696e+01]],

        [[5.8037e-01, 9.5310e-01, 9.3792e-01],
         [5.7111e-01, 8.1860e-01, 1.6819e+00],
         [2.6752e+01, 1.0365e-01, 1.1870e+00],
         ...,
         [4.3485e-01, 1.3676e-01, 3.4624e-01],
         [5.3125e+00, 4.5992e-02, 6.6510e+00],
         [1.6267e-01, 2.7595e+00, 1.9941e+00]],

        [[7.2958e-01, 9.6042e-01, 1.1098e+00],
         [1.3274e+01, 3.9972e-01, 1.2411e+00],
         [2.6588e+00, 3.5794e-01, 6.3


Train Diffusion:  17%|█▋        | 832/5001 [44:01<3:06:17,  2.68s/it][A
Train Diffusion:  17%|█▋        | 833/5001 [44:03<3:04:12,  2.65s/it][A
Train Diffusion:  17%|█▋        | 834/5001 [44:06<3:03:00,  2.64s/it][A
Train Diffusion:  17%|█▋        | 835/5001 [44:09<3:01:30,  2.61s/it][A
Train Diffusion:  17%|█▋        | 836/5001 [44:11<3:00:54,  2.61s/it][A
Train Diffusion:  17%|█▋        | 837/5001 [44:14<3:00:03,  2.59s/it][A
Train Diffusion:  17%|█▋        | 838/5001 [44:16<2:59:36,  2.59s/it][A
Train Diffusion:  17%|█▋        | 839/5001 [44:19<2:59:19,  2.59s/it][A
Train Diffusion:  17%|█▋        | 840/5001 [44:21<2:58:49,  2.58s/it][A
Train Diffusion:  17%|█▋        | 841/5001 [44:24<2:58:39,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331569337.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7258, 0.4971, 1.3474],
        [8.6020, 0.4917, 1.3142],
        [8.7879, 0.5091, 1.3023]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6069e+00, 8.0815e-01, 4.0380e+00],
         [1.1882e+00, 6.1366e-01, 9.9085e-01],
         [6.3454e-01, 7.1380e-01, 1.2562e+00],
         ...,
         [2.9111e+01, 2.7805e-01, 9.9031e-01],
         [3.9546e+00, 2.3248e-01, 1.0678e+01],
         [4.7826e-01, 3.3066e+00, 1.8604e+00]],

        [[6.6634e-01, 9.5474e-01, 8.7792e-01],
         [6.0689e-01, 7.6245e-01, 1.9202e+00],
         [2.8008e+01, 1.6466e-01, 1.2038e+00],
         ...,
         [4.4113e-01, 5.2666e-01, 1.0132e+00],
         [1.1302e-06, 4.1181e-01, 3.4749e+00],
         [1.3611e-01, 1.2122e+00, 1.1236e+01]],

        [[6.2929e-01, 9.5310e-01, 8.7838e-01],
         [1.2505e+01, 6.4614e-01, 9.3596e-01],
         [2.0879e+00, 4.1634e-01, 7.5


Train Diffusion:  17%|█▋        | 842/5001 [44:27<2:58:23,  2.57s/it][A
Train Diffusion:  17%|█▋        | 843/5001 [44:29<2:58:21,  2.57s/it][A
Train Diffusion:  17%|█▋        | 844/5001 [44:32<2:58:42,  2.58s/it][A
Train Diffusion:  17%|█▋        | 845/5001 [44:34<2:58:15,  2.57s/it][A
Train Diffusion:  17%|█▋        | 846/5001 [44:37<3:00:17,  2.60s/it][A
Train Diffusion:  17%|█▋        | 847/5001 [44:40<3:05:11,  2.67s/it][A
Train Diffusion:  17%|█▋        | 848/5001 [44:43<3:10:37,  2.75s/it][A
Train Diffusion:  17%|█▋        | 849/5001 [44:46<3:10:28,  2.75s/it][A
Train Diffusion:  17%|█▋        | 850/5001 [44:48<3:08:32,  2.73s/it][A
Train Diffusion:  17%|█▋        | 851/5001 [44:51<3:07:25,  2.71s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331774179.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7517, 0.5075, 1.3046],
        [8.5731, 0.5157, 1.3125],
        [8.7844, 0.4872, 1.3067]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.7891e-01, 9.5088e-01, 8.3831e-01],
         [1.1223e+01, 7.8630e-01, 1.1573e+00],
         [1.5300e+00, 5.0666e-01, 8.1454e-01],
         ...,
         [1.3164e+01, 3.5925e-01, 4.4292e-01],
         [5.0651e+00, 4.4678e-02, 6.5789e+00],
         [9.0399e-01, 2.1198e+00, 1.1314e+00]],

        [[9.5406e-01, 9.5017e-01, 7.9040e-01],
         [9.1542e-01, 5.5573e-01, 1.8167e+00],
         [2.6348e+01, 2.7768e-01, 1.1975e+00],
         ...,
         [1.7464e-01, 6.0010e-01, 1.1083e+00],
         [1.5722e-06, 5.1916e-01, 3.2911e+00],
         [1.3324e-01, 1.3378e+00, 1.1546e+01]],

        [[1.4755e+00, 8.3746e-01, 5.4206e+00],
         [7.8700e-01, 8.6258e-01, 1.4662e+00],
         [6.7612e-01, 8.0700e-01, 9.8


Train Diffusion:  17%|█▋        | 852/5001 [44:54<3:06:53,  2.70s/it][A
Train Diffusion:  17%|█▋        | 853/5001 [44:56<3:06:15,  2.69s/it][A
Train Diffusion:  17%|█▋        | 854/5001 [44:59<3:06:18,  2.70s/it][A
Train Diffusion:  17%|█▋        | 855/5001 [45:02<3:05:34,  2.69s/it][A
Train Diffusion:  17%|█▋        | 856/5001 [45:04<3:04:13,  2.67s/it][A
Train Diffusion:  17%|█▋        | 857/5001 [45:07<3:02:47,  2.65s/it][A
Train Diffusion:  17%|█▋        | 858/5001 [45:09<3:02:33,  2.64s/it][A
Train Diffusion:  17%|█▋        | 859/5001 [45:12<3:03:49,  2.66s/it][A
Train Diffusion:  17%|█▋        | 860/5001 [45:15<3:03:13,  2.65s/it][A
Train Diffusion:  17%|█▋        | 861/5001 [45:17<3:03:00,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321633139.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6891, 0.4942, 1.3210],
        [8.6634, 0.4948, 1.3069],
        [8.8912, 0.4862, 1.3251]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5691,  0.8229,  2.9670],
         [ 0.9194,  0.7602,  0.8558],
         [ 0.2462,  4.9083,  2.1148],
         ...,
         [ 0.3667,  0.5857,  1.0497],
         [30.5547,  0.1093,  1.0332],
         [ 5.9545,  0.3036,  0.7980]],

        [[ 0.5405,  0.9520,  0.9836],
         [ 0.5378,  0.7063,  0.2156],
         [14.0093,  1.5482,  2.2403],
         ...,
         [25.9321,  0.0746,  1.3507],
         [ 5.6465,  0.1041,  0.8183],
         [23.8790,  1.3210,  2.0286]],

        [[ 0.7954,  0.9618,  1.0430],
         [11.1269,  0.5137,  1.1730],
         [ 1.8058,  0.6410,  1.4334],
         ...,
         [ 3.1005,  0.3653,  1.1083],
         [ 0.3235,  0.8770,  0.6575],
         [ 0.5115,  2.0725,  1.1782


Train Diffusion:  17%|█▋        | 862/5001 [45:20<3:01:56,  2.64s/it][A
Train Diffusion:  17%|█▋        | 863/5001 [45:23<3:01:30,  2.63s/it][A
Train Diffusion:  17%|█▋        | 864/5001 [45:25<3:00:49,  2.62s/it][A
Train Diffusion:  17%|█▋        | 865/5001 [45:28<3:00:23,  2.62s/it][A
Train Diffusion:  17%|█▋        | 866/5001 [45:30<3:00:13,  2.62s/it][A
Train Diffusion:  17%|█▋        | 867/5001 [45:33<2:59:56,  2.61s/it][A
Train Diffusion:  17%|█▋        | 868/5001 [45:36<2:59:23,  2.60s/it][A
Train Diffusion:  17%|█▋        | 869/5001 [45:38<2:59:57,  2.61s/it][A
Train Diffusion:  17%|█▋        | 870/5001 [45:41<3:00:46,  2.63s/it][A
Train Diffusion:  17%|█▋        | 871/5001 [45:44<3:00:50,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327150428.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7520, 0.4760, 1.2640],
        [8.8236, 0.4787, 1.3055],
        [8.7822, 0.4848, 1.2931]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7820e-01, 9.5669e-01, 8.7002e-01],
         [6.1761e-01, 7.5260e-01, 1.8396e+00],
         [2.5993e+01, 2.7118e-01, 1.2031e+00],
         ...,
         [2.4634e+01, 5.1880e-01, 3.7930e-02],
         [1.3074e+00, 1.2027e+00, 1.5518e+00],
         [3.9507e+00, 2.7625e-01, 9.9935e-01]],

        [[1.6068e+00, 8.1097e-01, 4.1244e+00],
         [1.1613e+00, 6.5318e-01, 9.6850e-01],
         [6.6767e-01, 8.3233e-01, 1.0025e+00],
         ...,
         [4.5003e+00, 1.1111e-01, 6.4857e-01],
         [3.0828e-02, 6.7038e-01, 1.1670e+00],
         [1.3163e+01, 3.3218e+00, 6.1694e+00]],

        [[6.1744e-01, 9.5418e-01, 8.7679e-01],
         [1.2239e+01, 6.5417e-01, 1.0730e+00],
         [1.9593e+00, 4.4038e-01, 7.9


Train Diffusion:  17%|█▋        | 872/5001 [45:46<3:00:19,  2.62s/it][A
Train Diffusion:  17%|█▋        | 873/5001 [45:49<2:59:56,  2.62s/it][A
Train Diffusion:  17%|█▋        | 874/5001 [45:51<2:59:27,  2.61s/it][A
Train Diffusion:  17%|█▋        | 875/5001 [45:54<2:59:05,  2.60s/it][A
Train Diffusion:  18%|█▊        | 876/5001 [45:57<3:07:50,  2.73s/it][A
Train Diffusion:  18%|█▊        | 877/5001 [46:00<3:07:30,  2.73s/it][A
Train Diffusion:  18%|█▊        | 878/5001 [46:02<3:04:40,  2.69s/it][A
Train Diffusion:  18%|█▊        | 879/5001 [46:05<3:03:08,  2.67s/it][A
Train Diffusion:  18%|█▊        | 880/5001 [46:08<3:01:14,  2.64s/it][A
Train Diffusion:  18%|█▊        | 881/5001 [46:10<3:00:37,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327387811.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7989, 0.5057, 1.3130],
        [8.6634, 0.5096, 1.2882],
        [8.8944, 0.5054, 1.2903]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6547,  0.9563,  0.8885],
         [ 0.6002,  0.7704,  1.9163],
         [26.9288,  0.1226,  1.2182],
         ...,
         [23.7254,  0.6005,  1.0703],
         [ 2.4188,  0.9034,  0.9815],
         [ 0.7782,  1.9462,  9.5423]],

        [[ 0.6385,  0.9555,  0.9047],
         [12.5324,  0.6184,  1.0056],
         [ 2.1084,  0.4115,  0.6739],
         ...,
         [ 1.3222,  1.5947,  1.1404],
         [ 0.3846,  0.6764,  4.7024],
         [ 0.0478,  2.6999,  2.2581]],

        [[ 1.6115,  0.8089,  3.8275],
         [ 1.2105,  0.5978,  0.9454],
         [ 0.6631,  0.7356,  0.7161],
         ...,
         [ 0.7727,  2.2174,  7.6924],
         [ 9.3925,  0.0553,  1.8587],
         [ 5.4252,  0.4312,  1.1727


Train Diffusion:  18%|█▊        | 882/5001 [46:13<3:00:06,  2.62s/it][A
Train Diffusion:  18%|█▊        | 883/5001 [46:15<2:59:22,  2.61s/it][A
Train Diffusion:  18%|█▊        | 884/5001 [46:18<2:58:35,  2.60s/it][A
Train Diffusion:  18%|█▊        | 885/5001 [46:20<2:58:21,  2.60s/it][A
Train Diffusion:  18%|█▊        | 886/5001 [46:23<2:58:02,  2.60s/it][A
Train Diffusion:  18%|█▊        | 887/5001 [46:26<2:57:57,  2.60s/it][A
Train Diffusion:  18%|█▊        | 888/5001 [46:28<2:57:19,  2.59s/it][A
Train Diffusion:  18%|█▊        | 889/5001 [46:31<2:57:12,  2.59s/it][A
Train Diffusion:  18%|█▊        | 890/5001 [46:33<2:57:29,  2.59s/it][A
Train Diffusion:  18%|█▊        | 891/5001 [46:36<2:57:31,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333322489.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7067, 0.5025, 1.3132],
        [8.6696, 0.5121, 1.3323],
        [8.7683, 0.5157, 1.2789]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7500,  0.9593,  0.8214],
         [ 0.6703,  0.6983,  1.8695],
         [26.6723,  0.2650,  1.2045],
         ...,
         [32.1737,  0.0551,  1.2761],
         [ 5.9906,  0.0841,  0.7664],
         [27.3782,  1.0788,  1.9990]],

        [[ 0.5660,  0.9504,  0.7703],
         [11.6889,  0.8659,  0.8709],
         [ 1.7212,  0.4257,  0.7535],
         ...,
         [ 0.3167,  0.7186,  0.7129],
         [15.8439,  0.3745,  1.0433],
         [ 3.3792,  0.6817,  1.0602]],

        [[ 1.5874,  0.8157,  5.0733],
         [ 1.0360,  0.6532,  1.1969],
         [ 0.6032,  0.7588,  1.1637],
         ...,
         [ 3.5363,  0.1635,  0.7595],
         [ 3.2148,  0.2696,  1.0755],
         [ 2.7861,  1.1095,  1.2998


Train Diffusion:  18%|█▊        | 892/5001 [46:39<2:57:10,  2.59s/it][A
Train Diffusion:  18%|█▊        | 893/5001 [46:41<2:57:13,  2.59s/it][A
Train Diffusion:  18%|█▊        | 894/5001 [46:44<2:57:07,  2.59s/it][A
Train Diffusion:  18%|█▊        | 895/5001 [46:46<2:57:03,  2.59s/it][A
Train Diffusion:  18%|█▊        | 896/5001 [46:49<2:57:11,  2.59s/it][A
Train Diffusion:  18%|█▊        | 897/5001 [46:52<2:56:54,  2.59s/it][A
Train Diffusion:  18%|█▊        | 898/5001 [46:54<2:57:01,  2.59s/it][A
Train Diffusion:  18%|█▊        | 899/5001 [46:57<2:56:53,  2.59s/it][A
Train Diffusion:  18%|█▊        | 900/5001 [46:59<2:56:33,  2.58s/it][A
Train Diffusion:  18%|█▊        | 901/5001 [47:02<2:56:58,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320028768.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7460, 0.5052, 1.2907],
        [8.8682, 0.4901, 1.2950],
        [8.5491, 0.5201, 1.3003]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6078,  0.8076,  3.7135],
         [ 1.2231,  0.5618,  0.8854],
         [ 0.7188,  0.7110,  0.9128],
         ...,
         [ 4.7191,  0.5062,  0.1841],
         [ 4.7353,  0.1120,  0.6253],
         [ 0.5723,  2.0130,  0.7599]],

        [[ 0.6499,  0.9549,  0.9117],
         [12.1691,  0.6111,  1.0333],
         [ 2.0410,  0.4343,  0.7364],
         ...,
         [ 0.1624,  0.6117,  0.8291],
         [36.9325,  0.1372,  1.1357],
         [ 5.8708,  0.2833,  1.2563]],

        [[ 0.6453,  0.9547,  0.8948],
         [ 0.5916,  0.7744,  1.8547],
         [28.2868,  0.1132,  1.2022],
         ...,
         [ 0.6350,  0.5748,  2.1446],
         [ 0.2240,  0.8786,  0.6964],
         [22.5662,  1.3816,  2.0420


Train Diffusion:  18%|█▊        | 902/5001 [47:04<2:56:54,  2.59s/it][A
Train Diffusion:  18%|█▊        | 903/5001 [47:07<2:56:42,  2.59s/it][A
Train Diffusion:  18%|█▊        | 904/5001 [47:10<2:56:20,  2.58s/it][A
Train Diffusion:  18%|█▊        | 905/5001 [47:12<2:57:06,  2.59s/it][A
Train Diffusion:  18%|█▊        | 906/5001 [47:15<2:57:11,  2.60s/it][A
Train Diffusion:  18%|█▊        | 907/5001 [47:17<2:56:58,  2.59s/it][A
Train Diffusion:  18%|█▊        | 908/5001 [47:20<2:56:32,  2.59s/it][A
Train Diffusion:  18%|█▊        | 909/5001 [47:23<2:56:14,  2.58s/it][A
Train Diffusion:  18%|█▊        | 910/5001 [47:25<2:56:23,  2.59s/it][A
Train Diffusion:  18%|█▊        | 911/5001 [47:28<2:56:19,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336668185.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8105, 0.5118, 1.2632],
        [8.8817, 0.4980, 1.3154],
        [8.6777, 0.5075, 1.2991]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5578e+00, 8.1928e-01, 1.1749e+00],
         [1.5420e+00, 4.2296e-01, 7.7826e-01],
         [4.1191e+00, 6.4703e-01, 1.0398e+00],
         ...,
         [2.7877e+01, 7.4720e-02, 1.2993e+00],
         [5.8496e+00, 6.8533e-02, 2.5632e+00],
         [7.2238e+00, 2.0525e+00, 1.8225e+00]],

        [[8.2432e-01, 9.6418e-01, 1.2443e+00],
         [1.2760e+01, 3.4969e-01, 1.2842e+00],
         [2.9976e+00, 3.7835e-01, 7.5926e-01],
         ...,
         [4.7556e-01, 8.2862e-01, 8.6952e-01],
         [1.1599e-04, 5.5648e-01, 3.0927e+00],
         [2.1285e-01, 1.1954e+00, 9.4257e+00]],

        [[5.2618e-01, 9.5415e-01, 9.8029e-01],
         [5.2740e-01, 8.0165e-01, 1.1883e+00],
         [1.4735e+01, 3.8618e-01, 1.0


Train Diffusion:  18%|█▊        | 912/5001 [47:30<2:56:36,  2.59s/it][A
Train Diffusion:  18%|█▊        | 913/5001 [47:33<2:56:32,  2.59s/it][A
Train Diffusion:  18%|█▊        | 914/5001 [47:36<2:56:26,  2.59s/it][A
Train Diffusion:  18%|█▊        | 915/5001 [47:38<2:56:33,  2.59s/it][A
Train Diffusion:  18%|█▊        | 916/5001 [47:41<2:56:18,  2.59s/it][A
Train Diffusion:  18%|█▊        | 917/5001 [47:43<2:56:04,  2.59s/it][A
Train Diffusion:  18%|█▊        | 918/5001 [47:46<2:56:21,  2.59s/it][A
Train Diffusion:  18%|█▊        | 919/5001 [47:49<2:56:41,  2.60s/it][A
Train Diffusion:  18%|█▊        | 920/5001 [47:51<2:58:08,  2.62s/it][A
Train Diffusion:  18%|█▊        | 921/5001 [47:54<2:58:20,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323684022.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7737, 0.4965, 1.3078],
        [8.6372, 0.4888, 1.3110],
        [8.8419, 0.4993, 1.3259]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5256,  0.8288,  5.5538],
         [ 0.8367,  0.3926,  1.1542],
         [ 0.9429,  0.7610,  0.5001],
         ...,
         [ 3.7294,  0.1005,  0.9685],
         [16.5053,  0.3347,  1.0593],
         [ 1.8726,  2.1978,  1.3463]],

        [[ 0.5046,  0.9538,  0.7201],
         [ 9.8867,  1.1199,  1.9109],
         [ 0.9935,  1.2134,  0.9203],
         ...,
         [10.8477,  0.4374,  1.8906],
         [ 1.8870,  0.2369,  0.0720],
         [ 4.1145,  0.4179,  0.7573]],

        [[ 0.8768,  0.9617,  0.7854],
         [ 0.7974,  0.6110,  1.4975],
         [24.0469,  0.2054,  1.1873],
         ...,
         [ 4.9676,  0.3885,  2.4184],
         [ 0.9900,  0.3320,  0.5235],
         [25.3373,  1.4955,  1.8678


Train Diffusion:  18%|█▊        | 922/5001 [47:56<2:57:43,  2.61s/it][A
Train Diffusion:  18%|█▊        | 923/5001 [47:59<3:06:17,  2.74s/it][A
Train Diffusion:  18%|█▊        | 924/5001 [48:02<3:06:28,  2.74s/it][A
Train Diffusion:  18%|█▊        | 925/5001 [48:05<3:02:59,  2.69s/it][A
Train Diffusion:  19%|█▊        | 926/5001 [48:07<3:00:36,  2.66s/it][A
Train Diffusion:  19%|█▊        | 927/5001 [48:10<2:58:55,  2.64s/it][A
Train Diffusion:  19%|█▊        | 928/5001 [48:13<2:58:11,  2.62s/it][A
Train Diffusion:  19%|█▊        | 929/5001 [48:15<2:57:15,  2.61s/it][A
Train Diffusion:  19%|█▊        | 930/5001 [48:18<2:56:46,  2.61s/it][A
Train Diffusion:  19%|█▊        | 931/5001 [48:20<2:56:06,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 312700486.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6823, 0.4867, 1.2828],
        [8.8562, 0.4830, 1.3176],
        [8.8023, 0.4846, 1.2593]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5710e+00, 8.2405e-01, 4.9304e+00],
         [9.3615e-01, 7.4138e-01, 1.2561e+00],
         [5.9207e-01, 7.3484e-01, 1.2878e+00],
         ...,
         [3.5175e+01, 1.0244e-01, 1.1787e+00],
         [5.3650e+00, 1.2736e-01, 6.7333e-01],
         [1.1965e+00, 1.9807e+00, 8.9532e+00]],

        [[7.9102e-01, 9.6439e-01, 8.0343e-01],
         [7.1691e-01, 6.7525e-01, 1.9520e+00],
         [2.6922e+01, 2.5729e-01, 1.2075e+00],
         ...,
         [2.6224e+00, 1.9125e-01, 9.0229e-01],
         [4.8759e-01, 5.7422e-01, 1.2760e+00],
         [1.8213e-03, 1.8909e+00, 3.2133e+00]],

        [[5.4238e-01, 9.5470e-01, 8.3482e-01],
         [1.2211e+01, 7.3667e-01, 8.0971e-01],
         [1.8848e+00, 4.1403e-01, 7.2


Train Diffusion:  19%|█▊        | 932/5001 [48:23<2:55:52,  2.59s/it][A
Train Diffusion:  19%|█▊        | 933/5001 [48:25<2:55:18,  2.59s/it][A
Train Diffusion:  19%|█▊        | 934/5001 [48:28<2:55:14,  2.59s/it][A
Train Diffusion:  19%|█▊        | 935/5001 [48:31<2:55:22,  2.59s/it][A
Train Diffusion:  19%|█▊        | 936/5001 [48:33<2:55:20,  2.59s/it][A
Train Diffusion:  19%|█▊        | 937/5001 [48:36<2:55:13,  2.59s/it][A
Train Diffusion:  19%|█▉        | 938/5001 [48:38<2:55:09,  2.59s/it][A
Train Diffusion:  19%|█▉        | 939/5001 [48:41<2:54:57,  2.58s/it][A
Train Diffusion:  19%|█▉        | 940/5001 [48:44<2:54:53,  2.58s/it][A
Train Diffusion:  19%|█▉        | 941/5001 [48:46<2:54:41,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331932896.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7473, 0.5115, 1.2703],
        [8.8583, 0.4738, 1.3010],
        [8.7421, 0.5115, 1.3464]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.2623e-01, 9.6198e-01, 1.2065e+00],
         [1.1537e+01, 3.4699e-01, 1.2779e+00],
         [2.7684e+00, 3.1942e-01, 1.2633e+00],
         ...,
         [3.8597e+00, 1.6786e-01, 8.9142e-01],
         [4.4038e-01, 3.1711e+00, 3.4939e+00],
         [3.8916e-01, 4.6427e+00, 1.0083e+00]],

        [[5.2545e-01, 9.5201e-01, 9.2636e-01],
         [7.1265e-01, 7.7298e-01, 1.1501e+00],
         [1.2764e+01, 1.1617e-01, 8.7648e-01],
         ...,
         [3.2193e+01, 8.1264e-02, 1.2496e+00],
         [5.5965e+00, 6.5072e-02, 7.2180e+00],
         [7.8022e-01, 3.6069e+00, 1.2415e+00]],

        [[1.5538e+00, 8.1667e-01, 6.4697e-01],
         [1.8006e+00, 3.5181e-01, 6.9480e-01],
         [2.0673e-03, 7.9935e-01, 1.2


Train Diffusion:  19%|█▉        | 942/5001 [48:49<2:54:27,  2.58s/it][A
Train Diffusion:  19%|█▉        | 943/5001 [48:51<2:54:59,  2.59s/it][A
Train Diffusion:  19%|█▉        | 944/5001 [48:54<2:54:38,  2.58s/it][A
Train Diffusion:  19%|█▉        | 945/5001 [48:56<2:54:43,  2.58s/it][A
Train Diffusion:  19%|█▉        | 946/5001 [48:59<2:54:42,  2.59s/it][A
Train Diffusion:  19%|█▉        | 947/5001 [49:02<2:54:54,  2.59s/it][A
Train Diffusion:  19%|█▉        | 948/5001 [49:04<2:54:26,  2.58s/it][A
Train Diffusion:  19%|█▉        | 949/5001 [49:07<2:54:20,  2.58s/it][A
Train Diffusion:  19%|█▉        | 950/5001 [49:09<2:54:16,  2.58s/it][A
Train Diffusion:  19%|█▉        | 951/5001 [49:12<2:54:54,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334675996.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6885, 0.5080, 1.3294],
        [8.6668, 0.4872, 1.3034],
        [8.7796, 0.5166, 1.2743]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5732e+00, 8.2171e-01, 5.1339e+00],
         [8.9694e-01, 7.5161e-01, 1.2635e+00],
         [6.1864e-01, 7.4806e-01, 1.2050e+00],
         ...,
         [5.7669e+00, 5.0448e+00, 8.3037e-01],
         [8.0582e+00, 6.2404e-01, 5.9651e-01],
         [4.7918e+00, 2.5306e-01, 1.9936e+00]],

        [[5.4405e-01, 9.5098e-01, 8.1703e-01],
         [1.1786e+01, 7.6772e-01, 8.7264e-01],
         [1.7878e+00, 4.1979e-01, 7.8152e-01],
         ...,
         [6.6209e+00, 2.0522e-01, 7.8380e-01],
         [4.7733e+00, 7.4280e-02, 1.4790e+00],
         [4.3566e-03, 1.7595e+00, 3.2446e+00]],

        [[7.8736e-01, 9.6106e-01, 8.0178e-01],
         [7.2022e-01, 6.6170e-01, 1.9001e+00],
         [2.7467e+01, 2.4077e-01, 1.1


Train Diffusion:  19%|█▉        | 952/5001 [49:15<2:54:54,  2.59s/it][A
Train Diffusion:  19%|█▉        | 953/5001 [49:17<2:54:30,  2.59s/it][A
Train Diffusion:  19%|█▉        | 954/5001 [49:20<2:54:22,  2.59s/it][A
Train Diffusion:  19%|█▉        | 955/5001 [49:22<2:54:40,  2.59s/it][A
Train Diffusion:  19%|█▉        | 956/5001 [49:25<2:54:04,  2.58s/it][A
Train Diffusion:  19%|█▉        | 957/5001 [49:28<2:54:24,  2.59s/it][A
Train Diffusion:  19%|█▉        | 958/5001 [49:30<2:54:05,  2.58s/it][A
Train Diffusion:  19%|█▉        | 959/5001 [49:33<2:54:27,  2.59s/it][A
Train Diffusion:  19%|█▉        | 960/5001 [49:35<2:54:21,  2.59s/it][A
Train Diffusion:  19%|█▉        | 961/5001 [49:38<2:54:17,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336189145.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7420, 0.4813, 1.2951],
        [8.9292, 0.4868, 1.2910],
        [8.7474, 0.4769, 1.2960]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2849e-01, 9.5470e-01, 9.0717e-01],
         [1.1133e+01, 6.0784e-01, 8.8725e-01],
         [1.7867e+00, 4.6584e-01, 7.2136e-01],
         ...,
         [2.8226e+01, 1.0899e-01, 1.2782e+00],
         [5.2505e+00, 1.3322e-01, 6.0051e-01],
         [8.1583e-01, 1.9203e+00, 4.7585e+00]],

        [[1.5605e+00, 8.3146e-01, 5.1527e+00],
         [8.2005e-01, 8.4947e-01, 1.1892e+00],
         [8.1781e-01, 6.9263e-01, 1.3320e+00],
         ...,
         [2.4206e-01, 5.5029e-01, 1.0963e+00],
         [3.7037e+01, 8.1393e-02, 1.3603e+00],
         [6.1555e+00, 1.5855e-01, 3.5119e+00]],

        [[8.1810e-01, 9.6471e-01, 7.9832e-01],
         [7.8987e-01, 6.1103e-01, 1.9120e+00],
         [2.8243e+01, 2.0011e-01, 1.1


Train Diffusion:  19%|█▉        | 962/5001 [49:40<2:54:42,  2.60s/it][A
Train Diffusion:  19%|█▉        | 963/5001 [49:43<2:57:52,  2.64s/it][A
Train Diffusion:  19%|█▉        | 964/5001 [49:46<2:56:55,  2.63s/it][A
Train Diffusion:  19%|█▉        | 965/5001 [49:48<2:55:54,  2.62s/it][A
Train Diffusion:  19%|█▉        | 966/5001 [49:51<2:55:00,  2.60s/it][A
Train Diffusion:  19%|█▉        | 967/5001 [49:54<2:54:44,  2.60s/it][A
Train Diffusion:  19%|█▉        | 968/5001 [49:56<2:54:26,  2.60s/it][A
Train Diffusion:  19%|█▉        | 969/5001 [49:59<2:54:12,  2.59s/it][A
Train Diffusion:  19%|█▉        | 970/5001 [50:01<2:56:01,  2.62s/it][A
Train Diffusion:  19%|█▉        | 971/5001 [50:04<3:02:08,  2.71s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338482508.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6563, 0.5137, 1.2469],
        [8.6177, 0.5142, 1.2877],
        [8.9702, 0.4954, 1.3119]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5921,  0.8143,  4.8525],
         [ 1.0649,  0.6729,  1.0990],
         [ 0.6466,  0.8219,  0.9939],
         ...,
         [ 1.0212,  0.6712,  1.2455],
         [ 1.3727,  1.1152,  1.1068],
         [ 0.3295,  3.8062,  1.0827]],

        [[ 0.5740,  0.9506,  0.8002],
         [11.6759,  0.8017,  1.0380],
         [ 1.7194,  0.4533,  0.7758],
         ...,
         [ 0.1793,  0.5477,  0.4773],
         [36.4471,  0.4380,  0.3015],
         [ 5.3775,  0.3068,  1.0709]],

        [[ 0.7371,  0.9585,  0.8294],
         [ 0.6597,  0.7122,  1.8236],
         [25.1724,  0.2971,  1.2108],
         ...,
         [ 2.2316,  0.1778,  1.0381],
         [ 4.4370,  0.1364,  0.4927],
         [21.7294,  1.8167,  1.9425


Train Diffusion:  19%|█▉        | 972/5001 [50:07<2:59:50,  2.68s/it][A
Train Diffusion:  19%|█▉        | 973/5001 [50:10<2:57:39,  2.65s/it][A
Train Diffusion:  19%|█▉        | 974/5001 [50:12<2:56:59,  2.64s/it][A
Train Diffusion:  19%|█▉        | 975/5001 [50:15<2:55:49,  2.62s/it][A
Train Diffusion:  20%|█▉        | 976/5001 [50:17<2:54:38,  2.60s/it][A
Train Diffusion:  20%|█▉        | 977/5001 [50:20<2:54:04,  2.60s/it][A
Train Diffusion:  20%|█▉        | 978/5001 [50:22<2:53:30,  2.59s/it][A
Train Diffusion:  20%|█▉        | 979/5001 [50:25<2:53:24,  2.59s/it][A
Train Diffusion:  20%|█▉        | 980/5001 [50:28<2:53:38,  2.59s/it][A
Train Diffusion:  20%|█▉        | 981/5001 [50:30<2:53:43,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331845918.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6420, 0.4933, 1.3174],
        [8.5567, 0.5072, 1.2752],
        [8.8387, 0.5106, 1.3194]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9109,  0.9566,  0.7829],
         [ 0.8596,  0.5862,  1.7636],
         [25.9596,  0.2884,  1.1985],
         ...,
         [ 3.2048,  0.1279,  0.9478],
         [33.8347,  0.1735,  0.9212],
         [ 4.2451,  1.1120,  1.3758]],

        [[ 0.4922,  0.9521,  0.8301],
         [11.3807,  0.7827,  1.2909],
         [ 1.5334,  0.5352,  0.8502],
         ...,
         [ 6.7480,  0.3136,  2.3563],
         [ 1.1831,  0.2809,  0.9932],
         [22.7382,  1.3260,  2.0319]],

        [[ 1.5037,  0.8322,  5.3472],
         [ 0.7850,  0.8440,  1.3916],
         [ 0.6700,  0.8428,  0.8917],
         ...,
         [ 0.9447,  0.8362,  4.1395],
         [ 0.7618,  1.5978,  0.7669],
         [ 0.7418,  1.4497,  0.8096


Train Diffusion:  20%|█▉        | 982/5001 [50:33<2:53:36,  2.59s/it][A
Train Diffusion:  20%|█▉        | 983/5001 [50:35<2:53:10,  2.59s/it][A
Train Diffusion:  20%|█▉        | 984/5001 [50:38<2:53:11,  2.59s/it][A
Train Diffusion:  20%|█▉        | 985/5001 [50:41<2:53:26,  2.59s/it][A
Train Diffusion:  20%|█▉        | 986/5001 [50:43<2:53:15,  2.59s/it][A
Train Diffusion:  20%|█▉        | 987/5001 [50:46<2:53:03,  2.59s/it][A
Train Diffusion:  20%|█▉        | 988/5001 [50:48<2:53:02,  2.59s/it][A
Train Diffusion:  20%|█▉        | 989/5001 [50:51<2:52:58,  2.59s/it][A
Train Diffusion:  20%|█▉        | 990/5001 [50:54<2:55:53,  2.63s/it][A
Train Diffusion:  20%|█▉        | 991/5001 [50:56<2:55:04,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343533548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8629, 0.4941, 1.2873],
        [8.6919, 0.5120, 1.3010],
        [8.7887, 0.4924, 1.3386]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5683,  0.8187,  5.1997],
         [ 1.1011,  0.0887,  0.8843],
         [ 1.2537,  0.6493,  0.7386],
         ...,
         [ 0.4408,  0.5296,  1.1597],
         [17.5331,  0.3464,  1.1433],
         [ 2.8610,  0.9980,  4.1523]],

        [[ 0.7996,  0.9626,  0.8012],
         [ 0.6901,  0.6928,  1.3403],
         [14.7520,  0.3809,  1.2720],
         ...,
         [30.2410,  0.2361,  0.8861],
         [ 4.9277,  0.0497,  1.2638],
         [ 2.6564,  0.5573,  1.0869]],

        [[ 0.5378,  0.9527,  0.6498],
         [ 8.7942,  1.3441,  2.1183],
         [ 0.7512,  0.7227,  0.8991],
         ...,
         [ 1.1184,  0.3410,  2.0338],
         [ 0.2355,  0.6834,  0.8412],
         [24.0127,  1.1361,  1.9600


Train Diffusion:  20%|█▉        | 992/5001 [50:59<2:54:31,  2.61s/it][A
Train Diffusion:  20%|█▉        | 993/5001 [51:01<2:53:44,  2.60s/it][A
Train Diffusion:  20%|█▉        | 994/5001 [51:04<2:53:27,  2.60s/it][A
Train Diffusion:  20%|█▉        | 995/5001 [51:07<2:53:08,  2.59s/it][A
Train Diffusion:  20%|█▉        | 996/5001 [51:09<2:52:45,  2.59s/it][A
Train Diffusion:  20%|█▉        | 997/5001 [51:12<2:53:16,  2.60s/it][A
Train Diffusion:  20%|█▉        | 998/5001 [51:14<2:53:07,  2.60s/it][A
Train Diffusion:  20%|█▉        | 999/5001 [51:17<2:52:50,  2.59s/it][A
Train Diffusion:  20%|█▉        | 1000/5001 [51:20<2:52:33,  2.59s/it][A
Train Diffusion:  20%|██        | 1001/5001 [51:22<2:52:17,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 312260008.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8485, 0.5022, 1.3113],
        [8.7006, 0.4914, 1.3054],
        [8.5131, 0.5125, 1.3191]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5996,  0.8062,  2.4011],
         [ 1.3527,  0.5320,  0.7781],
         [ 0.7229,  0.6919,  0.7732],
         ...,
         [31.9840,  0.0862,  1.2443],
         [ 5.5789,  0.0478,  0.6715],
         [ 2.1379,  0.6573,  1.0010]],

        [[ 0.5942,  0.9496,  0.9324],
         [ 0.5599,  0.7987,  1.7567],
         [26.9515,  0.2270,  1.2003],
         ...,
         [ 0.3154,  7.4000,  0.7029],
         [18.6617,  0.3500,  1.0865],
         [ 3.5691,  0.7389,  3.9517]],

        [[ 0.7089,  0.9550,  1.0490],
         [12.3688,  0.4759,  1.1957],
         [ 2.2668,  0.4884,  1.1531],
         ...,
         [ 4.0907,  0.1618,  0.6959],
         [ 0.5742,  0.6374,  1.0210],
         [23.5928,  1.1430,  1.9442


Train Diffusion:  20%|██        | 1002/5001 [51:25<2:52:06,  2.58s/it][A
Train Diffusion:  20%|██        | 1003/5001 [51:27<2:52:17,  2.59s/it][A
Train Diffusion:  20%|██        | 1004/5001 [51:30<2:52:14,  2.59s/it][A
Train Diffusion:  20%|██        | 1005/5001 [51:32<2:52:25,  2.59s/it][A
Train Diffusion:  20%|██        | 1006/5001 [51:35<2:52:34,  2.59s/it][A
Train Diffusion:  20%|██        | 1007/5001 [51:38<2:52:03,  2.58s/it][A
Train Diffusion:  20%|██        | 1008/5001 [51:40<2:51:57,  2.58s/it][A
Train Diffusion:  20%|██        | 1009/5001 [51:43<2:51:54,  2.58s/it][A
Train Diffusion:  20%|██        | 1010/5001 [51:45<2:51:40,  2.58s/it][A
Train Diffusion:  20%|██        | 1011/5001 [51:48<2:52:14,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322858136.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7797, 0.4966, 1.3048],
        [8.8373, 0.4756, 1.3014],
        [8.7437, 0.5217, 1.2977]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.4842e-01, 9.5331e-01, 7.5467e-01],
         [1.1247e+01, 9.2926e-01, 8.7083e-01],
         [1.5682e+00, 4.5729e-01, 7.7345e-01],
         ...,
         [3.7546e+00, 1.8664e-01, 9.1649e-01],
         [5.0245e-01, 5.2202e-01, 1.1718e+00],
         [3.1083e-01, 2.3579e+00, 1.9029e+00]],

        [[7.7912e-01, 9.6326e-01, 8.0705e-01],
         [7.0636e-01, 6.6468e-01, 1.8576e+00],
         [2.7202e+01, 2.5502e-01, 1.1987e+00],
         ...,
         [5.5470e-01, 5.8474e-01, 9.7925e-01],
         [1.2120e-04, 5.1022e-01, 2.3666e+00],
         [1.7580e+01, 1.8213e+00, 1.9079e+00]],

        [[1.5779e+00, 8.2231e-01, 5.3888e+00],
         [9.3735e-01, 6.2195e-01, 1.2361e+00],
         [6.6308e-01, 7.3884e-01, 1.1


Train Diffusion:  20%|██        | 1012/5001 [51:51<2:51:51,  2.58s/it][A
Train Diffusion:  20%|██        | 1013/5001 [51:53<2:51:48,  2.58s/it][A
Train Diffusion:  20%|██        | 1014/5001 [51:56<2:52:13,  2.59s/it][A
Train Diffusion:  20%|██        | 1015/5001 [51:58<2:52:17,  2.59s/it][A
Train Diffusion:  20%|██        | 1016/5001 [52:01<2:51:55,  2.59s/it][A
Train Diffusion:  20%|██        | 1017/5001 [52:04<2:51:58,  2.59s/it][A
Train Diffusion:  20%|██        | 1018/5001 [52:06<2:53:19,  2.61s/it][A
Train Diffusion:  20%|██        | 1019/5001 [52:09<2:55:52,  2.65s/it][A
Train Diffusion:  20%|██        | 1020/5001 [52:12<2:55:22,  2.64s/it][A
Train Diffusion:  20%|██        | 1021/5001 [52:14<2:54:26,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336687971.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6873, 0.4892, 1.2876],
        [8.6171, 0.5126, 1.3128],
        [8.8609, 0.4862, 1.3077]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5907,  0.8157,  4.5833],
         [ 1.0691,  0.7134,  1.0642],
         [ 0.6496,  0.8403,  0.9759],
         ...,
         [ 0.4569,  0.6583,  0.9643],
         [34.1956,  0.1157,  1.3053],
         [ 5.4323,  0.4696,  1.0882]],

        [[ 0.7377,  0.9598,  0.8301],
         [ 0.6617,  0.7179,  1.8317],
         [26.0640,  0.2826,  1.2015],
         ...,
         [34.6441,  0.1519,  1.1546],
         [ 4.7090,  0.2061,  0.8002],
         [ 0.4224,  1.4405,  8.2720]],

        [[ 0.5743,  0.9519,  0.8448],
         [12.0676,  0.7162,  1.0635],
         [ 1.8278,  0.4601,  0.8083],
         ...,
         [ 3.5660,  0.1650,  0.8600],
         [ 0.5461,  0.4382,  3.2887],
         [ 0.0675,  2.1789,  3.0851


Train Diffusion:  20%|██        | 1022/5001 [52:17<2:53:19,  2.61s/it][A
Train Diffusion:  20%|██        | 1023/5001 [52:19<2:52:40,  2.60s/it][A
Train Diffusion:  20%|██        | 1024/5001 [52:22<2:52:22,  2.60s/it][A
Train Diffusion:  20%|██        | 1025/5001 [52:24<2:51:45,  2.59s/it][A
Train Diffusion:  21%|██        | 1026/5001 [52:27<2:52:31,  2.60s/it][A
Train Diffusion:  21%|██        | 1027/5001 [52:30<2:52:15,  2.60s/it][A
Train Diffusion:  21%|██        | 1028/5001 [52:32<2:52:06,  2.60s/it][A
Train Diffusion:  21%|██        | 1029/5001 [52:35<2:52:22,  2.60s/it][A
Train Diffusion:  21%|██        | 1030/5001 [52:37<2:51:43,  2.59s/it][A
Train Diffusion:  21%|██        | 1031/5001 [52:40<2:51:37,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 309282857.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9132, 0.4915, 1.2867],
        [8.6503, 0.4801, 1.3189],
        [8.7473, 0.4854, 1.3162]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0975,  0.9271,  0.8815],
         [ 1.1255,  0.4837,  1.9453],
         [28.1135,  0.2182,  1.1935],
         ...,
         [ 3.8975,  0.1511,  0.9557],
         [34.6785,  0.2122,  0.8839],
         [ 4.6762,  0.8061,  6.6242]],

        [[ 0.4502,  0.9553,  0.9060],
         [11.0672,  0.6893,  1.0002],
         [ 1.6942,  0.4339,  0.9035],
         ...,
         [31.6266,  0.1151,  1.2273],
         [ 5.3751,  0.1388,  0.9129],
         [16.9821,  1.4221,  1.9444]],

        [[ 1.3652,  0.8647,  4.8595],
         [ 0.8252,  0.9898,  1.6176],
         [ 0.6543,  0.7678,  1.1687],
         ...,
         [ 0.5817,  0.6346,  0.7207],
         [ 0.4781,  0.6155,  0.7061],
         [ 0.6523,  1.4874,  0.9194


Train Diffusion:  21%|██        | 1032/5001 [52:43<2:51:48,  2.60s/it][A
Train Diffusion:  21%|██        | 1033/5001 [52:45<2:51:13,  2.59s/it][A
Train Diffusion:  21%|██        | 1034/5001 [52:48<2:51:00,  2.59s/it][A
Train Diffusion:  21%|██        | 1035/5001 [52:50<2:51:07,  2.59s/it][A
Train Diffusion:  21%|██        | 1036/5001 [52:53<2:50:50,  2.59s/it][A
Train Diffusion:  21%|██        | 1037/5001 [52:56<2:51:05,  2.59s/it][A
Train Diffusion:  21%|██        | 1038/5001 [52:58<2:50:45,  2.59s/it][A
Train Diffusion:  21%|██        | 1039/5001 [53:01<2:50:35,  2.58s/it][A
Train Diffusion:  21%|██        | 1040/5001 [53:03<2:50:55,  2.59s/it][A
Train Diffusion:  21%|██        | 1041/5001 [53:06<2:50:41,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337417788.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7662, 0.4931, 1.2814],
        [8.9897, 0.5106, 1.3067],
        [8.6245, 0.4983, 1.2922]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6056,  0.8096,  3.9079],
         [ 1.1809,  0.6102,  0.9177],
         [ 0.6988,  0.7850,  1.1133],
         ...,
         [ 0.6683, 10.0823,  0.6793],
         [ 1.1379,  1.2170,  1.4661],
         [ 0.3802,  1.5445,  7.4573]],

        [[ 0.6637,  0.9563,  0.8802],
         [ 0.6060,  0.7589,  1.8519],
         [26.9966,  0.2253,  1.2020],
         ...,
         [ 1.0994,  0.6751,  7.4309],
         [ 7.2028,  0.1286,  0.8881],
         [ 5.4398,  0.4031,  0.8914]],

        [[ 0.6321,  0.9548,  0.8971],
         [12.2017,  0.6265,  1.0570],
         [ 2.0095,  0.4192,  0.7852],
         ...,
         [ 0.0973,  1.3750,  3.4457],
         [ 0.2023,  0.4701,  1.1943],
         [12.6407,  1.5975,  1.9834


Train Diffusion:  21%|██        | 1042/5001 [53:09<2:50:47,  2.59s/it][A
Train Diffusion:  21%|██        | 1043/5001 [53:11<2:50:52,  2.59s/it][A
Train Diffusion:  21%|██        | 1044/5001 [53:14<2:51:15,  2.60s/it][A
Train Diffusion:  21%|██        | 1045/5001 [53:16<2:50:54,  2.59s/it][A
Train Diffusion:  21%|██        | 1046/5001 [53:19<2:50:44,  2.59s/it][A
Train Diffusion:  21%|██        | 1047/5001 [53:21<2:51:00,  2.59s/it][A
Train Diffusion:  21%|██        | 1048/5001 [53:24<2:50:27,  2.59s/it][A
Train Diffusion:  21%|██        | 1049/5001 [53:27<2:50:08,  2.58s/it][A
Train Diffusion:  21%|██        | 1050/5001 [53:29<2:50:16,  2.59s/it][A
Train Diffusion:  21%|██        | 1051/5001 [53:32<2:50:21,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 314240700.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6588, 0.5230, 1.2813],
        [8.6710, 0.5080, 1.2937],
        [8.8958, 0.4835, 1.2926]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5494,  0.9524,  0.9191],
         [ 0.4986,  0.8133,  1.2344],
         [20.9453,  0.3123,  1.0578],
         ...,
         [ 5.5295,  0.2564,  0.4192],
         [ 5.1201,  0.0774,  0.9946],
         [ 0.6172,  1.9344,  1.1401]],

        [[ 0.7797,  0.9623,  1.1641],
         [13.0569,  0.3323,  1.2769],
         [ 2.9603,  0.3567,  0.9172],
         ...,
         [ 0.6202,  1.1999,  2.6449],
         [ 0.1576,  0.9297,  0.7945],
         [23.1556,  1.4320,  1.9264]],

        [[ 1.5766,  0.8081,  0.9254],
         [ 1.7763,  0.3526,  0.6904],
         [ 0.6098,  0.8020,  1.2533],
         ...,
         [ 0.1661,  0.5891,  1.0289],
         [29.2082,  0.1506,  0.9198],
         [ 5.5619,  0.3592,  1.1322


Train Diffusion:  21%|██        | 1052/5001 [53:34<2:50:00,  2.58s/it][A
Train Diffusion:  21%|██        | 1053/5001 [53:37<2:49:56,  2.58s/it][A
Train Diffusion:  21%|██        | 1054/5001 [53:40<2:50:25,  2.59s/it][A
Train Diffusion:  21%|██        | 1055/5001 [53:42<2:50:18,  2.59s/it][A
Train Diffusion:  21%|██        | 1056/5001 [53:45<2:50:09,  2.59s/it][A
Train Diffusion:  21%|██        | 1057/5001 [53:47<2:49:55,  2.59s/it][A
Train Diffusion:  21%|██        | 1058/5001 [53:50<2:49:57,  2.59s/it][A
Train Diffusion:  21%|██        | 1059/5001 [53:53<2:49:54,  2.59s/it][A
Train Diffusion:  21%|██        | 1060/5001 [53:55<2:49:57,  2.59s/it][A
Train Diffusion:  21%|██        | 1061/5001 [53:58<2:50:06,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335013110.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7615, 0.5106, 1.2727],
        [8.6581, 0.5114, 1.2472],
        [8.8381, 0.4901, 1.3159]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6906,  0.9545,  0.8592],
         [ 0.6246,  0.7481,  1.8921],
         [25.1474,  0.2932,  1.2158],
         ...,
         [ 0.3059,  0.0367,  1.2888],
         [ 0.3494,  0.5643,  0.8481],
         [25.1879,  1.2578,  2.0050]],

        [[ 1.6012,  0.8092,  4.2547],
         [ 1.1524,  0.6405,  1.0188],
         [ 0.6333,  0.7741,  1.1626],
         ...,
         [ 4.1144,  0.1560,  1.0607],
         [36.4178,  0.1539,  1.2328],
         [ 4.9463,  0.7089,  1.2499]],

        [[ 0.6088,  0.9509,  0.8613],
         [12.3305,  0.6742,  0.9711],
         [ 2.0011,  0.3964,  0.7386],
         ...,
         [31.4287,  0.1891,  0.9380],
         [ 4.5208,  0.1978,  0.7605],
         [ 0.8527,  1.3424,  0.8830


Train Diffusion:  21%|██        | 1062/5001 [54:00<2:49:49,  2.59s/it][A
Train Diffusion:  21%|██▏       | 1063/5001 [54:03<2:51:42,  2.62s/it][A
Train Diffusion:  21%|██▏       | 1064/5001 [54:06<2:50:52,  2.60s/it][A
Train Diffusion:  21%|██▏       | 1065/5001 [54:08<2:52:23,  2.63s/it][A
Train Diffusion:  21%|██▏       | 1066/5001 [54:11<2:51:42,  2.62s/it][A
Train Diffusion:  21%|██▏       | 1067/5001 [54:14<2:54:28,  2.66s/it][A
Train Diffusion:  21%|██▏       | 1068/5001 [54:16<2:52:57,  2.64s/it][A
Train Diffusion:  21%|██▏       | 1069/5001 [54:19<2:51:37,  2.62s/it][A
Train Diffusion:  21%|██▏       | 1070/5001 [54:21<2:50:52,  2.61s/it][A
Train Diffusion:  21%|██▏       | 1071/5001 [54:24<2:50:31,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329171075.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6702, 0.4910, 1.2775],
        [8.8518, 0.4611, 1.3124],
        [8.8627, 0.5060, 1.3189]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6033e+00, 8.3087e-01, 4.4001e+00],
         [8.2167e-01, 8.2748e-01, 1.0033e+00],
         [9.8232e-01, 6.4294e-01, 7.6343e-01],
         ...,
         [2.6190e+01, 4.2983e-01, 3.8003e-01],
         [4.5716e+00, 4.4636e-02, 2.2033e+00],
         [2.6402e+00, 6.6711e-01, 1.1771e+00]],

        [[7.0329e-01, 9.5849e-01, 9.1756e-01],
         [9.7626e+00, 5.9274e-01, 1.0877e+00],
         [1.7736e+00, 5.2087e-01, 1.0953e+00],
         ...,
         [3.1545e-01, 5.1779e-01, 1.0844e+00],
         [4.2825e-03, 5.0987e-01, 2.7491e+00],
         [2.2559e-01, 1.4958e+00, 7.5542e+00]],

        [[5.9737e-01, 9.5356e-01, 9.3209e-01],
         [6.0360e-01, 6.6193e-01, 1.8732e+00],
         [2.8965e+01, 1.0574e-01, 1.1


Train Diffusion:  21%|██▏       | 1072/5001 [54:26<2:49:59,  2.60s/it][A
Train Diffusion:  21%|██▏       | 1073/5001 [54:29<2:49:32,  2.59s/it][A
Train Diffusion:  21%|██▏       | 1074/5001 [54:32<2:49:51,  2.60s/it][A
Train Diffusion:  21%|██▏       | 1075/5001 [54:34<2:49:27,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1076/5001 [54:37<2:49:25,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1077/5001 [54:39<2:49:13,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1078/5001 [54:42<2:48:59,  2.58s/it][A
Train Diffusion:  22%|██▏       | 1079/5001 [54:45<2:49:13,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1080/5001 [54:47<2:50:05,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1081/5001 [54:50<2:49:51,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333758124.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8986, 0.4908, 1.3388],
        [8.7543, 0.4953, 1.2942],
        [8.6539, 0.4961, 1.2703]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1374e-01, 9.5398e-01, 8.6029e-01],
         [1.2140e+01, 6.8214e-01, 9.5205e-01],
         [1.9702e+00, 4.2307e-01, 8.0084e-01],
         ...,
         [3.1687e+01, 8.0546e-02, 1.2558e+00],
         [5.5830e+00, 4.7273e-02, 6.2829e+00],
         [1.4568e+00, 1.4479e+00, 1.2575e+00]],

        [[1.6068e+00, 8.1067e-01, 4.2639e+00],
         [1.1602e+00, 6.1076e-01, 9.9400e-01],
         [6.6013e-01, 7.3021e-01, 6.6181e-01],
         ...,
         [3.8589e+00, 1.7882e-01, 8.8153e-01],
         [4.5655e-01, 1.7506e+00, 8.3805e+00],
         [4.5827e-01, 3.7547e+00, 1.7263e+00]],

        [[6.8323e-01, 9.5692e-01, 8.6711e-01],
         [6.2062e-01, 7.5336e-01, 1.8903e+00],
         [2.8230e+01, 9.3738e-02, 1.2


Train Diffusion:  22%|██▏       | 1082/5001 [54:52<2:49:26,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1083/5001 [54:55<2:49:23,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1084/5001 [54:58<2:49:19,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1085/5001 [55:00<2:49:18,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1086/5001 [55:03<2:49:27,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1087/5001 [55:05<2:49:16,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1088/5001 [55:08<2:48:55,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1089/5001 [55:11<2:48:42,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1090/5001 [55:13<2:49:27,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1091/5001 [55:16<2:49:27,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325808284.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6787, 0.4970, 1.3214],
        [8.7309, 0.4967, 1.2780],
        [8.7361, 0.5203, 1.2966]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5165,  0.9521,  0.9568],
         [ 9.4404,  0.5418,  1.1660],
         [ 1.6811,  0.5252,  1.0554],
         ...,
         [ 0.4275,  8.3436,  0.6988],
         [17.4901,  0.2978,  1.0435],
         [ 3.5381,  1.0364,  5.5484]],

        [[ 1.5427,  0.8290,  5.9593],
         [ 0.8601,  1.2214,  1.3757],
         [ 0.2821,  8.8987,  2.8696],
         ...,
         [ 0.1625,  1.2755,  3.6451],
         [ 0.2984,  0.4423,  0.7589],
         [20.9942,  1.1403,  1.9248]],

        [[ 0.8471,  0.9621,  0.7920],
         [ 0.8656,  0.5445,  0.3016],
         [17.1548,  1.1698,  8.9078],
         ...,
         [ 0.1988,  1.1765,  8.9855],
         [ 0.4094,  0.7135,  1.1815],
         [ 2.4472,  0.8363,  0.8709


Train Diffusion:  22%|██▏       | 1092/5001 [55:18<2:48:55,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1093/5001 [55:21<2:48:36,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1094/5001 [55:24<2:48:23,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1095/5001 [55:26<2:48:20,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1096/5001 [55:29<2:48:18,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1097/5001 [55:31<2:48:12,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1098/5001 [55:34<2:48:21,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1099/5001 [55:36<2:48:28,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1100/5001 [55:39<2:48:32,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1101/5001 [55:42<2:49:16,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318068457.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6655, 0.5010, 1.3129],
        [8.6862, 0.4924, 1.2722],
        [9.0245, 0.4926, 1.3029]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5791e-01, 9.5593e-01, 1.0784e+00],
         [4.9720e-01, 6.8883e-01, 3.5769e-01],
         [1.5344e+01, 1.1329e+00, 7.1100e+00],
         ...,
         [4.1691e+00, 1.2782e-01, 7.1595e-01],
         [6.7569e-01, 6.7747e-01, 1.8017e+00],
         [9.0342e+00, 2.5220e+00, 1.7503e+00]],

        [[1.4043e+00, 8.5423e-01, 3.0707e+00],
         [8.4619e-01, 9.3758e-01, 9.4850e-01],
         [9.2087e-01, 7.2507e-01, 1.3936e+00],
         ...,
         [2.2468e+01, 1.7887e-01, 1.1017e+00],
         [5.1235e+00, 2.4413e-02, 5.0358e+00],
         [2.6054e+00, 7.7267e-01, 1.3683e+00]],

        [[1.0494e+00, 9.3785e-01, 1.6289e+00],
         [6.7298e+00, 6.8807e-01, 1.1934e+00],
         [1.4422e+00, 7.8665e-01, 1.6


Train Diffusion:  22%|██▏       | 1102/5001 [55:44<2:49:08,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1103/5001 [55:47<2:49:00,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1104/5001 [55:49<2:48:33,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1105/5001 [55:52<2:48:19,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1106/5001 [55:55<2:47:57,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1107/5001 [55:57<2:48:17,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1108/5001 [56:00<2:48:17,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1109/5001 [56:02<2:48:08,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1110/5001 [56:05<2:47:58,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1111/5001 [56:08<2:47:49,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344583404.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8236, 0.5005, 1.2820],
        [8.6452, 0.5036, 1.2872],
        [8.7349, 0.5192, 1.3067]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6054,  0.8079,  3.0863],
         [ 1.2435,  0.5872,  0.8233],
         [ 0.7296,  0.7884,  1.0351],
         ...,
         [ 1.6368,  0.3929,  0.9279],
         [ 0.3052,  6.3200,  0.6529],
         [ 7.1338,  1.6865,  2.2614]],

        [[ 0.6262,  0.9527,  0.9089],
         [ 0.5787,  0.7697,  1.8132],
         [27.0916,  0.1778,  1.2045],
         ...,
         [20.6373,  0.0819,  1.4510],
         [ 5.6170,  0.1061,  0.5860],
         [ 0.8868,  1.6517,  9.3625]],

        [[ 0.6704,  0.9547,  0.9804],
         [12.3383,  0.5178,  1.1450],
         [ 2.1370,  0.4136,  0.7929],
         ...,
         [ 0.1778,  0.6505,  0.5785],
         [34.6347,  0.2851,  0.9464],
         [ 5.6230,  0.2884,  0.9198


Train Diffusion:  22%|██▏       | 1112/5001 [56:10<2:49:10,  2.61s/it][A
Train Diffusion:  22%|██▏       | 1113/5001 [56:13<2:48:36,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1114/5001 [56:16<2:51:44,  2.65s/it][A
Train Diffusion:  22%|██▏       | 1115/5001 [56:18<2:50:22,  2.63s/it][A
Train Diffusion:  22%|██▏       | 1116/5001 [56:21<2:49:25,  2.62s/it][A
Train Diffusion:  22%|██▏       | 1117/5001 [56:23<2:49:05,  2.61s/it][A
Train Diffusion:  22%|██▏       | 1118/5001 [56:26<2:48:20,  2.60s/it][A
Train Diffusion:  22%|██▏       | 1119/5001 [56:29<2:47:50,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1120/5001 [56:31<2:47:12,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1121/5001 [56:34<2:48:17,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334592556.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8554, 0.4857, 1.2760],
        [8.6461, 0.5000, 1.3224],
        [8.6459, 0.5056, 1.3034]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5956,  0.8052,  2.2214],
         [ 1.4550,  0.4888,  0.7728],
         [ 0.7349,  0.6894,  0.8297],
         ...,
         [ 0.2713,  4.7521,  0.5027],
         [12.9589,  1.0536,  0.9127],
         [ 2.9661,  0.3706,  5.5157]],

        [[ 0.7217,  0.9569,  1.0604],
         [12.2170,  0.4730,  1.2053],
         [ 2.2594,  0.4651,  0.8485],
         ...,
         [28.2396,  0.0498,  1.3358],
         [ 5.9714,  0.0789,  0.5490],
         [ 2.9101,  2.5393,  1.0681]],

        [[ 0.5851,  0.9502,  0.9333],
         [ 0.5713,  0.8389,  1.6799],
         [28.3211,  0.0860,  1.2023],
         ...,
         [ 4.1663,  0.1427,  0.6299],
         [ 6.6829,  8.7039,  3.6095],
         [ 0.3305,  3.1910,  3.2497


Train Diffusion:  22%|██▏       | 1122/5001 [56:36<2:47:33,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1123/5001 [56:39<2:47:29,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1124/5001 [56:41<2:47:13,  2.59s/it][A
Train Diffusion:  22%|██▏       | 1125/5001 [56:44<2:46:55,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1126/5001 [56:47<2:46:57,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1127/5001 [56:49<2:46:43,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1128/5001 [56:52<2:46:58,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1129/5001 [56:54<2:46:45,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1130/5001 [56:57<2:47:04,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1131/5001 [57:00<2:47:13,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329431616.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7421, 0.4914, 1.3139],
        [8.6557, 0.4994, 1.3242],
        [8.8212, 0.4996, 1.2963]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5791e+00, 8.0770e-01, 8.3405e-01],
         [1.7641e+00, 3.6177e-01, 6.8038e-01],
         [3.8081e-02, 7.7168e-01, 1.2713e+00],
         ...,
         [1.9281e+00, 2.3255e-01, 1.0145e+00],
         [3.8519e+01, 7.7810e-02, 1.3469e+00],
         [5.9704e+00, 2.9834e-01, 1.0966e+00]],

        [[7.7545e-01, 9.6308e-01, 1.1615e+00],
         [1.3131e+01, 3.3089e-01, 1.2779e+00],
         [2.9625e+00, 3.0601e-01, 1.2403e+00],
         ...,
         [1.4293e+00, 6.9173e-01, 1.8874e+00],
         [1.8324e+00, 5.2681e-01, 7.9735e-01],
         [2.0675e+00, 2.0586e+00, 7.9800e+00]],

        [[5.5104e-01, 9.5327e-01, 9.2568e-01],
         [5.2428e-01, 7.9404e-01, 1.3240e+00],
         [2.0955e+01, 2.1444e-01, 9.9


Train Diffusion:  23%|██▎       | 1132/5001 [57:02<2:46:59,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1133/5001 [57:05<2:46:51,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1134/5001 [57:07<2:46:43,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1135/5001 [57:10<2:46:23,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1136/5001 [57:12<2:45:57,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1137/5001 [57:15<2:46:55,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1138/5001 [57:18<2:46:41,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1139/5001 [57:20<2:46:57,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1140/5001 [57:23<2:46:25,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1141/5001 [57:25<2:46:17,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319656224.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6082, 0.4812, 1.2893],
        [8.8049, 0.4838, 1.3159],
        [8.7426, 0.4966, 1.2736]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6337,  0.9538,  0.8989],
         [12.4224,  0.6193,  0.9727],
         [ 2.0871,  0.4284,  0.7317],
         ...,
         [10.0709,  0.7441,  1.1374],
         [ 1.4596,  0.9707,  1.0874],
         [ 2.3190,  0.7823,  1.1944]],

        [[ 0.6623,  0.9551,  0.8799],
         [ 0.6052,  0.7526,  1.9007],
         [27.9248,  0.1917,  1.2013],
         ...,
         [ 2.1596,  0.2349,  1.0158],
         [26.0432,  0.1566,  1.0893],
         [ 4.2331,  0.7938,  0.8195]],

        [[ 1.6045,  0.8095,  3.8909],
         [ 1.1685,  0.6053,  0.9464],
         [ 0.6716,  0.7066,  1.3156],
         ...,
         [ 0.6428,  0.2334,  0.4695],
         [ 4.5137,  0.0854,  0.8316],
         [28.2199,  1.0752,  2.0026


Train Diffusion:  23%|██▎       | 1142/5001 [57:28<2:46:28,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1143/5001 [57:31<2:46:21,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1144/5001 [57:33<2:46:27,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1145/5001 [57:36<2:46:11,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1146/5001 [57:38<2:46:01,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1147/5001 [57:41<2:46:15,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1148/5001 [57:44<2:45:54,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1149/5001 [57:46<2:46:39,  2.60s/it][A
Train Diffusion:  23%|██▎       | 1150/5001 [57:49<2:46:16,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1151/5001 [57:51<2:46:10,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319191996.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7640, 0.4862, 1.3088],
        [8.7535, 0.4937, 1.3073],
        [8.7621, 0.4800, 1.2921]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6054,  0.9533,  0.9254],
         [ 0.5690,  0.8058,  1.8367],
         [27.4318,  0.2234,  1.1995],
         ...,
         [ 2.1939,  0.4320,  0.9178],
         [ 3.5551,  0.1835,  0.7397],
         [ 0.5078,  2.0125,  0.8840]],

        [[ 1.6034,  0.8071,  2.8623],
         [ 1.3283,  0.5665,  0.8320],
         [ 0.6960,  0.7161,  1.3920],
         ...,
         [ 0.3995,  0.5187,  1.0414],
         [38.2052,  0.0842,  1.3269],
         [ 6.0232,  0.2743,  2.0786]],

        [[ 0.6939,  0.9572,  0.9979],
         [12.7550,  0.5013,  1.1455],
         [ 2.3035,  0.4146,  0.7000],
         ...,
         [ 0.3473,  0.4805,  3.2005],
         [ 0.3097,  0.5315,  0.9582],
         [21.6250,  1.3891,  2.0095


Train Diffusion:  23%|██▎       | 1152/5001 [57:54<2:45:57,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1153/5001 [57:56<2:45:55,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1154/5001 [57:59<2:45:54,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1155/5001 [58:02<2:46:30,  2.60s/it][A
Train Diffusion:  23%|██▎       | 1156/5001 [58:04<2:45:54,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1157/5001 [58:07<2:45:39,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1158/5001 [58:09<2:45:27,  2.58s/it][A
Train Diffusion:  23%|██▎       | 1159/5001 [58:12<2:45:45,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1160/5001 [58:15<2:48:57,  2.64s/it][A
Train Diffusion:  23%|██▎       | 1161/5001 [58:17<2:50:01,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325981238.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8435, 0.5043, 1.2862],
        [8.6875, 0.4972, 1.3063],
        [8.8367, 0.4939, 1.3048]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5971,  0.9517,  0.9306],
         [ 0.5662,  0.8197,  1.7619],
         [27.4550,  0.1762,  1.2052],
         ...,
         [ 2.1990,  0.1934,  1.0451],
         [21.7937,  1.3502,  3.3815],
         [ 4.1054,  3.5306,  2.0453]],

        [[ 1.6022,  0.8057,  2.5348],
         [ 1.3742,  0.5331,  0.7889],
         [ 0.7221,  0.7287,  1.2394],
         ...,
         [22.2826,  0.4968,  0.8409],
         [ 1.0078,  2.9147,  0.6298],
         [ 0.3296,  1.3589,  9.6998]],

        [[ 0.7045,  0.9568,  1.0307],
         [12.5402,  0.4828,  1.1855],
         [ 2.2597,  0.4132,  0.7165],
         ...,
         [ 1.3061,  0.1821,  0.5791],
         [ 3.1648,  0.0549,  2.2111],
         [ 1.2481,  1.2033,  1.1063


Train Diffusion:  23%|██▎       | 1162/5001 [58:20<2:50:24,  2.66s/it][A
Train Diffusion:  23%|██▎       | 1163/5001 [58:23<2:48:42,  2.64s/it][A
Train Diffusion:  23%|██▎       | 1164/5001 [58:25<2:47:19,  2.62s/it][A
Train Diffusion:  23%|██▎       | 1165/5001 [58:28<2:47:00,  2.61s/it][A
Train Diffusion:  23%|██▎       | 1166/5001 [58:30<2:46:17,  2.60s/it][A
Train Diffusion:  23%|██▎       | 1167/5001 [58:33<2:46:21,  2.60s/it][A
Train Diffusion:  23%|██▎       | 1168/5001 [58:36<2:46:03,  2.60s/it][A
Train Diffusion:  23%|██▎       | 1169/5001 [58:38<2:45:29,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1170/5001 [58:41<2:45:35,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1171/5001 [58:43<2:45:40,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321296054.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8225, 0.4824, 1.3584],
        [8.6400, 0.4804, 1.3215],
        [8.7072, 0.4910, 1.2987]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6004,  0.8118,  4.3748],
         [ 1.0931,  0.6244,  1.0113],
         [ 0.6693,  0.7174,  1.2887],
         ...,
         [33.5914,  0.2674,  0.9300],
         [ 3.8369,  0.5611,  0.6635],
         [ 0.4240,  1.8135,  0.7967]],

        [[ 0.6077,  0.9523,  0.8596],
         [12.0738,  0.6770,  0.9005],
         [ 1.9525,  0.4267,  0.7488],
         ...,
         [ 0.5781,  0.6644,  0.9605],
         [30.7651,  0.1549,  1.2587],
         [ 4.7490,  0.7868,  1.2743]],

        [[ 0.6931,  0.9561,  0.8563],
         [ 0.6304,  0.7286,  1.8969],
         [27.7960,  0.2164,  1.1979],
         ...,
         [ 3.6362,  0.2124,  0.8430],
         [ 0.5705,  1.0288,  5.3604],
         [11.8987,  2.2112,  2.0692


Train Diffusion:  23%|██▎       | 1172/5001 [58:46<2:45:03,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1173/5001 [58:49<2:45:05,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1174/5001 [58:51<2:45:02,  2.59s/it][A
Train Diffusion:  23%|██▎       | 1175/5001 [58:54<2:45:02,  2.59s/it][A
Train Diffusion:  24%|██▎       | 1176/5001 [58:56<2:44:50,  2.59s/it][A
Train Diffusion:  24%|██▎       | 1177/5001 [58:59<2:45:05,  2.59s/it][A
Train Diffusion:  24%|██▎       | 1178/5001 [59:02<2:46:03,  2.61s/it][A
Train Diffusion:  24%|██▎       | 1179/5001 [59:04<2:45:59,  2.61s/it][A
Train Diffusion:  24%|██▎       | 1180/5001 [59:07<2:45:33,  2.60s/it][A
Train Diffusion:  24%|██▎       | 1181/5001 [59:09<2:45:57,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333992230.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6325, 0.5190, 1.2665],
        [8.9132, 0.5295, 1.3290],
        [8.6805, 0.4967, 1.3183]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5484,  0.8264,  4.7506],
         [ 0.8236,  0.8696,  1.1879],
         [ 0.6681,  0.8486,  0.9536],
         ...,
         [30.5915,  0.0688,  1.2772],
         [ 5.7793,  0.0878,  0.6223],
         [ 0.5256,  2.0228,  0.8334]],

        [[ 0.5205,  0.9524,  0.8996],
         [12.1384,  0.6256,  1.0993],
         [ 1.8271,  0.4800,  0.8383],
         ...,
         [ 4.0670,  0.1423,  0.6920],
         [ 0.3504,  2.5743,  0.6745],
         [20.9848,  1.5102,  2.0357]],

        [[ 0.8370,  0.9625,  0.7908],
         [ 0.7784,  0.6358,  1.8591],
         [26.5231,  0.2584,  1.1970],
         ...,
         [ 0.2342,  6.6887,  0.6201],
         [30.3429,  0.2056,  0.8187],
         [ 5.7906,  0.2968,  1.0319


Train Diffusion:  24%|██▎       | 1182/5001 [59:12<2:45:31,  2.60s/it][A
Train Diffusion:  24%|██▎       | 1183/5001 [59:15<2:45:54,  2.61s/it][A
Train Diffusion:  24%|██▎       | 1184/5001 [59:17<2:45:19,  2.60s/it][A
Train Diffusion:  24%|██▎       | 1185/5001 [59:20<2:45:32,  2.60s/it][A
Train Diffusion:  24%|██▎       | 1186/5001 [59:22<2:45:13,  2.60s/it][A
Train Diffusion:  24%|██▎       | 1187/5001 [59:25<2:44:54,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1188/5001 [59:28<2:44:51,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1189/5001 [59:30<2:44:21,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1190/5001 [59:33<2:44:29,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1191/5001 [59:35<2:44:28,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327140579.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7228, 0.4983, 1.3342],
        [8.5691, 0.4988, 1.2804],
        [8.9306, 0.4837, 1.3212]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2697e+00, 8.9391e-01, 7.6316e-01],
         [1.4607e+00, 3.9218e-01, 1.8561e+00],
         [2.6550e+01, 2.7798e-01, 1.1929e+00],
         ...,
         [5.7178e-01, 4.5660e-01, 1.4383e+00],
         [2.0435e-05, 6.1024e-01, 3.3983e+00],
         [1.3821e-01, 1.3712e+00, 1.0443e+01]],

        [[4.4354e-01, 9.5629e-01, 9.2395e-01],
         [9.7477e+00, 6.8220e-01, 1.2852e+00],
         [1.5315e+00, 5.2141e-01, 8.3283e-01],
         ...,
         [2.0005e+00, 4.2821e-01, 1.0744e+00],
         [3.9903e-01, 9.6941e-01, 6.8292e+00],
         [1.1256e+00, 2.6422e+00, 2.4675e+00]],

        [[1.2002e+00, 9.0733e-01, 3.1533e+00],
         [1.3861e+00, 8.9144e-01, 1.5932e+00],
         [7.4718e-01, 8.4694e-01, 9.5


Train Diffusion:  24%|██▍       | 1192/5001 [59:38<2:44:40,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1193/5001 [59:41<2:44:51,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1194/5001 [59:43<2:44:27,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1195/5001 [59:46<2:44:18,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1196/5001 [59:48<2:44:35,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1197/5001 [59:51<2:44:34,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1198/5001 [59:53<2:44:09,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1199/5001 [59:56<2:44:07,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1200/5001 [59:59<2:44:46,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1201/5001 [1:00:01<2:45:53,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318909840.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7891, 0.5094, 1.3033],
        [8.7144, 0.4971, 1.3041],
        [8.9106, 0.4821, 1.2870]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5959e+00, 8.1966e-01, 4.1049e+00],
         [9.3334e-01, 7.6892e-01, 1.0035e+00],
         [7.1329e-01, 8.8825e-01, 8.4355e-01],
         ...,
         [2.4119e-01, 1.2625e+01, 1.1202e+00],
         [1.1686e+01, 5.4859e-01, 8.3384e-01],
         [4.2377e+00, 3.5499e-01, 4.8809e+00]],

        [[7.2381e-01, 9.5876e-01, 8.3885e-01],
         [6.7187e-01, 6.8079e-01, 1.8221e+00],
         [2.1936e+01, 3.1683e-01, 1.2438e+00],
         ...,
         [2.0512e+00, 9.9163e-01, 1.4344e-02],
         [4.4717e+00, 7.6034e-02, 7.1913e-01],
         [2.3301e+01, 1.1902e+00, 1.8784e+00]],

        [[5.8421e-01, 9.5186e-01, 9.3844e-01],
         [1.2408e+01, 5.6083e-01, 1.1434e+00],
         [1.9328e+00, 3.9605e-01, 8.3


Train Diffusion:  24%|██▍       | 1202/5001 [1:00:04<2:45:37,  2.62s/it][A
Train Diffusion:  24%|██▍       | 1203/5001 [1:00:07<2:45:09,  2.61s/it][A
Train Diffusion:  24%|██▍       | 1204/5001 [1:00:09<2:44:36,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1205/5001 [1:00:12<2:44:35,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1206/5001 [1:00:14<2:45:22,  2.61s/it][A
Train Diffusion:  24%|██▍       | 1207/5001 [1:00:17<2:46:53,  2.64s/it][A
Train Diffusion:  24%|██▍       | 1208/5001 [1:00:20<2:47:28,  2.65s/it][A
Train Diffusion:  24%|██▍       | 1209/5001 [1:00:22<2:48:29,  2.67s/it][A
Train Diffusion:  24%|██▍       | 1210/5001 [1:00:25<2:46:41,  2.64s/it][A
Train Diffusion:  24%|██▍       | 1211/5001 [1:00:28<2:45:49,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342321737.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7705, 0.5084, 1.2935],
        [8.6186, 0.5071, 1.2998],
        [8.8248, 0.4953, 1.3222]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0175e-01, 9.5114e-01, 8.9110e-01],
         [1.1752e+01, 6.3100e-01, 9.2677e-01],
         [1.9094e+00, 4.6777e-01, 7.0279e-01],
         ...,
         [4.9927e-01, 6.4904e-01, 8.8833e-01],
         [2.5560e-06, 5.3804e-01, 3.1335e+00],
         [1.4593e-01, 1.3376e+00, 1.0901e+01]],

        [[6.9985e-01, 9.5557e-01, 8.5115e-01],
         [6.4389e-01, 7.0477e-01, 1.8894e+00],
         [2.8586e+01, 9.1450e-02, 1.2018e+00],
         ...,
         [3.2319e+01, 6.6875e-02, 1.2654e+00],
         [5.8108e+00, 5.6677e-02, 5.4389e+00],
         [7.4053e-01, 4.3881e+00, 1.0409e+00]],

        [[1.6006e+00, 8.1500e-01, 4.3235e+00],
         [1.0089e+00, 6.3481e-01, 9.5430e-01],
         [7.5937e-01, 6.7726e-01, 9.7


Train Diffusion:  24%|██▍       | 1212/5001 [1:00:30<2:45:00,  2.61s/it][A
Train Diffusion:  24%|██▍       | 1213/5001 [1:00:33<2:44:42,  2.61s/it][A
Train Diffusion:  24%|██▍       | 1214/5001 [1:00:35<2:43:57,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1215/5001 [1:00:38<2:43:46,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1216/5001 [1:00:41<2:43:41,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1217/5001 [1:00:43<2:43:42,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1218/5001 [1:00:46<2:43:48,  2.60s/it][A
Train Diffusion:  24%|██▍       | 1219/5001 [1:00:48<2:43:18,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1220/5001 [1:00:51<2:42:59,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1221/5001 [1:00:54<2:42:42,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332577184.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6959, 0.5026, 1.2998],
        [8.7289, 0.5059, 1.3130],
        [8.7732, 0.4759, 1.3025]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5446,  0.8273,  4.3137],
         [ 0.7970,  0.9378,  1.1805],
         [ 0.6581,  0.8847,  0.8779],
         ...,
         [ 0.1240,  1.2620,  3.9782],
         [ 0.1667,  0.4523,  2.3640],
         [ 0.8224,  2.2215,  2.4655]],

        [[ 0.8443,  0.9628,  0.7908],
         [ 0.8041,  0.6142,  1.8759],
         [25.7028,  0.2807,  1.1978],
         ...,
         [ 7.7487,  2.1242,  1.1418],
         [ 2.0226,  0.7681,  1.2123],
         [ 0.4237,  1.8262,  3.9882]],

        [[ 0.5171,  0.9529,  0.9720],
         [12.8769,  0.5157,  1.1625],
         [ 2.0280,  0.4647,  0.8288],
         ...,
         [ 0.2659,  1.4021,  8.7434],
         [ 9.0328,  0.1359,  0.7433],
         [ 5.4536,  0.4813,  0.7733


Train Diffusion:  24%|██▍       | 1222/5001 [1:00:56<2:42:47,  2.58s/it][A
Train Diffusion:  24%|██▍       | 1223/5001 [1:00:59<2:42:57,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1224/5001 [1:01:01<2:42:59,  2.59s/it][A
Train Diffusion:  24%|██▍       | 1225/5001 [1:01:04<2:43:02,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1226/5001 [1:01:06<2:42:54,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1227/5001 [1:01:09<2:42:58,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1228/5001 [1:01:12<2:42:30,  2.58s/it][A
Train Diffusion:  25%|██▍       | 1229/5001 [1:01:14<2:42:55,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1230/5001 [1:01:17<2:42:54,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1231/5001 [1:01:19<2:42:58,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330349990.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7228, 0.4961, 1.2827],
        [8.9071, 0.4931, 1.2876],
        [8.8112, 0.4741, 1.2872]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3917e+00, 8.5601e-01, 4.8232e+00],
         [7.8165e-01, 9.6942e-01, 1.5443e+00],
         [6.4153e-01, 8.0176e-01, 1.0665e+00],
         ...,
         [1.4734e-05, 4.9851e-01, 3.8155e+00],
         [1.5771e-01, 4.8264e-01, 1.0039e+00],
         [1.3974e-01, 1.4869e+00, 2.9317e+00]],

        [[1.0638e+00, 9.3359e-01, 8.5304e-01],
         [1.0617e+00, 5.0870e-01, 1.9075e+00],
         [2.6782e+01, 2.6582e-01, 1.2001e+00],
         ...,
         [3.0423e+00, 1.1211e+01, 8.4092e-01],
         [8.7638e-01, 2.5663e-01, 2.0072e-01],
         [5.1570e+00, 2.2609e-01, 9.2529e-01]],

        [[4.5599e-01, 9.5479e-01, 9.0686e-01],
         [1.1463e+01, 6.7491e-01, 1.0508e+00],
         [1.7194e+00, 4.3856e-01, 8.1


Train Diffusion:  25%|██▍       | 1232/5001 [1:01:22<2:42:39,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1233/5001 [1:01:25<2:42:34,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1234/5001 [1:01:27<2:42:49,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1235/5001 [1:01:30<2:42:39,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1236/5001 [1:01:32<2:42:51,  2.60s/it][A
Train Diffusion:  25%|██▍       | 1237/5001 [1:01:35<2:42:36,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1238/5001 [1:01:38<2:42:16,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1239/5001 [1:01:40<2:41:57,  2.58s/it][A
Train Diffusion:  25%|██▍       | 1240/5001 [1:01:43<2:42:15,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1241/5001 [1:01:45<2:42:06,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332314076.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7751, 0.4927, 1.2994],
        [8.5298, 0.4908, 1.3059],
        [8.9050, 0.4840, 1.3364]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4222,  0.8477,  4.6826],
         [ 0.7290,  1.0010,  1.4969],
         [ 0.6014,  0.7368,  1.2936],
         ...,
         [ 1.8125,  2.0480,  0.8129],
         [ 0.4847,  1.0808,  1.5375],
         [ 0.3069,  1.6916,  8.5748]],

        [[ 1.0249,  0.9395,  0.8261],
         [ 1.0173,  0.5250,  2.0005],
         [27.1262,  0.2471,  1.2036],
         ...,
         [ 0.1854,  0.7408,  0.4629],
         [35.4767,  0.3770,  0.6906],
         [ 5.4047,  0.2905,  0.8306]],

        [[ 0.4621,  0.9531,  0.9517],
         [12.2286,  0.5819,  0.8811],
         [ 1.9637,  0.4052,  0.7269],
         ...,
         [18.1701,  0.1426,  1.2853],
         [ 5.2024,  0.0880,  1.0626],
         [ 4.5165,  1.5470,  2.5069


Train Diffusion:  25%|██▍       | 1242/5001 [1:01:48<2:42:05,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1243/5001 [1:01:50<2:41:59,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1244/5001 [1:01:53<2:41:59,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1245/5001 [1:01:56<2:41:55,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1246/5001 [1:01:58<2:41:51,  2.59s/it][A
Train Diffusion:  25%|██▍       | 1247/5001 [1:02:01<2:42:54,  2.60s/it][A
Train Diffusion:  25%|██▍       | 1248/5001 [1:02:04<2:43:43,  2.62s/it][A
Train Diffusion:  25%|██▍       | 1249/5001 [1:02:06<2:43:05,  2.61s/it][A
Train Diffusion:  25%|██▍       | 1250/5001 [1:02:09<2:43:08,  2.61s/it][A
Train Diffusion:  25%|██▌       | 1251/5001 [1:02:11<2:42:24,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330045712.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8507, 0.4868, 1.3038],
        [8.6056, 0.5157, 1.3260],
        [8.7941, 0.5006, 1.2911]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7533,  0.9613,  0.8203],
         [ 0.6707,  0.6933,  1.8332],
         [27.5422,  0.2461,  1.1953],
         ...,
         [ 0.4329,  0.4835,  1.0138],
         [ 6.2991,  0.5178,  1.5143],
         [ 0.7279,  1.9682,  0.7614]],

        [[ 0.5640,  0.9522,  0.7209],
         [11.1004,  1.0145,  0.8487],
         [ 1.5052,  0.4694,  0.7965],
         ...,
         [32.0623,  0.1832,  0.8610],
         [ 4.3850,  0.1025,  0.6795],
         [ 3.2324,  0.3795,  1.2215]],

        [[ 1.5872,  0.8164,  5.2657],
         [ 1.0489,  0.5212,  1.1561],
         [ 0.6703,  0.7245,  1.1852],
         ...,
         [ 1.2507,  0.2101,  1.7476],
         [ 0.3026,  0.6642,  0.8733],
         [24.8299,  1.2373,  2.0379


Train Diffusion:  25%|██▌       | 1252/5001 [1:02:14<2:42:38,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1253/5001 [1:02:17<2:42:06,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1254/5001 [1:02:19<2:43:46,  2.62s/it][A
Train Diffusion:  25%|██▌       | 1255/5001 [1:02:22<2:42:54,  2.61s/it][A
Train Diffusion:  25%|██▌       | 1256/5001 [1:02:25<2:48:45,  2.70s/it][A
Train Diffusion:  25%|██▌       | 1257/5001 [1:02:27<2:46:39,  2.67s/it][A
Train Diffusion:  25%|██▌       | 1258/5001 [1:02:30<2:45:02,  2.65s/it][A
Train Diffusion:  25%|██▌       | 1259/5001 [1:02:32<2:44:25,  2.64s/it][A
Train Diffusion:  25%|██▌       | 1260/5001 [1:02:35<2:43:22,  2.62s/it][A
Train Diffusion:  25%|██▌       | 1261/5001 [1:02:38<2:42:47,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332228067.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6048, 0.5031, 1.3066],
        [8.7906, 0.5100, 1.3143],
        [8.6866, 0.5151, 1.3203]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6025,  0.8060,  2.7438],
         [ 1.3523,  0.5399,  0.8008],
         [ 0.7228,  0.7219,  1.2708],
         ...,
         [32.6062,  0.0914,  1.2361],
         [ 5.5070,  0.1044,  0.7542],
         [ 1.2639,  1.7507,  4.6234]],

        [[ 0.6956,  0.9568,  1.0080],
         [12.4925,  0.4996,  1.1634],
         [ 2.2293,  0.4214,  0.7057],
         ...,
         [ 4.1151,  0.1504,  0.7684],
         [ 0.5596,  0.6281,  0.8178],
         [ 4.4435,  1.8832,  2.2275]],

        [[ 0.6048,  0.9527,  0.9254],
         [ 0.5690,  0.8146,  1.7832],
         [27.5756,  0.1663,  1.2058],
         ...,
         [ 0.5017,  1.5114,  0.7794],
         [35.8699,  0.0731,  1.4122],
         [ 6.2301,  0.3158,  0.7638


Train Diffusion:  25%|██▌       | 1262/5001 [1:02:40<2:42:24,  2.61s/it][A
Train Diffusion:  25%|██▌       | 1263/5001 [1:02:43<2:42:08,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1264/5001 [1:02:45<2:42:02,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1265/5001 [1:02:48<2:41:51,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1266/5001 [1:02:51<2:41:49,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1267/5001 [1:02:53<2:42:02,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1268/5001 [1:02:56<2:41:40,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1269/5001 [1:02:58<2:41:33,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1270/5001 [1:03:01<2:41:49,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1271/5001 [1:03:04<2:41:42,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338318240.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8004, 0.4861, 1.3387],
        [8.6582, 0.5132, 1.3161],
        [8.7087, 0.5060, 1.2759]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3601e+00, 8.6234e-01, 5.1393e+00],
         [1.6296e+00, 1.5846e-01, 1.1380e+00],
         [2.5656e+00, 4.0441e-01, 7.5340e-01],
         ...,
         [3.2551e-01, 1.7523e+01, 6.7761e-01],
         [1.1471e-03, 1.4192e+00, 1.7417e+00],
         [2.2838e-01, 1.2973e+00, 1.0756e+01]],

        [[4.5192e-01, 9.5305e-01, 7.1233e-01],
         [2.5526e+00, 1.5050e+00, 3.1316e-01],
         [8.9379e+00, 4.5334e-01, 9.4253e-01],
         ...,
         [2.0535e+00, 2.9552e-01, 8.8428e-01],
         [8.5571e+00, 1.6580e-01, 1.1974e+00],
         [3.4579e+00, 9.8754e-01, 1.0233e+00]],

        [[1.0983e+00, 9.2416e-01, 8.8205e-01],
         [1.0933e+00, 4.7461e-01, 9.2205e-01],
         [4.4786e+00, 5.6955e-01, 1.4


Train Diffusion:  25%|██▌       | 1272/5001 [1:03:06<2:41:36,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1273/5001 [1:03:09<2:41:09,  2.59s/it][A
Train Diffusion:  25%|██▌       | 1274/5001 [1:03:11<2:41:24,  2.60s/it][A
Train Diffusion:  25%|██▌       | 1275/5001 [1:03:14<2:41:13,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1276/5001 [1:03:17<2:41:37,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1277/5001 [1:03:19<2:41:32,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1278/5001 [1:03:22<2:41:32,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1279/5001 [1:03:24<2:41:22,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1280/5001 [1:03:27<2:41:03,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1281/5001 [1:03:30<2:40:55,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332111564.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6885, 0.4811, 1.2854],
        [9.0796, 0.4900, 1.3122],
        [8.7028, 0.5082, 1.2811]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2504e-01, 9.5413e-01, 7.8878e-01],
         [1.1507e+01, 8.5651e-01, 8.6164e-01],
         [1.6355e+00, 4.4470e-01, 7.4239e-01],
         ...,
         [1.0917e+00, 9.5084e-01, 1.3896e+01],
         [2.2187e+00, 4.9318e-01, 1.4395e+00],
         [2.5168e+00, 1.1110e+00, 2.3150e+00]],

        [[1.5567e+00, 8.2679e-01, 5.3307e+00],
         [8.7232e-01, 6.9097e-01, 1.2985e+00],
         [6.4276e-01, 7.4660e-01, 1.1613e+00],
         ...,
         [1.4416e+00, 3.2009e-04, 7.6474e-01],
         [3.8048e+00, 9.9580e-02, 8.2624e-01],
         [2.5808e+01, 1.1903e+00, 1.9524e+00]],

        [[8.2488e-01, 9.6401e-01, 7.9405e-01],
         [7.5200e-01, 6.4687e-01, 1.8798e+00],
         [2.6384e+01, 2.4774e-01, 1.2


Train Diffusion:  26%|██▌       | 1282/5001 [1:03:32<2:41:07,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1283/5001 [1:03:35<2:41:03,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1284/5001 [1:03:37<2:40:45,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1285/5001 [1:03:40<2:40:30,  2.59s/it][A
Train Diffusion:  26%|██▌       | 1286/5001 [1:03:43<2:40:48,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1287/5001 [1:03:45<2:41:05,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1288/5001 [1:03:48<2:40:57,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1289/5001 [1:03:50<2:40:54,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1290/5001 [1:03:53<2:40:41,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1291/5001 [1:03:56<2:40:38,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335771625.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8163, 0.5194, 1.3311],
        [8.7715, 0.4977, 1.2931],
        [8.6356, 0.5196, 1.2827]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5472e-01, 9.5473e-01, 8.7384e-01],
         [5.5802e+00, 9.1069e-01, 1.0797e+00],
         [8.7182e-01, 7.9189e-01, 9.6665e-01],
         ...,
         [1.8190e+01, 2.7215e-01, 1.2261e+00],
         [3.0198e+00, 3.3470e-01, 7.8614e-01],
         [1.6676e+01, 1.4220e+00, 1.9388e+00]],

        [[1.3780e+00, 8.5892e-01, 7.6607e-01],
         [1.6387e+00, 3.4610e-01, 1.5575e+00],
         [2.4673e+01, 2.8748e-01, 1.1744e+00],
         ...,
         [3.2183e+00, 3.9958e-02, 7.5682e-01],
         [2.7883e+00, 1.5434e-01, 6.2715e-01],
         [2.5074e+00, 1.9919e+00, 3.6708e+00]],

        [[1.0777e+00, 9.2995e-01, 2.0280e+00],
         [4.5643e+00, 5.5698e-01, 1.3483e+00],
         [1.6353e+00, 6.7625e-01, 7.9


Train Diffusion:  26%|██▌       | 1292/5001 [1:03:58<2:41:30,  2.61s/it][A
Train Diffusion:  26%|██▌       | 1293/5001 [1:04:01<2:42:45,  2.63s/it][A
Train Diffusion:  26%|██▌       | 1294/5001 [1:04:04<2:42:49,  2.64s/it][A
Train Diffusion:  26%|██▌       | 1295/5001 [1:04:06<2:42:15,  2.63s/it][A
Train Diffusion:  26%|██▌       | 1296/5001 [1:04:09<2:41:44,  2.62s/it][A
Train Diffusion:  26%|██▌       | 1297/5001 [1:04:11<2:41:23,  2.61s/it][A
Train Diffusion:  26%|██▌       | 1298/5001 [1:04:14<2:40:54,  2.61s/it][A
Train Diffusion:  26%|██▌       | 1299/5001 [1:04:17<2:40:38,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1300/5001 [1:04:19<2:40:25,  2.60s/it][A
Train Diffusion:  26%|██▌       | 1301/5001 [1:04:22<2:44:25,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323443913.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7548, 0.4918, 1.2852],
        [8.6257, 0.5128, 1.3239],
        [8.7384, 0.5039, 1.3078]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5169,  0.8297,  4.3949],
         [ 0.7646,  0.9935,  1.2418],
         [ 0.6499,  0.9193,  0.8356],
         ...,
         [28.5810,  0.0763,  1.2483],
         [ 5.7129,  0.0676,  0.6825],
         [ 0.8289,  2.6315,  1.1104]],

        [[ 0.4997,  0.9523,  0.9831],
         [12.8703,  0.5050,  1.2063],
         [ 2.0242,  0.4913,  0.9658],
         ...,
         [ 0.5974,  8.8054,  0.6413],
         [18.1574,  0.2544,  0.6210],
         [ 5.3655,  0.3731,  0.8009]],

        [[ 0.8903,  0.9593,  0.7846],
         [ 0.8585,  0.5870,  1.8177],
         [25.9446,  0.2694,  1.1901],
         ...,
         [ 4.5572,  0.1124,  0.6134],
         [ 0.0543,  0.6548,  0.7900],
         [14.5877,  3.1574,  1.7440


Train Diffusion:  26%|██▌       | 1302/5001 [1:04:25<2:43:12,  2.65s/it][A
Train Diffusion:  26%|██▌       | 1303/5001 [1:04:27<2:46:46,  2.71s/it][A
Train Diffusion:  26%|██▌       | 1304/5001 [1:04:30<2:45:06,  2.68s/it][A
Train Diffusion:  26%|██▌       | 1305/5001 [1:04:33<2:43:45,  2.66s/it][A
Train Diffusion:  26%|██▌       | 1306/5001 [1:04:35<2:42:32,  2.64s/it][A
Train Diffusion:  26%|██▌       | 1307/5001 [1:04:38<2:41:35,  2.62s/it][A
Train Diffusion:  26%|██▌       | 1308/5001 [1:04:40<2:41:04,  2.62s/it][A
Train Diffusion:  26%|██▌       | 1309/5001 [1:04:43<2:40:49,  2.61s/it][A
Train Diffusion:  26%|██▌       | 1310/5001 [1:04:46<2:40:37,  2.61s/it][A
Train Diffusion:  26%|██▌       | 1311/5001 [1:04:48<2:40:51,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331213744.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9924, 0.4776, 1.3345],
        [8.7028, 0.5011, 1.2987],
        [8.5142, 0.5034, 1.2767]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1744e-01, 9.5384e-01, 9.1578e-01],
         [5.7517e-01, 7.9536e-01, 1.8924e+00],
         [2.5696e+01, 2.7277e-01, 1.2144e+00],
         ...,
         [4.1978e+00, 1.5571e-01, 5.6865e-01],
         [7.8237e+00, 3.0490e+00, 3.5098e+00],
         [5.8409e-01, 4.2616e+00, 1.4269e+00]],

        [[1.6036e+00, 8.0682e-01, 3.0018e+00],
         [1.3082e+00, 5.9020e-01, 8.7156e-01],
         [6.6559e-01, 7.4691e-01, 1.3155e+00],
         ...,
         [2.2607e-01, 1.3275e+01, 7.2971e-01],
         [2.0663e-05, 8.1288e-01, 2.2284e+00],
         [1.9855e-01, 1.1818e+00, 1.1362e+01]],

        [[6.8058e-01, 9.5646e-01, 9.8213e-01],
         [1.3049e+01, 5.0238e-01, 1.1331e+00],
         [2.3290e+00, 3.8896e-01, 6.8


Train Diffusion:  26%|██▌       | 1312/5001 [1:04:51<2:40:32,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1313/5001 [1:04:54<2:40:56,  2.62s/it][A
Train Diffusion:  26%|██▋       | 1314/5001 [1:04:56<2:40:24,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1315/5001 [1:04:59<2:40:26,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1316/5001 [1:05:01<2:40:15,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1317/5001 [1:05:04<2:40:07,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1318/5001 [1:05:07<2:39:57,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1319/5001 [1:05:09<2:39:39,  2.60s/it][A
Train Diffusion:  26%|██▋       | 1320/5001 [1:05:12<2:39:24,  2.60s/it][A
Train Diffusion:  26%|██▋       | 1321/5001 [1:05:14<2:40:04,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324967718.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6401, 0.5160, 1.2807],
        [9.1104, 0.4917, 1.3148],
        [8.6590, 0.5167, 1.2948]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5644,  0.9513,  0.8150],
         [11.9268,  0.7727,  0.9379],
         [ 1.8110,  0.4198,  0.7691],
         ...,
         [ 2.4706,  0.2290,  1.1176],
         [31.6679,  0.1451,  1.2083],
         [ 5.3181,  0.4125,  0.7691]],

        [[ 1.5880,  0.8180,  4.9189],
         [ 1.0060,  0.7057,  1.1685],
         [ 0.6182,  0.7794,  1.1301],
         ...,
         [ 1.1758,  0.4479,  0.6992],
         [ 0.6644,  0.2974,  2.5336],
         [ 0.9692,  1.5393,  1.2819]],

        [[ 0.7520,  0.9604,  0.8213],
         [ 0.6780,  0.6953,  1.8929],
         [26.5844,  0.2682,  1.2042],
         ...,
         [35.0941,  0.1031,  1.1559],
         [ 5.3070,  0.1264,  0.7916],
         [26.6600,  1.1717,  1.9816


Train Diffusion:  26%|██▋       | 1322/5001 [1:05:17<2:40:29,  2.62s/it][A
Train Diffusion:  26%|██▋       | 1323/5001 [1:05:20<2:40:24,  2.62s/it][A
Train Diffusion:  26%|██▋       | 1324/5001 [1:05:22<2:39:53,  2.61s/it][A
Train Diffusion:  26%|██▋       | 1325/5001 [1:05:25<2:39:25,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1326/5001 [1:05:27<2:39:48,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1327/5001 [1:05:30<2:39:39,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1328/5001 [1:05:33<2:39:37,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1329/5001 [1:05:35<2:39:13,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1330/5001 [1:05:38<2:39:28,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1331/5001 [1:05:40<2:39:43,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334018966.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5969, 0.5175, 1.2874],
        [8.7725, 0.4966, 1.3001],
        [8.8010, 0.5058, 1.2909]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6014,  0.8171,  4.3457],
         [ 0.9555,  0.6972,  1.0250],
         [ 0.7100,  0.6939,  1.3027],
         ...,
         [ 1.1998,  0.4494,  0.6160],
         [ 3.3035,  0.1449,  0.8656],
         [ 0.2746,  1.2207,  2.8884]],

        [[ 0.5995,  0.9511,  0.9007],
         [12.1015,  0.6087,  0.9476],
         [ 1.9811,  0.4478,  0.7243],
         ...,
         [ 3.3484,  0.1543,  1.1039],
         [12.6528,  0.3312,  1.2459],
         [ 1.4454,  1.1858, 10.0931]],

        [[ 0.7015,  0.9557,  0.8501],
         [ 0.6520,  0.6900,  1.9264],
         [28.1250,  0.1790,  1.1987],
         ...,
         [18.7589,  0.4203,  0.9990],
         [ 2.3885,  0.1133,  0.2565],
         [ 3.8734,  0.4012,  0.9007


Train Diffusion:  27%|██▋       | 1332/5001 [1:05:43<2:39:27,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1333/5001 [1:05:46<2:39:06,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1334/5001 [1:05:48<2:38:58,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1335/5001 [1:05:51<2:38:58,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1336/5001 [1:05:53<2:38:36,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1337/5001 [1:05:56<2:38:51,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1338/5001 [1:05:59<2:38:36,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1339/5001 [1:06:01<2:39:03,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1340/5001 [1:06:04<2:39:16,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1341/5001 [1:06:06<2:39:21,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316620988.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7820, 0.4951, 1.2873],
        [8.7951, 0.4876, 1.3050],
        [8.6212, 0.5251, 1.2852]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8531,  0.9617,  0.7879],
         [ 0.7831,  0.6110,  1.7240],
         [27.1979,  0.2579,  1.1909],
         ...,
         [ 2.9503,  0.1538,  2.9610],
         [ 4.0669,  0.6828,  1.7209],
         [ 0.8714,  1.7821,  1.0158]],

        [[ 1.5402,  0.8264,  5.6990],
         [ 0.8205,  0.6571,  1.3583],
         [ 0.6921,  0.7844,  1.0007],
         ...,
         [30.4946,  0.4398,  1.0024],
         [ 2.7526,  0.3014,  0.1391],
         [ 4.1625,  0.4296,  0.9691]],

        [[ 0.5138,  0.9519,  0.7251],
         [10.7922,  1.0664,  1.2922],
         [ 1.3053,  0.5893,  0.9307],
         ...,
         [ 0.6504,  0.9312,  1.3172],
         [ 0.3985,  0.4720,  0.8024],
         [26.2176,  1.1536,  2.0106


Train Diffusion:  27%|██▋       | 1342/5001 [1:06:09<2:39:03,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1343/5001 [1:06:12<2:38:49,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1344/5001 [1:06:14<2:38:36,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1345/5001 [1:06:17<2:38:31,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1346/5001 [1:06:19<2:38:41,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1347/5001 [1:06:22<2:38:25,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1348/5001 [1:06:25<2:41:13,  2.65s/it][A
Train Diffusion:  27%|██▋       | 1349/5001 [1:06:27<2:40:03,  2.63s/it][A
Train Diffusion:  27%|██▋       | 1350/5001 [1:06:30<2:43:24,  2.69s/it][A
Train Diffusion:  27%|██▋       | 1351/5001 [1:06:33<2:43:29,  2.69s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321360707.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8605, 0.5071, 1.2761],
        [8.7672, 0.4822, 1.2915],
        [8.7128, 0.4890, 1.3023]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6693,  0.9560,  0.8763],
         [ 0.6097,  0.7559,  1.9220],
         [27.2693,  0.2388,  1.2015],
         ...,
         [21.0785,  1.5878,  1.0638],
         [ 1.2921,  2.0809,  0.9280],
         [ 0.3304,  1.5008,  6.0243]],

        [[ 0.6269,  0.9541,  0.8846],
         [12.5462,  0.6379,  0.9885],
         [ 2.1037,  0.3969,  0.7616],
         ...,
         [ 3.5288,  0.1076,  5.2955],
         [12.1944,  0.2954,  0.8869],
         [ 4.1459,  1.1835,  0.8221]],

        [[ 1.6061,  0.8097,  4.0221],
         [ 1.1732,  0.6361,  0.9920],
         [ 0.6356,  0.7470,  1.2551],
         ...,
         [ 0.0372,  0.8794,  2.8520],
         [ 0.2445,  1.1027,  8.9874],
         [ 0.1902,  2.5506,  2.9781


Train Diffusion:  27%|██▋       | 1352/5001 [1:06:36<2:41:53,  2.66s/it][A
Train Diffusion:  27%|██▋       | 1353/5001 [1:06:38<2:40:32,  2.64s/it][A
Train Diffusion:  27%|██▋       | 1354/5001 [1:06:41<2:39:52,  2.63s/it][A
Train Diffusion:  27%|██▋       | 1355/5001 [1:06:43<2:39:05,  2.62s/it][A
Train Diffusion:  27%|██▋       | 1356/5001 [1:06:46<2:38:26,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1357/5001 [1:06:49<2:38:07,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1358/5001 [1:06:51<2:37:51,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1359/5001 [1:06:54<2:37:42,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1360/5001 [1:06:56<2:37:38,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1361/5001 [1:06:59<2:37:30,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332478998.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6307, 0.5033, 1.2903],
        [8.8663, 0.4749, 1.3355],
        [8.8341, 0.5078, 1.2706]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6016,  0.8075,  2.6592],
         [ 1.3454,  0.5238,  0.7858],
         [ 0.7678,  0.6732,  0.8356],
         ...,
         [30.1056,  0.0801,  1.2624],
         [ 5.5768,  0.0652,  0.4336],
         [ 0.5952,  2.3171,  0.6777]],

        [[ 0.7018,  0.9587,  1.0207],
         [12.0609,  0.5052,  1.1742],
         [ 2.1607,  0.4822,  1.2266],
         ...,
         [ 0.4529,  3.1593,  0.6138],
         [25.3216,  0.0690,  1.4022],
         [ 5.7792,  0.2385,  1.5227]],

        [[ 0.5994,  0.9540,  0.9314],
         [ 0.5658,  0.8109,  1.7696],
         [28.3850,  0.1883,  1.1938],
         ...,
         [ 4.2446,  0.1401,  0.7968],
         [ 0.5040,  0.6820,  0.8706],
         [23.8834,  1.1508,  2.0182


Train Diffusion:  27%|██▋       | 1362/5001 [1:07:02<2:38:39,  2.62s/it][A
Train Diffusion:  27%|██▋       | 1363/5001 [1:07:04<2:38:28,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1364/5001 [1:07:07<2:37:53,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1365/5001 [1:07:09<2:37:47,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1366/5001 [1:07:12<2:37:39,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1367/5001 [1:07:15<2:37:34,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1368/5001 [1:07:17<2:38:08,  2.61s/it][A
Train Diffusion:  27%|██▋       | 1369/5001 [1:07:20<2:37:41,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1370/5001 [1:07:22<2:37:36,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1371/5001 [1:07:25<2:37:50,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330658176.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5466, 0.5229, 1.3038],
        [8.7419, 0.5033, 1.3130],
        [8.8817, 0.4891, 1.2848]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6061,  0.8149,  3.6062],
         [ 1.0143,  0.6867,  0.9404],
         [ 0.7075,  0.6902,  1.3620],
         ...,
         [ 4.2125,  0.1507,  0.6827],
         [ 0.4382,  1.4578,  0.7144],
         [23.0669,  1.3769,  2.0222]],

        [[ 0.6457,  0.9536,  0.8881],
         [ 0.6073,  0.7053,  1.9288],
         [28.0348,  0.1963,  1.1989],
         ...,
         [ 0.2292, 10.8442,  0.6909],
         [29.5725,  0.1990,  0.9496],
         [ 5.6145,  0.3612,  1.0575]],

        [[ 0.6493,  0.9537,  0.9512],
         [12.5037,  0.5304,  1.0711],
         [ 2.1641,  0.4437,  0.7093],
         ...,
         [30.4729,  0.0912,  1.2695],
         [ 5.5763,  0.0931,  0.6657],
         [ 0.6216,  1.8414,  0.8736


Train Diffusion:  27%|██▋       | 1372/5001 [1:07:28<2:37:25,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1373/5001 [1:07:30<2:37:17,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1374/5001 [1:07:33<2:37:16,  2.60s/it][A
Train Diffusion:  27%|██▋       | 1375/5001 [1:07:35<2:37:31,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1376/5001 [1:07:38<2:37:12,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1377/5001 [1:07:41<2:37:20,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1378/5001 [1:07:43<2:37:51,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1379/5001 [1:07:46<2:37:30,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1380/5001 [1:07:48<2:37:12,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1381/5001 [1:07:51<2:36:59,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336542499.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5833, 0.5044, 1.3293],
        [8.7296, 0.4713, 1.3087],
        [8.8687, 0.5067, 1.3190]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6056,  0.7980,  4.8445],
         [ 1.4267,  0.2249,  1.0197],
         [ 0.6982,  0.7326,  0.9709],
         ...,
         [ 0.8046,  0.2906,  2.2649],
         [ 0.3344,  0.5720,  0.9179],
         [ 2.6651,  1.9067,  1.8873]],

        [[ 0.6693,  0.9553,  0.8561],
         [ 0.6264,  0.8473,  1.5526],
         [26.4188,  0.2623,  1.1859],
         ...,
         [ 0.2947,  0.5193,  1.0405],
         [39.6477,  0.0601,  1.4005],
         [ 6.5143,  0.2060,  1.1761]],

        [[ 0.6271,  0.9535,  0.6412],
         [10.3815,  1.2138,  1.3877],
         [ 1.1899,  0.6397,  0.8993],
         ...,
         [25.1391,  0.3707,  0.5966],
         [ 4.4451,  0.1382,  0.3208],
         [15.0766,  2.4363,  3.9885


Train Diffusion:  28%|██▊       | 1382/5001 [1:07:54<2:36:57,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1383/5001 [1:07:56<2:36:40,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1384/5001 [1:07:59<2:36:39,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1385/5001 [1:08:01<2:36:50,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1386/5001 [1:08:04<2:36:58,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1387/5001 [1:08:07<2:37:01,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1388/5001 [1:08:09<2:36:49,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1389/5001 [1:08:12<2:36:21,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1390/5001 [1:08:14<2:36:31,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1391/5001 [1:08:17<2:36:47,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315388048.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7156, 0.4945, 1.3022],
        [8.8601, 0.4880, 1.3007],
        [8.7775, 0.4986, 1.3012]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5192e+00, 8.3122e-01, 5.5235e+00],
         [8.2389e-01, 7.9338e-01, 1.2873e+00],
         [8.1575e-01, 6.6793e-01, 7.3764e-01],
         ...,
         [5.7094e+00, 3.2805e-01, 1.4050e+00],
         [7.6642e+00, 1.2657e+00, 3.5106e-01],
         [3.2422e+00, 4.7347e-01, 1.1601e+00]],

        [[8.8852e-01, 9.6073e-01, 7.8575e-01],
         [8.5212e-01, 5.8381e-01, 1.8722e+00],
         [2.8777e+01, 8.4102e-02, 1.1983e+00],
         ...,
         [4.2595e+00, 1.0631e-01, 9.8128e-01],
         [1.5333e-04, 4.3824e-01, 2.4322e+00],
         [2.3197e+01, 1.2883e+00, 2.0391e+00]],

        [[4.9893e-01, 9.5373e-01, 8.5006e-01],
         [1.0632e+01, 7.4673e-01, 7.4477e-01],
         [1.5801e+00, 4.8485e-01, 8.2


Train Diffusion:  28%|██▊       | 1392/5001 [1:08:20<2:36:40,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1393/5001 [1:08:22<2:36:16,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1394/5001 [1:08:25<2:36:21,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1395/5001 [1:08:28<2:39:08,  2.65s/it][A
Train Diffusion:  28%|██▊       | 1396/5001 [1:08:30<2:38:06,  2.63s/it][A
Train Diffusion:  28%|██▊       | 1397/5001 [1:08:33<2:43:33,  2.72s/it][A
Train Diffusion:  28%|██▊       | 1398/5001 [1:08:36<2:42:11,  2.70s/it][A
Train Diffusion:  28%|██▊       | 1399/5001 [1:08:38<2:40:30,  2.67s/it][A
Train Diffusion:  28%|██▊       | 1400/5001 [1:08:41<2:38:56,  2.65s/it][A
Train Diffusion:  28%|██▊       | 1401/5001 [1:08:44<2:38:08,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335346620.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7352, 0.5198, 1.3042],
        [8.7634, 0.5046, 1.3017],
        [8.7911, 0.5065, 1.2965]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4424,  0.9550,  0.9645],
         [10.9597,  0.5955,  1.4482],
         [ 1.7295,  0.4770,  0.8624],
         ...,
         [ 0.5430,  8.1186,  0.7436],
         [ 0.6653,  2.8925,  0.5485],
         [ 0.3690,  1.4533, 11.4312]],

        [[ 1.2530,  0.8982,  0.7567],
         [ 1.4239,  0.4082,  1.8080],
         [25.1799,  0.3002,  1.1940],
         ...,
         [ 0.0391,  0.4258,  3.4441],
         [ 0.3409,  0.7406,  6.2611],
         [ 0.0274,  2.6733,  2.8168]],

        [[ 1.2181,  0.9053,  3.1130],
         [ 1.0243,  0.9603,  1.5597],
         [ 0.6534,  0.9247,  0.8052],
         ...,
         [ 3.8255,  0.1814,  0.9867],
         [25.1850,  0.0658,  1.3107],
         [ 4.7914,  0.6248,  1.4093


Train Diffusion:  28%|██▊       | 1402/5001 [1:08:46<2:37:21,  2.62s/it][A
Train Diffusion:  28%|██▊       | 1403/5001 [1:08:49<2:36:50,  2.62s/it][A
Train Diffusion:  28%|██▊       | 1404/5001 [1:08:51<2:36:30,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1405/5001 [1:08:54<2:36:08,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1406/5001 [1:08:57<2:36:23,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1407/5001 [1:08:59<2:35:55,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1408/5001 [1:09:02<2:36:07,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1409/5001 [1:09:04<2:35:50,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1410/5001 [1:09:07<2:35:26,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1411/5001 [1:09:10<2:35:23,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318740512.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8525, 0.4939, 1.3367],
        [8.6805, 0.4846, 1.3133],
        [8.7747, 0.5104, 1.3019]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6010,  0.9520,  0.8992],
         [11.6275,  0.6183,  0.9401],
         [ 1.8950,  0.4795,  0.8437],
         ...,
         [21.5893,  0.0373,  1.4406],
         [ 5.8800,  0.0669,  0.3296],
         [17.0558,  2.2602,  3.8794]],

        [[ 0.7008,  0.9565,  0.8510],
         [ 0.6499,  0.6938,  1.8937],
         [28.6932,  0.0831,  1.2003],
         ...,
         [ 0.1025,  2.6960,  1.1241],
         [21.2855,  0.6613,  0.4214],
         [ 5.4214,  0.1988,  0.9902]],

        [[ 1.6000,  0.8174,  4.3423],
         [ 0.9678,  0.6548,  0.9550],
         [ 0.7881,  0.6679,  0.8073],
         ...,
         [ 4.1524,  0.1063,  0.7643],
         [ 1.5460,  0.7055,  1.2588],
         [ 0.1413,  2.2088,  2.6245


Train Diffusion:  28%|██▊       | 1412/5001 [1:09:12<2:35:31,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1413/5001 [1:09:15<2:35:58,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1414/5001 [1:09:17<2:36:37,  2.62s/it][A
Train Diffusion:  28%|██▊       | 1415/5001 [1:09:20<2:36:20,  2.62s/it][A
Train Diffusion:  28%|██▊       | 1416/5001 [1:09:23<2:35:50,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1417/5001 [1:09:25<2:35:35,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1418/5001 [1:09:28<2:35:35,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1419/5001 [1:09:30<2:35:35,  2.61s/it][A
Train Diffusion:  28%|██▊       | 1420/5001 [1:09:33<2:35:27,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1421/5001 [1:09:36<2:35:13,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327676406.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6642, 0.5143, 1.2905],
        [8.8350, 0.4909, 1.2896],
        [8.6894, 0.4996, 1.3427]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0097e-01, 9.5735e-01, 8.5228e-01],
         [6.3492e-01, 7.3441e-01, 1.9241e+00],
         [2.7424e+01, 2.3542e-01, 1.2017e+00],
         ...,
         [2.7226e+00, 1.4395e-01, 7.4088e-01],
         [5.7708e-01, 6.1725e-01, 7.2462e-01],
         [2.0676e-02, 1.4474e+00, 2.8102e+00]],

        [[6.0024e-01, 9.5282e-01, 8.4528e-01],
         [1.2253e+01, 7.0542e-01, 8.7719e-01],
         [1.9722e+00, 4.1034e-01, 7.4561e-01],
         ...,
         [3.4525e-01, 4.5697e+00, 6.7609e-01],
         [3.3484e+01, 1.2620e-01, 1.3400e+00],
         [5.3896e+00, 3.2641e-01, 4.5678e+00]],

        [[1.6018e+00, 8.1223e-01, 4.4655e+00],
         [1.1142e+00, 6.4748e-01, 1.0704e+00],
         [6.2007e-01, 7.3280e-01, 1.2


Train Diffusion:  28%|██▊       | 1422/5001 [1:09:38<2:35:15,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1423/5001 [1:09:41<2:35:12,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1424/5001 [1:09:43<2:35:15,  2.60s/it][A
Train Diffusion:  28%|██▊       | 1425/5001 [1:09:46<2:34:53,  2.60s/it][A
Train Diffusion:  29%|██▊       | 1426/5001 [1:09:49<2:37:21,  2.64s/it][A
Train Diffusion:  29%|██▊       | 1427/5001 [1:09:51<2:36:39,  2.63s/it][A
Train Diffusion:  29%|██▊       | 1428/5001 [1:09:54<2:36:07,  2.62s/it][A
Train Diffusion:  29%|██▊       | 1429/5001 [1:09:57<2:35:44,  2.62s/it][A
Train Diffusion:  29%|██▊       | 1430/5001 [1:09:59<2:35:50,  2.62s/it][A
Train Diffusion:  29%|██▊       | 1431/5001 [1:10:02<2:35:16,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315592550.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6276, 0.4953, 1.2837],
        [8.8646, 0.4809, 1.3074],
        [8.7794, 0.4931, 1.2831]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5825,  0.8133,  1.1987],
         [ 1.4796,  0.4511,  0.7040],
         [ 0.7636,  0.9984,  0.5997],
         ...,
         [ 0.5162,  0.5910,  0.9789],
         [42.3748,  0.1309,  1.3386],
         [ 6.4381,  0.2209,  0.8128]],

        [[ 0.7710,  0.9638,  1.1615],
         [13.1126,  0.3780,  1.2741],
         [ 2.6776,  0.3207,  0.7880],
         ...,
         [ 3.4232,  0.1153,  1.0208],
         [ 1.4719,  0.3428,  0.7023],
         [17.4281,  1.5481,  2.0145]],

        [[ 0.5530,  0.9541,  0.9661],
         [ 0.5401,  0.7685,  1.6761],
         [22.6861,  0.2794,  1.1742],
         ...,
         [24.6085,  0.1738,  1.2012],
         [ 4.3267,  0.2081,  0.4167],
         [ 2.3205,  2.2158,  1.0110


Train Diffusion:  29%|██▊       | 1432/5001 [1:10:04<2:34:30,  2.60s/it][A
Train Diffusion:  29%|██▊       | 1433/5001 [1:10:07<2:33:43,  2.59s/it][A
Train Diffusion:  29%|██▊       | 1434/5001 [1:10:09<2:33:03,  2.57s/it][A
Train Diffusion:  29%|██▊       | 1435/5001 [1:10:12<2:32:50,  2.57s/it][A
Train Diffusion:  29%|██▊       | 1436/5001 [1:10:15<2:32:33,  2.57s/it][A
Train Diffusion:  29%|██▊       | 1437/5001 [1:10:17<2:32:15,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1438/5001 [1:10:20<2:31:49,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1439/5001 [1:10:22<2:31:56,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1440/5001 [1:10:25<2:31:29,  2.55s/it][A
Train Diffusion:  29%|██▉       | 1441/5001 [1:10:27<2:31:40,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330303036.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7417, 0.4867, 1.2792],
        [8.6964, 0.4871, 1.3061],
        [8.8381, 0.4975, 1.2775]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4954e-01, 9.5537e-01, 9.9063e-01],
         [7.0989e+00, 5.7911e-01, 2.4703e+00],
         [1.9649e+00, 2.3062e-02, 5.1953e-01],
         ...,
         [3.0191e+01, 9.6571e-02, 1.2695e+00],
         [5.4662e+00, 1.0698e-01, 5.9555e-01],
         [1.1105e+00, 1.6763e+00, 7.4973e+00]],

        [[1.3504e+00, 8.6773e-01, 6.0835e-01],
         [1.5841e+00, 3.7544e-01, 2.2235e-01],
         [1.3171e+00, 2.1649e+00, 1.9124e-01],
         ...,
         [2.1921e+00, 4.4202e-01, 1.3777e+00],
         [2.8317e-01, 8.0588e-01, 7.0472e-01],
         [1.2793e+01, 1.5516e+00, 1.9657e+00]],

        [[1.1121e+00, 9.2430e-01, 2.0890e+00],
         [2.6988e+00, 8.7423e-01, 1.3378e+00],
         [5.7873e-01, 4.6599e+00, 1.8


Train Diffusion:  29%|██▉       | 1442/5001 [1:10:30<2:33:35,  2.59s/it][A
Train Diffusion:  29%|██▉       | 1443/5001 [1:10:33<2:33:10,  2.58s/it][A
Train Diffusion:  29%|██▉       | 1444/5001 [1:10:35<2:33:52,  2.60s/it][A
Train Diffusion:  29%|██▉       | 1445/5001 [1:10:38<2:35:00,  2.62s/it][A
Train Diffusion:  29%|██▉       | 1446/5001 [1:10:40<2:34:16,  2.60s/it][A
Train Diffusion:  29%|██▉       | 1447/5001 [1:10:43<2:33:24,  2.59s/it][A
Train Diffusion:  29%|██▉       | 1448/5001 [1:10:46<2:32:51,  2.58s/it][A
Train Diffusion:  29%|██▉       | 1449/5001 [1:10:48<2:33:02,  2.59s/it][A
Train Diffusion:  29%|██▉       | 1450/5001 [1:10:51<2:32:19,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1451/5001 [1:10:53<2:32:08,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331993340.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8624, 0.5035, 1.2725],
        [8.6732, 0.5055, 1.3311],
        [8.6943, 0.4931, 1.2890]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5034,  0.8333,  4.3249],
         [ 0.7630,  1.0015,  1.2370],
         [ 0.6539,  0.9615,  0.7724],
         ...,
         [ 1.1928,  0.4219,  1.9461],
         [ 0.2488,  0.7154,  0.7686],
         [ 1.4743,  1.5172,  2.5325]],

        [[ 0.4916,  0.9529,  0.9924],
         [12.9181,  0.5001,  1.2277],
         [ 2.0187,  0.4962,  1.0024],
         ...,
         [24.6866,  0.1826,  0.8291],
         [ 5.0638,  0.1150,  0.4637],
         [ 5.9080,  1.8639,  6.7538]],

        [[ 0.9131,  0.9574,  0.7837],
         [ 0.8822,  0.5799,  1.7546],
         [25.4148,  0.2816,  1.1853],
         ...,
         [ 0.2109,  0.5828,  0.9990],
         [37.6577,  0.0790,  1.3855],
         [ 6.3743,  0.2162,  0.8319


Train Diffusion:  29%|██▉       | 1452/5001 [1:10:56<2:31:28,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1453/5001 [1:10:58<2:31:33,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1454/5001 [1:11:01<2:31:15,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1455/5001 [1:11:04<2:32:03,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1456/5001 [1:11:06<2:31:49,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1457/5001 [1:11:09<2:31:22,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1458/5001 [1:11:11<2:31:14,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1459/5001 [1:11:14<2:31:10,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1460/5001 [1:11:16<2:30:56,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1461/5001 [1:11:19<2:31:35,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325068880.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6624, 0.5137, 1.3353],
        [8.6560, 0.4966, 1.2945],
        [8.8354, 0.4956, 1.3173]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3375,  0.8691,  4.7237],
         [ 0.9337,  0.9147,  1.5568],
         [ 0.7639,  0.8620,  0.8220],
         ...,
         [ 0.7544,  0.4949,  0.8260],
         [22.8971,  0.7947,  3.4524],
         [ 0.3891,  3.6054,  2.5890]],

        [[ 1.1247,  0.9192,  0.9136],
         [ 1.1715,  0.4675,  1.7426],
         [24.9643,  0.3019,  1.1959],
         ...,
         [ 3.4374,  0.2105,  0.8023],
         [ 0.6955,  1.3672,  0.0538],
         [ 1.0217,  2.5363,  1.3481]],

        [[ 0.4490,  0.9534,  0.8664],
         [10.4082,  0.7817,  1.5657],
         [ 1.3828,  0.5355,  0.8407],
         ...,
         [35.5274,  0.4191,  0.2582],
         [ 2.2557,  2.9146,  0.2811],
         [ 0.3492,  2.5005,  4.0139


Train Diffusion:  29%|██▉       | 1462/5001 [1:11:22<2:31:42,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1463/5001 [1:11:24<2:31:41,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1464/5001 [1:11:27<2:31:20,  2.57s/it][A
Train Diffusion:  29%|██▉       | 1465/5001 [1:11:29<2:30:54,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1466/5001 [1:11:32<2:30:50,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1467/5001 [1:11:34<2:30:48,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1468/5001 [1:11:37<2:30:21,  2.55s/it][A
Train Diffusion:  29%|██▉       | 1469/5001 [1:11:39<2:30:26,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1470/5001 [1:11:42<2:30:37,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1471/5001 [1:11:45<2:30:48,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336462720.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7486, 0.4917, 1.3160],
        [8.8261, 0.4859, 1.3038],
        [8.5966, 0.4909, 1.2990]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5261e+00, 8.2755e-01, 4.4355e+00],
         [8.0834e-01, 9.2434e-01, 1.2146e+00],
         [6.3053e-01, 8.9788e-01, 8.3741e-01],
         ...,
         [4.2458e+00, 1.1125e-01, 1.1073e+00],
         [3.6231e-05, 4.0320e-01, 2.1937e+00],
         [2.1678e+01, 1.4857e+00, 1.9126e+00]],

        [[8.7042e-01, 9.6106e-01, 7.8562e-01],
         [8.1199e-01, 6.2139e-01, 1.8523e+00],
         [2.4970e+01, 2.7123e-01, 1.2055e+00],
         ...,
         [2.4258e+01, 6.0486e-01, 3.1432e-01],
         [1.6614e+00, 2.9729e+00, 3.0569e-01],
         [9.8090e-01, 1.4050e+00, 8.0405e-01]],

        [[5.0759e-01, 9.5267e-01, 9.4016e-01],
         [1.2784e+01, 5.6378e-01, 1.1727e+00],
         [1.9466e+00, 4.6691e-01, 8.0


Train Diffusion:  29%|██▉       | 1472/5001 [1:11:47<2:30:24,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1473/5001 [1:11:50<2:30:41,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1474/5001 [1:11:52<2:30:42,  2.56s/it][A
Train Diffusion:  29%|██▉       | 1475/5001 [1:11:55<2:30:24,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1476/5001 [1:11:57<2:30:16,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1477/5001 [1:12:00<2:30:27,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1478/5001 [1:12:02<2:30:54,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1479/5001 [1:12:05<2:31:06,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1480/5001 [1:12:08<2:31:58,  2.59s/it][A
Train Diffusion:  30%|██▉       | 1481/5001 [1:12:10<2:31:20,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330666614.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8583, 0.4975, 1.3248],
        [8.7643, 0.5195, 1.2670],
        [8.7009, 0.4968, 1.2694]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6070,  0.8140,  3.9981],
         [ 1.0885,  0.6714,  1.0105],
         [ 0.6422,  0.7260,  1.3533],
         ...,
         [30.0115,  0.3752,  0.6100],
         [ 4.7236,  0.1104,  0.5493],
         [ 8.8047,  2.1536,  6.4496]],

        [[ 0.6121,  0.9534,  0.9147],
         [12.8023,  0.5960,  0.9867],
         [ 2.1434,  0.4111,  0.7171],
         ...,
         [ 0.3418,  0.8075,  2.3593],
         [ 0.1806,  0.7442,  6.3814],
         [ 0.0345,  2.3401,  3.3122]],

        [[ 0.6848,  0.9565,  0.8628],
         [ 0.6285,  0.7254,  1.9761],
         [27.3623,  0.2361,  1.2011],
         ...,
         [ 3.6643,  0.1574,  1.2734],
         [31.5325,  0.0562,  1.3913],
         [ 6.1109,  0.1594,  2.3901


Train Diffusion:  30%|██▉       | 1482/5001 [1:12:13<2:30:52,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1483/5001 [1:12:15<2:30:41,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1484/5001 [1:12:18<2:30:26,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1485/5001 [1:12:21<2:30:33,  2.57s/it][A
Train Diffusion:  30%|██▉       | 1486/5001 [1:12:23<2:30:11,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1487/5001 [1:12:26<2:30:01,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1488/5001 [1:12:28<2:29:54,  2.56s/it][A
Train Diffusion:  30%|██▉       | 1489/5001 [1:12:31<2:31:33,  2.59s/it][A
Train Diffusion:  30%|██▉       | 1490/5001 [1:12:33<2:31:10,  2.58s/it][A
Train Diffusion:  30%|██▉       | 1491/5001 [1:12:36<2:30:51,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323525196.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8318, 0.4807, 1.3022],
        [8.7748, 0.5041, 1.3236],
        [8.6475, 0.4951, 1.2491]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3751e+00, 8.5874e-01, 4.3504e+00],
         [1.6776e+00, 1.0254e+00, 1.1754e+00],
         [1.7090e+00, 4.3593e+00, 4.8996e-01],
         ...,
         [5.2494e-01, 6.1332e-01, 7.7763e-01],
         [9.9217e+00, 5.7790e-01, 1.0482e+00],
         [1.3336e+00, 1.1202e+00, 1.0626e+01]],

        [[1.0794e+00, 9.2824e-01, 1.3990e+00],
         [3.8613e+00, 7.4929e-01, 1.2734e+00],
         [1.0899e+00, 4.6497e+00, 1.9607e+00],
         ...,
         [3.1244e+01, 1.0456e-01, 1.2369e+00],
         [5.3382e+00, 1.4812e-01, 7.2911e-01],
         [1.8553e-02, 1.2744e+00, 2.9106e+00]],

        [[4.5425e-01, 9.5377e-01, 1.0800e+00],
         [5.0843e-01, 6.5486e-01, 2.3345e-01],
         [1.2390e+00, 2.1701e+00, 2.0


Train Diffusion:  30%|██▉       | 1492/5001 [1:12:39<2:33:33,  2.63s/it][A
Train Diffusion:  30%|██▉       | 1493/5001 [1:12:41<2:32:18,  2.60s/it][A
Train Diffusion:  30%|██▉       | 1494/5001 [1:12:44<2:31:49,  2.60s/it][A
Train Diffusion:  30%|██▉       | 1495/5001 [1:12:46<2:31:56,  2.60s/it][A
Train Diffusion:  30%|██▉       | 1496/5001 [1:12:49<2:33:42,  2.63s/it][A
Train Diffusion:  30%|██▉       | 1497/5001 [1:12:52<2:32:16,  2.61s/it][A
Train Diffusion:  30%|██▉       | 1498/5001 [1:12:54<2:31:25,  2.59s/it][A
Train Diffusion:  30%|██▉       | 1499/5001 [1:12:57<2:31:21,  2.59s/it][A
Train Diffusion:  30%|██▉       | 1500/5001 [1:12:59<2:30:36,  2.58s/it][A
Train Diffusion:  30%|███       | 1501/5001 [1:13:02<2:31:03,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324578396.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8885, 0.4883, 1.2885],
        [8.5546, 0.4994, 1.2902],
        [8.9065, 0.4792, 1.2861]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5278,  0.8292,  0.5628],
         [ 1.8080,  0.3462,  0.2897],
         [16.4621,  1.3461,  8.7983],
         ...,
         [34.1583,  0.1553,  1.1609],
         [ 4.6668,  0.1949,  0.8010],
         [ 0.5241,  1.8370,  0.7785]],

        [[ 0.5039,  0.9532,  0.9407],
         [ 2.9874,  0.6836,  5.8676],
         [ 0.6138,  3.6827,  3.4955],
         ...,
         [ 3.4066,  0.2668,  0.9050],
         [ 0.5212,  0.5115,  0.9505],
         [16.7949,  1.6305,  1.9022]],

        [[ 0.8756,  0.9613,  1.2653],
         [ 4.3368,  0.6587,  1.5647],
         [ 0.8568,  2.3864,  1.0814],
         ...,
         [ 0.6239,  0.5335,  0.9869],
         [36.2430,  0.1281,  1.3428],
         [ 5.2761,  0.4351,  4.8469


Train Diffusion:  30%|███       | 1502/5001 [1:13:05<2:30:47,  2.59s/it][A
Train Diffusion:  30%|███       | 1503/5001 [1:13:07<2:30:21,  2.58s/it][A
Train Diffusion:  30%|███       | 1504/5001 [1:13:10<2:29:48,  2.57s/it][A
Train Diffusion:  30%|███       | 1505/5001 [1:13:12<2:29:25,  2.56s/it][A
Train Diffusion:  30%|███       | 1506/5001 [1:13:15<2:29:23,  2.56s/it][A
Train Diffusion:  30%|███       | 1507/5001 [1:13:17<2:29:44,  2.57s/it][A
Train Diffusion:  30%|███       | 1508/5001 [1:13:20<2:29:27,  2.57s/it][A
Train Diffusion:  30%|███       | 1509/5001 [1:13:23<2:29:44,  2.57s/it][A
Train Diffusion:  30%|███       | 1510/5001 [1:13:25<2:29:25,  2.57s/it][A
Train Diffusion:  30%|███       | 1511/5001 [1:13:28<2:29:20,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328563094.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6562, 0.5190, 1.2804],
        [8.7537, 0.4925, 1.3130],
        [8.7495, 0.5184, 1.2896]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1940,  0.9059,  2.7101],
         [ 1.3726,  0.9625,  1.3852],
         [ 0.7378,  1.0371,  0.5914],
         ...,
         [28.5372,  0.1538,  1.1192],
         [ 5.7017,  0.0698,  0.6983],
         [16.5356,  1.2817,  1.7265]],

        [[ 1.2750,  0.8905,  0.7137],
         [ 1.4769,  0.3951,  1.3822],
         [19.8205,  0.3459,  1.1986],
         ...,
         [ 1.9009,  3.8570,  1.5818],
         [ 8.3181,  0.5398,  1.5317],
         [ 1.6199,  2.2065,  1.6986]],

        [[ 0.4435,  0.9542,  1.0089],
         [10.0446,  0.5297,  1.8199],
         [ 1.7642,  0.4551,  0.6738],
         ...,
         [ 0.2523,  0.6664,  0.6611],
         [16.2519,  0.3046,  1.1266],
         [ 3.9045,  0.3230,  8.9069


Train Diffusion:  30%|███       | 1512/5001 [1:13:30<2:28:56,  2.56s/it][A
Train Diffusion:  30%|███       | 1513/5001 [1:13:33<2:29:06,  2.56s/it][A
Train Diffusion:  30%|███       | 1514/5001 [1:13:35<2:29:08,  2.57s/it][A
Train Diffusion:  30%|███       | 1515/5001 [1:13:38<2:29:02,  2.57s/it][A
Train Diffusion:  30%|███       | 1516/5001 [1:13:41<2:29:02,  2.57s/it][A
Train Diffusion:  30%|███       | 1517/5001 [1:13:43<2:28:53,  2.56s/it][A
Train Diffusion:  30%|███       | 1518/5001 [1:13:46<2:28:40,  2.56s/it][A
Train Diffusion:  30%|███       | 1519/5001 [1:13:48<2:28:48,  2.56s/it][A
Train Diffusion:  30%|███       | 1520/5001 [1:13:51<2:28:26,  2.56s/it][A
Train Diffusion:  30%|███       | 1521/5001 [1:13:53<2:28:38,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316259337.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8470, 0.4727, 1.2756],
        [8.7718, 0.4722, 1.2649],
        [8.7662, 0.4967, 1.2673]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9806,  0.9481,  1.6293],
         [ 7.1487,  0.5496,  1.3316],
         [ 1.8466,  0.7045,  0.8157],
         ...,
         [ 0.1151,  9.5797,  0.8987],
         [22.9507,  0.8147,  0.1445],
         [ 5.0870,  0.1478,  8.4887]],

        [[ 0.4728,  0.9543,  0.9504],
         [ 2.8563,  0.8095,  1.0025],
         [ 0.7063,  0.9648,  1.6839],
         ...,
         [ 4.2801,  0.1247,  0.6353],
         [ 0.3352,  1.3492,  1.5955],
         [ 0.3151,  2.2866,  0.8769]],

        [[ 1.4558,  0.8427,  0.7191],
         [ 1.7409,  0.3382,  1.3663],
         [25.8961,  0.2711,  1.1647],
         ...,
         [24.6312,  0.0696,  1.3901],
         [ 5.7178,  0.0754,  0.8150],
         [ 0.5780,  1.6956,  2.3195


Train Diffusion:  30%|███       | 1522/5001 [1:13:56<2:28:35,  2.56s/it][A
Train Diffusion:  30%|███       | 1523/5001 [1:13:58<2:28:32,  2.56s/it][A
Train Diffusion:  30%|███       | 1524/5001 [1:14:01<2:28:45,  2.57s/it][A
Train Diffusion:  30%|███       | 1525/5001 [1:14:04<2:29:08,  2.57s/it][A
Train Diffusion:  31%|███       | 1526/5001 [1:14:06<2:29:01,  2.57s/it][A
Train Diffusion:  31%|███       | 1527/5001 [1:14:09<2:28:39,  2.57s/it][A
Train Diffusion:  31%|███       | 1528/5001 [1:14:11<2:28:33,  2.57s/it][A
Train Diffusion:  31%|███       | 1529/5001 [1:14:14<2:28:14,  2.56s/it][A
Train Diffusion:  31%|███       | 1530/5001 [1:14:16<2:27:59,  2.56s/it][A
Train Diffusion:  31%|███       | 1531/5001 [1:14:19<2:28:10,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333165580.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8086, 0.4794, 1.3005],
        [8.6655, 0.5011, 1.2693],
        [8.8331, 0.4999, 1.2849]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5964,  0.9522,  0.9308],
         [ 0.5667,  0.8245,  1.7485],
         [28.3880,  0.0833,  1.2046],
         ...,
         [ 1.7108,  0.9248,  1.3464],
         [ 0.5937,  0.4145,  0.7940],
         [27.3792,  1.1170,  2.0225]],

        [[ 0.7056,  0.9573,  1.0239],
         [12.3026,  0.4956,  1.1786],
         [ 2.2169,  0.4514,  0.8712],
         ...,
         [29.3932,  0.9132,  1.2187],
         [ 2.7329,  0.7790,  0.8740],
         [ 3.7412,  0.6034,  1.1999]],

        [[ 1.6010,  0.8060,  2.5895],
         [ 1.3764,  0.5228,  0.7864],
         [ 0.7385,  0.6837,  0.7847],
         ...,
         [ 0.6051,  0.4692,  0.9746],
         [ 7.9478,  0.4316,  1.3619],
         [ 1.4792,  1.4180,  0.8238


Train Diffusion:  31%|███       | 1532/5001 [1:14:22<2:28:16,  2.56s/it][A
Train Diffusion:  31%|███       | 1533/5001 [1:14:24<2:28:09,  2.56s/it][A
Train Diffusion:  31%|███       | 1534/5001 [1:14:27<2:28:12,  2.56s/it][A
Train Diffusion:  31%|███       | 1535/5001 [1:14:29<2:28:01,  2.56s/it][A
Train Diffusion:  31%|███       | 1536/5001 [1:14:32<2:28:03,  2.56s/it][A
Train Diffusion:  31%|███       | 1537/5001 [1:14:34<2:29:20,  2.59s/it][A
Train Diffusion:  31%|███       | 1538/5001 [1:14:37<2:28:50,  2.58s/it][A
Train Diffusion:  31%|███       | 1539/5001 [1:14:40<2:28:50,  2.58s/it][A
Train Diffusion:  31%|███       | 1540/5001 [1:14:42<2:30:47,  2.61s/it][A
Train Diffusion:  31%|███       | 1541/5001 [1:14:45<2:30:37,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318875713.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7215, 0.4980, 1.3211],
        [8.7805, 0.5115, 1.2799],
        [8.8072, 0.4915, 1.2935]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7186e-01, 9.5131e-01, 9.3797e-01],
         [5.6146e-01, 8.0823e-01, 1.6989e+00],
         [2.6510e+01, 1.8511e-01, 1.1977e+00],
         ...,
         [3.4851e-01, 6.4039e+00, 9.8841e-01],
         [2.8536e-01, 8.7296e-01, 5.2373e-01],
         [1.8082e+01, 2.0948e+00, 1.8595e+00]],

        [[1.5891e+00, 8.0652e-01, 1.3205e+00],
         [1.5901e+00, 4.1054e-01, 7.3890e-01],
         [7.0787e-01, 7.7459e-01, 1.3060e+00],
         ...,
         [4.1019e-01, 1.6113e+00, 1.5489e+01],
         [5.1665e-01, 2.4833e-02, 6.8248e-01],
         [6.1627e+00, 2.4962e-01, 7.5199e-01]],

        [[7.4143e-01, 9.5963e-01, 1.1378e+00],
         [1.3194e+01, 3.9779e-01, 1.2597e+00],
         [2.5904e+00, 3.4985e-01, 7.1


Train Diffusion:  31%|███       | 1542/5001 [1:14:47<2:29:48,  2.60s/it][A
Train Diffusion:  31%|███       | 1543/5001 [1:14:50<2:29:31,  2.59s/it][A
Train Diffusion:  31%|███       | 1544/5001 [1:14:53<2:29:05,  2.59s/it][A
Train Diffusion:  31%|███       | 1545/5001 [1:14:55<2:28:20,  2.58s/it][A
Train Diffusion:  31%|███       | 1546/5001 [1:14:58<2:28:12,  2.57s/it][A
Train Diffusion:  31%|███       | 1547/5001 [1:15:00<2:28:33,  2.58s/it][A
Train Diffusion:  31%|███       | 1548/5001 [1:15:03<2:28:52,  2.59s/it][A
Train Diffusion:  31%|███       | 1549/5001 [1:15:05<2:28:39,  2.58s/it][A
Train Diffusion:  31%|███       | 1550/5001 [1:15:08<2:28:18,  2.58s/it][A
Train Diffusion:  31%|███       | 1551/5001 [1:15:11<2:28:02,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331855497.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.0535, 0.4774, 1.3204],
        [8.6583, 0.4962, 1.2962],
        [8.6090, 0.4982, 1.3366]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4504,  0.9562,  1.0053],
         [12.2474,  0.5225,  1.1716],
         [ 1.9559,  0.4755,  0.9040],
         ...,
         [ 0.4969,  0.6048,  0.9951],
         [37.4236,  0.0961,  1.3008],
         [ 5.8588,  0.3854,  1.1073]],

        [[ 1.1005,  0.9273,  0.8861],
         [ 1.1246,  0.4988,  1.9238],
         [27.2526,  0.0971,  1.2015],
         ...,
         [32.3113,  0.1090,  1.2325],
         [ 5.3046,  0.1313,  0.6855],
         [ 0.6140,  1.8384,  0.8284]],

        [[ 1.3620,  0.8660,  4.0066],
         [ 0.7758,  1.0484,  1.3902],
         [ 0.6463,  0.8724,  0.5439],
         ...,
         [ 3.8096,  0.1925,  0.8521],
         [ 0.5357,  0.5657,  0.8100],
         [24.7320,  1.3036,  2.0479


Train Diffusion:  31%|███       | 1552/5001 [1:15:13<2:27:32,  2.57s/it][A
Train Diffusion:  31%|███       | 1553/5001 [1:15:16<2:27:21,  2.56s/it][A
Train Diffusion:  31%|███       | 1554/5001 [1:15:18<2:27:45,  2.57s/it][A
Train Diffusion:  31%|███       | 1555/5001 [1:15:21<2:27:47,  2.57s/it][A
Train Diffusion:  31%|███       | 1556/5001 [1:15:23<2:27:51,  2.58s/it][A
Train Diffusion:  31%|███       | 1557/5001 [1:15:26<2:27:32,  2.57s/it][A
Train Diffusion:  31%|███       | 1558/5001 [1:15:29<2:27:27,  2.57s/it][A
Train Diffusion:  31%|███       | 1559/5001 [1:15:31<2:27:23,  2.57s/it][A
Train Diffusion:  31%|███       | 1560/5001 [1:15:34<2:27:05,  2.56s/it][A
Train Diffusion:  31%|███       | 1561/5001 [1:15:36<2:27:01,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335482483.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5847, 0.4939, 1.3317],
        [8.9262, 0.5323, 1.3089],
        [8.5779, 0.5135, 1.3041]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6069,  0.9525,  0.9250],
         [ 0.5674,  0.7469,  1.8071],
         [28.2372,  0.2112,  1.1880],
         ...,
         [ 3.8514,  0.1612,  0.7607],
         [ 0.5322,  0.6631,  1.0533],
         [ 0.1334,  1.2309,  2.9162]],

        [[ 1.6046,  0.8114,  2.7309],
         [ 1.1842,  0.5688,  0.7945],
         [ 0.8256,  0.6607,  0.8482],
         ...,
         [ 0.3829,  1.4439,  0.7647],
         [23.5506,  0.2845,  1.1093],
         [ 4.1041,  0.4755,  8.6291]],

        [[ 0.6924,  0.9563,  1.0250],
         [11.5600,  0.5070,  1.1790],
         [ 2.0424,  0.5332,  1.2114],
         ...,
         [32.3045,  0.0850,  1.2466],
         [ 5.5808,  0.0583,  0.6994],
         [ 1.8653,  0.6905,  0.9885


Train Diffusion:  31%|███       | 1562/5001 [1:15:39<2:27:04,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1563/5001 [1:15:41<2:27:15,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1564/5001 [1:15:44<2:27:16,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1565/5001 [1:15:47<2:26:52,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1566/5001 [1:15:49<2:27:02,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1567/5001 [1:15:52<2:26:42,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1568/5001 [1:15:54<2:26:25,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1569/5001 [1:15:57<2:26:34,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1570/5001 [1:15:59<2:26:23,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1571/5001 [1:16:02<2:26:38,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324604851.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7675, 0.5011, 1.2833],
        [8.7739, 0.4693, 1.3019],
        [8.7985, 0.4872, 1.2813]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5678,  0.8218,  5.5242],
         [ 0.9637,  0.3874,  1.0982],
         [ 0.8682,  0.8007,  0.8048],
         ...,
         [ 0.5789,  0.9019,  2.7513],
         [ 0.2000,  0.5797,  0.5677],
         [ 0.3508,  2.1166,  1.3214]],

        [[ 0.5397,  0.9530,  0.6840],
         [10.0936,  1.1851,  2.1689],
         [ 1.0430,  0.7531,  1.2002],
         ...,
         [ 0.3274,  0.9158,  5.8040],
         [20.4851,  0.0461,  1.4651],
         [ 6.4141,  0.2072,  1.3096]],

        [[ 0.7969,  0.9630,  0.8015],
         [ 0.7094,  0.6581,  1.4603],
         [25.1145,  0.2859,  1.1773],
         ...,
         [20.3948,  0.2423,  1.1762],
         [ 4.0994,  0.2109,  0.7968],
         [21.2034,  1.3850,  1.9887


Train Diffusion:  31%|███▏      | 1572/5001 [1:16:05<2:26:38,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1573/5001 [1:16:07<2:26:31,  2.56s/it][A
Train Diffusion:  31%|███▏      | 1574/5001 [1:16:10<2:26:42,  2.57s/it][A
Train Diffusion:  31%|███▏      | 1575/5001 [1:16:12<2:26:35,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1576/5001 [1:16:15<2:26:24,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1577/5001 [1:16:17<2:26:09,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1578/5001 [1:16:20<2:26:14,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1579/5001 [1:16:22<2:26:17,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1580/5001 [1:16:25<2:26:10,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1581/5001 [1:16:28<2:26:03,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333668192.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7461, 0.5053, 1.2901],
        [8.7913, 0.5150, 1.3417],
        [8.7279, 0.4962, 1.3122]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5336,  0.8280,  5.5641],
         [ 0.7939,  0.7557,  1.3661],
         [ 0.6841,  0.8073,  0.9598],
         ...,
         [33.4198,  0.1429,  1.0854],
         [ 5.0895,  0.0418,  3.1717],
         [ 2.9025,  0.5110,  1.0168]],

        [[ 0.5073,  0.9520,  0.7821],
         [11.0902,  0.8911,  1.2290],
         [ 1.4441,  0.5566,  0.8588],
         ...,
         [ 2.1713,  0.2946,  1.8679],
         [ 0.2428,  0.6801,  0.7663],
         [ 0.6498,  1.3603,  2.5249]],

        [[ 0.8661,  0.9609,  0.7873],
         [ 0.8053,  0.6055,  1.7740],
         [26.6467,  0.2710,  1.1950],
         ...,
         [ 0.9236,  0.4307,  1.2147],
         [14.0275,  0.4098,  1.2099],
         [ 2.0592,  1.1016, 10.5755


Train Diffusion:  32%|███▏      | 1582/5001 [1:16:30<2:25:51,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1583/5001 [1:16:33<2:26:22,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1584/5001 [1:16:35<2:27:31,  2.59s/it][A
Train Diffusion:  32%|███▏      | 1585/5001 [1:16:38<2:26:47,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1586/5001 [1:16:40<2:26:37,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1587/5001 [1:16:43<2:26:46,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1588/5001 [1:16:46<2:29:34,  2.63s/it][A
Train Diffusion:  32%|███▏      | 1589/5001 [1:16:48<2:28:22,  2.61s/it][A
Train Diffusion:  32%|███▏      | 1590/5001 [1:16:51<2:27:28,  2.59s/it][A
Train Diffusion:  32%|███▏      | 1591/5001 [1:16:54<2:27:07,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332967084.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8319, 0.4835, 1.3192],
        [8.6183, 0.4896, 1.2875],
        [8.7815, 0.5036, 1.2808]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9054,  0.9566,  1.3561],
         [ 8.5088,  0.4719,  1.3101],
         [ 2.0763,  0.6111,  0.7004],
         ...,
         [ 2.6795,  0.1599,  0.7108],
         [ 0.5753,  0.6057,  0.4097],
         [ 8.1134,  2.3888,  1.2175]],

        [[ 0.4941,  0.9509,  0.9436],
         [ 1.9867,  0.7935,  1.0047],
         [ 0.7345,  0.8754,  1.1731],
         ...,
         [27.5515,  0.1182,  1.2646],
         [ 5.4140,  0.1068,  0.8228],
         [ 6.7695,  1.4586,  1.6664]],

        [[ 1.5070,  0.8309,  0.7078],
         [ 1.8045,  0.3323,  1.1756],
         [21.4522,  0.3120,  1.1855],
         ...,
         [ 0.2583,  8.8850,  0.6763],
         [32.4012,  0.0737,  1.4316],
         [ 6.4580,  0.2365,  0.7302


Train Diffusion:  32%|███▏      | 1592/5001 [1:16:56<2:26:30,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1593/5001 [1:16:59<2:25:50,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1594/5001 [1:17:01<2:25:45,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1595/5001 [1:17:04<2:25:59,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1596/5001 [1:17:06<2:25:49,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1597/5001 [1:17:09<2:25:26,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1598/5001 [1:17:11<2:25:26,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1599/5001 [1:17:14<2:25:54,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1600/5001 [1:17:17<2:26:00,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1601/5001 [1:17:19<2:26:16,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333500662.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7380, 0.4835, 1.2761],
        [8.7655, 0.4862, 1.3257],
        [8.8084, 0.4843, 1.2822]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.4704e-01, 9.6034e-01, 1.1142e+00],
         [1.2064e+01, 4.4385e-01, 1.2471e+00],
         [2.3374e+00, 4.8963e-01, 1.0947e+00],
         ...,
         [4.9634e-02, 9.7435e-01, 4.2601e+00],
         [1.5387e-01, 1.0948e-01, 5.6391e+00],
         [1.9538e+00, 9.2691e-01, 1.2811e+00]],

        [[1.5869e+00, 8.0832e-01, 1.6539e+00],
         [1.5256e+00, 4.2575e-01, 7.6430e-01],
         [7.1622e-01, 7.7007e-01, 9.5493e-01],
         ...,
         [3.2440e-01, 6.7043e-01, 2.7186e+00],
         [3.3475e-05, 8.1441e-01, 3.0710e+00],
         [1.4110e-01, 1.6251e+00, 8.8463e+00]],

        [[5.6831e-01, 9.5161e-01, 9.4528e-01],
         [5.6715e-01, 8.2312e-01, 1.6150e+00],
         [2.7645e+01, 2.1975e-01, 1.1


Train Diffusion:  32%|███▏      | 1602/5001 [1:17:22<2:26:08,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1603/5001 [1:17:24<2:26:19,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1604/5001 [1:17:27<2:25:43,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1605/5001 [1:17:29<2:25:22,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1606/5001 [1:17:32<2:25:56,  2.58s/it][A
Train Diffusion:  32%|███▏      | 1607/5001 [1:17:35<2:25:31,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1608/5001 [1:17:37<2:25:06,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1609/5001 [1:17:40<2:25:16,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1610/5001 [1:17:42<2:25:15,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1611/5001 [1:17:45<2:25:19,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341215523.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7901, 0.4823, 1.3163],
        [8.7330, 0.4952, 1.2933],
        [8.7039, 0.5074, 1.2848]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7974,  0.9623,  0.8002],
         [ 0.7476,  0.6470,  1.8593],
         [27.5924,  0.1543,  1.1957],
         ...,
         [ 0.4629,  0.7326,  1.0052],
         [28.5838,  0.2135,  1.1494],
         [ 4.6910,  0.6812,  0.6978]],

        [[ 0.5385,  0.9524,  0.9453],
         [12.4915,  0.5482,  1.1337],
         [ 1.9526,  0.4959,  0.9611],
         ...,
         [31.5094,  0.1420,  1.1948],
         [ 4.8676,  0.1054,  4.2404],
         [ 1.3593,  1.2040,  1.3427]],

        [[ 1.5699,  0.8256,  4.3286],
         [ 0.8323,  0.8662,  1.0940],
         [ 0.6886,  0.8514,  0.7488],
         ...,
         [ 4.0231,  0.1488,  0.7763],
         [ 0.7328,  0.4641,  0.7629],
         [25.6853,  1.3654,  1.8886


Train Diffusion:  32%|███▏      | 1612/5001 [1:17:47<2:25:01,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1613/5001 [1:17:50<2:24:44,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1614/5001 [1:17:53<2:24:49,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1615/5001 [1:17:55<2:24:43,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1616/5001 [1:17:58<2:24:45,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1617/5001 [1:18:00<2:24:24,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1618/5001 [1:18:03<2:24:28,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1619/5001 [1:18:05<2:24:51,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1620/5001 [1:18:08<2:24:32,  2.57s/it][A
Train Diffusion:  32%|███▏      | 1621/5001 [1:18:11<2:24:23,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324154723.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8436, 0.4794, 1.3263],
        [8.6471, 0.4965, 1.3215],
        [8.7028, 0.4803, 1.3035]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6503,  0.9549,  0.8905],
         [ 0.5957,  0.7706,  1.8522],
         [28.3901,  0.0835,  1.2068],
         ...,
         [ 2.6965,  0.1578,  4.2825],
         [ 0.6441,  0.7894,  2.3292],
         [ 0.3410,  1.5456,  6.7227]],

        [[ 0.6449,  0.9547,  0.9139],
         [12.1527,  0.6090,  1.0267],
         [ 2.0354,  0.4458,  0.8653],
         ...,
         [27.3021,  0.2719,  0.9730],
         [ 3.7346,  0.0583,  0.6107],
         [ 4.0920,  0.3685,  0.8818]],

        [[ 1.6062,  0.8083,  3.7030],
         [ 1.2180,  0.5596,  0.8835],
         [ 0.7248,  0.6978,  0.7444],
         ...,
         [ 0.4862,  1.3291,  1.4762],
         [ 0.3437,  0.5295,  0.9526],
         [ 8.3714,  1.4541,  2.2932


Train Diffusion:  32%|███▏      | 1622/5001 [1:18:13<2:24:24,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1623/5001 [1:18:16<2:24:23,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1624/5001 [1:18:18<2:24:17,  2.56s/it][A
Train Diffusion:  32%|███▏      | 1625/5001 [1:18:21<2:24:08,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1626/5001 [1:18:23<2:24:19,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1627/5001 [1:18:26<2:24:10,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1628/5001 [1:18:28<2:24:11,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1629/5001 [1:18:31<2:24:03,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1630/5001 [1:18:34<2:24:27,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1631/5001 [1:18:36<2:25:18,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320106508.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6591, 0.4898, 1.2826],
        [8.8187, 0.5104, 1.2901],
        [8.7375, 0.5140, 1.3131]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.5999e-01, 9.5123e-01, 8.7059e-01],
         [1.1674e+01, 6.6524e-01, 8.2943e-01],
         [1.8406e+00, 4.6671e-01, 1.0146e+00],
         ...,
         [2.1716e+01, 3.9971e-01, 1.5291e-01],
         [1.8986e+00, 1.2870e+00, 4.6222e-01],
         [5.2291e-01, 4.2006e+00, 1.1587e+00]],

        [[7.5877e-01, 9.6090e-01, 8.1495e-01],
         [7.0558e-01, 6.6494e-01, 1.9197e+00],
         [2.8621e+01, 8.7432e-02, 1.2022e+00],
         ...,
         [4.1242e+00, 9.9305e-02, 1.0496e+00],
         [3.6633e+01, 5.5670e-01, 1.9271e+00],
         [7.8394e-01, 4.1495e+00, 1.5427e+00]],

        [[1.5858e+00, 8.2249e-01, 4.8507e+00],
         [8.7786e-01, 7.2022e-01, 1.1073e+00],
         [7.2755e-01, 6.7428e-01, 7.5


Train Diffusion:  33%|███▎      | 1632/5001 [1:18:39<2:25:24,  2.59s/it][A
Train Diffusion:  33%|███▎      | 1633/5001 [1:18:41<2:24:45,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1634/5001 [1:18:44<2:24:35,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1635/5001 [1:18:47<2:25:40,  2.60s/it][A
Train Diffusion:  33%|███▎      | 1636/5001 [1:18:49<2:27:25,  2.63s/it][A
Train Diffusion:  33%|███▎      | 1637/5001 [1:18:52<2:26:12,  2.61s/it][A
Train Diffusion:  33%|███▎      | 1638/5001 [1:18:54<2:25:27,  2.60s/it][A
Train Diffusion:  33%|███▎      | 1639/5001 [1:18:57<2:25:03,  2.59s/it][A
Train Diffusion:  33%|███▎      | 1640/5001 [1:19:00<2:24:29,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1641/5001 [1:19:02<2:24:18,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329878844.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7956, 0.4975, 1.3149],
        [8.7415, 0.4835, 1.3170],
        [8.7156, 0.4900, 1.2606]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5778,  0.9528,  0.9386],
         [ 0.5731,  0.8396,  1.6877],
         [27.6155,  0.1640,  1.1999],
         ...,
         [ 3.1417,  0.3663,  1.0311],
         [ 0.4682,  0.4128,  3.3095],
         [ 4.5595,  2.1583,  2.4123]],

        [[ 1.5937,  0.8066,  2.0009],
         [ 1.4944,  0.4677,  0.7791],
         [ 0.6990,  0.7215,  1.3847],
         ...,
         [ 0.5901,  0.5174,  1.1327],
         [30.7963,  0.1175,  1.1917],
         [ 5.3349,  0.5735,  0.8577]],

        [[ 0.7323,  0.9603,  1.0860],
         [12.8371,  0.4417,  1.2229],
         [ 2.4696,  0.4107,  0.6631],
         ...,
         [32.4479,  0.2495,  1.1198],
         [ 3.9148,  0.3793,  0.9492],
         [ 0.3831,  1.4366,  6.3532


Train Diffusion:  33%|███▎      | 1642/5001 [1:19:05<2:24:20,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1643/5001 [1:19:07<2:24:28,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1644/5001 [1:19:10<2:24:20,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1645/5001 [1:19:12<2:23:51,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1646/5001 [1:19:15<2:24:02,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1647/5001 [1:19:18<2:23:47,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1648/5001 [1:19:20<2:23:54,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1649/5001 [1:19:23<2:23:59,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1650/5001 [1:19:25<2:24:02,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1651/5001 [1:19:28<2:23:46,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326681081.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7949, 0.4905, 1.2882],
        [8.5653, 0.5023, 1.3277],
        [8.7973, 0.4731, 1.2752]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6894,  0.9563,  1.0193],
         [12.4971,  0.4885,  1.1785],
         [ 2.2466,  0.4631,  0.8309],
         ...,
         [ 4.2311,  0.5430,  0.1203],
         [ 4.8189,  0.0958,  0.6650],
         [ 0.6133,  1.8950,  0.8745]],

        [[ 1.6028,  0.8091,  2.6954],
         [ 1.2499,  0.5815,  0.7993],
         [ 0.7281,  0.6895,  0.8362],
         ...,
         [ 0.6057,  1.0259,  2.3573],
         [ 0.1694,  4.2928,  0.7879],
         [21.8665,  1.3723,  2.0437]],

        [[ 0.6096,  0.9528,  0.9233],
         [ 0.5679,  0.7649,  1.8217],
         [28.2661,  0.0852,  1.2064],
         ...,
         [ 0.1662,  0.6126,  0.6165],
         [32.6836,  0.2565,  0.7583],
         [ 5.5038,  0.3305,  1.0001


Train Diffusion:  33%|███▎      | 1652/5001 [1:19:30<2:23:35,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1653/5001 [1:19:33<2:23:38,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1654/5001 [1:19:36<2:23:11,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1655/5001 [1:19:38<2:23:07,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1656/5001 [1:19:41<2:22:59,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1657/5001 [1:19:43<2:22:50,  2.56s/it][A
Train Diffusion:  33%|███▎      | 1658/5001 [1:19:46<2:22:55,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1659/5001 [1:19:48<2:23:01,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1660/5001 [1:19:51<2:23:35,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1661/5001 [1:19:54<2:23:31,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330898816.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8164, 0.4972, 1.2996],
        [8.6730, 0.5111, 1.3308],
        [8.6764, 0.4976, 1.3047]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4524e-01, 9.5454e-01, 8.8819e-01],
         [1.0130e+01, 7.4450e-01, 1.3410e+00],
         [1.4404e+00, 5.6075e-01, 8.7591e-01],
         ...,
         [4.7422e-02, 1.0489e+00, 4.0607e+00],
         [1.4022e-01, 1.4752e+00, 1.5130e+01],
         [1.1408e-01, 2.9272e+00, 3.0645e+00]],

        [[1.1723e+00, 9.0988e-01, 8.9147e-01],
         [1.2776e+00, 4.3583e-01, 1.8193e+00],
         [2.6667e+01, 2.7482e-01, 1.1882e+00],
         ...,
         [1.7951e+00, 5.3850e-01, 1.5887e+00],
         [2.7560e+00, 7.8405e-01, 1.1112e+00],
         [1.5759e+00, 1.7137e+00, 2.4795e+00]],

        [[1.2938e+00, 8.8446e-01, 4.3676e+00],
         [1.0629e+00, 9.3583e-01, 1.5904e+00],
         [7.5969e-01, 8.3880e-01, 9.3


Train Diffusion:  33%|███▎      | 1662/5001 [1:19:56<2:23:07,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1663/5001 [1:19:59<2:22:56,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1664/5001 [1:20:01<2:22:50,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1665/5001 [1:20:04<2:22:45,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1666/5001 [1:20:07<2:23:32,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1667/5001 [1:20:09<2:23:19,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1668/5001 [1:20:12<2:23:07,  2.58s/it][A
Train Diffusion:  33%|███▎      | 1669/5001 [1:20:14<2:22:58,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1670/5001 [1:20:17<2:22:40,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1671/5001 [1:20:19<2:22:41,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332007385.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6223, 0.5006, 1.2740],
        [8.9190, 0.4808, 1.2855],
        [8.6417, 0.5175, 1.3034]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0455e+00, 9.3569e-01, 8.3877e-01],
         [1.0324e+00, 5.1512e-01, 1.8741e+00],
         [2.6608e+01, 2.6984e-01, 1.1993e+00],
         ...,
         [2.7601e+01, 1.6774e-01, 1.1709e+00],
         [4.6603e+00, 1.9545e-01, 7.7780e-01],
         [6.4587e-01, 1.8980e+00, 3.7899e+00]],

        [[4.5962e-01, 9.5282e-01, 8.8047e-01],
         [1.1187e+01, 7.2673e-01, 1.0459e+00],
         [1.6067e+00, 4.6300e-01, 7.9939e-01],
         ...,
         [2.4666e-01, 5.2342e-01, 1.0438e+00],
         [3.6528e+01, 7.6286e-02, 1.3806e+00],
         [6.0613e+00, 1.7486e-01, 5.1095e+00]],

        [[1.4046e+00, 8.5110e-01, 4.9812e+00],
         [7.9583e-01, 9.1171e-01, 1.5100e+00],
         [6.8240e-01, 7.8759e-01, 1.0


Train Diffusion:  33%|███▎      | 1672/5001 [1:20:22<2:22:22,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1673/5001 [1:20:24<2:22:16,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1674/5001 [1:20:27<2:22:24,  2.57s/it][A
Train Diffusion:  33%|███▎      | 1675/5001 [1:20:30<2:22:22,  2.57s/it][A
Train Diffusion:  34%|███▎      | 1676/5001 [1:20:32<2:22:14,  2.57s/it][A
Train Diffusion:  34%|███▎      | 1677/5001 [1:20:35<2:22:00,  2.56s/it][A
Train Diffusion:  34%|███▎      | 1678/5001 [1:20:37<2:22:06,  2.57s/it][A
Train Diffusion:  34%|███▎      | 1679/5001 [1:20:40<2:25:12,  2.62s/it][A
Train Diffusion:  34%|███▎      | 1680/5001 [1:20:43<2:24:45,  2.62s/it][A
Train Diffusion:  34%|███▎      | 1681/5001 [1:20:45<2:23:49,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324650097.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7674, 0.5134, 1.3321],
        [8.6715, 0.4937, 1.2926],
        [8.7992, 0.4773, 1.2795]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.3125e-01, 9.5773e-01, 1.1138e+00],
         [1.3153e+01, 4.1611e-01, 1.2431e+00],
         [2.5033e+00, 3.5530e-01, 7.3702e-01],
         ...,
         [2.9321e+01, 1.5966e-01, 1.1984e+00],
         [4.6082e+00, 8.0304e-02, 7.4367e+00],
         [1.5554e+00, 1.8866e+00, 1.2217e+00]],

        [[5.7818e-01, 9.5032e-01, 9.4252e-01],
         [5.5578e-01, 7.9014e-01, 1.7472e+00],
         [2.5619e+01, 1.6811e-01, 1.2149e+00],
         ...,
         [3.2403e+00, 3.3775e-01, 1.1921e+00],
         [3.3971e-01, 1.7363e+00, 6.4382e+00],
         [2.8688e+00, 3.2385e+00, 1.9208e+00]],

        [[1.5919e+00, 8.0708e-01, 1.6611e+00],
         [1.4684e+00, 4.5558e-01, 7.5820e-01],
         [7.0516e-01, 8.1591e-01, 1.0


Train Diffusion:  34%|███▎      | 1682/5001 [1:20:48<2:23:06,  2.59s/it][A
Train Diffusion:  34%|███▎      | 1683/5001 [1:20:51<2:27:04,  2.66s/it][A
Train Diffusion:  34%|███▎      | 1684/5001 [1:20:53<2:25:25,  2.63s/it][A
Train Diffusion:  34%|███▎      | 1685/5001 [1:20:56<2:24:15,  2.61s/it][A
Train Diffusion:  34%|███▎      | 1686/5001 [1:20:58<2:23:46,  2.60s/it][A
Train Diffusion:  34%|███▎      | 1687/5001 [1:21:01<2:23:01,  2.59s/it][A
Train Diffusion:  34%|███▍      | 1688/5001 [1:21:03<2:23:05,  2.59s/it][A
Train Diffusion:  34%|███▍      | 1689/5001 [1:21:06<2:22:38,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1690/5001 [1:21:09<2:22:05,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1691/5001 [1:21:11<2:21:56,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323029814.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5914, 0.4777, 1.3087],
        [8.7542, 0.4929, 1.3066],
        [8.8997, 0.4924, 1.3125]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7410e-01, 9.5196e-01, 9.3964e-01],
         [5.7489e-01, 8.3976e-01, 1.6414e+00],
         [2.7834e+01, 1.9571e-01, 1.1919e+00],
         ...,
         [6.4696e-01, 7.0583e-01, 5.6922e-01],
         [2.6115e+01, 1.8404e-01, 2.3970e+00],
         [5.2525e+00, 5.3669e-01, 1.3781e+00]],

        [[7.3901e-01, 9.6007e-01, 1.0941e+00],
         [1.2407e+01, 4.4723e-01, 1.2278e+00],
         [2.3813e+00, 4.6226e-01, 1.1671e+00],
         ...,
         [2.8797e+00, 1.4823e-01, 7.2863e-01],
         [1.7114e-02, 5.9575e-01, 1.2067e+00],
         [1.9291e+01, 1.7807e+00, 1.8938e+00]],

        [[1.5896e+00, 8.0697e-01, 1.8770e+00],
         [1.5163e+00, 4.4569e-01, 7.7010e-01],
         [7.1208e-01, 7.2688e-01, 9.0


Train Diffusion:  34%|███▍      | 1692/5001 [1:21:14<2:21:38,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1693/5001 [1:21:16<2:21:22,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1694/5001 [1:21:19<2:21:23,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1695/5001 [1:21:21<2:21:48,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1696/5001 [1:21:24<2:21:41,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1697/5001 [1:21:27<2:21:20,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1698/5001 [1:21:29<2:21:05,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1699/5001 [1:21:32<2:21:44,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1700/5001 [1:21:34<2:21:30,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1701/5001 [1:21:37<2:21:20,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338656224.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7646, 0.5193, 1.2935],
        [8.6794, 0.4947, 1.3120],
        [8.8679, 0.4820, 1.2823]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5915,  0.9514,  0.9325],
         [ 0.5688,  0.8324,  1.7231],
         [27.9876,  0.1382,  1.2015],
         ...,
         [23.9530,  0.1486,  0.9427],
         [ 5.3496,  0.0864,  0.6917],
         [ 0.6235,  1.9198,  0.7414]],

        [[ 0.7134,  0.9573,  1.0447],
         [12.6874,  0.4681,  1.1965],
         [ 2.3565,  0.4254,  0.6804],
         ...,
         [ 3.9813,  0.1373,  0.6845],
         [ 0.3217,  3.1055,  0.8266],
         [22.6560,  1.3524,  2.0479]],

        [[ 1.5975,  0.8054,  2.3804],
         [ 1.4202,  0.5180,  0.7836],
         [ 0.7098,  0.7045,  1.3273],
         ...,
         [ 0.0997, 13.5687,  1.0691],
         [27.3664,  0.3092,  1.0702],
         [ 5.4143,  0.3478,  1.2207


Train Diffusion:  34%|███▍      | 1702/5001 [1:21:39<2:20:57,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1703/5001 [1:21:42<2:21:05,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1704/5001 [1:21:45<2:20:46,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1705/5001 [1:21:47<2:20:43,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1706/5001 [1:21:50<2:21:26,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1707/5001 [1:21:52<2:21:30,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1708/5001 [1:21:55<2:21:26,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1709/5001 [1:21:57<2:21:22,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1710/5001 [1:22:00<2:21:05,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1711/5001 [1:22:03<2:21:07,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321985337.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8782, 0.4786, 1.2949],
        [8.6721, 0.5282, 1.2975],
        [8.6544, 0.4942, 1.3098]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3885,  0.8566,  5.3617],
         [ 1.1032,  0.5577,  1.2816],
         [ 1.2963,  0.6915,  0.8135],
         ...,
         [ 4.1135,  0.1598,  0.7225],
         [ 0.6624,  0.4300,  0.9781],
         [21.1716,  1.5106,  2.0009]],

        [[ 1.0673,  0.9321,  0.8552],
         [ 1.0669,  0.4881,  1.5882],
         [22.2630,  0.3135,  1.1958],
         ...,
         [ 0.3128,  9.7437,  0.6301],
         [28.2621,  0.0951,  1.3134],
         [ 5.4016,  0.5678,  1.1717]],

        [[ 0.4551,  0.9543,  0.7849],
         [ 8.1570,  1.0470,  1.2967],
         [ 0.8982,  0.6692,  0.8162],
         ...,
         [32.5467,  0.3229,  0.7797],
         [ 3.0268,  1.0466,  0.5016],
         [ 0.5680,  1.6121,  2.8767


Train Diffusion:  34%|███▍      | 1712/5001 [1:22:05<2:21:10,  2.58s/it][A
Train Diffusion:  34%|███▍      | 1713/5001 [1:22:08<2:20:45,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1714/5001 [1:22:10<2:20:40,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1715/5001 [1:22:13<2:20:31,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1716/5001 [1:22:15<2:20:22,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1717/5001 [1:22:18<2:20:15,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1718/5001 [1:22:21<2:20:14,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1719/5001 [1:22:23<2:20:18,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1720/5001 [1:22:26<2:20:07,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1721/5001 [1:22:28<2:20:18,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336993120.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5557, 0.5074, 1.2985],
        [8.8412, 0.4953, 1.3003],
        [8.5705, 0.4984, 1.2731]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5768,  0.9495,  0.9501],
         [ 0.5433,  0.7467,  0.3260],
         [14.1782,  1.0999,  2.4789],
         ...,
         [ 0.9728,  0.3841,  1.6246],
         [19.2769,  0.0568,  1.3609],
         [ 5.0830,  0.4148,  0.7507]],

        [[ 1.5901,  0.8116,  2.0177],
         [ 1.2679,  0.5391,  0.7189],
         [ 0.0548,  1.1147,  4.0191],
         ...,
         [ 1.7960,  0.0529,  1.2341],
         [ 5.7187,  0.0681,  0.6889],
         [25.9953,  1.2512,  1.9057]],

        [[ 0.7349,  0.9571,  1.0879],
         [12.5986,  0.3959,  1.2296],
         [ 2.4619,  0.5192,  1.0429],
         ...,
         [ 0.0630,  5.9998,  1.5804],
         [ 5.6754,  0.6097,  1.7649],
         [ 1.3552,  1.7679,  1.3404


Train Diffusion:  34%|███▍      | 1722/5001 [1:22:31<2:20:08,  2.56s/it][A
Train Diffusion:  34%|███▍      | 1723/5001 [1:22:33<2:20:22,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1724/5001 [1:22:36<2:20:32,  2.57s/it][A
Train Diffusion:  34%|███▍      | 1725/5001 [1:22:39<2:20:45,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1726/5001 [1:22:41<2:20:49,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1727/5001 [1:22:44<2:23:39,  2.63s/it][A
Train Diffusion:  35%|███▍      | 1728/5001 [1:22:46<2:22:18,  2.61s/it][A
Train Diffusion:  35%|███▍      | 1729/5001 [1:22:49<2:21:14,  2.59s/it][A
Train Diffusion:  35%|███▍      | 1730/5001 [1:22:52<2:21:42,  2.60s/it][A
Train Diffusion:  35%|███▍      | 1731/5001 [1:22:54<2:23:28,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316768489.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7614, 0.4752, 1.2531],
        [8.9004, 0.4904, 1.2919],
        [8.8436, 0.4853, 1.3103]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0479,  0.9366,  0.8430],
         [ 1.0661,  0.4930,  1.8002],
         [26.7432,  0.2743,  1.1886],
         ...,
         [14.6704,  0.0315,  1.4230],
         [ 5.3915,  0.0529,  1.0616],
         [ 0.0715,  3.4229,  0.7552]],

        [[ 0.4581,  0.9546,  0.8897],
         [10.0960,  0.7196,  1.1902],
         [ 1.4567,  0.5560,  0.8544],
         ...,
         [ 3.9593,  0.1110,  0.8809],
         [30.8940,  0.3724,  3.1039],
         [ 3.3086,  2.3508,  1.9696]],

        [[ 1.4044,  0.8528,  5.3525],
         [ 0.9442,  0.9548,  1.4828],
         [ 0.8221,  0.8357,  0.9563],
         ...,
         [ 4.5573,  0.7808,  1.1733],
         [ 0.0720,  1.7240,  0.9268],
         [ 0.3986,  1.1280, 11.0394


Train Diffusion:  35%|███▍      | 1732/5001 [1:22:57<2:22:30,  2.62s/it][A
Train Diffusion:  35%|███▍      | 1733/5001 [1:22:59<2:21:19,  2.59s/it][A
Train Diffusion:  35%|███▍      | 1734/5001 [1:23:02<2:20:55,  2.59s/it][A
Train Diffusion:  35%|███▍      | 1735/5001 [1:23:05<2:20:44,  2.59s/it][A
Train Diffusion:  35%|███▍      | 1736/5001 [1:23:07<2:20:17,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1737/5001 [1:23:10<2:19:54,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1738/5001 [1:23:12<2:19:37,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1739/5001 [1:23:15<2:19:28,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1740/5001 [1:23:17<2:19:36,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1741/5001 [1:23:20<2:19:22,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321488032.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5547, 0.5115, 1.2721],
        [8.8776, 0.4931, 1.3001],
        [8.7839, 0.4752, 1.3062]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5358,  0.9522,  0.9166],
         [ 0.5010,  0.8483,  0.8740],
         [13.9862,  0.3663,  0.9313],
         ...,
         [35.7802,  0.0852,  1.1705],
         [ 5.7620,  0.0808,  0.7588],
         [ 0.2757,  4.9851,  1.6262]],

        [[ 0.8049,  0.9621,  1.1904],
         [12.6527,  0.3230,  1.2822],
         [ 2.9988,  0.3668,  0.7909],
         ...,
         [ 2.7983,  0.2867,  0.9116],
         [35.4971,  0.1063,  1.3522],
         [ 5.6644,  0.3137,  1.4032]],

        [[ 1.5641,  0.8122,  1.0216],
         [ 1.7855,  0.3443,  0.8641],
         [ 2.9224,  0.6545,  1.3953],
         ...,
         [ 0.9819,  0.5891,  0.9119],
         [ 0.4192,  0.7249,  0.2361],
         [17.6305,  3.4452,  3.5127


Train Diffusion:  35%|███▍      | 1742/5001 [1:23:23<2:20:04,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1743/5001 [1:23:25<2:20:05,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1744/5001 [1:23:28<2:19:39,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1745/5001 [1:23:30<2:19:32,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1746/5001 [1:23:33<2:19:32,  2.57s/it][A
Train Diffusion:  35%|███▍      | 1747/5001 [1:23:35<2:19:53,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1748/5001 [1:23:38<2:19:37,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1749/5001 [1:23:41<2:19:55,  2.58s/it][A
Train Diffusion:  35%|███▍      | 1750/5001 [1:23:43<2:19:36,  2.58s/it][A
Train Diffusion:  35%|███▌      | 1751/5001 [1:23:46<2:19:36,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332655664.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6186, 0.5128, 1.2649],
        [8.9371, 0.4981, 1.3214],
        [8.8084, 0.5014, 1.2825]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6279,  0.9539,  0.9082],
         [ 0.5804,  0.7895,  1.8477],
         [28.0265,  0.1313,  1.2029],
         ...,
         [ 0.5323,  1.1943,  1.4376],
         [ 0.3659,  0.5636,  0.6709],
         [ 9.5544,  1.9501,  1.6600]],

        [[ 1.6063,  0.8071,  3.3555],
         [ 1.2734,  0.5599,  0.8535],
         [ 0.7141,  0.7200,  1.1777],
         ...,
         [ 0.2184,  0.5860,  1.9869],
         [28.1059,  0.0567,  1.3413],
         [ 6.3631,  0.2734,  0.7136]],

        [[ 0.6686,  0.9557,  0.9500],
         [12.3567,  0.5569,  1.0893],
         [ 2.1294,  0.4230,  0.7313],
         ...,
         [20.9579,  0.1993,  1.1801],
         [ 4.3578,  0.2038,  0.7175],
         [ 2.7625,  2.1279,  2.8758


Train Diffusion:  35%|███▌      | 1752/5001 [1:23:48<2:19:10,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1753/5001 [1:23:51<2:19:08,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1754/5001 [1:23:53<2:19:04,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1755/5001 [1:23:56<2:19:06,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1756/5001 [1:23:59<2:18:57,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1757/5001 [1:24:01<2:18:36,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1758/5001 [1:24:04<2:18:44,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1759/5001 [1:24:06<2:19:06,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1760/5001 [1:24:09<2:18:46,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1761/5001 [1:24:11<2:18:42,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322752198.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7010, 0.5181, 1.2851],
        [8.8840, 0.4940, 1.3313],
        [8.7065, 0.4768, 1.2909]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3086e+00, 8.8028e-01, 6.6357e-01],
         [1.5243e+00, 3.8465e-01, 8.7606e-01],
         [1.8174e+00, 5.2248e-01, 1.3298e+00],
         ...,
         [2.6034e+01, 1.0367e-01, 1.2303e+00],
         [5.3225e+00, 8.8783e-02, 2.3436e+00],
         [1.3629e+01, 1.9996e+00, 1.9531e+00]],

        [[4.4463e-01, 9.5417e-01, 1.0061e+00],
         [8.2672e+00, 5.5616e-01, 2.1916e+00],
         [2.1928e+00, 6.0217e-02, 4.6508e-01],
         ...,
         [2.2430e-01, 5.5584e-01, 1.2852e+00],
         [2.1133e-04, 5.1958e-01, 3.0298e+00],
         [1.8093e-01, 1.4956e+00, 7.4421e+00]],

        [[1.1597e+00, 9.1232e-01, 2.4495e+00],
         [2.1558e+00, 9.1946e-01, 1.3139e+00],
         [8.8914e-01, 9.1647e-01, 6.4


Train Diffusion:  35%|███▌      | 1762/5001 [1:24:14<2:18:33,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1763/5001 [1:24:17<2:18:17,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1764/5001 [1:24:19<2:18:27,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1765/5001 [1:24:22<2:18:18,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1766/5001 [1:24:24<2:18:05,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1767/5001 [1:24:27<2:18:27,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1768/5001 [1:24:29<2:18:04,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1769/5001 [1:24:32<2:18:15,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1770/5001 [1:24:34<2:18:04,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1771/5001 [1:24:37<2:18:02,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322049980.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7571, 0.4835, 1.2870],
        [8.8901, 0.4958, 1.2656],
        [8.6311, 0.5014, 1.2968]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.4065e-01, 9.5146e-01, 9.1862e-01],
         [4.5076e-01, 8.6491e-01, 1.3737e+00],
         [2.6738e+01, 2.0517e-01, 1.1960e+00],
         ...,
         [2.3877e+00, 1.4024e-01, 1.0373e+00],
         [2.1475e+00, 2.1403e-01, 5.3172e-01],
         [9.8068e+00, 2.2837e+00, 5.1128e+00]],

        [[7.9530e-01, 9.6152e-01, 1.1736e+00],
         [1.2468e+01, 3.9102e-01, 1.2726e+00],
         [2.6770e+00, 4.1926e-01, 1.1045e+00],
         ...,
         [2.0002e+01, 2.1756e-01, 1.2178e+00],
         [3.7021e+00, 2.5152e-01, 8.7670e-01],
         [1.6691e-02, 1.5573e+00, 2.6398e+00]],

        [[1.5685e+00, 8.1065e-01, 1.1681e+00],
         [1.7575e+00, 3.5067e-01, 7.3649e-01],
         [5.4468e-01, 8.7323e-01, 1.1


Train Diffusion:  35%|███▌      | 1772/5001 [1:24:40<2:17:51,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1773/5001 [1:24:42<2:17:51,  2.56s/it][A
Train Diffusion:  35%|███▌      | 1774/5001 [1:24:45<2:18:09,  2.57s/it][A
Train Diffusion:  35%|███▌      | 1775/5001 [1:24:47<2:20:52,  2.62s/it][A
Train Diffusion:  36%|███▌      | 1776/5001 [1:24:50<2:20:13,  2.61s/it][A
Train Diffusion:  36%|███▌      | 1777/5001 [1:24:53<2:19:24,  2.59s/it][A
Train Diffusion:  36%|███▌      | 1778/5001 [1:24:55<2:21:30,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1779/5001 [1:24:58<2:21:03,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1780/5001 [1:25:01<2:20:07,  2.61s/it][A
Train Diffusion:  36%|███▌      | 1781/5001 [1:25:03<2:19:25,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332569603.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8888, 0.4856, 1.2962],
        [8.6849, 0.4896, 1.3359],
        [8.6885, 0.4778, 1.2488]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1556e-01, 9.5275e-01, 9.1476e-01],
         [5.7792e-01, 7.3107e-01, 1.8074e+00],
         [2.4500e+01, 2.9395e-01, 1.2071e+00],
         ...,
         [2.9930e+01, 1.1140e-01, 1.2428e+00],
         [5.2169e+00, 1.5011e-01, 8.6151e-01],
         [2.5557e-02, 1.4153e+00, 2.8620e+00]],

        [[6.8192e-01, 9.5546e-01, 1.0016e+00],
         [1.2984e+01, 4.7637e-01, 1.1704e+00],
         [2.2070e+00, 4.3804e-01, 8.0092e-01],
         ...,
         [3.8546e+00, 1.8923e-01, 7.6563e-01],
         [6.2066e-01, 6.5737e-01, 5.9059e-01],
         [5.5969e+00, 2.0995e+00, 6.6254e+00]],

        [[1.6048e+00, 8.1266e-01, 2.9814e+00],
         [1.1208e+00, 6.7194e-01, 8.7330e-01],
         [6.9047e-01, 9.0113e-01, 8.1


Train Diffusion:  36%|███▌      | 1782/5001 [1:25:06<2:18:41,  2.59s/it][A
Train Diffusion:  36%|███▌      | 1783/5001 [1:25:08<2:18:43,  2.59s/it][A
Train Diffusion:  36%|███▌      | 1784/5001 [1:25:11<2:18:12,  2.58s/it][A
Train Diffusion:  36%|███▌      | 1785/5001 [1:25:13<2:18:03,  2.58s/it][A
Train Diffusion:  36%|███▌      | 1786/5001 [1:25:16<2:17:51,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1787/5001 [1:25:18<2:17:37,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1788/5001 [1:25:21<2:18:02,  2.58s/it][A
Train Diffusion:  36%|███▌      | 1789/5001 [1:25:24<2:17:52,  2.58s/it][A
Train Diffusion:  36%|███▌      | 1790/5001 [1:25:26<2:17:47,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1791/5001 [1:25:29<2:17:32,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324979561.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6526, 0.4899, 1.3294],
        [8.5036, 0.5117, 1.3252],
        [8.8921, 0.5112, 1.3179]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7463,  0.9599,  1.1160],
         [12.9582,  0.4167,  1.2445],
         [ 2.5574,  0.3935,  0.6574],
         ...,
         [15.2430,  0.1286,  1.2819],
         [ 4.5815,  0.0957,  1.1210],
         [21.0689,  1.4115,  2.0340]],

        [[ 0.5690,  0.9513,  0.9356],
         [ 0.5671,  0.8537,  1.6447],
         [27.1293,  0.1881,  1.1966],
         ...,
         [ 1.5237,  2.4339,  1.7802],
         [17.7667,  0.3075,  0.7875],
         [ 3.4167,  1.6161,  1.0786]],

        [[ 1.5904,  0.8058,  1.6567],
         [ 1.5886,  0.4157,  0.7711],
         [ 0.6911,  0.7336,  1.4323],
         ...,
         [ 0.1844,  1.2590,  8.1489],
         [ 0.3288,  1.6463,  1.3723],
         [ 0.5286,  1.3859,  1.5764


Train Diffusion:  36%|███▌      | 1792/5001 [1:25:31<2:17:18,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1793/5001 [1:25:34<2:17:39,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1794/5001 [1:25:37<2:17:22,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1795/5001 [1:25:39<2:17:19,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1796/5001 [1:25:42<2:17:32,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1797/5001 [1:25:44<2:17:25,  2.57s/it][A
Train Diffusion:  36%|███▌      | 1798/5001 [1:25:47<2:17:32,  2.58s/it][A
Train Diffusion:  36%|███▌      | 1799/5001 [1:25:49<2:18:05,  2.59s/it][A
Train Diffusion:  36%|███▌      | 1800/5001 [1:25:52<2:17:58,  2.59s/it][A
Train Diffusion:  36%|███▌      | 1801/5001 [1:25:55<2:19:54,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343249984.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7799, 0.4898, 1.2780],
        [8.7619, 0.5174, 1.3078],
        [8.6927, 0.5217, 1.3226]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4984,  0.8338,  0.5223],
         [ 1.7948,  0.3447,  0.8038],
         [13.6952,  0.4661,  1.2904],
         ...,
         [ 1.3586,  0.4334,  0.1382],
         [ 4.9824,  0.0833,  0.6545],
         [ 0.5370,  2.0346,  0.8508]],

        [[ 0.4903,  0.9528,  0.9502],
         [ 3.3528,  0.6639,  3.2123],
         [ 1.2170,  0.4462,  0.6646],
         ...,
         [ 0.1652,  0.5924,  0.4913],
         [31.6317,  0.4870,  0.5823],
         [ 5.3476,  0.2778,  1.1065]],

        [[ 0.9190,  0.9565,  1.3190],
         [ 5.9049,  0.6258,  1.3831],
         [ 1.3675,  0.7179,  0.8180],
         ...,
         [ 0.4505,  1.9511,  2.2337],
         [ 0.1843,  4.5433,  1.0552],
         [20.0639,  1.4489,  2.0340


Train Diffusion:  36%|███▌      | 1802/5001 [1:25:57<2:19:57,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1803/5001 [1:26:00<2:19:55,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1804/5001 [1:26:03<2:19:24,  2.62s/it][A
Train Diffusion:  36%|███▌      | 1805/5001 [1:26:05<2:19:38,  2.62s/it][A
Train Diffusion:  36%|███▌      | 1806/5001 [1:26:08<2:19:54,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1807/5001 [1:26:10<2:19:41,  2.62s/it][A
Train Diffusion:  36%|███▌      | 1808/5001 [1:26:13<2:19:49,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1809/5001 [1:26:16<2:19:42,  2.63s/it][A
Train Diffusion:  36%|███▌      | 1810/5001 [1:26:18<2:19:18,  2.62s/it][A
Train Diffusion:  36%|███▌      | 1811/5001 [1:26:21<2:19:29,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333006851.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8189, 0.5012, 1.3102],
        [8.6133, 0.5108, 1.2764],
        [8.7461, 0.4955, 1.3285]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0309e-01, 9.5303e-01, 9.4003e-01],
         [2.0121e+00, 7.2500e-01, 1.4658e+00],
         [1.5660e+00, 8.2462e-02, 6.0486e-01],
         ...,
         [4.2843e+00, 1.2474e-01, 7.2730e-01],
         [1.9318e+01, 4.9298e+00, 4.8450e+00],
         [4.5787e+00, 2.5576e+00, 2.1825e+00]],

        [[1.5244e+00, 8.2893e-01, 5.9504e-01],
         [1.8222e+00, 3.3938e-01, 1.0912e+00],
         [1.0378e-05, 5.0906e-01, 2.9221e+00],
         ...,
         [5.0262e-01, 4.5171e-01, 2.6575e+00],
         [5.2883e-01, 3.6645e-01, 4.4738e-01],
         [5.0899e-01, 2.3161e+00, 9.7475e-01]],

        [[8.7967e-01, 9.6084e-01, 1.2757e+00],
         [8.3391e+00, 4.8337e-01, 1.3108e+00],
         [1.9575e+00, 5.6920e-01, 1.2


Train Diffusion:  36%|███▌      | 1812/5001 [1:26:24<2:19:07,  2.62s/it][A
Train Diffusion:  36%|███▋      | 1813/5001 [1:26:26<2:19:07,  2.62s/it][A
Train Diffusion:  36%|███▋      | 1814/5001 [1:26:29<2:18:56,  2.62s/it][A
Train Diffusion:  36%|███▋      | 1815/5001 [1:26:31<2:19:13,  2.62s/it][A
Train Diffusion:  36%|███▋      | 1816/5001 [1:26:34<2:19:47,  2.63s/it][A
Train Diffusion:  36%|███▋      | 1817/5001 [1:26:37<2:19:27,  2.63s/it][A
Train Diffusion:  36%|███▋      | 1818/5001 [1:26:39<2:19:12,  2.62s/it][A
Train Diffusion:  36%|███▋      | 1819/5001 [1:26:42<2:19:31,  2.63s/it][A
Train Diffusion:  36%|███▋      | 1820/5001 [1:26:45<2:19:24,  2.63s/it][A
Train Diffusion:  36%|███▋      | 1821/5001 [1:26:47<2:19:41,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321899168.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5953, 0.4782, 1.3095],
        [8.6974, 0.5186, 1.3256],
        [8.7657, 0.4958, 1.3109]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4698,  0.9517,  0.8917],
         [10.3982,  0.6959,  1.2142],
         [ 1.4944,  0.5970,  0.9349],
         ...,
         [ 2.5666,  0.2038,  0.6971],
         [ 0.3716,  2.7790,  0.6309],
         [ 0.0545,  2.0705,  1.9841]],

        [[ 1.4421,  0.8424,  5.3635],
         [ 0.8718,  0.9422,  1.3879],
         [ 0.8202,  0.8554,  0.8880],
         ...,
         [30.3231,  0.0621,  1.2945],
         [ 5.8399,  0.0912,  0.5921],
         [12.4917,  1.6852,  2.3390]],

        [[ 0.9969,  0.9428,  0.8090],
         [ 0.9897,  0.5234,  1.7527],
         [27.0176,  0.2214,  1.1874],
         ...,
         [ 0.2112,  1.8849,  0.5827],
         [31.7942,  0.1861,  0.7718],
         [ 6.1354,  0.1884,  1.2664


Train Diffusion:  36%|███▋      | 1822/5001 [1:26:50<2:21:12,  2.67s/it][A
Train Diffusion:  36%|███▋      | 1823/5001 [1:26:53<2:20:57,  2.66s/it][A
Train Diffusion:  36%|███▋      | 1824/5001 [1:26:55<2:20:35,  2.66s/it][A
Train Diffusion:  36%|███▋      | 1825/5001 [1:26:58<2:24:07,  2.72s/it][A
Train Diffusion:  37%|███▋      | 1826/5001 [1:27:01<2:22:16,  2.69s/it][A
Train Diffusion:  37%|███▋      | 1827/5001 [1:27:03<2:20:56,  2.66s/it][A
Train Diffusion:  37%|███▋      | 1828/5001 [1:27:06<2:19:14,  2.63s/it][A
Train Diffusion:  37%|███▋      | 1829/5001 [1:27:09<2:18:28,  2.62s/it][A
Train Diffusion:  37%|███▋      | 1830/5001 [1:27:11<2:17:54,  2.61s/it][A
Train Diffusion:  37%|███▋      | 1831/5001 [1:27:14<2:17:11,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324596246.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7494, 0.5171, 1.2986],
        [8.6841, 0.4980, 1.3239],
        [8.7803, 0.5073, 1.3311]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0752e-01, 9.5753e-01, 8.5138e-01],
         [7.2341e-01, 5.9159e-01, 2.2886e-01],
         [2.1235e-05, 2.3235e+00, 3.1265e-01],
         ...,
         [7.0348e-01, 4.0867e-01, 9.1102e-01],
         [3.6120e+01, 1.5093e-01, 1.1951e+00],
         [5.1053e+00, 4.8294e-01, 1.1705e+00]],

        [[1.6011e+00, 8.4421e-01, 5.8853e+00],
         [7.9402e-01, 1.1880e+00, 1.3445e+00],
         [8.5738e-01, 1.4263e+01, 7.7750e-01],
         ...,
         [1.1563e+00, 8.9853e-02, 1.5772e+00],
         [4.5494e+00, 1.2055e-01, 8.4115e-01],
         [2.7798e+01, 1.0787e+00, 2.0260e+00]],

        [[5.9533e-01, 9.5218e-01, 9.1677e-01],
         [1.0171e+01, 5.4220e-01, 1.4443e+00],
         [1.6308e+00, 3.8420e-01, 1.3


Train Diffusion:  37%|███▋      | 1832/5001 [1:27:16<2:16:40,  2.59s/it][A
Train Diffusion:  37%|███▋      | 1833/5001 [1:27:19<2:16:21,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1834/5001 [1:27:21<2:16:27,  2.59s/it][A
Train Diffusion:  37%|███▋      | 1835/5001 [1:27:24<2:16:20,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1836/5001 [1:27:27<2:16:00,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1837/5001 [1:27:29<2:15:53,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1838/5001 [1:27:32<2:15:57,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1839/5001 [1:27:34<2:15:53,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1840/5001 [1:27:37<2:15:37,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1841/5001 [1:27:39<2:15:12,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336825491.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.3648, 0.5348, 1.3379],
        [8.8435, 0.4969, 1.3218],
        [8.9493, 0.5144, 1.3164]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4910,  0.8353,  0.6465],
         [ 1.7991,  0.3306,  0.9969],
         [23.9579,  0.2683,  0.9559],
         ...,
         [ 0.1553,  2.0939,  2.1494],
         [ 0.4697,  2.5786,  0.6864],
         [ 0.3364,  1.5866,  4.0907]],

        [[ 0.9321,  0.9540,  1.4120],
         [ 9.7035,  0.5054,  1.2142],
         [ 2.3279,  0.6087,  0.9901],
         ...,
         [ 0.0702,  0.8873,  4.2771],
         [ 0.2549,  1.2114, 10.8282],
         [ 6.0455,  2.5017,  2.0518]],

        [[ 0.4857,  0.9521,  0.9430],
         [ 1.0508,  0.8204,  0.6316],
         [ 0.1261,  1.8224,  3.4588],
         ...,
         [ 1.2453,  0.3296,  0.8649],
         [21.7596,  0.1265,  1.1209],
         [ 4.3571,  0.9216,  0.9700


Train Diffusion:  37%|███▋      | 1842/5001 [1:27:42<2:15:09,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1843/5001 [1:27:45<2:15:16,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1844/5001 [1:27:47<2:15:05,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1845/5001 [1:27:50<2:15:13,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1846/5001 [1:27:52<2:15:12,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1847/5001 [1:27:55<2:15:01,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1848/5001 [1:27:57<2:15:02,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1849/5001 [1:28:00<2:15:06,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1850/5001 [1:28:03<2:15:10,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1851/5001 [1:28:05<2:15:12,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340812406.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6438, 0.4892, 1.3174],
        [8.7475, 0.5098, 1.3629],
        [8.6156, 0.5440, 1.2772]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5822,  0.8087,  1.4277],
         [ 1.5609,  0.4135,  0.7364],
         [ 0.6484,  0.8130,  0.9196],
         ...,
         [30.1383,  0.1185,  1.2512],
         [ 5.7881,  0.0686,  0.8126],
         [ 0.5257,  1.5563,  0.7601]],

        [[ 0.7628,  0.9601,  1.1349],
         [12.5532,  0.4055,  1.2635],
         [ 2.5883,  0.4475,  0.9345],
         ...,
         [ 3.5880,  0.1539,  1.1187],
         [ 5.5427,  0.4070,  1.3491],
         [ 2.5707,  3.3921,  0.9580]],

        [[ 0.5591,  0.9503,  0.9478],
         [ 0.5597,  0.8170,  1.5404],
         [25.2331,  0.2723,  1.1770],
         ...,
         [ 0.2843,  4.4858,  1.2321],
         [ 1.4973,  0.5996, 13.1256],
         [ 1.4643,  2.1068,  2.0744


Train Diffusion:  37%|███▋      | 1852/5001 [1:28:08<2:15:35,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1853/5001 [1:28:10<2:15:06,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1854/5001 [1:28:13<2:15:10,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1855/5001 [1:28:15<2:14:48,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1856/5001 [1:28:18<2:14:43,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1857/5001 [1:28:21<2:14:49,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1858/5001 [1:28:23<2:14:49,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1859/5001 [1:28:26<2:14:37,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1860/5001 [1:28:28<2:14:51,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1861/5001 [1:28:31<2:14:36,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315923174.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8869, 0.5005, 1.3240],
        [8.6482, 0.4918, 1.2973],
        [8.7183, 0.4842, 1.3027]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6998,  0.9571,  0.8525],
         [ 0.6368,  0.7282,  1.9574],
         [27.5666,  0.2036,  1.2038],
         ...,
         [ 0.5465,  1.1248,  0.9005],
         [ 0.7507,  0.4153,  0.6964],
         [ 0.1116,  1.4063,  2.5182]],

        [[ 1.6022,  0.8133,  4.1950],
         [ 1.0985,  0.6703,  1.0455],
         [ 0.6225,  0.7350,  1.2677],
         ...,
         [30.5827,  0.5833,  0.9018],
         [ 3.0897,  0.2342,  0.0704],
         [ 4.5577,  0.2433,  0.9025]],

        [[ 0.6008,  0.9526,  0.8890],
         [12.6662,  0.6323,  0.9575],
         [ 2.0877,  0.4022,  0.7679],
         ...,
         [ 3.5721,  0.1372,  1.0055],
         [11.0027,  0.3917,  1.2479],
         [ 3.8076,  2.5872,  3.0610


Train Diffusion:  37%|███▋      | 1862/5001 [1:28:33<2:14:52,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1863/5001 [1:28:36<2:14:40,  2.58s/it][A
Train Diffusion:  37%|███▋      | 1864/5001 [1:28:39<2:14:20,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1865/5001 [1:28:41<2:14:21,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1866/5001 [1:28:44<2:14:21,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1867/5001 [1:28:46<2:14:22,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1868/5001 [1:28:49<2:14:14,  2.57s/it][A
Train Diffusion:  37%|███▋      | 1869/5001 [1:28:52<2:15:39,  2.60s/it][A
Train Diffusion:  37%|███▋      | 1870/5001 [1:28:54<2:15:15,  2.59s/it][A
Train Diffusion:  37%|███▋      | 1871/5001 [1:28:57<2:14:48,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337595340.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7575, 0.4803, 1.2815],
        [8.6889, 0.4907, 1.2840],
        [8.9876, 0.4885, 1.2938]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4417,  0.9562,  1.0016],
         [ 7.7517,  0.5716,  0.9250],
         [ 1.7813,  0.4689,  0.8189],
         ...,
         [ 7.6865,  0.4199,  1.5297],
         [ 2.7933,  0.3067,  0.8484],
         [ 0.5921,  1.6319,  3.6377]],

        [[ 1.2390,  0.9032,  0.7742],
         [ 1.4039,  0.4165,  0.3999],
         [18.3542,  0.8724, 10.0186],
         ...,
         [ 0.1335,  1.1527,  4.5222],
         [ 0.1760,  0.5273,  1.0758],
         [ 8.0499,  1.8572,  2.0641]],

        [[ 1.2336,  0.9043,  3.3060],
         [ 1.5142,  1.0462,  1.4094],
         [ 0.2453,  9.1381,  2.9513],
         ...,
         [ 0.6949,  0.4314,  1.3499],
         [30.2655,  0.0874,  1.1338],
         [ 5.8814,  0.4119,  0.7417


Train Diffusion:  37%|███▋      | 1872/5001 [1:28:59<2:15:20,  2.60s/it][A
Train Diffusion:  37%|███▋      | 1873/5001 [1:29:02<2:21:06,  2.71s/it][A
Train Diffusion:  37%|███▋      | 1874/5001 [1:29:05<2:18:49,  2.66s/it][A
Train Diffusion:  37%|███▋      | 1875/5001 [1:29:07<2:17:37,  2.64s/it][A
Train Diffusion:  38%|███▊      | 1876/5001 [1:29:10<2:16:08,  2.61s/it][A
Train Diffusion:  38%|███▊      | 1877/5001 [1:29:13<2:15:24,  2.60s/it][A
Train Diffusion:  38%|███▊      | 1878/5001 [1:29:15<2:15:00,  2.59s/it][A
Train Diffusion:  38%|███▊      | 1879/5001 [1:29:18<2:14:46,  2.59s/it][A
Train Diffusion:  38%|███▊      | 1880/5001 [1:29:20<2:14:09,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1881/5001 [1:29:23<2:14:20,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328232697.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6306, 0.5096, 1.3246],
        [8.8559, 0.4916, 1.3619],
        [8.6589, 0.4863, 1.2927]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6064,  0.9531,  0.9246],
         [ 0.5694,  0.8138,  1.7893],
         [27.6824,  0.1943,  1.2013],
         ...,
         [34.0061,  0.1605,  1.0651],
         [ 4.5495,  0.0942,  0.5639],
         [ 2.2390,  0.9440,  0.9249]],

        [[ 1.6024,  0.8063,  2.8496],
         [ 1.3450,  0.5400,  0.8058],
         [ 0.7259,  0.7129,  1.3270],
         ...,
         [ 1.8527,  0.2662,  1.1338],
         [15.7155,  0.3767,  1.2044],
         [ 2.8705,  0.8511,  1.1660]],

        [[ 0.6941,  0.9570,  0.9974],
         [12.4170,  0.5109,  1.1483],
         [ 2.2093,  0.4289,  0.7089],
         ...,
         [ 0.8895,  0.7505,  1.4419],
         [ 0.4410,  0.5166,  0.8385],
         [26.9807,  1.0887,  2.0151


Train Diffusion:  38%|███▊      | 1882/5001 [1:29:25<2:14:02,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1883/5001 [1:29:28<2:14:05,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1884/5001 [1:29:31<2:13:42,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1885/5001 [1:29:33<2:13:56,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1886/5001 [1:29:36<2:13:51,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1887/5001 [1:29:38<2:13:27,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1888/5001 [1:29:41<2:13:17,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1889/5001 [1:29:43<2:13:12,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1890/5001 [1:29:46<2:13:17,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1891/5001 [1:29:49<2:13:26,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336288748.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8237, 0.4866, 1.2654],
        [8.8812, 0.4961, 1.2778],
        [8.6905, 0.5292, 1.3096]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6082e+00, 8.1125e-01, 4.2065e+00],
         [1.1414e+00, 6.1449e-01, 9.8390e-01],
         [6.6718e-01, 7.3869e-01, 1.2383e+00],
         ...,
         [8.3429e+00, 4.6479e-01, 1.4665e+00],
         [1.4534e+00, 4.9781e-01, 5.9543e-01],
         [4.6025e-01, 2.1398e+00, 1.3649e+00]],

        [[6.7754e-01, 9.5685e-01, 8.7017e-01],
         [6.1823e-01, 7.4617e-01, 1.9090e+00],
         [2.7293e+01, 2.0900e-01, 1.2048e+00],
         ...,
         [6.2735e-01, 3.8510e-02, 1.0188e+00],
         [4.2336e+00, 1.1216e-01, 7.3922e-01],
         [2.2786e+01, 1.4883e+00, 1.9478e+00]],

        [[6.1847e-01, 9.5438e-01, 8.7109e-01],
         [1.2192e+01, 6.6207e-01, 9.5542e-01],
         [1.9977e+00, 4.1749e-01, 7.3


Train Diffusion:  38%|███▊      | 1892/5001 [1:29:51<2:13:29,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1893/5001 [1:29:54<2:13:33,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1894/5001 [1:29:56<2:13:18,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1895/5001 [1:29:59<2:13:13,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1896/5001 [1:30:01<2:13:36,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1897/5001 [1:30:04<2:13:38,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1898/5001 [1:30:07<2:13:25,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1899/5001 [1:30:09<2:13:39,  2.59s/it][A
Train Diffusion:  38%|███▊      | 1900/5001 [1:30:12<2:13:16,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1901/5001 [1:30:14<2:12:59,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325666132.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6081, 0.4925, 1.2932],
        [8.8532, 0.4715, 1.3002],
        [8.8673, 0.4745, 1.2915]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4698,  0.8396,  5.4952],
         [ 0.8104,  0.7827,  1.4172],
         [ 0.7530,  0.7529,  1.0876],
         ...,
         [ 3.6726,  0.1943,  0.8570],
         [ 0.5442,  0.4937,  1.4940],
         [ 0.2629,  2.0031,  2.7480]],

        [[ 0.4776,  0.9532,  0.8155],
         [10.7179,  0.8585,  1.0163],
         [ 1.4291,  0.5018,  0.8384],
         ...,
         [34.8907,  0.1410,  1.1590],
         [ 4.8298,  0.1787,  0.8118],
         [ 0.4445,  1.6092, 11.0932]],

        [[ 0.9616,  0.9509,  0.7942],
         [ 0.9241,  0.5508,  1.8167],
         [27.3446,  0.2544,  1.1922],
         ...,
         [ 0.6381,  0.5644,  0.9884],
         [35.7120,  0.0873,  1.3858],
         [ 5.7911,  0.3660,  1.0216


Train Diffusion:  38%|███▊      | 1902/5001 [1:30:17<2:13:06,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1903/5001 [1:30:19<2:12:52,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1904/5001 [1:30:22<2:12:42,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1905/5001 [1:30:25<2:13:26,  2.59s/it][A
Train Diffusion:  38%|███▊      | 1906/5001 [1:30:27<2:13:05,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1907/5001 [1:30:30<2:13:02,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1908/5001 [1:30:32<2:12:48,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1909/5001 [1:30:35<2:12:42,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1910/5001 [1:30:38<2:12:31,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1911/5001 [1:30:40<2:12:34,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334963756.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8839, 0.5098, 1.3234],
        [8.6419, 0.5164, 1.3043],
        [8.5936, 0.5140, 1.3009]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4642,  0.8399,  0.5200],
         [ 1.7399,  0.3528,  1.2179],
         [25.2317,  0.2634,  1.1186],
         ...,
         [ 2.6773,  0.1618,  0.8292],
         [28.5794,  0.2911,  4.2426],
         [ 2.0469,  2.2264,  2.6595]],

        [[ 0.4743,  0.9516,  0.9568],
         [ 4.0130,  0.6376,  2.0134],
         [ 0.9676,  0.9808,  1.8145],
         ...,
         [33.7681,  0.0470,  1.2220],
         [ 6.1233,  0.0588,  0.1161],
         [ 5.5693,  3.9173,  0.4669]],

        [[ 0.9716,  0.9475,  1.4643],
         [ 5.3048,  0.6876,  1.3560],
         [ 1.2803,  0.8428,  0.7548],
         ...,
         [ 0.4738,  0.6666,  0.6659],
         [ 0.0769,  0.8171,  1.2100],
         [ 0.4777,  1.1502,  6.0268


Train Diffusion:  38%|███▊      | 1912/5001 [1:30:43<2:12:38,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1913/5001 [1:30:45<2:12:15,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1914/5001 [1:30:48<2:12:09,  2.57s/it][A
Train Diffusion:  38%|███▊      | 1915/5001 [1:30:50<2:12:28,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1916/5001 [1:30:53<2:13:58,  2.61s/it][A
Train Diffusion:  38%|███▊      | 1917/5001 [1:30:56<2:13:27,  2.60s/it][A
Train Diffusion:  38%|███▊      | 1918/5001 [1:30:58<2:13:03,  2.59s/it][A
Train Diffusion:  38%|███▊      | 1919/5001 [1:31:01<2:12:34,  2.58s/it][A
Train Diffusion:  38%|███▊      | 1920/5001 [1:31:04<2:17:00,  2.67s/it][A
Train Diffusion:  38%|███▊      | 1921/5001 [1:31:06<2:15:16,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335711856.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6479, 0.5231, 1.3035],
        [8.9232, 0.4969, 1.3058],
        [8.6674, 0.4934, 1.3185]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4124,  0.8500,  0.5687],
         [ 1.6987,  0.3490,  1.4717],
         [22.3920,  0.3294,  1.1953],
         ...,
         [ 4.5599,  0.0957,  0.8836],
         [29.5224,  0.0922,  1.1374],
         [ 5.0254,  0.5630,  1.0446]],

        [[ 1.0384,  0.9376,  1.6767],
         [ 2.9159,  0.8006,  1.4287],
         [ 0.8858,  0.9753,  0.6624],
         ...,
         [13.7156,  5.7841,  1.1013],
         [ 1.3137,  1.8735,  1.1906],
         [ 0.3290,  1.5799,  6.8987]],

        [[ 0.4605,  0.9533,  0.9670],
         [ 7.5397,  0.6046,  2.0962],
         [ 1.4192,  0.5117,  0.8118],
         ...,
         [ 0.4570,  0.5696,  1.5358],
         [ 0.6905,  0.3038,  4.7204],
         [10.4038,  2.1697,  1.9726


Train Diffusion:  38%|███▊      | 1922/5001 [1:31:09<2:14:37,  2.62s/it][A
Train Diffusion:  38%|███▊      | 1923/5001 [1:31:11<2:13:32,  2.60s/it][A
Train Diffusion:  38%|███▊      | 1924/5001 [1:31:14<2:13:12,  2.60s/it][A
Train Diffusion:  38%|███▊      | 1925/5001 [1:31:17<2:12:36,  2.59s/it][A
Train Diffusion:  39%|███▊      | 1926/5001 [1:31:19<2:12:15,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1927/5001 [1:31:22<2:12:07,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1928/5001 [1:31:24<2:12:25,  2.59s/it][A
Train Diffusion:  39%|███▊      | 1929/5001 [1:31:27<2:12:12,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1930/5001 [1:31:29<2:11:50,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1931/5001 [1:31:32<2:12:03,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330309772.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6254, 0.5071, 1.3294],
        [8.7417, 0.5122, 1.2694],
        [8.7848, 0.5013, 1.3052]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5624,  0.8289,  5.6469],
         [ 0.8201,  1.0116,  1.2920],
         [ 0.8502,  0.6758,  1.0307],
         ...,
         [23.9687,  1.2684,  1.2669],
         [ 1.3576,  2.0964,  0.8181],
         [ 0.5805,  1.3095,  8.7447]],

        [[ 0.8091,  0.9606,  0.7996],
         [ 0.8069,  0.5788,  1.8923],
         [28.8164,  0.0904,  1.1966],
         ...,
         [ 1.9882,  0.2192,  0.9719],
         [34.1796,  0.1127,  1.1121],
         [ 4.8794,  0.7126,  1.4173]],

        [[ 0.5338,  0.9506,  0.9200],
         [10.5003,  0.5707,  0.9033],
         [ 1.7910,  0.4725,  0.7060],
         ...,
         [ 1.3603,  1.1534,  0.8110],
         [ 2.3060,  0.1750,  1.0499],
         [16.7025,  1.5079,  1.8557


Train Diffusion:  39%|███▊      | 1932/5001 [1:31:35<2:12:01,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1933/5001 [1:31:37<2:11:46,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1934/5001 [1:31:40<2:11:38,  2.58s/it][A
Train Diffusion:  39%|███▊      | 1935/5001 [1:31:42<2:11:26,  2.57s/it][A
Train Diffusion:  39%|███▊      | 1936/5001 [1:31:45<2:11:19,  2.57s/it][A
Train Diffusion:  39%|███▊      | 1937/5001 [1:31:47<2:11:14,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1938/5001 [1:31:50<2:11:17,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1939/5001 [1:31:53<2:11:27,  2.58s/it][A
Train Diffusion:  39%|███▉      | 1940/5001 [1:31:55<2:11:16,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1941/5001 [1:31:58<2:11:06,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330029888.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8444, 0.5013, 1.2674],
        [8.7647, 0.4921, 1.3272],
        [8.6738, 0.4849, 1.2897]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7196,  0.9585,  1.0577],
         [12.4134,  0.4714,  1.2024],
         [ 2.3026,  0.4570,  1.0461],
         ...,
         [20.2802,  0.3363,  0.7200],
         [ 5.0241,  0.0757,  0.8159],
         [ 0.1104,  1.6066,  2.0944]],

        [[ 0.5865,  0.9520,  0.9362],
         [ 0.5679,  0.8326,  1.7051],
         [28.2163,  0.1045,  1.2023],
         ...,
         [ 4.3899,  0.1168,  0.6888],
         [ 0.2144,  0.5904,  0.9944],
         [ 9.8469,  2.9945,  5.5289]],

        [[ 1.5964,  0.8061,  2.2776],
         [ 1.4361,  0.5025,  0.7766],
         [ 0.7238,  0.6964,  0.8368],
         ...,
         [ 0.7943, 12.9666,  0.7226],
         [12.7093,  0.1000,  2.1385],
         [ 5.6830,  0.2907,  1.2511


Train Diffusion:  39%|███▉      | 1942/5001 [1:32:00<2:11:15,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1943/5001 [1:32:03<2:11:12,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1944/5001 [1:32:05<2:10:53,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1945/5001 [1:32:08<2:10:46,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1946/5001 [1:32:11<2:10:56,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1947/5001 [1:32:13<2:11:09,  2.58s/it][A
Train Diffusion:  39%|███▉      | 1948/5001 [1:32:16<2:10:55,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1949/5001 [1:32:18<2:10:49,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1950/5001 [1:32:21<2:10:35,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1951/5001 [1:32:23<2:10:47,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324594784.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6985, 0.4878, 1.3020],
        [8.7055, 0.5058, 1.3239],
        [8.6833, 0.4780, 1.3158]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0465e-01, 9.5385e-01, 9.4076e-01],
         [1.7793e+00, 7.0084e-01, 1.9458e+00],
         [1.9428e+00, 3.1641e-01, 5.7024e-01],
         ...,
         [2.0926e+01, 2.6036e-01, 1.1180e+00],
         [2.5772e+00, 1.8086e-01, 1.6330e+00],
         [1.8203e+00, 8.5646e-01, 1.0097e+00]],

        [[8.7522e-01, 9.6185e-01, 1.2602e+00],
         [8.2343e+00, 4.9066e-01, 1.3076e+00],
         [1.9298e+00, 5.1675e-01, 1.0951e+00],
         ...,
         [2.3336e+00, 2.3665e-01, 1.1398e+00],
         [3.2441e+01, 1.8842e-01, 1.1174e+00],
         [4.3716e+00, 8.2063e-01, 1.5021e+00]],

        [[1.5266e+00, 8.2888e-01, 5.5464e-01],
         [1.8052e+00, 3.4862e-01, 7.8748e-01],
         [7.8830e-03, 6.2720e-01, 1.4


Train Diffusion:  39%|███▉      | 1952/5001 [1:32:26<2:10:40,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1953/5001 [1:32:29<2:10:23,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1954/5001 [1:32:31<2:10:31,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1955/5001 [1:32:34<2:10:54,  2.58s/it][A
Train Diffusion:  39%|███▉      | 1956/5001 [1:32:36<2:10:33,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1957/5001 [1:32:39<2:10:32,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1958/5001 [1:32:41<2:10:23,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1959/5001 [1:32:44<2:10:12,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1960/5001 [1:32:47<2:10:11,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1961/5001 [1:32:49<2:09:57,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343104476.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4607, 0.5004, 1.3039],
        [8.7365, 0.4740, 1.3035],
        [8.8978, 0.5032, 1.3379]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7531,  0.9592,  1.1310],
         [12.9179,  0.3938,  1.2618],
         [ 2.6513,  0.3953,  1.0989],
         ...,
         [ 4.0236,  0.1440,  1.0082],
         [14.0213,  0.2619,  1.2094],
         [ 2.8310,  0.9226,  1.5107]],

        [[ 1.5861,  0.8056,  1.4125],
         [ 1.6431,  0.3895,  0.7480],
         [ 0.6497,  0.7834,  1.0199],
         ...,
         [ 0.4010,  0.7428,  0.5938],
         [ 6.8060,  0.4285,  0.0327],
         [ 2.4798,  1.1677,  0.6540]],

        [[ 0.5644,  0.9502,  0.9319],
         [ 0.5437,  0.8524,  1.5625],
         [27.0108,  0.1451,  1.1823],
         ...,
         [25.6849,  0.0375,  1.4091],
         [ 6.0855,  0.0686,  0.7928],
         [26.1005,  1.1524,  1.9647


Train Diffusion:  39%|███▉      | 1962/5001 [1:32:52<2:09:52,  2.56s/it][A
Train Diffusion:  39%|███▉      | 1963/5001 [1:32:54<2:10:11,  2.57s/it][A
Train Diffusion:  39%|███▉      | 1964/5001 [1:32:57<2:11:39,  2.60s/it][A
Train Diffusion:  39%|███▉      | 1965/5001 [1:33:00<2:11:13,  2.59s/it][A
Train Diffusion:  39%|███▉      | 1966/5001 [1:33:02<2:10:57,  2.59s/it][A
Train Diffusion:  39%|███▉      | 1967/5001 [1:33:05<2:11:48,  2.61s/it][A
Train Diffusion:  39%|███▉      | 1968/5001 [1:33:07<2:12:48,  2.63s/it][A
Train Diffusion:  39%|███▉      | 1969/5001 [1:33:10<2:12:00,  2.61s/it][A
Train Diffusion:  39%|███▉      | 1970/5001 [1:33:13<2:11:13,  2.60s/it][A
Train Diffusion:  39%|███▉      | 1971/5001 [1:33:15<2:11:02,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342577161.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6899, 0.5161, 1.2933],
        [8.6125, 0.5099, 1.3631],
        [8.7170, 0.5083, 1.3203]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4493,  0.9536,  0.9845],
         [ 7.9853,  0.5839,  1.8562],
         [ 2.0843,  2.1488,  0.4223],
         ...,
         [ 0.5558,  0.5497,  1.0587],
         [ 2.4296,  0.5189,  2.1310],
         [ 0.6889,  1.0350,  8.6867]],

        [[ 1.3506,  0.8665,  0.6181],
         [ 1.5957,  0.3683,  0.8419],
         [ 0.2401,  0.5476,  1.4683],
         ...,
         [ 3.3480,  0.2662,  0.9164],
         [ 0.4389,  2.4444,  1.7911],
         [ 0.3447,  5.0264,  0.8885]],

        [[ 1.1116,  0.9230,  2.0629],
         [ 2.8879,  0.8139,  1.3349],
         [ 0.9791,  0.9735,  0.7279],
         ...,
         [33.0921,  0.0934,  1.2222],
         [ 5.4483,  0.1228,  0.6573],
         [ 1.9566,  4.4446,  1.1105


Train Diffusion:  39%|███▉      | 1972/5001 [1:33:18<2:10:47,  2.59s/it][A
Train Diffusion:  39%|███▉      | 1973/5001 [1:33:20<2:10:20,  2.58s/it][A
Train Diffusion:  39%|███▉      | 1974/5001 [1:33:23<2:11:07,  2.60s/it][A
Train Diffusion:  39%|███▉      | 1975/5001 [1:33:25<2:10:28,  2.59s/it][A
Train Diffusion:  40%|███▉      | 1976/5001 [1:33:28<2:10:12,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1977/5001 [1:33:31<2:09:59,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1978/5001 [1:33:33<2:10:02,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1979/5001 [1:33:36<2:09:57,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1980/5001 [1:33:38<2:09:38,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1981/5001 [1:33:41<2:09:34,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332630345.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6060, 0.5043, 1.2956],
        [8.8632, 0.4685, 1.3000],
        [8.6272, 0.5082, 1.2902]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5354,  0.9524,  0.8897],
         [12.2898,  0.6382,  1.1044],
         [ 1.8538,  0.4815,  0.8358],
         ...,
         [ 0.1989,  0.5299,  2.6045],
         [ 1.7346,  0.6431,  2.2784],
         [ 0.5315,  1.5668,  2.4094]],

        [[ 1.5627,  0.8230,  4.6161],
         [ 0.8999,  0.8301,  1.1347],
         [ 0.6564,  0.8580,  0.9380],
         ...,
         [23.8004,  0.1481,  1.3020],
         [ 4.9201,  0.0461,  8.7670],
         [ 2.4752,  1.2587,  1.6054]],

        [[ 0.8063,  0.9623,  0.7966],
         [ 0.7344,  0.6672,  1.8454],
         [26.3212,  0.2750,  1.1960],
         ...,
         [ 1.3573,  0.5474,  1.5227],
         [ 0.2786,  0.6207,  0.4599],
         [21.5375,  1.9572,  1.7732


Train Diffusion:  40%|███▉      | 1982/5001 [1:33:43<2:09:31,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1983/5001 [1:33:46<2:09:16,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1984/5001 [1:33:49<2:09:24,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1985/5001 [1:33:51<2:09:05,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1986/5001 [1:33:54<2:09:21,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1987/5001 [1:33:56<2:09:11,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1988/5001 [1:33:59<2:09:08,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1989/5001 [1:34:02<2:09:13,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1990/5001 [1:34:04<2:09:19,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1991/5001 [1:34:07<2:09:38,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333457001.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6405, 0.5066, 1.2859],
        [8.8053, 0.5105, 1.3438],
        [8.7160, 0.4904, 1.3575]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.2679e-01, 9.5333e-01, 9.2391e-01],
         [1.2842e+01, 5.7727e-01, 1.0761e+00],
         [2.1698e+00, 3.9129e-01, 7.6092e-01],
         ...,
         [2.6529e+01, 2.2153e-01, 7.3048e-01],
         [4.8473e+00, 5.6445e-02, 9.3745e+00],
         [1.7585e+00, 1.5418e+00, 1.3729e+00]],

        [[6.6905e-01, 9.5521e-01, 8.7383e-01],
         [6.1311e-01, 7.3867e-01, 1.9342e+00],
         [2.6664e+01, 2.5175e-01, 1.2038e+00],
         ...,
         [1.3453e+00, 3.6156e-01, 2.5546e+00],
         [1.7179e-01, 2.5863e+00, 3.5645e+00],
         [9.2091e-01, 3.9266e+00, 1.1500e+00]],

        [[1.6067e+00, 8.1096e-01, 3.7751e+00],
         [1.1403e+00, 6.6545e-01, 9.6789e-01],
         [6.5188e-01, 7.7496e-01, 1.1


Train Diffusion:  40%|███▉      | 1992/5001 [1:34:09<2:09:49,  2.59s/it][A
Train Diffusion:  40%|███▉      | 1993/5001 [1:34:12<2:09:31,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1994/5001 [1:34:14<2:09:10,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1995/5001 [1:34:17<2:09:11,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1996/5001 [1:34:20<2:08:56,  2.57s/it][A
Train Diffusion:  40%|███▉      | 1997/5001 [1:34:22<2:09:30,  2.59s/it][A
Train Diffusion:  40%|███▉      | 1998/5001 [1:34:25<2:09:05,  2.58s/it][A
Train Diffusion:  40%|███▉      | 1999/5001 [1:34:27<2:09:53,  2.60s/it][A
Train Diffusion:  40%|███▉      | 2000/5001 [1:34:30<2:09:24,  2.59s/it][A
Train Diffusion:  40%|████      | 2001/5001 [1:34:33<2:09:50,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326019632.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7909, 0.4927, 1.2833],
        [8.7345, 0.4936, 1.3038],
        [8.8424, 0.4961, 1.2849]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.2425,  0.9002,  0.7717],
         [ 1.4198,  0.4075,  1.7405],
         [25.5143,  0.2840,  1.1814],
         ...,
         [ 1.0957,  0.5686,  2.5550],
         [ 0.1826,  1.5783, 13.7459],
         [ 0.5937,  3.0290,  2.0994]],

        [[ 1.2259,  0.9037,  3.1771],
         [ 1.0220,  0.9848,  1.5379],
         [ 0.6484,  0.9609,  0.7707],
         ...,
         [ 0.3081,  0.4949,  1.9257],
         [15.3067,  0.2350,  1.2706],
         [ 3.7375,  0.3597, 11.0545]],

        [[ 0.4431,  0.9548,  0.9849],
         [10.6976,  0.5622,  1.5979],
         [ 1.7727,  0.5250,  1.0434],
         ...,
         [28.3973,  0.1432,  1.1227],
         [ 4.9888,  0.1210,  0.6692],
         [ 0.0739,  2.1467,  0.9937


Train Diffusion:  40%|████      | 2002/5001 [1:34:35<2:09:21,  2.59s/it][A
Train Diffusion:  40%|████      | 2003/5001 [1:34:38<2:09:26,  2.59s/it][A
Train Diffusion:  40%|████      | 2004/5001 [1:34:40<2:09:06,  2.58s/it][A
Train Diffusion:  40%|████      | 2005/5001 [1:34:43<2:08:56,  2.58s/it][A
Train Diffusion:  40%|████      | 2006/5001 [1:34:45<2:08:57,  2.58s/it][A
Train Diffusion:  40%|████      | 2007/5001 [1:34:48<2:08:42,  2.58s/it][A
Train Diffusion:  40%|████      | 2008/5001 [1:34:51<2:08:36,  2.58s/it][A
Train Diffusion:  40%|████      | 2009/5001 [1:34:53<2:08:45,  2.58s/it][A
Train Diffusion:  40%|████      | 2010/5001 [1:34:56<2:08:46,  2.58s/it][A
Train Diffusion:  40%|████      | 2011/5001 [1:34:58<2:09:46,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321828080.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7903, 0.4852, 1.3083],
        [8.5900, 0.5063, 1.2840],
        [8.9582, 0.5172, 1.2696]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4678,  0.9517,  1.0096],
         [11.6375,  0.4872,  1.6857],
         [ 2.4653,  0.5636,  0.4637],
         ...,
         [ 4.0254,  0.1371,  0.7133],
         [ 0.3205,  0.9637,  1.4712],
         [ 0.3241,  2.1669,  0.8815]],

        [[ 1.0011,  0.9418,  0.8128],
         [ 1.0267,  0.5075,  0.2022],
         [ 4.1412,  1.9947,  0.2942],
         ...,
         [ 0.3553,  6.1595,  3.4884],
         [ 0.5205,  0.4326,  0.1993],
         [ 5.0948,  0.2851,  1.0271]],

        [[ 1.4396,  0.8431,  4.6864],
         [ 0.7805,  1.1352,  1.3402],
         [ 0.2401,  4.5664,  2.0032],
         ...,
         [ 0.2102,  0.3752,  3.3055],
         [ 0.3677,  0.4359,  0.9613],
         [23.3085,  1.3205,  2.0423


Train Diffusion:  40%|████      | 2012/5001 [1:35:01<2:09:08,  2.59s/it][A
Train Diffusion:  40%|████      | 2013/5001 [1:35:04<2:08:42,  2.58s/it][A
Train Diffusion:  40%|████      | 2014/5001 [1:35:06<2:08:53,  2.59s/it][A
Train Diffusion:  40%|████      | 2015/5001 [1:35:09<2:11:06,  2.63s/it][A
Train Diffusion:  40%|████      | 2016/5001 [1:35:12<2:10:30,  2.62s/it][A
Train Diffusion:  40%|████      | 2017/5001 [1:35:14<2:09:45,  2.61s/it][A
Train Diffusion:  40%|████      | 2018/5001 [1:35:17<2:09:00,  2.59s/it][A
Train Diffusion:  40%|████      | 2019/5001 [1:35:19<2:08:34,  2.59s/it][A
Train Diffusion:  40%|████      | 2020/5001 [1:35:22<2:08:14,  2.58s/it][A
Train Diffusion:  40%|████      | 2021/5001 [1:35:24<2:08:30,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318000156.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7942, 0.5116, 1.2623],
        [8.6930, 0.5012, 1.3178],
        [8.5431, 0.4798, 1.3271]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.6333e-01, 9.5161e-01, 9.6775e-01],
         [6.4445e+00, 5.9839e-01, 2.7095e+00],
         [7.3188e-01, 1.8044e+00, 3.3360e+00],
         ...,
         [2.0859e-02, 4.0785e-01, 3.2799e+00],
         [2.3289e-01, 5.6216e-01, 7.8225e-01],
         [5.2904e-03, 1.4975e+00, 2.7877e+00]],

        [[1.4195e+00, 8.4627e-01, 5.4812e-01],
         [1.7050e+00, 3.4982e-01, 3.4505e-01],
         [1.7712e+01, 1.0759e+00, 4.8072e-01],
         ...,
         [3.4150e+00, 1.1189e-01, 1.0462e+00],
         [2.1964e+00, 4.8912e-01, 2.0461e+00],
         [3.7493e-01, 1.6100e+00, 3.4341e+00]],

        [[1.0258e+00, 9.3806e-01, 1.6079e+00],
         [3.7201e+00, 7.7119e-01, 1.3682e+00],
         [9.6774e-01, 6.8487e-01, 1.3


Train Diffusion:  40%|████      | 2022/5001 [1:35:27<2:08:26,  2.59s/it][A
Train Diffusion:  40%|████      | 2023/5001 [1:35:30<2:08:12,  2.58s/it][A
Train Diffusion:  40%|████      | 2024/5001 [1:35:32<2:08:11,  2.58s/it][A
Train Diffusion:  40%|████      | 2025/5001 [1:35:35<2:07:49,  2.58s/it][A
Train Diffusion:  41%|████      | 2026/5001 [1:35:37<2:07:47,  2.58s/it][A
Train Diffusion:  41%|████      | 2027/5001 [1:35:40<2:07:44,  2.58s/it][A
Train Diffusion:  41%|████      | 2028/5001 [1:35:42<2:07:48,  2.58s/it][A
Train Diffusion:  41%|████      | 2029/5001 [1:35:45<2:07:22,  2.57s/it][A
Train Diffusion:  41%|████      | 2030/5001 [1:35:48<2:07:17,  2.57s/it][A
Train Diffusion:  41%|████      | 2031/5001 [1:35:50<2:07:25,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324911475.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7936, 0.4959, 1.2759],
        [8.6295, 0.5167, 1.3287],
        [8.8679, 0.4779, 1.3163]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1770e+00, 9.1032e-01, 2.6223e+00],
         [1.8164e+00, 9.5849e-01, 1.3563e+00],
         [3.6725e-01, 6.2669e+00, 2.0368e+00],
         ...,
         [4.3267e-01, 7.7500e-01, 4.6575e-01],
         [2.0744e+01, 5.9553e-01, 8.1461e-01],
         [3.8968e+00, 3.4306e-01, 6.1251e+00]],

        [[4.4451e-01, 9.5576e-01, 1.0086e+00],
         [8.5770e+00, 5.5296e-01, 2.2378e+00],
         [1.2367e+00, 6.4151e-01, 2.1401e+00],
         ...,
         [4.2077e+00, 1.3373e-01, 1.0057e+00],
         [6.7940e-04, 4.9465e-01, 2.9932e+00],
         [1.5720e-01, 2.1057e+00, 8.2556e-01]],

        [[1.2925e+00, 8.8661e-01, 6.8650e-01],
         [1.5020e+00, 3.9064e-01, 2.0540e-01],
         [1.5483e+01, 1.5079e+00, 9.0


Train Diffusion:  41%|████      | 2032/5001 [1:35:53<2:07:43,  2.58s/it][A
Train Diffusion:  41%|████      | 2033/5001 [1:35:55<2:07:36,  2.58s/it][A
Train Diffusion:  41%|████      | 2034/5001 [1:35:58<2:07:49,  2.58s/it][A
Train Diffusion:  41%|████      | 2035/5001 [1:36:00<2:07:42,  2.58s/it][A
Train Diffusion:  41%|████      | 2036/5001 [1:36:03<2:07:52,  2.59s/it][A
Train Diffusion:  41%|████      | 2037/5001 [1:36:06<2:09:04,  2.61s/it][A
Train Diffusion:  41%|████      | 2038/5001 [1:36:08<2:08:14,  2.60s/it][A
Train Diffusion:  41%|████      | 2039/5001 [1:36:11<2:09:07,  2.62s/it][A
Train Diffusion:  41%|████      | 2040/5001 [1:36:14<2:10:10,  2.64s/it][A
Train Diffusion:  41%|████      | 2041/5001 [1:36:16<2:09:11,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328803625.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7395, 0.4943, 1.3053],
        [8.7738, 0.4848, 1.2899],
        [8.7620, 0.4861, 1.3415]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.9816e-01, 9.5329e-01, 9.4235e-01],
         [1.5447e+00, 7.6893e-01, 1.2066e+00],
         [1.1915e-01, 1.1720e+00, 3.8594e+00],
         ...,
         [5.8568e-01, 4.3255e-01, 3.1649e+00],
         [1.8053e-01, 4.3083e-01, 1.3663e+00],
         [7.5411e-01, 1.6960e+00, 1.0278e+00]],

        [[8.9144e-01, 9.6000e-01, 1.3128e+00],
         [9.3390e+00, 3.8965e-01, 1.2756e+00],
         [2.4395e+00, 3.7779e-01, 1.1607e+00],
         ...,
         [2.8820e+01, 2.5494e-01, 8.6494e-01],
         [4.3543e+00, 8.5330e-02, 1.5109e+01],
         [1.1950e+00, 2.8318e+00, 2.1389e+00]],

        [[1.5169e+00, 8.3078e-01, 6.3681e-01],
         [1.8184e+00, 3.3531e-01, 7.2503e-01],
         [1.8907e+01, 4.4940e-01, 2.2


Train Diffusion:  41%|████      | 2042/5001 [1:36:19<2:08:29,  2.61s/it][A
Train Diffusion:  41%|████      | 2043/5001 [1:36:21<2:08:06,  2.60s/it][A
Train Diffusion:  41%|████      | 2044/5001 [1:36:24<2:07:46,  2.59s/it][A
Train Diffusion:  41%|████      | 2045/5001 [1:36:27<2:07:17,  2.58s/it][A
Train Diffusion:  41%|████      | 2046/5001 [1:36:29<2:07:01,  2.58s/it][A
Train Diffusion:  41%|████      | 2047/5001 [1:36:32<2:07:35,  2.59s/it][A
Train Diffusion:  41%|████      | 2048/5001 [1:36:34<2:07:15,  2.59s/it][A
Train Diffusion:  41%|████      | 2049/5001 [1:36:37<2:06:53,  2.58s/it][A
Train Diffusion:  41%|████      | 2050/5001 [1:36:39<2:06:39,  2.58s/it][A
Train Diffusion:  41%|████      | 2051/5001 [1:36:42<2:06:45,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325828366.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7797, 0.4757, 1.3098],
        [8.8575, 0.4949, 1.3113],
        [8.6882, 0.4853, 1.2891]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6006e+00, 8.0871e-01, 2.5519e+00],
         [1.3028e+00, 5.5936e-01, 7.9019e-01],
         [7.2155e-01, 6.9483e-01, 1.0534e+00],
         ...,
         [3.5313e+00, 2.3307e-01, 8.7512e-01],
         [6.5442e-01, 3.6274e-01, 1.8344e+00],
         [2.0714e+00, 1.3665e+00, 9.7606e-01]],

        [[7.0617e-01, 9.5736e-01, 1.0341e+00],
         [1.2474e+01, 4.8368e-01, 1.1869e+00],
         [2.2775e+00, 4.5607e-01, 6.5370e-01],
         ...,
         [3.1567e+01, 6.4939e-02, 1.2699e+00],
         [5.8186e+00, 4.6606e-02, 1.3164e+01],
         [2.4245e-01, 3.1142e+00, 2.4585e+00]],

        [[5.9606e-01, 9.5219e-01, 9.3421e-01],
         [5.6017e-01, 7.8364e-01, 1.7996e+00],
         [2.8259e+01, 8.9623e-02, 1.2


Train Diffusion:  41%|████      | 2052/5001 [1:36:45<2:06:38,  2.58s/it][A
Train Diffusion:  41%|████      | 2053/5001 [1:36:47<2:06:21,  2.57s/it][A
Train Diffusion:  41%|████      | 2054/5001 [1:36:50<2:06:27,  2.57s/it][A
Train Diffusion:  41%|████      | 2055/5001 [1:36:52<2:06:10,  2.57s/it][A
Train Diffusion:  41%|████      | 2056/5001 [1:36:55<2:06:36,  2.58s/it][A
Train Diffusion:  41%|████      | 2057/5001 [1:36:57<2:06:14,  2.57s/it][A
Train Diffusion:  41%|████      | 2058/5001 [1:37:00<2:06:32,  2.58s/it][A
Train Diffusion:  41%|████      | 2059/5001 [1:37:03<2:07:36,  2.60s/it][A
Train Diffusion:  41%|████      | 2060/5001 [1:37:05<2:07:17,  2.60s/it][A
Train Diffusion:  41%|████      | 2061/5001 [1:37:08<2:06:53,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338556080.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8098, 0.4804, 1.3044],
        [8.6560, 0.5047, 1.2825],
        [8.9611, 0.4991, 1.2805]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5660e-01, 9.5395e-01, 9.8091e-01],
         [3.7542e+00, 6.4213e-01, 2.5468e+00],
         [1.4963e+00, 2.3709e-01, 5.4516e-01],
         ...,
         [1.1202e+00, 6.4175e+00, 1.7403e+00],
         [2.8099e+01, 1.0440e-01, 1.2021e+00],
         [5.3191e+00, 5.0315e-01, 1.0407e+00]],

        [[1.0546e+00, 9.3511e-01, 1.8516e+00],
         [4.1301e+00, 7.8230e-01, 1.4093e+00],
         [9.9231e-01, 1.0835e+01, 6.5704e-01],
         ...,
         [7.5745e+00, 1.8343e-01, 1.2643e+00],
         [4.2520e+00, 9.7582e-02, 1.3771e+00],
         [7.5573e+00, 1.8063e+00, 2.2507e+00]],

        [[1.4000e+00, 8.5395e-01, 5.6916e-01],
         [1.6006e+00, 3.8219e-01, 2.2132e-01],
         [3.1206e-05, 2.4022e+00, 2.6


Train Diffusion:  41%|████      | 2062/5001 [1:37:11<2:11:11,  2.68s/it][A
Train Diffusion:  41%|████▏     | 2063/5001 [1:37:13<2:09:48,  2.65s/it][A
Train Diffusion:  41%|████▏     | 2064/5001 [1:37:16<2:08:38,  2.63s/it][A
Train Diffusion:  41%|████▏     | 2065/5001 [1:37:18<2:07:30,  2.61s/it][A
Train Diffusion:  41%|████▏     | 2066/5001 [1:37:21<2:06:53,  2.59s/it][A
Train Diffusion:  41%|████▏     | 2067/5001 [1:37:24<2:07:16,  2.60s/it][A
Train Diffusion:  41%|████▏     | 2068/5001 [1:37:26<2:06:43,  2.59s/it][A
Train Diffusion:  41%|████▏     | 2069/5001 [1:37:29<2:06:26,  2.59s/it][A
Train Diffusion:  41%|████▏     | 2070/5001 [1:37:31<2:06:07,  2.58s/it][A
Train Diffusion:  41%|████▏     | 2071/5001 [1:37:34<2:06:20,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332150595.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6799, 0.4697, 1.3219],
        [8.9606, 0.5088, 1.3095],
        [8.6556, 0.5086, 1.2913]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6483,  0.9560,  0.9303],
         [12.2821,  0.5808,  1.0610],
         [ 2.0777,  0.4350,  0.9706],
         ...,
         [33.6032,  0.1348,  1.1829],
         [ 5.0581,  0.1115,  0.1100],
         [ 1.5345,  0.8716,  0.8655]],

        [[ 1.6085,  0.8098,  3.6192],
         [ 1.1982,  0.5829,  0.8797],
         [ 0.7232,  0.7073,  0.7206],
         ...,
         [ 4.0875,  0.1523,  0.8178],
         [ 0.5047,  0.6258,  0.9643],
         [26.6966,  1.1828,  1.9965]],

        [[ 0.6467,  0.9559,  0.8937],
         [ 0.5943,  0.7612,  1.8780],
         [28.4232,  0.0844,  1.2051],
         ...,
         [ 0.5429,  1.1635,  0.7403],
         [26.8136,  0.2277,  1.2114],
         [ 4.3882,  0.7650,  1.3287


Train Diffusion:  41%|████▏     | 2072/5001 [1:37:37<2:06:18,  2.59s/it][A
Train Diffusion:  41%|████▏     | 2073/5001 [1:37:39<2:06:12,  2.59s/it][A
Train Diffusion:  41%|████▏     | 2074/5001 [1:37:42<2:05:55,  2.58s/it][A
Train Diffusion:  41%|████▏     | 2075/5001 [1:37:44<2:05:32,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2076/5001 [1:37:47<2:05:52,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2077/5001 [1:37:49<2:05:29,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2078/5001 [1:37:52<2:05:36,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2079/5001 [1:37:55<2:05:28,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2080/5001 [1:37:57<2:05:14,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2081/5001 [1:38:00<2:05:13,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328127340.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8556, 0.4905, 1.2926],
        [8.5015, 0.5066, 1.3132],
        [8.7708, 0.4976, 1.2981]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7398,  0.9595,  0.8287],
         [ 0.6690,  0.6900,  1.8973],
         [27.6607,  0.2339,  1.1987],
         ...,
         [ 3.8044,  0.1898,  0.7563],
         [ 0.5043,  0.7560,  0.6886],
         [ 3.1243,  1.6173,  2.5303]],

        [[ 0.5727,  0.9514,  0.7901],
         [11.6126,  0.8236,  0.7645],
         [ 1.7636,  0.4307,  0.7467],
         ...,
         [32.4369,  0.0851,  1.2452],
         [ 5.5923,  0.1129,  0.5673],
         [ 1.8724,  1.6151,  7.8049]],

        [[ 1.5896,  0.8177,  5.0936],
         [ 0.9944,  0.6562,  1.1901],
         [ 0.6276,  0.7162,  1.2957],
         ...,
         [ 0.4731,  1.2589,  0.7856],
         [35.0672,  0.1155,  1.2135],
         [ 6.1390,  0.2615,  0.7919


Train Diffusion:  42%|████▏     | 2082/5001 [1:38:02<2:05:21,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2083/5001 [1:38:05<2:05:13,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2084/5001 [1:38:07<2:05:18,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2085/5001 [1:38:10<2:05:16,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2086/5001 [1:38:13<2:05:43,  2.59s/it][A
Train Diffusion:  42%|████▏     | 2087/5001 [1:38:15<2:05:19,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2088/5001 [1:38:18<2:04:53,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2089/5001 [1:38:20<2:05:00,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2090/5001 [1:38:23<2:04:53,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2091/5001 [1:38:25<2:04:51,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323328169.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6043, 0.4932, 1.3196],
        [8.6811, 0.4807, 1.2926],
        [8.9684, 0.4732, 1.3109]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5795,  0.9515,  0.7150],
         [10.7096,  1.0326,  1.1150],
         [ 1.3918,  0.5460,  0.8581],
         ...,
         [19.3203,  0.1654,  0.7428],
         [ 5.3657,  0.0687,  0.4897],
         [ 8.5890,  1.9315,  4.5457]],

        [[ 1.5948,  0.8149,  5.3056],
         [ 1.0533,  0.5200,  1.0949],
         [ 0.7313,  0.7794,  0.9994],
         ...,
         [ 0.3715,  4.0681,  1.3580],
         [18.9574,  0.9978,  0.1976],
         [ 5.1991,  0.2268,  0.7900]],

        [[ 0.7294,  0.9588,  0.8349],
         [ 0.6532,  0.6967,  1.7624],
         [26.9273,  0.2450,  1.1946],
         ...,
         [ 3.8539,  0.1045,  0.6462],
         [ 0.0928,  2.3048,  1.6006],
         [ 4.2295,  1.6584,  2.0847


Train Diffusion:  42%|████▏     | 2092/5001 [1:38:28<2:04:49,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2093/5001 [1:38:31<2:04:45,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2094/5001 [1:38:33<2:04:58,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2095/5001 [1:38:36<2:04:46,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2096/5001 [1:38:38<2:04:35,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2097/5001 [1:38:41<2:04:26,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2098/5001 [1:38:44<2:04:43,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2099/5001 [1:38:46<2:04:44,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2100/5001 [1:38:49<2:04:27,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2101/5001 [1:38:51<2:04:23,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325051676.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6771, 0.4961, 1.3064],
        [8.9344, 0.4986, 1.3233],
        [8.5874, 0.5175, 1.2752]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.5244e-01, 9.5181e-01, 1.4355e+00],
         [7.4101e+00, 5.1085e-01, 1.2707e+00],
         [1.8625e+00, 6.4684e-01, 1.3180e+00],
         ...,
         [5.8322e-06, 4.6342e-01, 3.2569e+00],
         [1.6090e-01, 5.6507e-01, 6.2376e-01],
         [1.4514e+01, 1.5822e+00, 1.9640e+00]],

        [[4.7968e-01, 9.5255e-01, 9.6287e-01],
         [2.0483e+00, 6.7448e-01, 1.8808e+00],
         [2.2066e+00, 1.1801e+00, 6.2104e-01],
         ...,
         [4.4884e+00, 1.5478e+01, 3.2166e+00],
         [7.1345e+00, 9.1228e-03, 1.1509e+00],
         [6.5426e+00, 2.1722e-01, 7.2880e-01]],

        [[1.4761e+00, 8.3769e-01, 5.2337e-01],
         [1.7222e+00, 3.6183e-01, 3.6699e-01],
         [5.5103e-05, 1.8639e+00, 4.5


Train Diffusion:  42%|████▏     | 2102/5001 [1:38:54<2:04:46,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2103/5001 [1:38:56<2:04:41,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2104/5001 [1:38:59<2:04:18,  2.57s/it][A
Train Diffusion:  42%|████▏     | 2105/5001 [1:39:02<2:05:26,  2.60s/it][A
Train Diffusion:  42%|████▏     | 2106/5001 [1:39:04<2:07:18,  2.64s/it][A
Train Diffusion:  42%|████▏     | 2107/5001 [1:39:07<2:06:19,  2.62s/it][A
Train Diffusion:  42%|████▏     | 2108/5001 [1:39:10<2:05:50,  2.61s/it][A
Train Diffusion:  42%|████▏     | 2109/5001 [1:39:12<2:05:24,  2.60s/it][A
Train Diffusion:  42%|████▏     | 2110/5001 [1:39:15<2:08:16,  2.66s/it][A
Train Diffusion:  42%|████▏     | 2111/5001 [1:39:18<2:08:20,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328396252.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5919, 0.4914, 1.2771],
        [8.7543, 0.5261, 1.2687],
        [8.9909, 0.4915, 1.2847]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6008,  0.8120,  4.5200],
         [ 1.1032,  0.6319,  1.0640],
         [ 0.6334,  0.7194,  1.2975],
         ...,
         [ 4.0489,  0.1053,  1.0743],
         [14.0045,  0.3559,  1.5482],
         [ 7.6539,  3.0364,  2.2834]],

        [[ 0.6015,  0.9521,  0.8399],
         [12.1091,  0.7160,  0.8478],
         [ 1.9371,  0.4215,  0.7487],
         ...,
         [15.3856,  0.6776,  0.3531],
         [ 3.4080,  0.0996,  0.0280],
         [ 3.9516,  0.2865,  0.9358]],

        [[ 0.7001,  0.9567,  0.8523],
         [ 0.6346,  0.7311,  1.9174],
         [27.7661,  0.2156,  1.1996],
         ...,
         [ 1.1413,  0.2618,  2.3926],
         [ 0.4623,  0.4567,  0.5580],
         [ 0.7745,  2.0545,  1.3208


Train Diffusion:  42%|████▏     | 2112/5001 [1:39:20<2:06:42,  2.63s/it][A
Train Diffusion:  42%|████▏     | 2113/5001 [1:39:23<2:05:50,  2.61s/it][A
Train Diffusion:  42%|████▏     | 2114/5001 [1:39:25<2:05:46,  2.61s/it][A
Train Diffusion:  42%|████▏     | 2115/5001 [1:39:28<2:05:05,  2.60s/it][A
Train Diffusion:  42%|████▏     | 2116/5001 [1:39:30<2:04:38,  2.59s/it][A
Train Diffusion:  42%|████▏     | 2117/5001 [1:39:33<2:04:27,  2.59s/it][A
Train Diffusion:  42%|████▏     | 2118/5001 [1:39:36<2:04:03,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2119/5001 [1:39:38<2:04:04,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2120/5001 [1:39:41<2:04:05,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2121/5001 [1:39:43<2:04:10,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334033043.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7089, 0.4928, 1.2920],
        [8.7779, 0.4955, 1.3378],
        [8.7937, 0.4821, 1.3350]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5104,  0.9525,  0.9348],
         [ 0.5372,  0.9097,  0.9886],
         [18.5282,  0.2856,  1.0349],
         ...,
         [ 0.0464,  1.4416,  1.8015],
         [16.6097,  0.4630,  2.2851],
         [ 2.2852,  1.3510,  1.8567]],

        [[ 0.8626,  0.9614,  1.2812],
         [11.5219,  0.3232,  1.2893],
         [ 2.9402,  0.3638,  0.8795],
         ...,
         [ 0.3002,  1.2692,  2.4805],
         [ 0.5427,  0.4493,  5.2109],
         [ 0.0424,  2.4762,  2.4364]],

        [[ 1.5328,  0.8263,  0.9214],
         [ 1.8115,  0.3288,  0.7262],
         [ 0.4969,  0.7864,  1.6069],
         ...,
         [ 0.5514,  1.0717, 15.9112],
         [ 0.3114,  1.2295,  3.2343],
         [ 0.2081,  1.4763,  3.2792


Train Diffusion:  42%|████▏     | 2122/5001 [1:39:46<2:03:51,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2123/5001 [1:39:49<2:03:52,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2124/5001 [1:39:51<2:03:53,  2.58s/it][A
Train Diffusion:  42%|████▏     | 2125/5001 [1:39:54<2:04:05,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2126/5001 [1:39:56<2:03:48,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2127/5001 [1:39:59<2:03:29,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2128/5001 [1:40:01<2:03:21,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2129/5001 [1:40:04<2:03:25,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2130/5001 [1:40:07<2:03:31,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2131/5001 [1:40:09<2:03:30,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316121286.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.0492, 0.4911, 1.2879],
        [8.6083, 0.4900, 1.3259],
        [8.6904, 0.5146, 1.2747]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0103,  0.9431,  1.6161],
         [ 4.5019,  0.6740,  1.4056],
         [ 1.2613,  0.8324,  0.6469],
         ...,
         [35.1341,  0.1420,  1.1356],
         [ 4.8332,  0.1835,  0.7247],
         [ 0.4987,  1.4100,  7.8282]],

        [[ 1.4335,  0.8465,  0.5823],
         [ 1.7345,  0.3424,  1.2058],
         [ 9.3867,  0.4159,  1.4642],
         ...,
         [ 3.9814,  0.1468,  1.1093],
         [37.4820,  0.0948,  1.2869],
         [ 5.7537,  0.4526,  0.8774]],

        [[ 0.4658,  0.9543,  0.9535],
         [ 5.8889,  0.6558,  2.3221],
         [ 1.2644,  0.3296,  0.5626],
         ...,
         [ 0.6178,  0.6029,  0.7863],
         [ 0.5298,  0.5113,  1.0939],
         [ 7.5356,  1.7521,  2.3088


Train Diffusion:  43%|████▎     | 2132/5001 [1:40:12<2:03:56,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2133/5001 [1:40:14<2:03:49,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2134/5001 [1:40:17<2:03:36,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2135/5001 [1:40:20<2:03:33,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2136/5001 [1:40:22<2:03:15,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2137/5001 [1:40:25<2:03:05,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2138/5001 [1:40:27<2:03:06,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2139/5001 [1:40:30<2:03:00,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2140/5001 [1:40:32<2:02:59,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2141/5001 [1:40:35<2:02:57,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339564672.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6472, 0.5076, 1.3156],
        [8.9052, 0.4814, 1.3091],
        [8.5937, 0.5006, 1.2975]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.2590e-01, 9.5890e-01, 1.0718e+00],
         [1.2629e+01, 4.5772e-01, 1.2129e+00],
         [2.3831e+00, 4.2906e-01, 6.5055e-01],
         ...,
         [1.3522e+01, 1.5157e-01, 9.2837e-01],
         [5.2177e+00, 8.0536e-02, 3.4058e+00],
         [8.3173e-01, 2.9570e+00, 1.3142e+00]],

        [[1.5949e+00, 8.0670e-01, 2.1453e+00],
         [1.4577e+00, 4.8999e-01, 7.7756e-01],
         [7.1414e-01, 7.0676e-01, 1.0974e+00],
         ...,
         [1.0654e+00, 7.1643e-01, 2.4249e+00],
         [1.9891e-01, 2.8585e+00, 1.7308e+00],
         [4.1825e-01, 4.9015e+00, 9.3895e-01]],

        [[5.8107e-01, 9.5188e-01, 9.3828e-01],
         [5.6795e-01, 8.3322e-01, 1.7013e+00],
         [2.7928e+01, 8.9087e-02, 1.2


Train Diffusion:  43%|████▎     | 2142/5001 [1:40:38<2:02:45,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2143/5001 [1:40:40<2:02:51,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2144/5001 [1:40:43<2:02:42,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2145/5001 [1:40:45<2:02:31,  2.57s/it][A
Train Diffusion:  43%|████▎     | 2146/5001 [1:40:48<2:03:07,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2147/5001 [1:40:50<2:02:42,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2148/5001 [1:40:53<2:02:46,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2149/5001 [1:40:56<2:02:44,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2150/5001 [1:40:58<2:02:38,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2151/5001 [1:41:01<2:02:25,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322986224.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6865, 0.4963, 1.2803],
        [8.6925, 0.5114, 1.2825],
        [8.8410, 0.4722, 1.3231]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5862e+00, 8.0585e-01, 1.4504e+00],
         [1.6355e+00, 3.9006e-01, 7.4991e-01],
         [6.8638e-01, 7.8689e-01, 1.0116e+00],
         ...,
         [1.6885e-01, 1.0970e+01, 1.0821e+00],
         [3.4049e+00, 1.1498e+00, 4.7217e-01],
         [6.6547e+00, 2.3140e+00, 3.5708e+00]],

        [[5.6445e-01, 9.5039e-01, 9.3366e-01],
         [5.4258e-01, 8.5440e-01, 1.5581e+00],
         [2.7192e+01, 2.1299e-01, 1.1809e+00],
         ...,
         [2.7065e+01, 1.1322e-01, 1.2724e+00],
         [5.7944e+00, 6.5236e-02, 2.2442e-01],
         [1.1640e-01, 3.5315e+00, 2.1527e-01]],

        [[7.5271e-01, 9.5957e-01, 1.1287e+00],
         [1.2656e+01, 4.0496e-01, 1.2589e+00],
         [2.5698e+00, 4.2312e-01, 1.1


Train Diffusion:  43%|████▎     | 2152/5001 [1:41:03<2:02:40,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2153/5001 [1:41:06<2:03:53,  2.61s/it][A
Train Diffusion:  43%|████▎     | 2154/5001 [1:41:09<2:03:46,  2.61s/it][A
Train Diffusion:  43%|████▎     | 2155/5001 [1:41:11<2:03:25,  2.60s/it][A
Train Diffusion:  43%|████▎     | 2156/5001 [1:41:14<2:03:13,  2.60s/it][A
Train Diffusion:  43%|████▎     | 2157/5001 [1:41:17<2:04:20,  2.62s/it][A
Train Diffusion:  43%|████▎     | 2158/5001 [1:41:19<2:04:55,  2.64s/it][A
Train Diffusion:  43%|████▎     | 2159/5001 [1:41:22<2:03:49,  2.61s/it][A
Train Diffusion:  43%|████▎     | 2160/5001 [1:41:24<2:03:49,  2.62s/it][A
Train Diffusion:  43%|████▎     | 2161/5001 [1:41:27<2:03:11,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316425878.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8028, 0.4998, 1.3106],
        [8.6319, 0.5044, 1.2921],
        [8.7723, 0.5112, 1.3323]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6024e+00, 8.1209e-01, 4.4047e+00],
         [1.1228e+00, 6.0939e-01, 1.0250e+00],
         [6.5493e-01, 7.1735e-01, 6.6558e-01],
         ...,
         [1.4386e-04, 5.8795e-01, 3.5859e+00],
         [1.6763e-01, 4.4519e-01, 3.0649e+00],
         [4.3969e-02, 2.5775e+00, 2.4739e+00]],

        [[6.9988e-01, 9.5742e-01, 8.5417e-01],
         [6.3374e-01, 7.3830e-01, 1.9070e+00],
         [2.7827e+01, 1.1920e-01, 1.2086e+00],
         ...,
         [1.0760e+01, 3.1404e+00, 7.5635e-01],
         [1.3132e+00, 5.4183e-01, 2.0435e-02],
         [4.0311e+00, 4.7219e-01, 1.5639e+00]],

        [[6.0170e-01, 9.5288e-01, 8.5286e-01],
         [1.2119e+01, 6.9589e-01, 8.7167e-01],
         [1.9410e+00, 4.2653e-01, 7.0


Train Diffusion:  43%|████▎     | 2162/5001 [1:41:30<2:02:51,  2.60s/it][A
Train Diffusion:  43%|████▎     | 2163/5001 [1:41:32<2:02:35,  2.59s/it][A
Train Diffusion:  43%|████▎     | 2164/5001 [1:41:35<2:02:08,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2165/5001 [1:41:37<2:02:03,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2166/5001 [1:41:40<2:01:54,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2167/5001 [1:41:42<2:01:45,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2168/5001 [1:41:45<2:01:40,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2169/5001 [1:41:48<2:01:26,  2.57s/it][A
Train Diffusion:  43%|████▎     | 2170/5001 [1:41:50<2:01:33,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2171/5001 [1:41:53<2:01:43,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317673068.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7689, 0.5055, 1.3072],
        [8.7666, 0.4874, 1.2945],
        [8.7384, 0.5084, 1.3008]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5964,  0.8055,  2.0764],
         [ 1.4760,  0.4781,  0.7764],
         [ 0.7215,  0.7230,  0.9250],
         ...,
         [30.3203,  0.0591,  1.2958],
         [ 5.8908,  0.0839,  0.5681],
         [ 1.6730,  1.8762,  3.2178]],

        [[ 0.5840,  0.9519,  0.9353],
         [ 0.5721,  0.8343,  1.7153],
         [27.7995,  0.1289,  1.2050],
         ...,
         [ 0.2204,  2.5147,  0.5124],
         [30.4936,  0.2150,  0.6877],
         [ 5.9058,  0.2702,  0.8111]],

        [[ 0.7229,  0.9588,  1.0762],
         [12.7297,  0.4521,  1.2178],
         [ 2.3647,  0.4045,  0.7137],
         ...,
         [ 3.3799,  0.1523,  0.7054],
         [ 0.2992,  2.8432,  0.7431],
         [15.1492,  1.6793,  1.8931


Train Diffusion:  43%|████▎     | 2172/5001 [1:41:55<2:01:35,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2173/5001 [1:41:58<2:01:26,  2.58s/it][A
Train Diffusion:  43%|████▎     | 2174/5001 [1:42:00<2:01:09,  2.57s/it][A
Train Diffusion:  43%|████▎     | 2175/5001 [1:42:03<2:01:23,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2176/5001 [1:42:06<2:01:24,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2177/5001 [1:42:08<2:01:10,  2.57s/it][A
Train Diffusion:  44%|████▎     | 2178/5001 [1:42:11<2:01:19,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2179/5001 [1:42:13<2:01:16,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2180/5001 [1:42:16<2:01:01,  2.57s/it][A
Train Diffusion:  44%|████▎     | 2181/5001 [1:42:18<2:01:05,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337525872.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9802, 0.5017, 1.3323],
        [8.4661, 0.5074, 1.3246],
        [8.6365, 0.5024, 1.3096]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.8173e-01, 9.5561e-01, 8.6669e-01],
         [6.1922e-01, 7.5007e-01, 1.9211e+00],
         [2.7229e+01, 2.3951e-01, 1.2037e+00],
         ...,
         [2.2163e+00, 2.0870e-01, 2.2241e+00],
         [1.5356e+00, 4.5947e-02, 7.1612e-01],
         [3.6819e+00, 7.3441e-01, 2.9996e+00]],

        [[6.1497e-01, 9.5289e-01, 8.6277e-01],
         [1.2416e+01, 6.6992e-01, 9.0377e-01],
         [2.0429e+00, 4.1043e-01, 7.2482e-01],
         ...,
         [1.7307e+01, 1.7379e-01, 1.2894e+00],
         [3.8907e+00, 1.2323e-01, 7.5942e+00],
         [1.7353e-02, 2.4628e+00, 3.2408e+00]],

        [[1.6058e+00, 8.1003e-01, 4.2227e+00],
         [1.1567e+00, 6.2797e-01, 1.0342e+00],
         [6.2147e-01, 7.2879e-01, 1.3


Train Diffusion:  44%|████▎     | 2182/5001 [1:42:21<2:00:52,  2.57s/it][A
Train Diffusion:  44%|████▎     | 2183/5001 [1:42:24<2:00:58,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2184/5001 [1:42:26<2:01:00,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2185/5001 [1:42:29<2:00:48,  2.57s/it][A
Train Diffusion:  44%|████▎     | 2186/5001 [1:42:31<2:00:49,  2.58s/it][A
Train Diffusion:  44%|████▎     | 2187/5001 [1:42:34<2:01:01,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2188/5001 [1:42:37<2:00:42,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2189/5001 [1:42:39<2:00:42,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2190/5001 [1:42:42<2:00:31,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2191/5001 [1:42:44<2:00:49,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331130028.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7387, 0.5027, 1.3246],
        [8.7747, 0.5118, 1.3098],
        [8.6777, 0.4926, 1.3239]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5094e+00, 8.3154e-01, 6.2711e-01],
         [1.8198e+00, 3.3404e-01, 1.2623e+00],
         [2.3483e+01, 2.9780e-01, 1.1915e+00],
         ...,
         [4.0359e-01, 3.9123e-02, 8.8249e-01],
         [3.2484e+00, 1.7974e-01, 7.3997e-01],
         [5.2618e-01, 1.9649e+00, 8.5590e-01]],

        [[9.0295e-01, 9.5850e-01, 1.3250e+00],
         [7.2992e+00, 5.3683e-01, 1.3618e+00],
         [1.6664e+00, 7.3050e-01, 7.1717e-01],
         ...,
         [2.1353e-01, 5.6737e-01, 1.1145e+00],
         [4.3410e+01, 9.7946e-02, 1.3070e+00],
         [6.1019e+00, 3.3131e-01, 1.0317e+00]],

        [[4.9538e-01, 9.5259e-01, 9.4245e-01],
         [2.7840e+00, 7.4992e-01, 1.7902e+00],
         [7.9862e-01, 8.2410e-01, 1.2


Train Diffusion:  44%|████▍     | 2192/5001 [1:42:47<2:00:42,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2193/5001 [1:42:49<2:00:24,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2194/5001 [1:42:52<2:00:32,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2195/5001 [1:42:55<2:00:20,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2196/5001 [1:42:57<2:00:22,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2197/5001 [1:43:00<2:00:11,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2198/5001 [1:43:02<2:00:16,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2199/5001 [1:43:05<2:00:15,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2200/5001 [1:43:07<2:00:14,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2201/5001 [1:43:10<2:01:32,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333236038.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7548, 0.4876, 1.3016],
        [8.6598, 0.4999, 1.3049],
        [8.8042, 0.4814, 1.3088]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4636,  0.9531,  1.0669],
         [ 0.5045,  0.6790,  0.2156],
         [ 2.5731,  2.1161,  0.2326],
         ...,
         [ 0.5538,  0.5147,  1.1178],
         [30.8506,  0.0856,  1.3457],
         [ 5.7693,  0.2149,  1.1413]],

        [[ 1.0163,  0.9407,  1.4532],
         [ 7.3119,  0.6714,  1.3212],
         [ 2.4027,  0.8302,  0.4965],
         ...,
         [32.5699,  0.0869,  1.2256],
         [ 5.5213,  0.0767,  0.7994],
         [ 0.6582,  1.9246,  0.8258]],

        [[ 1.4298,  0.8463,  3.4444],
         [ 0.8365,  1.0062,  0.9835],
         [ 0.3059,  6.2376,  2.5447],
         ...,
         [ 2.9517,  0.4129,  1.0670],
         [ 0.3774,  0.6579,  0.8924],
         [20.0993,  1.2177,  1.9379


Train Diffusion:  44%|████▍     | 2202/5001 [1:43:13<2:01:06,  2.60s/it][A
Train Diffusion:  44%|████▍     | 2203/5001 [1:43:15<2:01:18,  2.60s/it][A
Train Diffusion:  44%|████▍     | 2204/5001 [1:43:18<2:01:02,  2.60s/it][A
Train Diffusion:  44%|████▍     | 2205/5001 [1:43:21<2:04:37,  2.67s/it][A
Train Diffusion:  44%|████▍     | 2206/5001 [1:43:23<2:03:10,  2.64s/it][A
Train Diffusion:  44%|████▍     | 2207/5001 [1:43:26<2:02:35,  2.63s/it][A
Train Diffusion:  44%|████▍     | 2208/5001 [1:43:28<2:01:57,  2.62s/it][A
Train Diffusion:  44%|████▍     | 2209/5001 [1:43:31<2:01:22,  2.61s/it][A
Train Diffusion:  44%|████▍     | 2210/5001 [1:43:34<2:01:00,  2.60s/it][A
Train Diffusion:  44%|████▍     | 2211/5001 [1:43:36<2:00:31,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331567881.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7993, 0.4897, 1.3322],
        [8.6022, 0.4902, 1.3554],
        [8.8577, 0.4899, 1.3146]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5689,  0.8189,  5.1834],
         [ 1.1085,  0.1579,  0.9817],
         [ 0.9578,  0.7289,  0.7487],
         ...,
         [ 1.7675,  0.5491,  5.4462],
         [ 0.5011,  0.9735,  1.5507],
         [ 5.7414,  2.1231,  4.3454]],

        [[ 0.5372,  0.9531,  0.6447],
         [ 9.7912,  1.3361,  2.6416],
         [ 0.8473,  0.8027,  0.9584],
         ...,
         [ 0.0387,  1.0736,  4.8971],
         [ 0.1341,  0.4897,  0.8645],
         [ 7.5820,  1.4555,  2.0980]],

        [[ 0.7999,  0.9630,  0.8018],
         [ 0.6895,  0.6938,  1.3579],
         [21.6242,  0.3148,  1.1718],
         ...,
         [ 5.4912,  0.5350,  4.4819],
         [ 0.6766,  0.1140,  0.2639],
         [ 5.6202,  0.2253,  0.7884


Train Diffusion:  44%|████▍     | 2212/5001 [1:43:39<1:59:59,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2213/5001 [1:43:41<1:59:58,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2214/5001 [1:43:44<1:59:44,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2215/5001 [1:43:47<1:59:40,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2216/5001 [1:43:49<1:59:37,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2217/5001 [1:43:52<1:59:30,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2218/5001 [1:43:54<1:59:34,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2219/5001 [1:43:57<1:59:29,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2220/5001 [1:43:59<1:59:18,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2221/5001 [1:44:02<1:59:34,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342301939.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7036, 0.5063, 1.3004],
        [8.8474, 0.4919, 1.3308],
        [8.8256, 0.5037, 1.2716]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.4234e-01, 9.5265e-01, 9.5465e-01],
         [9.6806e+00, 5.2499e-01, 1.6023e+00],
         [1.9821e+00, 1.0693e+00, 5.6395e-01],
         ...,
         [8.2881e-01, 4.2009e-01, 1.0426e+00],
         [5.3395e+00, 5.0064e-01, 1.5953e+00],
         [5.8322e-01, 2.0928e+00, 3.0011e+00]],

        [[1.5725e+00, 8.3966e-01, 5.7711e+00],
         [8.7288e-01, 1.1947e+00, 1.2854e+00],
         [7.7908e-01, 1.0985e+01, 8.1001e-01],
         ...,
         [1.0760e+00, 1.3101e-01, 1.8006e+00],
         [2.8145e-01, 6.2685e-01, 8.6659e-01],
         [2.0723e+01, 1.2842e+00, 1.9866e+00]],

        [[7.9006e-01, 9.6269e-01, 8.0773e-01],
         [8.0969e-01, 5.6440e-01, 2.0572e-01],
         [5.8121e-05, 2.4095e+00, 2.7


Train Diffusion:  44%|████▍     | 2222/5001 [1:44:05<1:59:42,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2223/5001 [1:44:07<1:59:20,  2.58s/it][A
Train Diffusion:  44%|████▍     | 2224/5001 [1:44:10<1:59:07,  2.57s/it][A
Train Diffusion:  44%|████▍     | 2225/5001 [1:44:12<1:59:24,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2226/5001 [1:44:15<1:59:16,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2227/5001 [1:44:17<1:59:21,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2228/5001 [1:44:20<1:59:00,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2229/5001 [1:44:23<1:58:46,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2230/5001 [1:44:25<1:59:04,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2231/5001 [1:44:28<1:58:56,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333556892.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5879, 0.4780, 1.2795],
        [8.7295, 0.5031, 1.3310],
        [8.7692, 0.5181, 1.3410]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5843,  0.9512,  0.7788],
         [11.2985,  0.8519,  0.8059],
         [ 1.6946,  0.4423,  0.7804],
         ...,
         [ 0.0421, 12.1036,  1.9547],
         [31.3748,  0.1586,  1.2344],
         [ 5.7695,  0.3918,  1.0695]],

        [[ 0.7237,  0.9581,  0.8377],
         [ 0.6480,  0.7198,  1.8565],
         [27.9815,  0.1908,  1.1977],
         ...,
         [11.3688,  0.6961,  0.2274],
         [ 4.4462,  0.1262,  0.6690],
         [ 0.6258,  1.8199,  0.8246]],

        [[ 1.5951,  0.8131,  4.9256],
         [ 1.0913,  0.5700,  1.0529],
         [ 0.6886,  0.7168,  1.2188],
         ...,
         [ 2.8777,  0.1491,  0.7367],
         [ 0.6127,  0.5604,  0.8356],
         [24.5060,  1.3156,  2.0359


Train Diffusion:  45%|████▍     | 2232/5001 [1:44:30<1:58:42,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2233/5001 [1:44:33<1:58:59,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2234/5001 [1:44:35<1:58:49,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2235/5001 [1:44:38<1:58:42,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2236/5001 [1:44:41<1:58:37,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2237/5001 [1:44:43<1:58:26,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2238/5001 [1:44:46<1:58:32,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2239/5001 [1:44:48<1:58:18,  2.57s/it][A
Train Diffusion:  45%|████▍     | 2240/5001 [1:44:51<1:58:30,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2241/5001 [1:44:54<1:58:54,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326227132.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8508, 0.4923, 1.2921],
        [8.9057, 0.4919, 1.2720],
        [8.6896, 0.4923, 1.2901]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7688,  0.9634,  1.1475],
         [13.1032,  0.3786,  1.2683],
         [ 2.7623,  0.3702,  0.6159],
         ...,
         [ 4.2590,  0.1375,  0.3144],
         [27.5578,  1.0174,  2.4137],
         [ 3.3579,  0.7498,  1.5364]],

        [[ 1.5810,  0.8076,  1.2282],
         [ 1.7131,  0.3696,  0.7442],
         [ 0.5017,  0.8107,  1.3270],
         ...,
         [ 0.2514,  0.9422,  1.7590],
         [ 0.1664, 10.5822,  1.3471],
         [19.2503,  1.5599,  1.8650]],

        [[ 0.5550,  0.9537,  0.9306],
         [ 0.5201,  0.8548,  1.5276],
         [26.0742,  0.1638,  1.1867],
         ...,
         [27.4546,  0.4235,  0.4724],
         [ 4.7486,  0.0835,  0.6679],
         [ 0.9036,  1.4310,  0.6840


Train Diffusion:  45%|████▍     | 2242/5001 [1:44:56<1:58:41,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2243/5001 [1:44:59<1:58:31,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2244/5001 [1:45:01<1:58:40,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2245/5001 [1:45:04<1:58:38,  2.58s/it][A
Train Diffusion:  45%|████▍     | 2246/5001 [1:45:06<1:58:46,  2.59s/it][A
Train Diffusion:  45%|████▍     | 2247/5001 [1:45:09<1:58:48,  2.59s/it][A
Train Diffusion:  45%|████▍     | 2248/5001 [1:45:12<1:59:45,  2.61s/it][A
Train Diffusion:  45%|████▍     | 2249/5001 [1:45:14<2:00:25,  2.63s/it][A
Train Diffusion:  45%|████▍     | 2250/5001 [1:45:17<2:03:15,  2.69s/it][A
Train Diffusion:  45%|████▌     | 2251/5001 [1:45:21<2:18:11,  3.02s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321417462.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8690, 0.4779, 1.3031],
        [8.7233, 0.4889, 1.3029],
        [8.7976, 0.5150, 1.2741]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6043e+00, 8.0988e-01, 2.6705e+00],
         [1.2593e+00, 5.9507e-01, 8.0141e-01],
         [7.1724e-01, 8.4133e-01, 6.4831e-01],
         ...,
         [5.0369e-01, 5.7134e-01, 1.0393e+00],
         [6.1289e+00, 3.4354e-01, 1.8670e+00],
         [1.0688e+00, 8.0008e-01, 1.0711e+01]],

        [[6.9138e-01, 9.5797e-01, 1.0240e+00],
         [1.2749e+01, 4.7012e-01, 1.1798e+00],
         [2.2595e+00, 4.2649e-01, 1.1521e+00],
         ...,
         [5.8928e+00, 6.0399e-01, 1.6405e+00],
         [8.7752e-01, 1.3890e+00, 1.6639e+01],
         [2.2775e-01, 3.0245e+00, 3.1903e+00]],

        [[6.0796e-01, 9.5428e-01, 9.2585e-01],
         [5.6757e-01, 7.6775e-01, 1.7902e+00],
         [2.7502e+01, 1.2014e-01, 1.1


Train Diffusion:  45%|████▌     | 2252/5001 [1:45:25<2:29:47,  3.27s/it][A
Train Diffusion:  45%|████▌     | 2253/5001 [1:45:28<2:22:36,  3.11s/it][A
Train Diffusion:  45%|████▌     | 2254/5001 [1:45:30<2:18:34,  3.03s/it][A
Train Diffusion:  45%|████▌     | 2255/5001 [1:45:33<2:14:21,  2.94s/it][A
Train Diffusion:  45%|████▌     | 2256/5001 [1:45:36<2:11:10,  2.87s/it][A
Train Diffusion:  45%|████▌     | 2257/5001 [1:45:39<2:08:49,  2.82s/it][A
Train Diffusion:  45%|████▌     | 2258/5001 [1:45:41<2:07:14,  2.78s/it][A
Train Diffusion:  45%|████▌     | 2259/5001 [1:45:44<2:06:28,  2.77s/it][A
Train Diffusion:  45%|████▌     | 2260/5001 [1:45:47<2:05:20,  2.74s/it][A
Train Diffusion:  45%|████▌     | 2261/5001 [1:45:49<2:03:32,  2.71s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 353483446.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5051, 0.5141, 1.2908],
        [8.6382, 0.4962, 1.3120],
        [8.9629, 0.5227, 1.2859]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0549e-01, 9.5627e-01, 8.4753e-01],
         [6.4074e-01, 7.2513e-01, 1.8617e+00],
         [2.7252e+01, 1.7897e-01, 1.2033e+00],
         ...,
         [2.8779e-01, 1.8283e+00, 4.0108e+00],
         [1.6245e-01, 1.9368e+00, 1.0298e+00],
         [3.9537e-01, 1.5585e+00, 8.9170e+00]],

        [[1.6029e+00, 8.1336e-01, 4.3116e+00],
         [1.0858e+00, 6.9185e-01, 1.0060e+00],
         [6.6580e-01, 8.1404e-01, 9.0795e-01],
         ...,
         [1.2377e+00, 5.7667e-01, 4.4656e+00],
         [1.9950e+00, 6.0090e-03, 1.3846e+00],
         [4.4716e+00, 5.8125e-01, 1.2879e+00]],

        [[5.9568e-01, 9.5117e-01, 8.7812e-01],
         [1.2226e+01, 6.5396e-01, 1.0628e+00],
         [1.9326e+00, 4.4058e-01, 8.7


Train Diffusion:  45%|████▌     | 2262/5001 [1:45:52<2:02:09,  2.68s/it][A
Train Diffusion:  45%|████▌     | 2263/5001 [1:45:54<2:00:51,  2.65s/it][A
Train Diffusion:  45%|████▌     | 2264/5001 [1:45:57<2:00:23,  2.64s/it][A
Train Diffusion:  45%|████▌     | 2265/5001 [1:46:00<1:59:39,  2.62s/it][A
Train Diffusion:  45%|████▌     | 2266/5001 [1:46:02<1:59:44,  2.63s/it][A
Train Diffusion:  45%|████▌     | 2267/5001 [1:46:05<1:59:19,  2.62s/it][A
Train Diffusion:  45%|████▌     | 2268/5001 [1:46:07<1:59:02,  2.61s/it][A
Train Diffusion:  45%|████▌     | 2269/5001 [1:46:10<1:59:14,  2.62s/it][A
Train Diffusion:  45%|████▌     | 2270/5001 [1:46:13<1:59:10,  2.62s/it][A
Train Diffusion:  45%|████▌     | 2271/5001 [1:46:15<1:59:03,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336279558.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4107, 0.5173, 1.2852],
        [8.9340, 0.4872, 1.2836],
        [8.8421, 0.5065, 1.2659]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0756,  0.9295,  0.8615],
         [ 1.1309,  0.4668,  1.9215],
         [28.0491,  0.2192,  1.1891],
         ...,
         [ 7.4988,  0.3049,  0.7576],
         [ 4.9012,  0.0990,  0.2637],
         [ 0.9268,  1.4559,  0.7460]],

        [[ 1.3801,  0.8574,  5.2637],
         [ 1.1781,  0.9802,  1.4766],
         [ 0.8933,  0.7059,  1.3161],
         ...,
         [ 0.4506,  0.5837,  1.0611],
         [34.9046,  0.1303,  1.2724],
         [ 5.0759,  0.6290,  1.2777]],

        [[ 0.4552,  0.9536,  0.9396],
         [ 9.1551,  0.6360,  0.8288],
         [ 1.5817,  0.4625,  0.7260],
         ...,
         [ 0.8624,  0.2200,  1.7422],
         [ 0.2991,  0.6183,  0.7813],
         [25.9359,  1.3287,  1.9480


Train Diffusion:  45%|████▌     | 2272/5001 [1:46:18<1:58:46,  2.61s/it][A
Train Diffusion:  45%|████▌     | 2273/5001 [1:46:21<1:58:42,  2.61s/it][A
Train Diffusion:  45%|████▌     | 2274/5001 [1:46:23<1:58:25,  2.61s/it][A
Train Diffusion:  45%|████▌     | 2275/5001 [1:46:26<1:58:03,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2276/5001 [1:46:28<1:58:16,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2277/5001 [1:46:31<1:58:08,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2278/5001 [1:46:34<1:58:32,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2279/5001 [1:46:36<1:58:15,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2280/5001 [1:46:39<1:57:58,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2281/5001 [1:46:41<1:58:06,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330102777.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7417, 0.4905, 1.3270],
        [8.7263, 0.4935, 1.3330],
        [8.7077, 0.5055, 1.2811]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5763e+00, 8.0857e-01, 1.1517e+00],
         [1.7289e+00, 3.6208e-01, 6.9463e-01],
         [1.4036e+00, 7.4520e-01, 9.6841e-01],
         ...,
         [2.8904e+01, 1.3018e-01, 1.2293e+00],
         [4.9694e+00, 1.6805e-01, 8.3143e-01],
         [3.4404e-02, 1.5479e+00, 2.4690e+00]],

        [[7.8044e-01, 9.6114e-01, 1.1618e+00],
         [1.2996e+01, 3.3978e-01, 1.2769e+00],
         [2.9348e+00, 3.6887e-01, 8.5345e-01],
         ...,
         [3.6280e+00, 2.1586e-01, 7.9096e-01],
         [7.7483e-01, 4.7823e-01, 4.5474e-01],
         [1.1587e+01, 2.2530e+00, 5.0473e+00]],

        [[5.4834e-01, 9.5103e-01, 9.2749e-01],
         [5.0963e-01, 8.4686e-01, 1.2632e+00],
         [1.9521e+01, 3.3575e-01, 1.0


Train Diffusion:  46%|████▌     | 2282/5001 [1:46:44<1:58:14,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2283/5001 [1:46:47<1:57:59,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2284/5001 [1:46:49<1:58:04,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2285/5001 [1:46:52<1:57:42,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2286/5001 [1:46:54<1:57:44,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2287/5001 [1:46:57<1:57:38,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2288/5001 [1:47:00<1:57:27,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2289/5001 [1:47:02<1:57:37,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2290/5001 [1:47:05<1:57:24,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2291/5001 [1:47:07<1:57:18,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323211302.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6641, 0.4726, 1.2923],
        [8.9795, 0.4946, 1.3198],
        [8.6485, 0.5002, 1.3099]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3172,  0.8786,  0.8085],
         [ 1.5317,  0.3654,  1.4463],
         [23.8614,  0.3013,  1.1824],
         ...,
         [20.3706,  0.1762,  1.2113],
         [ 5.0947,  0.0474,  4.4565],
         [ 0.3618,  2.8012,  1.4024]],

        [[ 0.4454,  0.9555,  0.8639],
         [ 7.4231,  0.9034,  2.1796],
         [ 0.9756,  0.7264,  1.0444],
         ...,
         [ 4.4751,  0.1086,  0.8780],
         [27.0840,  0.8800,  3.4664],
         [ 4.4590,  2.7208,  2.1473]],

        [[ 1.1510,  0.9154,  2.8041],
         [ 2.6532,  0.6979,  1.5015],
         [ 1.1496,  0.8364,  0.7287],
         ...,
         [ 1.9190,  8.4256,  0.7462],
         [ 0.0924,  1.9908,  1.0639],
         [ 0.3387,  1.5887,  0.7900


Train Diffusion:  46%|████▌     | 2292/5001 [1:47:10<1:57:24,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2293/5001 [1:47:13<1:57:20,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2294/5001 [1:47:15<2:00:04,  2.66s/it][A
Train Diffusion:  46%|████▌     | 2295/5001 [1:47:18<1:59:27,  2.65s/it][A
Train Diffusion:  46%|████▌     | 2296/5001 [1:47:21<1:58:41,  2.63s/it][A
Train Diffusion:  46%|████▌     | 2297/5001 [1:47:23<1:58:00,  2.62s/it][A
Train Diffusion:  46%|████▌     | 2298/5001 [1:47:26<1:59:33,  2.65s/it][A
Train Diffusion:  46%|████▌     | 2299/5001 [1:47:29<2:01:03,  2.69s/it][A
Train Diffusion:  46%|████▌     | 2300/5001 [1:47:31<1:59:45,  2.66s/it][A
Train Diffusion:  46%|████▌     | 2301/5001 [1:47:34<1:59:07,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331774422.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5414, 0.5040, 1.3210],
        [8.6254, 0.5010, 1.3204],
        [8.8530, 0.4978, 1.3368]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8568,  0.9605,  1.2348],
         [ 9.5149,  0.4502,  1.2759],
         [ 2.2046,  0.5645,  0.4840],
         ...,
         [ 0.2642,  0.5948,  0.3380],
         [29.0250,  0.6665,  0.4143],
         [ 5.0940,  0.1763,  1.6292]],

        [[ 0.5121,  0.9510,  0.9331],
         [ 1.3419,  0.7240,  0.8060],
         [ 0.6777,  1.1788,  1.5187],
         ...,
         [ 1.9394,  0.8106,  0.7159],
         [ 0.0877,  1.5459,  2.0469],
         [ 8.3803,  2.3517,  5.4609]],

        [[ 1.5363,  0.8252,  0.5850],
         [ 1.8153,  0.3445,  1.0978],
         [22.0433,  0.2880,  1.9699],
         ...,
         [11.3072,  0.0480,  1.5505],
         [ 5.8722,  0.0649,  1.3597],
         [ 0.0301,  1.7279,  2.5765


Train Diffusion:  46%|████▌     | 2302/5001 [1:47:37<1:58:26,  2.63s/it][A
Train Diffusion:  46%|████▌     | 2303/5001 [1:47:39<1:57:52,  2.62s/it][A
Train Diffusion:  46%|████▌     | 2304/5001 [1:47:42<1:57:28,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2305/5001 [1:47:44<1:57:11,  2.61s/it][A
Train Diffusion:  46%|████▌     | 2306/5001 [1:47:47<1:57:00,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2307/5001 [1:47:49<1:56:50,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2308/5001 [1:47:52<1:56:52,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2309/5001 [1:47:55<1:56:42,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2310/5001 [1:47:57<1:56:41,  2.60s/it][A
Train Diffusion:  46%|████▌     | 2311/5001 [1:48:00<1:56:38,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335939299.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7553, 0.4901, 1.3118],
        [8.7379, 0.5126, 1.3037],
        [8.6797, 0.4968, 1.3303]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.6232e-01, 9.5968e-01, 1.1377e+00],
         [1.2705e+01, 3.9760e-01, 1.2667e+00],
         [2.6090e+00, 4.2230e-01, 1.1027e+00],
         ...,
         [3.0962e+01, 8.8775e-02, 1.2606e+00],
         [5.5683e+00, 9.9763e-02, 5.8281e-01],
         [1.5705e+00, 1.7525e+00, 8.6371e+00]],

        [[5.5940e-01, 9.4994e-01, 9.3077e-01],
         [5.1759e-01, 8.5424e-01, 1.5275e+00],
         [2.6975e+01, 2.2179e-01, 1.1905e+00],
         ...,
         [4.1794e+00, 1.4704e-01, 6.8757e-01],
         [5.5989e-01, 7.2828e-01, 6.6286e-01],
         [1.5079e-02, 1.5987e+00, 3.1149e+00]],

        [[1.5815e+00, 8.0649e-01, 1.2998e+00],
         [1.6771e+00, 3.7577e-01, 7.4004e-01],
         [6.4977e-01, 8.0887e-01, 1.0


Train Diffusion:  46%|████▌     | 2312/5001 [1:48:03<1:56:37,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2313/5001 [1:48:05<1:56:26,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2314/5001 [1:48:08<1:56:28,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2315/5001 [1:48:10<1:56:19,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2316/5001 [1:48:13<1:56:24,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2317/5001 [1:48:16<1:56:21,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2318/5001 [1:48:18<1:56:32,  2.61s/it][A
Train Diffusion:  46%|████▋     | 2319/5001 [1:48:21<1:56:17,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2320/5001 [1:48:23<1:56:15,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2321/5001 [1:48:26<1:56:03,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337200262.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6239, 0.4950, 1.3003],
        [8.7935, 0.5143, 1.3268],
        [8.8359, 0.4827, 1.3128]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5399,  0.9524,  0.9493],
         [ 0.5106,  0.8564,  1.4435],
         [23.6414,  0.2888,  1.1719],
         ...,
         [31.2690,  0.1020,  1.2365],
         [ 5.4404,  0.1026,  0.8721],
         [16.8678,  1.3531,  1.8772]],

        [[ 1.5692,  0.8123,  1.3219],
         [ 1.6468,  0.3854,  0.7344],
         [ 0.6629,  0.8148,  0.9475],
         ...,
         [ 0.3189,  0.9939,  1.0329],
         [ 0.3736,  0.6631,  0.4826],
         [ 0.1391,  2.5567,  0.7271]],

        [[ 0.7949,  0.9624,  1.1782],
         [12.5967,  0.3847,  1.2719],
         [ 2.7497,  0.4200,  0.8979],
         ...,
         [ 3.8098,  0.2013,  1.0458],
         [39.1256,  0.0641,  1.3975],
         [ 6.4254,  0.1465,  2.7530


Train Diffusion:  46%|████▋     | 2322/5001 [1:48:29<1:56:06,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2323/5001 [1:48:31<1:56:09,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2324/5001 [1:48:34<1:56:06,  2.60s/it][A
Train Diffusion:  46%|████▋     | 2325/5001 [1:48:36<1:55:58,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2326/5001 [1:48:39<1:55:59,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2327/5001 [1:48:42<1:55:46,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2328/5001 [1:48:44<1:55:56,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2329/5001 [1:48:47<1:55:35,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2330/5001 [1:48:49<1:55:36,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2331/5001 [1:48:52<1:55:45,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328650537.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6248, 0.4873, 1.3278],
        [8.8009, 0.4807, 1.2643],
        [8.8815, 0.4993, 1.2896]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5634,  0.8131,  1.0649],
         [ 1.7857,  0.3439,  0.7151],
         [ 0.1209,  0.8218,  1.6809],
         ...,
         [19.9416,  0.5257,  0.7899],
         [ 3.9448,  0.1559,  0.7230],
         [ 0.1150,  1.7270,  2.3147]],

        [[ 0.8069,  0.9632,  1.1944],
         [12.6118,  0.3286,  1.2812],
         [ 2.9731,  0.3437,  1.0367],
         ...,
         [ 1.3126,  0.1247,  0.5531],
         [ 1.8251,  0.3791,  0.6272],
         [14.3933,  1.8536,  2.3262]],

        [[ 0.5344,  0.9533,  0.9187],
         [ 0.4763,  0.8605,  1.1089],
         [21.2871,  0.2632,  1.0325],
         ...,
         [ 0.2601,  0.6013,  0.9188],
         [42.5393,  0.1343,  1.3079],
         [ 6.4613,  0.1885,  0.9869


Train Diffusion:  47%|████▋     | 2332/5001 [1:48:55<1:55:48,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2333/5001 [1:48:57<1:55:54,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2334/5001 [1:49:00<1:55:38,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2335/5001 [1:49:02<1:56:12,  2.62s/it][A
Train Diffusion:  47%|████▋     | 2336/5001 [1:49:05<1:55:48,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2337/5001 [1:49:08<1:55:22,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2338/5001 [1:49:10<1:55:44,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2339/5001 [1:49:13<1:55:32,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2340/5001 [1:49:16<1:58:32,  2.67s/it][A
Train Diffusion:  47%|████▋     | 2341/5001 [1:49:18<1:57:34,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333161696.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7768, 0.4923, 1.3075],
        [8.7738, 0.4937, 1.2892],
        [8.7994, 0.4978, 1.2765]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.4079e-01, 9.5624e-01, 8.9474e-01],
         [6.0080e-01, 7.2440e-01, 1.8551e+00],
         [2.5604e+01, 2.4487e-01, 1.2051e+00],
         ...,
         [3.2249e-01, 5.8140e-01, 1.3801e+00],
         [2.0819e-06, 6.0651e-01, 3.4086e+00],
         [1.4300e-01, 1.7028e+00, 7.9721e-01]],

        [[1.6105e+00, 8.1517e-01, 3.3612e+00],
         [1.0884e+00, 6.9764e-01, 9.1689e-01],
         [6.9063e-01, 8.7467e-01, 8.8549e-01],
         ...,
         [3.1386e+01, 8.4494e-02, 1.2478e+00],
         [5.5551e+00, 5.2753e-02, 7.5309e+00],
         [5.3271e-01, 2.2373e+00, 1.6946e+00]],

        [[6.5206e-01, 9.5675e-01, 9.7094e-01],
         [1.2884e+01, 5.0528e-01, 1.1628e+00],
         [2.1592e+00, 4.6146e-01, 8.1


Train Diffusion:  47%|████▋     | 2342/5001 [1:49:21<1:56:47,  2.64s/it][A
Train Diffusion:  47%|████▋     | 2343/5001 [1:49:23<1:56:28,  2.63s/it][A
Train Diffusion:  47%|████▋     | 2344/5001 [1:49:26<1:56:33,  2.63s/it][A
Train Diffusion:  47%|████▋     | 2345/5001 [1:49:29<1:57:01,  2.64s/it][A
Train Diffusion:  47%|████▋     | 2346/5001 [1:49:32<1:58:56,  2.69s/it][A
Train Diffusion:  47%|████▋     | 2347/5001 [1:49:34<1:58:01,  2.67s/it][A
Train Diffusion:  47%|████▋     | 2348/5001 [1:49:37<1:57:02,  2.65s/it][A
Train Diffusion:  47%|████▋     | 2349/5001 [1:49:39<1:56:11,  2.63s/it][A
Train Diffusion:  47%|████▋     | 2350/5001 [1:49:42<1:57:39,  2.66s/it][A
Train Diffusion:  47%|████▋     | 2351/5001 [1:49:45<1:57:24,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323062080.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7116, 0.4916, 1.3161],
        [8.6967, 0.4964, 1.3208],
        [8.7321, 0.4910, 1.2844]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4609,  0.9525,  0.7207],
         [ 7.7864,  1.2534,  1.7638],
         [ 0.7704,  1.0337,  0.7634],
         ...,
         [ 1.1696,  0.1382,  0.3893],
         [ 3.7401,  0.1004,  0.5359],
         [19.9542,  1.6737,  2.3807]],

        [[ 1.4098,  0.8489,  5.5137],
         [ 1.0236,  0.4227,  1.2161],
         [ 1.3961,  0.6779,  0.5149],
         ...,
         [16.6628,  0.5282,  1.0422],
         [ 3.7212,  0.3524,  0.0440],
         [ 5.0086,  0.2368,  0.7777]],

        [[ 1.0380,  0.9364,  0.8335],
         [ 1.0169,  0.4983,  1.3608],
         [17.0533,  0.3219,  1.2283],
         ...,
         [ 0.4013,  0.4836,  0.9104],
         [ 4.1900,  0.4690,  1.3984],
         [ 0.4530,  2.4672,  1.9846


Train Diffusion:  47%|████▋     | 2352/5001 [1:49:47<1:56:48,  2.65s/it][A
Train Diffusion:  47%|████▋     | 2353/5001 [1:49:50<1:55:53,  2.63s/it][A
Train Diffusion:  47%|████▋     | 2354/5001 [1:49:53<1:55:35,  2.62s/it][A
Train Diffusion:  47%|████▋     | 2355/5001 [1:49:55<1:55:12,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2356/5001 [1:49:58<1:54:51,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2357/5001 [1:50:00<1:54:43,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2358/5001 [1:50:03<1:54:50,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2359/5001 [1:50:06<1:54:33,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2360/5001 [1:50:08<1:54:35,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2361/5001 [1:50:11<1:54:27,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 308306427.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6930, 0.4884, 1.3042],
        [8.7173, 0.5034, 1.2991],
        [8.7477, 0.4948, 1.3247]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6279,  0.9534,  0.9082],
         [ 0.5805,  0.7881,  1.8223],
         [28.5180,  0.1151,  1.1990],
         ...,
         [ 7.8152,  0.0368,  1.1493],
         [ 2.8763,  8.0963,  1.3440],
         [ 8.5420,  2.9707,  6.5751]],

        [[ 1.6064,  0.8068,  3.3057],
         [ 1.2727,  0.5410,  0.8302],
         [ 0.7506,  0.6834,  0.7823],
         ...,
         [ 4.2650,  0.1078,  0.6812],
         [ 1.2748,  0.9345,  0.4947],
         [ 0.3567,  2.3825,  1.1592]],

        [[ 0.6684,  0.9552,  0.9536],
         [12.1095,  0.5566,  1.0873],
         [ 2.0892,  0.4565,  1.1864],
         ...,
         [18.2829,  0.8132,  0.4064],
         [ 3.5730,  0.6914,  5.3684],
         [ 4.3039,  0.6031,  1.4773


Train Diffusion:  47%|████▋     | 2362/5001 [1:50:13<1:54:33,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2363/5001 [1:50:16<1:54:55,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2364/5001 [1:50:19<1:54:34,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2365/5001 [1:50:21<1:54:08,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2366/5001 [1:50:24<1:54:14,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2367/5001 [1:50:26<1:54:22,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2368/5001 [1:50:29<1:54:04,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2369/5001 [1:50:32<1:54:12,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2370/5001 [1:50:34<1:54:16,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2371/5001 [1:50:37<1:54:17,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322378150.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8266, 0.4915, 1.2897],
        [8.6824, 0.4782, 1.3050],
        [8.7653, 0.5082, 1.2941]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1594,  0.9118,  0.9297],
         [ 1.2263,  0.4371,  0.8169],
         [ 0.5729,  0.6618,  1.4923],
         ...,
         [ 0.5601,  0.5837,  1.0927],
         [ 0.0358,  0.5099,  2.6815],
         [ 0.3297,  1.6424,  0.7438]],

        [[ 0.4453,  0.9540,  0.7377],
         [ 2.8181,  1.4185,  0.3807],
         [11.9798,  0.3903,  0.9917],
         ...,
         [ 3.4547,  0.2794,  0.9850],
         [ 0.4098,  0.6328,  0.4850],
         [17.1926,  2.1138,  1.7701]],

        [[ 1.3066,  0.8799,  4.7793],
         [ 2.2672,  0.2174,  1.2116],
         [ 2.5499,  0.3803,  0.9126],
         ...,
         [30.7722,  0.0976,  1.2585],
         [ 5.4664,  0.0413,  9.5505],
         [ 1.6227,  1.6043,  1.3278


Train Diffusion:  47%|████▋     | 2372/5001 [1:50:39<1:54:13,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2373/5001 [1:50:42<1:54:02,  2.60s/it][A
Train Diffusion:  47%|████▋     | 2374/5001 [1:50:45<1:54:06,  2.61s/it][A
Train Diffusion:  47%|████▋     | 2375/5001 [1:50:47<1:54:16,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2376/5001 [1:50:50<1:54:12,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2377/5001 [1:50:53<1:55:37,  2.64s/it][A
Train Diffusion:  48%|████▊     | 2378/5001 [1:50:55<1:54:59,  2.63s/it][A
Train Diffusion:  48%|████▊     | 2379/5001 [1:50:58<1:54:47,  2.63s/it][A
Train Diffusion:  48%|████▊     | 2380/5001 [1:51:00<1:54:09,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2381/5001 [1:51:03<1:54:02,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328886243.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.0362, 0.4664, 1.3091],
        [8.6743, 0.4692, 1.2857],
        [8.8952, 0.4869, 1.2797]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.2474,  0.9012,  0.7636],
         [ 1.4292,  0.4070,  1.4758],
         [24.2960,  0.2827,  1.1374],
         ...,
         [31.0021,  0.1142,  1.2350],
         [ 5.1833,  0.0816,  0.7025],
         [ 1.4617,  1.0010,  0.9490]],

        [[ 0.4422,  0.9569,  0.9897],
         [10.6515,  0.5600,  1.9230],
         [ 1.6653,  0.6874,  1.2299],
         ...,
         [ 0.3001,  0.5605,  1.0509],
         [27.8798,  0.1662,  1.2182],
         [ 4.8196,  0.3477,  8.4561]],

        [[ 1.2237,  0.9062,  3.1293],
         [ 1.0865,  0.9785,  1.5064],
         [ 0.6739,  1.0398,  0.6691],
         ...,
         [ 2.2373,  0.3526,  0.8807],
         [ 0.4984,  0.5830,  0.8570],
         [11.5510,  1.2147,  2.1733


Train Diffusion:  48%|████▊     | 2382/5001 [1:51:06<1:53:56,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2383/5001 [1:51:08<1:53:39,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2384/5001 [1:51:11<1:53:33,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2385/5001 [1:51:13<1:53:24,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2386/5001 [1:51:16<1:53:45,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2387/5001 [1:51:19<1:53:51,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2388/5001 [1:51:21<1:54:46,  2.64s/it][A
Train Diffusion:  48%|████▊     | 2389/5001 [1:51:24<1:54:10,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2390/5001 [1:51:26<1:54:14,  2.63s/it][A
Train Diffusion:  48%|████▊     | 2391/5001 [1:51:29<1:53:49,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330891430.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7717, 0.4967, 1.2968],
        [8.8045, 0.5048, 1.2797],
        [8.8157, 0.5062, 1.3272]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8973,  0.9592,  0.7838],
         [ 0.8556,  0.5938,  1.8047],
         [26.6526,  0.2567,  1.1892],
         ...,
         [ 3.2398,  0.1639,  0.6713],
         [ 0.6171,  0.6033,  0.4143],
         [ 0.2748,  2.4297,  0.7856]],

        [[ 1.5147,  0.8317,  4.8613],
         [ 0.8055,  0.9137,  1.2155],
         [ 0.7382,  0.8727,  0.9079],
         ...,
         [ 0.2207, 12.1647,  0.7745],
         [32.8414,  0.0761,  1.4282],
         [ 6.5877,  0.1104,  7.7611]],

        [[ 0.4961,  0.9526,  0.9215],
         [11.6706,  0.6063,  1.1423],
         [ 1.7250,  0.5333,  0.8890],
         ...,
         [28.5314,  0.1326,  1.2420],
         [ 5.3856,  0.1030,  0.7658],
         [ 1.5859,  1.8166,  2.0859


Train Diffusion:  48%|████▊     | 2392/5001 [1:51:32<1:56:40,  2.68s/it][A
Train Diffusion:  48%|████▊     | 2393/5001 [1:51:35<1:56:25,  2.68s/it][A
Train Diffusion:  48%|████▊     | 2394/5001 [1:51:37<1:55:17,  2.65s/it][A
Train Diffusion:  48%|████▊     | 2395/5001 [1:51:40<1:54:43,  2.64s/it][A
Train Diffusion:  48%|████▊     | 2396/5001 [1:51:42<1:54:11,  2.63s/it][A
Train Diffusion:  48%|████▊     | 2397/5001 [1:51:45<1:53:43,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2398/5001 [1:51:48<1:53:27,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2399/5001 [1:51:50<1:53:27,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2400/5001 [1:51:53<1:53:13,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2401/5001 [1:51:55<1:53:01,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341509548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8142, 0.5110, 1.3097],
        [8.5133, 0.5125, 1.2614],
        [8.8132, 0.5079, 1.2845]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3825e+00, 8.5638e-01, 4.6552e+00],
         [1.6705e+00, 1.0175e+00, 1.2295e+00],
         [1.2123e+00, 6.2992e-01, 1.2690e+00],
         ...,
         [2.7817e+01, 1.6266e-01, 1.2084e+00],
         [4.8936e+00, 1.4425e-01, 7.0874e-01],
         [1.3781e+00, 2.1575e+00, 1.3779e+00]],

        [[1.0742e+00, 9.2872e-01, 1.3126e+00],
         [3.7742e+00, 7.7092e-01, 9.9208e-01],
         [1.4887e+00, 5.3185e-01, 7.6521e-01],
         ...,
         [1.4508e+00, 2.1124e-01, 9.3380e-01],
         [6.3568e-01, 4.9435e-01, 6.3250e-01],
         [1.3275e-02, 1.6865e+00, 1.8054e+00]],

        [[4.5416e-01, 9.5228e-01, 1.0777e+00],
         [5.0800e-01, 6.5510e-01, 1.8480e+00],
         [2.8930e+01, 1.5688e-01, 1.1


Train Diffusion:  48%|████▊     | 2402/5001 [1:51:58<1:52:46,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2403/5001 [1:52:01<1:52:39,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2404/5001 [1:52:03<1:52:48,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2405/5001 [1:52:06<1:52:25,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2406/5001 [1:52:08<1:52:24,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2407/5001 [1:52:11<1:52:12,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2408/5001 [1:52:14<1:52:06,  2.59s/it][A
Train Diffusion:  48%|████▊     | 2409/5001 [1:52:16<1:52:03,  2.59s/it][A
Train Diffusion:  48%|████▊     | 2410/5001 [1:52:19<1:51:55,  2.59s/it][A
Train Diffusion:  48%|████▊     | 2411/5001 [1:52:21<1:52:11,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320054908.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6965, 0.4951, 1.2920],
        [8.7750, 0.5017, 1.2957],
        [8.8190, 0.4960, 1.2837]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.4855e-01, 9.5221e-01, 9.3024e-01],
         [4.9085e-01, 8.5264e-01, 1.4678e+00],
         [2.6570e+01, 1.6814e-01, 1.1783e+00],
         ...,
         [3.5692e+00, 2.5845e-02, 1.6847e+00],
         [1.0043e+00, 1.1341e+01, 1.3352e+00],
         [1.4568e+01, 1.4775e+00, 1.9677e+00]],

        [[7.7901e-01, 9.6210e-01, 1.1621e+00],
         [1.2763e+01, 3.8279e-01, 1.2726e+00],
         [2.7181e+00, 3.9812e-01, 1.0575e+00],
         ...,
         [2.2108e+01, 3.9322e-01, 6.0724e-02],
         [1.3201e+00, 4.7478e-01, 6.9496e-03],
         [4.0608e+00, 2.2536e-01, 1.7861e+00]],

        [[1.5779e+00, 8.0857e-01, 1.1919e+00],
         [1.7202e+00, 3.6484e-01, 7.3113e-01],
         [5.2428e-01, 8.5229e-01, 1.1


Train Diffusion:  48%|████▊     | 2412/5001 [1:52:24<1:51:53,  2.59s/it][A
Train Diffusion:  48%|████▊     | 2413/5001 [1:52:27<1:51:59,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2414/5001 [1:52:29<1:52:14,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2415/5001 [1:52:32<1:52:24,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2416/5001 [1:52:34<1:52:39,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2417/5001 [1:52:37<1:52:23,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2418/5001 [1:52:40<1:52:38,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2419/5001 [1:52:42<1:52:40,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2420/5001 [1:52:45<1:52:36,  2.62s/it][A
Train Diffusion:  48%|████▊     | 2421/5001 [1:52:48<1:52:13,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343979004.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7446, 0.5104, 1.3004],
        [8.6456, 0.4980, 1.2938],
        [8.7577, 0.5035, 1.2983]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.2480e-01, 9.6144e-01, 7.9286e-01],
         [7.6751e-01, 6.3218e-01, 1.8090e+00],
         [2.6606e+01, 2.5873e-01, 1.1953e+00],
         ...,
         [3.4883e+01, 8.5232e-02, 1.1903e+00],
         [5.6528e+00, 9.9990e-02, 5.2544e-01],
         [3.5630e+00, 1.7770e+00, 7.7193e+00]],

        [[5.2605e-01, 9.5138e-01, 8.4063e-01],
         [1.1480e+01, 7.3633e-01, 1.1196e+00],
         [1.6502e+00, 5.1597e-01, 8.4576e-01],
         ...,
         [2.7664e+00, 3.1759e-01, 9.5587e-01],
         [3.9498e-01, 8.3673e-01, 6.4926e-01],
         [2.4321e-02, 1.5469e+00, 3.0751e+00]],

        [[1.5536e+00, 8.2484e-01, 5.2111e+00],
         [8.1587e-01, 8.2881e-01, 1.2473e+00],
         [6.8342e-01, 8.3713e-01, 9.4


Train Diffusion:  48%|████▊     | 2422/5001 [1:52:50<1:52:21,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2423/5001 [1:52:53<1:52:13,  2.61s/it][A
Train Diffusion:  48%|████▊     | 2424/5001 [1:52:55<1:51:49,  2.60s/it][A
Train Diffusion:  48%|████▊     | 2425/5001 [1:52:58<1:51:47,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2426/5001 [1:53:01<1:51:45,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2427/5001 [1:53:03<1:51:47,  2.61s/it][A
Train Diffusion:  49%|████▊     | 2428/5001 [1:53:06<1:51:34,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2429/5001 [1:53:08<1:51:22,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2430/5001 [1:53:11<1:51:29,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2431/5001 [1:53:14<1:51:22,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 347667878.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8254, 0.5060, 1.2896],
        [8.7123, 0.4910, 1.2716],
        [8.8174, 0.4814, 1.3436]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6936,  0.9562,  0.8562],
         [ 0.6326,  0.7286,  1.8846],
         [26.9306,  0.2374,  1.1987],
         ...,
         [ 0.6807,  1.1991,  0.4993],
         [ 2.8674,  1.1236,  1.5745],
         [ 0.6378,  1.8385,  4.4418]],

        [[ 1.6035,  0.8126,  4.0593],
         [ 1.0998,  0.6913,  0.9834],
         [ 0.6603,  0.8145,  1.0419],
         ...,
         [ 0.1630,  0.6222,  0.3855],
         [29.8459,  0.6892,  0.2285],
         [ 5.3126,  0.2767,  0.7272]],

        [[ 0.6053,  0.9522,  0.9038],
         [12.5387,  0.6122,  1.0839],
         [ 2.0342,  0.4229,  0.8498],
         ...,
         [ 0.4046,  2.1506,  0.7802],
         [ 0.6664,  0.4639,  0.8469],
         [ 6.1676,  1.7105,  2.0639


Train Diffusion:  49%|████▊     | 2432/5001 [1:53:16<1:51:27,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2433/5001 [1:53:19<1:51:21,  2.60s/it][A
Train Diffusion:  49%|████▊     | 2434/5001 [1:53:21<1:51:56,  2.62s/it][A
Train Diffusion:  49%|████▊     | 2435/5001 [1:53:24<1:52:04,  2.62s/it][A
Train Diffusion:  49%|████▊     | 2436/5001 [1:53:27<1:52:00,  2.62s/it][A
Train Diffusion:  49%|████▊     | 2437/5001 [1:53:29<1:51:38,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2438/5001 [1:53:32<1:52:01,  2.62s/it][A
Train Diffusion:  49%|████▉     | 2439/5001 [1:53:35<1:53:15,  2.65s/it][A
Train Diffusion:  49%|████▉     | 2440/5001 [1:53:37<1:53:18,  2.65s/it][A
Train Diffusion:  49%|████▉     | 2441/5001 [1:53:40<1:52:36,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327010659.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7508, 0.5102, 1.3540],
        [8.5498, 0.5068, 1.3051],
        [8.7507, 0.5044, 1.2941]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2200e+00, 9.0295e-01, 8.0196e-01],
         [1.3599e+00, 4.2601e-01, 2.1499e-01],
         [2.3276e-05, 2.3918e+00, 2.9402e-01],
         ...,
         [8.3763e-02, 1.1663e+00, 9.8075e-01],
         [5.9909e-01, 5.2151e-02, 2.5944e+00],
         [1.9889e+00, 7.3651e-01, 1.1432e+00]],

        [[4.4420e-01, 9.5364e-01, 9.8671e-01],
         [5.6163e+00, 5.9487e-01, 1.6090e+00],
         [1.4956e+00, 3.1927e+00, 6.9565e-01],
         ...,
         [2.0916e+01, 3.7329e+00, 5.6312e-01],
         [1.9149e+00, 2.3045e+00, 6.1431e-01],
         [3.4010e-01, 1.6433e+00, 6.8731e+00]],

        [[1.2469e+00, 8.9750e-01, 3.6214e+00],
         [2.4928e+00, 9.3587e-01, 1.3382e+00],
         [1.0476e+00, 1.1711e+01, 7.3


Train Diffusion:  49%|████▉     | 2442/5001 [1:53:43<1:52:37,  2.64s/it][A
Train Diffusion:  49%|████▉     | 2443/5001 [1:53:45<1:52:41,  2.64s/it][A
Train Diffusion:  49%|████▉     | 2444/5001 [1:53:48<1:52:05,  2.63s/it][A
Train Diffusion:  49%|████▉     | 2445/5001 [1:53:50<1:51:32,  2.62s/it][A
Train Diffusion:  49%|████▉     | 2446/5001 [1:53:53<1:51:18,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2447/5001 [1:53:56<1:50:58,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2448/5001 [1:53:58<1:50:46,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2449/5001 [1:54:01<1:50:37,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2450/5001 [1:54:03<1:50:47,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2451/5001 [1:54:06<1:50:33,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320208115.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8441, 0.4778, 1.3247],
        [8.6354, 0.4840, 1.3190],
        [8.7937, 0.4910, 1.3140]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6006,  0.8093,  2.4682],
         [ 1.3119,  0.5458,  0.7816],
         [ 0.7416,  0.6910,  0.8621],
         ...,
         [ 4.0257,  0.1644,  0.9716],
         [24.9409,  0.3471,  1.2376],
         [ 4.4821,  1.2844,  2.4176]],

        [[ 0.7048,  0.9585,  1.0442],
         [12.2473,  0.4831,  1.1912],
         [ 2.2268,  0.4925,  1.1944],
         ...,
         [ 0.4953,  0.9138,  0.7130],
         [ 0.5246,  0.6920,  0.8587],
         [ 6.3126,  1.7331,  1.3312]],

        [[ 0.5969,  0.9533,  0.9346],
         [ 0.5609,  0.7814,  1.7856],
         [28.0228,  0.1970,  1.1950],
         ...,
         [33.0810,  0.0701,  1.2459],
         [ 5.8499,  0.0438,  0.0568],
         [ 2.3092,  0.5246,  0.8735


Train Diffusion:  49%|████▉     | 2452/5001 [1:54:09<1:50:19,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2453/5001 [1:54:11<1:50:20,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2454/5001 [1:54:14<1:50:11,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2455/5001 [1:54:16<1:50:21,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2456/5001 [1:54:19<1:50:10,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2457/5001 [1:54:22<1:49:55,  2.59s/it][A
Train Diffusion:  49%|████▉     | 2458/5001 [1:54:24<1:50:15,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2459/5001 [1:54:27<1:50:11,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2460/5001 [1:54:29<1:50:13,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2461/5001 [1:54:32<1:50:21,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329885769.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6755, 0.5109, 1.2899],
        [8.8220, 0.4908, 1.3068],
        [8.6971, 0.5245, 1.3103]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0142e+00, 9.4269e-01, 1.5952e+00],
         [3.9497e+00, 7.3226e-01, 1.4060e+00],
         [1.0610e+00, 6.9897e-01, 1.1860e+00],
         ...,
         [3.6170e-01, 6.3882e-01, 9.8589e-01],
         [6.5922e+00, 4.4994e-01, 1.8061e+00],
         [6.3179e-01, 1.6447e+00, 4.8023e+00]],

        [[1.4335e+00, 8.4711e-01, 5.6029e-01],
         [1.7311e+00, 3.4505e-01, 5.2615e-01],
         [2.5907e-02, 1.1934e+00, 5.5224e-01],
         ...,
         [2.8379e+00, 1.8277e-01, 8.9831e-01],
         [5.4262e-01, 5.0951e-01, 7.3331e-01],
         [8.5077e-01, 1.4005e+00, 2.5658e+00]],

        [[4.6406e-01, 9.5435e-01, 9.6189e-01],
         [6.4144e+00, 6.2194e-01, 3.1095e+00],
         [1.9639e+00, 3.3316e-02, 4.5


Train Diffusion:  49%|████▉     | 2462/5001 [1:54:35<1:50:28,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2463/5001 [1:54:37<1:50:26,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2464/5001 [1:54:40<1:50:19,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2465/5001 [1:54:42<1:50:11,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2466/5001 [1:54:45<1:50:23,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2467/5001 [1:54:48<1:50:27,  2.62s/it][A
Train Diffusion:  49%|████▉     | 2468/5001 [1:54:50<1:50:01,  2.61s/it][A
Train Diffusion:  49%|████▉     | 2469/5001 [1:54:53<1:49:48,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2470/5001 [1:54:55<1:49:39,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2471/5001 [1:54:58<1:49:38,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317492153.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7449, 0.4996, 1.3079],
        [8.7841, 0.4917, 1.2741],
        [8.8506, 0.4759, 1.2598]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5811,  0.8203,  5.3428],
         [ 0.9283,  0.6727,  1.1705],
         [ 0.7097,  0.7918,  1.0293],
         ...,
         [ 0.5037,  0.6495,  0.4766],
         [28.9930,  0.5711,  0.6658],
         [ 4.6042,  0.4366,  1.2589]],

        [[ 0.7635,  0.9622,  0.8137],
         [ 0.6969,  0.6669,  1.8150],
         [27.0824,  0.2447,  1.1957],
         ...,
         [29.7492,  0.0569,  1.2981],
         [ 5.8779,  0.0882,  0.8325],
         [26.7789,  1.1056,  2.2599]],

        [[ 0.5588,  0.9525,  0.7720],
         [11.0744,  0.8749,  1.0365],
         [ 1.5495,  0.4914,  0.8423],
         ...,
         [ 4.0502,  0.1588,  0.7534],
         [ 0.8604,  0.2252,  0.7962],
         [ 1.0723,  1.6146,  1.2911


Train Diffusion:  49%|████▉     | 2472/5001 [1:55:01<1:49:31,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2473/5001 [1:55:03<1:49:35,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2474/5001 [1:55:06<1:49:20,  2.60s/it][A
Train Diffusion:  49%|████▉     | 2475/5001 [1:55:08<1:49:13,  2.59s/it][A
Train Diffusion:  50%|████▉     | 2476/5001 [1:55:11<1:49:00,  2.59s/it][A
Train Diffusion:  50%|████▉     | 2477/5001 [1:55:14<1:48:55,  2.59s/it][A
Train Diffusion:  50%|████▉     | 2478/5001 [1:55:16<1:49:26,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2479/5001 [1:55:19<1:49:13,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2480/5001 [1:55:21<1:49:16,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2481/5001 [1:55:24<1:49:43,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326762937.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4247, 0.5260, 1.3143],
        [9.0090, 0.4841, 1.3240],
        [8.6430, 0.5123, 1.3004]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7447e-01, 9.5547e-01, 9.7686e-01],
         [1.2689e+01, 5.1391e-01, 1.1411e+00],
         [2.2288e+00, 3.9421e-01, 7.4889e-01],
         ...,
         [5.9537e-03, 6.7794e-01, 3.6454e+00],
         [1.3564e-01, 1.6493e+00, 4.8391e+00],
         [2.8609e-01, 4.0927e+00, 1.1740e+00]],

        [[1.6055e+00, 8.0719e-01, 3.0976e+00],
         [1.2726e+00, 5.9013e-01, 8.4723e-01],
         [7.0076e-01, 7.7153e-01, 1.1792e+00],
         ...,
         [2.0797e-01, 4.8205e-01, 2.0544e+00],
         [5.8427e-06, 6.4329e-01, 3.5894e+00],
         [1.2019e-01, 1.4081e+00, 1.2694e+01]],

        [[6.2341e-01, 9.5322e-01, 9.1207e-01],
         [5.7678e-01, 7.8137e-01, 1.8460e+00],
         [2.6559e+01, 2.5340e-01, 1.2


Train Diffusion:  50%|████▉     | 2482/5001 [1:55:27<1:50:23,  2.63s/it][A
Train Diffusion:  50%|████▉     | 2483/5001 [1:55:29<1:50:00,  2.62s/it][A
Train Diffusion:  50%|████▉     | 2484/5001 [1:55:32<1:49:48,  2.62s/it][A
Train Diffusion:  50%|████▉     | 2485/5001 [1:55:34<1:49:19,  2.61s/it][A
Train Diffusion:  50%|████▉     | 2486/5001 [1:55:37<1:49:57,  2.62s/it][A
Train Diffusion:  50%|████▉     | 2487/5001 [1:55:40<1:51:49,  2.67s/it][A
Train Diffusion:  50%|████▉     | 2488/5001 [1:55:43<1:51:02,  2.65s/it][A
Train Diffusion:  50%|████▉     | 2489/5001 [1:55:45<1:50:25,  2.64s/it][A
Train Diffusion:  50%|████▉     | 2490/5001 [1:55:48<1:50:30,  2.64s/it][A
Train Diffusion:  50%|████▉     | 2491/5001 [1:55:50<1:50:02,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327905139.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7550, 0.4955, 1.3209],
        [8.8143, 0.5135, 1.3455],
        [8.5543, 0.4959, 1.2973]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.9060e-01, 9.5553e-01, 9.9310e-01],
         [1.2613e+01, 5.0806e-01, 1.1474e+00],
         [2.2379e+00, 4.0035e-01, 7.2301e-01],
         ...,
         [3.8704e-01, 7.3075e-01, 4.8437e-01],
         [2.9601e+01, 6.0555e-01, 6.7352e-01],
         [5.6718e+00, 1.6048e-01, 1.6525e+00]],

        [[1.6039e+00, 8.0566e-01, 2.9120e+00],
         [1.3350e+00, 5.5804e-01, 8.2488e-01],
         [7.0440e-01, 7.4654e-01, 1.2365e+00],
         ...,
         [3.8405e+00, 1.6773e-01, 7.5498e-01],
         [2.5180e-01, 4.4273e+00, 1.0503e+00],
         [1.0273e-02, 1.6566e+00, 2.6602e+00]],

        [[6.0825e-01, 9.5194e-01, 9.2141e-01],
         [5.7051e-01, 8.1008e-01, 1.8150e+00],
         [2.6725e+01, 2.2574e-01, 1.2


Train Diffusion:  50%|████▉     | 2492/5001 [1:55:53<1:49:52,  2.63s/it][A
Train Diffusion:  50%|████▉     | 2493/5001 [1:55:56<1:49:19,  2.62s/it][A
Train Diffusion:  50%|████▉     | 2494/5001 [1:55:58<1:49:18,  2.62s/it][A
Train Diffusion:  50%|████▉     | 2495/5001 [1:56:01<1:48:51,  2.61s/it][A
Train Diffusion:  50%|████▉     | 2496/5001 [1:56:03<1:48:48,  2.61s/it][A
Train Diffusion:  50%|████▉     | 2497/5001 [1:56:06<1:48:41,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2498/5001 [1:56:09<1:48:33,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2499/5001 [1:56:11<1:48:19,  2.60s/it][A
Train Diffusion:  50%|████▉     | 2500/5001 [1:56:14<1:48:22,  2.60s/it][A
Train Diffusion:  50%|█████     | 2501/5001 [1:56:16<1:48:25,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330364624.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7906, 0.5232, 1.2862],
        [8.4896, 0.5304, 1.3311],
        [8.6240, 0.4984, 1.3149]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3226e+00, 8.7377e-01, 6.4743e-01],
         [1.4762e+00, 4.0583e-01, 2.2228e-01],
         [1.7252e-05, 2.4013e+00, 2.9900e-01],
         ...,
         [9.9502e-01, 1.1958e+00, 2.7274e+00],
         [1.8714e-01, 2.0028e+00, 5.2261e+00],
         [2.2294e+00, 3.7300e+00, 1.7982e+00]],

        [[1.1414e+00, 9.1509e-01, 2.5752e+00],
         [4.2890e+00, 7.6498e-01, 1.3364e+00],
         [1.5119e+00, 9.7785e+00, 6.4008e-01],
         ...,
         [2.2005e+01, 3.0162e-01, 5.3756e-01],
         [5.0745e+00, 4.1839e-02, 7.8163e+00],
         [1.3228e+00, 1.6503e+00, 1.5629e+00]],

        [[4.4680e-01, 9.5311e-01, 9.8792e-01],
         [3.3159e+00, 6.3982e-01, 1.6490e+00],
         [1.1382e+00, 7.0019e+00, 7.3


Train Diffusion:  50%|█████     | 2502/5001 [1:56:19<1:48:37,  2.61s/it][A
Train Diffusion:  50%|█████     | 2503/5001 [1:56:22<1:48:46,  2.61s/it][A
Train Diffusion:  50%|█████     | 2504/5001 [1:56:24<1:48:17,  2.60s/it][A
Train Diffusion:  50%|█████     | 2505/5001 [1:56:27<1:48:19,  2.60s/it][A
Train Diffusion:  50%|█████     | 2506/5001 [1:56:29<1:48:17,  2.60s/it][A
Train Diffusion:  50%|█████     | 2507/5001 [1:56:32<1:48:21,  2.61s/it][A
Train Diffusion:  50%|█████     | 2508/5001 [1:56:35<1:48:21,  2.61s/it][A
Train Diffusion:  50%|█████     | 2509/5001 [1:56:37<1:48:02,  2.60s/it][A
Train Diffusion:  50%|█████     | 2510/5001 [1:56:40<1:47:47,  2.60s/it][A
Train Diffusion:  50%|█████     | 2511/5001 [1:56:42<1:47:50,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318495244.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7299, 0.4928, 1.2930],
        [8.9166, 0.4960, 1.2836],
        [8.7179, 0.4864, 1.3071]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.7504e-01, 9.6117e-01, 8.0789e-01],
         [6.9413e-01, 6.7891e-01, 1.7962e+00],
         [2.6795e+01, 2.6635e-01, 1.1969e+00],
         ...,
         [2.4548e+01, 2.6359e+00, 9.7665e-01],
         [2.6992e+00, 2.7352e-01, 4.3093e-02],
         [5.1029e+00, 2.3475e-01, 8.7661e-01]],

        [[1.5753e+00, 8.1865e-01, 5.2109e+00],
         [9.8158e-01, 6.4957e-01, 1.1939e+00],
         [6.5190e-01, 8.0293e-01, 9.8613e-01],
         ...,
         [4.1781e+00, 1.0365e-01, 1.3303e+00],
         [5.6020e-03, 5.5556e-01, 2.1274e+00],
         [1.7151e+01, 1.9251e+00, 1.9322e+00]],

        [[5.5176e-01, 9.5144e-01, 7.6152e-01],
         [1.1402e+01, 8.9970e-01, 1.1331e+00],
         [1.5446e+00, 5.1176e-01, 8.6


Train Diffusion:  50%|█████     | 2512/5001 [1:56:45<1:47:46,  2.60s/it][A
Train Diffusion:  50%|█████     | 2513/5001 [1:56:48<1:47:36,  2.60s/it][A
Train Diffusion:  50%|█████     | 2514/5001 [1:56:50<1:47:31,  2.59s/it][A
Train Diffusion:  50%|█████     | 2515/5001 [1:56:53<1:47:49,  2.60s/it][A
Train Diffusion:  50%|█████     | 2516/5001 [1:56:55<1:47:37,  2.60s/it][A
Train Diffusion:  50%|█████     | 2517/5001 [1:56:58<1:47:28,  2.60s/it][A
Train Diffusion:  50%|█████     | 2518/5001 [1:57:01<1:47:36,  2.60s/it][A
Train Diffusion:  50%|█████     | 2519/5001 [1:57:03<1:47:48,  2.61s/it][A
Train Diffusion:  50%|█████     | 2520/5001 [1:57:06<1:47:39,  2.60s/it][A
Train Diffusion:  50%|█████     | 2521/5001 [1:57:08<1:47:18,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327754131.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9081, 0.4902, 1.3380],
        [8.4453, 0.5099, 1.3170],
        [8.8064, 0.4767, 1.2969]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5747e-01, 9.5473e-01, 9.7735e-01],
         [6.5229e+00, 5.9487e-01, 2.2917e+00],
         [1.6520e+00, 3.7655e-01, 5.3735e-01],
         ...,
         [1.0920e+01, 7.2013e+00, 6.8851e-01],
         [9.8614e-01, 2.5404e+00, 9.0931e-01],
         [3.9379e-01, 1.5166e+00, 1.1041e+00]],

        [[1.0502e+00, 9.3672e-01, 1.7152e+00],
         [3.1731e+00, 8.2761e-01, 1.3579e+00],
         [1.0769e+00, 1.3336e+00, 6.0641e-01],
         ...,
         [1.9849e-03, 1.2152e+00, 1.6677e+00],
         [3.1580e-01, 4.2409e-01, 4.8528e-01],
         [4.4517e-01, 2.7247e+00, 8.4102e-01]],

        [[1.4036e+00, 8.5348e-01, 5.5653e-01],
         [1.6694e+00, 3.6019e-01, 2.1760e-01],
         [1.5396e-05, 2.4007e+00, 2.1


Train Diffusion:  50%|█████     | 2522/5001 [1:57:11<1:47:13,  2.60s/it][A
Train Diffusion:  50%|█████     | 2523/5001 [1:57:14<1:47:02,  2.59s/it][A
Train Diffusion:  50%|█████     | 2524/5001 [1:57:16<1:47:00,  2.59s/it][A
Train Diffusion:  50%|█████     | 2525/5001 [1:57:19<1:47:15,  2.60s/it][A
Train Diffusion:  51%|█████     | 2526/5001 [1:57:21<1:47:13,  2.60s/it][A
Train Diffusion:  51%|█████     | 2527/5001 [1:57:24<1:47:27,  2.61s/it][A
Train Diffusion:  51%|█████     | 2528/5001 [1:57:27<1:50:00,  2.67s/it][A
Train Diffusion:  51%|█████     | 2529/5001 [1:57:29<1:49:29,  2.66s/it][A
Train Diffusion:  51%|█████     | 2530/5001 [1:57:32<1:48:58,  2.65s/it][A
Train Diffusion:  51%|█████     | 2531/5001 [1:57:35<1:48:16,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327266188.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8010, 0.4989, 1.2996],
        [8.9307, 0.4751, 1.2838],
        [8.7876, 0.4812, 1.2727]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5725e+00, 8.1029e-01, 1.0542e+00],
         [1.7548e+00, 3.5557e-01, 7.0846e-01],
         [4.0694e-01, 8.9743e-01, 1.3219e+00],
         ...,
         [4.0726e+00, 1.6035e-01, 7.3775e-01],
         [7.2084e-01, 4.7664e-01, 1.9315e+00],
         [1.8904e+00, 1.7325e+00, 1.3309e+00]],

        [[7.9189e-01, 9.6406e-01, 1.1818e+00],
         [1.2674e+01, 3.6924e-01, 1.2777e+00],
         [2.7907e+00, 3.9166e-01, 1.0839e+00],
         ...,
         [3.2026e+01, 6.8543e-02, 1.2628e+00],
         [5.8051e+00, 6.6172e-02, 5.2046e+00],
         [1.0317e+01, 2.1227e+00, 2.0817e+00]],

        [[5.4100e-01, 9.5427e-01, 9.2721e-01],
         [4.6188e-01, 8.3753e-01, 1.3696e+00],
         [2.5809e+01, 2.3181e-01, 1.1


Train Diffusion:  51%|█████     | 2532/5001 [1:57:37<1:47:47,  2.62s/it][A
Train Diffusion:  51%|█████     | 2533/5001 [1:57:40<1:48:43,  2.64s/it][A
Train Diffusion:  51%|█████     | 2534/5001 [1:57:43<1:49:13,  2.66s/it][A
Train Diffusion:  51%|█████     | 2535/5001 [1:57:45<1:48:27,  2.64s/it][A
Train Diffusion:  51%|█████     | 2536/5001 [1:57:48<1:48:12,  2.63s/it][A
Train Diffusion:  51%|█████     | 2537/5001 [1:57:50<1:47:44,  2.62s/it][A
Train Diffusion:  51%|█████     | 2538/5001 [1:57:53<1:47:27,  2.62s/it][A
Train Diffusion:  51%|█████     | 2539/5001 [1:57:56<1:47:18,  2.62s/it][A
Train Diffusion:  51%|█████     | 2540/5001 [1:57:58<1:46:57,  2.61s/it][A
Train Diffusion:  51%|█████     | 2541/5001 [1:58:01<1:46:54,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 309494150.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7758, 0.4871, 1.2906],
        [8.7572, 0.4728, 1.3056],
        [8.8764, 0.4827, 1.3129]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5724e+00, 8.1042e-01, 1.1685e+00],
         [1.7512e+00, 3.5517e-01, 7.1939e-01],
         [4.1583e-01, 8.8595e-01, 1.2524e+00],
         ...,
         [2.8582e+00, 1.5428e-01, 9.0549e-01],
         [1.9068e+01, 2.4658e-01, 1.2702e+00],
         [3.4504e+00, 8.6070e-01, 1.4002e+00]],

        [[5.3976e-01, 9.5332e-01, 9.2296e-01],
         [4.7885e-01, 8.6244e-01, 1.3226e+00],
         [2.4637e+01, 2.3910e-01, 1.1407e+00],
         ...,
         [3.8222e-01, 7.0674e-01, 3.2803e+00],
         [8.3930e-03, 1.1833e+00, 2.1517e+00],
         [2.0273e-01, 1.3965e+00, 9.9446e+00]],

        [[7.9386e-01, 9.6317e-01, 1.1786e+00],
         [1.2731e+01, 3.5813e-01, 1.2772e+00],
         [2.8472e+00, 3.8118e-01, 1.0


Train Diffusion:  51%|█████     | 2542/5001 [1:58:04<1:46:56,  2.61s/it][A
Train Diffusion:  51%|█████     | 2543/5001 [1:58:06<1:46:55,  2.61s/it][A
Train Diffusion:  51%|█████     | 2544/5001 [1:58:09<1:46:38,  2.60s/it][A
Train Diffusion:  51%|█████     | 2545/5001 [1:58:11<1:46:27,  2.60s/it][A
Train Diffusion:  51%|█████     | 2546/5001 [1:58:14<1:46:30,  2.60s/it][A
Train Diffusion:  51%|█████     | 2547/5001 [1:58:16<1:46:13,  2.60s/it][A
Train Diffusion:  51%|█████     | 2548/5001 [1:58:19<1:46:21,  2.60s/it][A
Train Diffusion:  51%|█████     | 2549/5001 [1:58:22<1:46:19,  2.60s/it][A
Train Diffusion:  51%|█████     | 2550/5001 [1:58:24<1:46:17,  2.60s/it][A
Train Diffusion:  51%|█████     | 2551/5001 [1:58:27<1:46:19,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319525510.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8185, 0.5257, 1.3129],
        [8.7286, 0.4890, 1.3025],
        [8.6942, 0.4884, 1.3020]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6571,  0.9541,  0.8846],
         [ 0.5998,  0.7657,  1.8835],
         [26.0593,  0.2708,  1.2086],
         ...,
         [ 4.1527,  0.1125,  0.7092],
         [ 0.6137,  0.7469,  0.7084],
         [20.2774,  1.1476,  1.9335]],

        [[ 0.6386,  0.9533,  0.8987],
         [12.3898,  0.6209,  1.0141],
         [ 2.0745,  0.4109,  0.7074],
         ...,
         [ 0.0707,  8.4732,  1.8584],
         [16.0918,  0.3515,  1.1295],
         [ 3.2921,  0.7367,  5.3453]],

        [[ 1.6072,  0.8074,  3.8411],
         [ 1.2008,  0.5933,  0.9338],
         [ 0.6723,  0.7432,  1.2653],
         ...,
         [19.4835,  0.1584,  1.0758],
         [ 5.1748,  0.0329,  0.2646],
         [ 2.5714,  0.6308,  0.8599


Train Diffusion:  51%|█████     | 2552/5001 [1:58:30<1:46:15,  2.60s/it][A
Train Diffusion:  51%|█████     | 2553/5001 [1:58:32<1:46:12,  2.60s/it][A
Train Diffusion:  51%|█████     | 2554/5001 [1:58:35<1:46:05,  2.60s/it][A
Train Diffusion:  51%|█████     | 2555/5001 [1:58:37<1:45:55,  2.60s/it][A
Train Diffusion:  51%|█████     | 2556/5001 [1:58:40<1:45:39,  2.59s/it][A
Train Diffusion:  51%|█████     | 2557/5001 [1:58:42<1:45:40,  2.59s/it][A
Train Diffusion:  51%|█████     | 2558/5001 [1:58:45<1:45:47,  2.60s/it][A
Train Diffusion:  51%|█████     | 2559/5001 [1:58:48<1:45:44,  2.60s/it][A
Train Diffusion:  51%|█████     | 2560/5001 [1:58:50<1:45:47,  2.60s/it][A
Train Diffusion:  51%|█████     | 2561/5001 [1:58:53<1:45:41,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340607564.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7606, 0.4978, 1.3001],
        [8.8499, 0.4905, 1.2611],
        [8.7750, 0.4894, 1.3248]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6281,  0.9541,  0.9034],
         [ 0.5909,  0.7228,  1.9071],
         [28.2603,  0.1288,  1.2026],
         ...,
         [ 4.1517,  0.1487,  0.6077],
         [ 0.5743,  0.7021,  0.8671],
         [ 0.3990,  2.1751,  1.2550]],

        [[ 1.6080,  0.8143,  3.2815],
         [ 1.0777,  0.6562,  0.8834],
         [ 0.7165,  0.6880,  1.3067],
         ...,
         [ 0.3048,  7.7501,  0.6049],
         [28.0736,  0.1368,  0.8164],
         [ 5.8544,  0.2850,  0.9412]],

        [[ 0.6668,  0.9558,  0.9780],
         [12.5439,  0.5132,  1.1136],
         [ 2.2048,  0.4540,  0.6813],
         ...,
         [30.5004,  0.0626,  1.2887],
         [ 5.9442,  0.0757,  0.8148],
         [22.7893,  1.4153,  1.9817


Train Diffusion:  51%|█████     | 2562/5001 [1:58:56<1:45:41,  2.60s/it][A
Train Diffusion:  51%|█████     | 2563/5001 [1:58:58<1:45:31,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2564/5001 [1:59:01<1:45:41,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2565/5001 [1:59:03<1:45:45,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2566/5001 [1:59:06<1:45:37,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2567/5001 [1:59:09<1:45:42,  2.61s/it][A
Train Diffusion:  51%|█████▏    | 2568/5001 [1:59:11<1:45:35,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2569/5001 [1:59:14<1:45:16,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2570/5001 [1:59:16<1:45:19,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2571/5001 [1:59:19<1:45:31,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323023872.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7324, 0.4760, 1.2898],
        [8.6135, 0.5043, 1.3189],
        [8.9156, 0.4788, 1.2760]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5833e+00, 8.1861e-01, 5.4925e+00],
         [9.7604e-01, 5.3112e-01, 1.1166e+00],
         [7.8042e-01, 7.6325e-01, 9.9018e-01],
         ...,
         [4.1117e-01, 2.4780e+00, 4.4611e+00],
         [5.1345e+00, 2.2601e-01, 1.0857e+00],
         [5.4922e+00, 1.6946e-01, 5.4838e+00]],

        [[5.6387e-01, 9.5239e-01, 7.0909e-01],
         [1.0398e+01, 1.0659e+00, 1.1114e+00],
         [1.3163e+00, 5.7259e-01, 9.1970e-01],
         ...,
         [2.4060e-05, 5.7085e-01, 4.6563e+00],
         [1.1280e-01, 5.5123e-01, 1.5439e+00],
         [1.3174e+01, 1.7242e+00, 1.9685e+00]],

        [[7.5501e-01, 9.6158e-01, 8.1956e-01],
         [6.8067e-01, 6.7025e-01, 1.7361e+00],
         [2.7527e+01, 2.1184e-01, 1.1


Train Diffusion:  51%|█████▏    | 2572/5001 [1:59:22<1:45:15,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2573/5001 [1:59:24<1:45:04,  2.60s/it][A
Train Diffusion:  51%|█████▏    | 2574/5001 [1:59:27<1:45:42,  2.61s/it][A
Train Diffusion:  51%|█████▏    | 2575/5001 [1:59:29<1:46:41,  2.64s/it][A
Train Diffusion:  52%|█████▏    | 2576/5001 [1:59:32<1:46:44,  2.64s/it][A
Train Diffusion:  52%|█████▏    | 2577/5001 [1:59:35<1:46:05,  2.63s/it][A
Train Diffusion:  52%|█████▏    | 2578/5001 [1:59:37<1:45:37,  2.62s/it][A
Train Diffusion:  52%|█████▏    | 2579/5001 [1:59:40<1:45:26,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2580/5001 [1:59:42<1:45:13,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2581/5001 [1:59:45<1:47:08,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328434771.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6553, 0.4962, 1.3244],
        [8.9258, 0.4993, 1.3130],
        [8.8161, 0.4941, 1.2612]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7208e-01, 9.5596e-01, 9.7730e-01],
         [1.2997e+01, 5.0578e-01, 1.1416e+00],
         [2.2996e+00, 3.8065e-01, 7.4044e-01],
         ...,
         [3.9826e+00, 1.6306e-01, 8.6911e-01],
         [4.6988e-01, 1.4274e+00, 5.3123e+00],
         [3.9453e-01, 3.6392e+00, 1.0144e+00]],

        [[1.6052e+00, 8.0809e-01, 3.1035e+00],
         [1.2569e+00, 6.1077e-01, 8.7963e-01],
         [6.7277e-01, 7.7285e-01, 1.1974e+00],
         ...,
         [4.8358e-01, 6.8226e-01, 9.1790e-01],
         [1.7478e-06, 5.7531e-01, 3.4136e+00],
         [1.4437e-01, 1.0851e+00, 1.1436e+01]],

        [[6.2439e-01, 9.5383e-01, 9.1099e-01],
         [5.7819e-01, 7.7402e-01, 1.9043e+00],
         [2.6168e+01, 2.6262e-01, 1.2


Train Diffusion:  52%|█████▏    | 2582/5001 [1:59:48<1:46:30,  2.64s/it][A
Train Diffusion:  52%|█████▏    | 2583/5001 [1:59:50<1:45:39,  2.62s/it][A
Train Diffusion:  52%|█████▏    | 2584/5001 [1:59:53<1:45:21,  2.62s/it][A
Train Diffusion:  52%|█████▏    | 2585/5001 [1:59:56<1:45:01,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2586/5001 [1:59:58<1:44:58,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2587/5001 [2:00:01<1:45:18,  2.62s/it][A
Train Diffusion:  52%|█████▏    | 2588/5001 [2:00:03<1:44:57,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2589/5001 [2:00:06<1:44:55,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2590/5001 [2:00:09<1:44:54,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2591/5001 [2:00:11<1:44:55,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327950486.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7445, 0.4974, 1.3223],
        [8.7679, 0.4987, 1.2808],
        [8.7477, 0.4855, 1.2505]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7075,  0.9573,  1.0379],
         [12.8498,  0.4666,  1.1923],
         [ 2.3938,  0.4190,  0.6669],
         ...,
         [ 4.3677,  0.1278,  0.6106],
         [ 0.2830,  0.7161,  0.8335],
         [18.9798,  1.5887,  1.9782]],

        [[ 1.5995,  0.8056,  2.4538],
         [ 1.3955,  0.5334,  0.7926],
         [ 0.6960,  0.7113,  1.3108],
         ...,
         [27.7049,  0.0933,  1.3128],
         [ 5.6886,  0.0731,  0.6046],
         [ 0.9524,  1.9942,  1.1274]],

        [[ 0.5959,  0.9520,  0.9308],
         [ 0.5668,  0.8237,  1.7525],
         [27.8419,  0.1355,  1.2025],
         ...,
         [ 0.1727, 11.7824,  0.8755],
         [25.5210,  0.1695,  0.7933],
         [ 5.9484,  0.2721,  0.8556


Train Diffusion:  52%|█████▏    | 2592/5001 [2:00:14<1:44:36,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2593/5001 [2:00:16<1:44:19,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2594/5001 [2:00:19<1:44:31,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2595/5001 [2:00:22<1:44:27,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2596/5001 [2:00:24<1:44:16,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2597/5001 [2:00:27<1:44:05,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2598/5001 [2:00:29<1:43:55,  2.59s/it][A
Train Diffusion:  52%|█████▏    | 2599/5001 [2:00:32<1:43:59,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2600/5001 [2:00:35<1:43:51,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2601/5001 [2:00:37<1:43:57,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336475644.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9056, 0.4863, 1.3081],
        [8.6085, 0.4917, 1.2708],
        [8.8089, 0.4851, 1.2942]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7092,  0.9579,  1.0598],
         [12.6672,  0.4543,  1.2046],
         [ 2.3319,  0.3878,  0.9311],
         ...,
         [ 0.2180,  2.1809,  1.2381],
         [ 0.3941,  0.4840,  0.7820],
         [22.9935,  1.1421,  1.9719]],

        [[ 0.5944,  0.9525,  0.9340],
         [ 0.5626,  0.8058,  1.7569],
         [28.0166,  0.0831,  1.2006],
         ...,
         [ 0.5281,  4.1378,  1.3693],
         [ 1.1574,  1.1574,  0.9707],
         [ 0.9786,  2.2447,  1.0005]],

        [[ 1.5991,  0.8069,  2.2573],
         [ 1.3918,  0.5178,  0.7744],
         [ 0.7236,  0.7722,  0.6597],
         ...,
         [ 0.1658,  2.5229,  7.3236],
         [ 0.3404,  0.3404,  0.3969],
         [ 5.0962,  0.2251,  3.4460


Train Diffusion:  52%|█████▏    | 2602/5001 [2:00:40<1:44:01,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2603/5001 [2:00:42<1:43:53,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2604/5001 [2:00:45<1:44:13,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2605/5001 [2:00:48<1:44:13,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2606/5001 [2:00:50<1:44:18,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2607/5001 [2:00:53<1:44:03,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2608/5001 [2:00:56<1:43:50,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2609/5001 [2:00:58<1:43:36,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2610/5001 [2:01:01<1:43:34,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2611/5001 [2:01:03<1:43:44,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322435516.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8125, 0.5041, 1.3201],
        [8.6482, 0.4973, 1.2986],
        [8.9234, 0.5094, 1.3025]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7582,  0.9630,  1.1487],
         [13.0752,  0.3868,  1.2680],
         [ 2.6807,  0.3619,  0.8362],
         ...,
         [ 4.0835,  0.1012,  0.9740],
         [ 0.2319,  0.3981,  1.7560],
         [ 2.1222,  2.0894,  1.7855]],

        [[ 1.5885,  0.8068,  1.2292],
         [ 1.6609,  0.3872,  0.7318],
         [ 0.7063,  0.7532,  0.8894],
         ...,
         [ 4.1503,  2.1565,  1.1053],
         [ 6.0026,  0.3753,  0.0431],
         [ 5.1184,  0.2302,  0.7147]],

        [[ 0.5600,  0.9534,  0.9387],
         [ 0.5296,  0.8327,  1.6370],
         [26.9459,  0.0825,  1.1917],
         ...,
         [17.5992,  0.1693,  0.9182],
         [ 5.0356,  0.0695,  0.6308],
         [12.4189,  1.7134,  3.0140


Train Diffusion:  52%|█████▏    | 2612/5001 [2:01:06<1:43:26,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2613/5001 [2:01:09<1:43:22,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2614/5001 [2:01:11<1:43:15,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2615/5001 [2:01:14<1:43:10,  2.59s/it][A
Train Diffusion:  52%|█████▏    | 2616/5001 [2:01:16<1:43:11,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2617/5001 [2:01:19<1:43:46,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2618/5001 [2:01:22<1:43:29,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2619/5001 [2:01:24<1:43:12,  2.60s/it][A
Train Diffusion:  52%|█████▏    | 2620/5001 [2:01:27<1:43:44,  2.61s/it][A
Train Diffusion:  52%|█████▏    | 2621/5001 [2:01:29<1:43:25,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342181897.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7739, 0.5127, 1.2902],
        [8.6968, 0.4930, 1.2988],
        [8.8279, 0.5066, 1.2825]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6877,  0.9561,  1.0024],
         [12.1243,  0.5152,  1.1545],
         [ 2.1413,  0.4729,  1.1771],
         ...,
         [ 1.0394,  0.3079,  0.9096],
         [ 4.5030,  0.0385,  5.7081],
         [ 2.8459,  0.8839,  1.3512]],

        [[ 0.6108,  0.9528,  0.9210],
         [ 0.5707,  0.7956,  1.8030],
         [28.5265,  0.1187,  1.1985],
         ...,
         [ 0.0556,  0.8544,  3.5097],
         [ 0.1647,  0.6920,  0.6031],
         [ 9.7904,  1.9490,  1.7141]],

        [[ 1.6052,  0.8067,  2.8644],
         [ 1.3095,  0.5350,  0.7959],
         [ 0.7655,  0.6730,  0.7962],
         ...,
         [ 0.1850,  0.5123,  1.5517],
         [ 0.5076,  0.5470,  2.5158],
         [ 0.3497,  1.3516, 11.2979


Train Diffusion:  52%|█████▏    | 2622/5001 [2:01:32<1:45:18,  2.66s/it][A
Train Diffusion:  52%|█████▏    | 2623/5001 [2:01:35<1:44:58,  2.65s/it][A
Train Diffusion:  52%|█████▏    | 2624/5001 [2:01:37<1:44:20,  2.63s/it][A
Train Diffusion:  52%|█████▏    | 2625/5001 [2:01:40<1:43:52,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2626/5001 [2:01:43<1:43:35,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2627/5001 [2:01:45<1:43:23,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2628/5001 [2:01:48<1:46:21,  2.69s/it][A
Train Diffusion:  53%|█████▎    | 2629/5001 [2:01:51<1:45:08,  2.66s/it][A
Train Diffusion:  53%|█████▎    | 2630/5001 [2:01:53<1:44:35,  2.65s/it][A
Train Diffusion:  53%|█████▎    | 2631/5001 [2:01:56<1:44:01,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339297512.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6253, 0.5119, 1.3263],
        [8.6467, 0.5248, 1.3197],
        [8.7443, 0.5007, 1.2876]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6840,  0.9550,  0.8640],
         [ 0.6197,  0.7470,  1.8811],
         [27.2118,  0.2377,  1.2012],
         ...,
         [ 0.8067,  0.9103,  7.6005],
         [ 0.0887,  0.9134,  2.8470],
         [ 0.1912,  1.5367,  6.6549]],

        [[ 1.6019,  0.8091,  4.2818],
         [ 1.1463,  0.6163,  1.0010],
         [ 0.6541,  0.7414,  1.2357],
         ...,
         [24.5650,  1.1781,  1.1638],
         [ 3.1962,  0.5036,  0.3123],
         [ 4.5592,  0.4020,  0.8662]],

        [[ 0.6155,  0.9522,  0.8551],
         [12.1406,  0.6813,  0.9383],
         [ 1.9754,  0.4131,  0.7442],
         ...,
         [ 1.0968,  1.2605,  1.6118],
         [ 0.3541,  0.4089,  2.1009],
         [ 1.7315,  1.9406,  2.7230


Train Diffusion:  53%|█████▎    | 2632/5001 [2:01:58<1:43:23,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2633/5001 [2:02:01<1:43:04,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2634/5001 [2:02:04<1:42:58,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2635/5001 [2:02:06<1:43:24,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2636/5001 [2:02:09<1:43:06,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2637/5001 [2:02:11<1:42:45,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2638/5001 [2:02:14<1:43:02,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2639/5001 [2:02:17<1:42:49,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2640/5001 [2:02:19<1:42:49,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2641/5001 [2:02:22<1:42:29,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333956636.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4636, 0.5114, 1.3220],
        [8.8458, 0.5117, 1.2978],
        [8.7035, 0.5159, 1.3097]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9646,  0.9496,  0.7921],
         [ 0.8796,  0.5665,  0.8008],
         [ 2.6532,  0.6544,  1.3416],
         ...,
         [ 0.3014,  0.6081,  0.9906],
         [35.8117,  0.0579,  1.4434],
         [ 6.4765,  0.2399,  0.7788]],

        [[ 1.4676,  0.8395,  5.2474],
         [ 0.8364,  0.0436,  0.8928],
         [ 2.6371,  0.3765,  0.8038],
         ...,
         [32.7369,  0.0954,  1.2354],
         [ 5.4490,  0.1165,  0.7739],
         [ 5.4752,  1.3989,  1.6227]],

        [[ 0.4771,  0.9521,  0.6566],
         [ 1.4176,  1.7034,  0.2699],
         [13.2338,  0.3973,  0.9854],
         ...,
         [ 2.6664,  0.2337,  0.9246],
         [ 0.4516,  0.6118,  0.4264],
         [11.5057,  2.2231,  1.1800


Train Diffusion:  53%|█████▎    | 2642/5001 [2:02:25<1:42:35,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2643/5001 [2:02:27<1:42:16,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2644/5001 [2:02:30<1:42:06,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2645/5001 [2:02:32<1:42:15,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2646/5001 [2:02:35<1:42:10,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2647/5001 [2:02:38<1:42:19,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2648/5001 [2:02:40<1:42:04,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2649/5001 [2:02:43<1:42:04,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2650/5001 [2:02:45<1:42:09,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2651/5001 [2:02:48<1:41:54,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344105526.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7886, 0.4760, 1.3040],
        [8.7380, 0.5035, 1.2879],
        [8.7805, 0.5085, 1.3003]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.5050e-01, 9.5458e-01, 9.2219e-01],
         [1.2203e+01, 5.9786e-01, 1.0392e+00],
         [2.0554e+00, 4.3527e-01, 7.2454e-01],
         ...,
         [3.8811e+00, 1.8950e-01, 8.7742e-01],
         [2.2543e-03, 6.3193e-01, 2.1545e+00],
         [1.4511e+01, 2.3775e+00, 3.1788e+00]],

        [[6.4382e-01, 9.5428e-01, 8.9545e-01],
         [5.9140e-01, 7.7232e-01, 1.8658e+00],
         [2.7853e+01, 1.7670e-01, 1.2027e+00],
         ...,
         [2.6353e+01, 4.8704e-01, 8.6306e-01],
         [5.1775e+00, 1.3004e-02, 1.2825e-01],
         [4.0558e+00, 2.5810e-01, 7.7228e-01]],

        [[1.6080e+00, 8.0787e-01, 3.6518e+00],
         [1.2230e+00, 5.6299e-01, 8.7957e-01],
         [7.2224e-01, 7.1398e-01, 1.2


Train Diffusion:  53%|█████▎    | 2652/5001 [2:02:51<1:41:49,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2653/5001 [2:02:53<1:41:45,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2654/5001 [2:02:56<1:41:48,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2655/5001 [2:02:58<1:41:48,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2656/5001 [2:03:01<1:41:39,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2657/5001 [2:03:04<1:41:39,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2658/5001 [2:03:06<1:41:39,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2659/5001 [2:03:09<1:41:31,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2660/5001 [2:03:11<1:41:22,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2661/5001 [2:03:14<1:41:23,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336112201.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6938, 0.5035, 1.3190],
        [8.6647, 0.5216, 1.2538],
        [8.9272, 0.5020, 1.3049]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8053,  0.9619,  1.1801],
         [12.2491,  0.3442,  1.2773],
         [ 2.8440,  0.3268,  1.2401],
         ...,
         [ 2.4818,  0.2302,  1.0813],
         [36.3948,  0.4386,  0.2652],
         [ 2.9726,  3.6635,  2.6065]],

        [[ 1.5649,  0.8117,  0.6799],
         [ 1.8125,  0.3488,  0.6686],
         [ 0.0642,  0.7611,  1.3384],
         ...,
         [30.1082,  0.3434,  0.5005],
         [ 4.4841,  0.1134,  0.9379],
         [ 0.6555,  5.9722,  1.6100]],

        [[ 0.5358,  0.9520,  0.9167],
         [ 0.6058,  0.7812,  1.4519],
         [20.9090,  0.2187,  1.1275],
         ...,
         [ 0.6265,  0.2494,  2.3401],
         [ 0.2484,  0.6334,  0.4256],
         [ 7.4644,  2.3534,  1.1320


Train Diffusion:  53%|█████▎    | 2662/5001 [2:03:17<1:41:25,  2.60s/it][A
Train Diffusion:  53%|█████▎    | 2663/5001 [2:03:19<1:41:52,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2664/5001 [2:03:22<1:41:50,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2665/5001 [2:03:24<1:41:28,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2666/5001 [2:03:27<1:41:47,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2667/5001 [2:03:30<1:41:42,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2668/5001 [2:03:32<1:41:43,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2669/5001 [2:03:35<1:42:26,  2.64s/it][A
Train Diffusion:  53%|█████▎    | 2670/5001 [2:03:38<1:42:32,  2.64s/it][A
Train Diffusion:  53%|█████▎    | 2671/5001 [2:03:40<1:41:57,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330026227.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9953, 0.5033, 1.3044],
        [8.6405, 0.5041, 1.3427],
        [8.6674, 0.4999, 1.3009]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8364,  0.9625,  1.2320],
         [11.3905,  0.3805,  1.2752],
         [ 2.6322,  0.3044,  0.7672],
         ...,
         [ 0.2600,  0.4800,  1.1949],
         [35.5735,  0.1064,  1.2776],
         [ 5.5689,  0.3443,  4.2839]],

        [[ 0.5198,  0.9525,  0.9412],
         [ 0.6582,  0.7542,  1.8804],
         [19.5273,  0.0752,  1.1857],
         ...,
         [20.9686,  0.2261,  0.9392],
         [ 4.2218,  0.1511,  0.8513],
         [ 0.7155,  1.6053,  0.9272]],

        [[ 1.5498,  0.8200,  0.6018],
         [ 1.7415,  0.3736,  0.6683],
         [ 0.2379,  0.8942,  0.7499],
         ...,
         [ 0.3024,  1.0471,  3.4046],
         [ 0.2518,  0.5810,  0.8061],
         [19.4140,  1.2691,  1.8950


Train Diffusion:  53%|█████▎    | 2672/5001 [2:03:43<1:41:40,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2673/5001 [2:03:45<1:41:20,  2.61s/it][A
Train Diffusion:  53%|█████▎    | 2674/5001 [2:03:48<1:41:34,  2.62s/it][A
Train Diffusion:  53%|█████▎    | 2675/5001 [2:03:51<1:45:46,  2.73s/it][A
Train Diffusion:  54%|█████▎    | 2676/5001 [2:03:54<1:44:39,  2.70s/it][A
Train Diffusion:  54%|█████▎    | 2677/5001 [2:03:56<1:43:20,  2.67s/it][A
Train Diffusion:  54%|█████▎    | 2678/5001 [2:03:59<1:42:30,  2.65s/it][A
Train Diffusion:  54%|█████▎    | 2679/5001 [2:04:01<1:42:07,  2.64s/it][A
Train Diffusion:  54%|█████▎    | 2680/5001 [2:04:04<1:41:36,  2.63s/it][A
Train Diffusion:  54%|█████▎    | 2681/5001 [2:04:07<1:41:09,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333620524.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6389, 0.5033, 1.3139],
        [8.7165, 0.4958, 1.2783],
        [8.8344, 0.4824, 1.2886]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2638e-01, 9.5150e-01, 6.8227e-01],
         [1.0145e+01, 1.1999e+00, 2.2209e+00],
         [1.0234e+00, 7.5794e-01, 1.1880e+00],
         ...,
         [2.2882e-01, 9.7955e+00, 1.1363e+00],
         [2.8755e-01, 1.1890e+00, 8.5950e-01],
         [6.0802e-01, 1.5242e+00, 1.8594e+00]],

        [[1.5551e+00, 8.2270e-01, 5.5126e+00],
         [9.3578e-01, 3.8452e-01, 1.1312e+00],
         [8.6091e-01, 7.8927e-01, 8.0347e-01],
         ...,
         [2.0323e+01, 1.6714e-01, 7.9524e-01],
         [5.5330e+00, 2.2488e-02, 6.3029e+00],
         [2.0236e+00, 9.9709e-01, 1.4394e+00]],

        [[8.2327e-01, 9.6131e-01, 7.9344e-01],
         [7.3140e-01, 6.4703e-01, 1.4549e+00],
         [2.5063e+01, 2.8526e-01, 1.1


Train Diffusion:  54%|█████▎    | 2682/5001 [2:04:09<1:41:33,  2.63s/it][A
Train Diffusion:  54%|█████▎    | 2683/5001 [2:04:12<1:42:02,  2.64s/it][A
Train Diffusion:  54%|█████▎    | 2684/5001 [2:04:15<1:42:03,  2.64s/it][A
Train Diffusion:  54%|█████▎    | 2685/5001 [2:04:17<1:41:27,  2.63s/it][A
Train Diffusion:  54%|█████▎    | 2686/5001 [2:04:20<1:41:15,  2.62s/it][A
Train Diffusion:  54%|█████▎    | 2687/5001 [2:04:22<1:41:17,  2.63s/it][A
Train Diffusion:  54%|█████▎    | 2688/5001 [2:04:25<1:41:15,  2.63s/it][A
Train Diffusion:  54%|█████▍    | 2689/5001 [2:04:28<1:40:55,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2690/5001 [2:04:30<1:40:41,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2691/5001 [2:04:33<1:40:39,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334181132.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7895, 0.4805, 1.3227],
        [8.7527, 0.4731, 1.2973],
        [8.6964, 0.5216, 1.3118]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4454,  0.9541,  0.9716],
         [ 5.6899,  0.6924,  0.8317],
         [ 1.4196,  0.4701,  0.7566],
         ...,
         [ 0.2686,  8.3699,  1.3193],
         [21.7885,  0.1182,  1.1159],
         [ 5.0476,  0.4959,  1.0466]],

        [[ 1.2906,  0.8849,  0.7587],
         [ 1.4725,  0.3879,  1.8848],
         [28.2095,  0.2268,  1.1851],
         ...,
         [ 0.4773,  1.4571,  2.2371],
         [ 0.9826,  0.2752,  2.3316],
         [12.8418,  1.9071,  1.9580]],

        [[ 1.1747,  0.9088,  3.1663],
         [ 2.5471,  0.8594,  1.5782],
         [ 0.9067,  0.7144,  1.3123],
         ...,
         [ 0.2110,  1.3172,  9.7267],
         [ 0.3673,  1.6870,  1.4997],
         [ 0.3219,  1.5875,  7.3489


Train Diffusion:  54%|█████▍    | 2692/5001 [2:04:36<1:40:18,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2693/5001 [2:04:38<1:40:01,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2694/5001 [2:04:41<1:40:03,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2695/5001 [2:04:43<1:39:59,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2696/5001 [2:04:46<1:40:02,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2697/5001 [2:04:49<1:40:07,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2698/5001 [2:04:51<1:40:01,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2699/5001 [2:04:54<1:39:57,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2700/5001 [2:04:56<1:39:49,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2701/5001 [2:04:59<1:39:45,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340165078.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9204, 0.4776, 1.2931],
        [8.5936, 0.5220, 1.2881],
        [8.6668, 0.5166, 1.2652]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7159,  0.9564,  0.8421],
         [ 0.6564,  0.6992,  1.8459],
         [26.4054,  0.2475,  1.1997],
         ...,
         [ 1.0065,  0.5720,  1.2866],
         [ 0.3492,  0.6691,  0.5923],
         [18.4022,  1.6299,  1.9554]],

        [[ 0.5886,  0.9502,  0.9041],
         [12.2142,  0.6117,  1.0993],
         [ 1.9248,  0.4651,  0.8221],
         ...,
         [ 2.1995,  0.2920,  1.0153],
         [39.6778,  0.0676,  1.3981],
         [ 6.4626,  0.1953,  1.5031]],

        [[ 1.5974,  0.8164,  4.2783],
         [ 0.9990,  0.7379,  1.0048],
         [ 0.6934,  0.8411,  0.9676],
         ...,
         [33.8900,  0.1381,  1.1672],
         [ 5.2652,  0.1039,  0.5546],
         [ 0.1731,  2.1692,  1.1445


Train Diffusion:  54%|█████▍    | 2702/5001 [2:05:02<1:39:48,  2.60s/it][A
Train Diffusion:  54%|█████▍    | 2703/5001 [2:05:04<1:39:53,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2704/5001 [2:05:07<1:40:06,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2705/5001 [2:05:09<1:39:52,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2706/5001 [2:05:12<1:39:48,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2707/5001 [2:05:15<1:39:42,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2708/5001 [2:05:17<1:39:34,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2709/5001 [2:05:20<1:39:45,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2710/5001 [2:05:22<1:39:53,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2711/5001 [2:05:25<1:39:54,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 359010963.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9875, 0.5195, 1.3009],
        [8.5939, 0.5075, 1.2971],
        [8.6830, 0.5370, 1.2979]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3709,  0.8616,  0.7712],
         [ 1.6238,  0.3491,  1.3272],
         [22.8580,  0.3000,  1.1829],
         ...,
         [31.8373,  0.0411,  1.2488],
         [ 6.1060,  0.0678,  0.8119],
         [28.4763,  1.0355,  2.0201]],

        [[ 1.0885,  0.9275,  2.0834],
         [ 3.6614,  0.6040,  1.4310],
         [ 1.3610,  0.7580,  0.7200],
         ...,
         [ 0.3724,  0.7160,  0.7005],
         [13.8640,  0.3064,  0.8704],
         [ 3.8508,  0.6605,  0.9173]],

        [[ 0.4525,  0.9547,  0.8687],
         [ 6.7129,  0.9013,  2.1767],
         [ 0.8422,  0.8198,  1.1375],
         ...,
         [ 3.3020,  0.1467,  0.8177],
         [16.5771,  0.2840,  1.0350],
         [ 3.3716,  0.8535,  1.2330


Train Diffusion:  54%|█████▍    | 2712/5001 [2:05:28<1:40:25,  2.63s/it][A
Train Diffusion:  54%|█████▍    | 2713/5001 [2:05:30<1:40:10,  2.63s/it][A
Train Diffusion:  54%|█████▍    | 2714/5001 [2:05:33<1:40:02,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2715/5001 [2:05:36<1:39:38,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2716/5001 [2:05:38<1:39:29,  2.61s/it][A
Train Diffusion:  54%|█████▍    | 2717/5001 [2:05:41<1:40:40,  2.64s/it][A
Train Diffusion:  54%|█████▍    | 2718/5001 [2:05:44<1:40:13,  2.63s/it][A
Train Diffusion:  54%|█████▍    | 2719/5001 [2:05:46<1:39:45,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2720/5001 [2:05:49<1:39:45,  2.62s/it][A
Train Diffusion:  54%|█████▍    | 2721/5001 [2:05:51<1:39:44,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341891497.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8061, 0.4773, 1.2937],
        [8.6562, 0.5037, 1.3019],
        [8.9051, 0.4891, 1.2670]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1541e+00, 9.1480e-01, 9.2435e-01],
         [1.2730e+00, 4.4224e-01, 2.0154e-01],
         [2.0037e-05, 2.4127e+00, 2.0678e-01],
         ...,
         [3.2957e+01, 1.9935e-01, 1.0041e+00],
         [4.4355e+00, 1.0843e-01, 1.4158e+00],
         [3.0906e+00, 4.4859e-01, 1.1555e+00]],

        [[4.4546e-01, 9.5607e-01, 1.0170e+00],
         [8.8418e+00, 5.3463e-01, 1.6499e+00],
         [1.7919e+00, 5.1894e-01, 5.6935e-01],
         ...,
         [4.8323e-01, 2.0503e-01, 8.4226e-01],
         [4.7870e-01, 1.4401e+00, 7.9200e-01],
         [2.3413e+01, 1.3472e+00, 2.0126e+00]],

        [[1.3128e+00, 8.8003e-01, 4.0275e+00],
         [1.3611e+00, 1.0911e+00, 1.3039e+00],
         [9.8105e-01, 1.1124e+00, 6.9


Train Diffusion:  54%|█████▍    | 2722/5001 [2:05:54<1:42:36,  2.70s/it][A
Train Diffusion:  54%|█████▍    | 2723/5001 [2:05:57<1:41:29,  2.67s/it][A
Train Diffusion:  54%|█████▍    | 2724/5001 [2:05:59<1:40:36,  2.65s/it][A
Train Diffusion:  54%|█████▍    | 2725/5001 [2:06:02<1:39:58,  2.64s/it][A
Train Diffusion:  55%|█████▍    | 2726/5001 [2:06:05<1:39:45,  2.63s/it][A
Train Diffusion:  55%|█████▍    | 2727/5001 [2:06:07<1:39:14,  2.62s/it][A
Train Diffusion:  55%|█████▍    | 2728/5001 [2:06:10<1:39:07,  2.62s/it][A
Train Diffusion:  55%|█████▍    | 2729/5001 [2:06:12<1:38:50,  2.61s/it][A
Train Diffusion:  55%|█████▍    | 2730/5001 [2:06:15<1:38:33,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2731/5001 [2:06:18<1:38:35,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331635484.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8011, 0.4779, 1.3181],
        [8.7764, 0.4936, 1.3087],
        [8.5934, 0.5042, 1.2706]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0071,  0.9409,  1.6220],
         [ 5.0382,  0.6187,  1.3746],
         [ 1.4355,  0.7617,  0.6674],
         ...,
         [ 0.3513,  0.5536,  1.0667],
         [39.9039,  0.1015,  1.2837],
         [ 5.9894,  0.3236,  0.8588]],

        [[ 0.4661,  0.9519,  0.9456],
         [ 5.3613,  0.6836,  2.0868],
         [ 1.2043,  0.3109,  0.5767],
         ...,
         [ 2.2795,  0.2373,  0.7595],
         [ 1.3212,  0.3247,  0.7919],
         [24.9450,  1.2314,  1.9871]],

        [[ 1.4341,  0.8441,  0.5967],
         [ 1.7295,  0.3409,  1.1802],
         [ 8.3904,  0.4252,  1.4955],
         ...,
         [27.7449,  0.1669,  1.1678],
         [ 4.3957,  0.1658,  0.9796],
         [ 0.6090,  1.8782,  0.9854


Train Diffusion:  55%|█████▍    | 2732/5001 [2:06:20<1:38:27,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2733/5001 [2:06:23<1:38:19,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2734/5001 [2:06:25<1:38:17,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2735/5001 [2:06:28<1:38:15,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2736/5001 [2:06:31<1:38:26,  2.61s/it][A
Train Diffusion:  55%|█████▍    | 2737/5001 [2:06:33<1:38:27,  2.61s/it][A
Train Diffusion:  55%|█████▍    | 2738/5001 [2:06:36<1:38:18,  2.61s/it][A
Train Diffusion:  55%|█████▍    | 2739/5001 [2:06:38<1:38:04,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2740/5001 [2:06:41<1:38:10,  2.61s/it][A
Train Diffusion:  55%|█████▍    | 2741/5001 [2:06:44<1:38:02,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332066304.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8327, 0.5015, 1.3111],
        [8.5948, 0.4973, 1.2780],
        [8.6012, 0.5031, 1.3076]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6227,  0.9521,  0.9111],
         [ 0.5766,  0.7890,  1.8285],
         [28.2647,  0.1003,  1.2039],
         ...,
         [ 5.3319,  0.5324,  1.1320],
         [ 1.8372,  0.1897,  7.8959],
         [ 0.5276,  2.3134,  1.9031]],

        [[ 1.6034,  0.8061,  3.2076],
         [ 1.2795,  0.5638,  0.8426],
         [ 0.7133,  0.6928,  1.1125],
         ...,
         [ 4.5084,  0.0978,  0.9716],
         [28.6366,  0.6196,  1.9738],
         [ 3.6353,  3.9069,  1.8033]],

        [[ 0.6752,  0.9544,  0.9612],
         [12.4155,  0.5379,  1.0968],
         [ 2.1795,  0.4404,  0.6898],
         ...,
         [22.0190,  1.2753,  0.9470],
         [ 1.9031,  1.4469,  1.0108],
         [ 0.3304,  1.3405,  7.9028


Train Diffusion:  55%|█████▍    | 2742/5001 [2:06:46<1:37:50,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2743/5001 [2:06:49<1:37:54,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2744/5001 [2:06:51<1:37:50,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2745/5001 [2:06:54<1:37:54,  2.60s/it][A
Train Diffusion:  55%|█████▍    | 2746/5001 [2:06:57<1:39:30,  2.65s/it][A
Train Diffusion:  55%|█████▍    | 2747/5001 [2:06:59<1:38:50,  2.63s/it][A
Train Diffusion:  55%|█████▍    | 2748/5001 [2:07:02<1:38:32,  2.62s/it][A
Train Diffusion:  55%|█████▍    | 2749/5001 [2:07:05<1:38:11,  2.62s/it][A
Train Diffusion:  55%|█████▍    | 2750/5001 [2:07:07<1:37:50,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2751/5001 [2:07:10<1:37:44,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317765105.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8615, 0.5118, 1.2618],
        [8.7244, 0.4945, 1.3127],
        [8.6746, 0.4940, 1.3009]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6010e+00, 8.0691e-01, 2.5550e+00],
         [1.3435e+00, 5.4336e-01, 7.9028e-01],
         [7.2335e-01, 7.0297e-01, 1.4322e+00],
         ...,
         [2.9876e-01, 8.1147e-02, 1.9697e+00],
         [2.0601e-01, 3.6303e+00, 7.6413e-01],
         [2.1473e+01, 1.6236e+00, 1.8785e+00]],

        [[7.0447e-01, 9.5702e-01, 1.0286e+00],
         [1.2514e+01, 4.8453e-01, 1.1847e+00],
         [2.2748e+00, 4.3695e-01, 6.6932e-01],
         ...,
         [4.1794e+00, 1.3296e-01, 8.0059e-01],
         [6.9363e+00, 7.5687e-01, 9.7196e-01],
         [7.6854e-01, 2.2697e+00, 1.2746e+00]],

        [[5.9770e-01, 9.5201e-01, 9.3181e-01],
         [5.6319e-01, 8.0456e-01, 1.7777e+00],
         [2.7478e+01, 2.1062e-01, 1.1


Train Diffusion:  55%|█████▌    | 2752/5001 [2:07:12<1:37:33,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2753/5001 [2:07:15<1:37:23,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2754/5001 [2:07:18<1:37:17,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2755/5001 [2:07:20<1:37:20,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2756/5001 [2:07:23<1:37:20,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2757/5001 [2:07:25<1:37:16,  2.60s/it][A
Train Diffusion:  55%|█████▌    | 2758/5001 [2:07:28<1:37:33,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2759/5001 [2:07:31<1:37:41,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2760/5001 [2:07:33<1:37:38,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2761/5001 [2:07:36<1:37:36,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331140790.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9391, 0.5031, 1.3207],
        [8.5907, 0.5023, 1.2980],
        [8.6422, 0.5081, 1.3164]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5565e+00, 8.1549e-01, 6.0764e-01],
         [1.8078e+00, 3.5430e-01, 6.5132e-01],
         [1.3824e-04, 9.5801e-01, 1.1022e+00],
         ...,
         [2.3208e+01, 3.7914e-01, 3.6555e-01],
         [5.1143e+00, 8.1551e-02, 6.5814e-01],
         [6.1143e-01, 1.8407e+00, 1.1919e+00]],

        [[5.2788e-01, 9.5422e-01, 9.2539e-01],
         [7.8228e-01, 7.5582e-01, 6.5217e-01],
         [4.5472e+00, 1.2468e-01, 6.2264e-01],
         ...,
         [4.3994e+00, 1.1382e-01, 7.1097e-01],
         [2.0349e-01, 4.2636e+00, 9.9043e-01],
         [1.9661e+01, 1.4720e+00, 2.0201e+00]],

        [[8.2153e-01, 9.6387e-01, 1.1921e+00],
         [1.1245e+01, 3.8130e-01, 1.2719e+00],
         [2.5919e+00, 3.5255e-01, 1.3


Train Diffusion:  55%|█████▌    | 2762/5001 [2:07:39<1:37:33,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2763/5001 [2:07:41<1:37:31,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2764/5001 [2:07:44<1:38:35,  2.64s/it][A
Train Diffusion:  55%|█████▌    | 2765/5001 [2:07:46<1:38:11,  2.63s/it][A
Train Diffusion:  55%|█████▌    | 2766/5001 [2:07:49<1:37:40,  2.62s/it][A
Train Diffusion:  55%|█████▌    | 2767/5001 [2:07:52<1:37:28,  2.62s/it][A
Train Diffusion:  55%|█████▌    | 2768/5001 [2:07:54<1:37:15,  2.61s/it][A
Train Diffusion:  55%|█████▌    | 2769/5001 [2:07:57<1:42:29,  2.75s/it][A
Train Diffusion:  55%|█████▌    | 2770/5001 [2:08:00<1:40:43,  2.71s/it][A
Train Diffusion:  55%|█████▌    | 2771/5001 [2:08:03<1:39:31,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332859612.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7554, 0.5110, 1.3231],
        [8.6449, 0.4906, 1.3105],
        [8.8850, 0.4900, 1.2898]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7622,  0.9626,  1.1531],
         [13.1826,  0.3679,  1.2723],
         [ 2.7839,  0.3391,  0.9532],
         ...,
         [30.8337,  0.1225,  1.2082],
         [ 5.3976,  0.1074,  0.7500],
         [ 0.4317,  6.6417,  0.9602]],

        [[ 1.5866,  0.8070,  1.1141],
         [ 1.6919,  0.3779,  0.7126],
         [ 0.5228,  0.7777,  1.0246],
         ...,
         [ 0.2099, 14.1033,  0.7779],
         [26.2486,  0.3844,  0.4490],
         [ 3.2629,  2.2348, 10.8201]],

        [[ 0.5576,  0.9527,  0.9364],
         [ 0.5204,  0.8245,  1.5833],
         [25.7915,  0.0795,  1.1731],
         ...,
         [ 4.2422,  0.1492,  0.6820],
         [ 0.5717,  1.5623,  1.3266],
         [ 0.1114,  3.8916,  1.1532


Train Diffusion:  55%|█████▌    | 2772/5001 [2:08:05<1:38:41,  2.66s/it][A
Train Diffusion:  55%|█████▌    | 2773/5001 [2:08:08<1:37:48,  2.63s/it][A
Train Diffusion:  55%|█████▌    | 2774/5001 [2:08:10<1:37:26,  2.63s/it][A
Train Diffusion:  55%|█████▌    | 2775/5001 [2:08:13<1:37:09,  2.62s/it][A
Train Diffusion:  56%|█████▌    | 2776/5001 [2:08:16<1:36:48,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2777/5001 [2:08:18<1:36:28,  2.60s/it][A
Train Diffusion:  56%|█████▌    | 2778/5001 [2:08:21<1:36:40,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2779/5001 [2:08:23<1:36:25,  2.60s/it][A
Train Diffusion:  56%|█████▌    | 2780/5001 [2:08:26<1:36:25,  2.60s/it][A
Train Diffusion:  56%|█████▌    | 2781/5001 [2:08:29<1:36:16,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326497180.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7477, 0.4817, 1.3084],
        [8.7189, 0.4923, 1.2976],
        [8.5852, 0.4993, 1.3275]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.6300e-01, 9.6195e-01, 8.1370e-01],
         [7.0357e-01, 6.6785e-01, 1.9061e+00],
         [2.7158e+01, 2.5211e-01, 1.2005e+00],
         ...,
         [1.3310e-02, 5.6369e-01, 4.0195e+00],
         [2.2084e-01, 4.8804e-01, 1.6053e+00],
         [1.8938e-03, 1.7484e+00, 3.2337e+00]],

        [[5.5795e-01, 9.5228e-01, 8.3214e-01],
         [1.1606e+01, 7.3737e-01, 8.5321e-01],
         [1.7870e+00, 4.3487e-01, 7.3176e-01],
         ...,
         [3.8085e-01, 6.2091e-01, 6.9819e-01],
         [3.0583e+01, 1.5779e-01, 1.0559e+00],
         [5.6761e+00, 1.7739e-01, 3.6171e+00]],

        [[1.5833e+00, 8.2228e-01, 5.0554e+00],
         [9.0151e-01, 7.1434e-01, 1.1711e+00],
         [6.8311e-01, 7.2907e-01, 1.2


Train Diffusion:  56%|█████▌    | 2782/5001 [2:08:31<1:36:20,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2783/5001 [2:08:34<1:36:48,  2.62s/it][A
Train Diffusion:  56%|█████▌    | 2784/5001 [2:08:36<1:36:29,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2785/5001 [2:08:39<1:36:25,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2786/5001 [2:08:42<1:36:18,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2787/5001 [2:08:44<1:36:26,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2788/5001 [2:08:47<1:36:21,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2789/5001 [2:08:49<1:36:15,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2790/5001 [2:08:52<1:36:03,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2791/5001 [2:08:55<1:35:57,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330361612.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7847, 0.5123, 1.3352],
        [8.6187, 0.5100, 1.2925],
        [8.7755, 0.4989, 1.2767]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.1724e-01, 9.5284e-01, 9.0528e-01],
         [1.1533e+01, 6.1565e-01, 9.6527e-01],
         [1.8962e+00, 5.0065e-01, 1.2890e+00],
         ...,
         [2.0963e+00, 5.5334e+00, 9.1420e-01],
         [1.4830e+00, 1.0211e+00, 1.2472e+00],
         [1.0398e+00, 1.9576e+00, 8.1185e-01]],

        [[6.8103e-01, 9.5549e-01, 8.6421e-01],
         [6.2645e-01, 7.2091e-01, 1.8687e+00],
         [2.8514e+01, 1.7865e-01, 1.1924e+00],
         ...,
         [6.5621e-06, 5.0488e-01, 3.7462e+00],
         [1.5957e-01, 4.9989e-01, 8.0706e-01],
         [2.5805e+01, 1.1130e+00, 2.0069e+00]],

        [[1.6040e+00, 8.1333e-01, 4.0763e+00],
         [1.0606e+00, 5.8506e-01, 8.8504e-01],
         [8.1831e-01, 6.5752e-01, 7.8


Train Diffusion:  56%|█████▌    | 2792/5001 [2:08:57<1:36:09,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2793/5001 [2:09:00<1:35:54,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2794/5001 [2:09:03<1:36:03,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2795/5001 [2:09:05<1:35:55,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2796/5001 [2:09:08<1:35:47,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2797/5001 [2:09:10<1:35:39,  2.60s/it][A
Train Diffusion:  56%|█████▌    | 2798/5001 [2:09:13<1:35:33,  2.60s/it][A
Train Diffusion:  56%|█████▌    | 2799/5001 [2:09:16<1:35:43,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2800/5001 [2:09:18<1:35:39,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2801/5001 [2:09:21<1:35:58,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328412768.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8590, 0.5294, 1.2944],
        [8.7421, 0.4800, 1.2559],
        [8.7763, 0.5046, 1.2728]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4088,  0.8511,  0.6747],
         [ 1.6904,  0.3424,  1.5555],
         [25.2466,  0.2905,  1.1812],
         ...,
         [21.3821,  0.4373,  1.0254],
         [ 3.0758,  0.4926,  0.7473],
         [ 0.7964,  1.5269,  8.1602]],

        [[ 0.4596,  0.9538,  0.9216],
         [ 6.4604,  0.7603,  1.7770],
         [ 1.0877,  0.7117,  1.0559],
         ...,
         [ 0.7471,  1.4433,  1.8544],
         [ 0.3048,  0.5011,  1.0699],
         [ 0.0929,  1.6160,  2.9533]],

        [[ 1.0424,  0.9371,  1.8281],
         [ 4.0479,  0.6821,  1.4619],
         [ 1.1736,  0.8534,  0.7732],
         ...,
         [ 0.5074,  1.9537,  8.8921],
         [12.4446,  0.0601,  1.2304],
         [ 6.0252,  0.3014,  0.9547


Train Diffusion:  56%|█████▌    | 2802/5001 [2:09:23<1:37:00,  2.65s/it][A
Train Diffusion:  56%|█████▌    | 2803/5001 [2:09:26<1:36:35,  2.64s/it][A
Train Diffusion:  56%|█████▌    | 2804/5001 [2:09:29<1:36:41,  2.64s/it][A
Train Diffusion:  56%|█████▌    | 2805/5001 [2:09:31<1:36:19,  2.63s/it][A
Train Diffusion:  56%|█████▌    | 2806/5001 [2:09:34<1:36:16,  2.63s/it][A
Train Diffusion:  56%|█████▌    | 2807/5001 [2:09:37<1:35:55,  2.62s/it][A
Train Diffusion:  56%|█████▌    | 2808/5001 [2:09:39<1:35:36,  2.62s/it][A
Train Diffusion:  56%|█████▌    | 2809/5001 [2:09:42<1:35:26,  2.61s/it][A
Train Diffusion:  56%|█████▌    | 2810/5001 [2:09:45<1:38:26,  2.70s/it][A
Train Diffusion:  56%|█████▌    | 2811/5001 [2:09:47<1:37:39,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 348664611.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9005, 0.5010, 1.2835],
        [8.8686, 0.5251, 1.3140],
        [8.5531, 0.5125, 1.2900]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5481e+00, 8.1883e-01, 5.6159e-01],
         [1.7873e+00, 3.5941e-01, 5.3409e-01],
         [1.4957e-04, 1.2842e+00, 7.7828e-01],
         ...,
         [7.3014e-01, 4.9957e-01, 1.0193e+00],
         [3.6523e+01, 8.0041e-02, 1.3735e+00],
         [5.8333e+00, 3.2359e-01, 1.1041e+00]],

        [[8.3742e-01, 9.6145e-01, 1.2082e+00],
         [1.0823e+01, 3.8437e-01, 1.2639e+00],
         [2.5457e+00, 3.1688e-01, 1.4555e+00],
         ...,
         [3.5060e+01, 1.5149e-01, 1.1387e+00],
         [4.7123e+00, 2.2327e-01, 6.5918e-01],
         [6.2691e-01, 1.6402e+00, 9.9321e+00]],

        [[5.2016e-01, 9.5145e-01, 9.3012e-01],
         [8.2320e-01, 7.4129e-01, 8.3911e-01],
         [5.6263e+00, 3.2646e-01, 6.8


Train Diffusion:  56%|█████▌    | 2812/5001 [2:09:50<1:36:50,  2.65s/it][A
Train Diffusion:  56%|█████▌    | 2813/5001 [2:09:53<1:36:14,  2.64s/it][A
Train Diffusion:  56%|█████▋    | 2814/5001 [2:09:55<1:35:48,  2.63s/it][A
Train Diffusion:  56%|█████▋    | 2815/5001 [2:09:58<1:35:29,  2.62s/it][A
Train Diffusion:  56%|█████▋    | 2816/5001 [2:10:01<1:37:16,  2.67s/it][A
Train Diffusion:  56%|█████▋    | 2817/5001 [2:10:03<1:36:38,  2.66s/it][A
Train Diffusion:  56%|█████▋    | 2818/5001 [2:10:06<1:36:08,  2.64s/it][A
Train Diffusion:  56%|█████▋    | 2819/5001 [2:10:08<1:35:45,  2.63s/it][A
Train Diffusion:  56%|█████▋    | 2820/5001 [2:10:11<1:35:36,  2.63s/it][A
Train Diffusion:  56%|█████▋    | 2821/5001 [2:10:14<1:35:15,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324188435.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6930, 0.4980, 1.2484],
        [8.7824, 0.5120, 1.2952],
        [8.8503, 0.4767, 1.2797]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.2038e-01, 9.5382e-01, 9.1389e-01],
         [5.7585e-01, 7.6950e-01, 1.8107e+00],
         [2.6869e+01, 1.9720e-01, 1.2030e+00],
         ...,
         [4.3753e+00, 1.1050e-01, 1.0867e+00],
         [1.4694e-06, 5.3072e-01, 3.2205e+00],
         [3.2375e-01, 2.3937e+00, 5.8197e+00]],

        [[1.6056e+00, 8.0886e-01, 2.9278e+00],
         [1.2500e+00, 5.9055e-01, 8.1525e-01],
         [7.2628e-01, 8.1284e-01, 1.0280e+00],
         ...,
         [8.6104e-02, 2.9033e-02, 2.8839e+00],
         [2.0469e-01, 1.5152e+00, 5.5959e+00],
         [1.9085e-01, 4.0055e+00, 1.3884e+00]],

        [[6.7593e-01, 9.5619e-01, 9.9853e-01],
         [1.2473e+01, 4.9994e-01, 1.1573e+00],
         [2.1700e+00, 4.1758e-01, 8.1


Train Diffusion:  56%|█████▋    | 2822/5001 [2:10:16<1:34:56,  2.61s/it][A
Train Diffusion:  56%|█████▋    | 2823/5001 [2:10:19<1:34:37,  2.61s/it][A
Train Diffusion:  56%|█████▋    | 2824/5001 [2:10:21<1:34:30,  2.60s/it][A
Train Diffusion:  56%|█████▋    | 2825/5001 [2:10:24<1:34:17,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2826/5001 [2:10:27<1:34:18,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2827/5001 [2:10:29<1:34:21,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2828/5001 [2:10:32<1:34:21,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2829/5001 [2:10:34<1:34:16,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2830/5001 [2:10:37<1:34:09,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2831/5001 [2:10:40<1:34:13,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339253929.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7178, 0.4854, 1.2943],
        [8.9031, 0.4808, 1.3012],
        [8.7387, 0.4983, 1.2930]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1087e-01, 9.5365e-01, 1.0135e+00],
         [5.5812e-01, 6.3942e-01, 2.3210e-01],
         [1.4024e-04, 2.3411e+00, 2.7789e-01],
         ...,
         [1.6806e+00, 3.8740e-01, 1.1917e+00],
         [3.0982e-01, 5.2579e+00, 4.4217e+00],
         [1.2820e+00, 3.3695e+00, 2.0114e+00]],

        [[8.5848e-01, 9.6295e-01, 9.1016e-01],
         [6.4862e+00, 6.8485e-01, 1.3016e+00],
         [1.3216e+00, 6.3158e+00, 9.5408e-01],
         ...,
         [2.8981e-01, 5.9921e-01, 7.8539e-01],
         [1.4167e-06, 6.5237e-01, 2.5172e+00],
         [1.9069e-01, 1.7234e+00, 7.7247e-01]],

        [[1.5370e+00, 8.3037e-01, 5.5973e+00],
         [1.1085e+00, 1.0543e+00, 1.2337e+00],
         [1.6418e+00, 2.3129e+00, 4.6


Train Diffusion:  57%|█████▋    | 2832/5001 [2:10:42<1:34:10,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2833/5001 [2:10:45<1:33:56,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2834/5001 [2:10:47<1:34:20,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2835/5001 [2:10:50<1:34:23,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2836/5001 [2:10:53<1:34:34,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2837/5001 [2:10:55<1:34:14,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2838/5001 [2:10:58<1:34:25,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2839/5001 [2:11:01<1:34:08,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2840/5001 [2:11:03<1:34:05,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2841/5001 [2:11:06<1:33:58,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331200044.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7212, 0.4895, 1.2919],
        [8.8993, 0.4931, 1.3268],
        [8.6859, 0.4762, 1.2705]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5048,  0.9535,  0.7925],
         [10.3518,  0.8828,  1.4165],
         [ 1.3388,  0.6147,  1.0184],
         ...,
         [28.3809,  0.0704,  1.0306],
         [ 5.1811,  0.1313,  0.7249],
         [20.3009,  1.3392,  2.0344]],

        [[ 1.5230,  0.8287,  5.8214],
         [ 0.8187,  0.7977,  1.3331],
         [ 0.8068,  0.8425,  0.8989],
         ...,
         [ 0.4837,  1.4627,  1.1826],
         [ 0.4029,  0.5997,  0.5891],
         [ 0.5020,  2.3562,  1.2769]],

        [[ 0.8780,  0.9612,  0.7861],
         [ 0.8358,  0.5817,  1.6656],
         [26.7302,  0.2653,  1.1802],
         ...,
         [ 1.8913,  0.2459,  1.3115],
         [32.6664,  0.0463,  1.4918],
         [ 6.5344,  0.2080,  0.8140


Train Diffusion:  57%|█████▋    | 2842/5001 [2:11:08<1:33:46,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2843/5001 [2:11:11<1:33:41,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2844/5001 [2:11:14<1:33:27,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2845/5001 [2:11:16<1:33:29,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2846/5001 [2:11:19<1:33:32,  2.60s/it][A
Train Diffusion:  57%|█████▋    | 2847/5001 [2:11:21<1:33:33,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2848/5001 [2:11:24<1:33:33,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2849/5001 [2:11:27<1:33:42,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2850/5001 [2:11:29<1:33:55,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2851/5001 [2:11:32<1:34:00,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326989984.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7395, 0.4768, 1.3015],
        [8.7682, 0.4862, 1.3250],
        [8.6600, 0.5012, 1.3555]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5385e+00, 8.2472e-01, 5.7509e-01],
         [1.7517e+00, 3.6896e-01, 5.0664e-01],
         [6.2799e-06, 1.4308e+00, 7.7799e-01],
         ...,
         [8.0998e-01, 5.2435e-01, 8.0436e-01],
         [3.2831e+01, 9.2457e-02, 1.2394e+00],
         [6.0399e+00, 2.3097e-01, 1.0830e+00]],

        [[8.5401e-01, 9.6252e-01, 1.2437e+00],
         [1.0276e+01, 4.6003e-01, 1.2736e+00],
         [2.2320e+00, 3.8498e-01, 1.3799e+00],
         ...,
         [1.6346e+00, 2.1481e+00, 1.6456e+00],
         [2.4027e-01, 7.4795e-01, 8.3448e-01],
         [2.1681e+01, 1.3694e+00, 2.0550e+00]],

        [[5.1398e-01, 9.5298e-01, 9.3849e-01],
         [8.2225e-01, 7.3078e-01, 7.1864e-01],
         [1.9545e+00, 5.8390e-02, 5.2


Train Diffusion:  57%|█████▋    | 2852/5001 [2:11:34<1:33:51,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2853/5001 [2:11:37<1:33:36,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2854/5001 [2:11:40<1:33:31,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2855/5001 [2:11:42<1:33:16,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2856/5001 [2:11:45<1:33:15,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2857/5001 [2:11:48<1:33:26,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2858/5001 [2:11:50<1:33:55,  2.63s/it][A
Train Diffusion:  57%|█████▋    | 2859/5001 [2:11:53<1:33:46,  2.63s/it][A
Train Diffusion:  57%|█████▋    | 2860/5001 [2:11:55<1:33:30,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2861/5001 [2:11:58<1:33:19,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327471660.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8471, 0.5187, 1.3024],
        [8.8530, 0.4870, 1.2679],
        [8.7105, 0.5006, 1.2757]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4841,  0.8380,  0.5560],
         [ 1.7946,  0.3380,  1.2277],
         [20.6586,  0.3371,  1.2122],
         ...,
         [ 3.5741,  0.1331,  0.7712],
         [ 0.1969,  0.6674,  1.9778],
         [ 0.2767,  1.7329,  8.7240]],

        [[ 0.9445,  0.9538,  1.4023],
         [ 5.2702,  0.6472,  1.4233],
         [ 1.2498,  0.8503,  0.6662],
         ...,
         [30.2238,  1.7204,  1.0970],
         [ 3.1652,  0.4895,  0.4044],
         [ 4.8758,  0.2967,  0.8608]],

        [[ 0.4812,  0.9533,  0.9463],
         [ 4.5198,  0.6694,  2.5447],
         [ 1.0632,  0.5826,  0.8584],
         ...,
         [ 0.3207,  0.8791,  1.8455],
         [ 0.4866,  0.4106,  1.0538],
         [ 6.7108,  1.5302,  2.3691


Train Diffusion:  57%|█████▋    | 2862/5001 [2:12:01<1:33:06,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2863/5001 [2:12:04<1:40:07,  2.81s/it][A
Train Diffusion:  57%|█████▋    | 2864/5001 [2:12:07<1:39:01,  2.78s/it][A
Train Diffusion:  57%|█████▋    | 2865/5001 [2:12:09<1:37:08,  2.73s/it][A
Train Diffusion:  57%|█████▋    | 2866/5001 [2:12:12<1:35:40,  2.69s/it][A
Train Diffusion:  57%|█████▋    | 2867/5001 [2:12:14<1:34:46,  2.66s/it][A
Train Diffusion:  57%|█████▋    | 2868/5001 [2:12:17<1:34:07,  2.65s/it][A
Train Diffusion:  57%|█████▋    | 2869/5001 [2:12:20<1:33:31,  2.63s/it][A
Train Diffusion:  57%|█████▋    | 2870/5001 [2:12:22<1:33:24,  2.63s/it][A
Train Diffusion:  57%|█████▋    | 2871/5001 [2:12:25<1:33:12,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 313461856.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6509, 0.5114, 1.3008],
        [8.9521, 0.4670, 1.2795],
        [8.7480, 0.4716, 1.2893]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5832e+00, 8.1033e-01, 1.5083e+00],
         [1.4998e+00, 4.3846e-01, 7.3437e-01],
         [5.8551e-01, 9.9967e-01, 1.0196e+00],
         ...,
         [3.1285e+00, 3.1420e-01, 8.8998e-01],
         [4.4195e-01, 6.2956e-01, 7.3312e-01],
         [1.4360e-02, 1.6209e+00, 2.8975e+00]],

        [[5.6105e-01, 9.5150e-01, 9.5437e-01],
         [5.5319e-01, 7.9820e-01, 1.5555e+00],
         [2.5745e+01, 1.3301e-01, 1.1746e+00],
         ...,
         [3.1270e+01, 9.7756e-02, 1.2459e+00],
         [5.3869e+00, 1.3402e-01, 5.0071e-01],
         [5.1783e+00, 1.7465e+00, 5.7805e+00]],

        [[7.5877e-01, 9.6102e-01, 1.1289e+00],
         [1.2683e+01, 4.0193e-01, 1.2590e+00],
         [2.6400e+00, 4.2719e-01, 5.1


Train Diffusion:  57%|█████▋    | 2872/5001 [2:12:27<1:32:54,  2.62s/it][A
Train Diffusion:  57%|█████▋    | 2873/5001 [2:12:30<1:32:39,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2874/5001 [2:12:33<1:32:37,  2.61s/it][A
Train Diffusion:  57%|█████▋    | 2875/5001 [2:12:35<1:32:17,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2876/5001 [2:12:38<1:32:16,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2877/5001 [2:12:40<1:31:58,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2878/5001 [2:12:43<1:31:57,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2879/5001 [2:12:46<1:32:04,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2880/5001 [2:12:48<1:32:28,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2881/5001 [2:12:51<1:33:29,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328624713.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7292, 0.5075, 1.2860],
        [8.7436, 0.4852, 1.3070],
        [8.7222, 0.4938, 1.2909]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5680,  0.8112,  0.9957],
         [ 1.7673,  0.3515,  0.7571],
         [ 0.2852,  0.7839,  1.0487],
         ...,
         [ 4.4094,  0.1156,  0.7241],
         [ 0.6713,  0.5524,  0.8366],
         [23.9601,  1.3598,  2.0432]],

        [[ 0.7978,  0.9614,  1.1884],
         [12.3466,  0.3677,  1.2806],
         [ 2.7373,  0.3483,  0.9347],
         ...,
         [ 0.1595,  6.7062,  0.8976],
         [35.2980,  0.1522,  1.1726],
         [ 5.7149,  0.4135,  1.0874]],

        [[ 0.5383,  0.9516,  0.9206],
         [ 0.6218,  0.8288,  1.8799],
         [22.3592,  0.0854,  1.2225],
         ...,
         [29.1199,  0.1274,  1.2364],
         [ 5.1106,  0.1394,  0.6330],
         [ 0.6110,  1.8098,  0.8393


Train Diffusion:  58%|█████▊    | 2882/5001 [2:12:54<1:32:57,  2.63s/it][A
Train Diffusion:  58%|█████▊    | 2883/5001 [2:12:56<1:32:42,  2.63s/it][A
Train Diffusion:  58%|█████▊    | 2884/5001 [2:12:59<1:32:22,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2885/5001 [2:13:01<1:32:10,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2886/5001 [2:13:04<1:32:08,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2887/5001 [2:13:07<1:31:59,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2888/5001 [2:13:09<1:31:44,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2889/5001 [2:13:12<1:31:29,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2890/5001 [2:13:14<1:31:29,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2891/5001 [2:13:17<1:31:29,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329632886.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7950, 0.5032, 1.2668],
        [8.8349, 0.4843, 1.3190],
        [8.7225, 0.5145, 1.2924]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5735,  0.9512,  0.8646],
         [11.3570,  0.6778,  0.8535],
         [ 1.7890,  0.4713,  0.7184],
         ...,
         [33.0722,  0.2179,  0.9124],
         [ 4.0336,  0.4770,  0.4649],
         [ 0.6441,  1.6391,  9.7584]],

        [[ 1.5910,  0.8199,  4.7739],
         [ 0.9191,  0.6731,  1.0303],
         [ 0.7795,  0.6771,  1.0504],
         ...,
         [ 0.6533,  0.4863,  0.5533],
         [33.4985,  0.4227,  1.3393],
         [ 5.1604,  0.3711,  1.5621]],

        [[ 0.7386,  0.9593,  0.8296],
         [ 0.6823,  0.6769,  1.9006],
         [28.5958,  0.1112,  1.1979],
         ...,
         [ 2.5706,  0.3714,  0.7073],
         [ 0.6345, 10.4725,  0.7578],
         [11.7957,  1.5555,  1.8292


Train Diffusion:  58%|█████▊    | 2892/5001 [2:13:20<1:31:27,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2893/5001 [2:13:22<1:31:52,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2894/5001 [2:13:25<1:31:41,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2895/5001 [2:13:27<1:31:29,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2896/5001 [2:13:30<1:31:37,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2897/5001 [2:13:33<1:31:35,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2898/5001 [2:13:35<1:31:51,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2899/5001 [2:13:38<1:31:36,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2900/5001 [2:13:41<1:31:18,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2901/5001 [2:13:43<1:31:16,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321093913.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9417, 0.5111, 1.3016],
        [8.7743, 0.4883, 1.2793],
        [8.6165, 0.4848, 1.2900]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8113,  0.9625,  0.7969],
         [ 0.7308,  0.6577,  1.8485],
         [26.6018,  0.2665,  1.2023],
         ...,
         [ 1.0652,  0.4542,  1.9506],
         [ 0.2860,  0.6061,  0.4477],
         [21.4685,  2.0378,  1.8284]],

        [[ 0.5323,  0.9524,  0.7676],
         [11.2815,  0.9069,  0.8839],
         [ 1.5553,  0.4623,  0.7545],
         ...,
         [ 0.2446,  0.5279,  1.1811],
         [36.3268,  0.0819,  1.3787],
         [ 6.0662,  0.3432,  0.7837]],

        [[ 1.5622,  0.8234,  5.3220],
         [ 0.9127,  0.6253,  1.2373],
         [ 0.6746,  0.7421,  1.1433],
         ...,
         [26.9058,  0.1315,  1.1113],
         [ 4.9805,  0.1346,  0.7156],
         [ 0.4286,  3.1524,  1.4516


Train Diffusion:  58%|█████▊    | 2902/5001 [2:13:46<1:31:06,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2903/5001 [2:13:48<1:31:02,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2904/5001 [2:13:51<1:31:59,  2.63s/it][A
Train Diffusion:  58%|█████▊    | 2905/5001 [2:13:54<1:32:02,  2.63s/it][A
Train Diffusion:  58%|█████▊    | 2906/5001 [2:13:56<1:31:37,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2907/5001 [2:13:59<1:31:14,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2908/5001 [2:14:02<1:31:12,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2909/5001 [2:14:04<1:31:16,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2910/5001 [2:14:07<1:31:51,  2.64s/it][A
Train Diffusion:  58%|█████▊    | 2911/5001 [2:14:10<1:32:28,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328901734.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8592, 0.4888, 1.2989],
        [8.6655, 0.4920, 1.2635],
        [8.6934, 0.4927, 1.3022]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3753e+00, 8.5933e-01, 5.8883e-01],
         [1.5363e+00, 3.9932e-01, 2.2812e-01],
         [5.1836e-05, 2.3738e+00, 1.9622e-01],
         ...,
         [9.1233e-01, 4.1049e-01, 7.2655e-01],
         [3.0209e-01, 2.9920e+00, 9.9423e-01],
         [2.4794e+01, 1.2152e+00, 2.0130e+00]],

        [[1.0832e+00, 9.2771e-01, 2.0088e+00],
         [5.1323e+00, 6.9918e-01, 1.3369e+00],
         [1.2487e+00, 1.3571e+00, 9.4412e-01],
         ...,
         [3.1474e+00, 2.1512e-01, 7.4011e-01],
         [1.1815e+01, 7.1817e-01, 8.5863e-01],
         [2.0987e+00, 1.1520e+00, 8.0771e-01]],

        [[4.5289e-01, 9.5367e-01, 9.9062e-01],
         [2.5913e+00, 6.6043e-01, 1.8989e+00],
         [1.3792e+00, 3.3518e-01, 5.7


Train Diffusion:  58%|█████▊    | 2912/5001 [2:14:12<1:31:49,  2.64s/it][A
Train Diffusion:  58%|█████▊    | 2913/5001 [2:14:15<1:31:15,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2914/5001 [2:14:17<1:31:07,  2.62s/it][A
Train Diffusion:  58%|█████▊    | 2915/5001 [2:14:20<1:30:41,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2916/5001 [2:14:23<1:30:44,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2917/5001 [2:14:25<1:30:33,  2.61s/it][A
Train Diffusion:  58%|█████▊    | 2918/5001 [2:14:28<1:30:17,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2919/5001 [2:14:30<1:30:09,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2920/5001 [2:14:33<1:30:14,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2921/5001 [2:14:35<1:30:04,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328495792.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6895, 0.4905, 1.2937],
        [8.6296, 0.4889, 1.2847],
        [8.8182, 0.4913, 1.3165]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.3642e-01, 9.5453e-01, 9.0235e-01],
         [5.8556e-01, 7.8252e-01, 1.9059e+00],
         [2.6838e+01, 2.4850e-01, 1.2033e+00],
         ...,
         [3.1920e-01, 6.3891e-01, 5.8054e-01],
         [3.2886e+01, 4.4295e-01, 9.3611e-01],
         [5.4769e+00, 1.4654e-01, 5.5681e+00]],

        [[1.6058e+00, 8.0724e-01, 3.4285e+00],
         [1.2597e+00, 6.1054e-01, 9.1334e-01],
         [6.6014e-01, 7.4886e-01, 1.2833e+00],
         ...,
         [1.1964e+01, 7.0963e-02, 1.5352e+00],
         [5.6815e+00, 1.0207e-01, 5.6236e-01],
         [1.0152e+00, 2.2105e+00, 1.5211e+00]],

        [[6.6000e-01, 9.5560e-01, 9.4162e-01],
         [1.2842e+01, 5.4955e-01, 1.0826e+00],
         [2.2396e+00, 3.9016e-01, 7.2


Train Diffusion:  58%|█████▊    | 2922/5001 [2:14:38<1:30:01,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2923/5001 [2:14:41<1:29:53,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2924/5001 [2:14:43<1:29:57,  2.60s/it][A
Train Diffusion:  58%|█████▊    | 2925/5001 [2:14:46<1:30:04,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2926/5001 [2:14:48<1:29:57,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2927/5001 [2:14:51<1:29:49,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2928/5001 [2:14:54<1:29:45,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2929/5001 [2:14:56<1:29:39,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2930/5001 [2:14:59<1:29:41,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2931/5001 [2:15:02<1:30:04,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340182873.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7748, 0.5084, 1.2954],
        [8.6609, 0.5125, 1.3158],
        [8.9428, 0.4818, 1.2967]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5965e+00, 8.0181e-01, 4.7923e+00],
         [1.4626e+00, 2.8498e-02, 8.1116e-01],
         [1.1810e+00, 6.2151e-01, 7.5189e-01],
         ...,
         [2.2161e-01, 1.4920e+00, 2.6367e+00],
         [2.2905e-01, 1.2590e+00, 9.2939e+00],
         [9.5655e-01, 2.8740e+00, 1.9121e+00]],

        [[7.3220e-01, 9.5975e-01, 8.1449e-01],
         [6.0274e-01, 8.1853e-01, 1.1422e+00],
         [1.3858e+01, 4.0201e-01, 1.1797e+00],
         ...,
         [2.8769e-01, 1.2612e+00, 7.6693e+00],
         [6.9153e-04, 9.6119e-01, 3.4687e+00],
         [1.6193e-01, 1.2328e+00, 9.5507e+00]],

        [[5.7671e-01, 9.5211e-01, 6.1396e-01],
         [8.7644e+00, 1.4229e+00, 3.2042e+00],
         [1.0038e+00, 7.8495e-01, 1.0


Train Diffusion:  59%|█████▊    | 2932/5001 [2:15:04<1:30:07,  2.61s/it][A
Train Diffusion:  59%|█████▊    | 2933/5001 [2:15:07<1:30:09,  2.62s/it][A
Train Diffusion:  59%|█████▊    | 2934/5001 [2:15:09<1:30:07,  2.62s/it][A
Train Diffusion:  59%|█████▊    | 2935/5001 [2:15:12<1:29:51,  2.61s/it][A
Train Diffusion:  59%|█████▊    | 2936/5001 [2:15:15<1:29:32,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2937/5001 [2:15:17<1:29:25,  2.60s/it][A
Train Diffusion:  59%|█████▊    | 2938/5001 [2:15:20<1:29:21,  2.60s/it][A
Train Diffusion:  59%|█████▉    | 2939/5001 [2:15:22<1:29:51,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2940/5001 [2:15:25<1:29:55,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2941/5001 [2:15:28<1:29:43,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337213196.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8011, 0.4989, 1.3018],
        [8.6824, 0.4842, 1.3327],
        [8.6421, 0.4908, 1.2796]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8307,  0.9624,  0.7927],
         [ 0.7892,  0.6162,  1.9414],
         [28.1140,  0.1889,  1.2013],
         ...,
         [ 1.2088,  0.0708,  0.8503],
         [ 2.6122,  0.1963,  0.3815],
         [ 8.3779,  2.4148,  1.6056]],

        [[ 0.5239,  0.9526,  0.8950],
         [12.0525,  0.6203,  0.8481],
         [ 1.9122,  0.4376,  0.7530],
         ...,
         [ 0.4807,  0.4712,  1.0947],
         [43.7341,  0.0744,  1.3494],
         [ 6.5958,  0.2100,  0.8899]],

        [[ 1.5489,  0.8254,  5.0231],
         [ 0.7967,  0.8588,  1.3119],
         [ 0.6513,  0.6992,  1.2748],
         ...,
         [22.1820,  0.2206,  1.1104],
         [ 3.5529,  0.2352,  0.7625],
         [11.9579,  1.5905,  1.7448


Train Diffusion:  59%|█████▉    | 2942/5001 [2:15:30<1:30:25,  2.64s/it][A
Train Diffusion:  59%|█████▉    | 2943/5001 [2:15:33<1:30:07,  2.63s/it][A
Train Diffusion:  59%|█████▉    | 2944/5001 [2:15:36<1:29:48,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2945/5001 [2:15:38<1:29:43,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2946/5001 [2:15:41<1:29:44,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2947/5001 [2:15:43<1:29:28,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2948/5001 [2:15:46<1:29:17,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2949/5001 [2:15:49<1:29:15,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2950/5001 [2:15:51<1:29:29,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2951/5001 [2:15:54<1:29:19,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 313927862.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7453, 0.4969, 1.3117],
        [8.7992, 0.5131, 1.2862],
        [8.8946, 0.4615, 1.3088]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6057e+00, 8.0637e-01, 2.9564e+00],
         [1.3283e+00, 5.8123e-01, 8.6224e-01],
         [6.7019e-01, 7.4847e-01, 1.2979e+00],
         ...,
         [3.9510e+00, 1.5186e-01, 8.3270e-01],
         [3.3868e-01, 2.8694e+00, 1.4985e+00],
         [1.5350e+00, 2.1009e+00, 7.0822e+00]],

        [[6.1145e-01, 9.5400e-01, 9.2143e-01],
         [5.7293e-01, 8.0616e-01, 1.8839e+00],
         [2.4026e+01, 2.6580e-01, 1.2384e+00],
         ...,
         [2.5587e+01, 6.7782e-02, 1.3383e+00],
         [5.6343e+00, 9.1149e-02, 1.0165e+00],
         [9.8943e-03, 1.5100e+00, 2.7347e+00]],

        [[6.8684e-01, 9.5729e-01, 9.8975e-01],
         [1.2999e+01, 5.0079e-01, 1.1359e+00],
         [2.3381e+00, 3.7753e-01, 7.2


Train Diffusion:  59%|█████▉    | 2952/5001 [2:15:57<1:30:04,  2.64s/it][A
Train Diffusion:  59%|█████▉    | 2953/5001 [2:15:59<1:29:49,  2.63s/it][A
Train Diffusion:  59%|█████▉    | 2954/5001 [2:16:02<1:29:36,  2.63s/it][A
Train Diffusion:  59%|█████▉    | 2955/5001 [2:16:04<1:29:08,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2956/5001 [2:16:07<1:29:00,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2957/5001 [2:16:10<1:31:26,  2.68s/it][A
Train Diffusion:  59%|█████▉    | 2958/5001 [2:16:12<1:31:35,  2.69s/it][A
Train Diffusion:  59%|█████▉    | 2959/5001 [2:16:15<1:30:44,  2.67s/it][A
Train Diffusion:  59%|█████▉    | 2960/5001 [2:16:18<1:29:55,  2.64s/it][A
Train Diffusion:  59%|█████▉    | 2961/5001 [2:16:20<1:29:27,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335449244.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7776, 0.4847, 1.2699],
        [8.8281, 0.4842, 1.3100],
        [8.7404, 0.5087, 1.2961]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8711,  0.9617,  0.7885],
         [ 0.8373,  0.5912,  1.9273],
         [28.3360,  0.1670,  1.2001],
         ...,
         [ 0.3933,  0.7641,  0.2277],
         [ 0.6696,  0.6845,  0.0735],
         [ 1.3345,  1.8217,  0.6503]],

        [[ 0.5050,  0.9533,  0.8873],
         [11.6474,  0.6519,  0.7697],
         [ 1.8266,  0.4499,  0.7297],
         ...,
         [ 0.3966,  3.0681,  0.7707],
         [ 0.3784,  2.1759, 14.3477],
         [ 0.5872,  2.9064,  2.4313]],

        [[ 1.5309,  0.8293,  5.3092],
         [ 0.7875,  0.8866,  1.3749],
         [ 0.6786,  0.6889,  1.2895],
         ...,
         [ 0.9733, 26.1111,  0.7409],
         [ 0.0470,  1.7185,  4.0558],
         [ 0.1204,  1.2520,  4.0806


Train Diffusion:  59%|█████▉    | 2962/5001 [2:16:23<1:29:19,  2.63s/it][A
Train Diffusion:  59%|█████▉    | 2963/5001 [2:16:25<1:28:52,  2.62s/it][A
Train Diffusion:  59%|█████▉    | 2964/5001 [2:16:28<1:28:42,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2965/5001 [2:16:31<1:28:27,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2966/5001 [2:16:33<1:28:24,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2967/5001 [2:16:36<1:28:27,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2968/5001 [2:16:39<1:28:10,  2.60s/it][A
Train Diffusion:  59%|█████▉    | 2969/5001 [2:16:41<1:28:11,  2.60s/it][A
Train Diffusion:  59%|█████▉    | 2970/5001 [2:16:44<1:28:12,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2971/5001 [2:16:46<1:28:10,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323756291.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9047, 0.5052, 1.3238],
        [8.7211, 0.4743, 1.3145],
        [8.5308, 0.4854, 1.3088]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5613,  0.9509,  0.7928],
         [11.8522,  0.8155,  0.9120],
         [ 1.7750,  0.4187,  0.7661],
         ...,
         [ 3.0007,  0.1334,  1.0525],
         [29.3485,  0.7613,  3.1737],
         [ 0.6291,  3.7425,  2.5471]],

        [[ 1.5836,  0.8167,  4.9657],
         [ 1.0242,  0.6875,  1.1924],
         [ 0.6014,  0.7718,  1.1348],
         ...,
         [25.6947,  0.6872,  0.7236],
         [ 1.9638,  1.9013,  0.6491],
         [ 0.3726,  1.2202, 11.2514]],

        [[ 0.7584,  0.9604,  0.8165],
         [ 0.6786,  0.6976,  1.8701],
         [26.6198,  0.2703,  1.2053],
         ...,
         [ 2.4371,  0.4984,  0.9796],
         [ 2.2030,  0.1325,  3.8785],
         [ 0.6656,  1.8727,  1.0218


Train Diffusion:  59%|█████▉    | 2972/5001 [2:16:49<1:28:05,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2973/5001 [2:16:52<1:27:57,  2.60s/it][A
Train Diffusion:  59%|█████▉    | 2974/5001 [2:16:54<1:28:04,  2.61s/it][A
Train Diffusion:  59%|█████▉    | 2975/5001 [2:16:57<1:27:57,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2976/5001 [2:16:59<1:27:50,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2977/5001 [2:17:02<1:27:49,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2978/5001 [2:17:05<1:27:46,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2979/5001 [2:17:07<1:27:48,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2980/5001 [2:17:10<1:27:39,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2981/5001 [2:17:12<1:27:35,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325009670.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8607, 0.5028, 1.2835],
        [8.7626, 0.5083, 1.2731],
        [8.7680, 0.4951, 1.3040]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.5404e-01, 9.5222e-01, 9.1261e-01],
         [1.1409e+01, 5.8505e-01, 9.3718e-01],
         [1.8650e+00, 4.5911e-01, 7.2003e-01],
         ...,
         [2.7487e-01, 7.7344e-01, 1.7154e+00],
         [3.6542e-01, 5.1823e-01, 7.9005e-01],
         [1.2414e-03, 1.2900e+00, 3.1312e+00]],

        [[1.5820e+00, 8.2844e-01, 4.9402e+00],
         [8.1332e-01, 8.4426e-01, 1.1453e+00],
         [7.9067e-01, 6.9607e-01, 1.3370e+00],
         ...,
         [3.1933e+01, 1.9076e-01, 1.0265e+00],
         [4.3124e+00, 6.6089e-02, 1.3316e+00],
         [3.9599e+00, 2.5225e-01, 1.6763e+00]],

        [[7.6891e-01, 9.6203e-01, 8.1055e-01],
         [7.4269e-01, 6.2910e-01, 1.9122e+00],
         [2.8177e+01, 2.0236e-01, 1.1


Train Diffusion:  60%|█████▉    | 2982/5001 [2:17:15<1:27:46,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2983/5001 [2:17:18<1:27:46,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2984/5001 [2:17:20<1:27:32,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2985/5001 [2:17:23<1:27:19,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2986/5001 [2:17:25<1:27:34,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2987/5001 [2:17:28<1:27:29,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2988/5001 [2:17:31<1:27:39,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2989/5001 [2:17:33<1:27:43,  2.62s/it][A
Train Diffusion:  60%|█████▉    | 2990/5001 [2:17:36<1:27:36,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2991/5001 [2:17:38<1:27:26,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334937014.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9947, 0.5082, 1.3256],
        [8.6373, 0.5002, 1.2619],
        [8.6968, 0.5131, 1.2843]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5628,  0.9511,  0.9337],
         [ 0.5463,  0.8533,  1.6111],
         [26.7009,  0.1792,  1.1937],
         ...,
         [ 3.7415,  0.1943,  0.7769],
         [ 0.6310,  0.5253,  0.6093],
         [ 0.8017,  1.7638,  4.8444]],

        [[ 0.7526,  0.9603,  1.1338],
         [13.0785,  0.3965,  1.2587],
         [ 2.6537,  0.3806,  0.6430],
         ...,
         [32.4408,  0.1093,  1.2167],
         [ 5.2447,  0.1371,  0.8717],
         [18.9702,  1.3924,  1.9463]],

        [[ 1.5890,  0.8057,  1.4190],
         [ 1.6428,  0.3918,  0.7571],
         [ 0.6803,  0.7521,  1.3994],
         ...,
         [ 0.5136,  0.6067,  1.0372],
         [39.2861,  0.0776,  1.3741],
         [ 6.2565,  0.2937,  1.1167


Train Diffusion:  60%|█████▉    | 2992/5001 [2:17:41<1:27:13,  2.61s/it][A
Train Diffusion:  60%|█████▉    | 2993/5001 [2:17:44<1:26:59,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2994/5001 [2:17:46<1:27:00,  2.60s/it][A
Train Diffusion:  60%|█████▉    | 2995/5001 [2:17:49<1:28:11,  2.64s/it][A
Train Diffusion:  60%|█████▉    | 2996/5001 [2:17:52<1:29:01,  2.66s/it][A
Train Diffusion:  60%|█████▉    | 2997/5001 [2:17:54<1:28:21,  2.65s/it][A
Train Diffusion:  60%|█████▉    | 2998/5001 [2:17:57<1:27:50,  2.63s/it][A
Train Diffusion:  60%|█████▉    | 2999/5001 [2:18:00<1:29:49,  2.69s/it][A
Train Diffusion:  60%|█████▉    | 3000/5001 [2:18:02<1:28:58,  2.67s/it][A
Train Diffusion:  60%|██████    | 3001/5001 [2:18:05<1:28:17,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326307862.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6603, 0.4788, 1.2848],
        [8.7315, 0.5037, 1.2522],
        [9.0213, 0.4882, 1.3095]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3927e-01, 9.5381e-01, 9.2869e-01],
         [5.7463e-01, 7.7732e-01, 1.6531e+00],
         [2.0452e+01, 1.2666e-01, 1.2398e+00],
         ...,
         [8.9639e-01, 2.2145e+00, 9.5468e-01],
         [1.3315e-02, 1.6522e+00, 7.2974e-01],
         [3.7198e-01, 1.1626e+00, 1.0458e+01]],

        [[1.5723e+00, 8.1126e-01, 6.9097e-01],
         [1.7729e+00, 3.6427e-01, 6.9257e-01],
         [1.8144e-01, 8.1528e-01, 1.2326e+00],
         ...,
         [3.8253e+00, 1.2436e-01, 9.1595e-01],
         [2.8805e+01, 2.0734e-01, 1.6458e+00],
         [3.0915e+00, 1.7504e+00, 1.7252e+00]],

        [[7.9466e-01, 9.6374e-01, 1.1766e+00],
         [1.2572e+01, 3.6969e-01, 1.2746e+00],
         [2.7722e+00, 3.3319e-01, 9.7


Train Diffusion:  60%|██████    | 3002/5001 [2:18:08<1:27:45,  2.63s/it][A
Train Diffusion:  60%|██████    | 3003/5001 [2:18:10<1:27:21,  2.62s/it][A
Train Diffusion:  60%|██████    | 3004/5001 [2:18:14<1:42:25,  3.08s/it][A
Train Diffusion:  60%|██████    | 3005/5001 [2:18:17<1:38:08,  2.95s/it][A
Train Diffusion:  60%|██████    | 3006/5001 [2:18:20<1:34:40,  2.85s/it][A
Train Diffusion:  60%|██████    | 3007/5001 [2:18:22<1:31:59,  2.77s/it][A
Train Diffusion:  60%|██████    | 3008/5001 [2:18:25<1:30:18,  2.72s/it][A
Train Diffusion:  60%|██████    | 3009/5001 [2:18:27<1:28:50,  2.68s/it][A
Train Diffusion:  60%|██████    | 3010/5001 [2:18:30<1:27:51,  2.65s/it][A
Train Diffusion:  60%|██████    | 3011/5001 [2:18:32<1:27:14,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337675072.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7263, 0.4809, 1.2541],
        [8.8089, 0.4890, 1.2788],
        [8.7352, 0.4980, 1.3101]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5673,  0.9506,  0.7662],
         [11.6071,  0.8773,  0.8545],
         [ 1.7022,  0.4290,  0.7709],
         ...,
         [ 0.7125,  0.5332,  0.9553],
         [23.8574,  0.7369,  2.4949],
         [ 5.5579,  3.2171,  2.0170]],

        [[ 0.7504,  0.9594,  0.8201],
         [ 0.6709,  0.6936,  1.8592],
         [27.2075,  0.2508,  1.2010],
         ...,
         [ 3.6225,  0.1769,  0.8371],
         [ 0.4932,  0.6402,  5.2458],
         [ 0.7220,  2.3010,  1.8094]],

        [[ 1.5820,  0.8154,  5.1180],
         [ 1.0240,  0.6384,  1.2010],
         [ 0.6095,  0.7455,  1.1851],
         ...,
         [35.8672,  0.0783,  1.1839],
         [ 5.7360,  0.1016,  0.8036],
         [ 0.4156,  1.5612,  1.8113


Train Diffusion:  60%|██████    | 3012/5001 [2:18:35<1:26:41,  2.62s/it][A
Train Diffusion:  60%|██████    | 3013/5001 [2:18:38<1:26:24,  2.61s/it][A
Train Diffusion:  60%|██████    | 3014/5001 [2:18:40<1:26:00,  2.60s/it][A
Train Diffusion:  60%|██████    | 3015/5001 [2:18:43<1:25:53,  2.59s/it][A
Train Diffusion:  60%|██████    | 3016/5001 [2:18:45<1:25:36,  2.59s/it][A
Train Diffusion:  60%|██████    | 3017/5001 [2:18:48<1:25:22,  2.58s/it][A
Train Diffusion:  60%|██████    | 3018/5001 [2:18:51<1:25:26,  2.59s/it][A
Train Diffusion:  60%|██████    | 3019/5001 [2:18:53<1:25:28,  2.59s/it][A
Train Diffusion:  60%|██████    | 3020/5001 [2:18:56<1:25:23,  2.59s/it][A
Train Diffusion:  60%|██████    | 3021/5001 [2:18:58<1:25:14,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321630700.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7293, 0.4937, 1.3087],
        [8.7277, 0.5077, 1.2987],
        [8.9612, 0.4840, 1.3097]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6006,  0.9531,  0.9231],
         [12.6868,  0.5833,  1.1147],
         [ 2.0614,  0.4297,  0.7867],
         ...,
         [ 3.4971,  0.1516,  0.9909],
         [33.0214,  0.3515,  2.7501],
         [ 6.1907,  1.6264,  2.0992]],

        [[ 0.6994,  0.9576,  0.8537],
         [ 0.6442,  0.7087,  1.9065],
         [25.4768,  0.2885,  1.2061],
         ...,
         [35.2040,  0.2377,  0.8216],
         [ 3.7647,  0.5004,  0.3060],
         [ 1.6586,  0.7577,  0.9278]],

        [[ 1.6055,  0.8167,  4.0526],
         [ 1.0292,  0.7372,  1.0103],
         [ 0.6637,  0.8423,  0.9807],
         ...,
         [ 0.7692,  0.6800,  0.6990],
         [ 0.7778,  0.4492,  0.6396],
         [ 0.8031,  2.2738,  0.8037


Train Diffusion:  60%|██████    | 3022/5001 [2:19:01<1:25:09,  2.58s/it][A
Train Diffusion:  60%|██████    | 3023/5001 [2:19:03<1:25:27,  2.59s/it][A
Train Diffusion:  60%|██████    | 3024/5001 [2:19:06<1:25:21,  2.59s/it][A
Train Diffusion:  60%|██████    | 3025/5001 [2:19:09<1:25:18,  2.59s/it][A
Train Diffusion:  61%|██████    | 3026/5001 [2:19:11<1:25:20,  2.59s/it][A
Train Diffusion:  61%|██████    | 3027/5001 [2:19:14<1:25:15,  2.59s/it][A
Train Diffusion:  61%|██████    | 3028/5001 [2:19:16<1:25:14,  2.59s/it][A
Train Diffusion:  61%|██████    | 3029/5001 [2:19:19<1:25:13,  2.59s/it][A
Train Diffusion:  61%|██████    | 3030/5001 [2:19:22<1:25:08,  2.59s/it][A
Train Diffusion:  61%|██████    | 3031/5001 [2:19:24<1:25:12,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322786180.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8932, 0.4990, 1.2778],
        [8.6336, 0.5066, 1.3181],
        [8.6911, 0.4932, 1.2769]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6130,  0.9526,  0.9191],
         [ 0.5728,  0.8016,  1.8064],
         [28.4603,  0.1035,  1.2017],
         ...,
         [ 0.6632,  0.3035,  0.3409],
         [ 4.1887,  0.1087,  0.7017],
         [ 0.5973,  1.9303,  1.0726]],

        [[ 1.6058,  0.8065,  3.0407],
         [ 1.3121,  0.5409,  0.8090],
         [ 0.7436,  0.6814,  0.7665],
         ...,
         [ 4.6435,  0.5209,  2.3367],
         [ 0.6823,  0.6027,  0.7924],
         [22.6296,  1.3814,  2.0070]],

        [[ 0.6847,  0.9556,  0.9822],
         [12.2112,  0.5305,  1.1249],
         [ 2.1471,  0.4572,  1.1372],
         ...,
         [ 0.2095,  0.5403,  1.0505],
         [41.4622,  0.0832,  1.3386],
         [ 6.1743,  0.3157,  0.9057


Train Diffusion:  61%|██████    | 3032/5001 [2:19:27<1:25:05,  2.59s/it][A
Train Diffusion:  61%|██████    | 3033/5001 [2:19:29<1:24:58,  2.59s/it][A
Train Diffusion:  61%|██████    | 3034/5001 [2:19:32<1:25:23,  2.60s/it][A
Train Diffusion:  61%|██████    | 3035/5001 [2:19:35<1:25:24,  2.61s/it][A
Train Diffusion:  61%|██████    | 3036/5001 [2:19:37<1:25:18,  2.61s/it][A
Train Diffusion:  61%|██████    | 3037/5001 [2:19:40<1:24:58,  2.60s/it][A
Train Diffusion:  61%|██████    | 3038/5001 [2:19:42<1:24:49,  2.59s/it][A
Train Diffusion:  61%|██████    | 3039/5001 [2:19:45<1:24:56,  2.60s/it][A
Train Diffusion:  61%|██████    | 3040/5001 [2:19:48<1:24:52,  2.60s/it][A
Train Diffusion:  61%|██████    | 3041/5001 [2:19:50<1:24:41,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327246070.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7797, 0.5088, 1.3195],
        [8.8572, 0.5157, 1.3012],
        [8.6178, 0.4945, 1.2713]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6091,  0.9529,  0.8520],
         [12.1774,  0.6952,  0.9301],
         [ 1.9682,  0.4028,  0.7270],
         ...,
         [ 3.8781,  0.1733,  0.8145],
         [ 0.5123,  0.7113,  0.5561],
         [12.7114,  1.6903,  1.8611]],

        [[ 1.6042,  0.8106,  4.3546],
         [ 1.1450,  0.6269,  1.0242],
         [ 0.6429,  0.7583,  1.2189],
         ...,
         [31.9182,  0.0653,  1.2714],
         [ 5.8250,  0.0909,  0.5667],
         [ 7.5876,  1.8906,  1.5801]],

        [[ 0.6900,  0.9565,  0.8612],
         [ 0.6254,  0.7466,  1.9014],
         [25.4589,  0.2923,  1.2142],
         ...,
         [ 0.4537,  0.6770,  0.8245],
         [34.0375,  0.0690,  1.3156],
         [ 6.4611,  0.2242,  0.7478


Train Diffusion:  61%|██████    | 3042/5001 [2:19:53<1:24:40,  2.59s/it][A
Train Diffusion:  61%|██████    | 3043/5001 [2:19:55<1:24:42,  2.60s/it][A
Train Diffusion:  61%|██████    | 3044/5001 [2:19:58<1:24:29,  2.59s/it][A
Train Diffusion:  61%|██████    | 3045/5001 [2:20:01<1:24:19,  2.59s/it][A
Train Diffusion:  61%|██████    | 3046/5001 [2:20:03<1:25:55,  2.64s/it][A
Train Diffusion:  61%|██████    | 3047/5001 [2:20:06<1:25:28,  2.62s/it][A
Train Diffusion:  61%|██████    | 3048/5001 [2:20:08<1:24:55,  2.61s/it][A
Train Diffusion:  61%|██████    | 3049/5001 [2:20:11<1:24:41,  2.60s/it][A
Train Diffusion:  61%|██████    | 3050/5001 [2:20:14<1:24:38,  2.60s/it][A
Train Diffusion:  61%|██████    | 3051/5001 [2:20:16<1:25:35,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338569238.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8032, 0.5271, 1.2760],
        [8.6254, 0.4954, 1.3303],
        [8.7573, 0.5223, 1.2858]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4628,  0.8404,  4.3434],
         [ 0.7396,  1.0560,  1.2825],
         [ 0.6632,  0.9915,  0.7183],
         ...,
         [ 0.3892,  0.5757,  0.9242],
         [37.9874,  0.1032,  1.1845],
         [ 5.9487,  0.3802,  0.8731]],

        [[ 0.4744,  0.9523,  1.0090],
         [12.6134,  0.4913,  1.2640],
         [ 1.9761,  0.5034,  0.9919],
         ...,
         [ 2.9423,  0.1275,  1.7299],
         [ 2.9747,  0.2098,  0.7916],
         [ 0.5205,  1.8549,  1.2069]],

        [[ 0.9726,  0.9480,  0.7988],
         [ 0.9601,  0.5447,  1.6874],
         [24.7268,  0.2928,  1.1838],
         ...,
         [ 0.5711,  0.4209,  3.2353],
         [ 0.3514,  0.5089,  0.8926],
         [22.2372,  1.4118,  1.9793


Train Diffusion:  61%|██████    | 3052/5001 [2:20:19<1:26:47,  2.67s/it][A
Train Diffusion:  61%|██████    | 3053/5001 [2:20:22<1:25:51,  2.64s/it][A
Train Diffusion:  61%|██████    | 3054/5001 [2:20:24<1:25:32,  2.64s/it][A
Train Diffusion:  61%|██████    | 3055/5001 [2:20:27<1:25:11,  2.63s/it][A
Train Diffusion:  61%|██████    | 3056/5001 [2:20:30<1:24:35,  2.61s/it][A
Train Diffusion:  61%|██████    | 3057/5001 [2:20:32<1:24:32,  2.61s/it][A
Train Diffusion:  61%|██████    | 3058/5001 [2:20:35<1:24:18,  2.60s/it][A
Train Diffusion:  61%|██████    | 3059/5001 [2:20:37<1:24:09,  2.60s/it][A
Train Diffusion:  61%|██████    | 3060/5001 [2:20:40<1:23:57,  2.60s/it][A
Train Diffusion:  61%|██████    | 3061/5001 [2:20:42<1:23:51,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339850867.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7991, 0.5186, 1.3210],
        [8.6663, 0.5082, 1.3157],
        [8.6880, 0.4810, 1.2585]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5683,  0.9507,  0.8403],
         [11.9619,  0.7163,  0.8747],
         [ 1.8676,  0.3956,  0.8595],
         ...,
         [ 0.7740,  3.3289,  1.6717],
         [ 0.6693,  1.8765,  1.0181],
         [ 0.6212,  2.2642,  2.6950]],

        [[ 0.7458,  0.9592,  0.8247],
         [ 0.6778,  0.6898,  1.9089],
         [23.5475,  0.3159,  1.2426],
         ...,
         [ 9.8856,  0.1160,  0.7246],
         [ 5.5609,  0.0735,  0.4473],
         [ 0.6572,  2.5533,  0.5434]],

        [[ 1.5895,  0.8177,  4.8052],
         [ 0.9794,  0.6967,  1.1293],
         [ 0.6473,  0.7527,  1.2238],
         ...,
         [ 0.2882,  0.6130,  0.4214],
         [26.6287,  0.5687,  0.4853],
         [ 5.4307,  0.1157,  2.8271


Train Diffusion:  61%|██████    | 3062/5001 [2:20:45<1:23:45,  2.59s/it][A
Train Diffusion:  61%|██████    | 3063/5001 [2:20:48<1:23:33,  2.59s/it][A
Train Diffusion:  61%|██████▏   | 3064/5001 [2:20:50<1:23:47,  2.60s/it][A
Train Diffusion:  61%|██████▏   | 3065/5001 [2:20:53<1:23:49,  2.60s/it][A
Train Diffusion:  61%|██████▏   | 3066/5001 [2:20:55<1:23:43,  2.60s/it][A
Train Diffusion:  61%|██████▏   | 3067/5001 [2:20:58<1:23:36,  2.59s/it][A
Train Diffusion:  61%|██████▏   | 3068/5001 [2:21:01<1:23:20,  2.59s/it][A
Train Diffusion:  61%|██████▏   | 3069/5001 [2:21:03<1:23:16,  2.59s/it][A
Train Diffusion:  61%|██████▏   | 3070/5001 [2:21:06<1:23:26,  2.59s/it][A
Train Diffusion:  61%|██████▏   | 3071/5001 [2:21:08<1:23:14,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326072880.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9463, 0.4798, 1.3188],
        [8.9138, 0.5088, 1.2856],
        [8.5311, 0.5061, 1.2798]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.7616e-01, 9.5415e-01, 9.5136e-01],
         [2.0447e+00, 8.9511e-01, 6.6094e-01],
         [8.1697e-01, 8.5300e-01, 9.6432e-01],
         ...,
         [1.6878e-01, 5.7307e-01, 6.6673e-01],
         [1.2535e-03, 1.3118e+00, 1.2770e+00],
         [3.8367e-01, 1.6154e+00, 6.9332e-01]],

        [[9.6363e-01, 9.5174e-01, 1.5844e+00],
         [8.1934e+00, 4.6175e-01, 1.3062e+00],
         [2.2241e+00, 5.8851e-01, 6.9433e-01],
         ...,
         [6.5467e-01, 9.4897e-02, 7.4615e-01],
         [3.8965e+00, 3.9394e+00, 5.2854e-01],
         [1.1229e+01, 2.4240e+00, 1.8683e+00]],

        [[1.4697e+00, 8.4108e-01, 8.3430e-01],
         [1.7528e+00, 3.3326e-01, 1.1592e+00],
         [1.9723e+01, 3.3194e-01, 1.1


Train Diffusion:  61%|██████▏   | 3072/5001 [2:21:11<1:23:06,  2.58s/it][A
Train Diffusion:  61%|██████▏   | 3073/5001 [2:21:14<1:23:02,  2.58s/it][A
Train Diffusion:  61%|██████▏   | 3074/5001 [2:21:16<1:22:51,  2.58s/it][A
Train Diffusion:  61%|██████▏   | 3075/5001 [2:21:19<1:23:12,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3076/5001 [2:21:21<1:23:00,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3077/5001 [2:21:24<1:22:50,  2.58s/it][A
Train Diffusion:  62%|██████▏   | 3078/5001 [2:21:26<1:22:56,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3079/5001 [2:21:29<1:22:47,  2.58s/it][A
Train Diffusion:  62%|██████▏   | 3080/5001 [2:21:32<1:23:27,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3081/5001 [2:21:34<1:23:22,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 313667027.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8173, 0.4844, 1.2970],
        [8.8274, 0.4933, 1.3258],
        [8.6152, 0.4763, 1.2894]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9482,  0.9536,  0.7924],
         [ 0.9801,  0.5138,  0.2066],
         [16.1517,  1.4666, 13.5296],
         ...,
         [ 0.4610,  1.7365,  2.6018],
         [ 0.0949,  3.5319,  1.3886],
         [21.8198,  1.4959,  1.8539]],

        [[ 1.4800,  0.8386,  5.5801],
         [ 0.9970,  1.1976,  1.3200],
         [ 0.3722,  7.8436,  2.4652],
         ...,
         [ 1.7949,  0.4455,  0.1284],
         [ 4.9694,  0.0505,  4.1668],
         [ 1.1470,  1.3472,  1.3377]],

        [[ 0.4810,  0.9538,  0.9842],
         [ 8.9213,  0.5384,  1.5921],
         [ 1.3109,  2.4252,  1.9973],
         ...,
         [ 0.1641,  0.5879,  0.4786],
         [23.1628,  0.6953,  0.4039],
         [ 4.0427,  0.5685,  2.2114


Train Diffusion:  62%|██████▏   | 3082/5001 [2:21:37<1:23:22,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3083/5001 [2:21:40<1:23:06,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3084/5001 [2:21:42<1:22:55,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3085/5001 [2:21:45<1:22:39,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3086/5001 [2:21:47<1:22:31,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3087/5001 [2:21:50<1:22:31,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3088/5001 [2:21:52<1:22:49,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3089/5001 [2:21:55<1:22:38,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3090/5001 [2:21:58<1:22:50,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3091/5001 [2:22:00<1:22:40,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334908646.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8762, 0.4938, 1.3202],
        [8.7742, 0.5018, 1.2962],
        [8.6655, 0.5043, 1.3073]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0038e-01, 9.5123e-01, 8.5674e-01],
         [1.1830e+01, 6.9039e-01, 9.7995e-01],
         [1.8729e+00, 4.3011e-01, 7.8951e-01],
         ...,
         [2.2864e+01, 3.2931e-02, 1.4343e+00],
         [6.0425e+00, 6.3769e-02, 2.5567e-01],
         [1.6273e+01, 2.9186e+00, 5.1928e+00]],

        [[6.9902e-01, 9.5570e-01, 8.5158e-01],
         [6.3614e-01, 7.2595e-01, 1.8630e+00],
         [2.7340e+01, 2.1456e-01, 1.1998e+00],
         ...,
         [1.9231e-01, 3.9594e+00, 6.9513e-01],
         [7.0280e+00, 1.2769e+00, 3.4684e+00],
         [9.3968e-01, 2.3193e+00, 1.7944e+00]],

        [[1.6034e+00, 8.1272e-01, 4.4500e+00],
         [1.0844e+00, 6.3709e-01, 9.8384e-01],
         [6.9964e-01, 7.6859e-01, 1.1


Train Diffusion:  62%|██████▏   | 3092/5001 [2:22:03<1:23:19,  2.62s/it][A
Train Diffusion:  62%|██████▏   | 3093/5001 [2:22:06<1:23:31,  2.63s/it][A
Train Diffusion:  62%|██████▏   | 3094/5001 [2:22:08<1:23:06,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3095/5001 [2:22:11<1:22:46,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3096/5001 [2:22:13<1:22:49,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3097/5001 [2:22:16<1:22:30,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3098/5001 [2:22:19<1:23:08,  2.62s/it][A
Train Diffusion:  62%|██████▏   | 3099/5001 [2:22:21<1:24:20,  2.66s/it][A
Train Diffusion:  62%|██████▏   | 3100/5001 [2:22:24<1:23:36,  2.64s/it][A
Train Diffusion:  62%|██████▏   | 3101/5001 [2:22:27<1:23:15,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323800883.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6970, 0.5057, 1.2867],
        [8.8060, 0.4693, 1.3417],
        [8.5536, 0.4934, 1.3047]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1157e-01, 9.5131e-01, 9.7865e-01],
         [1.2591e+01, 5.0066e-01, 1.3627e+00],
         [1.8623e+00, 6.5073e-01, 1.1740e+00],
         ...,
         [3.7882e-01, 3.5301e+00, 6.9067e-01],
         [2.9411e+01, 2.0797e-01, 1.2271e+00],
         [4.5085e+00, 5.9024e-01, 6.4264e+00]],

        [[8.5971e-01, 9.6044e-01, 7.8797e-01],
         [8.3696e-01, 5.8524e-01, 1.3517e+00],
         [2.3112e+01, 2.9328e-01, 1.1740e+00],
         ...,
         [3.1679e+01, 9.1847e-02, 1.2418e+00],
         [5.4685e+00, 6.7581e-02, 8.6786e-01],
         [1.4061e+00, 7.0539e-01, 3.1489e+00]],

        [[1.5326e+00, 8.2540e-01, 4.5598e+00],
         [7.4801e-01, 1.0415e+00, 1.2430e+00],
         [6.7193e-01, 1.0397e+00, 6.3


Train Diffusion:  62%|██████▏   | 3102/5001 [2:22:29<1:22:47,  2.62s/it][A
Train Diffusion:  62%|██████▏   | 3103/5001 [2:22:32<1:22:50,  2.62s/it][A
Train Diffusion:  62%|██████▏   | 3104/5001 [2:22:34<1:22:38,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3105/5001 [2:22:37<1:22:27,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3106/5001 [2:22:40<1:22:26,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3107/5001 [2:22:42<1:22:19,  2.61s/it][A
Train Diffusion:  62%|██████▏   | 3108/5001 [2:22:45<1:22:09,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3109/5001 [2:22:47<1:22:01,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3110/5001 [2:22:50<1:21:56,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3111/5001 [2:22:53<1:22:05,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336842198.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6771, 0.5075, 1.2887],
        [8.8787, 0.4790, 1.3077],
        [8.6681, 0.5070, 1.2815]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3351e-01, 9.5146e-01, 9.2510e-01],
         [6.3376e-01, 7.6413e-01, 1.2856e+00],
         [1.2353e-02, 7.0188e-01, 4.2570e+00],
         ...,
         [2.5135e+00, 8.4618e+00, 1.3490e+00],
         [7.9493e-01, 2.0355e-01, 5.1657e-02],
         [5.2654e+00, 2.2671e-01, 7.4315e-01]],

        [[1.5637e+00, 8.1276e-01, 6.2806e-01],
         [1.7772e+00, 3.6387e-01, 5.9179e-01],
         [1.1161e+01, 6.4543e-01, 4.5088e+00],
         ...,
         [1.1243e-04, 4.4946e-01, 3.8117e+00],
         [1.9037e-01, 4.0918e-01, 1.3237e+00],
         [1.0495e+00, 2.1830e+00, 1.8613e+00]],

        [[8.0802e-01, 9.6150e-01, 1.1856e+00],
         [1.1985e+01, 3.4587e-01, 1.2773e+00],
         [2.7790e+00, 2.8771e-01, 1.2


Train Diffusion:  62%|██████▏   | 3112/5001 [2:22:55<1:21:50,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3113/5001 [2:22:58<1:21:37,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3114/5001 [2:23:00<1:21:39,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3115/5001 [2:23:03<1:21:38,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3116/5001 [2:23:06<1:21:32,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3117/5001 [2:23:08<1:21:29,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3118/5001 [2:23:11<1:21:24,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3119/5001 [2:23:13<1:21:35,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3120/5001 [2:23:16<1:21:23,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3121/5001 [2:23:19<1:21:17,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 314219331.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7819, 0.5024, 1.3095],
        [8.7714, 0.4839, 1.2940],
        [8.6767, 0.4912, 1.2690]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6044,  0.8068,  3.0213],
         [ 1.3035,  0.5896,  0.8536],
         [ 0.6847,  0.7718,  1.1963],
         ...,
         [ 0.2435, 18.6324,  0.9651],
         [21.3151,  0.6966,  1.9612],
         [ 4.6295,  0.4930,  1.4223]],

        [[ 0.6807,  0.9558,  0.9835],
         [12.8427,  0.5063,  1.1451],
         [ 2.2854,  0.3835,  0.7410],
         ...,
         [ 3.2535,  0.3047,  0.5721],
         [ 0.6457,  8.6131,  0.6531],
         [ 0.1059,  2.6654,  1.9209]],

        [[ 0.6168,  0.9531,  0.9166],
         [ 0.5746,  0.7946,  1.8589],
         [26.2550,  0.2603,  1.2054],
         ...,
         [30.1814,  0.2224,  0.7882],
         [ 5.3119,  0.0910,  0.6576],
         [ 0.5337,  1.6756, 11.2781


Train Diffusion:  62%|██████▏   | 3122/5001 [2:23:21<1:21:18,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3123/5001 [2:23:24<1:21:09,  2.59s/it][A
Train Diffusion:  62%|██████▏   | 3124/5001 [2:23:26<1:21:14,  2.60s/it][A
Train Diffusion:  62%|██████▏   | 3125/5001 [2:23:29<1:21:06,  2.59s/it][A
Train Diffusion:  63%|██████▎   | 3126/5001 [2:23:32<1:21:24,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3127/5001 [2:23:34<1:21:37,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3128/5001 [2:23:37<1:21:26,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3129/5001 [2:23:39<1:21:22,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3130/5001 [2:23:42<1:21:14,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3131/5001 [2:23:45<1:21:13,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321386182.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6661, 0.4832, 1.3296],
        [8.7416, 0.5174, 1.3338],
        [8.7489, 0.4749, 1.3169]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.2693,  0.8936,  0.7742],
         [ 1.4504,  0.3934,  1.8207],
         [25.9746,  0.2832,  1.1939],
         ...,
         [29.4206,  0.1117,  1.2440],
         [ 5.2011,  0.1466,  0.8297],
         [ 0.2618,  1.5410,  2.7215]],

        [[ 0.4424,  0.9554,  0.9001],
         [ 9.4131,  0.7392,  1.3047],
         [ 1.3749,  0.5514,  0.8122],
         ...,
         [ 3.4127,  0.1917,  0.7725],
         [ 0.6042,  0.6240,  0.5054],
         [11.4465,  2.1549,  4.3293]],

        [[ 1.2011,  0.9065,  3.2016],
         [ 1.5501,  0.8443,  1.5409],
         [ 0.8462,  0.8307,  0.9356],
         ...,
         [ 0.3020,  0.6829,  0.8156],
         [40.0648,  0.1053,  1.3676],
         [ 6.4568,  0.1932,  0.8639


Train Diffusion:  63%|██████▎   | 3132/5001 [2:23:47<1:21:07,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3133/5001 [2:23:50<1:20:57,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3134/5001 [2:23:52<1:20:59,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3135/5001 [2:23:55<1:20:49,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3136/5001 [2:23:58<1:20:42,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3137/5001 [2:24:00<1:20:28,  2.59s/it][A
Train Diffusion:  63%|██████▎   | 3138/5001 [2:24:03<1:20:34,  2.59s/it][A
Train Diffusion:  63%|██████▎   | 3139/5001 [2:24:06<1:23:31,  2.69s/it][A
Train Diffusion:  63%|██████▎   | 3140/5001 [2:24:08<1:22:44,  2.67s/it][A
Train Diffusion:  63%|██████▎   | 3141/5001 [2:24:11<1:22:00,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 309475408.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6845, 0.4910, 1.2934],
        [8.8366, 0.4897, 1.2783],
        [8.8288, 0.5048, 1.3375]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2181e+00, 9.0428e-01, 8.6400e-01],
         [1.3596e+00, 4.0889e-01, 1.5942e+00],
         [2.3827e+01, 3.0601e-01, 1.1892e+00],
         ...,
         [4.0947e+00, 9.9536e-02, 8.7831e-01],
         [1.9800e-01, 9.3053e-01, 1.8577e+00],
         [1.5880e+00, 1.9359e+00, 6.8817e+00]],

        [[4.4258e-01, 9.5451e-01, 8.5120e-01],
         [8.9737e+00, 8.6114e-01, 1.9807e+00],
         [1.1227e+00, 6.2773e-01, 8.5166e-01],
         ...,
         [8.0584e-03, 1.0656e+00, 1.2920e+00],
         [4.3594e-01, 4.4312e-01, 1.0325e+00],
         [8.1173e+00, 1.5721e+00, 2.1960e+00]],

        [[1.2514e+00, 8.9755e-01, 4.0326e+00],
         [1.4606e+00, 8.1478e-01, 1.5550e+00],
         [9.4140e-01, 8.5270e-01, 7.5


Train Diffusion:  63%|██████▎   | 3142/5001 [2:24:13<1:21:29,  2.63s/it][A
Train Diffusion:  63%|██████▎   | 3143/5001 [2:24:16<1:21:04,  2.62s/it][A
Train Diffusion:  63%|██████▎   | 3144/5001 [2:24:19<1:20:48,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3145/5001 [2:24:21<1:20:36,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3146/5001 [2:24:25<1:29:17,  2.89s/it][A
Train Diffusion:  63%|██████▎   | 3147/5001 [2:24:27<1:26:52,  2.81s/it][A
Train Diffusion:  63%|██████▎   | 3148/5001 [2:24:30<1:24:51,  2.75s/it][A
Train Diffusion:  63%|██████▎   | 3149/5001 [2:24:33<1:23:28,  2.70s/it][A
Train Diffusion:  63%|██████▎   | 3150/5001 [2:24:35<1:22:24,  2.67s/it][A
Train Diffusion:  63%|██████▎   | 3151/5001 [2:24:38<1:21:44,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327039820.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7937, 0.4709, 1.3169],
        [8.7267, 0.5066, 1.2690],
        [8.6305, 0.5151, 1.3021]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6543,  0.9548,  0.9233],
         [12.4979,  0.5858,  1.0426],
         [ 2.1356,  0.4156,  0.7261],
         ...,
         [ 0.4434,  2.0733,  0.7656],
         [36.0826,  0.1110,  1.3236],
         [ 5.9575,  0.2717,  1.3039]],

        [[ 1.6065,  0.8076,  3.6027],
         [ 1.2400,  0.5845,  0.9046],
         [ 0.6803,  0.7266,  1.3187],
         ...,
         [31.9543,  0.0962,  1.2435],
         [ 5.4525,  0.1231,  0.6176],
         [ 0.5554,  2.1697,  0.7232]],

        [[ 0.6413,  0.9542,  0.8983],
         [ 0.5889,  0.7784,  1.8829],
         [27.3869,  0.2312,  1.2000],
         ...,
         [ 3.9252,  0.1753,  0.7375],
         [ 0.5566,  0.6197,  0.7821],
         [21.7395,  1.4374,  2.0204


Train Diffusion:  63%|██████▎   | 3152/5001 [2:24:40<1:21:10,  2.63s/it][A
Train Diffusion:  63%|██████▎   | 3153/5001 [2:24:43<1:20:45,  2.62s/it][A
Train Diffusion:  63%|██████▎   | 3154/5001 [2:24:46<1:20:33,  2.62s/it][A
Train Diffusion:  63%|██████▎   | 3155/5001 [2:24:48<1:20:27,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3156/5001 [2:24:51<1:20:12,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3157/5001 [2:24:53<1:20:05,  2.61s/it][A
Train Diffusion:  63%|██████▎   | 3158/5001 [2:24:56<1:19:59,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3159/5001 [2:24:59<1:19:49,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3160/5001 [2:25:01<1:19:45,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3161/5001 [2:25:04<1:19:40,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328516380.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6850, 0.5133, 1.2980],
        [8.7161, 0.5084, 1.3175],
        [8.8830, 0.4923, 1.2835]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5961,  0.8139,  4.6818],
         [ 1.0706,  0.6535,  1.0411],
         [ 0.6760,  0.7872,  1.0917],
         ...,
         [ 0.3092,  0.5380,  1.0527],
         [36.4813,  0.1022,  1.1894],
         [ 5.9114,  0.3598,  1.0738]],

        [[ 0.5869,  0.9517,  0.8288],
         [11.7603,  0.7437,  0.9808],
         [ 1.8007,  0.4382,  0.8138],
         ...,
         [ 1.9123,  0.4559,  1.5019],
         [ 0.2955,  0.6844,  0.7807],
         [23.6026,  1.3914,  2.0327]],

        [[ 0.7197,  0.9582,  0.8403],
         [ 0.6491,  0.7189,  1.8470],
         [27.0601,  0.2523,  1.1975],
         ...,
         [29.4566,  0.0999,  1.2712],
         [ 5.3442,  0.1154,  0.6445],
         [ 0.5833,  1.8833,  0.8578


Train Diffusion:  63%|██████▎   | 3162/5001 [2:25:06<1:19:34,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3163/5001 [2:25:09<1:19:38,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3164/5001 [2:25:12<1:19:36,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3165/5001 [2:25:14<1:19:23,  2.59s/it][A
Train Diffusion:  63%|██████▎   | 3166/5001 [2:25:17<1:19:23,  2.60s/it][A
Train Diffusion:  63%|██████▎   | 3167/5001 [2:25:20<1:20:38,  2.64s/it][A
Train Diffusion:  63%|██████▎   | 3168/5001 [2:25:22<1:21:01,  2.65s/it][A
Train Diffusion:  63%|██████▎   | 3169/5001 [2:25:25<1:21:00,  2.65s/it][A
Train Diffusion:  63%|██████▎   | 3170/5001 [2:25:28<1:21:04,  2.66s/it][A
Train Diffusion:  63%|██████▎   | 3171/5001 [2:25:30<1:21:06,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335294099.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8577, 0.4913, 1.3143],
        [8.8791, 0.4786, 1.2560],
        [8.7678, 0.4949, 1.2934]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4947,  0.9538,  0.7943],
         [10.9673,  0.8812,  1.5940],
         [ 1.3649,  0.6160,  1.0839],
         ...,
         [ 0.3120,  7.2402,  0.7598],
         [33.8492,  0.1056,  1.3594],
         [ 5.9057,  0.3230,  1.1755]],

        [[ 0.9015,  0.9597,  0.7841],
         [ 0.8486,  0.5874,  1.6742],
         [26.6935,  0.2702,  1.1902],
         ...,
         [ 4.0377,  0.1660,  0.6917],
         [ 0.6015,  0.6058,  0.7983],
         [24.7526,  1.2612,  2.0525]],

        [[ 1.5115,  0.8328,  5.5521],
         [ 0.7914,  0.7625,  1.3736],
         [ 0.7272,  0.8421,  0.9007],
         ...,
         [32.2298,  0.0897,  1.2389],
         [ 5.5329,  0.0938,  0.7518],
         [ 0.6123,  2.0030,  0.7420


Train Diffusion:  63%|██████▎   | 3172/5001 [2:25:33<1:21:01,  2.66s/it][A
Train Diffusion:  63%|██████▎   | 3173/5001 [2:25:35<1:20:30,  2.64s/it][A
Train Diffusion:  63%|██████▎   | 3174/5001 [2:25:38<1:20:04,  2.63s/it][A
Train Diffusion:  63%|██████▎   | 3175/5001 [2:25:41<1:19:52,  2.62s/it][A
Train Diffusion:  64%|██████▎   | 3176/5001 [2:25:43<1:19:36,  2.62s/it][A
Train Diffusion:  64%|██████▎   | 3177/5001 [2:25:46<1:19:21,  2.61s/it][A
Train Diffusion:  64%|██████▎   | 3178/5001 [2:25:49<1:19:49,  2.63s/it][A
Train Diffusion:  64%|██████▎   | 3179/5001 [2:25:51<1:19:50,  2.63s/it][A
Train Diffusion:  64%|██████▎   | 3180/5001 [2:25:54<1:20:59,  2.67s/it][A
Train Diffusion:  64%|██████▎   | 3181/5001 [2:25:57<1:20:45,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324966032.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8290, 0.4808, 1.3300],
        [8.8234, 0.5084, 1.2839],
        [8.6293, 0.5107, 1.2759]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7469,  0.9596,  1.1195],
         [12.8065,  0.4193,  1.2487],
         [ 2.5422,  0.4310,  0.9555],
         ...,
         [ 0.4945,  0.6017,  1.1857],
         [41.2761,  0.0736,  1.3350],
         [ 6.3758,  0.2333,  0.8456]],

        [[ 1.5888,  0.8069,  1.6085],
         [ 1.5441,  0.4264,  0.7584],
         [ 0.6853,  0.7847,  1.0083],
         ...,
         [14.9496,  0.3154,  1.4463],
         [ 1.7959,  0.5167,  0.7236],
         [ 0.6475,  2.7283,  1.8107]],

        [[ 0.5677,  0.9509,  0.9421],
         [ 0.5737,  0.8254,  1.6177],
         [27.5375,  0.1050,  1.1954],
         ...,
         [ 3.2227,  0.0769,  0.3575],
         [ 3.2899,  0.1187,  0.3441],
         [18.6177,  2.3974,  3.4410


Train Diffusion:  64%|██████▎   | 3182/5001 [2:25:59<1:20:38,  2.66s/it][A
Train Diffusion:  64%|██████▎   | 3183/5001 [2:26:02<1:20:21,  2.65s/it][A
Train Diffusion:  64%|██████▎   | 3184/5001 [2:26:04<1:20:23,  2.65s/it][A
Train Diffusion:  64%|██████▎   | 3185/5001 [2:26:07<1:21:47,  2.70s/it][A
Train Diffusion:  64%|██████▎   | 3186/5001 [2:26:10<1:21:39,  2.70s/it][A
Train Diffusion:  64%|██████▎   | 3187/5001 [2:26:13<1:21:55,  2.71s/it][A
Train Diffusion:  64%|██████▎   | 3188/5001 [2:26:15<1:21:20,  2.69s/it][A
Train Diffusion:  64%|██████▍   | 3189/5001 [2:26:18<1:20:55,  2.68s/it][A
Train Diffusion:  64%|██████▍   | 3190/5001 [2:26:21<1:20:45,  2.68s/it][A
Train Diffusion:  64%|██████▍   | 3191/5001 [2:26:23<1:20:34,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319196102.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8292, 0.4707, 1.3004],
        [8.5791, 0.4907, 1.2787],
        [8.8221, 0.5049, 1.3209]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9232,  0.9557,  1.2617],
         [ 7.6006,  0.6688,  1.2098],
         [ 1.6372,  0.6063,  1.2209],
         ...,
         [ 2.1017,  0.2240,  0.4734],
         [ 2.3093,  0.2114,  0.6387],
         [23.4120,  1.6643,  1.9136]],

        [[ 0.4893,  0.9529,  1.0387],
         [ 0.5079,  0.7029,  1.7932],
         [28.6172,  0.2060,  1.1823],
         ...,
         [ 0.3058,  0.5602,  1.0124],
         [41.8997,  0.1395,  0.9507],
         [ 5.6164,  0.5204,  1.0392]],

        [[ 1.4946,  0.8343,  2.8553],
         [ 0.8084,  0.7310,  0.9147],
         [ 1.0830,  0.5959,  0.8470],
         ...,
         [19.2339,  0.2762,  1.2095],
         [ 3.3359,  0.3006,  0.7689],
         [ 0.4650,  2.7563,  0.9383


Train Diffusion:  64%|██████▍   | 3192/5001 [2:26:27<1:30:59,  3.02s/it][A
Train Diffusion:  64%|██████▍   | 3193/5001 [2:26:30<1:29:46,  2.98s/it][A
Train Diffusion:  64%|██████▍   | 3194/5001 [2:26:33<1:27:11,  2.90s/it][A
Train Diffusion:  64%|██████▍   | 3195/5001 [2:26:35<1:24:56,  2.82s/it][A
Train Diffusion:  64%|██████▍   | 3196/5001 [2:26:38<1:23:26,  2.77s/it][A
Train Diffusion:  64%|██████▍   | 3197/5001 [2:26:41<1:23:39,  2.78s/it][A
Train Diffusion:  64%|██████▍   | 3198/5001 [2:26:44<1:22:20,  2.74s/it][A
Train Diffusion:  64%|██████▍   | 3199/5001 [2:26:46<1:21:44,  2.72s/it][A
Train Diffusion:  64%|██████▍   | 3200/5001 [2:26:49<1:20:59,  2.70s/it][A
Train Diffusion:  64%|██████▍   | 3201/5001 [2:26:52<1:20:51,  2.70s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315286006.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6556, 0.5011, 1.2926],
        [8.8274, 0.5036, 1.3158],
        [8.7778, 0.5077, 1.2968]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.6771e-01, 9.5266e-01, 7.3502e-01],
         [8.7987e+00, 1.1699e+00, 3.5561e+00],
         [7.8128e-01, 7.8433e-01, 8.3354e-01],
         ...,
         [3.7312e+00, 1.2723e-01, 9.7381e-01],
         [1.9407e-03, 4.1219e-01, 2.7936e+00],
         [1.8307e-01, 1.5606e+00, 1.0952e+01]],

        [[1.4346e+00, 8.4418e-01, 5.5706e+00],
         [9.0935e-01, 3.7421e-01, 1.2712e+00],
         [1.2110e+00, 6.9101e-01, 7.0341e-01],
         ...,
         [1.0550e+01, 2.0172e-01, 5.2472e-01],
         [5.3563e+00, 5.6006e-02, 1.5021e+00],
         [9.9099e-01, 1.9051e+00, 2.7629e+00]],

        [[1.0067e+00, 9.4206e-01, 8.1319e-01],
         [9.7681e-01, 5.1511e-01, 1.1071e+00],
         [1.5707e+01, 3.7088e-01, 1.1


Train Diffusion:  64%|██████▍   | 3202/5001 [2:26:54<1:20:35,  2.69s/it][A
Train Diffusion:  64%|██████▍   | 3203/5001 [2:26:57<1:20:24,  2.68s/it][A
Train Diffusion:  64%|██████▍   | 3204/5001 [2:27:00<1:20:08,  2.68s/it][A
Train Diffusion:  64%|██████▍   | 3205/5001 [2:27:02<1:20:04,  2.68s/it][A
Train Diffusion:  64%|██████▍   | 3206/5001 [2:27:05<1:19:34,  2.66s/it][A
Train Diffusion:  64%|██████▍   | 3207/5001 [2:27:07<1:18:51,  2.64s/it][A
Train Diffusion:  64%|██████▍   | 3208/5001 [2:27:10<1:18:30,  2.63s/it][A
Train Diffusion:  64%|██████▍   | 3209/5001 [2:27:13<1:18:06,  2.62s/it][A
Train Diffusion:  64%|██████▍   | 3210/5001 [2:27:15<1:17:59,  2.61s/it][A
Train Diffusion:  64%|██████▍   | 3211/5001 [2:27:18<1:17:54,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331269859.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7798, 0.4942, 1.2813],
        [8.7081, 0.4933, 1.2881],
        [8.6273, 0.5041, 1.3229]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4901,  0.9513,  0.9384],
         [ 2.0874,  0.9287,  0.7910],
         [ 0.5345,  1.3979,  2.0402],
         ...,
         [12.0055,  0.4579,  1.9642],
         [ 1.3942,  0.9434,  0.8836],
         [ 0.8895,  3.3809,  1.0622]],

        [[ 0.9198,  0.9548,  1.4027],
         [ 8.5038,  0.4518,  1.3054],
         [ 2.1794,  0.5658,  0.4359],
         ...,
         [ 7.8202,  0.3858,  1.9863],
         [ 1.6542,  0.2324,  0.0860],
         [ 4.0278,  4.3507,  0.3596]],

        [[ 1.4955,  0.8329,  0.9087],
         [ 1.7959,  0.3247,  1.0909],
         [23.4873,  0.2642,  2.1545],
         ...,
         [ 4.2644,  0.0964,  0.9821],
         [37.1206,  0.1513,  1.0807],
         [ 4.4803,  1.0445,  1.3228


Train Diffusion:  64%|██████▍   | 3212/5001 [2:27:20<1:17:39,  2.60s/it][A
Train Diffusion:  64%|██████▍   | 3213/5001 [2:27:23<1:17:39,  2.61s/it][A
Train Diffusion:  64%|██████▍   | 3214/5001 [2:27:26<1:17:28,  2.60s/it][A
Train Diffusion:  64%|██████▍   | 3215/5001 [2:27:28<1:17:30,  2.60s/it][A
Train Diffusion:  64%|██████▍   | 3216/5001 [2:27:31<1:17:25,  2.60s/it][A
Train Diffusion:  64%|██████▍   | 3217/5001 [2:27:34<1:18:14,  2.63s/it][A
Train Diffusion:  64%|██████▍   | 3218/5001 [2:27:36<1:17:59,  2.62s/it][A
Train Diffusion:  64%|██████▍   | 3219/5001 [2:27:39<1:18:00,  2.63s/it][A
Train Diffusion:  64%|██████▍   | 3220/5001 [2:27:41<1:17:44,  2.62s/it][A
Train Diffusion:  64%|██████▍   | 3221/5001 [2:27:44<1:17:37,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328512294.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7531, 0.5011, 1.3163],
        [8.6693, 0.4924, 1.2931],
        [8.9939, 0.4930, 1.2978]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5772,  0.8089,  0.7346],
         [ 1.7427,  0.3726,  0.5842],
         [10.6670,  0.7359, 10.9175],
         ...,
         [ 0.2040,  0.1441,  2.1706],
         [ 0.2549,  0.5902,  0.1425],
         [ 3.0652,  4.3537,  0.4887]],

        [[ 0.5505,  0.9531,  0.9328],
         [ 0.5557,  0.7789,  0.9506],
         [ 2.9833,  0.2170,  2.2174],
         ...,
         [ 4.4571,  0.1133,  1.0332],
         [36.2648,  0.0794,  1.3659],
         [ 6.1301,  0.2468,  1.3855]],

        [[ 0.7776,  0.9629,  1.1564],
         [12.9696,  0.3360,  1.2767],
         [ 2.9008,  0.2988,  0.9319],
         ...,
         [23.6437,  0.5861,  0.2050],
         [ 4.1455,  0.1588,  0.5246],
         [ 8.0060,  2.0699,  1.5186


Train Diffusion:  64%|██████▍   | 3222/5001 [2:27:47<1:17:23,  2.61s/it][A
Train Diffusion:  64%|██████▍   | 3223/5001 [2:27:49<1:17:17,  2.61s/it][A
Train Diffusion:  64%|██████▍   | 3224/5001 [2:27:52<1:17:13,  2.61s/it][A
Train Diffusion:  64%|██████▍   | 3225/5001 [2:27:54<1:17:03,  2.60s/it][A
Train Diffusion:  65%|██████▍   | 3226/5001 [2:27:57<1:17:14,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3227/5001 [2:28:00<1:17:07,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3228/5001 [2:28:02<1:17:11,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3229/5001 [2:28:05<1:16:57,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3230/5001 [2:28:07<1:16:52,  2.60s/it][A
Train Diffusion:  65%|██████▍   | 3231/5001 [2:28:10<1:17:47,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326661369.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8305, 0.5024, 1.3257],
        [8.7310, 0.4913, 1.2833],
        [8.7070, 0.4953, 1.3108]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1528e-01, 9.5304e-01, 9.3392e-01],
         [7.1094e-01, 8.5345e-01, 5.8751e-01],
         [1.4224e+00, 7.8939e-01, 8.9485e-01],
         ...,
         [4.0177e+00, 1.6111e-01, 8.1965e-01],
         [5.7350e-01, 1.0937e+00, 5.2211e+00],
         [4.6147e-01, 3.2386e+00, 1.0136e+00]],

        [[1.5435e+00, 8.2240e-01, 9.3729e-01],
         [1.7853e+00, 3.4068e-01, 1.0222e+00],
         [1.6221e+01, 3.8606e-01, 1.1876e+00],
         ...,
         [3.2597e+01, 1.7578e-01, 1.1418e+00],
         [4.6227e+00, 1.0951e-01, 1.0763e+01],
         [3.4780e-01, 2.6650e+00, 2.5308e+00]],

        [[8.4834e-01, 9.6278e-01, 1.2661e+00],
         [1.1412e+01, 3.8107e-01, 1.2908e+00],
         [2.6961e+00, 4.6957e-01, 7.1


Train Diffusion:  65%|██████▍   | 3232/5001 [2:28:13<1:17:24,  2.63s/it][A
Train Diffusion:  65%|██████▍   | 3233/5001 [2:28:15<1:17:15,  2.62s/it][A
Train Diffusion:  65%|██████▍   | 3234/5001 [2:28:18<1:17:01,  2.62s/it][A
Train Diffusion:  65%|██████▍   | 3235/5001 [2:28:21<1:16:57,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3236/5001 [2:28:23<1:16:46,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3237/5001 [2:28:26<1:16:37,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3238/5001 [2:28:28<1:16:46,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3239/5001 [2:28:31<1:20:31,  2.74s/it][A
Train Diffusion:  65%|██████▍   | 3240/5001 [2:28:34<1:19:20,  2.70s/it][A
Train Diffusion:  65%|██████▍   | 3241/5001 [2:28:37<1:18:25,  2.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323562947.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6793, 0.4809, 1.3044],
        [8.8341, 0.4829, 1.3322],
        [8.7607, 0.5072, 1.3066]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2656e+00, 8.9546e-01, 7.2492e-01],
         [1.4364e+00, 4.1095e-01, 2.1777e-01],
         [1.9964e-05, 2.3592e+00, 2.2345e-01],
         ...,
         [2.2895e+00, 1.0105e+00, 1.6701e+00],
         [3.1545e-02, 1.0870e+00, 1.4884e+00],
         [4.6631e+00, 2.6461e+00, 3.4011e+00]],

        [[1.2065e+00, 9.0671e-01, 3.0121e+00],
         [2.2026e+00, 9.8023e-01, 1.3241e+00],
         [9.8538e-01, 5.4041e+00, 7.3624e-01],
         ...,
         [2.3670e+01, 9.2290e-02, 1.2708e+00],
         [5.2750e+00, 7.2853e-02, 2.8050e+00],
         [8.9447e-03, 1.9638e+00, 2.7094e+00]],

        [[4.4197e-01, 9.5554e-01, 9.9596e-01],
         [6.6161e+00, 5.8490e-01, 1.6900e+00],
         [1.4728e+00, 5.7471e-02, 6.9


Train Diffusion:  65%|██████▍   | 3242/5001 [2:28:39<1:17:47,  2.65s/it][A
Train Diffusion:  65%|██████▍   | 3243/5001 [2:28:42<1:17:27,  2.64s/it][A
Train Diffusion:  65%|██████▍   | 3244/5001 [2:28:44<1:17:06,  2.63s/it][A
Train Diffusion:  65%|██████▍   | 3245/5001 [2:28:47<1:16:51,  2.63s/it][A
Train Diffusion:  65%|██████▍   | 3246/5001 [2:28:50<1:16:38,  2.62s/it][A
Train Diffusion:  65%|██████▍   | 3247/5001 [2:28:52<1:16:29,  2.62s/it][A
Train Diffusion:  65%|██████▍   | 3248/5001 [2:28:55<1:16:23,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3249/5001 [2:28:58<1:16:17,  2.61s/it][A
Train Diffusion:  65%|██████▍   | 3250/5001 [2:29:00<1:16:16,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3251/5001 [2:29:03<1:16:24,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330786435.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7762, 0.5034, 1.2717],
        [8.8805, 0.4965, 1.3022],
        [8.6185, 0.4839, 1.3008]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5953,  0.8160,  4.8071],
         [ 1.0214,  0.6897,  1.1640],
         [ 0.6059,  0.7095,  1.3265],
         ...,
         [ 0.8541,  4.9457,  1.1297],
         [ 0.0731,  0.7817,  1.4082],
         [12.5030,  2.4596,  5.0088]],

        [[ 0.7294,  0.9577,  0.8347],
         [ 0.6605,  0.7017,  1.9322],
         [27.7762,  0.2192,  1.2007],
         ...,
         [ 0.7811,  0.0713,  0.8660],
         [ 5.4301,  0.0618,  1.6534],
         [ 0.5697,  1.8325,  2.2671]],

        [[ 0.5784,  0.9504,  0.8257],
         [12.0610,  0.7445,  0.7667],
         [ 1.9052,  0.4242,  0.7351],
         ...,
         [ 0.2575,  0.5271,  0.5935],
         [32.0034,  0.2102,  0.9296],
         [ 5.8317,  0.2032,  1.2250


Train Diffusion:  65%|██████▌   | 3252/5001 [2:29:05<1:16:18,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3253/5001 [2:29:08<1:16:07,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3254/5001 [2:29:11<1:16:06,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3255/5001 [2:29:13<1:15:51,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3256/5001 [2:29:16<1:15:50,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3257/5001 [2:29:18<1:15:45,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3258/5001 [2:29:21<1:15:46,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3259/5001 [2:29:24<1:15:45,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3260/5001 [2:29:26<1:15:54,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3261/5001 [2:29:29<1:15:55,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316385504.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8877, 0.4814, 1.2775],
        [8.6140, 0.4833, 1.3060],
        [8.6829, 0.4935, 1.2916]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0283e-01, 9.5148e-01, 9.0817e-01],
         [1.2484e+01, 6.1149e-01, 9.0006e-01],
         [1.9696e+00, 4.0618e-01, 7.3828e-01],
         ...,
         [1.3756e+00, 2.8761e-01, 9.5518e-01],
         [1.4085e-06, 4.0478e-01, 2.4082e+00],
         [2.2629e+01, 1.4685e+00, 1.8793e+00]],

        [[8.8042e-01, 9.5913e-01, 7.8477e-01],
         [8.2954e-01, 6.0583e-01, 1.9802e+00],
         [2.7109e+01, 2.4158e-01, 1.2055e+00],
         ...,
         [3.1100e+01, 5.5186e-01, 3.7775e-01],
         [2.4381e+00, 8.9475e-01, 7.8873e-03],
         [2.9751e+00, 4.8955e-01, 7.0093e-01]],

        [[1.5211e+00, 8.2813e-01, 4.8267e+00],
         [7.8590e-01, 8.8981e-01, 1.3538e+00],
         [5.9812e-01, 7.3741e-01, 1.2


Train Diffusion:  65%|██████▌   | 3262/5001 [2:29:31<1:15:41,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3263/5001 [2:29:34<1:16:30,  2.64s/it][A
Train Diffusion:  65%|██████▌   | 3264/5001 [2:29:37<1:16:05,  2.63s/it][A
Train Diffusion:  65%|██████▌   | 3265/5001 [2:29:39<1:16:01,  2.63s/it][A
Train Diffusion:  65%|██████▌   | 3266/5001 [2:29:42<1:15:43,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3267/5001 [2:29:45<1:15:35,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3268/5001 [2:29:47<1:15:33,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3269/5001 [2:29:50<1:15:26,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3270/5001 [2:29:52<1:15:25,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3271/5001 [2:29:55<1:15:16,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330907014.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7366, 0.5055, 1.2991],
        [9.0280, 0.4661, 1.3092],
        [8.6217, 0.4906, 1.3183]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.8416e-01, 9.5258e-01, 8.5313e-01],
         [1.1902e+01, 6.9834e-01, 8.5641e-01],
         [1.8842e+00, 4.3886e-01, 7.6425e-01],
         ...,
         [4.0919e-01, 7.8276e+00, 7.4389e-01],
         [9.9478e+00, 5.4844e-01, 3.9663e-01],
         [4.0284e+00, 5.7371e-01, 8.2285e-01]],

        [[1.6008e+00, 8.1791e-01, 4.6764e+00],
         [1.0066e+00, 6.6160e-01, 1.0681e+00],
         [6.8069e-01, 7.0710e-01, 1.2474e+00],
         ...,
         [3.0334e+01, 8.3645e-02, 1.2535e+00],
         [5.8745e+00, 3.5294e-02, 7.8062e+00],
         [9.8224e-01, 5.4210e-01, 1.2382e+00]],

        [[7.2031e-01, 9.5940e-01, 8.4214e-01],
         [6.5836e-01, 7.0179e-01, 1.9180e+00],
         [2.8202e+01, 1.7438e-01, 1.1


Train Diffusion:  65%|██████▌   | 3272/5001 [2:29:58<1:15:05,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3273/5001 [2:30:00<1:15:14,  2.61s/it][A
Train Diffusion:  65%|██████▌   | 3274/5001 [2:30:03<1:15:30,  2.62s/it][A
Train Diffusion:  65%|██████▌   | 3275/5001 [2:30:06<1:15:25,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3276/5001 [2:30:08<1:15:12,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3277/5001 [2:30:11<1:15:24,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3278/5001 [2:30:14<1:15:54,  2.64s/it][A
Train Diffusion:  66%|██████▌   | 3279/5001 [2:30:16<1:15:35,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3280/5001 [2:30:19<1:15:11,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3281/5001 [2:30:21<1:14:56,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340317926.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6958, 0.5184, 1.3113],
        [8.7850, 0.5051, 1.3124],
        [8.5390, 0.5109, 1.2875]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8420,  0.9603,  0.7910],
         [ 0.8180,  0.5917,  1.8931],
         [28.4963,  0.1511,  1.1953],
         ...,
         [ 2.7195,  0.1930,  0.8319],
         [ 0.4962, 14.4080,  1.4396],
         [ 8.4908,  2.5011,  2.0579]],

        [[ 1.5447,  0.8254,  5.2324],
         [ 0.8148,  0.8925,  1.2170],
         [ 0.8327,  0.6846,  1.2434],
         ...,
         [ 0.5407,  0.6057,  0.5898],
         [27.6076,  0.5375,  1.1467],
         [ 3.4107,  1.1230,  1.2252]],

        [[ 0.5177,  0.9504,  0.9149],
         [10.9286,  0.5930,  0.8893],
         [ 1.7717,  0.4680,  0.7141],
         ...,
         [35.4501,  0.1061,  1.1740],
         [ 5.3407,  0.1628,  0.6359],
         [ 0.4296,  1.7793,  0.8411


Train Diffusion:  66%|██████▌   | 3282/5001 [2:30:24<1:15:19,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3283/5001 [2:30:27<1:15:07,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3284/5001 [2:30:29<1:15:16,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3285/5001 [2:30:32<1:15:06,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3286/5001 [2:30:35<1:18:08,  2.73s/it][A
Train Diffusion:  66%|██████▌   | 3287/5001 [2:30:38<1:18:41,  2.75s/it][A
Train Diffusion:  66%|██████▌   | 3288/5001 [2:30:40<1:17:41,  2.72s/it][A
Train Diffusion:  66%|██████▌   | 3289/5001 [2:30:43<1:16:51,  2.69s/it][A
Train Diffusion:  66%|██████▌   | 3290/5001 [2:30:46<1:16:03,  2.67s/it][A
Train Diffusion:  66%|██████▌   | 3291/5001 [2:30:48<1:15:32,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327154406.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6655, 0.4830, 1.3038],
        [8.9691, 0.4873, 1.2827],
        [8.7849, 0.4872, 1.2849]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5266e-01, 9.5439e-01, 9.8383e-01],
         [4.9831e+00, 5.9955e-01, 1.0846e+00],
         [1.8397e+00, 3.2192e+00, 4.6418e-01],
         ...,
         [2.2830e+00, 8.2912e-01, 1.2945e+00],
         [1.0269e+00, 7.9480e-01, 8.7972e-01],
         [1.8337e+00, 1.9939e+00, 9.4560e+00]],

        [[1.3776e+00, 8.5962e-01, 5.8031e-01],
         [1.6261e+00, 3.6857e-01, 7.8728e-01],
         [6.8896e-03, 6.0930e-01, 1.5154e+00],
         ...,
         [3.2900e+00, 1.2026e-01, 9.9913e-01],
         [3.8852e+01, 8.9389e-02, 1.3386e+00],
         [5.9669e+00, 2.2021e-01, 1.7375e+00]],

        [[1.0810e+00, 9.2886e-01, 1.8289e+00],
         [4.9029e+00, 6.8201e-01, 1.2516e+00],
         [1.5289e+00, 6.8076e-01, 6.2


Train Diffusion:  66%|██████▌   | 3292/5001 [2:30:51<1:15:19,  2.64s/it][A
Train Diffusion:  66%|██████▌   | 3293/5001 [2:30:53<1:15:07,  2.64s/it][A
Train Diffusion:  66%|██████▌   | 3294/5001 [2:30:56<1:14:53,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3295/5001 [2:30:59<1:14:43,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3296/5001 [2:31:01<1:14:31,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3297/5001 [2:31:04<1:14:30,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3298/5001 [2:31:06<1:14:19,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3299/5001 [2:31:09<1:14:30,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3300/5001 [2:31:12<1:14:14,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3301/5001 [2:31:14<1:14:13,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339157616.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6911, 0.4753, 1.2834],
        [8.8562, 0.5000, 1.2905],
        [8.6812, 0.5245, 1.2893]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.1666e-01, 9.5528e-01, 7.8275e-01],
         [8.8258e-01, 5.7587e-01, 1.7981e+00],
         [1.2224e-01, 4.8137e-01, 3.7110e+00],
         ...,
         [9.9013e-03, 1.2212e+00, 3.2448e+00],
         [2.1613e-01, 4.6529e-01, 8.4414e-01],
         [3.0500e-03, 1.3536e+00, 3.0781e+00]],

        [[4.9045e-01, 9.5122e-01, 9.1435e-01],
         [1.1370e+01, 6.2253e-01, 1.1311e+00],
         [1.6773e+00, 5.7239e-01, 1.2459e+00],
         ...,
         [2.2381e+00, 1.4653e-01, 5.0967e+00],
         [1.3255e+00, 6.7593e-01, 2.0698e+00],
         [4.6155e-01, 1.9262e+00, 9.4877e+00]],

        [[1.4993e+00, 8.3269e-01, 5.0461e+00],
         [8.0696e-01, 9.3212e-01, 1.2636e+00],
         [7.5024e-01, 4.7733e-01, 1.7


Train Diffusion:  66%|██████▌   | 3302/5001 [2:31:17<1:13:57,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3303/5001 [2:31:20<1:13:55,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3304/5001 [2:31:22<1:13:48,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3305/5001 [2:31:25<1:13:42,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3306/5001 [2:31:27<1:13:43,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3307/5001 [2:31:30<1:13:37,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3308/5001 [2:31:33<1:13:56,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3309/5001 [2:31:35<1:14:04,  2.63s/it][A
Train Diffusion:  66%|██████▌   | 3310/5001 [2:31:38<1:13:52,  2.62s/it][A
Train Diffusion:  66%|██████▌   | 3311/5001 [2:31:40<1:13:47,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315969100.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7081, 0.4829, 1.3197],
        [8.6281, 0.4883, 1.2947],
        [8.8604, 0.4819, 1.3005]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7079e-01, 9.5554e-01, 8.7405e-01],
         [6.1158e-01, 7.5119e-01, 1.8666e+00],
         [2.8471e+01, 8.3926e-02, 1.2049e+00],
         ...,
         [4.3544e+00, 1.1111e-01, 7.8409e-01],
         [4.8495e-01, 1.2301e+00, 5.6692e-01],
         [7.4183e+00, 2.4465e+00, 6.1477e+00]],

        [[6.2578e-01, 9.5354e-01, 8.7862e-01],
         [1.1912e+01, 6.5214e-01, 9.4114e-01],
         [1.9525e+00, 4.5097e-01, 8.0075e-01],
         ...,
         [2.0868e+01, 5.7649e-01, 1.6461e-01],
         [4.5361e+00, 1.2719e-01, 9.1439e-01],
         [4.8576e-03, 1.4915e+00, 3.1926e+00]],

        [[1.6044e+00, 8.0974e-01, 4.0880e+00],
         [1.1554e+00, 5.6763e-01, 9.1957e-01],
         [7.2311e-01, 6.9127e-01, 7.8


Train Diffusion:  66%|██████▌   | 3312/5001 [2:31:43<1:13:35,  2.61s/it][A
Train Diffusion:  66%|██████▌   | 3313/5001 [2:31:46<1:13:32,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3314/5001 [2:31:48<1:13:25,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3315/5001 [2:31:51<1:13:22,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3316/5001 [2:31:54<1:13:31,  2.62s/it][A
Train Diffusion:  66%|██████▋   | 3317/5001 [2:31:56<1:13:26,  2.62s/it][A
Train Diffusion:  66%|██████▋   | 3318/5001 [2:31:59<1:13:14,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3319/5001 [2:32:01<1:13:08,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3320/5001 [2:32:04<1:13:07,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3321/5001 [2:32:07<1:12:59,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323149104.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9222, 0.4701, 1.2976],
        [9.0678, 0.4922, 1.3037],
        [8.6079, 0.4804, 1.2689]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7616,  0.9631,  1.1365],
         [12.9439,  0.3918,  1.2626],
         [ 2.6805,  0.3848,  0.7947],
         ...,
         [ 4.7585,  0.5094,  1.4863],
         [12.0565,  0.6816,  0.0979],
         [ 5.0863,  0.2378,  0.9125]],

        [[ 1.5835,  0.8075,  1.3705],
         [ 1.6686,  0.3828,  0.7503],
         [ 0.6073,  0.7961,  1.0755],
         ...,
         [ 8.5975,  0.2340,  0.3918],
         [ 4.9922,  0.0681,  0.5665],
         [ 1.1651,  2.1893,  0.8838]],

        [[ 0.5585,  0.9534,  0.9351],
         [ 0.5291,  0.8610,  1.5595],
         [26.7250,  0.0842,  1.1925],
         ...,
         [ 3.9894,  0.1186,  0.8598],
         [ 0.6609,  0.5054,  1.5838],
         [15.7839,  1.7820,  2.0423


Train Diffusion:  66%|██████▋   | 3322/5001 [2:32:09<1:12:56,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3323/5001 [2:32:12<1:12:59,  2.61s/it][A
Train Diffusion:  66%|██████▋   | 3324/5001 [2:32:15<1:15:04,  2.69s/it][A
Train Diffusion:  66%|██████▋   | 3325/5001 [2:32:17<1:15:02,  2.69s/it][A
Train Diffusion:  67%|██████▋   | 3326/5001 [2:32:20<1:14:20,  2.66s/it][A
Train Diffusion:  67%|██████▋   | 3327/5001 [2:32:23<1:13:43,  2.64s/it][A
Train Diffusion:  67%|██████▋   | 3328/5001 [2:32:25<1:13:25,  2.63s/it][A
Train Diffusion:  67%|██████▋   | 3329/5001 [2:32:28<1:13:10,  2.63s/it][A
Train Diffusion:  67%|██████▋   | 3330/5001 [2:32:30<1:12:59,  2.62s/it][A
Train Diffusion:  67%|██████▋   | 3331/5001 [2:32:33<1:12:59,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323991536.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6334, 0.5106, 1.3494],
        [8.7516, 0.4903, 1.2995],
        [8.7957, 0.4750, 1.3145]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.7708e-01, 9.5329e-01, 9.6624e-01],
         [2.4545e+00, 6.7197e-01, 2.1573e+00],
         [1.7893e+00, 2.9595e-01, 4.5713e-01],
         ...,
         [7.4144e-02, 1.4197e+00, 2.6583e+00],
         [3.3424e-01, 4.2196e-01, 9.0307e-01],
         [2.1207e-02, 1.5384e+00, 2.8487e+00]],

        [[9.6283e-01, 9.5084e-01, 1.4831e+00],
         [6.4409e+00, 6.3099e-01, 1.3323e+00],
         [1.4709e+00, 5.4586e-01, 1.3082e+00],
         ...,
         [9.6883e-01, 5.3016e+00, 9.9951e-01],
         [1.5568e-01, 6.0148e-01, 1.9482e+00],
         [5.7153e+00, 2.0008e+00, 3.8348e+00]],

        [[1.4694e+00, 8.3994e-01, 5.2795e-01],
         [1.7095e+00, 3.6475e-01, 4.0986e-01],
         [6.7281e-05, 1.6938e+00, 5.1


Train Diffusion:  67%|██████▋   | 3332/5001 [2:32:37<1:21:45,  2.94s/it][A
Train Diffusion:  67%|██████▋   | 3333/5001 [2:32:39<1:20:23,  2.89s/it][A
Train Diffusion:  67%|██████▋   | 3334/5001 [2:32:42<1:17:57,  2.81s/it][A
Train Diffusion:  67%|██████▋   | 3335/5001 [2:32:45<1:16:05,  2.74s/it][A
Train Diffusion:  67%|██████▋   | 3336/5001 [2:32:47<1:14:47,  2.69s/it][A
Train Diffusion:  67%|██████▋   | 3337/5001 [2:32:50<1:13:48,  2.66s/it][A
Train Diffusion:  67%|██████▋   | 3338/5001 [2:32:52<1:13:18,  2.65s/it][A
Train Diffusion:  67%|██████▋   | 3339/5001 [2:32:55<1:12:44,  2.63s/it][A
Train Diffusion:  67%|██████▋   | 3340/5001 [2:32:58<1:12:25,  2.62s/it][A
Train Diffusion:  67%|██████▋   | 3341/5001 [2:33:00<1:12:08,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337463718.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5464, 0.4930, 1.3092],
        [8.8896, 0.5103, 1.2989],
        [8.6578, 0.4977, 1.2724]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5468e+00, 8.1968e-01, 1.0189e+00],
         [1.7920e+00, 3.3682e-01, 9.8356e-01],
         [1.0405e+01, 4.5462e-01, 1.1694e+00],
         ...,
         [4.1534e+00, 1.5176e-01, 6.7941e-01],
         [9.9604e+00, 2.4641e-01, 1.0250e+00],
         [2.9676e+00, 1.1807e+00, 1.2535e+00]],

        [[5.2143e-01, 9.5168e-01, 9.2730e-01],
         [5.3279e-01, 8.9063e-01, 5.4952e-01],
         [5.4313e+00, 6.0644e-01, 8.3671e-01],
         ...,
         [2.8998e+01, 5.8374e-02, 1.2796e+00],
         [5.8191e+00, 3.9116e-02, 1.2660e+01],
         [1.0129e-01, 3.1218e+00, 2.7992e+00]],

        [[8.3560e-01, 9.6160e-01, 1.2341e+00],
         [1.2051e+01, 3.3280e-01, 1.2858e+00],
         [2.9352e+00, 3.9569e-01, 7.2


Train Diffusion:  67%|██████▋   | 3342/5001 [2:33:03<1:11:58,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3343/5001 [2:33:05<1:12:00,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3344/5001 [2:33:08<1:11:58,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3345/5001 [2:33:11<1:11:49,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3346/5001 [2:33:13<1:11:45,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3347/5001 [2:33:16<1:11:36,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3348/5001 [2:33:18<1:11:36,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3349/5001 [2:33:21<1:11:29,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3350/5001 [2:33:24<1:11:32,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3351/5001 [2:33:26<1:11:27,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335170102.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8205, 0.4887, 1.3285],
        [8.6544, 0.4981, 1.3307],
        [8.5929, 0.5052, 1.3320]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7697,  0.9606,  1.1586],
         [13.1502,  0.3451,  1.2766],
         [ 2.9069,  0.3457,  1.1216],
         ...,
         [ 1.1441,  0.3599,  0.7849],
         [ 0.5008,  0.7194,  0.9052],
         [22.4583,  1.4224,  1.8091]],

        [[ 1.5805,  0.8064,  0.9495],
         [ 1.7490,  0.3611,  0.6786],
         [ 0.2567,  0.8169,  1.3165],
         ...,
         [ 2.7487,  0.2469,  0.9023],
         [21.2570,  0.2815,  1.0989],
         [ 3.9878,  0.6410,  5.1525]],

        [[ 0.5545,  0.9509,  0.9247],
         [ 0.5063,  0.8091,  1.3988],
         [24.2905,  0.2294,  1.1117],
         ...,
         [36.5412,  0.0763,  1.1787],
         [ 5.8440,  0.0434,  4.8431],
         [ 1.6259,  1.0355,  1.4070


Train Diffusion:  67%|██████▋   | 3352/5001 [2:33:29<1:11:36,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3353/5001 [2:33:31<1:11:24,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3354/5001 [2:33:34<1:11:45,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3355/5001 [2:33:37<1:11:35,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3356/5001 [2:33:39<1:11:25,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3357/5001 [2:33:42<1:11:11,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3358/5001 [2:33:44<1:11:13,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3359/5001 [2:33:47<1:11:04,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3360/5001 [2:33:50<1:11:00,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3361/5001 [2:33:52<1:10:53,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316190438.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6584, 0.5171, 1.3245],
        [8.8060, 0.4974, 1.2942],
        [8.6516, 0.5042, 1.3235]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4680,  0.9520,  0.9535],
         [ 6.8839,  0.6301,  2.1554],
         [ 1.3674,  0.5906,  0.9575],
         ...,
         [ 4.1712,  0.1063,  1.6516],
         [26.0514,  0.1549,  1.3029],
         [ 5.0334,  0.6328,  0.7856]],

        [[ 1.0032,  0.9422,  1.5655],
         [ 3.2350,  0.7711,  1.4680],
         [ 0.8889,  0.9590,  0.7012],
         ...,
         [29.9165,  0.1413,  1.0740],
         [ 4.7589,  0.0812,  3.0116],
         [ 1.1017,  1.4434,  1.2764]],

        [[ 1.4377,  0.8435,  0.5669],
         [ 1.7420,  0.3402,  1.5169],
         [24.1058,  0.3075,  1.1808],
         ...,
         [ 0.0841,  0.7241,  2.7444],
         [ 0.1899,  0.6926,  0.6151],
         [26.7340,  1.3539,  1.8949


Train Diffusion:  67%|██████▋   | 3362/5001 [2:33:55<1:10:50,  2.59s/it][A
Train Diffusion:  67%|██████▋   | 3363/5001 [2:33:57<1:10:56,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3364/5001 [2:34:00<1:10:44,  2.59s/it][A
Train Diffusion:  67%|██████▋   | 3365/5001 [2:34:03<1:10:49,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3366/5001 [2:34:05<1:10:42,  2.59s/it][A
Train Diffusion:  67%|██████▋   | 3367/5001 [2:34:08<1:10:46,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3368/5001 [2:34:10<1:10:57,  2.61s/it][A
Train Diffusion:  67%|██████▋   | 3369/5001 [2:34:13<1:10:40,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3370/5001 [2:34:16<1:10:37,  2.60s/it][A
Train Diffusion:  67%|██████▋   | 3371/5001 [2:34:18<1:10:49,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328082073.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7453, 0.4890, 1.2855],
        [8.9449, 0.4934, 1.2901],
        [8.7024, 0.4975, 1.3029]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5201,  0.9521,  0.9273],
         [ 0.3875,  0.9601,  0.9885],
         [20.5181,  0.2918,  1.0592],
         ...,
         [ 4.0595,  0.1543,  0.8234],
         [ 0.5223,  1.3691,  0.3136],
         [ 0.2463,  4.9644,  0.4052]],

        [[ 1.5482,  0.8187,  1.1906],
         [ 1.7899,  0.3370,  0.7135],
         [ 0.7360,  0.8108,  1.2913],
         ...,
         [33.1396,  0.0904,  1.2319],
         [ 5.4616,  0.1116,  0.8463],
         [ 1.6832,  1.1994,  1.1876]],

        [[ 0.8372,  0.9620,  1.2275],
         [12.0255,  0.3282,  1.2824],
         [ 2.9457,  0.3643,  0.8780],
         ...,
         [ 0.5087,  0.7179,  0.9116],
         [25.6028,  0.1397,  1.2453],
         [ 5.0272,  0.2476,  8.3214


Train Diffusion:  67%|██████▋   | 3372/5001 [2:34:21<1:11:12,  2.62s/it][A
Train Diffusion:  67%|██████▋   | 3373/5001 [2:34:24<1:11:49,  2.65s/it][A
Train Diffusion:  67%|██████▋   | 3374/5001 [2:34:26<1:11:33,  2.64s/it][A
Train Diffusion:  67%|██████▋   | 3375/5001 [2:34:29<1:11:24,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3376/5001 [2:34:31<1:10:59,  2.62s/it][A
Train Diffusion:  68%|██████▊   | 3377/5001 [2:34:34<1:10:52,  2.62s/it][A
Train Diffusion:  68%|██████▊   | 3378/5001 [2:34:37<1:10:44,  2.61s/it][A
Train Diffusion:  68%|██████▊   | 3379/5001 [2:34:40<1:13:46,  2.73s/it][A
Train Diffusion:  68%|██████▊   | 3380/5001 [2:34:42<1:13:05,  2.71s/it][A
Train Diffusion:  68%|██████▊   | 3381/5001 [2:34:45<1:12:22,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336867427.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8017, 0.4850, 1.3017],
        [8.8056, 0.4972, 1.2918],
        [8.7734, 0.4901, 1.3021]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4887,  0.8380,  4.5624],
         [ 0.7788,  0.9527,  1.3042],
         [ 0.6307,  0.8782,  0.8961],
         ...,
         [20.3827,  0.3666,  0.8592],
         [ 3.1775,  0.3093,  0.9403],
         [ 0.3548,  1.4719,  8.9688]],

        [[ 0.4834,  0.9544,  0.9461],
         [12.5818,  0.5738,  1.1617],
         [ 1.8997,  0.4769,  0.8381],
         ...,
         [ 3.8708,  0.0948,  1.0957],
         [31.4700,  0.1389,  1.2624],
         [ 4.9444,  0.6391,  0.9857]],

        [[ 0.9365,  0.9560,  0.7873],
         [ 0.8925,  0.5864,  1.8792],
         [26.0091,  0.2766,  1.1958],
         ...,
         [ 0.1168,  0.5782,  3.0194],
         [ 0.2979,  1.0629, 10.2531],
         [ 3.3624,  2.6528,  2.1043


Train Diffusion:  68%|██████▊   | 3382/5001 [2:34:47<1:11:44,  2.66s/it][A
Train Diffusion:  68%|██████▊   | 3383/5001 [2:34:50<1:11:14,  2.64s/it][A
Train Diffusion:  68%|██████▊   | 3384/5001 [2:34:53<1:10:52,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3385/5001 [2:34:55<1:10:36,  2.62s/it][A
Train Diffusion:  68%|██████▊   | 3386/5001 [2:34:58<1:10:21,  2.61s/it][A
Train Diffusion:  68%|██████▊   | 3387/5001 [2:35:01<1:10:15,  2.61s/it][A
Train Diffusion:  68%|██████▊   | 3388/5001 [2:35:03<1:10:02,  2.61s/it][A
Train Diffusion:  68%|██████▊   | 3389/5001 [2:35:06<1:10:03,  2.61s/it][A
Train Diffusion:  68%|██████▊   | 3390/5001 [2:35:08<1:09:52,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3391/5001 [2:35:11<1:09:47,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320933987.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7798, 0.4986, 1.3172],
        [8.9377, 0.4921, 1.3071],
        [8.5172, 0.5202, 1.3004]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5594,  0.9519,  0.9336],
         [ 0.5219,  0.8350,  1.5103],
         [25.8959,  0.2191,  1.1459],
         ...,
         [14.0975,  0.2027,  0.5878],
         [ 5.5024,  0.0716,  0.6349],
         [23.1716,  1.4927,  1.9336]],

        [[ 1.5842,  0.8062,  1.1796],
         [ 1.6877,  0.3779,  0.7134],
         [ 0.4614,  0.7990,  1.1351],
         ...,
         [ 0.1748,  0.6957,  0.5342],
         [28.4752,  0.2417,  0.4410],
         [ 5.6012,  0.2949,  0.7376]],

        [[ 0.7608,  0.9616,  1.1474],
         [13.0879,  0.3659,  1.2725],
         [ 2.7970,  0.3620,  1.1512],
         ...,
         [ 1.4246,  3.9171,  1.6388],
         [ 0.6057,  0.6918,  1.1005],
         [ 0.4985,  2.4562,  1.3190


Train Diffusion:  68%|██████▊   | 3392/5001 [2:35:13<1:09:42,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3393/5001 [2:35:16<1:09:33,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3394/5001 [2:35:19<1:09:35,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3395/5001 [2:35:21<1:09:38,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3396/5001 [2:35:24<1:09:28,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3397/5001 [2:35:26<1:09:34,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3398/5001 [2:35:29<1:09:53,  2.62s/it][A
Train Diffusion:  68%|██████▊   | 3399/5001 [2:35:32<1:10:08,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3400/5001 [2:35:34<1:10:15,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3401/5001 [2:35:37<1:09:58,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316374585.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6868, 0.4868, 1.3264],
        [8.7674, 0.4798, 1.2816],
        [8.8795, 0.4852, 1.2953]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6692,  0.9579,  0.8778],
         [ 0.6107,  0.7575,  1.9295],
         [27.7136,  0.2167,  1.2008],
         ...,
         [ 2.5283,  0.4754,  1.0354],
         [ 3.3635,  0.1839,  0.8477],
         [ 0.7206,  1.5651,  0.9667]],

        [[ 0.6263,  0.9560,  0.8749],
         [12.4788,  0.6537,  0.9035],
         [ 2.0854,  0.4186,  0.7262],
         ...,
         [ 2.9732,  0.1443,  1.0416],
         [36.4355,  0.1162,  1.2225],
         [ 5.5085,  0.5162,  0.9480]],

        [[ 1.6078,  0.8109,  4.1099],
         [ 1.1728,  0.6205,  1.0036],
         [ 0.6346,  0.7137,  1.3518],
         ...,
         [ 0.2646,  0.3400,  3.0020],
         [ 0.3170,  0.5250,  0.8217],
         [26.5925,  1.1783,  2.0392


Train Diffusion:  68%|██████▊   | 3402/5001 [2:35:40<1:09:51,  2.62s/it][A
Train Diffusion:  68%|██████▊   | 3403/5001 [2:35:42<1:10:01,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3404/5001 [2:35:45<1:10:02,  2.63s/it][A
Train Diffusion:  68%|██████▊   | 3405/5001 [2:35:47<1:09:16,  2.60s/it][A
Train Diffusion:  68%|██████▊   | 3406/5001 [2:35:50<1:08:51,  2.59s/it][A
Train Diffusion:  68%|██████▊   | 3407/5001 [2:35:53<1:08:46,  2.59s/it][A
Train Diffusion:  68%|██████▊   | 3408/5001 [2:35:55<1:08:21,  2.57s/it][A
Train Diffusion:  68%|██████▊   | 3409/5001 [2:35:58<1:08:05,  2.57s/it][A
Train Diffusion:  68%|██████▊   | 3410/5001 [2:36:00<1:07:45,  2.56s/it][A
Train Diffusion:  68%|██████▊   | 3411/5001 [2:36:03<1:07:41,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328635548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6978, 0.4886, 1.2765],
        [8.8843, 0.5020, 1.2734],
        [8.8904, 0.4896, 1.3169]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8934,  0.9605,  0.7843],
         [ 0.8400,  0.5965,  1.8496],
         [27.3451,  0.2272,  1.1981],
         ...,
         [ 2.7849,  0.2948,  0.8560],
         [ 0.5227,  0.5552,  0.8059],
         [24.3390,  1.3665,  2.0440]],

        [[ 0.4977,  0.9537,  0.8295],
         [11.3757,  0.7833,  1.0092],
         [ 1.6164,  0.4625,  0.8455],
         ...,
         [ 0.4549,  0.5429,  1.0272],
         [37.4564,  0.0933,  1.2751],
         [ 5.8409,  0.4330,  1.0667]],

        [[ 1.5163,  0.8315,  5.3684],
         [ 0.7907,  0.8232,  1.3624],
         [ 0.6751,  0.7827,  1.0483],
         ...,
         [33.7721,  0.1172,  1.1924],
         [ 5.1549,  0.1473,  0.6880],
         [ 0.5807,  1.8185,  0.8739


Train Diffusion:  68%|██████▊   | 3412/5001 [2:36:05<1:07:32,  2.55s/it][A
Train Diffusion:  68%|██████▊   | 3413/5001 [2:36:08<1:07:22,  2.55s/it][A
Train Diffusion:  68%|██████▊   | 3414/5001 [2:36:10<1:07:13,  2.54s/it][A
Train Diffusion:  68%|██████▊   | 3415/5001 [2:36:13<1:07:12,  2.54s/it][A
Train Diffusion:  68%|██████▊   | 3416/5001 [2:36:15<1:07:08,  2.54s/it][A
Train Diffusion:  68%|██████▊   | 3417/5001 [2:36:18<1:07:03,  2.54s/it][A
Train Diffusion:  68%|██████▊   | 3418/5001 [2:36:21<1:07:04,  2.54s/it][A
Train Diffusion:  68%|██████▊   | 3419/5001 [2:36:23<1:07:40,  2.57s/it][A
Train Diffusion:  68%|██████▊   | 3420/5001 [2:36:26<1:07:23,  2.56s/it][A
Train Diffusion:  68%|██████▊   | 3421/5001 [2:36:28<1:07:16,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334581856.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7508, 0.4806, 1.3236],
        [8.7916, 0.5035, 1.3188],
        [8.7253, 0.4939, 1.2978]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6044e+00, 8.1452e-01, 4.3959e+00],
         [1.0587e+00, 6.3870e-01, 1.0062e+00],
         [6.8908e-01, 7.2037e-01, 1.2603e+00],
         ...,
         [3.5533e+01, 1.6999e-01, 1.0713e+00],
         [4.4715e+00, 5.9327e-02, 2.1045e-02],
         [4.4662e+00, 2.1374e-01, 1.1102e+00]],

        [[6.9567e-01, 9.5712e-01, 8.5509e-01],
         [6.3713e-01, 7.1909e-01, 1.9098e+00],
         [2.7879e+01, 1.9456e-01, 1.2010e+00],
         ...,
         [3.2563e+00, 1.6942e-01, 1.1514e+00],
         [1.5402e-03, 4.7953e-01, 2.3660e+00],
         [1.5504e+01, 1.8663e+00, 1.9248e+00]],

        [[6.0370e-01, 9.5297e-01, 8.7091e-01],
         [1.2046e+01, 6.6248e-01, 9.3188e-01],
         [1.9433e+00, 4.3058e-01, 7.5


Train Diffusion:  68%|██████▊   | 3422/5001 [2:36:31<1:07:18,  2.56s/it][A
Train Diffusion:  68%|██████▊   | 3423/5001 [2:36:33<1:07:15,  2.56s/it][A
Train Diffusion:  68%|██████▊   | 3424/5001 [2:36:36<1:07:04,  2.55s/it][A
Train Diffusion:  68%|██████▊   | 3425/5001 [2:36:38<1:06:53,  2.55s/it][A
Train Diffusion:  69%|██████▊   | 3426/5001 [2:36:41<1:07:13,  2.56s/it][A
Train Diffusion:  69%|██████▊   | 3427/5001 [2:36:44<1:09:20,  2.64s/it][A
Train Diffusion:  69%|██████▊   | 3428/5001 [2:36:46<1:08:23,  2.61s/it][A
Train Diffusion:  69%|██████▊   | 3429/5001 [2:36:49<1:07:45,  2.59s/it][A
Train Diffusion:  69%|██████▊   | 3430/5001 [2:36:52<1:07:22,  2.57s/it][A
Train Diffusion:  69%|██████▊   | 3431/5001 [2:36:54<1:07:05,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316215520.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8037, 0.4801, 1.2778],
        [8.7159, 0.4890, 1.2999],
        [8.6740, 0.4997, 1.3005]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6008,  0.9520,  0.8412],
         [11.7275,  0.7175,  0.8845],
         [ 1.8606,  0.4399,  0.7783],
         ...,
         [36.3224,  0.3574,  0.3827],
         [ 2.5105,  2.3568,  0.2989],
         [ 0.4841,  0.9762,  1.2029]],

        [[ 0.7019,  0.9566,  0.8516],
         [ 0.6344,  0.7359,  1.8602],
         [28.2890,  0.1386,  1.2017],
         ...,
         [ 1.8307,  0.2251,  0.9469],
         [ 4.2001,  0.4228,  0.9474],
         [ 8.9864,  3.9045,  5.7726]],

        [[ 1.5995,  0.8112,  4.4707],
         [ 1.1165,  0.5893,  0.9826],
         [ 0.7006,  0.7105,  0.9268],
         ...,
         [ 1.5916,  0.4303,  0.7899],
         [ 0.6858,  0.8856,  0.9391],
         [ 1.2466,  2.5743,  0.7793


Train Diffusion:  69%|██████▊   | 3432/5001 [2:36:57<1:06:48,  2.55s/it][A
Train Diffusion:  69%|██████▊   | 3433/5001 [2:36:59<1:06:31,  2.55s/it][A
Train Diffusion:  69%|██████▊   | 3434/5001 [2:37:02<1:06:29,  2.55s/it][A
Train Diffusion:  69%|██████▊   | 3435/5001 [2:37:04<1:06:25,  2.55s/it][A
Train Diffusion:  69%|██████▊   | 3436/5001 [2:37:07<1:06:12,  2.54s/it][A
Train Diffusion:  69%|██████▊   | 3437/5001 [2:37:09<1:06:16,  2.54s/it][A
Train Diffusion:  69%|██████▊   | 3438/5001 [2:37:12<1:06:11,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3439/5001 [2:37:14<1:06:06,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3440/5001 [2:37:17<1:06:07,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3441/5001 [2:37:19<1:06:03,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325407072.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9732, 0.4790, 1.3030],
        [8.9613, 0.4891, 1.2649],
        [8.5711, 0.5032, 1.2898]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7168,  0.9589,  1.0572],
         [12.6728,  0.4652,  1.2020],
         [ 2.3541,  0.4211,  0.6903],
         ...,
         [18.8287,  0.1849,  1.2367],
         [ 4.3039,  0.1748,  0.4234],
         [ 0.9864,  2.2746,  0.8154]],

        [[ 1.5988,  0.8060,  2.2988],
         [ 1.4386,  0.5096,  0.7812],
         [ 0.7166,  0.7095,  1.3695],
         ...,
         [ 0.9790,  0.1682,  0.7390],
         [ 1.8419,  0.3892,  0.6616],
         [17.4241,  1.5209,  2.0440]],

        [[ 0.5882,  0.9525,  0.9351],
         [ 0.5704,  0.8354,  1.7291],
         [27.7482,  0.1803,  1.2005],
         ...,
         [ 0.2082,  0.5670,  0.8837],
         [42.4724,  0.1471,  1.2510],
         [ 6.3962,  0.2102,  0.9323


Train Diffusion:  69%|██████▉   | 3442/5001 [2:37:22<1:06:00,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3443/5001 [2:37:25<1:05:54,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3444/5001 [2:37:27<1:05:43,  2.53s/it][A
Train Diffusion:  69%|██████▉   | 3445/5001 [2:37:30<1:05:53,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3446/5001 [2:37:32<1:05:48,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3447/5001 [2:37:35<1:07:00,  2.59s/it][A
Train Diffusion:  69%|██████▉   | 3448/5001 [2:37:37<1:06:34,  2.57s/it][A
Train Diffusion:  69%|██████▉   | 3449/5001 [2:37:40<1:06:28,  2.57s/it][A
Train Diffusion:  69%|██████▉   | 3450/5001 [2:37:42<1:06:09,  2.56s/it][A
Train Diffusion:  69%|██████▉   | 3451/5001 [2:37:45<1:06:02,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329503456.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6313, 0.4898, 1.2952],
        [8.8475, 0.4845, 1.3108],
        [8.8313, 0.4917, 1.3242]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5237,  0.9533,  0.6527],
         [ 9.2700,  1.3481,  5.0499],
         [ 0.9024,  0.9003,  1.2484],
         ...,
         [ 4.1236,  0.1466,  0.7195],
         [ 0.5680,  0.6285,  0.8271],
         [ 0.4796,  1.9592,  2.6785]],

        [[ 1.5513,  0.8233,  5.2887],
         [ 1.0209,  0.0850,  0.9807],
         [ 1.1623,  0.6316,  0.7595],
         ...,
         [24.6904,  0.3333,  0.5600],
         [ 4.9368,  0.1066,  0.5975],
         [ 0.6073,  2.5202,  0.5332]],

        [[ 0.8306,  0.9631,  0.7930],
         [ 0.7197,  0.6685,  0.9433],
         [15.8890,  0.3827,  1.2004],
         ...,
         [ 0.1388, 16.7763,  1.0630],
         [28.6141,  0.1071,  1.4172],
         [ 6.0681,  0.1693,  5.9310


Train Diffusion:  69%|██████▉   | 3452/5001 [2:37:48<1:05:57,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3453/5001 [2:37:50<1:05:45,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3454/5001 [2:37:53<1:05:42,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3455/5001 [2:37:55<1:05:38,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3456/5001 [2:37:58<1:05:26,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3457/5001 [2:38:00<1:05:19,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3458/5001 [2:38:03<1:05:18,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3459/5001 [2:38:05<1:05:20,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3460/5001 [2:38:08<1:05:14,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3461/5001 [2:38:10<1:05:11,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324645337.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7263, 0.4994, 1.2995],
        [8.7657, 0.5044, 1.2780],
        [8.7789, 0.4845, 1.2687]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.9630e-01, 9.5602e-01, 8.5357e-01],
         [6.3238e-01, 7.2960e-01, 1.8872e+00],
         [2.7535e+01, 2.2789e-01, 1.1977e+00],
         ...,
         [3.9815e+00, 1.6336e-01, 8.0925e-01],
         [5.7273e-01, 5.2813e-01, 6.7021e-01],
         [1.2108e+01, 1.6922e+00, 1.6881e+00]],

        [[1.5985e+00, 8.1145e-01, 4.3543e+00],
         [1.0980e+00, 6.5020e-01, 1.0127e+00],
         [6.6020e-01, 7.5521e-01, 1.1978e+00],
         ...,
         [5.1519e-01, 7.0379e-01, 9.1678e-01],
         [4.6193e+00, 4.8202e-01, 1.8470e+00],
         [4.8193e-01, 1.8588e+00, 2.9274e+00]],

        [[6.0537e-01, 9.5194e-01, 8.6203e-01],
         [1.2137e+01, 6.7323e-01, 9.7092e-01],
         [1.9609e+00, 4.1440e-01, 7.9


Train Diffusion:  69%|██████▉   | 3462/5001 [2:38:13<1:05:10,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3463/5001 [2:38:15<1:05:04,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3464/5001 [2:38:18<1:05:04,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3465/5001 [2:38:21<1:05:05,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3466/5001 [2:38:23<1:05:34,  2.56s/it][A
Train Diffusion:  69%|██████▉   | 3467/5001 [2:38:26<1:05:26,  2.56s/it][A
Train Diffusion:  69%|██████▉   | 3468/5001 [2:38:28<1:05:10,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3469/5001 [2:38:31<1:05:08,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3470/5001 [2:38:33<1:05:13,  2.56s/it][A
Train Diffusion:  69%|██████▉   | 3471/5001 [2:38:36<1:05:03,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336314678.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8215, 0.4956, 1.3210],
        [8.7628, 0.4714, 1.3077],
        [8.7822, 0.5063, 1.3048]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3133,  0.8798,  4.8169],
         [ 1.2148,  0.9625,  1.6559],
         [ 0.7754,  0.7064,  1.2927],
         ...,
         [ 2.5855,  0.2769,  0.8365],
         [ 0.5053,  0.6067,  0.8196],
         [24.0202,  1.3496,  2.0432]],

        [[ 0.4443,  0.9549,  0.9173],
         [ 9.0501,  0.7195,  0.7497],
         [ 1.5522,  0.4538,  0.7574],
         ...,
         [ 0.3383,  0.5872,  0.9502],
         [37.8578,  0.1055,  1.2654],
         [ 5.7986,  0.3671,  1.1946]],

        [[ 1.1558,  0.9138,  0.9275],
         [ 1.2698,  0.4289,  1.9486],
         [28.3129,  0.2006,  1.1921],
         ...,
         [33.0378,  0.0971,  1.2258],
         [ 5.4297,  0.1203,  0.6551],
         [ 0.6009,  1.9015,  0.7702


Train Diffusion:  69%|██████▉   | 3472/5001 [2:38:38<1:04:48,  2.54s/it][A
Train Diffusion:  69%|██████▉   | 3473/5001 [2:38:41<1:04:52,  2.55s/it][A
Train Diffusion:  69%|██████▉   | 3474/5001 [2:38:44<1:05:48,  2.59s/it][A
Train Diffusion:  69%|██████▉   | 3475/5001 [2:38:46<1:06:23,  2.61s/it][A
Train Diffusion:  70%|██████▉   | 3476/5001 [2:38:49<1:05:44,  2.59s/it][A
Train Diffusion:  70%|██████▉   | 3477/5001 [2:38:51<1:05:23,  2.57s/it][A
Train Diffusion:  70%|██████▉   | 3478/5001 [2:38:54<1:05:02,  2.56s/it][A
Train Diffusion:  70%|██████▉   | 3479/5001 [2:38:57<1:04:49,  2.56s/it][A
Train Diffusion:  70%|██████▉   | 3480/5001 [2:38:59<1:04:35,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3481/5001 [2:39:02<1:04:28,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315295168.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6496, 0.5104, 1.2973],
        [8.6518, 0.4907, 1.2966],
        [8.9738, 0.4823, 1.3180]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5209,  0.9526,  0.9733],
         [12.4334,  0.5066,  1.4563],
         [ 1.6531,  0.6062,  1.6936],
         ...,
         [ 0.5898,  2.0555,  0.6289],
         [ 2.1452,  0.6046,  1.1314],
         [ 2.8018,  2.2436,  1.6161]],

        [[ 1.5479,  0.8275,  4.5696],
         [ 0.7647,  1.0284,  1.2049],
         [ 0.6838,  0.9557,  0.7720],
         ...,
         [33.7133,  0.0557,  1.2383],
         [ 6.1466,  0.0670,  0.3765],
         [15.3615,  2.1356,  2.0488]],

        [[ 0.8362,  0.9626,  0.7926],
         [ 0.8138,  0.5961,  0.7101],
         [19.2358,  0.4555,  1.7683],
         ...,
         [ 3.4145,  0.2645,  0.5296],
         [27.4342,  0.3671,  0.4731],
         [ 5.5857,  0.2462,  0.7414


Train Diffusion:  70%|██████▉   | 3482/5001 [2:39:04<1:04:33,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3483/5001 [2:39:07<1:04:19,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3484/5001 [2:39:09<1:04:19,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3485/5001 [2:39:12<1:04:11,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3486/5001 [2:39:14<1:04:07,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3487/5001 [2:39:17<1:04:10,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3488/5001 [2:39:19<1:04:02,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3489/5001 [2:39:22<1:03:57,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3490/5001 [2:39:24<1:03:58,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3491/5001 [2:39:27<1:03:52,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325715648.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7359, 0.4835, 1.2887],
        [8.7991, 0.4982, 1.2841],
        [8.8371, 0.4988, 1.2704]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7635,  0.9615,  1.1305],
         [12.6227,  0.4028,  1.2597],
         [ 2.6255,  0.4319,  0.9894],
         ...,
         [32.6628,  0.1061,  1.2174],
         [ 5.3133,  0.0569,  1.0092],
         [ 2.6062,  0.5089,  0.9127]],

        [[ 0.5585,  0.9518,  0.9472],
         [ 0.5565,  0.8300,  1.5297],
         [25.9065,  0.2544,  1.1696],
         ...,
         [ 0.3475,  0.5894,  1.0889],
         [20.9628,  0.3136,  1.1212],
         [ 3.2877,  0.8342,  9.0934]],

        [[ 1.5813,  0.8093,  1.4926],
         [ 1.5714,  0.4122,  0.7437],
         [ 0.6030,  0.8439,  1.0403],
         ...,
         [ 2.7193,  0.2980,  1.1291],
         [ 0.3811,  0.6230,  0.8707],
         [ 7.7960,  1.2274,  2.3428


Train Diffusion:  70%|██████▉   | 3492/5001 [2:39:30<1:03:53,  2.54s/it][A
Train Diffusion:  70%|██████▉   | 3493/5001 [2:39:32<1:03:58,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3494/5001 [2:39:35<1:04:06,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3495/5001 [2:39:37<1:04:10,  2.56s/it][A
Train Diffusion:  70%|██████▉   | 3496/5001 [2:39:40<1:03:55,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3497/5001 [2:39:42<1:03:48,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3498/5001 [2:39:45<1:03:58,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3499/5001 [2:39:47<1:03:52,  2.55s/it][A
Train Diffusion:  70%|██████▉   | 3500/5001 [2:39:50<1:03:42,  2.55s/it][A
Train Diffusion:  70%|███████   | 3501/5001 [2:39:52<1:03:35,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 314267833.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7006, 0.5119, 1.3368],
        [8.6446, 0.5100, 1.2918],
        [8.7520, 0.5180, 1.2784]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7365,  0.9581,  0.8309],
         [ 0.6783,  0.6836,  1.9206],
         [26.0091,  0.2725,  1.2032],
         ...,
         [ 0.0708,  8.0675,  2.0726],
         [28.0688,  0.1996,  1.1155],
         [ 5.4678,  0.4115,  1.1529]],

        [[ 0.5742,  0.9502,  0.9335],
         [12.8710,  0.5638,  1.1239],
         [ 2.0689,  0.4325,  0.8104],
         ...,
         [17.5126,  0.5497,  0.1764],
         [ 4.8990,  0.0842,  0.6773],
         [ 0.6183,  1.8075,  0.8509]],

        [[ 1.5945,  0.8195,  4.1506],
         [ 0.9498,  0.7899,  1.0704],
         [ 0.6482,  0.8390,  0.9738],
         ...,
         [ 4.0591,  0.1103,  0.7464],
         [ 0.3871,  0.7869,  0.7431],
         [23.4342,  1.3940,  2.0345


Train Diffusion:  70%|███████   | 3502/5001 [2:39:55<1:03:33,  2.54s/it][A
Train Diffusion:  70%|███████   | 3503/5001 [2:39:58<1:03:23,  2.54s/it][A
Train Diffusion:  70%|███████   | 3504/5001 [2:40:00<1:03:11,  2.53s/it][A
Train Diffusion:  70%|███████   | 3505/5001 [2:40:03<1:03:22,  2.54s/it][A
Train Diffusion:  70%|███████   | 3506/5001 [2:40:05<1:03:19,  2.54s/it][A
Train Diffusion:  70%|███████   | 3507/5001 [2:40:08<1:03:21,  2.54s/it][A
Train Diffusion:  70%|███████   | 3508/5001 [2:40:10<1:03:14,  2.54s/it][A
Train Diffusion:  70%|███████   | 3509/5001 [2:40:13<1:03:12,  2.54s/it][A
Train Diffusion:  70%|███████   | 3510/5001 [2:40:15<1:03:09,  2.54s/it][A
Train Diffusion:  70%|███████   | 3511/5001 [2:40:18<1:03:03,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337452748.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7733, 0.5006, 1.3214],
        [8.6058, 0.5134, 1.3077],
        [8.7894, 0.5121, 1.2907]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1889e+00, 9.0663e-01, 2.7223e+00],
         [1.9267e+00, 9.5843e-01, 1.2959e+00],
         [1.4053e+00, 1.6404e+00, 5.7760e-01],
         ...,
         [1.9649e+00, 1.9875e+00, 1.0100e+00],
         [6.5839e+00, 4.0142e-01, 1.3782e+00],
         [3.6024e-01, 2.6130e+00, 2.2726e+00]],

        [[4.4412e-01, 9.5420e-01, 1.0101e+00],
         [8.5492e+00, 5.4756e-01, 1.3865e+00],
         [1.6588e+00, 3.1000e-01, 7.6466e-01],
         ...,
         [2.0168e+00, 8.1557e-01, 5.2953e-01],
         [3.1711e+00, 3.4142e-01, 6.2477e-02],
         [4.7635e+00, 2.3360e-01, 9.2938e-01]],

        [[1.2787e+00, 8.8936e-01, 7.0697e-01],
         [1.4811e+00, 3.9471e-01, 2.3537e-01],
         [1.4682e-05, 2.2832e+00, 2.4


Train Diffusion:  70%|███████   | 3512/5001 [2:40:20<1:02:55,  2.54s/it][A
Train Diffusion:  70%|███████   | 3513/5001 [2:40:23<1:02:51,  2.53s/it][A
Train Diffusion:  70%|███████   | 3514/5001 [2:40:26<1:03:32,  2.56s/it][A
Train Diffusion:  70%|███████   | 3515/5001 [2:40:28<1:03:31,  2.57s/it][A
Train Diffusion:  70%|███████   | 3516/5001 [2:40:31<1:03:25,  2.56s/it][A
Train Diffusion:  70%|███████   | 3517/5001 [2:40:33<1:03:13,  2.56s/it][A
Train Diffusion:  70%|███████   | 3518/5001 [2:40:36<1:03:08,  2.55s/it][A
Train Diffusion:  70%|███████   | 3519/5001 [2:40:38<1:02:54,  2.55s/it][A
Train Diffusion:  70%|███████   | 3520/5001 [2:40:41<1:02:51,  2.55s/it][A
Train Diffusion:  70%|███████   | 3521/5001 [2:40:43<1:03:00,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334126566.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7481, 0.4825, 1.3055],
        [8.8650, 0.4847, 1.3237],
        [8.7413, 0.4870, 1.2797]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5656,  0.9514,  0.7506],
         [10.9820,  0.9282,  1.0305],
         [ 1.5086,  0.4929,  0.8171],
         ...,
         [ 2.2481,  0.5553,  1.5962],
         [ 0.2382,  0.7927,  0.8283],
         [23.0780,  1.4927,  1.8747]],

        [[ 1.5872,  0.8186,  5.3656],
         [ 0.9601,  0.6249,  1.1638],
         [ 0.7055,  0.7824,  1.0392],
         ...,
         [25.6685,  0.1112,  1.3013],
         [ 5.5298,  0.0510,  6.0212],
         [ 0.7468,  1.8856,  1.4302]],

        [[ 0.7511,  0.9604,  0.8222],
         [ 0.6811,  0.6736,  1.8102],
         [26.8102,  0.2635,  1.1968],
         ...,
         [ 0.5042,  0.6584,  1.0659],
         [23.1823,  0.1556,  1.2343],
         [ 4.9777,  0.4324,  0.7124


Train Diffusion:  70%|███████   | 3522/5001 [2:40:46<1:03:21,  2.57s/it][A
Train Diffusion:  70%|███████   | 3523/5001 [2:40:49<1:04:36,  2.62s/it][A
Train Diffusion:  70%|███████   | 3524/5001 [2:40:51<1:03:59,  2.60s/it][A
Train Diffusion:  70%|███████   | 3525/5001 [2:40:54<1:03:38,  2.59s/it][A
Train Diffusion:  71%|███████   | 3526/5001 [2:40:56<1:03:10,  2.57s/it][A
Train Diffusion:  71%|███████   | 3527/5001 [2:40:59<1:02:56,  2.56s/it][A
Train Diffusion:  71%|███████   | 3528/5001 [2:41:01<1:02:38,  2.55s/it][A
Train Diffusion:  71%|███████   | 3529/5001 [2:41:04<1:02:31,  2.55s/it][A
Train Diffusion:  71%|███████   | 3530/5001 [2:41:07<1:02:33,  2.55s/it][A
Train Diffusion:  71%|███████   | 3531/5001 [2:41:09<1:02:24,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333579024.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6630, 0.5069, 1.2898],
        [8.7862, 0.5048, 1.3061],
        [8.8259, 0.5063, 1.2892]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5641e+00, 8.1230e-01, 7.2425e-01],
         [1.8024e+00, 3.4970e-01, 6.8970e-01],
         [1.1262e-01, 7.9650e-01, 1.4132e+00],
         ...,
         [5.3634e-01, 1.2582e+01, 4.0155e-01],
         [1.0317e-03, 1.9027e+00, 1.3563e+00],
         [3.1344e-01, 1.6321e+00, 6.9597e+00]],

        [[8.0793e-01, 9.6221e-01, 1.1950e+00],
         [1.2206e+01, 3.5679e-01, 1.2796e+00],
         [2.7870e+00, 3.3845e-01, 5.9350e-01],
         ...,
         [3.8163e+00, 1.5022e-01, 6.3842e-01],
         [6.2868e+00, 8.9195e-03, 8.4329e-01],
         [3.4016e+00, 5.9510e-01, 8.8290e-01]],

        [[5.3324e-01, 9.5238e-01, 9.1815e-01],
         [6.1502e-01, 7.8797e-01, 1.5972e+00],
         [2.0925e+01, 2.5718e-01, 1.1


Train Diffusion:  71%|███████   | 3532/5001 [2:41:12<1:02:16,  2.54s/it][A
Train Diffusion:  71%|███████   | 3533/5001 [2:41:14<1:02:09,  2.54s/it][A
Train Diffusion:  71%|███████   | 3534/5001 [2:41:17<1:02:06,  2.54s/it][A
Train Diffusion:  71%|███████   | 3535/5001 [2:41:19<1:02:03,  2.54s/it][A
Train Diffusion:  71%|███████   | 3536/5001 [2:41:22<1:01:55,  2.54s/it][A
Train Diffusion:  71%|███████   | 3537/5001 [2:41:24<1:01:47,  2.53s/it][A
Train Diffusion:  71%|███████   | 3538/5001 [2:41:27<1:01:51,  2.54s/it][A
Train Diffusion:  71%|███████   | 3539/5001 [2:41:29<1:01:53,  2.54s/it][A
Train Diffusion:  71%|███████   | 3540/5001 [2:41:32<1:02:14,  2.56s/it][A
Train Diffusion:  71%|███████   | 3541/5001 [2:41:35<1:02:07,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323180825.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9331, 0.5011, 1.3223],
        [8.6163, 0.4944, 1.2994],
        [8.7635, 0.4994, 1.2863]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5985,  0.9505,  0.9301],
         [ 0.5619,  0.7920,  1.7769],
         [28.0878,  0.0947,  1.2010],
         ...,
         [ 3.4988,  0.3432,  2.1006],
         [ 0.6726,  0.4314,  0.2165],
         [13.0421,  3.0567,  2.3249]],

        [[ 0.7042,  0.9555,  1.0482],
         [12.5307,  0.4660,  1.1971],
         [ 2.2602,  0.4032,  0.8066],
         ...,
         [ 7.5420,  0.7459,  0.5397],
         [ 3.6732,  0.1894,  0.6423],
         [ 1.9095,  5.5189,  1.1141]],

        [[ 1.5982,  0.8064,  2.3638],
         [ 1.3503,  0.5364,  0.7774],
         [ 0.7284,  0.7552,  0.6688],
         ...,
         [ 3.1157,  0.1381,  1.0827],
         [41.1626,  0.0760,  1.3559],
         [ 6.3173,  0.2640,  0.9352


Train Diffusion:  71%|███████   | 3542/5001 [2:41:37<1:02:14,  2.56s/it][A
Train Diffusion:  71%|███████   | 3543/5001 [2:41:40<1:02:09,  2.56s/it][A
Train Diffusion:  71%|███████   | 3544/5001 [2:41:42<1:01:58,  2.55s/it][A
Train Diffusion:  71%|███████   | 3545/5001 [2:41:45<1:01:46,  2.55s/it][A
Train Diffusion:  71%|███████   | 3546/5001 [2:41:47<1:01:51,  2.55s/it][A
Train Diffusion:  71%|███████   | 3547/5001 [2:41:50<1:01:40,  2.55s/it][A
Train Diffusion:  71%|███████   | 3548/5001 [2:41:52<1:01:43,  2.55s/it][A
Train Diffusion:  71%|███████   | 3549/5001 [2:41:55<1:01:39,  2.55s/it][A
Train Diffusion:  71%|███████   | 3550/5001 [2:41:58<1:01:37,  2.55s/it][A
Train Diffusion:  71%|███████   | 3551/5001 [2:42:00<1:01:29,  2.54s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338026041.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7327, 0.5046, 1.3108],
        [8.8075, 0.5043, 1.2983],
        [8.7292, 0.5100, 1.3019]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0410,  0.9376,  1.8645],
         [ 5.1533,  0.5490,  1.3732],
         [ 1.6423,  0.7006,  0.6932],
         ...,
         [ 1.6911,  0.3321,  0.9066],
         [ 0.5638,  0.9606,  6.7020],
         [ 0.2041,  2.7990,  1.6591]],

        [[ 1.4104,  0.8510,  0.7561],
         [ 1.6898,  0.3378,  1.3397],
         [21.1811,  0.3166,  1.1897],
         ...,
         [ 1.6155,  0.3610,  1.0742],
         [29.1790,  0.6527,  2.3024],
         [ 1.9725,  3.3816,  1.6410]],

        [[ 0.4595,  0.9537,  0.8937],
         [ 5.2149,  0.8810,  1.5752],
         [ 0.8134,  0.8194,  0.8831],
         ...,
         [36.4435,  0.2294,  0.9226],
         [ 3.8103,  0.5571,  0.7819],
         [ 0.4168,  1.1007, 11.3387


Train Diffusion:  71%|███████   | 3552/5001 [2:42:03<1:01:26,  2.54s/it][A
Train Diffusion:  71%|███████   | 3553/5001 [2:42:05<1:01:23,  2.54s/it][A
Train Diffusion:  71%|███████   | 3554/5001 [2:42:08<1:01:18,  2.54s/it][A
Train Diffusion:  71%|███████   | 3555/5001 [2:42:10<1:01:24,  2.55s/it][A
Train Diffusion:  71%|███████   | 3556/5001 [2:42:13<1:01:19,  2.55s/it][A
Train Diffusion:  71%|███████   | 3557/5001 [2:42:15<1:01:09,  2.54s/it][A
Train Diffusion:  71%|███████   | 3558/5001 [2:42:18<1:01:05,  2.54s/it][A
Train Diffusion:  71%|███████   | 3559/5001 [2:42:20<1:00:59,  2.54s/it][A
Train Diffusion:  71%|███████   | 3560/5001 [2:42:23<1:00:58,  2.54s/it][A
Train Diffusion:  71%|███████   | 3561/5001 [2:42:25<1:01:09,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331452921.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8791, 0.4682, 1.2963],
        [8.7522, 0.4956, 1.2954],
        [8.7222, 0.5156, 1.2980]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5258,  0.9540,  0.7389],
         [10.7975,  1.0099,  1.6253],
         [ 1.2838,  0.6214,  1.0352],
         ...,
         [ 2.0235,  0.0433,  0.8266],
         [ 5.6655,  0.0704,  0.7427],
         [23.2124,  1.4949,  1.9471]],

        [[ 0.8245,  0.9640,  0.7935],
         [ 0.7463,  0.6424,  1.6424],
         [26.2864,  0.2756,  1.1868],
         ...,
         [ 0.2113,  0.5226,  0.5520],
         [27.4300,  0.3317,  0.2969],
         [ 5.4020,  0.3432,  0.7302]],

        [[ 1.5563,  0.8257,  5.5203],
         [ 0.8829,  0.5984,  1.2329],
         [ 0.7468,  0.8279,  0.8786],
         ...,
         [ 0.2468,  1.1970,  2.6340],
         [ 0.2744,  0.7191,  1.3389],
         [ 0.3772,  2.3278,  1.1802


Train Diffusion:  71%|███████   | 3562/5001 [2:42:28<1:03:15,  2.64s/it][A
Train Diffusion:  71%|███████   | 3563/5001 [2:42:31<1:02:36,  2.61s/it][A
Train Diffusion:  71%|███████▏  | 3564/5001 [2:42:33<1:02:23,  2.61s/it][A
Train Diffusion:  71%|███████▏  | 3565/5001 [2:42:36<1:01:48,  2.58s/it][A
Train Diffusion:  71%|███████▏  | 3566/5001 [2:42:39<1:01:35,  2.58s/it][A
Train Diffusion:  71%|███████▏  | 3567/5001 [2:42:41<1:01:15,  2.56s/it][A
Train Diffusion:  71%|███████▏  | 3568/5001 [2:42:44<1:01:05,  2.56s/it][A
Train Diffusion:  71%|███████▏  | 3569/5001 [2:42:46<1:00:54,  2.55s/it][A
Train Diffusion:  71%|███████▏  | 3570/5001 [2:42:49<1:01:21,  2.57s/it][A
Train Diffusion:  71%|███████▏  | 3571/5001 [2:42:52<1:02:32,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334324819.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8530, 0.5177, 1.3063],
        [8.7516, 0.5050, 1.2967],
        [8.6019, 0.4961, 1.3216]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9758,  0.9491,  1.5913],
         [ 8.8904,  0.3495,  1.2756],
         [ 2.7581,  0.4489,  0.7083],
         ...,
         [ 1.3572,  0.8369,  7.8837],
         [10.3353,  0.1349,  1.2707],
         [ 5.0875,  0.6012,  0.6841]],

        [[ 0.4730,  0.9540,  0.8788],
         [ 0.9792,  1.1754,  0.4292],
         [ 1.5818,  0.8107,  0.8421],
         ...,
         [ 0.0793,  1.1212,  4.7046],
         [ 0.1310,  1.4738,  7.2066],
         [ 2.1709,  3.3196,  1.5953]],

        [[ 1.4624,  0.8421,  1.0220],
         [ 1.7481,  0.3272,  1.2076],
         [16.9695,  0.3571,  1.1899],
         ...,
         [11.7110,  0.5144,  1.2608],
         [ 1.8526,  0.6766,  1.0562],
         [ 0.6261,  2.4125,  1.6783


Train Diffusion:  71%|███████▏  | 3572/5001 [2:42:54<1:01:52,  2.60s/it][A
Train Diffusion:  71%|███████▏  | 3573/5001 [2:42:57<1:01:40,  2.59s/it][A
Train Diffusion:  71%|███████▏  | 3574/5001 [2:42:59<1:01:11,  2.57s/it][A
Train Diffusion:  71%|███████▏  | 3575/5001 [2:43:02<1:01:03,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3576/5001 [2:43:04<1:00:50,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3577/5001 [2:43:07<1:00:36,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3578/5001 [2:43:09<1:00:24,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3579/5001 [2:43:12<1:00:21,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3580/5001 [2:43:14<1:00:36,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3581/5001 [2:43:17<1:00:37,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336274649.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8453, 0.5099, 1.2548],
        [8.8588, 0.4767, 1.3011],
        [8.6909, 0.5011, 1.2648]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7783,  0.9627,  0.8057],
         [ 0.7311,  0.6497,  1.9172],
         [27.9452,  0.1679,  1.2020],
         ...,
         [27.6410,  0.1543,  1.1817],
         [ 4.5312,  0.1758,  0.8221],
         [ 1.6408,  1.5391,  2.2151]],

        [[ 0.5484,  0.9529,  0.8691],
         [11.5651,  0.6691,  0.8696],
         [ 1.8066,  0.4478,  0.7333],
         ...,
         [ 0.5249,  0.5837,  1.1463],
         [42.7135,  0.0781,  1.3580],
         [ 6.5865,  0.2084,  0.8840]],

        [[ 1.5786,  0.8259,  5.0003],
         [ 0.8332,  0.7593,  1.1577],
         [ 0.7278,  0.7090,  1.1767],
         ...,
         [ 3.5407,  0.1721,  0.8166],
         [ 1.2255,  0.3284,  0.4314],
         [ 9.9028,  1.9701,  4.8438


Train Diffusion:  72%|███████▏  | 3582/5001 [2:43:20<1:00:49,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3583/5001 [2:43:22<1:00:41,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3584/5001 [2:43:25<1:00:36,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3585/5001 [2:43:27<1:01:09,  2.59s/it][A
Train Diffusion:  72%|███████▏  | 3586/5001 [2:43:30<1:00:51,  2.58s/it][A
Train Diffusion:  72%|███████▏  | 3587/5001 [2:43:33<1:00:44,  2.58s/it][A
Train Diffusion:  72%|███████▏  | 3588/5001 [2:43:35<1:00:39,  2.58s/it][A
Train Diffusion:  72%|███████▏  | 3589/5001 [2:43:38<1:00:33,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3590/5001 [2:43:40<1:00:16,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3591/5001 [2:43:43<1:00:14,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321883491.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8756, 0.5166, 1.3163],
        [8.5761, 0.4894, 1.3236],
        [8.6244, 0.4941, 1.2598]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4908e-01, 9.5365e-01, 1.0869e+00],
         [5.0428e-01, 6.5517e-01, 2.2699e-01],
         [3.0816e-05, 2.3896e+00, 2.0047e-01],
         ...,
         [1.8845e-01, 5.6311e-01, 3.7217e+00],
         [4.7436e-05, 7.5205e-01, 3.5243e+00],
         [1.3308e-01, 1.3603e+00, 1.0058e+01]],

        [[1.1232e+00, 9.1981e-01, 1.7731e+00],
         [3.7490e+00, 8.0027e-01, 1.1714e+00],
         [1.2444e+00, 2.2429e+00, 7.7604e-01],
         ...,
         [2.6456e-04, 5.5143e-01, 3.9979e+00],
         [1.3214e-01, 1.2852e+00, 6.8845e+00],
         [7.7448e-02, 3.2704e+00, 2.1459e+00]],

        [[1.3397e+00, 8.6881e-01, 3.7542e+00],
         [1.7901e+00, 9.3309e-01, 1.2547e+00],
         [1.5585e+00, 1.0753e-01, 5.3


Train Diffusion:  72%|███████▏  | 3592/5001 [2:43:45<1:00:06,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3593/5001 [2:43:48<1:00:03,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3594/5001 [2:43:50<59:49,  2.55s/it]  [A
Train Diffusion:  72%|███████▏  | 3595/5001 [2:43:53<59:41,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3596/5001 [2:43:56<59:38,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3597/5001 [2:43:58<59:30,  2.54s/it][A
Train Diffusion:  72%|███████▏  | 3598/5001 [2:44:01<59:43,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3599/5001 [2:44:03<59:40,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3600/5001 [2:44:06<59:39,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3601/5001 [2:44:08<59:29,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332597872.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6164, 0.4953, 1.3085],
        [8.6010, 0.5162, 1.3152],
        [9.0866, 0.4903, 1.3162]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0100,  0.9426,  1.7311],
         [ 6.6973,  0.4698,  1.2956],
         [ 2.0917,  0.5881,  0.7539],
         ...,
         [33.9614,  0.1003,  1.2126],
         [ 5.3698,  0.0380,  0.5565],
         [ 3.0743,  0.4916,  0.9567]],

        [[ 1.4357,  0.8461,  0.8152],
         [ 1.7280,  0.3309,  1.3819],
         [23.4016,  0.2920,  1.1776],
         ...,
         [ 3.7010,  0.1889,  0.8723],
         [ 0.5056,  0.6040,  0.8184],
         [21.8729,  1.1691,  1.9567]],

        [[ 0.4652,  0.9536,  0.8882],
         [ 3.5186,  0.9507,  0.6118],
         [ 0.7059,  0.9410,  1.1572],
         ...,
         [ 0.5286,  0.5979,  1.0202],
         [12.5175,  0.4249,  1.2909],
         [ 1.8228,  1.2163,  4.7679


Train Diffusion:  72%|███████▏  | 3602/5001 [2:44:11<59:20,  2.54s/it][A
Train Diffusion:  72%|███████▏  | 3603/5001 [2:44:13<59:19,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3604/5001 [2:44:16<59:12,  2.54s/it][A
Train Diffusion:  72%|███████▏  | 3605/5001 [2:44:18<59:14,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3606/5001 [2:44:21<59:10,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3607/5001 [2:44:24<59:08,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3608/5001 [2:44:26<59:07,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3609/5001 [2:44:29<59:32,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3610/5001 [2:44:31<59:35,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3611/5001 [2:44:34<59:49,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335384355.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7407, 0.4878, 1.2476],
        [8.8342, 0.4778, 1.2921],
        [8.9428, 0.4675, 1.2740]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0827,  0.9293,  1.8301],
         [ 2.5280,  0.8489,  1.3766],
         [ 0.8560,  1.0071,  0.5977],
         ...,
         [ 1.3561,  0.2934,  0.8272],
         [31.9508,  0.1161,  1.4754],
         [ 5.3553,  0.4068,  1.6015]],

        [[ 0.4530,  0.9551,  0.9871],
         [ 7.8050,  0.5786,  2.1392],
         [ 1.6192,  0.4252,  0.6003],
         ...,
         [ 1.1367,  2.7657,  1.4392],
         [ 0.3756,  0.7392,  0.1338],
         [ 0.1147,  5.5091,  0.1582]],

        [[ 1.3759,  0.8606,  0.5846],
         [ 1.6341,  0.3644,  1.2851],
         [17.2227,  0.3653,  1.2229],
         ...,
         [31.8699,  0.1769,  1.1987],
         [ 5.6593,  0.0812,  0.6519],
         [ 2.5536,  4.2411,  1.4176


Train Diffusion:  72%|███████▏  | 3612/5001 [2:44:36<59:26,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3613/5001 [2:44:39<59:16,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3614/5001 [2:44:42<59:09,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3615/5001 [2:44:44<59:06,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3616/5001 [2:44:47<58:57,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3617/5001 [2:44:49<58:47,  2.55s/it][A
Train Diffusion:  72%|███████▏  | 3618/5001 [2:44:52<1:00:18,  2.62s/it][A
Train Diffusion:  72%|███████▏  | 3619/5001 [2:44:55<1:00:09,  2.61s/it][A
Train Diffusion:  72%|███████▏  | 3620/5001 [2:44:57<59:34,  2.59s/it]  [A
Train Diffusion:  72%|███████▏  | 3621/5001 [2:45:00<59:15,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321615382.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6677, 0.4891, 1.2386],
        [8.8586, 0.5129, 1.3029],
        [8.7777, 0.4751, 1.3108]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7076e-01, 9.5017e-01, 8.5952e-01],
         [1.1331e+01, 6.8308e-01, 8.4059e-01],
         [1.7787e+00, 4.7772e-01, 8.3718e-01],
         ...,
         [2.0679e+01, 4.0297e-02, 1.4870e+00],
         [6.0287e+00, 3.3947e-02, 4.2575e+00],
         [9.1846e-01, 1.5636e+00, 1.0534e+00]],

        [[7.4275e-01, 9.5848e-01, 8.2542e-01],
         [6.8657e-01, 6.7360e-01, 1.8937e+00],
         [2.8694e+01, 8.3416e-02, 1.2010e+00],
         ...,
         [2.2877e+00, 2.5351e-01, 8.4874e-01],
         [6.3534e+00, 1.5509e+00, 6.3896e+00],
         [8.0692e-01, 3.4617e+00, 2.3032e+00]],

        [[1.5875e+00, 8.1927e-01, 4.8201e+00],
         [9.1040e-01, 6.7745e-01, 1.0387e+00],
         [7.8031e-01, 6.6833e-01, 7.5


Train Diffusion:  72%|███████▏  | 3622/5001 [2:45:02<59:17,  2.58s/it][A
Train Diffusion:  72%|███████▏  | 3623/5001 [2:45:05<59:06,  2.57s/it][A
Train Diffusion:  72%|███████▏  | 3624/5001 [2:45:07<58:47,  2.56s/it][A
Train Diffusion:  72%|███████▏  | 3625/5001 [2:45:10<58:43,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3626/5001 [2:45:12<58:31,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3627/5001 [2:45:15<58:23,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3628/5001 [2:45:17<58:15,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3629/5001 [2:45:20<58:12,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3630/5001 [2:45:23<58:08,  2.54s/it][A
Train Diffusion:  73%|███████▎  | 3631/5001 [2:45:25<58:14,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325233731.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8641, 0.4899, 1.3025],
        [8.7439, 0.4982, 1.3089],
        [8.6459, 0.4781, 1.2929]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5976,  0.8076,  2.0711],
         [ 1.4136,  0.4943,  0.7652],
         [ 0.7269,  0.8582,  0.7786],
         ...,
         [ 3.0980,  0.1382,  0.8033],
         [19.3555,  0.1602,  0.8639],
         [ 3.9820,  1.2196,  0.7092]],

        [[ 0.5868,  0.9522,  0.9404],
         [ 0.5581,  0.8021,  1.7164],
         [26.9684,  0.1054,  1.1952],
         ...,
         [ 6.3466,  0.0935,  0.6457],
         [ 5.4527,  0.0674,  0.8071],
         [26.8686,  1.1252,  1.9909]],

        [[ 0.7192,  0.9588,  1.0795],
         [12.6502,  0.4374,  1.2163],
         [ 2.3570,  0.3918,  0.8182],
         ...,
         [ 0.2556,  0.5639,  1.6629],
         [ 6.9729,  1.4988,  0.7594],
         [ 1.8636,  0.9232,  1.2652


Train Diffusion:  73%|███████▎  | 3632/5001 [2:45:28<58:10,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3633/5001 [2:45:30<58:07,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3634/5001 [2:45:33<58:19,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3635/5001 [2:45:36<59:43,  2.62s/it][A
Train Diffusion:  73%|███████▎  | 3636/5001 [2:45:38<1:00:52,  2.68s/it][A
Train Diffusion:  73%|███████▎  | 3637/5001 [2:45:41<1:01:06,  2.69s/it][A
Train Diffusion:  73%|███████▎  | 3638/5001 [2:45:44<1:00:46,  2.68s/it][A
Train Diffusion:  73%|███████▎  | 3639/5001 [2:45:46<1:00:30,  2.67s/it][A
Train Diffusion:  73%|███████▎  | 3640/5001 [2:45:49<59:58,  2.64s/it]  [A
Train Diffusion:  73%|███████▎  | 3641/5001 [2:45:52<1:00:24,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328623360.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7752, 0.4803, 1.3221],
        [8.6510, 0.4949, 1.2829],
        [8.7009, 0.4966, 1.2928]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.2494e-01, 9.5312e-01, 9.0916e-01],
         [5.7917e-01, 7.9377e-01, 1.8281e+00],
         [2.8346e+01, 9.7924e-02, 1.2038e+00],
         ...,
         [1.2698e+01, 3.5107e-01, 1.2337e+00],
         [5.9245e+00, 4.3524e-01, 6.0135e+00],
         [4.5278e+00, 2.1492e-01, 1.1442e+00]],

        [[1.6051e+00, 8.0634e-01, 3.2468e+00],
         [1.2881e+00, 5.5617e-01, 8.4389e-01],
         [7.1500e-01, 6.8608e-01, 7.7877e-01],
         ...,
         [1.5902e+00, 3.2268e-01, 1.0141e-01],
         [3.9440e+00, 8.5930e-02, 5.5688e-01],
         [4.3892e+00, 2.3157e+00, 8.6974e+00]],

        [[6.7181e-01, 9.5518e-01, 9.5760e-01],
         [1.2402e+01, 5.4423e-01, 1.0859e+00],
         [2.1653e+00, 4.4906e-01, 1.0


Train Diffusion:  73%|███████▎  | 3642/5001 [2:45:54<1:00:06,  2.65s/it][A
Train Diffusion:  73%|███████▎  | 3643/5001 [2:45:57<59:50,  2.64s/it]  [A
Train Diffusion:  73%|███████▎  | 3644/5001 [2:46:00<59:16,  2.62s/it][A
Train Diffusion:  73%|███████▎  | 3645/5001 [2:46:02<59:00,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3646/5001 [2:46:05<59:06,  2.62s/it][A
Train Diffusion:  73%|███████▎  | 3647/5001 [2:46:07<58:52,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3648/5001 [2:46:10<58:53,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3649/5001 [2:46:12<58:30,  2.60s/it][A
Train Diffusion:  73%|███████▎  | 3650/5001 [2:46:15<58:52,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3651/5001 [2:46:18<58:48,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342624697.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7613, 0.4908, 1.2935],
        [8.8477, 0.4912, 1.2652],
        [8.7622, 0.4940, 1.2987]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4227,  0.8475,  4.7985],
         [ 0.7619,  0.9567,  1.4849],
         [ 0.6234,  0.8169,  1.0210],
         ...,
         [ 0.4551,  0.6354,  1.1707],
         [42.5240,  0.0932,  1.3356],
         [ 6.4310,  0.2092,  1.7758]],

        [[ 1.0245,  0.9399,  0.8246],
         [ 1.0015,  0.5332,  1.8885],
         [26.1741,  0.2794,  1.2031],
         ...,
         [ 3.7215,  0.1075,  1.0200],
         [ 1.5374,  0.2500,  0.5302],
         [17.9497,  1.9840,  1.8484]],

        [[ 0.4626,  0.9532,  0.9105],
         [11.8170,  0.6609,  1.0750],
         [ 1.7614,  0.4425,  0.7994],
         ...,
         [25.9194,  0.1924,  1.1693],
         [ 4.0017,  0.2542,  0.4985],
         [ 0.1881,  2.3758,  1.8988


Train Diffusion:  73%|███████▎  | 3652/5001 [2:46:20<58:48,  2.62s/it][A
Train Diffusion:  73%|███████▎  | 3653/5001 [2:46:23<58:38,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3654/5001 [2:46:26<58:38,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3655/5001 [2:46:28<58:39,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3656/5001 [2:46:31<59:33,  2.66s/it][A
Train Diffusion:  73%|███████▎  | 3657/5001 [2:46:34<59:24,  2.65s/it][A
Train Diffusion:  73%|███████▎  | 3658/5001 [2:46:36<58:48,  2.63s/it][A
Train Diffusion:  73%|███████▎  | 3659/5001 [2:46:39<58:08,  2.60s/it][A
Train Diffusion:  73%|███████▎  | 3660/5001 [2:46:41<57:47,  2.59s/it][A
Train Diffusion:  73%|███████▎  | 3661/5001 [2:46:44<57:24,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321545155.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7376, 0.4754, 1.2890],
        [8.8891, 0.4681, 1.3249],
        [8.6328, 0.4982, 1.2881]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4430,  0.9555,  0.9610],
         [11.5104,  0.5931,  1.4293],
         [ 1.7780,  0.4555,  0.8200],
         ...,
         [13.0169,  0.1087,  1.2145],
         [ 3.5282,  0.1444,  0.6325],
         [ 6.0190,  1.7947,  1.4470]],

        [[ 1.2677,  0.8939,  3.6219],
         [ 0.8420,  1.0086,  1.5758],
         [ 0.6169,  0.9270,  0.7786],
         ...,
         [ 3.7305,  0.1004,  1.0223],
         [ 1.9303,  0.3575,  1.4942],
         [10.8811,  2.1095,  1.8846]],

        [[ 1.2018,  0.9065,  0.8340],
         [ 1.3276,  0.4348,  1.8263],
         [24.4602,  0.3097,  1.2005],
         ...,
         [12.3368,  0.7153,  0.9979],
         [ 1.5204,  0.1494,  0.0341],
         [ 5.0710,  0.2342,  0.8071


Train Diffusion:  73%|███████▎  | 3662/5001 [2:46:46<57:14,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3663/5001 [2:46:49<57:06,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3664/5001 [2:46:51<57:02,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3665/5001 [2:46:54<58:17,  2.62s/it][A
Train Diffusion:  73%|███████▎  | 3666/5001 [2:46:57<58:06,  2.61s/it][A
Train Diffusion:  73%|███████▎  | 3667/5001 [2:46:59<57:35,  2.59s/it][A
Train Diffusion:  73%|███████▎  | 3668/5001 [2:47:02<57:20,  2.58s/it][A
Train Diffusion:  73%|███████▎  | 3669/5001 [2:47:04<57:02,  2.57s/it][A
Train Diffusion:  73%|███████▎  | 3670/5001 [2:47:07<56:51,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3671/5001 [2:47:10<56:43,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323701868.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9025, 0.4950, 1.3072],
        [8.6837, 0.4991, 1.2546],
        [8.7065, 0.4834, 1.3175]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7388,  0.9598,  1.1029],
         [12.4584,  0.4394,  1.2341],
         [ 2.4071,  0.4512,  1.1691],
         ...,
         [ 1.5756,  0.3987,  0.8468],
         [11.7143,  0.2197,  0.6049],
         [ 3.5111,  0.9331,  0.8309]],

        [[ 1.5906,  0.8060,  1.7919],
         [ 1.5554,  0.4294,  0.7684],
         [ 0.7107,  0.7251,  0.9236],
         ...,
         [ 4.8799,  0.0288,  1.6827],
         [ 6.0890,  0.0695,  0.7381],
         [28.0854,  1.1206,  1.9699]],

        [[ 0.5732,  0.9517,  0.9353],
         [ 0.5828,  0.8506,  1.6293],
         [27.9019,  0.1750,  1.1941],
         ...,
         [ 0.2849,  2.6944,  0.6988],
         [17.5612,  0.4085,  1.0339],
         [ 3.4622,  0.8075,  1.2414


Train Diffusion:  73%|███████▎  | 3672/5001 [2:47:12<56:35,  2.55s/it][A
Train Diffusion:  73%|███████▎  | 3673/5001 [2:47:15<56:51,  2.57s/it][A
Train Diffusion:  73%|███████▎  | 3674/5001 [2:47:17<56:39,  2.56s/it][A
Train Diffusion:  73%|███████▎  | 3675/5001 [2:47:20<56:35,  2.56s/it][A
Train Diffusion:  74%|███████▎  | 3676/5001 [2:47:22<56:27,  2.56s/it][A
Train Diffusion:  74%|███████▎  | 3677/5001 [2:47:25<56:18,  2.55s/it][A
Train Diffusion:  74%|███████▎  | 3678/5001 [2:47:27<56:10,  2.55s/it][A
Train Diffusion:  74%|███████▎  | 3679/5001 [2:47:30<56:05,  2.55s/it][A
Train Diffusion:  74%|███████▎  | 3680/5001 [2:47:33<56:16,  2.56s/it][A
Train Diffusion:  74%|███████▎  | 3681/5001 [2:47:35<56:26,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325964864.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6910, 0.5122, 1.2684],
        [8.7531, 0.5079, 1.2808],
        [8.8101, 0.4828, 1.3200]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7567,  0.9608,  0.8173],
         [ 0.6689,  0.6980,  1.7896],
         [23.0484,  0.3189,  1.2446],
         ...,
         [ 3.6348,  0.2574,  0.9713],
         [ 0.4509,  0.5765,  0.8228],
         [19.3213,  1.1772,  1.9410]],

        [[ 0.5619,  0.9514,  0.6963],
         [ 9.9633,  1.1219,  0.6497],
         [ 1.3314,  0.4702,  0.7533],
         ...,
         [33.5047,  0.1318,  1.1885],
         [ 5.0564,  0.0530,  0.4299],
         [ 3.1563,  0.4835,  0.9484]],

        [[ 1.5849,  0.8153,  5.2605],
         [ 1.0797,  0.4281,  0.9676],
         [ 0.8533,  0.6911,  1.2366],
         ...,
         [ 0.6122,  0.5610,  1.0892],
         [13.3411,  0.4315,  1.2462],
         [ 1.9946,  1.1183,  7.2206


Train Diffusion:  74%|███████▎  | 3682/5001 [2:47:38<56:31,  2.57s/it][A
Train Diffusion:  74%|███████▎  | 3683/5001 [2:47:40<56:22,  2.57s/it][A
Train Diffusion:  74%|███████▎  | 3684/5001 [2:47:43<56:09,  2.56s/it][A
Train Diffusion:  74%|███████▎  | 3685/5001 [2:47:45<56:05,  2.56s/it][A
Train Diffusion:  74%|███████▎  | 3686/5001 [2:47:48<55:58,  2.55s/it][A
Train Diffusion:  74%|███████▎  | 3687/5001 [2:47:50<55:47,  2.55s/it][A
Train Diffusion:  74%|███████▎  | 3688/5001 [2:47:53<55:43,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3689/5001 [2:47:56<55:46,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3690/5001 [2:47:58<55:45,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3691/5001 [2:48:01<55:42,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327599660.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9053, 0.4920, 1.3042],
        [8.5383, 0.4981, 1.2833],
        [8.8997, 0.4998, 1.2994]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.4134e+00, 8.4992e-01, 5.4262e-01],
         [1.6700e+00, 3.6296e-01, 5.6446e-01],
         [1.5919e-03, 1.0709e+00, 7.9749e-01],
         ...,
         [4.2728e+00, 1.3141e-01, 7.7938e-01],
         [2.9051e-01, 2.0336e+00, 1.0832e+00],
         [7.1091e-01, 1.8844e+00, 8.0887e+00]],

        [[1.0358e+00, 9.3876e-01, 1.6721e+00],
         [3.7485e+00, 8.0153e-01, 1.3606e+00],
         [1.0204e+00, 7.0065e-01, 1.2398e+00],
         ...,
         [4.6269e-01, 7.7251e-01, 4.8311e-01],
         [3.0470e+01, 4.7470e-01, 7.6239e-01],
         [5.6035e+00, 2.1031e-01, 1.1405e+00]],

        [[4.6096e-01, 9.5386e-01, 9.6706e-01],
         [5.4105e+00, 6.1343e-01, 2.9724e+00],
         [2.1610e+00, 1.6073e-01, 4.6


Train Diffusion:  74%|███████▍  | 3692/5001 [2:48:03<55:42,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3693/5001 [2:48:06<55:38,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3694/5001 [2:48:08<55:31,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3695/5001 [2:48:11<55:27,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3696/5001 [2:48:13<55:25,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3697/5001 [2:48:16<55:22,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3698/5001 [2:48:19<55:28,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3699/5001 [2:48:21<55:20,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3700/5001 [2:48:24<55:17,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3701/5001 [2:48:26<55:09,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325091721.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9091, 0.5031, 1.3140],
        [8.6647, 0.5136, 1.3214],
        [8.7505, 0.5114, 1.3027]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4649,  0.9534,  0.8498],
         [10.6580,  0.7937,  1.3996],
         [ 1.4226,  0.5809,  0.9657],
         ...,
         [ 3.9139,  0.1017,  0.9458],
         [29.6622,  0.8327,  3.4862],
         [ 0.9194,  2.8799,  2.9567]],

        [[ 1.4305,  0.8461,  5.4127],
         [ 0.8275,  0.8994,  1.4967],
         [ 0.7527,  0.8401,  0.9221],
         ...,
         [19.9540,  0.1071,  1.2465],
         [ 5.0543,  0.0533,  1.3450],
         [ 0.8592,  3.3332,  0.7421]],

        [[ 1.0148,  0.9415,  0.8194],
         [ 1.0053,  0.5160,  1.7417],
         [26.7807,  0.2719,  1.1880],
         ...,
         [ 3.1429,  5.4288,  0.6850],
         [ 0.2790,  2.4931,  0.5862],
         [ 0.4137,  1.1793, 11.5708


Train Diffusion:  74%|███████▍  | 3702/5001 [2:48:29<55:10,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3703/5001 [2:48:31<55:06,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3704/5001 [2:48:34<55:55,  2.59s/it][A
Train Diffusion:  74%|███████▍  | 3705/5001 [2:48:37<55:47,  2.58s/it][A
Train Diffusion:  74%|███████▍  | 3706/5001 [2:48:39<55:36,  2.58s/it][A
Train Diffusion:  74%|███████▍  | 3707/5001 [2:48:42<55:22,  2.57s/it][A
Train Diffusion:  74%|███████▍  | 3708/5001 [2:48:44<55:15,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3709/5001 [2:48:47<55:04,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3710/5001 [2:48:49<55:03,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3711/5001 [2:48:52<54:53,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 311534144.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9955, 0.4842, 1.2960],
        [8.7135, 0.4939, 1.2596],
        [8.8444, 0.4767, 1.2735]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8589,  0.9632,  0.7887],
         [ 0.8000,  0.6290,  1.9266],
         [26.6633,  0.2597,  1.1986],
         ...,
         [ 0.5301,  0.2695,  0.1712],
         [ 4.9283,  0.0813,  0.5143],
         [18.7314,  1.8855,  2.3477]],

        [[ 1.5382,  0.8294,  4.6398],
         [ 0.8147,  0.8978,  1.2449],
         [ 0.6225,  0.8348,  0.9979],
         ...,
         [ 0.4317,  0.5634,  2.7294],
         [ 0.3221,  0.7360,  0.5024],
         [ 0.6572,  2.2140,  1.7780]],

        [[ 0.5098,  0.9540,  0.9168],
         [12.5808,  0.6025,  1.0897],
         [ 1.9306,  0.4409,  0.8524],
         ...,
         [ 0.2385,  0.5084,  0.9674],
         [36.6975,  0.0560,  1.4235],
         [ 6.4308,  0.2278,  0.7704


Train Diffusion:  74%|███████▍  | 3712/5001 [2:48:54<54:47,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3713/5001 [2:48:57<55:53,  2.60s/it][A
Train Diffusion:  74%|███████▍  | 3714/5001 [2:49:00<55:35,  2.59s/it][A
Train Diffusion:  74%|███████▍  | 3715/5001 [2:49:02<55:21,  2.58s/it][A
Train Diffusion:  74%|███████▍  | 3716/5001 [2:49:05<55:01,  2.57s/it][A
Train Diffusion:  74%|███████▍  | 3717/5001 [2:49:07<54:47,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3718/5001 [2:49:10<54:42,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3719/5001 [2:49:12<54:32,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3720/5001 [2:49:15<54:25,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3721/5001 [2:49:18<54:34,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317070358.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7755, 0.5051, 1.2875],
        [8.8877, 0.4657, 1.3285],
        [8.7420, 0.4669, 1.2650]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0929e-01, 9.5316e-01, 9.1943e-01],
         [6.2419e-01, 6.3639e-01, 2.0385e-01],
         [7.7476e-03, 2.3404e+00, 2.2307e-01],
         ...,
         [3.0925e-01, 7.9990e+00, 7.1208e-01],
         [1.8057e-02, 1.8608e+00, 2.4002e-01],
         [5.2539e+00, 1.8746e+00, 1.8605e+00]],

        [[1.6043e+00, 8.3503e-01, 4.9955e+00],
         [7.7274e-01, 1.0764e+00, 1.1493e+00],
         [3.8354e-01, 7.8081e+00, 2.1078e+00],
         ...,
         [3.1526e+01, 5.3873e-02, 1.2934e+00],
         [6.0363e+00, 1.1454e-02, 1.8309e-02],
         [3.9096e+00, 2.0125e-01, 4.5546e+00]],

        [[6.8884e-01, 9.5664e-01, 9.0129e-01],
         [1.0491e+01, 5.5854e-01, 1.3753e+00],
         [2.4123e+00, 7.3548e-01, 6.0


Train Diffusion:  74%|███████▍  | 3722/5001 [2:49:20<54:30,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3723/5001 [2:49:23<54:29,  2.56s/it][A
Train Diffusion:  74%|███████▍  | 3724/5001 [2:49:25<54:16,  2.55s/it][A
Train Diffusion:  74%|███████▍  | 3725/5001 [2:49:28<54:19,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3726/5001 [2:49:30<54:14,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3727/5001 [2:49:33<54:14,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3728/5001 [2:49:35<54:25,  2.57s/it][A
Train Diffusion:  75%|███████▍  | 3729/5001 [2:49:38<54:37,  2.58s/it][A
Train Diffusion:  75%|███████▍  | 3730/5001 [2:49:41<54:35,  2.58s/it][A
Train Diffusion:  75%|███████▍  | 3731/5001 [2:49:43<55:41,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329236012.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7224, 0.5054, 1.3128],
        [8.9085, 0.4718, 1.3140],
        [8.7216, 0.4930, 1.3039]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4797e-01, 9.5508e-01, 9.8977e-01],
         [6.7316e+00, 5.8511e-01, 2.0879e+00],
         [1.7128e+00, 1.7318e-02, 5.7304e-01],
         ...,
         [4.0654e+00, 1.5971e-01, 7.3725e-01],
         [6.9125e-02, 1.8316e+00, 9.4433e-01],
         [1.0950e+01, 3.5300e+00, 3.3494e+00]],

        [[1.1292e+00, 9.1941e-01, 2.2489e+00],
         [2.6273e+00, 9.0383e-01, 1.3286e+00],
         [9.0712e-01, 6.2842e+00, 6.9919e-01],
         ...,
         [4.4722e-01, 3.2667e+00, 5.7451e-01],
         [7.1430e-01, 1.9775e+00, 6.1766e-01],
         [7.9461e-01, 1.5363e+00, 1.7484e+00]],

        [[1.3336e+00, 8.7190e-01, 6.2332e-01],
         [1.5489e+00, 3.8385e-01, 2.0744e-01],
         [3.1185e-05, 2.4107e+00, 2.0


Train Diffusion:  75%|███████▍  | 3732/5001 [2:49:46<55:16,  2.61s/it][A
Train Diffusion:  75%|███████▍  | 3733/5001 [2:49:48<54:50,  2.60s/it][A
Train Diffusion:  75%|███████▍  | 3734/5001 [2:49:51<54:28,  2.58s/it][A
Train Diffusion:  75%|███████▍  | 3735/5001 [2:49:54<54:20,  2.58s/it][A
Train Diffusion:  75%|███████▍  | 3736/5001 [2:49:56<54:03,  2.56s/it][A
Train Diffusion:  75%|███████▍  | 3737/5001 [2:49:59<54:04,  2.57s/it][A
Train Diffusion:  75%|███████▍  | 3738/5001 [2:50:01<53:53,  2.56s/it][A
Train Diffusion:  75%|███████▍  | 3739/5001 [2:50:04<53:53,  2.56s/it][A
Train Diffusion:  75%|███████▍  | 3740/5001 [2:50:06<53:44,  2.56s/it][A
Train Diffusion:  75%|███████▍  | 3741/5001 [2:50:09<53:39,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337773398.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7360, 0.4911, 1.3164],
        [8.5793, 0.5104, 1.3324],
        [8.8345, 0.4954, 1.2857]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.5064e-01, 9.5437e-01, 1.0858e+00],
         [5.0389e-01, 6.5897e-01, 2.2923e-01],
         [1.6573e-05, 2.3899e+00, 3.0403e-01],
         ...,
         [3.8188e+00, 1.7808e-01, 8.3654e-01],
         [5.7051e-01, 6.6298e-01, 4.2467e+00],
         [5.2879e-02, 2.4861e+00, 2.1186e+00]],

        [[1.3531e+00, 8.6568e-01, 4.2864e+00],
         [2.2193e+00, 9.1927e-01, 1.2918e+00],
         [1.7085e+00, 7.0965e+00, 4.9647e-01],
         ...,
         [3.4276e+01, 1.4240e-01, 1.1701e+00],
         [4.8746e+00, 1.9473e-01, 5.5904e-01],
         [7.7204e+00, 2.3471e+00, 7.1802e+00]],

        [[1.1079e+00, 9.2393e-01, 1.5037e+00],
         [2.7483e+00, 8.1563e-01, 1.0457e+00],
         [1.1507e+00, 8.7156e+00, 9.1


Train Diffusion:  75%|███████▍  | 3742/5001 [2:50:11<53:35,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3743/5001 [2:50:14<53:34,  2.56s/it][A
Train Diffusion:  75%|███████▍  | 3744/5001 [2:50:17<53:30,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3745/5001 [2:50:19<53:23,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3746/5001 [2:50:22<53:24,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3747/5001 [2:50:24<53:14,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3748/5001 [2:50:27<53:20,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3749/5001 [2:50:29<53:15,  2.55s/it][A
Train Diffusion:  75%|███████▍  | 3750/5001 [2:50:32<53:18,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3751/5001 [2:50:34<53:18,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344592918.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8077, 0.4890, 1.2544],
        [8.7091, 0.5129, 1.3026],
        [8.7018, 0.5238, 1.2791]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5420,  0.8231,  4.8963],
         [ 0.8462,  0.8229,  1.2400],
         [ 0.6340,  0.7851,  0.6201],
         ...,
         [14.1793,  0.0361,  1.4648],
         [ 5.6498,  0.0731,  0.7034],
         [26.3226,  1.2484,  1.8859]],

        [[ 0.5197,  0.9504,  0.8624],
         [12.0495,  0.6924,  1.0162],
         [ 1.8058,  0.4417,  0.8982],
         ...,
         [ 3.8412,  0.1312,  0.9098],
         [19.6265,  0.2255,  1.0187],
         [ 3.1429,  1.2012,  1.4109]],

        [[ 0.8407,  0.9604,  0.7877],
         [ 0.7707,  0.6413,  1.8758],
         [28.0151,  0.1148,  1.2024],
         ...,
         [ 1.5152,  2.8404,  0.6712],
         [ 5.7285,  0.6678,  0.1573],
         [ 3.6202,  0.6364,  0.6922


Train Diffusion:  75%|███████▌  | 3752/5001 [2:50:37<54:32,  2.62s/it][A
Train Diffusion:  75%|███████▌  | 3753/5001 [2:50:40<54:07,  2.60s/it][A
Train Diffusion:  75%|███████▌  | 3754/5001 [2:50:42<53:42,  2.58s/it][A
Train Diffusion:  75%|███████▌  | 3755/5001 [2:50:45<53:36,  2.58s/it][A
Train Diffusion:  75%|███████▌  | 3756/5001 [2:50:47<53:26,  2.58s/it][A
Train Diffusion:  75%|███████▌  | 3757/5001 [2:50:50<53:15,  2.57s/it][A
Train Diffusion:  75%|███████▌  | 3758/5001 [2:50:53<53:56,  2.60s/it][A
Train Diffusion:  75%|███████▌  | 3759/5001 [2:50:55<53:33,  2.59s/it][A
Train Diffusion:  75%|███████▌  | 3760/5001 [2:50:58<53:50,  2.60s/it][A
Train Diffusion:  75%|███████▌  | 3761/5001 [2:51:01<54:32,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336750828.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6263, 0.4820, 1.2874],
        [8.8298, 0.5114, 1.2988],
        [8.8274, 0.4950, 1.2754]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7174e-01, 9.5146e-01, 6.8864e-01],
         [1.0278e+01, 1.1246e+00, 1.0634e+00],
         [1.2765e+00, 5.6482e-01, 9.0717e-01],
         ...,
         [6.8347e-01, 9.6729e-01, 6.5762e-01],
         [1.2816e-01, 6.0137e-01, 9.9715e-01],
         [2.6564e+00, 1.7076e+00, 2.6173e+00]],

        [[7.4089e-01, 9.5963e-01, 8.2737e-01],
         [6.6083e-01, 6.8701e-01, 1.7368e+00],
         [2.7623e+01, 2.2304e-01, 1.1866e+00],
         ...,
         [3.5115e+00, 2.0826e-01, 7.2562e-01],
         [6.4048e-01, 5.1730e-01, 1.0128e+00],
         [3.4349e-01, 1.5628e+00, 9.6841e+00]],

        [[1.5908e+00, 8.1532e-01, 5.4115e+00],
         [1.0427e+00, 4.5897e-01, 1.0592e+00],
         [7.9425e-01, 7.4528e-01, 1.0


Train Diffusion:  75%|███████▌  | 3762/5001 [2:51:03<54:00,  2.62s/it][A
Train Diffusion:  75%|███████▌  | 3763/5001 [2:51:06<53:35,  2.60s/it][A
Train Diffusion:  75%|███████▌  | 3764/5001 [2:51:08<53:15,  2.58s/it][A
Train Diffusion:  75%|███████▌  | 3765/5001 [2:51:11<52:57,  2.57s/it][A
Train Diffusion:  75%|███████▌  | 3766/5001 [2:51:13<52:49,  2.57s/it][A
Train Diffusion:  75%|███████▌  | 3767/5001 [2:51:16<52:39,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3768/5001 [2:51:18<52:32,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3769/5001 [2:51:21<52:34,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3770/5001 [2:51:24<52:23,  2.55s/it][A
Train Diffusion:  75%|███████▌  | 3771/5001 [2:51:26<52:26,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 310883696.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8117, 0.4760, 1.3460],
        [8.6872, 0.4844, 1.2999],
        [8.6924, 0.4830, 1.2938]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0563e-01, 9.5762e-01, 1.0342e+00],
         [1.2528e+01, 4.8198e-01, 1.1883e+00],
         [2.2603e+00, 4.2605e-01, 7.1641e-01],
         ...,
         [1.1368e-03, 1.0003e+00, 1.6005e+00],
         [3.5280e-01, 1.5694e+00, 1.2972e+01],
         [4.8027e-02, 2.7070e+00, 3.3634e+00]],

        [[5.9656e-01, 9.5256e-01, 9.3017e-01],
         [5.6722e-01, 8.2265e-01, 1.7557e+00],
         [2.8205e+01, 9.7674e-02, 1.2049e+00],
         ...,
         [4.3283e+00, 8.8381e-02, 2.9609e+00],
         [1.3928e+01, 3.8828e-01, 9.0124e-01],
         [3.6562e+00, 1.2809e+00, 4.8119e+00]],

        [[1.6005e+00, 8.0588e-01, 2.4933e+00],
         [1.3870e+00, 5.2627e-01, 7.8627e-01],
         [7.2506e-01, 7.0843e-01, 8.1


Train Diffusion:  75%|███████▌  | 3772/5001 [2:51:29<52:17,  2.55s/it][A
Train Diffusion:  75%|███████▌  | 3773/5001 [2:51:31<52:18,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3774/5001 [2:51:34<52:18,  2.56s/it][A
Train Diffusion:  75%|███████▌  | 3775/5001 [2:51:36<52:43,  2.58s/it][A
Train Diffusion:  76%|███████▌  | 3776/5001 [2:51:39<52:30,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3777/5001 [2:51:42<52:25,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3778/5001 [2:51:44<52:16,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3779/5001 [2:51:47<52:04,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3780/5001 [2:51:49<52:02,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3781/5001 [2:51:52<51:54,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334746905.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6871, 0.4953, 1.2897],
        [8.8244, 0.4936, 1.3117],
        [8.7766, 0.4865, 1.3010]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3664,  0.8626,  5.2090],
         [ 1.1581,  0.6292,  1.3279],
         [ 1.2225,  0.7108,  0.8651],
         ...,
         [ 0.6167,  0.1075,  1.3424],
         [ 3.4103,  0.1019,  2.7878],
         [ 1.6896,  2.8115,  1.4358]],

        [[ 1.0931,  0.9265,  0.8755],
         [ 1.1059,  0.4782,  1.6621],
         [23.4301,  0.3034,  1.1948],
         ...,
         [20.7119,  0.6954,  1.2121],
         [ 0.9753,  2.1185,  0.9627],
         [ 0.3596,  1.3323, 10.3912]],

        [[ 0.4518,  0.9548,  0.8003],
         [ 8.3401,  1.0030,  1.2206],
         [ 0.9651,  0.6382,  0.7952],
         ...,
         [ 2.3800,  0.2187,  0.9323],
         [33.6860,  0.1161,  1.4918],
         [ 4.2909,  1.0591,  1.5740


Train Diffusion:  76%|███████▌  | 3782/5001 [2:51:54<52:06,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3783/5001 [2:51:57<52:01,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3784/5001 [2:51:59<51:53,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3785/5001 [2:52:02<51:53,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3786/5001 [2:52:05<51:47,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3787/5001 [2:52:07<51:41,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3788/5001 [2:52:10<51:40,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3789/5001 [2:52:12<51:31,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3790/5001 [2:52:15<51:30,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3791/5001 [2:52:17<51:39,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331898748.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6669, 0.4769, 1.3075],
        [8.8766, 0.4873, 1.2926],
        [8.8629, 0.4928, 1.3027]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7162,  0.9590,  0.9895],
         [11.1490,  0.5377,  1.1529],
         [ 2.0060,  0.5392,  1.1787],
         ...,
         [33.8951,  0.1366,  1.1673],
         [ 4.9645,  0.2015,  0.5150],
         [ 1.1831,  1.8391, 10.6118]],

        [[ 1.5986,  0.8213,  3.2871],
         [ 0.9433,  0.6912,  0.8620],
         [ 0.8409,  0.6471,  0.7342],
         ...,
         [ 1.7522,  0.2786,  1.1547],
         [ 0.2945, 12.0670,  0.4536],
         [ 1.3394,  2.5588,  2.2055]],

        [[ 0.5885,  0.9527,  0.9405],
         [ 0.5746,  0.6989,  1.8606],
         [26.6321,  0.2402,  1.2028],
         ...,
         [ 0.4856,  0.5164,  0.5099],
         [32.1121,  0.4475,  1.6315],
         [ 5.1286,  0.4125,  1.1026


Train Diffusion:  76%|███████▌  | 3792/5001 [2:52:20<51:26,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3793/5001 [2:52:22<51:24,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3794/5001 [2:52:25<51:19,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3795/5001 [2:52:28<51:14,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3796/5001 [2:52:30<51:16,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3797/5001 [2:52:33<51:12,  2.55s/it][A
Train Diffusion:  76%|███████▌  | 3798/5001 [2:52:35<51:33,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3799/5001 [2:52:38<51:28,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3800/5001 [2:52:41<52:28,  2.62s/it][A
Train Diffusion:  76%|███████▌  | 3801/5001 [2:52:43<52:03,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321323825.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7614, 0.4976, 1.2894],
        [8.7329, 0.4846, 1.2843],
        [8.7201, 0.5136, 1.3130]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5579,  0.8237,  4.6585],
         [ 0.8622,  0.8211,  1.2310],
         [ 0.6051,  0.7597,  1.1966],
         ...,
         [ 4.6010,  0.0954,  1.0737],
         [33.5593,  0.5150,  2.8395],
         [ 4.7883,  3.5767,  1.9532]],

        [[ 0.8183,  0.9614,  0.7939],
         [ 0.7522,  0.6537,  1.9745],
         [26.6809,  0.2591,  1.2061],
         ...,
         [ 2.1550,  0.3565,  1.5140],
         [ 0.8690,  0.2506,  4.2394],
         [ 0.6776,  2.0988,  1.5029]],

        [[ 0.5286,  0.9514,  0.8945],
         [12.6095,  0.6248,  0.9483],
         [ 1.9904,  0.4017,  0.7461],
         ...,
         [26.8627,  0.2677,  0.8511],
         [ 3.7124,  0.3938,  0.7413],
         [ 0.4056,  1.4532,  6.0035


Train Diffusion:  76%|███████▌  | 3802/5001 [2:52:46<51:49,  2.59s/it][A
Train Diffusion:  76%|███████▌  | 3803/5001 [2:52:48<51:29,  2.58s/it][A
Train Diffusion:  76%|███████▌  | 3804/5001 [2:52:51<51:14,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3805/5001 [2:52:53<51:09,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3806/5001 [2:52:56<51:01,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3807/5001 [2:52:58<50:56,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3808/5001 [2:53:01<50:48,  2.56s/it][A
Train Diffusion:  76%|███████▌  | 3809/5001 [2:53:04<52:01,  2.62s/it][A
Train Diffusion:  76%|███████▌  | 3810/5001 [2:53:06<51:40,  2.60s/it][A
Train Diffusion:  76%|███████▌  | 3811/5001 [2:53:09<51:17,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323867484.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8417, 0.4830, 1.3567],
        [8.9693, 0.4713, 1.2652],
        [8.5374, 0.5083, 1.2830]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.4824e+00, 8.3726e-01, 4.9681e+00],
         [1.1417e+00, 1.0386e+00, 1.1787e+00],
         [1.4985e+00, 1.2888e+01, 7.3824e-01],
         ...,
         [1.4476e+01, 3.5466e-02, 1.5754e+00],
         [6.1933e+00, 6.7323e-02, 8.2175e-01],
         [2.7410e+01, 1.0332e+00, 2.0207e+00]],

        [[4.8193e-01, 9.5247e-01, 1.0449e+00],
         [5.3264e-01, 6.4729e-01, 2.3298e-01],
         [2.7644e-05, 2.3464e+00, 3.0830e-01],
         ...,
         [3.6844e-01, 6.1881e-01, 5.3650e-01],
         [1.0490e+01, 4.6103e-01, 6.2858e-01],
         [2.1594e+00, 1.3889e+00, 8.1765e-01]],

        [[9.4430e-01, 9.5308e-01, 1.0625e+00],
         [6.0567e+00, 7.1956e-01, 1.3484e+00],
         [1.3978e+00, 3.9226e+00, 7.1


Train Diffusion:  76%|███████▌  | 3812/5001 [2:53:11<50:56,  2.57s/it][A
Train Diffusion:  76%|███████▌  | 3813/5001 [2:53:14<50:48,  2.57s/it][A
Train Diffusion:  76%|███████▋  | 3814/5001 [2:53:17<50:36,  2.56s/it][A
Train Diffusion:  76%|███████▋  | 3815/5001 [2:53:19<50:40,  2.56s/it][A
Train Diffusion:  76%|███████▋  | 3816/5001 [2:53:22<50:28,  2.56s/it][A
Train Diffusion:  76%|███████▋  | 3817/5001 [2:53:24<50:19,  2.55s/it][A
Train Diffusion:  76%|███████▋  | 3818/5001 [2:53:27<50:18,  2.55s/it][A
Train Diffusion:  76%|███████▋  | 3819/5001 [2:53:29<50:15,  2.55s/it][A
Train Diffusion:  76%|███████▋  | 3820/5001 [2:53:32<50:15,  2.55s/it][A
Train Diffusion:  76%|███████▋  | 3821/5001 [2:53:34<50:13,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335907609.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8619, 0.4775, 1.2967],
        [8.8242, 0.5044, 1.3015],
        [8.7487, 0.4764, 1.2997]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0194e-01, 9.5976e-01, 8.0853e-01],
         [1.9372e+00, 8.1254e-01, 1.3070e+01],
         [7.5824e+00, 4.4891e-01, 1.4202e+00],
         ...,
         [3.2928e+01, 1.7860e-01, 1.1063e+00],
         [4.6670e+00, 1.9063e-01, 8.4176e-01],
         [8.6778e-01, 1.2792e+00, 1.0511e+00]],

        [[1.6024e+00, 7.9360e-01, 4.6211e+00],
         [1.6258e+00, 5.5888e-02, 7.7700e-01],
         [1.0805e+00, 6.0114e-01, 1.1479e+00],
         ...,
         [5.7036e-01, 4.4648e-01, 9.7122e-01],
         [4.8196e-01, 5.1695e-01, 8.2163e-01],
         [2.5894e+01, 1.1997e+00, 2.0267e+00]],

        [[5.9958e-01, 9.5490e-01, 6.0329e-01],
         [7.7352e-03, 1.8121e+00, 4.0372e-01],
         [3.3618e-01, 8.2449e-01, 1.0


Train Diffusion:  76%|███████▋  | 3822/5001 [2:53:37<50:29,  2.57s/it][A
Train Diffusion:  76%|███████▋  | 3823/5001 [2:53:40<50:25,  2.57s/it][A
Train Diffusion:  76%|███████▋  | 3824/5001 [2:53:42<50:20,  2.57s/it][A
Train Diffusion:  76%|███████▋  | 3825/5001 [2:53:45<50:20,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3826/5001 [2:53:47<50:10,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3827/5001 [2:53:50<50:07,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3828/5001 [2:53:52<49:57,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3829/5001 [2:53:55<49:50,  2.55s/it][A
Train Diffusion:  77%|███████▋  | 3830/5001 [2:53:57<49:46,  2.55s/it][A
Train Diffusion:  77%|███████▋  | 3831/5001 [2:54:01<54:25,  2.79s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322151684.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7354, 0.5218, 1.3247],
        [8.6614, 0.4987, 1.3570],
        [8.5730, 0.4975, 1.3053]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5952,  0.8146,  4.5948],
         [ 1.0029,  0.6327,  1.0008],
         [ 0.7394,  0.6679,  0.7538],
         ...,
         [23.0699,  0.4271,  1.5723],
         [ 4.2311,  0.1248,  1.2750],
         [ 0.3074,  1.7745,  0.6882]],

        [[ 0.7179,  0.9561,  0.8403],
         [ 0.6543,  0.7005,  1.8815],
         [28.3775,  0.1599,  1.1982],
         ...,
         [ 0.6920,  1.1269,  0.4337],
         [ 1.8307,  1.5322,  0.8027],
         [ 3.4075,  1.3778,  1.8308]],

        [[ 0.5890,  0.9498,  0.8608],
         [11.6080,  0.6824,  0.8385],
         [ 1.8494,  0.4764,  1.2758],
         ...,
         [ 1.7091, 16.8928,  1.1133],
         [ 6.3092,  2.8524,  2.5966],
         [ 2.4163,  3.0801,  1.6549


Train Diffusion:  77%|███████▋  | 3832/5001 [2:54:03<53:03,  2.72s/it][A
Train Diffusion:  77%|███████▋  | 3833/5001 [2:54:06<51:59,  2.67s/it][A
Train Diffusion:  77%|███████▋  | 3834/5001 [2:54:08<51:12,  2.63s/it][A
Train Diffusion:  77%|███████▋  | 3835/5001 [2:54:11<50:49,  2.62s/it][A
Train Diffusion:  77%|███████▋  | 3836/5001 [2:54:14<50:36,  2.61s/it][A
Train Diffusion:  77%|███████▋  | 3837/5001 [2:54:16<50:25,  2.60s/it][A
Train Diffusion:  77%|███████▋  | 3838/5001 [2:54:19<50:03,  2.58s/it][A
Train Diffusion:  77%|███████▋  | 3839/5001 [2:54:21<49:48,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3840/5001 [2:54:24<49:39,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3841/5001 [2:54:26<49:27,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 312782268.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9310, 0.4861, 1.2853],
        [8.6919, 0.4861, 1.3046],
        [8.7137, 0.4794, 1.2979]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5034,  0.8344,  4.3734],
         [ 0.7830,  0.9481,  1.2420],
         [ 0.6416,  0.8604,  0.9464],
         ...,
         [ 0.2734,  0.6211,  0.7411],
         [36.5511,  0.1721,  0.9777],
         [ 5.9742,  0.2309,  1.5051]],

        [[ 0.9130,  0.9581,  0.7840],
         [ 0.8705,  0.5933,  1.9152],
         [25.8024,  0.2821,  1.2013],
         ...,
         [31.5125,  0.0821,  1.2573],
         [ 5.5987,  0.1068,  0.6005],
         [ 0.4544,  2.0143,  0.8725]],

        [[ 0.4913,  0.9537,  0.9707],
         [12.8031,  0.5335,  1.1281],
         [ 2.0185,  0.4433,  0.8090],
         ...,
         [ 2.6832,  0.2250,  0.7972],
         [ 0.4786,  2.2171,  0.6588],
         [20.2648,  1.4509,  1.9955


Train Diffusion:  77%|███████▋  | 3842/5001 [2:54:29<49:21,  2.55s/it][A
Train Diffusion:  77%|███████▋  | 3843/5001 [2:54:31<49:16,  2.55s/it][A
Train Diffusion:  77%|███████▋  | 3844/5001 [2:54:34<49:13,  2.55s/it][A
Train Diffusion:  77%|███████▋  | 3845/5001 [2:54:37<49:16,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3846/5001 [2:54:39<49:13,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3847/5001 [2:54:42<49:08,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3848/5001 [2:54:44<50:27,  2.63s/it][A
Train Diffusion:  77%|███████▋  | 3849/5001 [2:54:47<50:02,  2.61s/it][A
Train Diffusion:  77%|███████▋  | 3850/5001 [2:54:50<49:42,  2.59s/it][A
Train Diffusion:  77%|███████▋  | 3851/5001 [2:54:52<49:24,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333736345.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7211, 0.5078, 1.3057],
        [8.6700, 0.4924, 1.3061],
        [8.6259, 0.5132, 1.3185]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.2985,  0.8816,  0.6809],
         [ 1.5088,  0.3863,  1.1834],
         [26.6410,  0.1456,  1.6578],
         ...,
         [30.5512,  0.0562,  1.2820],
         [ 5.9560,  0.0722,  0.4546],
         [ 8.3989,  1.9365,  4.5804]],

        [[ 0.4447,  0.9526,  0.9916],
         [ 7.5281,  0.5748,  1.1492],
         [ 0.9006,  4.1485,  2.4172],
         ...,
         [ 0.3638,  6.4242,  0.4692],
         [22.6829,  0.3372,  0.4880],
         [ 5.7896,  0.2245,  0.7531]],

        [[ 1.1686,  0.9088,  2.5117],
         [ 2.8504,  0.7609,  1.2670],
         [ 1.2893,  0.8503,  0.3613],
         ...,
         [ 4.1988,  0.1492,  0.6226],
         [ 0.2553,  1.3566,  1.0786],
         [ 6.3769,  1.7576,  2.0610


Train Diffusion:  77%|███████▋  | 3852/5001 [2:54:55<49:14,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3853/5001 [2:54:57<49:00,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3854/5001 [2:55:00<48:54,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3855/5001 [2:55:02<48:55,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3856/5001 [2:55:05<49:08,  2.58s/it][A
Train Diffusion:  77%|███████▋  | 3857/5001 [2:55:08<49:45,  2.61s/it][A
Train Diffusion:  77%|███████▋  | 3858/5001 [2:55:10<49:23,  2.59s/it][A
Train Diffusion:  77%|███████▋  | 3859/5001 [2:55:13<49:04,  2.58s/it][A
Train Diffusion:  77%|███████▋  | 3860/5001 [2:55:15<48:51,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3861/5001 [2:55:18<48:42,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334489459.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8794, 0.4976, 1.3171],
        [8.7707, 0.4855, 1.3058],
        [8.4994, 0.4989, 1.2690]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4419,  0.8447,  5.4192],
         [ 0.8721,  0.9133,  1.3883],
         [ 0.8363,  0.8757,  0.8619],
         ...,
         [18.5918,  0.1639,  0.6720],
         [ 5.5067,  0.0540,  1.4068],
         [ 0.1881,  2.2563,  1.7594]],

        [[ 0.4686,  0.9538,  0.8688],
         [10.2520,  0.7529,  1.3668],
         [ 1.4125,  0.6250,  1.0386],
         ...,
         [ 4.2203,  0.1087,  0.9376],
         [31.8706,  0.0331,  1.5272],
         [ 5.7244,  0.4068,  1.4179]],

        [[ 0.9996,  0.9446,  0.8112],
         [ 0.9890,  0.5242,  1.6825],
         [26.7029,  0.2535,  1.1781],
         ...,
         [ 0.2405,  3.6729,  1.7157],
         [ 0.0386,  1.0740,  0.5882],
         [ 1.8944,  2.0583,  9.3285


Train Diffusion:  77%|███████▋  | 3862/5001 [2:55:20<48:40,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3863/5001 [2:55:23<48:32,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3864/5001 [2:55:26<48:26,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3865/5001 [2:55:28<48:28,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3866/5001 [2:55:31<48:20,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3867/5001 [2:55:33<48:18,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3868/5001 [2:55:36<48:20,  2.56s/it][A
Train Diffusion:  77%|███████▋  | 3869/5001 [2:55:38<48:29,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3870/5001 [2:55:41<48:36,  2.58s/it][A
Train Diffusion:  77%|███████▋  | 3871/5001 [2:55:44<48:36,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322844766.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6309, 0.5092, 1.3217],
        [8.9539, 0.4837, 1.3165],
        [8.6079, 0.5243, 1.2768]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6788,  0.9559,  1.0093],
         [12.2231,  0.4943,  1.1649],
         [ 2.1243,  0.4335,  1.2122],
         ...,
         [ 3.9272,  0.0974,  1.0724],
         [29.7175,  0.6655,  2.4998],
         [ 3.7773,  3.6779,  1.8466]],

        [[ 1.6072,  0.8091,  2.8277],
         [ 1.2343,  0.5829,  0.7982],
         [ 0.7524,  0.7951,  0.6880],
         ...,
         [19.0942,  0.4885,  0.4212],
         [ 3.1692,  0.2965,  0.8795],
         [ 0.3906,  1.2660,  9.6266]],

        [[ 0.6189,  0.9534,  0.9157],
         [ 0.5737,  0.7625,  1.7972],
         [28.0084,  0.1314,  1.1953],
         ...,
         [ 0.2270,  0.3492,  2.8099],
         [ 0.3242,  0.4640,  6.3688],
         [ 0.4406,  2.3661,  1.7811


Train Diffusion:  77%|███████▋  | 3872/5001 [2:55:46<48:20,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3873/5001 [2:55:49<48:29,  2.58s/it][A
Train Diffusion:  77%|███████▋  | 3874/5001 [2:55:51<48:18,  2.57s/it][A
Train Diffusion:  77%|███████▋  | 3875/5001 [2:55:54<48:09,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3876/5001 [2:55:56<47:54,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3877/5001 [2:55:59<47:52,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3878/5001 [2:56:01<47:48,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3879/5001 [2:56:04<47:47,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3880/5001 [2:56:07<47:45,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3881/5001 [2:56:09<47:41,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336546291.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7272, 0.4971, 1.2867],
        [8.8428, 0.5074, 1.2766],
        [8.8349, 0.5116, 1.2649]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5662e+00, 8.1150e-01, 7.0049e-01],
         [1.8042e+00, 3.5047e-01, 7.7026e-01],
         [1.1137e-02, 6.7940e-01, 1.4383e+00],
         ...,
         [3.0296e+00, 1.7367e-01, 7.2429e-01],
         [4.6752e-01, 7.1173e-01, 1.1574e+00],
         [4.0619e-01, 2.1512e+00, 1.1662e+00]],

        [[8.0585e-01, 9.6175e-01, 1.1913e+00],
         [1.2180e+01, 3.3674e-01, 1.2794e+00],
         [2.8616e+00, 3.3288e-01, 1.1389e+00],
         ...,
         [2.4756e-01, 7.2266e-01, 5.8048e-01],
         [2.7588e+01, 2.0715e-01, 4.9318e-01],
         [5.5648e+00, 3.4045e-01, 7.4188e-01]],

        [[5.3362e-01, 9.5179e-01, 9.1771e-01],
         [6.2346e-01, 7.8332e-01, 8.9785e-01],
         [1.1700e+01, 2.4833e-01, 8.1


Train Diffusion:  78%|███████▊  | 3882/5001 [2:56:12<47:39,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3883/5001 [2:56:14<47:32,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3884/5001 [2:56:17<47:25,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3885/5001 [2:56:19<47:28,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3886/5001 [2:56:22<47:22,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3887/5001 [2:56:24<47:21,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3888/5001 [2:56:27<47:22,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3889/5001 [2:56:29<47:13,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3890/5001 [2:56:32<47:24,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3891/5001 [2:56:35<47:18,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343585155.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8187, 0.5233, 1.3111],
        [8.6765, 0.4906, 1.2896],
        [8.7818, 0.5038, 1.2902]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.9533e-01, 9.5681e-01, 8.0634e-01],
         [1.9640e-01, 9.9746e-01, 5.8247e-01],
         [4.9762e+00, 5.9815e-01, 1.3122e+00],
         ...,
         [4.2077e+00, 1.4766e-01, 6.8258e-01],
         [4.5773e-01, 6.1851e+00, 4.5324e-01],
         [1.1570e+01, 3.8615e+00, 3.3601e+00]],

        [[1.6042e+00, 7.9089e-01, 4.5526e+00],
         [1.6613e+00, 7.5779e-03, 5.9528e-01],
         [2.0534e+00, 4.6454e-01, 7.3749e-01],
         ...,
         [3.1780e-01, 8.4793e+00, 5.9423e-01],
         [2.7021e+01, 4.6374e-01, 6.2957e-01],
         [4.1149e+00, 1.0226e+00, 1.0064e+00]],

        [[6.0428e-01, 9.5269e-01, 6.1849e-01],
         [4.3620e+00, 1.4921e+00, 8.9915e-01],
         [7.7627e+00, 5.2750e-01, 1.1


Train Diffusion:  78%|███████▊  | 3892/5001 [2:56:37<47:18,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3893/5001 [2:56:40<47:20,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3894/5001 [2:56:42<47:12,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3895/5001 [2:56:45<47:12,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3896/5001 [2:56:48<47:46,  2.59s/it][A
Train Diffusion:  78%|███████▊  | 3897/5001 [2:56:50<47:31,  2.58s/it][A
Train Diffusion:  78%|███████▊  | 3898/5001 [2:56:53<47:25,  2.58s/it][A
Train Diffusion:  78%|███████▊  | 3899/5001 [2:56:55<47:10,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3900/5001 [2:56:58<47:03,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3901/5001 [2:57:00<46:55,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321659779.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7451, 0.4795, 1.3058],
        [8.8616, 0.5009, 1.3029],
        [8.6417, 0.5069, 1.3179]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.6504e-01, 9.5218e-01, 9.3670e-01],
         [5.5560e-01, 8.5605e-01, 1.6232e+00],
         [2.7144e+01, 1.4156e-01, 1.1992e+00],
         ...,
         [3.5924e+00, 1.6758e-01, 6.7344e-01],
         [9.4498e-02, 5.5500e+00, 1.0805e+00],
         [7.6098e-03, 3.0338e+00, 7.7547e-01]],

        [[1.5885e+00, 8.0663e-01, 1.5612e+00],
         [1.6129e+00, 4.0415e-01, 7.6523e-01],
         [6.8883e-01, 7.3803e-01, 1.3402e+00],
         ...,
         [3.0904e+01, 9.2446e-02, 1.2407e+00],
         [5.8065e+00, 2.8085e-02, 1.9073e-02],
         [2.9717e+00, 3.9876e-01, 1.6713e+00]],

        [[7.5130e-01, 9.6123e-01, 1.1224e+00],
         [1.2906e+01, 4.1185e-01, 1.2513e+00],
         [2.5792e+00, 3.9392e-01, 6.3


Train Diffusion:  78%|███████▊  | 3902/5001 [2:57:03<46:55,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3903/5001 [2:57:05<46:48,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3904/5001 [2:57:08<47:22,  2.59s/it][A
Train Diffusion:  78%|███████▊  | 3905/5001 [2:57:11<47:25,  2.60s/it][A
Train Diffusion:  78%|███████▊  | 3906/5001 [2:57:13<47:05,  2.58s/it][A
Train Diffusion:  78%|███████▊  | 3907/5001 [2:57:16<46:51,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3908/5001 [2:57:18<46:41,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3909/5001 [2:57:21<46:32,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3910/5001 [2:57:23<46:31,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3911/5001 [2:57:26<46:26,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315918323.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7843, 0.4965, 1.3389],
        [8.6939, 0.4912, 1.2864],
        [8.7301, 0.5099, 1.2853]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7446,  0.9616,  0.8269],
         [ 0.6767,  0.6834,  1.8769],
         [27.7402,  0.2063,  1.1995],
         ...,
         [ 0.7017,  0.7231,  0.7617],
         [ 0.7426,  0.6754,  0.6966],
         [ 0.7466,  3.5579,  1.3590]],

        [[ 0.5702,  0.9531,  0.7878],
         [11.2355,  0.8319,  0.8102],
         [ 1.6783,  0.4460,  0.7547],
         ...,
         [27.1300,  0.0405,  1.2307],
         [ 5.7952,  0.0796,  0.4145],
         [20.5212,  1.9948,  1.9897]],

        [[ 1.5892,  0.8188,  5.1707],
         [ 0.9690,  0.6302,  1.1277],
         [ 0.7050,  0.7185,  1.2175],
         ...,
         [ 3.5823,  0.0997,  0.9612],
         [36.5547,  0.0782,  1.3636],
         [ 6.1016,  0.2819,  0.7951


Train Diffusion:  78%|███████▊  | 3912/5001 [2:57:29<46:21,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3913/5001 [2:57:31<46:16,  2.55s/it][A
Train Diffusion:  78%|███████▊  | 3914/5001 [2:57:34<46:19,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3915/5001 [2:57:36<46:29,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3916/5001 [2:57:39<46:29,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3917/5001 [2:57:41<46:26,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3918/5001 [2:57:44<46:24,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3919/5001 [2:57:47<46:21,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3920/5001 [2:57:49<46:12,  2.57s/it][A
Train Diffusion:  78%|███████▊  | 3921/5001 [2:57:52<46:13,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339366486.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7407, 0.5344, 1.2970],
        [8.8182, 0.5070, 1.3041],
        [8.7271, 0.4900, 1.3113]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6565,  0.9550,  0.8861],
         [ 0.5997,  0.7680,  1.8896],
         [27.4433,  0.1602,  1.2086],
         ...,
         [20.0114,  0.1571,  1.1918],
         [ 4.5916,  0.1485,  0.7240],
         [16.0852,  1.4998,  2.0258]],

        [[ 0.6389,  0.9542,  0.9038],
         [12.3869,  0.6195,  1.0083],
         [ 2.0754,  0.4169,  0.7159],
         ...,
         [ 0.4610,  0.9638,  3.0699],
         [ 0.1809,  0.6655,  0.4734],
         [ 2.8062,  2.1861,  1.2366]],

        [[ 1.6072,  0.8082,  3.8269],
         [ 1.2076,  0.5875,  0.9265],
         [ 0.6790,  0.7306,  1.2626],
         ...,
         [ 0.3240,  0.5281,  3.5775],
         [26.2184,  0.0342,  1.6156],
         [ 6.6449,  0.2132,  0.7100


Train Diffusion:  78%|███████▊  | 3922/5001 [2:57:54<46:03,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3923/5001 [2:57:57<46:00,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3924/5001 [2:57:59<45:51,  2.56s/it][A
Train Diffusion:  78%|███████▊  | 3925/5001 [2:58:02<45:53,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3926/5001 [2:58:04<45:51,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3927/5001 [2:58:07<45:49,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3928/5001 [2:58:10<45:48,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3929/5001 [2:58:12<45:43,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3930/5001 [2:58:15<45:44,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3931/5001 [2:58:17<45:37,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 342804800.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.3164, 0.5178, 1.3042],
        [8.9009, 0.5069, 1.3023],
        [8.9084, 0.4863, 1.3047]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6050,  0.9528,  0.9252],
         [ 0.5693,  0.8146,  1.7930],
         [27.9892,  0.1435,  1.2039],
         ...,
         [ 0.0869, 12.0806,  1.8101],
         [29.4590,  0.1669,  1.1796],
         [ 5.6836,  0.4204,  0.9159]],

        [[ 0.6949,  0.9569,  1.0034],
         [12.5849,  0.5025,  1.1513],
         [ 2.2563,  0.4314,  1.2050],
         ...,
         [ 3.6858,  0.1151,  0.7786],
         [ 0.5686,  0.6679,  1.4802],
         [ 7.9872,  1.8945,  2.1924]],

        [[ 1.6043,  0.8064,  2.7961],
         [ 1.3491,  0.5447,  0.8115],
         [ 0.7139,  0.7003,  0.9031],
         ...,
         [19.7383,  0.6126,  1.4750],
         [ 4.3585,  0.1201,  1.0093],
         [ 0.3742,  1.5088,  7.6553


Train Diffusion:  79%|███████▊  | 3932/5001 [2:58:20<45:32,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3933/5001 [2:58:22<45:32,  2.56s/it][A
Train Diffusion:  79%|███████▊  | 3934/5001 [2:58:25<45:23,  2.55s/it][A
Train Diffusion:  79%|███████▊  | 3935/5001 [2:58:27<45:18,  2.55s/it][A
Train Diffusion:  79%|███████▊  | 3936/5001 [2:58:30<45:15,  2.55s/it][A
Train Diffusion:  79%|███████▊  | 3937/5001 [2:58:33<45:15,  2.55s/it][A
Train Diffusion:  79%|███████▊  | 3938/5001 [2:58:35<45:16,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3939/5001 [2:58:38<45:19,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3940/5001 [2:58:40<45:12,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3941/5001 [2:58:43<45:11,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337268761.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9000, 0.4837, 1.2871],
        [8.7739, 0.4991, 1.3007],
        [8.7361, 0.5075, 1.2868]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5964,  0.8148,  4.5326],
         [ 1.0377,  0.6826,  1.0248],
         [ 0.6853,  0.7869,  1.1029],
         ...,
         [ 2.2533,  1.8460,  1.8519],
         [ 0.1676,  2.0450,  1.0058],
         [ 0.8616,  2.7390,  0.6689]],

        [[ 0.5879,  0.9509,  0.8605],
         [11.9470,  0.6826,  1.0017],
         [ 1.8746,  0.4295,  0.7976],
         ...,
         [22.5204,  0.1035,  1.2315],
         [ 5.5895,  0.0798,  0.8866],
         [18.7758,  1.3711,  2.0123]],

        [[ 0.7175,  0.9573,  0.8412],
         [ 0.6519,  0.7104,  1.8663],
         [26.9264,  0.2558,  1.1985],
         ...,
         [ 0.3697,  0.5912,  0.5604],
         [27.2083,  0.3196,  0.5607],
         [ 5.8385,  0.2082,  0.8801


Train Diffusion:  79%|███████▉  | 3942/5001 [2:58:45<45:07,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3943/5001 [2:58:48<45:08,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3944/5001 [2:58:51<45:52,  2.60s/it][A
Train Diffusion:  79%|███████▉  | 3945/5001 [2:58:53<45:30,  2.59s/it][A
Train Diffusion:  79%|███████▉  | 3946/5001 [2:58:56<45:20,  2.58s/it][A
Train Diffusion:  79%|███████▉  | 3947/5001 [2:58:58<45:11,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3948/5001 [2:59:01<45:04,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3949/5001 [2:59:03<44:55,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3950/5001 [2:59:06<44:53,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3951/5001 [2:59:09<44:49,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319088899.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7217, 0.4780, 1.3031],
        [8.8373, 0.4961, 1.2789],
        [8.8704, 0.4943, 1.2541]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.2076e-01, 9.5387e-01, 9.1375e-01],
         [5.7770e-01, 7.9431e-01, 1.8838e+00],
         [2.6507e+01, 2.5621e-01, 1.2035e+00],
         ...,
         [2.0678e-04, 4.6460e-01, 3.3539e+00],
         [2.0932e-01, 4.4590e-01, 8.0171e-01],
         [5.9567e-01, 1.7536e+00, 2.2020e+00]],

        [[1.6067e+00, 8.0681e-01, 3.1212e+00],
         [1.2972e+00, 5.9612e-01, 8.7350e-01],
         [6.7341e-01, 7.6344e-01, 1.2336e+00],
         ...,
         [4.3261e+00, 8.2995e-02, 2.3965e+00],
         [1.4980e-01, 6.3698e-01, 1.6184e+00],
         [1.3581e+01, 2.0018e+00, 2.2872e+00]],

        [[6.7564e-01, 9.5622e-01, 9.7321e-01],
         [1.2914e+01, 5.1439e-01, 1.1323e+00],
         [2.2889e+00, 3.8265e-01, 7.3


Train Diffusion:  79%|███████▉  | 3952/5001 [2:59:11<45:40,  2.61s/it][A
Train Diffusion:  79%|███████▉  | 3953/5001 [2:59:14<45:20,  2.60s/it][A
Train Diffusion:  79%|███████▉  | 3954/5001 [2:59:16<45:04,  2.58s/it][A
Train Diffusion:  79%|███████▉  | 3955/5001 [2:59:19<45:00,  2.58s/it][A
Train Diffusion:  79%|███████▉  | 3956/5001 [2:59:21<44:46,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3957/5001 [2:59:24<44:34,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3958/5001 [2:59:27<44:32,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3959/5001 [2:59:29<44:26,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3960/5001 [2:59:32<44:20,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3961/5001 [2:59:34<44:21,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331662672.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7153, 0.4992, 1.3055],
        [8.6764, 0.5149, 1.2760],
        [8.9871, 0.5007, 1.3053]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4853,  0.9533,  1.0011],
         [11.8789,  0.4921,  1.1633],
         [ 1.9745,  2.9195,  1.4457],
         ...,
         [ 1.0882,  1.1202,  0.4347],
         [ 0.6297,  0.6270,  0.7174],
         [25.5523,  1.4434,  1.9046]],

        [[ 0.9310,  0.9555,  0.7877],
         [ 0.9289,  0.5493,  0.2295],
         [15.4629,  1.3454,  1.9828],
         ...,
         [ 2.1150, 13.7321,  0.8752],
         [11.4474,  0.4362,  1.1610],
         [ 2.7778,  0.9855,  0.7531]],

        [[ 1.4931,  0.8365,  4.6492],
         [ 0.7768,  1.1283,  1.2055],
         [ 0.2161,  1.2414,  3.2178],
         ...,
         [31.8442,  0.1271,  1.0453],
         [ 5.2479,  0.0395,  6.4917],
         [ 1.7265,  1.2558,  1.3566


Train Diffusion:  79%|███████▉  | 3962/5001 [2:59:37<44:15,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3963/5001 [2:59:39<44:37,  2.58s/it][A
Train Diffusion:  79%|███████▉  | 3964/5001 [2:59:42<44:28,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3965/5001 [2:59:45<44:15,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3966/5001 [2:59:47<44:19,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3967/5001 [2:59:50<44:11,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3968/5001 [2:59:52<44:02,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3969/5001 [2:59:55<43:59,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3970/5001 [2:59:57<43:54,  2.55s/it][A
Train Diffusion:  79%|███████▉  | 3971/5001 [3:00:00<43:55,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329709212.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6621, 0.4930, 1.3173],
        [8.6763, 0.4786, 1.2840],
        [8.7726, 0.4786, 1.3388]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5832,  0.9501,  0.8122],
         [11.9307,  0.7713,  0.9137],
         [ 1.8462,  0.4140,  0.7798],
         ...,
         [ 4.0710,  0.1137,  0.9763],
         [ 5.5201,  0.4057,  1.5811],
         [ 2.1830,  1.9829,  3.6068]],

        [[ 1.5908,  0.8124,  4.7186],
         [ 1.0890,  0.6589,  1.1098],
         [ 0.6175,  0.7635,  1.1611],
         ...,
         [20.8016,  2.9537,  0.7327],
         [ 2.4833,  0.3052,  0.0603],
         [ 4.7978,  0.2359,  0.9071]],

        [[ 0.7266,  0.9571,  0.8347],
         [ 0.6501,  0.7209,  1.8659],
         [27.0729,  0.2507,  1.2021],
         ...,
         [ 0.5068,  1.2458,  0.7940],
         [ 0.8352,  0.3817,  0.7299],
         [ 0.1058,  1.4060,  2.6314


Train Diffusion:  79%|███████▉  | 3972/5001 [3:00:02<44:06,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3973/5001 [3:00:05<44:04,  2.57s/it][A
Train Diffusion:  79%|███████▉  | 3974/5001 [3:00:08<43:53,  2.56s/it][A
Train Diffusion:  79%|███████▉  | 3975/5001 [3:00:10<43:55,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3976/5001 [3:00:13<43:49,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3977/5001 [3:00:15<43:39,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3978/5001 [3:00:18<43:35,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3979/5001 [3:00:20<43:29,  2.55s/it][A
Train Diffusion:  80%|███████▉  | 3980/5001 [3:00:23<43:23,  2.55s/it][A
Train Diffusion:  80%|███████▉  | 3981/5001 [3:00:25<43:25,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323149027.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6569, 0.5142, 1.3037],
        [8.8591, 0.4890, 1.2969],
        [8.7249, 0.4969, 1.2523]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4468,  0.9535,  0.8928],
         [ 9.1630,  0.7460,  1.3255],
         [ 1.3172,  0.7706,  1.2613],
         ...,
         [33.6205,  0.2478,  1.0684],
         [ 3.8917,  0.3596,  0.8816],
         [ 0.6896,  2.8049,  1.1937]],

        [[ 1.3231,  0.8742,  4.7251],
         [ 1.2688,  0.9084,  1.4656],
         [ 0.9352,  0.8384,  0.7213],
         ...,
         [ 3.3857,  0.2514,  0.9340],
         [ 0.5125,  0.4844,  0.5052],
         [24.7332,  1.6367,  1.8355]],

        [[ 1.1418,  0.9157,  0.9317],
         [ 1.2240,  0.4482,  1.7377],
         [28.3068,  0.1714,  1.1809],
         ...,
         [ 0.5915,  0.5467,  1.0466],
         [30.7195,  0.1061,  1.1833],
         [ 5.3857,  0.6191,  0.7288


Train Diffusion:  80%|███████▉  | 3982/5001 [3:00:28<43:18,  2.55s/it][A
Train Diffusion:  80%|███████▉  | 3983/5001 [3:00:31<43:19,  2.55s/it][A
Train Diffusion:  80%|███████▉  | 3984/5001 [3:00:33<43:16,  2.55s/it][A
Train Diffusion:  80%|███████▉  | 3985/5001 [3:00:36<43:16,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3986/5001 [3:00:38<43:17,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3987/5001 [3:00:41<43:16,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3988/5001 [3:00:43<43:15,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3989/5001 [3:00:46<43:13,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3990/5001 [3:00:49<43:14,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3991/5001 [3:00:51<43:14,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338226944.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6105, 0.5198, 1.3072],
        [8.6165, 0.4895, 1.2809],
        [8.9614, 0.5000, 1.3239]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.3759e-01, 9.5429e-01, 9.1673e-01],
         [1.1734e+01, 6.0633e-01, 1.0080e+00],
         [1.9565e+00, 4.8827e-01, 1.1682e+00],
         ...,
         [2.8402e-01, 8.8238e+00, 6.5826e-01],
         [3.1948e-05, 7.9959e-01, 2.2252e+00],
         [1.8501e-01, 1.3224e+00, 8.7979e+00]],

        [[6.5831e-01, 9.5523e-01, 8.8202e-01],
         [6.0493e-01, 7.4516e-01, 1.8565e+00],
         [2.8508e+01, 1.3391e-01, 1.1964e+00],
         ...,
         [2.9129e+00, 1.3470e-01, 6.3872e-01],
         [1.2621e+01, 1.8770e+00, 6.6474e+00],
         [3.9348e-01, 3.5893e+00, 2.7472e+00]],

        [[1.6045e+00, 8.1066e-01, 3.7957e+00],
         [1.1475e+00, 5.5788e-01, 8.5861e-01],
         [7.9631e-01, 6.6400e-01, 7.5


Train Diffusion:  80%|███████▉  | 3992/5001 [3:00:54<43:29,  2.59s/it][A
Train Diffusion:  80%|███████▉  | 3993/5001 [3:00:56<43:19,  2.58s/it][A
Train Diffusion:  80%|███████▉  | 3994/5001 [3:00:59<43:05,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3995/5001 [3:01:01<42:57,  2.56s/it][A
Train Diffusion:  80%|███████▉  | 3996/5001 [3:01:04<42:57,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3997/5001 [3:01:07<42:56,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3998/5001 [3:01:09<42:57,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 3999/5001 [3:01:12<42:53,  2.57s/it][A
Train Diffusion:  80%|███████▉  | 4000/5001 [3:01:14<44:06,  2.64s/it][A
Train Diffusion:  80%|████████  | 4001/5001 [3:01:17<43:32,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335290787.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8897, 0.5078, 1.2750],
        [8.8155, 0.4793, 1.2769],
        [8.8404, 0.4535, 1.2937]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7899e-01, 9.5785e-01, 8.6760e-01],
         [6.2531e-01, 7.2336e-01, 1.8714e+00],
         [2.6338e+01, 2.3555e-01, 1.2011e+00],
         ...,
         [4.5776e+00, 1.0487e-01, 5.9471e-01],
         [4.8774e-01, 1.1019e+00, 2.2986e-01],
         [1.7660e-01, 1.9278e+00, 1.8933e+00]],

        [[1.6068e+00, 8.1514e-01, 3.8039e+00],
         [1.0862e+00, 7.0191e-01, 9.4571e-01],
         [6.8768e-01, 8.4739e-01, 9.7067e-01],
         ...,
         [6.3539e-01, 7.5049e-01, 6.7329e-01],
         [2.8502e+01, 1.5991e-02, 1.5303e+00],
         [6.5735e+00, 1.9045e-01, 9.5730e-01]],

        [[6.1740e-01, 9.5527e-01, 9.3455e-01],
         [1.2660e+01, 5.7011e-01, 1.1267e+00],
         [2.0767e+00, 4.4280e-01, 8.2


Train Diffusion:  80%|████████  | 4002/5001 [3:01:20<43:20,  2.60s/it][A
Train Diffusion:  80%|████████  | 4003/5001 [3:01:22<43:01,  2.59s/it][A
Train Diffusion:  80%|████████  | 4004/5001 [3:01:25<42:48,  2.58s/it][A
Train Diffusion:  80%|████████  | 4005/5001 [3:01:27<42:37,  2.57s/it][A
Train Diffusion:  80%|████████  | 4006/5001 [3:01:30<42:30,  2.56s/it][A
Train Diffusion:  80%|████████  | 4007/5001 [3:01:32<42:26,  2.56s/it][A
Train Diffusion:  80%|████████  | 4008/5001 [3:01:35<42:22,  2.56s/it][A
Train Diffusion:  80%|████████  | 4009/5001 [3:01:37<42:13,  2.55s/it][A
Train Diffusion:  80%|████████  | 4010/5001 [3:01:40<42:32,  2.58s/it][A
Train Diffusion:  80%|████████  | 4011/5001 [3:01:43<42:32,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326146544.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7007, 0.5137, 1.2776],
        [8.6198, 0.4634, 1.3514],
        [8.8734, 0.4634, 1.3032]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4514,  0.9546,  0.8241],
         [ 1.9618,  1.2391,  0.4027],
         [ 0.8632,  0.9309,  1.8338],
         ...,
         [31.1453,  0.0898,  1.2497],
         [ 5.5091,  0.1234,  0.8372],
         [22.7541,  1.2195,  2.8692]],

        [[ 1.0950,  0.9264,  2.2167],
         [ 6.1117,  0.2992,  1.2728],
         [ 2.6568,  0.4240,  0.7833],
         ...,
         [ 4.0376,  0.1542,  0.7238],
         [ 0.5174,  1.8612,  0.6430],
         [ 0.5277,  2.7071,  0.7947]],

        [[ 1.3650,  0.8631,  0.9361],
         [ 1.5937,  0.3505,  1.1159],
         [20.3532,  0.3021,  1.1428],
         ...,
         [ 0.3936,  2.7034,  0.6705],
         [34.3030,  0.2273,  1.1095],
         [ 6.0166,  0.2151,  1.2418


Train Diffusion:  80%|████████  | 4012/5001 [3:01:45<42:19,  2.57s/it][A
Train Diffusion:  80%|████████  | 4013/5001 [3:01:48<42:15,  2.57s/it][A
Train Diffusion:  80%|████████  | 4014/5001 [3:01:50<42:09,  2.56s/it][A
Train Diffusion:  80%|████████  | 4015/5001 [3:01:53<42:01,  2.56s/it][A
Train Diffusion:  80%|████████  | 4016/5001 [3:01:55<41:57,  2.56s/it][A
Train Diffusion:  80%|████████  | 4017/5001 [3:01:58<41:54,  2.56s/it][A
Train Diffusion:  80%|████████  | 4018/5001 [3:02:01<41:48,  2.55s/it][A
Train Diffusion:  80%|████████  | 4019/5001 [3:02:03<41:49,  2.56s/it][A
Train Diffusion:  80%|████████  | 4020/5001 [3:02:06<41:51,  2.56s/it][A
Train Diffusion:  80%|████████  | 4021/5001 [3:02:08<41:43,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 341688643.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7181, 0.5115, 1.3128],
        [8.6211, 0.4846, 1.2698],
        [8.7246, 0.4938, 1.3244]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4281e-01, 9.5284e-01, 9.5630e-01],
         [6.7220e+00, 6.5498e-01, 8.3019e-01],
         [1.1148e+00, 1.9061e+00, 1.3500e+00],
         ...,
         [3.9562e+00, 1.3897e-01, 8.7743e-01],
         [2.7543e+01, 4.2986e-02, 1.3990e+00],
         [5.1296e+00, 5.1865e-01, 1.0132e+00]],

        [[1.2201e+00, 9.0316e-01, 8.1639e-01],
         [1.3700e+00, 4.1810e-01, 1.5786e+00],
         [2.7389e+01, 1.5194e-01, 1.1763e+00],
         ...,
         [2.1589e+01, 3.1696e-02, 1.4585e+00],
         [6.1032e+00, 4.8039e-02, 2.9159e+00],
         [2.6428e-01, 2.4104e+00, 2.4855e+00]],

        [[1.2491e+00, 8.9738e-01, 3.5621e+00],
         [2.3139e+00, 8.3702e-01, 1.2479e+00],
         [1.4265e+00, 8.6999e-01, 6.7


Train Diffusion:  80%|████████  | 4022/5001 [3:02:11<41:40,  2.55s/it][A
Train Diffusion:  80%|████████  | 4023/5001 [3:02:13<41:39,  2.56s/it][A
Train Diffusion:  80%|████████  | 4024/5001 [3:02:16<41:33,  2.55s/it][A
Train Diffusion:  80%|████████  | 4025/5001 [3:02:18<41:30,  2.55s/it][A
Train Diffusion:  81%|████████  | 4026/5001 [3:02:21<41:30,  2.55s/it][A
Train Diffusion:  81%|████████  | 4027/5001 [3:02:24<41:25,  2.55s/it][A
Train Diffusion:  81%|████████  | 4028/5001 [3:02:26<41:23,  2.55s/it][A
Train Diffusion:  81%|████████  | 4029/5001 [3:02:29<41:17,  2.55s/it][A
Train Diffusion:  81%|████████  | 4030/5001 [3:02:31<41:19,  2.55s/it][A
Train Diffusion:  81%|████████  | 4031/5001 [3:02:34<41:19,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333500691.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8621, 0.4832, 1.2757],
        [8.7169, 0.4908, 1.3023],
        [8.7417, 0.5234, 1.2797]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5613e+00, 8.1780e-01, 4.8679e+00],
         [1.2509e+00, 2.9372e-02, 7.0362e-01],
         [1.6729e+00, 4.7607e-01, 1.0409e+00],
         ...,
         [3.1871e+01, 2.9658e-01, 7.1764e-01],
         [5.2939e+00, 4.9409e-02, 1.2829e+01],
         [2.5895e+00, 2.6590e+00, 2.0364e+00]],

        [[8.1109e-01, 9.6258e-01, 7.9668e-01],
         [6.9359e-01, 7.3271e-01, 7.3759e-01],
         [2.0420e-01, 7.8738e-01, 1.1911e+00],
         ...,
         [1.1149e+00, 4.4738e-01, 8.8882e-01],
         [3.0040e-06, 6.5956e-01, 2.5796e+00],
         [1.7314e-01, 1.4082e+00, 7.9862e+00]],

        [[5.3179e-01, 9.5263e-01, 6.0511e-01],
         [7.0639e+00, 1.5794e+00, 4.7492e+00],
         [9.2526e+00, 4.2043e-01, 1.4


Train Diffusion:  81%|████████  | 4032/5001 [3:02:36<41:11,  2.55s/it][A
Train Diffusion:  81%|████████  | 4033/5001 [3:02:39<41:19,  2.56s/it][A
Train Diffusion:  81%|████████  | 4034/5001 [3:02:41<41:21,  2.57s/it][A
Train Diffusion:  81%|████████  | 4035/5001 [3:02:44<41:15,  2.56s/it][A
Train Diffusion:  81%|████████  | 4036/5001 [3:02:47<41:10,  2.56s/it][A
Train Diffusion:  81%|████████  | 4037/5001 [3:02:49<41:10,  2.56s/it][A
Train Diffusion:  81%|████████  | 4038/5001 [3:02:52<41:05,  2.56s/it][A
Train Diffusion:  81%|████████  | 4039/5001 [3:02:54<41:08,  2.57s/it][A
Train Diffusion:  81%|████████  | 4040/5001 [3:02:57<41:18,  2.58s/it][A
Train Diffusion:  81%|████████  | 4041/5001 [3:02:59<41:08,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344331792.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7736, 0.5069, 1.2689],
        [8.9579, 0.5183, 1.3129],
        [8.6153, 0.4969, 1.3164]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0339e-01, 9.5277e-01, 9.4668e-01],
         [9.4480e-01, 7.1166e-01, 7.5009e-01],
         [1.8349e+00, 6.5638e-01, 5.1380e-01],
         ...,
         [2.4692e-01, 5.4414e-01, 1.1442e+00],
         [3.8354e+01, 9.6299e-02, 1.2584e+00],
         [5.7759e+00, 4.5612e-01, 1.0238e+00]],

        [[8.7959e-01, 9.6067e-01, 1.2953e+00],
         [9.4599e+00, 4.8618e-01, 1.2769e+00],
         [2.0583e+00, 3.9461e-01, 1.3052e+00],
         ...,
         [2.5547e+01, 1.9860e-01, 8.8621e-01],
         [4.7705e+00, 1.3821e-01, 6.8113e-01],
         [5.9195e-01, 1.7244e+00, 1.0403e+00]],

        [[1.5241e+00, 8.2892e-01, 5.5081e-01],
         [1.7325e+00, 3.7246e-01, 3.9135e-01],
         [1.4834e-05, 1.8407e+00, 5.0


Train Diffusion:  81%|████████  | 4042/5001 [3:03:02<41:01,  2.57s/it][A
Train Diffusion:  81%|████████  | 4043/5001 [3:03:05<40:55,  2.56s/it][A
Train Diffusion:  81%|████████  | 4044/5001 [3:03:07<40:53,  2.56s/it][A
Train Diffusion:  81%|████████  | 4045/5001 [3:03:10<40:44,  2.56s/it][A
Train Diffusion:  81%|████████  | 4046/5001 [3:03:12<40:44,  2.56s/it][A
Train Diffusion:  81%|████████  | 4047/5001 [3:03:15<40:36,  2.55s/it][A
Train Diffusion:  81%|████████  | 4048/5001 [3:03:18<41:48,  2.63s/it][A
Train Diffusion:  81%|████████  | 4049/5001 [3:03:20<41:38,  2.62s/it][A
Train Diffusion:  81%|████████  | 4050/5001 [3:03:23<41:16,  2.60s/it][A
Train Diffusion:  81%|████████  | 4051/5001 [3:03:25<41:02,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325852601.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5157, 0.4964, 1.2745],
        [8.8323, 0.4747, 1.3035],
        [8.7173, 0.4983, 1.3158]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.6527e-01, 9.5376e-01, 9.4154e-01],
         [1.2424e+01, 5.5949e-01, 1.0634e+00],
         [2.1581e+00, 4.3143e-01, 6.9962e-01],
         ...,
         [2.3634e-01, 6.1175e-01, 1.0470e+00],
         [4.2392e+01, 1.0861e-01, 1.3374e+00],
         [6.3712e+00, 1.3444e-01, 2.6801e+00]],

        [[1.6037e+00, 8.0638e-01, 3.3978e+00],
         [1.2582e+00, 5.7211e-01, 8.6825e-01],
         [6.9956e-01, 7.0140e-01, 1.2618e+00],
         ...,
         [2.3351e+00, 7.7826e-02, 1.2967e+00],
         [2.7322e+00, 1.7221e-01, 7.4093e-01],
         [3.9209e+00, 2.0887e+00, 5.9141e+00]],

        [[6.3197e-01, 9.5228e-01, 9.0365e-01],
         [5.8144e-01, 7.8196e-01, 1.8456e+00],
         [2.8013e+01, 1.3535e-01, 1.2


Train Diffusion:  81%|████████  | 4052/5001 [3:03:28<40:47,  2.58s/it][A
Train Diffusion:  81%|████████  | 4053/5001 [3:03:30<40:43,  2.58s/it][A
Train Diffusion:  81%|████████  | 4054/5001 [3:03:33<40:33,  2.57s/it][A
Train Diffusion:  81%|████████  | 4055/5001 [3:03:36<40:28,  2.57s/it][A
Train Diffusion:  81%|████████  | 4056/5001 [3:03:38<40:17,  2.56s/it][A
Train Diffusion:  81%|████████  | 4057/5001 [3:03:41<40:34,  2.58s/it][A
Train Diffusion:  81%|████████  | 4058/5001 [3:03:43<40:32,  2.58s/it][A
Train Diffusion:  81%|████████  | 4059/5001 [3:03:46<40:23,  2.57s/it][A
Train Diffusion:  81%|████████  | 4060/5001 [3:03:48<40:21,  2.57s/it][A
Train Diffusion:  81%|████████  | 4061/5001 [3:03:51<40:10,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329112579.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8158, 0.5100, 1.3140],
        [8.6912, 0.5071, 1.3005],
        [8.5746, 0.4834, 1.3071]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5515e+00, 8.2596e-01, 5.4666e+00],
         [8.0396e-01, 7.7151e-01, 1.3580e+00],
         [6.4667e-01, 7.2074e-01, 1.2581e+00],
         ...,
         [1.8517e-01, 1.1110e+00, 3.8736e+00],
         [1.6683e-01, 5.1848e-01, 5.5936e-01],
         [1.7834e-02, 3.4301e+00, 1.0446e+00]],

        [[8.3149e-01, 9.6145e-01, 7.9198e-01],
         [7.7487e-01, 6.2211e-01, 1.8802e+00],
         [2.7621e+01, 2.3990e-01, 1.1970e+00],
         ...,
         [2.0799e+01, 2.5576e-01, 1.0846e+00],
         [3.6784e+00, 3.8516e-02, 7.7886e+00],
         [3.8599e+00, 1.5652e+00, 1.1494e+00]],

        [[5.2225e-01, 9.5141e-01, 8.0657e-01],
         [1.1385e+01, 8.0921e-01, 7.9688e-01],
         [1.6787e+00, 4.3806e-01, 7.6


Train Diffusion:  81%|████████  | 4062/5001 [3:03:53<40:04,  2.56s/it][A
Train Diffusion:  81%|████████  | 4063/5001 [3:03:56<40:05,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4064/5001 [3:03:59<39:57,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4065/5001 [3:04:01<39:50,  2.55s/it][A
Train Diffusion:  81%|████████▏ | 4066/5001 [3:04:04<39:53,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4067/5001 [3:04:06<39:50,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4068/5001 [3:04:09<39:45,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4069/5001 [3:04:11<39:47,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4070/5001 [3:04:14<39:39,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4071/5001 [3:04:17<39:41,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336906115.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6493, 0.5251, 1.3054],
        [8.6003, 0.4998, 1.3013],
        [8.9365, 0.4774, 1.2670]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5565,  0.8240,  5.4346],
         [ 0.8999,  0.5899,  1.2259],
         [ 0.7137,  0.7333,  1.1455],
         ...,
         [ 3.1921,  0.1142,  0.8187],
         [ 5.3733,  0.0686,  0.7018],
         [ 1.5838,  1.6867,  1.6593]],

        [[ 0.5295,  0.9533,  0.7497],
         [10.9741,  0.9708,  0.9003],
         [ 1.4571,  0.4821,  0.7974],
         ...,
         [ 0.5805,  5.1043,  2.3816],
         [ 1.4192,  0.4728,  1.2094],
         [10.6222,  2.3080,  1.9213]],

        [[ 0.8184,  0.9632,  0.7940],
         [ 0.7374,  0.6500,  1.8195],
         [27.3497,  0.2535,  1.1931],
         ...,
         [ 0.1657,  0.5260,  1.0343],
         [12.5457,  0.3179,  0.1577],
         [ 5.5105,  0.2232,  0.8964


Train Diffusion:  81%|████████▏ | 4072/5001 [3:04:19<39:35,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4073/5001 [3:04:22<39:33,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4074/5001 [3:04:24<39:31,  2.56s/it][A
Train Diffusion:  81%|████████▏ | 4075/5001 [3:04:27<39:28,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4076/5001 [3:04:29<39:21,  2.55s/it][A
Train Diffusion:  82%|████████▏ | 4077/5001 [3:04:32<39:22,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4078/5001 [3:04:34<39:24,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4079/5001 [3:04:37<39:16,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4080/5001 [3:04:40<39:20,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4081/5001 [3:04:42<39:17,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323462284.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6237, 0.5056, 1.2918],
        [8.8641, 0.5230, 1.3039],
        [8.7032, 0.5148, 1.2948]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5931e+00, 8.1705e-01, 4.6415e+00],
         [9.6531e-01, 7.0858e-01, 1.1222e+00],
         [6.4322e-01, 7.0306e-01, 1.3231e+00],
         ...,
         [2.4141e+00, 7.6076e-02, 5.5657e+00],
         [2.8961e+00, 2.9163e-03, 8.9179e-01],
         [4.0514e+00, 6.3366e-01, 1.3413e+00]],

        [[5.8281e-01, 9.5010e-01, 8.6655e-01],
         [1.2181e+01, 6.6443e-01, 8.4836e-01],
         [1.9553e+00, 4.3248e-01, 7.3051e-01],
         ...,
         [2.8480e-02, 5.9002e-01, 3.8979e+00],
         [2.0186e-01, 5.5327e-01, 2.4446e+00],
         [1.1182e-02, 2.3120e+00, 2.7095e+00]],

        [[7.2576e-01, 9.5714e-01, 8.3676e-01],
         [6.6554e-01, 6.8896e-01, 1.9414e+00],
         [2.7890e+01, 2.0153e-01, 1.2


Train Diffusion:  82%|████████▏ | 4082/5001 [3:04:45<39:13,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4083/5001 [3:04:47<39:13,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4084/5001 [3:04:50<39:06,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4085/5001 [3:04:52<39:03,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4086/5001 [3:04:55<39:04,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4087/5001 [3:04:58<39:16,  2.58s/it][A
Train Diffusion:  82%|████████▏ | 4088/5001 [3:05:00<39:19,  2.58s/it][A
Train Diffusion:  82%|████████▏ | 4089/5001 [3:05:03<39:11,  2.58s/it][A
Train Diffusion:  82%|████████▏ | 4090/5001 [3:05:05<39:00,  2.57s/it][A
Train Diffusion:  82%|████████▏ | 4091/5001 [3:05:08<38:55,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335603180.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6556, 0.5239, 1.3079],
        [8.8956, 0.5028, 1.3014],
        [8.5815, 0.5256, 1.2817]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5062,  0.9499,  0.8258],
         [11.6835,  0.7753,  0.8807],
         [ 1.6963,  0.4368,  0.8338],
         ...,
         [ 2.9579,  0.3276,  0.7528],
         [ 0.4635,  1.2773,  0.9336],
         [ 1.2882,  6.6261,  1.0426]],

        [[ 1.5244,  0.8262,  5.2553],
         [ 0.7965,  0.7826,  1.3628],
         [ 0.6243,  0.7332,  1.2084],
         ...,
         [34.8727,  0.1185,  1.1440],
         [ 5.1391,  0.1717,  0.2243],
         [12.2876,  2.9520,  2.6989]],

        [[ 0.8736,  0.9585,  0.7840],
         [ 0.8082,  0.6123,  1.8820],
         [27.9726,  0.2158,  1.1952],
         ...,
         [ 0.7942,  0.4565,  0.7269],
         [34.5890,  0.1866,  0.9620],
         [ 5.9350,  0.2737,  1.1497


Train Diffusion:  82%|████████▏ | 4092/5001 [3:05:10<38:45,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4093/5001 [3:05:13<38:43,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4094/5001 [3:05:15<38:34,  2.55s/it][A
Train Diffusion:  82%|████████▏ | 4095/5001 [3:05:18<38:31,  2.55s/it][A
Train Diffusion:  82%|████████▏ | 4096/5001 [3:05:21<39:04,  2.59s/it][A
Train Diffusion:  82%|████████▏ | 4097/5001 [3:05:23<39:25,  2.62s/it][A
Train Diffusion:  82%|████████▏ | 4098/5001 [3:05:26<39:09,  2.60s/it][A
Train Diffusion:  82%|████████▏ | 4099/5001 [3:05:28<38:58,  2.59s/it][A
Train Diffusion:  82%|████████▏ | 4100/5001 [3:05:31<38:45,  2.58s/it][A
Train Diffusion:  82%|████████▏ | 4101/5001 [3:05:34<38:37,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335763209.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8036, 0.5075, 1.3119],
        [8.7490, 0.4985, 1.3092],
        [8.8172, 0.4958, 1.3010]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.3089e-01, 9.5416e-01, 9.0187e-01],
         [5.9762e-01, 7.0674e-01, 1.7906e+00],
         [2.5133e+01, 2.5705e-01, 1.2002e+00],
         ...,
         [1.3789e-01, 1.5628e+01, 8.8763e-01],
         [2.7987e+01, 1.6031e-01, 1.3399e+00],
         [5.3951e+00, 3.1700e-01, 2.7932e+00]],

        [[6.6588e-01, 9.5574e-01, 9.7205e-01],
         [1.2862e+01, 4.9448e-01, 1.1792e+00],
         [2.1434e+00, 5.0243e-01, 8.8239e-01],
         ...,
         [3.4627e+00, 1.6628e-01, 6.7374e-01],
         [5.7615e-01, 7.7238e-01, 1.6697e+00],
         [7.2360e-03, 2.4787e+00, 2.8582e+00]],

        [[1.6067e+00, 8.1600e-01, 3.3883e+00],
         [1.0154e+00, 7.3698e-01, 9.4508e-01],
         [6.8633e-01, 9.1723e-01, 7.9


Train Diffusion:  82%|████████▏ | 4102/5001 [3:05:36<38:30,  2.57s/it][A
Train Diffusion:  82%|████████▏ | 4103/5001 [3:05:39<38:23,  2.57s/it][A
Train Diffusion:  82%|████████▏ | 4104/5001 [3:05:41<38:55,  2.60s/it][A
Train Diffusion:  82%|████████▏ | 4105/5001 [3:05:44<38:52,  2.60s/it][A
Train Diffusion:  82%|████████▏ | 4106/5001 [3:05:47<38:36,  2.59s/it][A
Train Diffusion:  82%|████████▏ | 4107/5001 [3:05:49<38:33,  2.59s/it][A
Train Diffusion:  82%|████████▏ | 4108/5001 [3:05:52<38:25,  2.58s/it][A
Train Diffusion:  82%|████████▏ | 4109/5001 [3:05:54<38:13,  2.57s/it][A
Train Diffusion:  82%|████████▏ | 4110/5001 [3:05:57<38:06,  2.57s/it][A
Train Diffusion:  82%|████████▏ | 4111/5001 [3:05:59<38:00,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323380432.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9472, 0.4766, 1.3276],
        [8.6463, 0.5068, 1.3079],
        [8.7771, 0.4764, 1.3194]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4197e-01, 9.5723e-01, 1.0992e+00],
         [4.9792e-01, 6.6456e-01, 2.2266e-01],
         [2.0106e-05, 2.4114e+00, 2.0306e-01],
         ...,
         [2.7138e+01, 2.8876e-01, 8.1894e-01],
         [4.0093e+00, 3.7592e-01, 6.4541e-01],
         [7.4828e-01, 3.4896e+00, 1.3785e+00]],

        [[1.2612e+00, 8.9840e-01, 2.6590e+00],
         [1.5718e+00, 9.2827e-01, 1.0624e+00],
         [1.0627e+00, 2.2169e+00, 6.8876e-01],
         ...,
         [5.0941e-01, 6.2245e-01, 1.0250e+00],
         [3.4873e+01, 2.3260e-01, 6.3670e-01],
         [4.1784e+00, 1.7727e+00, 5.8546e-01]],

        [[1.2119e+00, 9.0808e-01, 2.7449e+00],
         [3.7681e+00, 8.1870e-01, 1.3467e+00],
         [1.8316e+00, 1.5545e-01, 5.3


Train Diffusion:  82%|████████▏ | 4112/5001 [3:06:02<37:59,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4113/5001 [3:06:04<37:54,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4114/5001 [3:06:07<37:50,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4115/5001 [3:06:10<37:45,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4116/5001 [3:06:12<37:45,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4117/5001 [3:06:15<37:39,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4118/5001 [3:06:17<37:38,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4119/5001 [3:06:20<37:31,  2.55s/it][A
Train Diffusion:  82%|████████▏ | 4120/5001 [3:06:22<37:27,  2.55s/it][A
Train Diffusion:  82%|████████▏ | 4121/5001 [3:06:25<37:26,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335068288.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8969, 0.4895, 1.3176],
        [8.6647, 0.5031, 1.2849],
        [8.8584, 0.4826, 1.3129]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4452e-01, 9.5599e-01, 8.9793e-01],
         [9.8444e+00, 7.3568e-01, 8.8115e-01],
         [1.5499e+00, 4.4885e-01, 7.4379e-01],
         ...,
         [2.2451e+00, 1.5705e+00, 1.2252e+00],
         [7.9401e+00, 2.5115e+00, 4.2351e+00],
         [4.3177e+00, 3.2505e+00, 1.8619e+00]],

        [[1.2903e+00, 8.8727e-01, 4.4156e+00],
         [1.1328e+00, 9.3140e-01, 1.6164e+00],
         [7.6331e-01, 7.4686e-01, 1.2058e+00],
         ...,
         [2.8833e+01, 5.6555e-02, 1.3281e+00],
         [5.9864e+00, 3.3293e-02, 5.5884e+00],
         [8.5365e-01, 1.7121e+00, 1.4382e+00]],

        [[1.1772e+00, 9.1032e-01, 8.9120e-01],
         [1.2983e+00, 4.2826e-01, 1.9293e+00],
         [2.7089e+01, 2.5882e-01, 1.1


Train Diffusion:  82%|████████▏ | 4122/5001 [3:06:27<37:28,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4123/5001 [3:06:30<37:28,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4124/5001 [3:06:33<37:29,  2.56s/it][A
Train Diffusion:  82%|████████▏ | 4125/5001 [3:06:35<37:25,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4126/5001 [3:06:38<37:17,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4127/5001 [3:06:40<37:19,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4128/5001 [3:06:43<37:12,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4129/5001 [3:06:45<37:10,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4130/5001 [3:06:48<37:06,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4131/5001 [3:06:51<37:06,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327408656.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8073, 0.5191, 1.3041],
        [8.7949, 0.4759, 1.3170],
        [8.7232, 0.4945, 1.2701]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5968e+00, 8.0581e-01, 2.1993e+00],
         [1.4596e+00, 4.8961e-01, 7.7347e-01],
         [7.2815e-01, 6.9814e-01, 8.6543e-01],
         ...,
         [4.2955e-01, 5.0450e-01, 7.2145e-01],
         [3.0895e+01, 1.3010e-01, 1.1111e+00],
         [5.4606e+00, 3.0144e-01, 1.2555e+00]],

        [[7.2228e-01, 9.5888e-01, 1.0655e+00],
         [1.2305e+01, 4.6787e-01, 1.2070e+00],
         [2.2964e+00, 4.6231e-01, 1.2129e+00],
         ...,
         [3.0792e-01, 1.7423e+00, 1.9072e+00],
         [5.5925e-01, 3.7734e-01, 1.6834e+00],
         [1.6672e-02, 1.7998e+00, 3.1551e+00]],

        [[5.8438e-01, 9.5212e-01, 9.3570e-01],
         [5.7185e-01, 8.3903e-01, 1.6866e+00],
         [2.8116e+01, 1.9345e-01, 1.1


Train Diffusion:  83%|████████▎ | 4132/5001 [3:06:53<37:00,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4133/5001 [3:06:56<36:58,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4134/5001 [3:06:58<36:52,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4135/5001 [3:07:01<37:17,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4136/5001 [3:07:03<37:07,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4137/5001 [3:07:06<36:55,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4138/5001 [3:07:09<36:54,  2.57s/it][A
Train Diffusion:  83%|████████▎ | 4139/5001 [3:07:11<36:48,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4140/5001 [3:07:14<36:45,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4141/5001 [3:07:16<36:39,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323386476.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6609, 0.5188, 1.2804],
        [8.8123, 0.4904, 1.3143],
        [8.7750, 0.4983, 1.2840]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8581,  0.9617,  0.7867],
         [ 0.7787,  0.6143,  1.4745],
         [25.3405,  0.2793,  1.1746],
         ...,
         [29.6284,  0.0574,  1.2927],
         [ 5.8866,  0.0721,  0.7351],
         [ 0.6350,  1.8713,  0.9609]],

        [[ 0.5109,  0.9522,  0.6877],
         [ 9.8046,  1.2171,  2.0124],
         [ 0.9824,  0.7667,  1.1738],
         ...,
         [ 0.2116,  4.7579,  0.6016],
         [28.4279,  0.4388,  1.2461],
         [ 5.0465,  0.3991,  1.3314]],

        [[ 1.5360,  0.8269,  5.6524],
         [ 0.8541,  0.3980,  1.1656],
         [ 0.9184,  0.7638,  0.8379],
         ...,
         [ 4.0763,  0.1311,  0.6722],
         [ 0.1603,  2.9823,  1.1859],
         [21.8334,  1.4016,  2.0081


Train Diffusion:  83%|████████▎ | 4142/5001 [3:07:19<36:33,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4143/5001 [3:07:21<36:31,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4144/5001 [3:07:24<37:10,  2.60s/it][A
Train Diffusion:  83%|████████▎ | 4145/5001 [3:07:27<37:09,  2.60s/it][A
Train Diffusion:  83%|████████▎ | 4146/5001 [3:07:29<36:53,  2.59s/it][A
Train Diffusion:  83%|████████▎ | 4147/5001 [3:07:32<36:45,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4148/5001 [3:07:34<36:36,  2.57s/it][A
Train Diffusion:  83%|████████▎ | 4149/5001 [3:07:37<36:34,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4150/5001 [3:07:39<36:31,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4151/5001 [3:07:42<36:42,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327424320.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6021, 0.4826, 1.2944],
        [8.9187, 0.4819, 1.3086],
        [8.9121, 0.4897, 1.2925]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5390e+00, 8.3255e-01, 5.7826e+00],
         [9.1501e-01, 1.1967e+00, 1.3070e+00],
         [3.9134e-01, 1.0437e+01, 2.4028e+00],
         ...,
         [7.7886e-01, 6.7187e-02, 9.3161e-01],
         [4.1165e+00, 9.6803e-02, 6.3393e-01],
         [1.7163e-03, 2.1127e+00, 1.0125e+00]],

        [[8.5770e-01, 9.6372e-01, 7.9241e-01],
         [8.7921e-01, 5.4333e-01, 2.2394e-01],
         [1.6276e+01, 1.4218e+00, 1.4884e+00],
         ...,
         [1.3634e+01, 3.6761e-01, 1.1728e+00],
         [1.3784e+00, 9.4391e-01, 9.5822e+00],
         [8.6477e-01, 2.4523e+00, 1.8984e+00]],

        [[5.1112e-01, 9.5429e-01, 9.6873e-01],
         [9.2896e+00, 5.3231e-01, 1.4832e+00],
         [1.5101e+00, 8.6346e-01, 1.8


Train Diffusion:  83%|████████▎ | 4152/5001 [3:07:45<36:35,  2.59s/it][A
Train Diffusion:  83%|████████▎ | 4153/5001 [3:07:47<36:23,  2.58s/it][A
Train Diffusion:  83%|████████▎ | 4154/5001 [3:07:50<36:20,  2.57s/it][A
Train Diffusion:  83%|████████▎ | 4155/5001 [3:07:52<36:11,  2.57s/it][A
Train Diffusion:  83%|████████▎ | 4156/5001 [3:07:55<36:05,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4157/5001 [3:07:57<35:57,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4158/5001 [3:08:00<35:56,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4159/5001 [3:08:03<35:52,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4160/5001 [3:08:05<35:49,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4161/5001 [3:08:08<35:45,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337994032.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7307, 0.4895, 1.3248],
        [8.7063, 0.4868, 1.2733],
        [8.7775, 0.5092, 1.3135]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5417,  0.9519,  0.9632],
         [12.4255,  0.5246,  1.1813],
         [ 1.8510,  0.7622,  1.6498],
         ...,
         [ 0.2196,  0.4877,  1.5912],
         [32.3107,  0.0494,  1.5165],
         [ 6.4746,  0.2619,  0.7093]],

        [[ 1.5703,  0.8261,  4.2705],
         [ 0.8158,  0.9575,  1.0789],
         [ 0.1488,  7.8232,  3.0789],
         ...,
         [16.5789,  0.2682,  0.9025],
         [ 4.1130,  0.1826,  0.6064],
         [ 3.3420,  2.2685,  1.5619]],

        [[ 0.7923,  0.9620,  0.8010],
         [ 0.7538,  0.6336,  0.2038],
         [15.5543,  1.4304,  0.5355],
         ...,
         [ 0.2227,  1.0745,  3.8166],
         [ 0.1751,  0.6066,  0.5311],
         [12.8505,  2.0456,  1.6368


Train Diffusion:  83%|████████▎ | 4162/5001 [3:08:10<35:41,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4163/5001 [3:08:13<35:43,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4164/5001 [3:08:15<35:39,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4165/5001 [3:08:18<35:33,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4166/5001 [3:08:20<35:32,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4167/5001 [3:08:23<35:29,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4168/5001 [3:08:25<35:25,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4169/5001 [3:08:28<35:24,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4170/5001 [3:08:31<35:20,  2.55s/it][A
Train Diffusion:  83%|████████▎ | 4171/5001 [3:08:33<35:29,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323173670.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7879, 0.5165, 1.3022],
        [8.7445, 0.4851, 1.3261],
        [8.6231, 0.4941, 1.3238]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6717,  0.9566,  0.8745],
         [ 0.6132,  0.7506,  1.8966],
         [27.7215,  0.2181,  1.1995],
         ...,
         [ 0.6080,  6.3936,  1.0434],
         [28.9172,  0.5658,  4.3885],
         [ 3.1858,  2.8084,  2.0618]],

        [[ 0.6239,  0.9545,  0.8760],
         [12.1419,  0.6540,  0.9454],
         [ 1.9934,  0.4285,  0.7406],
         ...,
         [ 1.0047,  1.7605,  0.1522],
         [ 2.1368,  0.1507,  0.1300],
         [ 0.0687,  3.7869,  0.2473]],

        [[ 1.6080,  0.8106,  4.1222],
         [ 1.1534,  0.5908,  0.9589],
         [ 0.6849,  0.7197,  1.3075],
         ...,
         [19.1282,  1.2409,  1.1702],
         [ 1.1290,  2.3579,  0.7814],
         [ 0.4999,  1.2242,  3.7337


Train Diffusion:  83%|████████▎ | 4172/5001 [3:08:36<35:24,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4173/5001 [3:08:38<35:16,  2.56s/it][A
Train Diffusion:  83%|████████▎ | 4174/5001 [3:08:41<35:23,  2.57s/it][A
Train Diffusion:  83%|████████▎ | 4175/5001 [3:08:43<35:20,  2.57s/it][A
Train Diffusion:  84%|████████▎ | 4176/5001 [3:08:46<35:14,  2.56s/it][A
Train Diffusion:  84%|████████▎ | 4177/5001 [3:08:49<35:08,  2.56s/it][A
Train Diffusion:  84%|████████▎ | 4178/5001 [3:08:51<35:06,  2.56s/it][A
Train Diffusion:  84%|████████▎ | 4179/5001 [3:08:54<34:59,  2.55s/it][A
Train Diffusion:  84%|████████▎ | 4180/5001 [3:08:56<34:55,  2.55s/it][A
Train Diffusion:  84%|████████▎ | 4181/5001 [3:08:59<34:53,  2.55s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 310865548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7465, 0.5186, 1.3071],
        [8.5699, 0.4975, 1.3365],
        [8.9018, 0.4927, 1.3322]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.4591e-01, 9.5512e-01, 6.3104e-01],
         [1.0177e+01, 1.2446e+00, 1.3492e+00],
         [1.1681e+00, 6.4837e-01, 1.0673e+00],
         ...,
         [6.3147e+00, 1.8091e-03, 7.5742e-01],
         [3.2893e+00, 1.4290e-01, 6.3150e-01],
         [7.1274e-01, 2.1759e+00, 8.1154e-01]],

        [[1.6075e+00, 7.9862e-01, 4.8775e+00],
         [1.4179e+00, 2.1274e-01, 1.0032e+00],
         [7.2249e-01, 7.0608e-01, 1.0313e+00],
         ...,
         [1.7967e+01, 4.1958e-01, 5.9179e-01],
         [2.0220e+00, 1.6586e-01, 1.0135e+01],
         [3.8767e+00, 1.0293e+00, 1.7296e+00]],

        [[6.5002e-01, 9.5531e-01, 8.7098e-01],
         [6.1235e-01, 8.4092e-01, 1.5639e+00],
         [2.7669e+01, 2.0697e-01, 1.1


Train Diffusion:  84%|████████▎ | 4182/5001 [3:09:01<34:53,  2.56s/it][A
Train Diffusion:  84%|████████▎ | 4183/5001 [3:09:04<35:23,  2.60s/it][A
Train Diffusion:  84%|████████▎ | 4184/5001 [3:09:07<35:08,  2.58s/it][A
Train Diffusion:  84%|████████▎ | 4185/5001 [3:09:09<34:55,  2.57s/it][A
Train Diffusion:  84%|████████▎ | 4186/5001 [3:09:12<34:53,  2.57s/it][A
Train Diffusion:  84%|████████▎ | 4187/5001 [3:09:14<34:45,  2.56s/it][A
Train Diffusion:  84%|████████▎ | 4188/5001 [3:09:17<34:39,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4189/5001 [3:09:19<34:41,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4190/5001 [3:09:22<34:33,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4191/5001 [3:09:24<34:31,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330543504.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8247, 0.5113, 1.2799],
        [8.6157, 0.4990, 1.2751],
        [8.7605, 0.4840, 1.2879]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7819e-01, 9.5086e-01, 6.0913e-01],
         [8.7148e+00, 1.4458e+00, 1.7946e+00],
         [7.5800e-01, 7.7799e-01, 9.3665e-01],
         ...,
         [1.7076e+00, 4.4734e-01, 1.3410e+00],
         [3.1359e-01, 6.5534e-01, 7.3602e-01],
         [3.4073e-03, 1.5891e+00, 3.1474e+00]],

        [[7.3362e-01, 9.5849e-01, 8.1562e-01],
         [6.1499e-01, 8.0717e-01, 1.3629e+00],
         [2.0244e+01, 3.2938e-01, 1.1513e+00],
         ...,
         [3.1698e+01, 9.8062e-02, 1.2371e+00],
         [5.4231e+00, 1.1248e-01, 5.4755e-01],
         [3.7290e+00, 1.7635e+00, 6.9393e+00]],

        [[1.5902e+00, 8.0305e-01, 4.8158e+00],
         [1.4381e+00, 4.7492e-02, 8.0761e-01],
         [1.1368e+00, 6.2755e-01, 8.4


Train Diffusion:  84%|████████▍ | 4192/5001 [3:09:27<35:06,  2.60s/it][A
Train Diffusion:  84%|████████▍ | 4193/5001 [3:09:30<34:53,  2.59s/it][A
Train Diffusion:  84%|████████▍ | 4194/5001 [3:09:32<34:49,  2.59s/it][A
Train Diffusion:  84%|████████▍ | 4195/5001 [3:09:35<34:35,  2.57s/it][A
Train Diffusion:  84%|████████▍ | 4196/5001 [3:09:37<34:26,  2.57s/it][A
Train Diffusion:  84%|████████▍ | 4197/5001 [3:09:40<34:16,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4198/5001 [3:09:43<34:31,  2.58s/it][A
Train Diffusion:  84%|████████▍ | 4199/5001 [3:09:45<34:27,  2.58s/it][A
Train Diffusion:  84%|████████▍ | 4200/5001 [3:09:48<34:20,  2.57s/it][A
Train Diffusion:  84%|████████▍ | 4201/5001 [3:09:50<34:13,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 316526875.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7704, 0.4959, 1.2940],
        [8.7907, 0.5103, 1.2756],
        [8.6161, 0.5128, 1.2923]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7427,  0.9579,  0.8253],
         [ 0.6614,  0.7009,  1.8363],
         [26.9594,  0.2583,  1.2003],
         ...,
         [ 0.2659,  0.5190,  1.5447],
         [31.5560,  0.0510,  1.4792],
         [ 6.3355,  0.2462,  0.7718]],

        [[ 1.5899,  0.8140,  5.1618],
         [ 1.0593,  0.5676,  1.1610],
         [ 0.6398,  0.7462,  1.1414],
         ...,
         [12.9144,  0.3715,  0.8027],
         [ 4.5402,  0.1120,  1.3101],
         [ 3.2300,  1.7225,  1.4968]],

        [[ 0.5704,  0.9496,  0.7386],
         [11.3114,  0.9504,  0.8894],
         [ 1.5901,  0.4511,  0.7730],
         ...,
         [ 0.4230,  0.7074,  3.4586],
         [ 0.1619,  0.7152,  0.2798],
         [11.6153,  2.5150,  1.3948


Train Diffusion:  84%|████████▍ | 4202/5001 [3:09:53<34:05,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4203/5001 [3:09:55<34:04,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4204/5001 [3:09:58<33:56,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4205/5001 [3:10:00<33:54,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4206/5001 [3:10:03<33:53,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4207/5001 [3:10:06<33:48,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4208/5001 [3:10:08<33:44,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4209/5001 [3:10:11<33:39,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4210/5001 [3:10:13<33:40,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4211/5001 [3:10:16<33:42,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 322014400.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8072, 0.4901, 1.3144],
        [8.7707, 0.4818, 1.3211],
        [8.6993, 0.4919, 1.2613]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5979e+00, 8.0576e-01, 2.2558e+00],
         [1.4378e+00, 5.0757e-01, 7.8404e-01],
         [7.1103e-01, 7.3655e-01, 1.3508e+00],
         ...,
         [1.3829e+01, 8.5568e-01, 8.5453e-02],
         [4.8711e+00, 7.2516e-02, 4.0070e-01],
         [5.6773e+00, 2.1890e+00, 8.8421e+00]],

        [[7.1413e-01, 9.5832e-01, 1.0568e+00],
         [1.2833e+01, 4.5825e-01, 1.2055e+00],
         [2.3724e+00, 3.9761e-01, 6.9166e-01],
         ...,
         [1.5174e-01, 1.0095e+01, 1.5409e+00],
         [2.2387e+01, 2.1529e-01, 1.4848e+00],
         [5.7553e+00, 2.2970e-01, 1.1575e+00]],

        [[5.9044e-01, 9.5223e-01, 9.3341e-01],
         [5.6939e-01, 8.2941e-01, 1.7383e+00],
         [2.6623e+01, 2.5665e-01, 1.2


Train Diffusion:  84%|████████▍ | 4212/5001 [3:10:18<33:37,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4213/5001 [3:10:21<33:33,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4214/5001 [3:10:23<33:30,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4215/5001 [3:10:26<33:25,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4216/5001 [3:10:29<33:22,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4217/5001 [3:10:31<33:16,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4218/5001 [3:10:34<33:17,  2.55s/it][A
Train Diffusion:  84%|████████▍ | 4219/5001 [3:10:36<33:20,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4220/5001 [3:10:39<33:18,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4221/5001 [3:10:41<33:20,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335646192.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8783, 0.5007, 1.3202],
        [8.7870, 0.4927, 1.3079],
        [8.7008, 0.5224, 1.2777]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5616,  0.8252,  5.7222],
         [ 0.8505,  0.6090,  1.1881],
         [ 0.8279,  0.8178,  0.9116],
         ...,
         [31.7440,  0.0761,  1.2628],
         [ 5.6813,  0.1045,  0.6150],
         [ 0.5280,  2.0539,  0.7621]],

        [[ 0.5302,  0.9527,  0.7268],
         [10.1947,  1.0512,  1.5027],
         [ 1.2167,  0.6649,  1.0781],
         ...,
         [ 3.4868,  0.1958,  0.8591],
         [ 0.4462,  0.8298,  0.6641],
         [22.2256,  1.4305,  2.0439]],

        [[ 0.8146,  0.9627,  0.7955],
         [ 0.7478,  0.6314,  1.6260],
         [26.8552,  0.2557,  1.1773],
         ...,
         [ 0.3664,  0.6208,  0.8106],
         [35.2752,  0.1236,  1.1438],
         [ 5.9404,  0.2822,  1.2113


Train Diffusion:  84%|████████▍ | 4222/5001 [3:10:44<33:19,  2.57s/it][A
Train Diffusion:  84%|████████▍ | 4223/5001 [3:10:46<33:11,  2.56s/it][A
Train Diffusion:  84%|████████▍ | 4224/5001 [3:10:49<33:20,  2.57s/it][A
Train Diffusion:  84%|████████▍ | 4225/5001 [3:10:52<33:19,  2.58s/it][A
Train Diffusion:  85%|████████▍ | 4226/5001 [3:10:54<33:12,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4227/5001 [3:10:57<33:08,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4228/5001 [3:10:59<33:02,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4229/5001 [3:11:02<32:59,  2.56s/it][A
Train Diffusion:  85%|████████▍ | 4230/5001 [3:11:05<33:13,  2.59s/it][A
Train Diffusion:  85%|████████▍ | 4231/5001 [3:11:07<33:12,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321338160.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7283, 0.4903, 1.2798],
        [8.6223, 0.4933, 1.2931],
        [8.9823, 0.4790, 1.3121]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0377e-01, 9.5299e-01, 8.9682e-01],
         [1.1782e+01, 6.2044e-01, 9.5855e-01],
         [1.9154e+00, 4.5621e-01, 7.3001e-01],
         ...,
         [3.8878e+00, 1.3376e-01, 7.0801e-01],
         [4.6373e-03, 7.4065e-01, 1.9716e+00],
         [1.9314e+00, 2.0275e+00, 1.7846e+00]],

        [[6.9698e-01, 9.5729e-01, 8.5432e-01],
         [6.4626e-01, 6.9704e-01, 1.9037e+00],
         [2.8187e+01, 1.7738e-01, 1.1970e+00],
         ...,
         [3.0182e-01, 4.2023e+00, 4.6420e+00],
         [5.0220e-01, 2.8442e+00, 5.8592e-01],
         [1.4352e+00, 7.3481e-01, 9.5901e+00]],

        [[1.6023e+00, 8.1733e-01, 4.3373e+00],
         [9.7762e-01, 6.5882e-01, 9.6961e-01],
         [7.6040e-01, 6.9682e-01, 1.2


Train Diffusion:  85%|████████▍ | 4232/5001 [3:11:10<32:58,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4233/5001 [3:11:12<32:51,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4234/5001 [3:11:15<32:45,  2.56s/it][A
Train Diffusion:  85%|████████▍ | 4235/5001 [3:11:17<32:41,  2.56s/it][A
Train Diffusion:  85%|████████▍ | 4236/5001 [3:11:20<32:41,  2.56s/it][A
Train Diffusion:  85%|████████▍ | 4237/5001 [3:11:22<32:34,  2.56s/it][A
Train Diffusion:  85%|████████▍ | 4238/5001 [3:11:25<32:28,  2.55s/it][A
Train Diffusion:  85%|████████▍ | 4239/5001 [3:11:28<32:25,  2.55s/it][A
Train Diffusion:  85%|████████▍ | 4240/5001 [3:11:30<33:08,  2.61s/it][A
Train Diffusion:  85%|████████▍ | 4241/5001 [3:11:33<32:57,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344567152.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6088, 0.5264, 1.2636],
        [8.8449, 0.5197, 1.2858],
        [8.9031, 0.4907, 1.2699]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1157e-01, 9.5254e-01, 9.3809e-01],
         [8.3652e-01, 7.3344e-01, 7.0907e-01],
         [2.4995e+00, 1.5028e-01, 5.2033e-01],
         ...,
         [4.4023e+00, 1.1952e-01, 7.7459e-01],
         [6.2087e-01, 5.3503e-01, 2.8693e+00],
         [9.5422e-03, 2.7089e+00, 2.4129e+00]],

        [[8.5726e-01, 9.6189e-01, 1.2511e+00],
         [1.0337e+01, 4.4710e-01, 1.2680e+00],
         [2.3011e+00, 3.9189e-01, 6.6171e-01],
         ...,
         [4.3722e-01, 1.7797e+00, 7.1109e-01],
         [3.3676e+01, 2.5866e-01, 6.9494e-01],
         [4.1307e+00, 1.3763e+00, 9.5583e+00]],

        [[1.5367e+00, 8.2493e-01, 5.6619e-01],
         [1.7537e+00, 3.6792e-01, 5.0927e-01],
         [9.1158e-05, 1.3792e+00, 6.0


Train Diffusion:  85%|████████▍ | 4242/5001 [3:11:35<32:41,  2.58s/it][A
Train Diffusion:  85%|████████▍ | 4243/5001 [3:11:38<32:29,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4244/5001 [3:11:40<32:24,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4245/5001 [3:11:43<32:41,  2.59s/it][A
Train Diffusion:  85%|████████▍ | 4246/5001 [3:11:46<32:35,  2.59s/it][A
Train Diffusion:  85%|████████▍ | 4247/5001 [3:11:48<32:23,  2.58s/it][A
Train Diffusion:  85%|████████▍ | 4248/5001 [3:11:51<32:14,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4249/5001 [3:11:53<32:11,  2.57s/it][A
Train Diffusion:  85%|████████▍ | 4250/5001 [3:11:56<32:04,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4251/5001 [3:11:58<32:00,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340985308.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7447, 0.4796, 1.3140],
        [8.8201, 0.4875, 1.2609],
        [8.7050, 0.5052, 1.3291]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6056,  0.8105,  3.9863],
         [ 1.1501,  0.5829,  0.9231],
         [ 0.7143,  0.7096,  1.3103],
         ...,
         [ 2.5550,  0.2011,  1.0984],
         [37.6499,  0.1016,  1.2930],
         [ 5.6955,  0.4260,  1.0479]],

        [[ 0.6663,  0.9565,  0.8774],
         [ 0.6096,  0.7482,  1.8777],
         [27.8515,  0.1963,  1.1999],
         ...,
         [28.1838,  0.1846,  1.0007],
         [ 4.3256,  0.1543,  0.8216],
         [ 0.6720,  1.6925,  0.8690]],

        [[ 0.6302,  0.9549,  0.8912],
         [12.1002,  0.6315,  0.9739],
         [ 1.9999,  0.4384,  0.7258],
         ...,
         [ 0.3114,  0.9506,  2.8246],
         [ 0.3490,  0.5481,  0.8231],
         [26.2363,  1.1883,  2.0424


Train Diffusion:  85%|████████▌ | 4252/5001 [3:12:01<31:54,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4253/5001 [3:12:04<31:58,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4254/5001 [3:12:06<31:56,  2.57s/it][A
Train Diffusion:  85%|████████▌ | 4255/5001 [3:12:09<31:52,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4256/5001 [3:12:11<31:48,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4257/5001 [3:12:14<31:41,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4258/5001 [3:12:16<31:41,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4259/5001 [3:12:19<31:34,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4260/5001 [3:12:22<31:31,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4261/5001 [3:12:24<31:32,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339650617.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8810, 0.5063, 1.2962],
        [8.6507, 0.5223, 1.2807],
        [8.6287, 0.5041, 1.3096]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5985e+00, 8.0644e-01, 2.4998e+00],
         [1.3641e+00, 5.2347e-01, 7.8091e-01],
         [7.4492e-01, 6.7873e-01, 7.8445e-01],
         ...,
         [2.6983e-01, 2.1385e+00, 1.3316e+00],
         [3.0163e-01, 1.4542e+00, 1.0400e+01],
         [8.0044e-02, 2.9472e+00, 3.2593e+00]],

        [[7.0711e-01, 9.5686e-01, 1.0350e+00],
         [1.2219e+01, 4.8786e-01, 1.1862e+00],
         [2.2136e+00, 4.8436e-01, 1.2329e+00],
         ...,
         [4.1675e+00, 1.5097e-01, 1.0839e+00],
         [1.3935e-06, 4.6473e-01, 3.2562e+00],
         [1.2541e-01, 1.3862e+00, 4.4663e+00]],

        [[5.9721e-01, 9.5168e-01, 9.3164e-01],
         [5.6286e-01, 8.1201e-01, 1.7504e+00],
         [2.7714e+01, 2.0058e-01, 1.1


Train Diffusion:  85%|████████▌ | 4262/5001 [3:12:27<31:27,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4263/5001 [3:12:29<31:22,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4264/5001 [3:12:32<31:23,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4265/5001 [3:12:34<31:17,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4266/5001 [3:12:37<31:18,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4267/5001 [3:12:39<31:13,  2.55s/it][A
Train Diffusion:  85%|████████▌ | 4268/5001 [3:12:42<31:13,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4269/5001 [3:12:45<31:13,  2.56s/it][A
Train Diffusion:  85%|████████▌ | 4270/5001 [3:12:47<31:17,  2.57s/it][A
Train Diffusion:  85%|████████▌ | 4271/5001 [3:12:50<31:24,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325919408.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6508, 0.5111, 1.3025],
        [8.7280, 0.4995, 1.3098],
        [8.8933, 0.4791, 1.2758]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.8632e-01, 9.6221e-01, 1.1647e+00],
         [1.2712e+01, 3.3528e-01, 1.2770e+00],
         [2.8752e+00, 2.7547e-01, 6.5677e-01],
         ...,
         [3.5067e+00, 2.2659e-01, 7.7948e-01],
         [3.2870e+01, 1.9697e-01, 1.3238e+00],
         [4.9887e+00, 5.9921e-01, 1.1877e+00]],

        [[1.5734e+00, 8.0944e-01, 7.0207e-01],
         [1.7698e+00, 3.6435e-01, 5.7673e-01],
         [1.4518e-03, 1.1579e+00, 7.1731e-01],
         ...,
         [3.5277e+01, 6.8985e-02, 1.2063e+00],
         [5.9787e+00, 8.1801e-02, 6.6552e-01],
         [5.1073e-01, 1.4301e+00, 1.0572e+01]],

        [[5.4471e-01, 9.5231e-01, 9.2494e-01],
         [5.6983e-01, 7.7824e-01, 1.0220e+00],
         [1.2960e+01, 5.7427e-02, 8.4


Train Diffusion:  85%|████████▌ | 4272/5001 [3:12:52<31:39,  2.61s/it][A
Train Diffusion:  85%|████████▌ | 4273/5001 [3:12:55<31:23,  2.59s/it][A
Train Diffusion:  85%|████████▌ | 4274/5001 [3:12:57<31:16,  2.58s/it][A
Train Diffusion:  85%|████████▌ | 4275/5001 [3:13:00<31:06,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4276/5001 [3:13:03<31:06,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4277/5001 [3:13:05<30:57,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4278/5001 [3:13:08<31:03,  2.58s/it][A
Train Diffusion:  86%|████████▌ | 4279/5001 [3:13:10<31:08,  2.59s/it][A
Train Diffusion:  86%|████████▌ | 4280/5001 [3:13:13<30:56,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4281/5001 [3:13:15<30:49,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 340390883.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8551, 0.5232, 1.2851],
        [8.7752, 0.5064, 1.2554],
        [8.8903, 0.5060, 1.3000]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6010e+00, 8.4940e-01, 6.0152e+00],
         [9.6625e-01, 1.1154e+00, 1.2653e+00],
         [9.9635e-01, 1.2335e+01, 6.3680e-01],
         ...,
         [2.5917e+01, 7.4364e-02, 1.2413e+00],
         [5.4132e+00, 1.6883e-02, 2.4952e+00],
         [2.7594e+00, 5.7166e-01, 1.1463e+00]],

        [[5.9198e-01, 9.5255e-01, 9.3647e-01],
         [6.2323e-01, 6.1955e-01, 2.1920e-01],
         [2.8967e-05, 2.3934e+00, 2.8383e-01],
         ...,
         [3.8105e+00, 1.1008e-01, 1.0286e+00],
         [8.6177e+00, 3.5129e-01, 1.8692e+00],
         [5.9290e+00, 2.7535e+00, 2.1071e+00]],

        [[7.1183e-01, 9.5842e-01, 8.7246e-01],
         [7.8785e+00, 5.9757e-01, 1.5274e+00],
         [1.7157e+00, 1.3962e+00, 5.9


Train Diffusion:  86%|████████▌ | 4282/5001 [3:13:18<30:44,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4283/5001 [3:13:21<30:41,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4284/5001 [3:13:23<30:37,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4285/5001 [3:13:26<30:31,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4286/5001 [3:13:28<30:27,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4287/5001 [3:13:31<30:41,  2.58s/it][A
Train Diffusion:  86%|████████▌ | 4288/5001 [3:13:34<31:13,  2.63s/it][A
Train Diffusion:  86%|████████▌ | 4289/5001 [3:13:36<30:51,  2.60s/it][A
Train Diffusion:  86%|████████▌ | 4290/5001 [3:13:39<30:38,  2.59s/it][A
Train Diffusion:  86%|████████▌ | 4291/5001 [3:13:41<30:33,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318094928.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6659, 0.4993, 1.2710],
        [8.7071, 0.5138, 1.2918],
        [8.8457, 0.4855, 1.3144]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.9666e-01, 9.5387e-01, 9.4946e-01],
         [1.7030e+00, 6.8854e-01, 1.5533e+00],
         [1.7861e+00, 1.0334e-02, 5.5220e-01],
         ...,
         [3.0513e+01, 1.7980e-01, 1.0715e+00],
         [5.6380e+00, 7.2213e-02, 7.5629e-01],
         [4.1410e-01, 1.5120e+00, 7.3967e+00]],

        [[8.9715e-01, 9.6025e-01, 1.2898e+00],
         [8.0865e+00, 5.2086e-01, 1.2801e+00],
         [1.8590e+00, 4.1928e-01, 1.6516e+00],
         ...,
         [2.5964e-01, 1.6002e+01, 9.2178e-01],
         [1.5679e+01, 8.0094e-01, 1.1850e+00],
         [3.2686e+00, 9.0116e-01, 1.1802e+00]],

        [[1.5139e+00, 8.3197e-01, 5.1351e-01],
         [1.7743e+00, 3.5804e-01, 4.5402e-01],
         [9.1941e-06, 1.5635e+00, 6.1


Train Diffusion:  86%|████████▌ | 4292/5001 [3:13:44<30:51,  2.61s/it][A
Train Diffusion:  86%|████████▌ | 4293/5001 [3:13:47<30:35,  2.59s/it][A
Train Diffusion:  86%|████████▌ | 4294/5001 [3:13:49<30:27,  2.59s/it][A
Train Diffusion:  86%|████████▌ | 4295/5001 [3:13:52<30:19,  2.58s/it][A
Train Diffusion:  86%|████████▌ | 4296/5001 [3:13:54<30:11,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4297/5001 [3:13:57<30:03,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4298/5001 [3:13:59<29:57,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4299/5001 [3:14:02<30:03,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4300/5001 [3:14:04<29:58,  2.57s/it][A
Train Diffusion:  86%|████████▌ | 4301/5001 [3:14:07<29:57,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320327891.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8441, 0.4800, 1.2950],
        [8.7915, 0.4873, 1.3242],
        [8.5867, 0.4726, 1.3015]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5741e+00, 8.0921e-01, 7.6849e-01],
         [1.7765e+00, 3.5858e-01, 6.6412e-01],
         [5.7934e-02, 7.9805e-01, 1.1036e+00],
         ...,
         [2.9282e+01, 8.7461e-02, 1.2911e+00],
         [5.7598e+00, 2.5083e-02, 6.3827e+00],
         [1.9470e+00, 1.0760e+00, 1.3434e+00]],

        [[5.4515e-01, 9.5154e-01, 9.2050e-01],
         [5.3208e-01, 7.8853e-01, 1.4226e+00],
         [2.1621e+01, 6.8553e-02, 1.0896e+00],
         ...,
         [2.5786e-01, 1.3032e+01, 6.5123e-01],
         [7.2230e-01, 6.4757e-01, 1.7485e+00],
         [5.5536e-01, 1.5617e+00, 7.9434e-01]],

        [[7.8494e-01, 9.6153e-01, 1.1646e+00],
         [1.2890e+01, 3.3824e-01, 1.2781e+00],
         [2.9082e+00, 3.0602e-01, 7.4


Train Diffusion:  86%|████████▌ | 4302/5001 [3:14:10<29:52,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4303/5001 [3:14:12<29:49,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4304/5001 [3:14:15<29:44,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4305/5001 [3:14:17<29:40,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4306/5001 [3:14:20<29:39,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4307/5001 [3:14:22<29:36,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4308/5001 [3:14:25<29:35,  2.56s/it][A
Train Diffusion:  86%|████████▌ | 4309/5001 [3:14:27<29:27,  2.55s/it][A
Train Diffusion:  86%|████████▌ | 4310/5001 [3:14:30<29:23,  2.55s/it][A
Train Diffusion:  86%|████████▌ | 4311/5001 [3:14:33<29:25,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326237548.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6269, 0.5105, 1.2819],
        [8.8470, 0.4907, 1.2812],
        [8.8634, 0.5058, 1.3026]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7278,  0.9573,  1.0912],
         [12.5654,  0.4491,  1.2226],
         [ 2.3794,  0.4710,  1.1000],
         ...,
         [ 0.5055,  0.7175,  0.8370],
         [36.7332,  0.1436,  1.1543],
         [ 5.8491,  0.3435,  0.9731]],

        [[ 0.5802,  0.9501,  0.9435],
         [ 0.5537,  0.7976,  1.7243],
         [27.8422,  0.1514,  1.1961],
         ...,
         [ 4.0422,  0.1554,  0.7800],
         [ 0.5168,  0.7070,  0.7092],
         [ 0.5161,  1.8738,  1.6512]],

        [[ 1.5950,  0.8073,  1.9903],
         [ 1.4120,  0.4867,  0.7665],
         [ 0.7079,  0.7415,  0.9146],
         ...,
         [31.8155,  0.0964,  1.2449],
         [ 5.4603,  0.1303,  0.9052],
         [22.8005,  1.3090,  2.0046


Train Diffusion:  86%|████████▌ | 4312/5001 [3:14:35<29:20,  2.55s/it][A
Train Diffusion:  86%|████████▌ | 4313/5001 [3:14:38<29:20,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4314/5001 [3:14:40<29:16,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4315/5001 [3:14:43<29:17,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4316/5001 [3:14:45<29:18,  2.57s/it][A
Train Diffusion:  86%|████████▋ | 4317/5001 [3:14:48<29:11,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4318/5001 [3:14:51<29:10,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4319/5001 [3:14:53<29:05,  2.56s/it][A
Train Diffusion:  86%|████████▋ | 4320/5001 [3:14:56<28:59,  2.55s/it][A
Train Diffusion:  86%|████████▋ | 4321/5001 [3:14:58<28:57,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315821692.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6205, 0.4787, 1.3109],
        [8.8862, 0.4741, 1.3070],
        [8.8572, 0.4949, 1.2851]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6073e+00, 8.1312e-01, 3.8947e+00],
         [1.1199e+00, 5.9578e-01, 9.0166e-01],
         [7.4347e-01, 6.8206e-01, 1.0089e+00],
         ...,
         [3.0717e-01, 2.0495e+00, 1.6146e+01],
         [1.6531e-01, 1.3638e+00, 3.5280e+00],
         [1.7239e-01, 1.2052e+00, 1.1587e+01]],

        [[6.3244e-01, 9.5606e-01, 9.1206e-01],
         [1.2063e+01, 6.0787e-01, 9.9412e-01],
         [2.0138e+00, 4.6072e-01, 7.1083e-01],
         ...,
         [3.4302e-05, 4.4362e-01, 3.3992e+00],
         [9.8647e+00, 4.0439e-01, 2.0077e+00],
         [2.6998e+00, 1.2310e+00, 1.3753e+00]],

        [[6.6349e-01, 9.5748e-01, 8.7926e-01],
         [6.1115e-01, 7.3755e-01, 1.8855e+00],
         [2.8543e+01, 9.2100e-02, 1.2


Train Diffusion:  86%|████████▋ | 4322/5001 [3:15:01<29:05,  2.57s/it][A
Train Diffusion:  86%|████████▋ | 4323/5001 [3:15:03<29:00,  2.57s/it][A
Train Diffusion:  86%|████████▋ | 4324/5001 [3:15:06<29:01,  2.57s/it][A
Train Diffusion:  86%|████████▋ | 4325/5001 [3:15:08<28:53,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4326/5001 [3:15:11<29:05,  2.59s/it][A
Train Diffusion:  87%|████████▋ | 4327/5001 [3:15:14<28:59,  2.58s/it][A
Train Diffusion:  87%|████████▋ | 4328/5001 [3:15:16<28:51,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4329/5001 [3:15:19<28:47,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4330/5001 [3:15:21<28:41,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4331/5001 [3:15:24<28:39,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 306818748.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8268, 0.4834, 1.3039],
        [8.8397, 0.4901, 1.2831],
        [8.7900, 0.4833, 1.3014]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5174,  0.8317,  5.9559],
         [ 0.8390,  0.6423,  1.2460],
         [ 0.9549,  0.6955,  1.2058],
         ...,
         [ 0.8667,  0.4081,  1.3582],
         [27.2654,  0.1047,  1.2098],
         [ 5.3736,  0.5219,  1.0195]],

        [[ 0.8940,  0.9608,  0.7850],
         [ 0.8506,  0.5695,  1.7442],
         [28.3391,  0.2234,  1.1797],
         ...,
         [29.9785,  0.4622,  1.1052],
         [ 3.0408,  0.7242,  0.9401],
         [ 0.4187,  1.7249,  2.0497]],

        [[ 0.4975,  0.9540,  0.7407],
         [ 9.3568,  1.0710,  0.8235],
         [ 1.2017,  0.5432,  0.8726],
         ...,
         [ 1.4635,  1.1392,  1.2342],
         [ 0.4686,  0.4319,  1.5893],
         [19.0930,  1.6806,  2.0340


Train Diffusion:  87%|████████▋ | 4332/5001 [3:15:26<28:34,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4333/5001 [3:15:29<28:33,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4334/5001 [3:15:32<28:59,  2.61s/it][A
Train Diffusion:  87%|████████▋ | 4335/5001 [3:15:34<29:15,  2.64s/it][A
Train Diffusion:  87%|████████▋ | 4336/5001 [3:15:37<29:05,  2.62s/it][A
Train Diffusion:  87%|████████▋ | 4337/5001 [3:15:40<28:52,  2.61s/it][A
Train Diffusion:  87%|████████▋ | 4338/5001 [3:15:42<28:40,  2.59s/it][A
Train Diffusion:  87%|████████▋ | 4339/5001 [3:15:45<28:49,  2.61s/it][A
Train Diffusion:  87%|████████▋ | 4340/5001 [3:15:47<28:39,  2.60s/it][A
Train Diffusion:  87%|████████▋ | 4341/5001 [3:15:50<28:32,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 309876326.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8140, 0.4666, 1.2533],
        [8.8855, 0.4735, 1.3063],
        [8.8304, 0.4879, 1.2894]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.8822,  0.9619,  1.3222],
         [11.1883,  0.3867,  1.2804],
         [ 2.7447,  0.4014,  0.8909],
         ...,
         [ 0.6823,  1.0643,  3.9079],
         [12.5143,  0.1921,  1.2934],
         [ 4.2359,  0.4185,  3.9597]],

        [[ 0.5016,  0.9544,  0.9420],
         [ 0.5174,  1.0076,  1.1821],
         [24.2020,  0.0943,  1.2294],
         ...,
         [ 1.9919,  0.3360,  1.0672],
         [ 0.3620,  0.4294,  2.1366],
         [ 1.1666,  1.8176,  1.4168]],

        [[ 1.5241,  0.8309,  1.1197],
         [ 1.7882,  0.3315,  0.7913],
         [ 0.3245,  0.9702,  1.2170],
         ...,
         [31.4589,  0.1564,  1.1199],
         [ 4.4963,  0.2294,  0.7514],
         [23.2507,  1.1497,  1.9459


Train Diffusion:  87%|████████▋ | 4342/5001 [3:15:53<28:23,  2.59s/it][A
Train Diffusion:  87%|████████▋ | 4343/5001 [3:15:55<28:17,  2.58s/it][A
Train Diffusion:  87%|████████▋ | 4344/5001 [3:15:58<28:11,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4345/5001 [3:16:00<28:02,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4346/5001 [3:16:03<27:59,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4347/5001 [3:16:05<27:57,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4348/5001 [3:16:08<27:52,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4349/5001 [3:16:11<27:55,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4350/5001 [3:16:13<27:52,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4351/5001 [3:16:16<27:50,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 339874192.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7933, 0.4827, 1.2803],
        [8.6969, 0.4982, 1.2972],
        [8.7962, 0.4801, 1.2927]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6330,  0.9551,  0.9241],
         [12.5985,  0.5852,  1.1028],
         [ 2.1018,  0.4115,  0.7927],
         ...,
         [ 1.2215,  5.2002,  1.4497],
         [ 2.0387,  0.7019,  1.1571],
         [ 0.4468,  1.7259,  3.6308]],

        [[ 0.6624,  0.9564,  0.8812],
         [ 0.6074,  0.7492,  1.8852],
         [26.4474,  0.2546,  1.2033],
         ...,
         [ 0.2577,  1.1281,  3.1945],
         [ 0.4992,  0.3944,  1.0675],
         [20.5958,  1.4151,  1.9884]],

        [[ 1.6085,  0.8113,  3.7393],
         [ 1.1689,  0.6479,  0.9266],
         [ 0.6793,  0.8086,  1.0739],
         ...,
         [ 1.2507,  0.4108,  0.7685],
         [34.7388,  0.1305,  0.9643],
         [ 5.5166,  0.4197,  0.9403


Train Diffusion:  87%|████████▋ | 4352/5001 [3:16:18<27:45,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4353/5001 [3:16:21<27:42,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4354/5001 [3:16:23<27:37,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4355/5001 [3:16:26<27:32,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4356/5001 [3:16:28<27:31,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4357/5001 [3:16:31<27:27,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4358/5001 [3:16:34<27:29,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4359/5001 [3:16:36<27:27,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4360/5001 [3:16:39<27:21,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4361/5001 [3:16:41<27:18,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 345983392.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7631, 0.5152, 1.3032],
        [8.6908, 0.5236, 1.3003],
        [8.7321, 0.4984, 1.3097]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6026e+00, 8.0775e-01, 2.4556e+00],
         [1.3095e+00, 5.5154e-01, 7.7861e-01],
         [7.3727e-01, 8.2303e-01, 8.8318e-01],
         ...,
         [1.0364e+00, 6.7602e-01, 8.5668e-01],
         [4.7001e-01, 5.5202e-01, 6.7402e-01],
         [3.6347e-01, 1.6880e+00, 2.3358e+00]],

        [[6.9644e-01, 9.5640e-01, 1.0430e+00],
         [1.2395e+01, 4.6641e-01, 1.1911e+00],
         [2.2074e+00, 4.1992e-01, 8.5560e-01],
         ...,
         [3.6100e+01, 1.4074e-01, 1.1156e+00],
         [4.8415e+00, 5.7984e-02, 1.6387e-02],
         [4.2247e+00, 2.1983e-01, 8.2896e-01]],

        [[6.0405e-01, 9.5218e-01, 9.2781e-01],
         [5.6425e-01, 7.7641e-01, 1.7706e+00],
         [2.7309e+01, 1.6312e-01, 1.1


Train Diffusion:  87%|████████▋ | 4362/5001 [3:16:44<27:26,  2.58s/it][A
Train Diffusion:  87%|████████▋ | 4363/5001 [3:16:46<27:19,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4364/5001 [3:16:49<27:15,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4365/5001 [3:16:52<27:09,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4366/5001 [3:16:54<27:08,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4367/5001 [3:16:57<27:03,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4368/5001 [3:16:59<26:58,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4369/5001 [3:17:02<27:02,  2.57s/it][A
Train Diffusion:  87%|████████▋ | 4370/5001 [3:17:04<26:56,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4371/5001 [3:17:07<26:54,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335127587.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6521, 0.5036, 1.2846],
        [8.8077, 0.4932, 1.2939],
        [8.8637, 0.4959, 1.3118]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.5730e-01, 9.5573e-01, 8.8562e-01],
         [6.0058e-01, 7.6995e-01, 1.9190e+00],
         [2.7717e+01, 2.1817e-01, 1.1997e+00],
         ...,
         [2.8692e+01, 1.0023e-01, 1.2808e+00],
         [5.3742e+00, 4.5563e-02, 3.8468e+00],
         [1.6238e+00, 1.1596e+00, 1.2583e+00]],

        [[1.6089e+00, 8.0868e-01, 3.9131e+00],
         [1.2078e+00, 6.0574e-01, 9.6701e-01],
         [6.4531e-01, 7.2879e-01, 1.2859e+00],
         ...,
         [1.3611e+00, 5.4763e-01, 1.7369e+00],
         [2.5274e-01, 1.8850e+00, 8.7262e+00],
         [2.5671e+00, 3.0113e+00, 1.9052e+00]],

        [[6.3713e-01, 9.5482e-01, 8.9291e-01],
         [1.2541e+01, 6.3235e-01, 9.8232e-01],
         [2.1087e+00, 4.0726e-01, 7.6


Train Diffusion:  87%|████████▋ | 4372/5001 [3:17:09<26:51,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4373/5001 [3:17:12<26:48,  2.56s/it][A
Train Diffusion:  87%|████████▋ | 4374/5001 [3:17:15<27:12,  2.60s/it][A
Train Diffusion:  87%|████████▋ | 4375/5001 [3:17:17<27:00,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4376/5001 [3:17:20<26:51,  2.58s/it][A
Train Diffusion:  88%|████████▊ | 4377/5001 [3:17:22<26:43,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4378/5001 [3:17:25<26:39,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4379/5001 [3:17:28<26:35,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4380/5001 [3:17:30<26:29,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4381/5001 [3:17:33<26:30,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 344949843.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7776, 0.4752, 1.2817],
        [8.8586, 0.5109, 1.2992],
        [8.7875, 0.4895, 1.3059]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5743e+00, 8.1068e-01, 4.6955e+00],
         [1.4211e+00, 1.5552e-02, 6.5521e-01],
         [1.7813e+00, 5.1273e-01, 7.4436e-01],
         ...,
         [5.3471e-01, 5.0548e-01, 9.7329e-01],
         [2.2577e+01, 2.9286e-01, 1.0656e+00],
         [3.4934e+00, 7.1037e-01, 2.5193e+00]],

        [[7.8566e-01, 9.6343e-01, 7.9092e-01],
         [7.7941e-01, 7.1232e-01, 3.8670e+00],
         [1.5685e+00, 8.2599e-01, 1.0930e+00],
         ...,
         [1.2823e+00, 7.7206e-01, 2.3660e+00],
         [1.0628e+00, 3.9191e-01, 8.1098e-01],
         [2.4158e+01, 1.0873e+00, 1.9615e+00]],

        [[5.4556e-01, 9.5355e-01, 5.9593e-01],
         [4.3244e-03, 1.9294e+00, 3.2414e-01],
         [1.5447e+01, 3.8019e-01, 1.0


Train Diffusion:  88%|████████▊ | 4382/5001 [3:17:35<26:29,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4383/5001 [3:17:38<26:57,  2.62s/it][A
Train Diffusion:  88%|████████▊ | 4384/5001 [3:17:40<26:40,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4385/5001 [3:17:43<26:34,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4386/5001 [3:17:46<26:50,  2.62s/it][A
Train Diffusion:  88%|████████▊ | 4387/5001 [3:17:48<26:36,  2.60s/it][A
Train Diffusion:  88%|████████▊ | 4388/5001 [3:17:51<26:24,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4389/5001 [3:17:53<26:15,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4390/5001 [3:17:56<26:08,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4391/5001 [3:17:59<26:08,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338005171.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7900, 0.4884, 1.3214],
        [8.5285, 0.4925, 1.3022],
        [8.6207, 0.4987, 1.3180]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0094,  0.9410,  0.8160],
         [ 0.9956,  0.5361,  1.8841],
         [25.5550,  0.1850,  1.2145],
         ...,
         [30.6001,  0.0547,  1.2950],
         [ 5.9859,  0.0765,  0.7089],
         [21.2690,  1.5078,  1.9408]],

        [[ 1.4332,  0.8446,  4.2022],
         [ 0.7585,  1.0143,  1.2852],
         [ 0.6652,  0.8721,  0.5625],
         ...,
         [ 3.9950,  0.1706,  0.6164],
         [ 0.5629,  0.7192,  0.8671],
         [ 0.3077,  2.3275,  1.3525]],

        [[ 0.4659,  0.9522,  1.0020],
         [12.4794,  0.5074,  1.1387],
         [ 1.9919,  0.4545,  0.7847],
         ...,
         [ 0.3395,  5.4722,  0.5454],
         [28.5301,  0.1367,  0.7730],
         [ 5.9451,  0.2391,  1.2166


Train Diffusion:  88%|████████▊ | 4392/5001 [3:18:01<26:01,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4393/5001 [3:18:04<25:57,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4394/5001 [3:18:06<25:59,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4395/5001 [3:18:09<25:53,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4396/5001 [3:18:11<25:49,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4397/5001 [3:18:14<25:44,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4398/5001 [3:18:16<25:39,  2.55s/it][A
Train Diffusion:  88%|████████▊ | 4399/5001 [3:18:19<25:38,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4400/5001 [3:18:23<29:38,  2.96s/it][A
Train Diffusion:  88%|████████▊ | 4401/5001 [3:18:26<30:41,  3.07s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320633184.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9171, 0.5012, 1.2965],
        [8.6663, 0.5184, 1.3275],
        [8.6210, 0.4886, 1.2796]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.0214,  0.9394,  0.8224],
         [ 1.0200,  0.4990,  1.7939],
         [27.3996,  0.2552,  1.1839],
         ...,
         [27.7201,  0.3948,  0.6055],
         [ 4.6920,  0.1086,  0.7050],
         [ 0.5334,  1.9896,  0.7819]],

        [[ 0.4636,  0.9523,  0.8056],
         [ 9.2021,  0.9414,  0.8560],
         [ 1.2146,  0.5402,  0.7850],
         ...,
         [ 4.3212,  0.1254,  1.1370],
         [37.7845,  0.0742,  1.3735],
         [ 6.0735,  0.1851,  8.3153]],

        [[ 1.4238,  0.8462,  5.6963],
         [ 0.9656,  0.7516,  1.4038],
         [ 0.9758,  0.6959,  1.1715],
         ...,
         [ 0.2188,  0.5844,  2.0025],
         [ 0.2367,  0.6916,  0.7523],
         [ 7.8831,  1.5187,  2.2538


Train Diffusion:  88%|████████▊ | 4402/5001 [3:18:29<29:06,  2.92s/it][A
Train Diffusion:  88%|████████▊ | 4403/5001 [3:18:31<27:58,  2.81s/it][A
Train Diffusion:  88%|████████▊ | 4404/5001 [3:18:34<27:11,  2.73s/it][A
Train Diffusion:  88%|████████▊ | 4405/5001 [3:18:36<26:37,  2.68s/it][A
Train Diffusion:  88%|████████▊ | 4406/5001 [3:18:39<26:11,  2.64s/it][A
Train Diffusion:  88%|████████▊ | 4407/5001 [3:18:42<25:53,  2.61s/it][A
Train Diffusion:  88%|████████▊ | 4408/5001 [3:18:44<25:43,  2.60s/it][A
Train Diffusion:  88%|████████▊ | 4409/5001 [3:18:47<25:32,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4410/5001 [3:18:49<25:25,  2.58s/it][A
Train Diffusion:  88%|████████▊ | 4411/5001 [3:18:52<25:19,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318778096.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8523, 0.4813, 1.2668],
        [8.6878, 0.5310, 1.2838],
        [8.8885, 0.5000, 1.2997]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0398e-01, 9.5422e-01, 9.2561e-01],
         [6.3233e-01, 6.1878e-01, 8.8235e-01],
         [2.4644e+01, 2.4435e-01, 4.0413e+00],
         ...,
         [2.8139e+01, 1.3808e-01, 1.1058e+00],
         [5.4552e+00, 5.8400e-02, 1.7024e+00],
         [5.8732e-01, 1.9408e+00, 2.1719e+00]],

        [[6.9420e-01, 9.5835e-01, 8.6243e-01],
         [6.9504e+00, 6.2468e-01, 6.5018e-01],
         [1.5898e+00, 7.2493e-01, 6.0140e-01],
         ...,
         [4.3113e+00, 1.3806e-01, 6.6501e-01],
         [9.0053e-01, 1.3845e+00, 1.2928e+01],
         [1.9799e-01, 3.2024e+00, 3.0511e+00]],

        [[1.6065e+00, 8.5928e-01, 6.5854e+00],
         [9.7870e-01, 1.1725e+00, 1.3433e+00],
         [5.1177e-01, 3.9295e+00, 2.6


Train Diffusion:  88%|████████▊ | 4412/5001 [3:18:54<25:12,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4413/5001 [3:18:57<25:07,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4414/5001 [3:18:59<25:04,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4415/5001 [3:19:02<25:03,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4416/5001 [3:19:05<25:01,  2.57s/it][A
Train Diffusion:  88%|████████▊ | 4417/5001 [3:19:07<24:55,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4418/5001 [3:19:10<24:53,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4419/5001 [3:19:12<24:49,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4420/5001 [3:19:15<24:48,  2.56s/it][A
Train Diffusion:  88%|████████▊ | 4421/5001 [3:19:18<25:19,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330079392.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5241, 0.5263, 1.3368],
        [8.5777, 0.5042, 1.3362],
        [8.9522, 0.4806, 1.3096]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5858,  0.8064,  1.5798],
         [ 1.6023,  0.4070,  0.7619],
         [ 0.6630,  0.7880,  1.0095],
         ...,
         [ 1.0378,  1.1405,  1.3538],
         [ 0.4544,  0.4610,  0.7116],
         [27.5660,  1.1054,  1.9882]],

        [[ 0.7517,  0.9598,  1.1201],
         [12.8796,  0.4052,  1.2511],
         [ 2.6208,  0.4071,  1.0211],
         ...,
         [31.2770,  0.6395,  1.0814],
         [ 2.6974,  0.5620,  1.6685],
         [ 3.6216,  0.5086,  1.2385]],

        [[ 0.5650,  0.9508,  0.9358],
         [ 0.5586,  0.8537,  1.5761],
         [27.1534,  0.1353,  1.1847],
         ...,
         [ 1.8406,  0.2322,  1.4570],
         [13.3110,  0.3586,  1.1494],
         [ 2.7834,  1.5205,  0.6903


Train Diffusion:  88%|████████▊ | 4422/5001 [3:19:20<25:05,  2.60s/it][A
Train Diffusion:  88%|████████▊ | 4423/5001 [3:19:23<24:58,  2.59s/it][A
Train Diffusion:  88%|████████▊ | 4424/5001 [3:19:25<24:47,  2.58s/it][A
Train Diffusion:  88%|████████▊ | 4425/5001 [3:19:28<24:43,  2.58s/it][A
Train Diffusion:  89%|████████▊ | 4426/5001 [3:19:30<24:36,  2.57s/it][A
Train Diffusion:  89%|████████▊ | 4427/5001 [3:19:33<24:34,  2.57s/it][A
Train Diffusion:  89%|████████▊ | 4428/5001 [3:19:36<24:29,  2.57s/it][A
Train Diffusion:  89%|████████▊ | 4429/5001 [3:19:38<24:25,  2.56s/it][A
Train Diffusion:  89%|████████▊ | 4430/5001 [3:19:41<24:51,  2.61s/it][A
Train Diffusion:  89%|████████▊ | 4431/5001 [3:19:43<24:57,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331931168.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7590, 0.4972, 1.3236],
        [8.8523, 0.5173, 1.3354],
        [8.5824, 0.5047, 1.3063]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.9494e-01, 9.6184e-01, 8.0575e-01],
         [8.0943e-01, 5.6776e-01, 2.1976e-01],
         [1.4321e-05, 2.3654e+00, 3.2222e-01],
         ...,
         [3.6514e+00, 1.5596e-01, 7.0611e-01],
         [2.9863e+01, 3.7197e-01, 1.7478e+00],
         [3.7664e+00, 1.2575e+00, 1.8708e+00]],

        [[1.5703e+00, 8.3623e-01, 5.6309e+00],
         [8.7887e-01, 1.1059e+00, 1.2682e+00],
         [1.2700e+00, 1.2622e+01, 6.5854e-01],
         ...,
         [3.9649e-01, 1.8526e-01, 2.2175e+00],
         [1.7564e-01, 1.2252e+01, 9.7637e-01],
         [3.3558e+00, 3.6010e+00, 1.2255e+00]],

        [[5.4033e-01, 9.5182e-01, 9.4921e-01],
         [9.9331e+00, 5.2170e-01, 1.3928e+00],
         [1.6206e+00, 4.0942e+00, 1.0


Train Diffusion:  89%|████████▊ | 4432/5001 [3:19:46<24:55,  2.63s/it][A
Train Diffusion:  89%|████████▊ | 4433/5001 [3:19:49<24:42,  2.61s/it][A
Train Diffusion:  89%|████████▊ | 4434/5001 [3:19:51<24:30,  2.59s/it][A
Train Diffusion:  89%|████████▊ | 4435/5001 [3:19:54<24:22,  2.58s/it][A
Train Diffusion:  89%|████████▊ | 4436/5001 [3:19:56<24:18,  2.58s/it][A
Train Diffusion:  89%|████████▊ | 4437/5001 [3:19:59<24:09,  2.57s/it][A
Train Diffusion:  89%|████████▊ | 4438/5001 [3:20:01<24:07,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4439/5001 [3:20:04<24:04,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4440/5001 [3:20:07<23:59,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4441/5001 [3:20:09<23:54,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321833232.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.0187, 0.4965, 1.2985],
        [8.8499, 0.4801, 1.2827],
        [8.5063, 0.4931, 1.3157]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4842,  0.9540,  0.8980],
         [ 0.4900,  1.1712,  0.4164],
         [13.9178,  0.3596,  0.9359],
         ...,
         [ 0.2204, 12.1566,  0.7365],
         [32.3359,  0.1158,  1.2808],
         [ 5.8161,  0.4243,  1.0359]],

        [[ 1.4878,  0.8378,  1.0718],
         [ 1.7857,  0.3235,  0.8298],
         [ 2.0930,  0.6724,  1.5449],
         ...,
         [30.8591,  0.1118,  1.2497],
         [ 5.3383,  0.1050,  0.7040],
         [ 0.5871,  1.7787,  0.9643]],

        [[ 0.9365,  0.9554,  1.4385],
         [ 9.9059,  0.2998,  1.2765],
         [ 2.9550,  0.3684,  0.8066],
         ...,
         [ 4.1498,  0.1599,  0.6789],
         [ 0.6079,  0.6077,  0.7573],
         [24.0407,  1.3547,  2.0321


Train Diffusion:  89%|████████▉ | 4442/5001 [3:20:12<23:52,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4443/5001 [3:20:14<23:50,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4444/5001 [3:20:17<23:47,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4445/5001 [3:20:19<23:45,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4446/5001 [3:20:22<23:40,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4447/5001 [3:20:24<23:37,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4448/5001 [3:20:27<23:37,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4449/5001 [3:20:30<23:31,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4450/5001 [3:20:32<23:34,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4451/5001 [3:20:35<23:29,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326315289.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9191, 0.5028, 1.3105],
        [8.6386, 0.4923, 1.3288],
        [8.7406, 0.4924, 1.2744]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9640,  0.9507,  1.5983],
         [ 8.9064,  0.4531,  1.3042],
         [ 2.3845,  0.5799,  0.6747],
         ...,
         [ 0.7773,  0.5460,  0.9007],
         [ 6.6762,  1.0377,  1.7302],
         [ 0.0224,  2.8268,  3.0966]],

        [[ 0.4770,  0.9534,  0.9573],
         [ 1.3909,  0.9307,  0.5694],
         [ 0.8555,  0.8048,  0.8874],
         ...,
         [18.9809,  0.6310,  0.5461],
         [ 0.9864,  3.7727,  0.3205],
         [ 0.4083,  1.3261,  3.5476]],

        [[ 1.4687,  0.8401,  0.9064],
         [ 1.7311,  0.3372,  1.1932],
         [19.5773,  0.3385,  1.1922],
         ...,
         [ 2.1365,  0.4650,  0.3643],
         [ 3.0160,  0.0349,  0.0321],
         [ 2.2058,  0.7816,  4.3695


Train Diffusion:  89%|████████▉ | 4452/5001 [3:20:37<23:29,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4453/5001 [3:20:40<23:25,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4454/5001 [3:20:42<23:23,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4455/5001 [3:20:45<23:21,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4456/5001 [3:20:48<23:24,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4457/5001 [3:20:50<23:22,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4458/5001 [3:20:53<23:20,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4459/5001 [3:20:55<23:13,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4460/5001 [3:20:58<23:11,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4461/5001 [3:21:01<23:25,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320661385.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6825, 0.5029, 1.3291],
        [8.7302, 0.5171, 1.2664],
        [8.6938, 0.4949, 1.3018]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6549,  0.9541,  0.8863],
         [ 0.5982,  0.7668,  1.8402],
         [28.5226,  0.0979,  1.2015],
         ...,
         [ 1.5670,  0.5550,  0.9448],
         [ 0.4779,  0.5858,  0.7944],
         [26.1128,  1.1989,  2.0308]],

        [[ 1.6059,  0.8081,  3.8140],
         [ 1.2031,  0.5450,  0.8741],
         [ 0.7499,  0.6816,  0.7455],
         ...,
         [ 2.0374,  0.2447,  1.0720],
         [35.5729,  0.0995,  1.3266],
         [ 5.6616,  0.4324,  1.1346]],

        [[ 0.6407,  0.9535,  0.9022],
         [11.8865,  0.6253,  0.9911],
         [ 1.9802,  0.4619,  1.1265],
         ...,
         [35.5536,  0.1102,  1.1216],
         [ 5.1423,  0.1248,  0.8284],
         [ 0.6830,  1.6892,  0.8193


Train Diffusion:  89%|████████▉ | 4462/5001 [3:21:03<23:17,  2.59s/it][A
Train Diffusion:  89%|████████▉ | 4463/5001 [3:21:06<23:10,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4464/5001 [3:21:08<23:02,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4465/5001 [3:21:11<22:57,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4466/5001 [3:21:13<22:53,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4467/5001 [3:21:16<22:49,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4468/5001 [3:21:19<22:55,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4469/5001 [3:21:21<22:50,  2.58s/it][A
Train Diffusion:  89%|████████▉ | 4470/5001 [3:21:24<22:45,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4471/5001 [3:21:26<22:39,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 333800944.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6261, 0.5005, 1.2825],
        [8.8856, 0.4807, 1.2826],
        [8.6385, 0.4697, 1.3155]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2813e-01, 9.5192e-01, 9.2299e-01],
         [4.8444e-01, 8.6393e-01, 1.0400e+00],
         [1.4253e+01, 4.2830e-01, 1.0649e+00],
         ...,
         [4.4222e+00, 8.2573e-02, 7.2643e+00],
         [9.5378e+00, 3.8325e-02, 1.4305e+00],
         [5.5477e+00, 4.2129e-01, 1.1505e+00]],

        [[1.5580e+00, 8.1549e-01, 1.1132e+00],
         [1.7584e+00, 3.4928e-01, 7.5271e-01],
         [4.4129e+00, 6.0043e-01, 1.0339e+00],
         ...,
         [4.5323e-05, 6.4085e-01, 2.5928e+00],
         [2.4108e-01, 4.5180e-01, 9.3302e-01],
         [2.2017e+01, 1.3489e+00, 1.9764e+00]],

        [[8.1868e-01, 9.6185e-01, 1.2153e+00],
         [1.2473e+01, 3.2991e-01, 1.2843e+00],
         [2.9785e+00, 3.6926e-01, 7.8


Train Diffusion:  89%|████████▉ | 4472/5001 [3:21:29<22:33,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4473/5001 [3:21:31<22:31,  2.56s/it][A
Train Diffusion:  89%|████████▉ | 4474/5001 [3:21:34<22:32,  2.57s/it][A
Train Diffusion:  89%|████████▉ | 4475/5001 [3:21:37<22:31,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4476/5001 [3:21:39<22:28,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4477/5001 [3:21:42<22:25,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4478/5001 [3:21:44<22:53,  2.63s/it][A
Train Diffusion:  90%|████████▉ | 4479/5001 [3:21:47<22:46,  2.62s/it][A
Train Diffusion:  90%|████████▉ | 4480/5001 [3:21:50<22:37,  2.60s/it][A
Train Diffusion:  90%|████████▉ | 4481/5001 [3:21:52<22:28,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328508854.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6120, 0.5026, 1.2994],
        [8.6850, 0.4644, 1.3156],
        [8.9427, 0.4979, 1.3108]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6069e+00, 8.1329e-01, 3.8056e+00],
         [1.1064e+00, 6.4412e-01, 9.5774e-01],
         [6.7657e-01, 7.1127e-01, 1.3727e+00],
         ...,
         [2.2145e+01, 1.0840e+00, 3.0153e+00],
         [4.6388e-01, 2.5819e-01, 2.5487e+00],
         [4.9208e+00, 3.2738e-01, 1.3565e+00]],

        [[6.3286e-01, 9.5525e-01, 9.2402e-01],
         [1.2633e+01, 5.7877e-01, 1.0175e+00],
         [2.1411e+00, 4.2567e-01, 7.1074e-01],
         ...,
         [3.2746e+00, 3.4428e-01, 7.5605e-01],
         [1.0443e+00, 2.7630e-01, 1.6754e+00],
         [3.3267e-01, 2.3736e+00, 1.6768e+00]],

        [[6.6254e-01, 9.5659e-01, 8.7865e-01],
         [6.1190e-01, 7.3067e-01, 1.9473e+00],
         [2.7550e+01, 2.2870e-01, 1.2


Train Diffusion:  90%|████████▉ | 4482/5001 [3:21:55<22:24,  2.59s/it][A
Train Diffusion:  90%|████████▉ | 4483/5001 [3:21:57<22:20,  2.59s/it][A
Train Diffusion:  90%|████████▉ | 4484/5001 [3:22:00<22:11,  2.58s/it][A
Train Diffusion:  90%|████████▉ | 4485/5001 [3:22:02<22:07,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4486/5001 [3:22:05<22:01,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4487/5001 [3:22:08<21:58,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4488/5001 [3:22:10<21:54,  2.56s/it][A
Train Diffusion:  90%|████████▉ | 4489/5001 [3:22:13<21:50,  2.56s/it][A
Train Diffusion:  90%|████████▉ | 4490/5001 [3:22:15<21:48,  2.56s/it][A
Train Diffusion:  90%|████████▉ | 4491/5001 [3:22:18<21:45,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 317301260.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6267, 0.5344, 1.3121],
        [8.8304, 0.5078, 1.2910],
        [8.6736, 0.4996, 1.2958]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.7438e-01, 9.6023e-01, 1.2582e+00],
         [8.3334e+00, 4.8795e-01, 1.3020e+00],
         [1.9369e+00, 5.0282e-01, 1.1732e+00],
         ...,
         [3.0825e+00, 2.2497e-01, 7.9991e-01],
         [1.8703e-01, 1.1570e+01, 1.1650e+00],
         [2.0286e+01, 1.3934e+00, 1.9923e+00]],

        [[1.5267e+00, 8.2778e-01, 5.5288e-01],
         [1.8044e+00, 3.4781e-01, 7.4448e-01],
         [1.1671e-03, 7.1991e-01, 1.3248e+00],
         ...,
         [2.6924e-01, 6.3585e-01, 3.2965e-01],
         [2.9505e+01, 8.5819e-01, 1.5362e+00],
         [4.3869e+00, 4.2074e-01, 1.3678e+00]],

        [[5.0502e-01, 9.5206e-01, 9.3978e-01],
         [1.7604e+00, 7.0011e-01, 1.7775e+00],
         [1.9837e+00, 1.7652e-01, 5.5


Train Diffusion:  90%|████████▉ | 4492/5001 [3:22:20<21:40,  2.55s/it][A
Train Diffusion:  90%|████████▉ | 4493/5001 [3:22:23<21:38,  2.56s/it][A
Train Diffusion:  90%|████████▉ | 4494/5001 [3:22:25<21:33,  2.55s/it][A
Train Diffusion:  90%|████████▉ | 4495/5001 [3:22:28<21:30,  2.55s/it][A
Train Diffusion:  90%|████████▉ | 4496/5001 [3:22:31<21:29,  2.55s/it][A
Train Diffusion:  90%|████████▉ | 4497/5001 [3:22:33<21:32,  2.56s/it][A
Train Diffusion:  90%|████████▉ | 4498/5001 [3:22:36<21:36,  2.58s/it][A
Train Diffusion:  90%|████████▉ | 4499/5001 [3:22:38<21:30,  2.57s/it][A
Train Diffusion:  90%|████████▉ | 4500/5001 [3:22:41<21:27,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4501/5001 [3:22:43<21:23,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331822960.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7833, 0.4650, 1.3107],
        [8.7740, 0.5012, 1.2885],
        [8.8335, 0.4869, 1.2684]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.4015e-01, 9.6350e-01, 7.9086e-01],
         [7.7879e-01, 6.1714e-01, 1.8413e+00],
         [2.8303e+01, 1.8773e-01, 1.1947e+00],
         ...,
         [6.1030e-01, 1.5041e+00, 2.7199e+00],
         [2.0033e-01, 8.9603e-01, 2.8744e+00],
         [6.2919e-01, 3.4866e+00, 1.2718e+00]],

        [[5.1912e-01, 9.5362e-01, 7.6262e-01],
         [1.0905e+01, 9.4071e-01, 6.9523e-01],
         [1.5174e+00, 4.7029e-01, 7.8515e-01],
         ...,
         [8.7828e+00, 3.5931e-01, 3.9819e-01],
         [5.0492e+00, 5.3605e-02, 8.7262e+00],
         [9.6466e-01, 2.5528e+00, 1.8420e+00]],

        [[1.5462e+00, 8.2717e-01, 5.6526e+00],
         [8.1043e-01, 6.8803e-01, 1.3341e+00],
         [6.9892e-01, 6.8757e-01, 1.2


Train Diffusion:  90%|█████████ | 4502/5001 [3:22:46<21:26,  2.58s/it][A
Train Diffusion:  90%|█████████ | 4503/5001 [3:22:49<21:20,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4504/5001 [3:22:51<21:14,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4505/5001 [3:22:54<21:10,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4506/5001 [3:22:56<21:12,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4507/5001 [3:22:59<21:05,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4508/5001 [3:23:01<21:03,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4509/5001 [3:23:04<21:00,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4510/5001 [3:23:06<20:58,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4511/5001 [3:23:09<20:53,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327184697.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6959, 0.4933, 1.2776],
        [8.8261, 0.4784, 1.2874],
        [8.8434, 0.4852, 1.3023]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7110,  0.9588,  1.0252],
         [12.6496,  0.4726,  1.1807],
         [ 2.1823,  0.5002,  0.9435],
         ...,
         [ 0.1750,  0.5660,  1.0477],
         [38.5655,  0.0539,  1.3873],
         [ 6.5038,  0.2168,  0.7908]],

        [[ 0.5914,  0.9530,  0.9385],
         [ 0.5629,  0.7274,  1.7300],
         [25.5072,  0.2670,  1.1937],
         ...,
         [ 0.4163,  0.0935,  2.4319],
         [ 0.5337,  0.3639,  0.7562],
         [ 0.6980,  2.3602,  1.1838]],

        [[ 1.6013,  0.8163,  2.8024],
         [ 1.0941,  0.6750,  0.8424],
         [ 0.7143,  0.9215,  0.7999],
         ...,
         [ 3.5518,  1.1220,  0.4030],
         [ 3.2883,  0.3612,  0.6231],
         [20.3454,  1.3467,  2.0470


Train Diffusion:  90%|█████████ | 4512/5001 [3:23:12<20:50,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4513/5001 [3:23:14<20:49,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4514/5001 [3:23:17<20:44,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4515/5001 [3:23:19<20:45,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4516/5001 [3:23:22<20:59,  2.60s/it][A
Train Diffusion:  90%|█████████ | 4517/5001 [3:23:25<20:48,  2.58s/it][A
Train Diffusion:  90%|█████████ | 4518/5001 [3:23:27<20:42,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4519/5001 [3:23:30<20:39,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4520/5001 [3:23:32<20:35,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4521/5001 [3:23:35<20:33,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325912502.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7484, 0.4973, 1.3342],
        [8.8809, 0.4907, 1.3082],
        [8.4785, 0.4910, 1.2989]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5337,  0.9519,  0.9182],
         [ 0.4548,  0.8731,  1.2192],
         [23.8702,  0.2364,  1.1199],
         ...,
         [ 0.0438,  0.8415,  0.8276],
         [23.0142,  1.6054,  0.4940],
         [ 3.6918,  0.3091,  1.7997]],

        [[ 0.8092,  0.9618,  1.1922],
         [12.5270,  0.3453,  1.2803],
         [ 2.9003,  0.3667,  1.0114],
         ...,
         [25.1171,  0.0461,  1.0306],
         [ 5.1794,  0.0740,  4.5450],
         [ 0.5360,  4.8067,  1.6650]],

        [[ 1.5611,  0.8135,  1.0904],
         [ 1.7862,  0.3418,  0.7089],
         [ 0.3198,  0.9004,  1.3715],
         ...,
         [ 3.1126,  0.1110,  0.7285],
         [ 0.1304, 11.7781,  1.4887],
         [14.5851,  3.1196,  4.1183


Train Diffusion:  90%|█████████ | 4522/5001 [3:23:37<20:29,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4523/5001 [3:23:40<20:28,  2.57s/it][A
Train Diffusion:  90%|█████████ | 4524/5001 [3:23:42<20:22,  2.56s/it][A
Train Diffusion:  90%|█████████ | 4525/5001 [3:23:45<20:20,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4526/5001 [3:23:48<20:38,  2.61s/it][A
Train Diffusion:  91%|█████████ | 4527/5001 [3:23:50<20:52,  2.64s/it][A
Train Diffusion:  91%|█████████ | 4528/5001 [3:23:53<20:36,  2.61s/it][A
Train Diffusion:  91%|█████████ | 4529/5001 [3:23:56<20:26,  2.60s/it][A
Train Diffusion:  91%|█████████ | 4530/5001 [3:23:58<20:25,  2.60s/it][A
Train Diffusion:  91%|█████████ | 4531/5001 [3:24:01<20:16,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324675868.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8411, 0.5092, 1.2790],
        [8.8662, 0.4825, 1.3184],
        [8.6321, 0.4815, 1.2906]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6749,  0.9565,  0.8696],
         [ 0.6230,  0.7181,  1.9413],
         [26.0461,  0.2714,  1.2081],
         ...,
         [ 3.6349,  0.1376,  0.7908],
         [13.1212,  0.7331,  0.9560],
         [ 1.5727,  2.3131,  1.4119]],

        [[ 0.6226,  0.9542,  0.9414],
         [12.8909,  0.5463,  1.1092],
         [ 2.1724,  0.3973,  0.7618],
         ...,
         [ 4.2788,  0.4361,  1.5826],
         [10.0013,  1.2512,  2.6778],
         [ 3.6469,  1.3922,  1.2242]],

        [[ 1.6052,  0.8143,  3.7243],
         [ 1.0699,  0.6896,  0.9733],
         [ 0.6681,  0.7893,  1.1159],
         ...,
         [ 3.4987,  0.2407,  0.2764],
         [ 4.9426,  0.0729,  0.1972],
         [19.9776,  3.1305,  5.1272


Train Diffusion:  91%|█████████ | 4532/5001 [3:24:03<20:10,  2.58s/it][A
Train Diffusion:  91%|█████████ | 4533/5001 [3:24:06<20:04,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4534/5001 [3:24:08<20:00,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4535/5001 [3:24:11<19:56,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4536/5001 [3:24:14<19:51,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4537/5001 [3:24:16<19:49,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4538/5001 [3:24:19<19:48,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4539/5001 [3:24:21<19:43,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4540/5001 [3:24:24<19:39,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4541/5001 [3:24:26<19:36,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335130275.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6100, 0.4996, 1.2791],
        [8.7418, 0.5124, 1.2846],
        [8.8705, 0.4949, 1.3272]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6108,  0.9542,  0.9227],
         [ 0.5717,  0.8002,  1.8235],
         [27.3391,  0.1883,  1.2041],
         ...,
         [ 1.3356,  0.4617,  0.8198],
         [33.7110,  0.0929,  1.1962],
         [ 6.1795,  0.2282,  0.8727]],

        [[ 0.6878,  0.9576,  0.9923],
         [12.4963,  0.5137,  1.1468],
         [ 2.2061,  0.4050,  0.7498],
         ...,
         [ 2.2380,  0.2110,  0.7521],
         [ 0.4238,  0.7186,  0.6783],
         [14.8727,  1.5575,  1.9605]],

        [[ 1.6054,  0.8078,  2.9597],
         [ 1.3128,  0.5621,  0.8197],
         [ 0.7159,  0.7496,  1.1921],
         ...,
         [36.5035,  0.0644,  1.1882],
         [ 6.0117,  0.0763,  0.5033],
         [ 3.2038,  2.0575,  4.8289


Train Diffusion:  91%|█████████ | 4542/5001 [3:24:29<19:33,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4543/5001 [3:24:31<19:32,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4544/5001 [3:24:34<19:31,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4545/5001 [3:24:37<19:33,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4546/5001 [3:24:39<19:31,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4547/5001 [3:24:42<19:27,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4548/5001 [3:24:44<19:31,  2.59s/it][A
Train Diffusion:  91%|█████████ | 4549/5001 [3:24:47<19:26,  2.58s/it][A
Train Diffusion:  91%|█████████ | 4550/5001 [3:24:50<19:21,  2.58s/it][A
Train Diffusion:  91%|█████████ | 4551/5001 [3:24:52<19:18,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337114681.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.4965, 0.4932, 1.3091],
        [8.7964, 0.5033, 1.2515],
        [8.9529, 0.4647, 1.3103]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5792,  0.8073,  1.0998],
         [ 1.7396,  0.3618,  0.7056],
         [ 0.3459,  0.8581,  1.2639],
         ...,
         [ 3.8635,  0.1747,  0.8835],
         [ 0.8894,  0.4324,  0.5562],
         [ 9.1537,  2.0087,  4.8848]],

        [[ 0.7684,  0.9626,  1.1498],
         [13.0968,  0.3562,  1.2733],
         [ 2.8533,  0.3641,  1.0842],
         ...,
         [ 0.5304,  0.6165,  1.0002],
         [41.3901,  0.1111,  1.3739],
         [ 6.4793,  0.1913,  0.9479]],

        [[ 0.5558,  0.9529,  0.9253],
         [ 0.5058,  0.8358,  1.4124],
         [25.1677,  0.1790,  1.1338],
         ...,
         [28.8504,  0.1513,  1.1966],
         [ 4.7273,  0.1940,  0.7934],
         [ 0.1089,  1.5166,  2.7986


Train Diffusion:  91%|█████████ | 4552/5001 [3:24:55<19:15,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4553/5001 [3:24:57<19:10,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4554/5001 [3:25:00<19:06,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4555/5001 [3:25:02<19:06,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4556/5001 [3:25:05<19:01,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4557/5001 [3:25:07<18:56,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4558/5001 [3:25:10<18:58,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4559/5001 [3:25:13<18:54,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4560/5001 [3:25:15<18:51,  2.57s/it][A
Train Diffusion:  91%|█████████ | 4561/5001 [3:25:18<18:47,  2.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327963340.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7911, 0.5071, 1.3284],
        [8.6498, 0.5145, 1.3169],
        [8.6654, 0.4959, 1.3013]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5522,  0.9506,  0.9217],
         [ 0.4828,  0.8185,  1.4557],
         [26.2764,  0.1420,  1.1778],
         ...,
         [ 2.0761,  0.2340,  2.5071],
         [25.2940,  0.0367,  1.5809],
         [ 6.4488,  0.2284,  0.7371]],

        [[ 1.5762,  0.8076,  0.9908],
         [ 1.7556,  0.3563,  0.7066],
         [ 0.4783,  0.8387,  1.1684],
         ...,
         [ 0.9911,  0.5445,  1.1945],
         [ 0.3644,  0.4910,  0.8271],
         [ 4.3876,  2.5506,  1.4781]],

        [[ 0.7749,  0.9605,  1.1584],
         [12.9527,  0.3720,  1.2730],
         [ 2.7658,  0.3787,  1.1290],
         ...,
         [32.6860,  0.1628,  1.0784],
         [ 4.4224,  0.2179,  0.7427],
         [14.7987,  1.4710,  1.8580


Train Diffusion:  91%|█████████ | 4562/5001 [3:25:20<18:42,  2.56s/it][A
Train Diffusion:  91%|█████████ | 4563/5001 [3:25:23<18:41,  2.56s/it][A
Train Diffusion:  91%|█████████▏| 4564/5001 [3:25:25<18:49,  2.58s/it][A
Train Diffusion:  91%|█████████▏| 4565/5001 [3:25:28<18:41,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4566/5001 [3:25:31<18:38,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4567/5001 [3:25:33<18:36,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4568/5001 [3:25:36<18:32,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4569/5001 [3:25:38<18:28,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4570/5001 [3:25:41<18:29,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4571/5001 [3:25:43<18:25,  2.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326053059.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5949, 0.5111, 1.2979],
        [8.7040, 0.4902, 1.2871],
        [8.9945, 0.5007, 1.3102]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.0348e-01, 9.6229e-01, 1.1935e+00],
         [1.2768e+01, 3.3739e-01, 1.2800e+00],
         [2.9660e+00, 3.5749e-01, 8.5956e-01],
         ...,
         [2.6938e+01, 1.9342e-01, 8.0308e-01],
         [5.4851e+00, 2.2977e-02, 2.0715e-01],
         [3.3601e+00, 3.2784e-01, 1.0288e+00]],

        [[1.5645e+00, 8.1320e-01, 1.1918e+00],
         [1.7081e+00, 3.6633e-01, 7.2805e-01],
         [1.6558e+00, 7.3838e-01, 1.0532e+00],
         ...,
         [4.4877e+00, 1.1598e-01, 6.2378e-01],
         [2.7258e-01, 1.1965e+00, 1.0428e+00],
         [2.0480e+01, 1.5611e+00, 2.0597e+00]],

        [[5.3628e-01, 9.5240e-01, 9.3823e-01],
         [5.0645e-01, 8.5613e-01, 1.1828e+00],
         [1.8246e+01, 3.4405e-01, 1.0


Train Diffusion:  91%|█████████▏| 4572/5001 [3:25:46<18:24,  2.57s/it][A
Train Diffusion:  91%|█████████▏| 4573/5001 [3:25:49<18:40,  2.62s/it][A
Train Diffusion:  91%|█████████▏| 4574/5001 [3:25:52<19:13,  2.70s/it][A
Train Diffusion:  91%|█████████▏| 4575/5001 [3:25:54<18:55,  2.67s/it][A
Train Diffusion:  92%|█████████▏| 4576/5001 [3:25:57<18:44,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4577/5001 [3:25:59<18:33,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4578/5001 [3:26:02<18:40,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4579/5001 [3:26:05<18:36,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4580/5001 [3:26:07<18:28,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4581/5001 [3:26:10<18:21,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338330531.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6426, 0.5039, 1.3088],
        [8.8915, 0.5006, 1.2945],
        [8.7180, 0.4997, 1.3065]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2587e-01, 9.5062e-01, 9.5015e-01],
         [5.6909e-01, 7.6131e-01, 6.7051e-01],
         [4.9966e+00, 5.1206e-01, 5.9410e-01],
         ...,
         [4.9002e-01, 9.8863e-01, 8.4780e-01],
         [2.8555e+01, 1.4210e-01, 1.2730e+00],
         [5.0794e+00, 6.1438e-01, 7.4562e-01]],

        [[8.2543e-01, 9.6078e-01, 1.2176e+00],
         [1.2098e+01, 3.7683e-01, 1.2769e+00],
         [2.7028e+00, 2.9952e-01, 1.0986e+00],
         ...,
         [3.2577e+01, 8.0801e-02, 1.2452e+00],
         [5.6018e+00, 6.0539e-02, 2.3811e+00],
         [1.0866e+00, 1.2890e+00, 1.2675e+00]],

        [[1.5536e+00, 8.1784e-01, 6.2686e-01],
         [1.7044e+00, 3.8385e-01, 4.2943e-01],
         [3.9327e-04, 1.6291e+00, 5.2


Train Diffusion:  92%|█████████▏| 4582/5001 [3:26:13<18:23,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4583/5001 [3:26:15<18:18,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4584/5001 [3:26:18<18:11,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4585/5001 [3:26:20<18:13,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4586/5001 [3:26:23<18:09,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4587/5001 [3:26:26<18:07,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4588/5001 [3:26:28<18:02,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4589/5001 [3:26:31<17:58,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4590/5001 [3:26:34<17:56,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4591/5001 [3:26:36<17:55,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332257673.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5767, 0.5013, 1.2434],
        [8.9381, 0.5003, 1.2935],
        [8.8234, 0.5006, 1.2824]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6604,  0.9557,  0.9343],
         [12.4408,  0.5731,  1.0925],
         [ 2.1259,  0.4080,  0.7526],
         ...,
         [ 0.3047,  0.6047,  0.4304],
         [22.8320,  0.5657,  0.1889],
         [ 4.0535,  0.6415,  0.7550]],

        [[ 0.6349,  0.9546,  0.9035],
         [ 0.5855,  0.7892,  1.8502],
         [26.7806,  0.2420,  1.2027],
         ...,
         [11.2798,  0.0561,  1.4554],
         [ 5.9131,  0.0691,  0.5538],
         [26.0944,  1.5491,  1.8395]],

        [[ 1.6069,  0.8072,  3.4921],
         [ 1.2669,  0.5814,  0.8790],
         [ 0.6923,  0.7665,  1.1806],
         ...,
         [ 1.8104,  3.6700,  1.1886],
         [ 4.8721,  0.2214,  0.9214],
         [ 1.9353,  1.7558,  1.5239


Train Diffusion:  92%|█████████▏| 4592/5001 [3:26:39<17:51,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4593/5001 [3:26:41<17:51,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4594/5001 [3:26:44<17:49,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4595/5001 [3:26:47<17:48,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4596/5001 [3:26:49<17:52,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4597/5001 [3:26:52<17:52,  2.66s/it][A
Train Diffusion:  92%|█████████▏| 4598/5001 [3:26:55<17:50,  2.66s/it][A
Train Diffusion:  92%|█████████▏| 4599/5001 [3:26:57<17:47,  2.66s/it][A
Train Diffusion:  92%|█████████▏| 4600/5001 [3:27:00<17:42,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4601/5001 [3:27:03<17:41,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321591859.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8113, 0.5004, 1.2913],
        [8.5108, 0.5170, 1.2744],
        [8.7890, 0.5122, 1.3007]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5740,  0.8094,  1.2782],
         [ 1.6996,  0.3699,  0.7363],
         [ 0.3512,  0.9477,  1.3023],
         ...,
         [ 2.5910,  0.5996,  7.5736],
         [ 0.5538,  1.3161,  1.3119],
         [ 0.4616,  1.2375,  7.6009]],

        [[ 0.7823,  0.9608,  1.1555],
         [12.8505,  0.3729,  1.2709],
         [ 2.7890,  0.3830,  0.7590],
         ...,
         [ 0.3756, 15.3957,  1.1526],
         [21.4327,  0.1929,  2.0641],
         [ 3.9091,  1.2591,  1.7627]],

        [[ 0.5480,  0.9508,  0.9317],
         [ 0.5085,  0.8634,  1.4147],
         [25.9634,  0.1069,  1.1666],
         ...,
         [ 0.4585,  1.9125,  1.3177],
         [ 0.7480,  0.3558,  0.8390],
         [10.3403,  2.1519,  1.3895


Train Diffusion:  92%|█████████▏| 4602/5001 [3:27:05<17:35,  2.65s/it][A
Train Diffusion:  92%|█████████▏| 4603/5001 [3:27:08<17:25,  2.63s/it][A
Train Diffusion:  92%|█████████▏| 4604/5001 [3:27:10<17:18,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4605/5001 [3:27:13<17:11,  2.60s/it][A
Train Diffusion:  92%|█████████▏| 4606/5001 [3:27:16<17:05,  2.60s/it][A
Train Diffusion:  92%|█████████▏| 4607/5001 [3:27:18<17:01,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4608/5001 [3:27:21<16:56,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4609/5001 [3:27:23<16:53,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4610/5001 [3:27:26<16:54,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4611/5001 [3:27:29<16:55,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331880841.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8019, 0.4925, 1.3228],
        [8.6835, 0.5011, 1.2879],
        [8.7833, 0.4863, 1.2962]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.9040e-01, 9.5247e-01, 9.5787e-01],
         [1.1925e+00, 6.9541e-01, 3.2993e+00],
         [9.0096e+00, 2.1782e-01, 1.0103e+00],
         ...,
         [2.6922e+00, 1.3705e-02, 1.3853e+00],
         [4.4177e-01, 4.3871e-01, 6.8609e-01],
         [1.5663e+01, 1.5335e+00, 1.6702e+00]],

        [[1.5005e+00, 8.3348e-01, 5.3462e-01],
         [1.7108e+00, 3.7267e-01, 5.6639e-01],
         [1.7729e-03, 1.2317e+00, 6.0277e-01],
         ...,
         [5.5197e-01, 6.2193e-01, 1.0103e+00],
         [1.5200e+01, 3.5299e-01, 1.1748e+00],
         [2.1455e+00, 1.4900e+00, 1.8375e+00]],

        [[9.1636e-01, 9.5654e-01, 1.3651e+00],
         [8.5518e+00, 4.3995e-01, 1.2764e+00],
         [2.2172e+00, 3.3033e-01, 7.3


Train Diffusion:  92%|█████████▏| 4612/5001 [3:27:31<16:49,  2.60s/it][A
Train Diffusion:  92%|█████████▏| 4613/5001 [3:27:34<16:46,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4614/5001 [3:27:36<16:41,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4615/5001 [3:27:39<16:38,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4616/5001 [3:27:42<16:35,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4617/5001 [3:27:44<16:32,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4618/5001 [3:27:47<16:30,  2.59s/it][A
Train Diffusion:  92%|█████████▏| 4619/5001 [3:27:49<16:39,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4620/5001 [3:27:52<16:39,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4621/5001 [3:27:55<16:49,  2.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 318463948.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8755, 0.4982, 1.3210],
        [8.6445, 0.5086, 1.2956],
        [8.5934, 0.5009, 1.3148]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4306e-01, 9.5314e-01, 1.0917e+00],
         [4.9970e-01, 6.5769e-01, 2.2716e-01],
         [1.8556e-05, 2.3819e+00, 2.1250e-01],
         ...,
         [7.3395e-01, 6.4259e-01, 2.6407e+00],
         [1.8410e-01, 7.2725e-01, 6.0051e-01],
         [2.3596e-01, 1.7152e+00, 2.2016e+00]],

        [[1.2829e+00, 8.8758e-01, 3.3707e+00],
         [2.3618e+00, 8.8602e-01, 1.1515e+00],
         [1.3071e+00, 5.8738e+00, 5.6515e-01],
         ...,
         [1.9839e-01, 5.6065e-01, 1.2815e+00],
         [3.4569e+01, 4.7623e-02, 1.4956e+00],
         [6.5622e+00, 2.0194e-01, 8.5522e-01]],

        [[1.1862e+00, 9.0607e-01, 2.1564e+00],
         [2.6843e+00, 8.6067e-01, 1.4263e+00],
         [1.4583e+00, 3.2634e-02, 6.0


Train Diffusion:  92%|█████████▏| 4622/5001 [3:27:57<16:39,  2.64s/it][A
Train Diffusion:  92%|█████████▏| 4623/5001 [3:28:00<16:30,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4624/5001 [3:28:03<16:26,  2.62s/it][A
Train Diffusion:  92%|█████████▏| 4625/5001 [3:28:05<16:20,  2.61s/it][A
Train Diffusion:  93%|█████████▎| 4626/5001 [3:28:08<16:15,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4627/5001 [3:28:10<16:09,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4628/5001 [3:28:13<16:05,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4629/5001 [3:28:15<16:02,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4630/5001 [3:28:18<15:59,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4631/5001 [3:28:21<15:57,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338462764.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7349, 0.4790, 1.2980],
        [8.6638, 0.4920, 1.3023],
        [8.8764, 0.4821, 1.2903]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.1686e-01, 9.5686e-01, 7.8310e-01],
         [8.6718e-01, 5.8130e-01, 1.7095e+00],
         [2.7324e+01, 1.4061e-01, 1.1947e+00],
         ...,
         [7.3442e-04, 6.6734e-01, 3.4367e+00],
         [1.7915e-01, 4.4303e-01, 1.8958e+00],
         [1.3233e-02, 1.8966e+00, 3.0499e+00]],

        [[4.9046e-01, 9.5301e-01, 8.1972e-01],
         [1.1082e+01, 8.1980e-01, 1.4285e+00],
         [1.4559e+00, 7.9858e-01, 1.0469e+00],
         ...,
         [1.0469e+01, 5.0236e+00, 8.4090e-01],
         [2.2483e+00, 3.7741e-01, 1.0355e+00],
         [4.8045e+00, 2.4637e-01, 1.3307e+00]],

        [[1.5000e+00, 8.3371e-01, 5.4332e+00],
         [7.8986e-01, 8.2900e-01, 1.3649e+00],
         [7.1494e-01, 8.4606e-01, 4.8


Train Diffusion:  93%|█████████▎| 4632/5001 [3:28:23<15:54,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4633/5001 [3:28:26<15:50,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4634/5001 [3:28:28<15:48,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4635/5001 [3:28:31<15:44,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4636/5001 [3:28:34<15:42,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4637/5001 [3:28:36<15:40,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4638/5001 [3:28:39<15:38,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4639/5001 [3:28:41<15:36,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4640/5001 [3:28:44<15:33,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4641/5001 [3:28:46<15:31,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320647804.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8234, 0.4872, 1.3023],
        [8.7570, 0.4825, 1.3007],
        [8.7361, 0.4938, 1.2629]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6052,  0.8104,  3.1595],
         [ 1.1933,  0.5975,  0.8454],
         [ 0.7264,  0.6869,  1.0713],
         ...,
         [18.2061,  1.7576,  0.8002],
         [ 1.2448,  2.4404,  0.7257],
         [ 0.3785,  1.1650, 10.7491]],

        [[ 0.6699,  0.9559,  0.9770],
         [12.4539,  0.5207,  1.1194],
         [ 2.1890,  0.4550,  0.6753],
         ...,
         [ 4.3804,  0.1012,  1.0687],
         [30.1919,  0.7897,  3.4531],
         [ 2.0143,  3.3695,  2.1794]],

        [[ 0.6263,  0.9540,  0.9083],
         [ 0.5810,  0.7533,  1.8621],
         [28.3211,  0.0929,  1.2047],
         ...,
         [ 8.4783,  0.6795,  0.9805],
         [ 3.1187,  0.0942,  3.3277],
         [ 0.3750,  2.6814,  1.1152


Train Diffusion:  93%|█████████▎| 4642/5001 [3:28:49<15:32,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4643/5001 [3:28:52<15:27,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4644/5001 [3:28:54<15:23,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4645/5001 [3:28:57<15:20,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4646/5001 [3:28:59<15:18,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4647/5001 [3:29:02<15:19,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4648/5001 [3:29:05<15:14,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4649/5001 [3:29:07<15:11,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4650/5001 [3:29:10<15:08,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4651/5001 [3:29:12<15:05,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 343314246.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6657, 0.4894, 1.2661],
        [8.7796, 0.5014, 1.2781],
        [9.0151, 0.4832, 1.3077]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.3840e-01, 9.5388e-01, 9.1998e-01],
         [5.0051e-01, 8.2697e-01, 1.1163e+00],
         [1.6778e+01, 3.8160e-01, 1.0251e+00],
         ...,
         [2.4639e-01, 5.8434e-01, 8.7537e-01],
         [3.6697e+01, 1.0875e-01, 1.2395e+00],
         [6.2008e+00, 1.9764e-01, 1.1712e+00]],

        [[1.5658e+00, 8.1272e-01, 9.4401e-01],
         [1.7827e+00, 3.4834e-01, 7.3830e-01],
         [2.3980e+00, 6.8706e-01, 1.0536e+00],
         ...,
         [2.9872e+01, 9.9219e-02, 1.2689e+00],
         [5.3992e+00, 1.2568e-01, 5.0951e-01],
         [3.1791e+00, 1.7245e+00, 7.4424e+00]],

        [[7.9978e-01, 9.6376e-01, 1.1853e+00],
         [1.2783e+01, 3.2705e-01, 1.2802e+00],
         [2.9850e+00, 3.5940e-01, 8.3


Train Diffusion:  93%|█████████▎| 4652/5001 [3:29:15<15:01,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4653/5001 [3:29:18<14:58,  2.58s/it][A
Train Diffusion:  93%|█████████▎| 4654/5001 [3:29:20<14:59,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4655/5001 [3:29:23<14:56,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4656/5001 [3:29:25<14:53,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4657/5001 [3:29:28<14:52,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4658/5001 [3:29:31<15:00,  2.62s/it][A
Train Diffusion:  93%|█████████▎| 4659/5001 [3:29:33<14:54,  2.62s/it][A
Train Diffusion:  93%|█████████▎| 4660/5001 [3:29:36<14:48,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4661/5001 [3:29:38<14:43,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332709113.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7992, 0.4873, 1.3132],
        [8.4973, 0.5102, 1.2883],
        [8.8419, 0.4860, 1.2965]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.0949e-01, 9.5264e-01, 9.2274e-01],
         [5.6846e-01, 7.8060e-01, 1.7849e+00],
         [2.7891e+01, 2.2885e-01, 1.1903e+00],
         ...,
         [2.5713e+01, 3.4998e-01, 4.8219e-01],
         [5.2013e+00, 8.3218e-02, 3.8124e-01],
         [5.6488e+00, 2.2693e+00, 4.6025e+00]],

        [[6.8918e-01, 9.5612e-01, 1.0143e+00],
         [1.1872e+01, 5.0826e-01, 1.1704e+00],
         [2.1017e+00, 5.1100e-01, 1.2242e+00],
         ...,
         [4.0582e+00, 1.6541e-01, 6.4904e-01],
         [4.2084e-01, 1.0531e+00, 6.6261e-01],
         [1.6990e-03, 1.6408e+00, 3.2945e+00]],

        [[1.6038e+00, 8.0804e-01, 2.7414e+00],
         [1.2820e+00, 5.3807e-01, 7.8692e-01],
         [7.8998e-01, 6.6773e-01, 8.0


Train Diffusion:  93%|█████████▎| 4662/5001 [3:29:41<14:40,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4663/5001 [3:29:44<14:36,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4664/5001 [3:29:46<14:33,  2.59s/it][A
Train Diffusion:  93%|█████████▎| 4665/5001 [3:29:49<14:34,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4666/5001 [3:29:51<14:31,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4667/5001 [3:29:54<14:29,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4668/5001 [3:29:57<14:40,  2.64s/it][A
Train Diffusion:  93%|█████████▎| 4669/5001 [3:29:59<14:31,  2.63s/it][A
Train Diffusion:  93%|█████████▎| 4670/5001 [3:30:02<14:29,  2.63s/it][A
Train Diffusion:  93%|█████████▎| 4671/5001 [3:30:05<14:22,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331562889.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9050, 0.4879, 1.3270],
        [8.6721, 0.4828, 1.2892],
        [8.7409, 0.4855, 1.3146]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.4468,  0.9543,  0.9749],
         [ 4.2279,  0.7462,  0.8324],
         [ 1.1797,  0.5315,  0.7358],
         ...,
         [20.9947,  0.1038,  1.3231],
         [ 5.1001,  0.1142,  0.9071],
         [24.3049,  1.2304,  2.0148]],

        [[ 1.3341,  0.8723,  0.7500],
         [ 1.5238,  0.3783,  1.8371],
         [27.5010,  0.2537,  1.1852],
         ...,
         [ 2.5115,  0.2515,  0.9503],
         [ 0.6990,  0.4910,  1.2881],
         [ 0.4180,  2.0793,  0.7776]],

        [[ 1.1313,  0.9189,  2.7022],
         [ 4.0277,  0.7268,  1.4830],
         [ 1.2406,  0.6721,  1.2552],
         ...,
         [ 0.2019,  0.6785,  0.3922],
         [33.7983,  0.6075,  0.6032],
         [ 4.6440,  0.3102,  1.3800


Train Diffusion:  93%|█████████▎| 4672/5001 [3:30:07<14:17,  2.61s/it][A
Train Diffusion:  93%|█████████▎| 4673/5001 [3:30:10<14:13,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4674/5001 [3:30:12<14:09,  2.60s/it][A
Train Diffusion:  93%|█████████▎| 4675/5001 [3:30:15<14:04,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4676/5001 [3:30:17<14:02,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4677/5001 [3:30:20<13:57,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4678/5001 [3:30:23<13:56,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4679/5001 [3:30:25<13:58,  2.60s/it][A
Train Diffusion:  94%|█████████▎| 4680/5001 [3:30:28<13:52,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4681/5001 [3:30:30<13:50,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 330888531.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7870, 0.5074, 1.2762],
        [8.8348, 0.4924, 1.2815],
        [8.8359, 0.4954, 1.2644]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4608,  0.8415,  0.5669],
         [ 1.7689,  0.3385,  1.3267],
         [22.7976,  0.3196,  1.1970],
         ...,
         [ 0.3490,  0.6336,  0.3712],
         [23.0317,  0.7396,  0.1297],
         [ 4.5574,  0.4088,  0.8497]],

        [[ 0.4732,  0.9533,  0.9506],
         [ 6.2232,  0.6445,  3.0113],
         [ 1.3328,  0.5403,  1.0274],
         ...,
         [23.2779,  0.0357,  1.4600],
         [ 6.1534,  0.0657,  0.8408],
         [25.2921,  1.2923,  2.0014]],

        [[ 0.9756,  0.9484,  1.4988],
         [ 3.1209,  0.7460,  1.5406],
         [ 0.8259,  1.0039,  0.6543],
         ...,
         [ 3.2319,  0.2461,  0.7109],
         [ 1.5996,  0.5611,  1.5844],
         [ 0.4872,  1.9669,  1.2577


Train Diffusion:  94%|█████████▎| 4682/5001 [3:30:33<13:47,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4683/5001 [3:30:36<13:45,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4684/5001 [3:30:38<13:40,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4685/5001 [3:30:41<13:39,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4686/5001 [3:30:43<13:35,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4687/5001 [3:30:46<13:32,  2.59s/it][A
Train Diffusion:  94%|█████████▎| 4688/5001 [3:30:49<13:33,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4689/5001 [3:30:51<13:30,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4690/5001 [3:30:54<13:26,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4691/5001 [3:30:56<13:24,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327646905.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7314, 0.5005, 1.3047],
        [8.6729, 0.5014, 1.3616],
        [8.6046, 0.4867, 1.2926]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5375,  0.8246,  1.0301],
         [ 1.7760,  0.3382,  1.0016],
         [ 9.1004,  0.4716,  1.2130],
         ...,
         [17.1108,  0.2329,  1.1598],
         [ 4.5204,  0.0662,  9.0905],
         [ 4.1269,  2.5472,  2.0283]],

        [[ 0.5143,  0.9511,  0.9333],
         [ 0.5268,  0.8978,  0.5452],
         [ 5.8018,  0.5762,  0.8093],
         ...,
         [ 0.1925,  1.2161,  6.8294],
         [ 0.1452,  1.8057,  1.6447],
         [ 0.2536,  1.3516,  8.0404]],

        [[ 0.8529,  0.9609,  1.2694],
         [11.8315,  0.3350,  1.2881],
         [ 2.9332,  0.3974,  0.7224],
         ...,
         [ 1.6819,  3.3146,  2.1494],
         [13.0345,  0.2126,  0.8937],
         [ 3.8175,  1.0300,  0.9593


Train Diffusion:  94%|█████████▍| 4692/5001 [3:30:59<13:19,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4693/5001 [3:31:02<13:16,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4694/5001 [3:31:04<13:14,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4695/5001 [3:31:07<13:12,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4696/5001 [3:31:09<13:11,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4697/5001 [3:31:12<13:06,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4698/5001 [3:31:14<13:04,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4699/5001 [3:31:17<13:00,  2.58s/it][A
Train Diffusion:  94%|█████████▍| 4700/5001 [3:31:20<12:58,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4701/5001 [3:31:22<12:54,  2.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331789603.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8782, 0.5099, 1.3155],
        [8.4974, 0.4940, 1.2814],
        [8.8249, 0.5028, 1.2594]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.4285e+00, 8.4704e-01, 5.3841e-01],
         [1.7047e+00, 3.5504e-01, 6.8361e-01],
         [1.0940e-06, 8.9785e-01, 1.5861e+00],
         ...,
         [2.5306e+01, 1.8973e-01, 1.1910e+00],
         [4.2277e+00, 2.1722e-01, 9.9498e-01],
         [1.4244e-01, 1.7779e+00, 2.6594e+00]],

        [[1.0179e+00, 9.4125e-01, 1.6000e+00],
         [3.8335e+00, 7.7826e-01, 1.3755e+00],
         [1.0330e+00, 9.7191e-01, 1.5397e+01],
         ...,
         [2.4317e-01, 5.9385e-01, 1.0845e+00],
         [4.2402e+01, 8.0180e-02, 1.3503e+00],
         [6.5622e+00, 1.0948e-01, 2.4387e+00]],

        [[4.6404e-01, 9.5360e-01, 9.6705e-01],
         [5.6692e+00, 6.0895e-01, 3.0744e+00],
         [1.5547e+00, 1.1438e-01, 5.8


Train Diffusion:  94%|█████████▍| 4702/5001 [3:31:25<12:52,  2.58s/it][A
Train Diffusion:  94%|█████████▍| 4703/5001 [3:31:27<12:50,  2.58s/it][A
Train Diffusion:  94%|█████████▍| 4704/5001 [3:31:30<12:46,  2.58s/it][A
Train Diffusion:  94%|█████████▍| 4705/5001 [3:31:33<12:47,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4706/5001 [3:31:35<12:50,  2.61s/it][A
Train Diffusion:  94%|█████████▍| 4707/5001 [3:31:38<12:46,  2.61s/it][A
Train Diffusion:  94%|█████████▍| 4708/5001 [3:31:40<12:39,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4709/5001 [3:31:43<12:37,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4710/5001 [3:31:46<12:33,  2.59s/it][A
Train Diffusion:  94%|█████████▍| 4711/5001 [3:31:48<12:31,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331091238.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8007, 0.4851, 1.2803],
        [8.5526, 0.5061, 1.3016],
        [8.7806, 0.5000, 1.3107]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.0614e-01, 9.5699e-01, 8.4879e-01],
         [6.7087e-01, 6.6476e-01, 2.3437e-01],
         [1.5282e+01, 1.3042e+00, 2.4881e-01],
         ...,
         [4.5221e-01, 7.5620e-01, 4.1908e-01],
         [2.7165e+01, 7.1078e-01, 7.4479e-01],
         [4.8009e+00, 1.9843e-01, 1.8450e+00]],

        [[5.9534e-01, 9.5177e-01, 9.5453e-01],
         [1.2449e+01, 5.2864e-01, 1.1146e+00],
         [2.1220e+00, 1.5682e+00, 1.3425e+00],
         ...,
         [4.1592e+00, 1.3899e-01, 6.7131e-01],
         [5.8049e-02, 8.7250e-01, 2.2427e+00],
         [2.2224e-01, 1.9468e+00, 1.0510e+01]],

        [[1.6018e+00, 8.2255e-01, 4.0336e+00],
         [8.6473e-01, 9.7389e-01, 1.0053e+00],
         [1.1944e-01, 5.8772e+00, 3.3


Train Diffusion:  94%|█████████▍| 4712/5001 [3:31:51<12:34,  2.61s/it][A
Train Diffusion:  94%|█████████▍| 4713/5001 [3:31:53<12:28,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4714/5001 [3:31:56<12:27,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4715/5001 [3:31:59<12:42,  2.67s/it][A
Train Diffusion:  94%|█████████▍| 4716/5001 [3:32:01<12:33,  2.64s/it][A
Train Diffusion:  94%|█████████▍| 4717/5001 [3:32:04<12:25,  2.63s/it][A
Train Diffusion:  94%|█████████▍| 4718/5001 [3:32:07<12:21,  2.62s/it][A
Train Diffusion:  94%|█████████▍| 4719/5001 [3:32:09<12:16,  2.61s/it][A
Train Diffusion:  94%|█████████▍| 4720/5001 [3:32:12<12:11,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4721/5001 [3:32:14<12:09,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 337782054.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7621, 0.5143, 1.2963],
        [8.7637, 0.5123, 1.2935],
        [8.7872, 0.4891, 1.2755]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7185,  0.9589,  0.8421],
         [ 0.6560,  0.7045,  1.9535],
         [27.4889,  0.2385,  1.1995],
         ...,
         [14.8006,  0.8149,  0.4378],
         [ 2.8538,  0.6945,  2.7714],
         [ 1.5152,  1.2001,  1.3786]],

        [[ 1.5986,  0.8168,  4.5272],
         [ 1.0146,  0.6932,  1.0894],
         [ 0.6395,  0.7275,  1.3046],
         ...,
         [ 3.8710,  0.1250,  1.0920],
         [29.9433,  0.1839,  1.0947],
         [ 4.1354,  1.2947,  0.6818]],

        [[ 0.5867,  0.9524,  0.8700],
         [12.3184,  0.6611,  0.9022],
         [ 1.9810,  0.4176,  0.7364],
         ...,
         [ 0.2359,  0.0541,  1.8345],
         [ 0.4965,  0.4031,  0.7180],
         [25.7614,  1.3759,  1.9158


Train Diffusion:  94%|█████████▍| 4722/5001 [3:32:17<12:06,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4723/5001 [3:32:20<12:03,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4724/5001 [3:32:22<12:00,  2.60s/it][A
Train Diffusion:  94%|█████████▍| 4725/5001 [3:32:25<11:57,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4726/5001 [3:32:27<11:53,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4727/5001 [3:32:30<11:51,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4728/5001 [3:32:33<11:49,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4729/5001 [3:32:35<11:46,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4730/5001 [3:32:38<11:43,  2.59s/it][A
Train Diffusion:  95%|█████████▍| 4731/5001 [3:32:40<11:41,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335566979.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6378, 0.5154, 1.3112],
        [8.8893, 0.5061, 1.3115],
        [8.6102, 0.5139, 1.3293]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5456,  0.8196,  1.1932],
         [ 1.7725,  0.3405,  0.7379],
         [ 2.4394,  0.6839,  1.1332],
         ...,
         [ 3.6498,  0.1061,  0.9659],
         [ 0.7770,  0.3885,  1.6790],
         [ 9.0324,  2.1233,  2.8359]],

        [[ 0.8399,  0.9604,  1.2364],
         [12.0075,  0.3252,  1.2840],
         [ 2.9633,  0.3682,  0.8013],
         ...,
         [ 1.2068,  0.6297,  1.4631],
         [ 0.9031,  0.3338,  0.8084],
         [ 1.3056,  1.5127,  2.4300]],

        [[ 0.5187,  0.9504,  0.9278],
         [ 0.3956,  0.9449,  0.9111],
         [16.8994,  0.3750,  1.0336],
         ...,
         [23.6470,  1.3568,  1.0034],
         [ 2.6992,  0.2212,  0.0804],
         [ 5.1452,  0.2167,  0.7640


Train Diffusion:  95%|█████████▍| 4732/5001 [3:32:43<11:38,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4733/5001 [3:32:46<11:36,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4734/5001 [3:32:48<11:34,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4735/5001 [3:32:51<11:31,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4736/5001 [3:32:53<11:30,  2.61s/it][A
Train Diffusion:  95%|█████████▍| 4737/5001 [3:32:56<11:26,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4738/5001 [3:32:59<11:23,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4739/5001 [3:33:01<11:21,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4740/5001 [3:33:04<11:20,  2.61s/it][A
Train Diffusion:  95%|█████████▍| 4741/5001 [3:33:06<11:16,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 335804646.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8449, 0.4918, 1.3080],
        [8.7187, 0.4866, 1.3117],
        [8.7017, 0.5042, 1.2932]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.7118e-01, 9.5160e-01, 8.0970e-01],
         [1.1688e+01, 7.8524e-01, 7.5269e-01],
         [1.7947e+00, 4.4303e-01, 7.5705e-01],
         ...,
         [2.5858e+00, 2.8255e-01, 7.0178e-01],
         [2.1863e+01, 9.1720e-02, 1.2872e+00],
         [4.8548e+00, 4.7112e-01, 9.8827e-01]],

        [[7.4406e-01, 9.5995e-01, 8.2566e-01],
         [6.6566e-01, 7.0973e-01, 1.8832e+00],
         [2.8358e+01, 1.6179e-01, 1.1997e+00],
         ...,
         [1.7694e+01, 2.6497e-02, 1.5473e+00],
         [6.2032e+00, 6.5020e-02, 1.7318e-01],
         [2.2325e+01, 3.0421e+00, 4.2052e+00]],

        [[1.5878e+00, 8.1527e-01, 4.8280e+00],
         [1.0544e+00, 6.1552e-01, 1.0828e+00],
         [6.6376e-01, 6.9472e-01, 1.2


Train Diffusion:  95%|█████████▍| 4742/5001 [3:33:09<11:13,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4743/5001 [3:33:12<11:10,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4744/5001 [3:33:14<11:06,  2.59s/it][A
Train Diffusion:  95%|█████████▍| 4745/5001 [3:33:17<11:03,  2.59s/it][A
Train Diffusion:  95%|█████████▍| 4746/5001 [3:33:19<11:02,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4747/5001 [3:33:22<10:59,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4748/5001 [3:33:25<10:57,  2.60s/it][A
Train Diffusion:  95%|█████████▍| 4749/5001 [3:33:27<10:53,  2.59s/it][A
Train Diffusion:  95%|█████████▍| 4750/5001 [3:33:30<10:51,  2.60s/it][A
Train Diffusion:  95%|█████████▌| 4751/5001 [3:33:32<10:50,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334273488.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6411, 0.5049, 1.3067],
        [8.7342, 0.5153, 1.3070],
        [8.5972, 0.4973, 1.3268]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.2982e-01, 9.5177e-01, 9.1918e-01],
         [1.2794e+01, 5.8092e-01, 1.0243e+00],
         [2.1715e+00, 4.0563e-01, 7.2259e-01],
         ...,
         [1.3116e+01, 2.0254e-02, 1.0647e+00],
         [3.4864e+00, 1.4454e-01, 8.0564e-01],
         [5.5834e-01, 1.6657e+00, 6.1153e+00]],

        [[6.6676e-01, 9.5341e-01, 8.7442e-01],
         [6.0954e-01, 7.4189e-01, 1.9315e+00],
         [2.7247e+01, 2.3671e-01, 1.2011e+00],
         ...,
         [1.2587e+01, 6.2858e-01, 8.7752e+00],
         [4.5430e+00, 2.5565e-02, 1.1298e+00],
         [5.9835e+00, 3.3580e-01, 1.1067e+00]],

        [[1.6037e+00, 8.0922e-01, 3.7571e+00],
         [1.1534e+00, 6.4136e-01, 9.6440e-01],
         [6.4972e-01, 7.2815e-01, 1.3


Train Diffusion:  95%|█████████▌| 4752/5001 [3:33:35<10:54,  2.63s/it][A
Train Diffusion:  95%|█████████▌| 4753/5001 [3:33:38<10:48,  2.62s/it][A
Train Diffusion:  95%|█████████▌| 4754/5001 [3:33:40<10:44,  2.61s/it][A
Train Diffusion:  95%|█████████▌| 4755/5001 [3:33:43<10:40,  2.60s/it][A
Train Diffusion:  95%|█████████▌| 4756/5001 [3:33:45<10:37,  2.60s/it][A
Train Diffusion:  95%|█████████▌| 4757/5001 [3:33:48<10:34,  2.60s/it][A
Train Diffusion:  95%|█████████▌| 4758/5001 [3:33:51<10:39,  2.63s/it][A
Train Diffusion:  95%|█████████▌| 4759/5001 [3:33:53<10:34,  2.62s/it][A
Train Diffusion:  95%|█████████▌| 4760/5001 [3:33:56<10:31,  2.62s/it][A
Train Diffusion:  95%|█████████▌| 4761/5001 [3:33:58<10:26,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327581862.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6037, 0.5121, 1.3093],
        [8.7921, 0.5167, 1.3026],
        [8.8794, 0.4739, 1.3057]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5758,  0.9528,  0.7363],
         [11.1212,  0.9677,  1.1371],
         [ 1.4915,  0.5265,  0.8396],
         ...,
         [ 5.3444,  0.5407,  1.2225],
         [ 1.3391,  0.4454,  0.8155],
         [24.7513,  1.3144,  2.0358]],

        [[ 0.7368,  0.9606,  0.8310],
         [ 0.6572,  0.7024,  1.7770],
         [26.5600,  0.2650,  1.1981],
         ...,
         [ 0.8679,  0.0947,  1.1897],
         [ 4.6460,  0.1210,  0.7153],
         [ 0.6082,  1.8146,  0.9092]],

        [[ 1.5907,  0.8152,  5.1937],
         [ 1.0614,  0.5735,  1.1276],
         [ 0.6766,  0.8012,  0.9822],
         ...,
         [ 0.2118,  0.5177,  1.0326],
         [42.4864,  0.0989,  1.2933],
         [ 5.8943,  0.3934,  1.0096


Train Diffusion:  95%|█████████▌| 4762/5001 [3:34:01<10:38,  2.67s/it][A
Train Diffusion:  95%|█████████▌| 4763/5001 [3:34:04<10:32,  2.66s/it][A
Train Diffusion:  95%|█████████▌| 4764/5001 [3:34:07<10:25,  2.64s/it][A
Train Diffusion:  95%|█████████▌| 4765/5001 [3:34:09<10:19,  2.63s/it][A
Train Diffusion:  95%|█████████▌| 4766/5001 [3:34:12<10:14,  2.62s/it][A
Train Diffusion:  95%|█████████▌| 4767/5001 [3:34:14<10:12,  2.62s/it][A
Train Diffusion:  95%|█████████▌| 4768/5001 [3:34:17<10:08,  2.61s/it][A
Train Diffusion:  95%|█████████▌| 4769/5001 [3:34:20<10:03,  2.60s/it][A
Train Diffusion:  95%|█████████▌| 4770/5001 [3:34:22<10:07,  2.63s/it][A
Train Diffusion:  95%|█████████▌| 4771/5001 [3:34:25<10:03,  2.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 347716739.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7015, 0.5088, 1.2918],
        [8.8407, 0.4990, 1.2782],
        [8.8214, 0.5028, 1.2893]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.5406e-01, 9.5976e-01, 1.1148e+00],
         [1.2806e+01, 4.1359e-01, 1.2466e+00],
         [2.5828e+00, 4.0751e-01, 6.6955e-01],
         ...,
         [4.1311e+00, 1.0667e-01, 6.8328e-01],
         [3.2466e-01, 2.8111e+00, 1.0405e+00],
         [3.7598e-03, 1.6921e+00, 3.2156e+00]],

        [[1.5837e+00, 8.0659e-01, 1.6279e+00],
         [1.5815e+00, 4.1477e-01, 7.6429e-01],
         [6.7970e-01, 7.7285e-01, 9.7591e-01],
         ...,
         [1.2565e-01, 8.6490e+00, 1.6334e+00],
         [2.2385e+01, 6.1425e-01, 9.0423e-01],
         [5.6114e+00, 1.6143e-01, 2.2438e+00]],

        [[5.6439e-01, 9.5054e-01, 9.3866e-01],
         [5.6247e-01, 8.5012e-01, 1.5938e+00],
         [2.7395e+01, 8.3934e-02, 1.1


Train Diffusion:  95%|█████████▌| 4772/5001 [3:34:27<09:58,  2.61s/it][A
Train Diffusion:  95%|█████████▌| 4773/5001 [3:34:30<09:54,  2.61s/it][A
Train Diffusion:  95%|█████████▌| 4774/5001 [3:34:33<09:52,  2.61s/it][A
Train Diffusion:  95%|█████████▌| 4775/5001 [3:34:35<09:48,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4776/5001 [3:34:38<09:48,  2.62s/it][A
Train Diffusion:  96%|█████████▌| 4777/5001 [3:34:40<09:44,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4778/5001 [3:34:43<09:42,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4779/5001 [3:34:46<09:38,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4780/5001 [3:34:48<09:36,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4781/5001 [3:34:51<09:33,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 319906601.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.9681, 0.4888, 1.3534],
        [8.3977, 0.5207, 1.2803],
        [8.7104, 0.4998, 1.2995]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.6290e-01, 9.5974e-01, 1.1566e+00],
         [1.3348e+01, 3.5588e-01, 1.2735e+00],
         [2.8246e+00, 2.9228e-01, 1.0219e+00],
         ...,
         [2.5306e+01, 2.8793e-01, 5.2057e-01],
         [2.9939e+00, 2.4178e-01, 1.5053e+01],
         [2.1350e-01, 3.1696e+00, 2.8990e+00]],

        [[5.5803e-01, 9.5001e-01, 9.3529e-01],
         [5.2242e-01, 7.7951e-01, 1.6035e+00],
         [2.3747e+01, 7.7053e-02, 1.1683e+00],
         ...,
         [3.6135e+00, 1.3843e-01, 1.0830e+00],
         [1.0291e-06, 3.5927e-01, 3.3929e+00],
         [1.1705e-01, 1.4129e+00, 9.7072e+00]],

        [[1.5824e+00, 8.0687e-01, 8.1970e-01],
         [1.6911e+00, 3.8533e-01, 6.7162e-01],
         [4.7606e-01, 8.7561e-01, 8.3


Train Diffusion:  96%|█████████▌| 4782/5001 [3:34:53<09:32,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4783/5001 [3:34:56<09:28,  2.61s/it][A
Train Diffusion:  96%|█████████▌| 4784/5001 [3:34:59<09:23,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4785/5001 [3:35:01<09:21,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4786/5001 [3:35:04<09:19,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4787/5001 [3:35:06<09:16,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4788/5001 [3:35:09<09:12,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4789/5001 [3:35:12<09:09,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4790/5001 [3:35:14<09:07,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4791/5001 [3:35:17<09:04,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 338303072.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7302, 0.5254, 1.3049],
        [8.8342, 0.4869, 1.2901],
        [8.8664, 0.4832, 1.2606]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5258,  0.8292,  0.7659],
         [ 1.8112,  0.3322,  1.2117],
         [25.1692,  0.2736,  2.4906],
         ...,
         [ 1.0140,  0.4130,  0.7023],
         [ 1.4527, 10.8315,  0.9608],
         [11.1084,  2.6190,  1.9937]],

        [[ 0.5040,  0.9532,  0.9404],
         [ 0.7103,  0.8485,  0.6177],
         [ 0.2769,  1.1139,  2.4313],
         ...,
         [36.4614,  0.0668,  1.1882],
         [ 5.9805,  0.0821,  0.7250],
         [ 0.4335,  1.7594,  2.3366]],

        [[ 0.8771,  0.9612,  1.3115],
         [11.0083,  0.4492,  1.2837],
         [ 2.4882,  0.5837,  0.4223],
         ...,
         [ 3.0234,  0.2109,  0.4730],
         [25.2608,  0.7604,  1.5231],
         [ 3.5956,  0.8045,  1.2776


Train Diffusion:  96%|█████████▌| 4792/5001 [3:35:19<09:02,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4793/5001 [3:35:22<08:59,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4794/5001 [3:35:25<08:56,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4795/5001 [3:35:27<08:55,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4796/5001 [3:35:30<08:51,  2.59s/it][A
Train Diffusion:  96%|█████████▌| 4797/5001 [3:35:32<08:50,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4798/5001 [3:35:35<08:47,  2.60s/it][A
Train Diffusion:  96%|█████████▌| 4799/5001 [3:35:38<08:54,  2.64s/it][A
Train Diffusion:  96%|█████████▌| 4800/5001 [3:35:40<08:51,  2.65s/it][A
Train Diffusion:  96%|█████████▌| 4801/5001 [3:35:43<08:46,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 328991324.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6326, 0.5049, 1.3018],
        [8.8833, 0.4910, 1.3142],
        [8.7836, 0.4881, 1.2774]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5873,  0.9509,  0.8661],
         [12.3386,  0.6707,  0.8641],
         [ 1.9817,  0.4201,  0.7201],
         ...,
         [ 1.9476,  0.2613,  0.4540],
         [ 3.8093,  0.1231,  0.8664],
         [27.3539,  1.0883,  2.0135]],

        [[ 1.5975,  0.8161,  4.5537],
         [ 1.0195,  0.6883,  1.1075],
         [ 0.6248,  0.7191,  1.3421],
         ...,
         [13.9052,  0.3839,  1.2137],
         [ 1.9006,  0.4334,  0.7174],
         [ 0.9806,  1.4132,  0.8651]],

        [[ 0.7178,  0.9574,  0.8411],
         [ 0.6541,  0.7048,  1.9625],
         [27.4452,  0.2386,  1.1997],
         ...,
         [ 0.4737,  0.4974,  1.0479],
         [37.0064,  0.1242,  1.2166],
         [ 5.3270,  0.4473,  1.1250


Train Diffusion:  96%|█████████▌| 4802/5001 [3:35:46<08:42,  2.63s/it][A
Train Diffusion:  96%|█████████▌| 4803/5001 [3:35:48<08:37,  2.62s/it][A
Train Diffusion:  96%|█████████▌| 4804/5001 [3:35:51<08:41,  2.65s/it][A
Train Diffusion:  96%|█████████▌| 4805/5001 [3:35:54<08:42,  2.67s/it][A
Train Diffusion:  96%|█████████▌| 4806/5001 [3:35:56<08:37,  2.65s/it][A
Train Diffusion:  96%|█████████▌| 4807/5001 [3:35:59<08:31,  2.64s/it][A
Train Diffusion:  96%|█████████▌| 4808/5001 [3:36:02<08:28,  2.63s/it][A
Train Diffusion:  96%|█████████▌| 4809/5001 [3:36:04<08:40,  2.71s/it][A
Train Diffusion:  96%|█████████▌| 4810/5001 [3:36:07<08:38,  2.71s/it][A
Train Diffusion:  96%|█████████▌| 4811/5001 [3:36:10<08:29,  2.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 311637219.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6743, 0.4798, 1.2977],
        [8.7322, 0.4856, 1.3384],
        [8.6864, 0.4857, 1.3079]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.2410e-01, 9.5232e-01, 9.2586e-01],
         [7.0094e-01, 8.0423e-01, 6.1842e-01],
         [9.6450e-01, 8.8521e-01, 2.0647e+00],
         ...,
         [3.5326e+01, 8.8966e-02, 1.1947e+00],
         [5.8545e+00, 3.4668e-02, 3.2834e+00],
         [1.2288e+00, 1.2510e+00, 1.2116e+00]],

        [[1.5503e+00, 8.1826e-01, 7.3050e-01],
         [1.8155e+00, 3.4069e-01, 9.4746e-01],
         [1.8828e+01, 3.5033e-01, 1.3834e+00],
         ...,
         [2.6748e+00, 2.6997e-01, 9.6851e-01],
         [1.5106e-06, 5.0622e-01, 3.0797e+00],
         [1.3986e-01, 1.5276e+00, 6.9385e+00]],

        [[8.2989e-01, 9.6219e-01, 1.2162e+00],
         [1.1622e+01, 3.5060e-01, 1.2831e+00],
         [2.7876e+00, 4.0363e-01, 8.1


Train Diffusion:  96%|█████████▌| 4812/5001 [3:36:12<08:21,  2.66s/it][A
Train Diffusion:  96%|█████████▌| 4813/5001 [3:36:15<08:15,  2.63s/it][A
Train Diffusion:  96%|█████████▋| 4814/5001 [3:36:18<08:11,  2.63s/it][A
Train Diffusion:  96%|█████████▋| 4815/5001 [3:36:20<08:08,  2.62s/it][A
Train Diffusion:  96%|█████████▋| 4816/5001 [3:36:23<08:03,  2.62s/it][A
Train Diffusion:  96%|█████████▋| 4817/5001 [3:36:25<08:01,  2.61s/it][A
Train Diffusion:  96%|█████████▋| 4818/5001 [3:36:28<07:57,  2.61s/it][A
Train Diffusion:  96%|█████████▋| 4819/5001 [3:36:31<07:55,  2.61s/it][A
Train Diffusion:  96%|█████████▋| 4820/5001 [3:36:33<07:51,  2.60s/it][A
Train Diffusion:  96%|█████████▋| 4821/5001 [3:36:36<07:49,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336275520.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6130, 0.4947, 1.3357],
        [8.6304, 0.5152, 1.3030],
        [8.6684, 0.5015, 1.3262]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6031e+00, 8.0579e-01, 3.4220e+00],
         [1.2674e+00, 6.0464e-01, 8.8610e-01],
         [6.7982e-01, 7.9541e-01, 1.0230e+00],
         ...,
         [8.1792e+00, 3.9176e-02, 1.4415e+00],
         [5.8534e+00, 5.6358e-02, 7.8375e-01],
         [6.1575e-01, 2.2780e+00, 2.0658e+00]],

        [[6.6205e-01, 9.5379e-01, 9.3803e-01],
         [1.2608e+01, 5.5694e-01, 1.1185e+00],
         [2.1650e+00, 3.9817e-01, 7.9407e-01],
         ...,
         [3.0397e+00, 1.6099e-01, 1.1019e+00],
         [1.0599e-03, 4.0692e-01, 2.3527e+00],
         [1.7435e+01, 1.8531e+00, 1.8257e+00]],

        [[6.3526e-01, 9.5258e-01, 9.0148e-01],
         [5.8383e-01, 7.8633e-01, 1.8346e+00],
         [2.6714e+01, 1.8909e-01, 1.2


Train Diffusion:  96%|█████████▋| 4822/5001 [3:36:38<07:46,  2.61s/it][A
Train Diffusion:  96%|█████████▋| 4823/5001 [3:36:41<07:43,  2.60s/it][A
Train Diffusion:  96%|█████████▋| 4824/5001 [3:36:44<07:40,  2.60s/it][A
Train Diffusion:  96%|█████████▋| 4825/5001 [3:36:46<07:37,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4826/5001 [3:36:49<07:35,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4827/5001 [3:36:51<07:33,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4828/5001 [3:36:54<07:30,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4829/5001 [3:36:57<07:27,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4830/5001 [3:36:59<07:26,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4831/5001 [3:37:02<07:23,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 336945532.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6180, 0.4800, 1.3103],
        [8.9450, 0.5066, 1.3040],
        [8.5641, 0.5075, 1.2537]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.5584,  0.8259,  4.5329],
         [ 0.7631,  1.0145,  1.2294],
         [ 0.6510,  0.8761, 18.6787],
         ...,
         [ 0.8878,  0.0817,  0.7645],
         [ 4.1326,  0.1050,  0.5173],
         [ 8.7454,  1.9634,  5.5815]],

        [[ 0.5327,  0.9513,  0.9650],
         [12.7386,  0.5096,  1.5336],
         [ 0.8498,  7.0812,  3.5369],
         ...,
         [ 0.3242,  0.4852,  1.0547],
         [42.5009,  0.0641,  1.3610],
         [ 6.5557,  0.2124,  0.7608]],

        [[ 0.8117,  0.9612,  0.7970],
         [ 0.7863,  0.6071,  0.6733],
         [ 0.3542,  0.7485,  0.9440],
         ...,
         [10.3258,  0.3636,  1.6215],
         [ 1.2054,  0.5607,  0.7737],
         [ 4.0804,  1.5572,  2.3457


Train Diffusion:  97%|█████████▋| 4832/5001 [3:37:04<07:21,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4833/5001 [3:37:07<07:18,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4834/5001 [3:37:10<07:15,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4835/5001 [3:37:12<07:11,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4836/5001 [3:37:15<07:08,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4837/5001 [3:37:17<07:06,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4838/5001 [3:37:20<07:03,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4839/5001 [3:37:23<07:01,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4840/5001 [3:37:25<06:58,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4841/5001 [3:37:28<06:56,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 327220729.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8849, 0.4971, 1.3036],
        [8.8594, 0.4847, 1.3116],
        [8.5962, 0.4753, 1.2629]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1856e-01, 9.5341e-01, 9.2988e-01],
         [7.0215e-01, 8.2084e-01, 6.7086e-01],
         [3.1020e+00, 7.1679e-01, 1.2064e+00],
         ...,
         [1.2530e+01, 8.1369e-02, 1.2514e+00],
         [5.4993e+00, 6.7412e-02, 2.1678e+00],
         [1.7810e+01, 1.8518e+00, 2.0483e+00]],

        [[8.3892e-01, 9.6349e-01, 1.2364e+00],
         [1.1538e+01, 3.4029e-01, 1.2851e+00],
         [2.8307e+00, 4.0183e-01, 7.5582e-01],
         ...,
         [4.8095e-01, 1.1311e+01, 6.3120e-01],
         [5.8261e-03, 1.0160e+00, 2.2673e+00],
         [2.4439e-01, 1.7462e+00, 1.1035e+00]],

        [[1.5498e+00, 8.2011e-01, 7.5200e-01],
         [1.8261e+00, 3.3612e-01, 9.6221e-01],
         [1.4822e+01, 4.0510e-01, 1.2


Train Diffusion:  97%|█████████▋| 4842/5001 [3:37:30<06:53,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4843/5001 [3:37:33<06:51,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4844/5001 [3:37:36<06:48,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4845/5001 [3:37:38<06:45,  2.60s/it][A
Train Diffusion:  97%|█████████▋| 4846/5001 [3:37:41<06:48,  2.63s/it][A
Train Diffusion:  97%|█████████▋| 4847/5001 [3:37:44<06:45,  2.63s/it][A
Train Diffusion:  97%|█████████▋| 4848/5001 [3:37:46<06:42,  2.63s/it][A
Train Diffusion:  97%|█████████▋| 4849/5001 [3:37:49<06:37,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4850/5001 [3:37:51<06:38,  2.64s/it][A
Train Diffusion:  97%|█████████▋| 4851/5001 [3:37:54<06:36,  2.65s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326693798.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6070, 0.4990, 1.3117],
        [8.6444, 0.4800, 1.2800],
        [8.8426, 0.4888, 1.3225]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7953,  0.9619,  0.7995],
         [ 0.7377,  0.6402,  1.8734],
         [28.1873,  0.1948,  1.1979],
         ...,
         [24.2394,  0.0455,  1.1909],
         [ 5.1735,  0.0875,  0.8026],
         [27.6335,  1.1055,  1.9886]],

        [[ 0.5404,  0.9519,  0.7903],
         [11.3662,  0.8305,  0.7737],
         [ 1.6780,  0.4455,  0.8062],
         ...,
         [ 3.7715,  0.5813,  1.4997],
         [ 3.4695,  0.6965,  1.3492],
         [ 3.9262,  0.5099,  1.2629]],

        [[ 1.5686,  0.8234,  5.4782],
         [ 0.8365,  0.7297,  1.3282],
         [ 0.6449,  0.7043,  1.2416],
         ...,
         [ 4.5948,  0.1019,  0.9487],
         [12.5654,  0.2900,  0.9534],
         [ 2.4503,  1.4043,  0.7180


Train Diffusion:  97%|█████████▋| 4852/5001 [3:37:57<06:32,  2.64s/it][A
Train Diffusion:  97%|█████████▋| 4853/5001 [3:37:59<06:28,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4854/5001 [3:38:02<06:25,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4855/5001 [3:38:05<06:21,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4856/5001 [3:38:07<06:33,  2.71s/it][A
Train Diffusion:  97%|█████████▋| 4857/5001 [3:38:10<06:28,  2.70s/it][A
Train Diffusion:  97%|█████████▋| 4858/5001 [3:38:13<06:22,  2.67s/it][A
Train Diffusion:  97%|█████████▋| 4859/5001 [3:38:15<06:16,  2.65s/it][A
Train Diffusion:  97%|█████████▋| 4860/5001 [3:38:18<06:11,  2.63s/it][A
Train Diffusion:  97%|█████████▋| 4861/5001 [3:38:21<06:07,  2.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 313467936.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7151, 0.5009, 1.3317],
        [8.8264, 0.4892, 1.3023],
        [8.4775, 0.5011, 1.3124]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3294,  0.8713,  0.6276],
         [ 1.5352,  0.3862,  0.3026],
         [ 0.3951,  1.9130,  0.2626],
         ...,
         [ 0.5901,  8.2854,  0.5737],
         [ 4.2818,  1.5961,  0.5546],
         [ 1.9100,  0.8982,  0.9452]],

        [[ 1.1325,  0.9169,  2.2943],
         [ 2.8206,  0.8907,  1.3243],
         [ 2.0062, 17.5917,  0.6948],
         ...,
         [26.5724,  0.0628,  1.2792],
         [ 5.6748,  0.0697,  0.8464],
         [27.6240,  1.0737,  2.0031]],

        [[ 0.4478,  0.9530,  0.9851],
         [ 6.2940,  0.5897,  1.1659],
         [ 1.4724,  0.5760,  1.3082],
         ...,
         [ 4.4942,  0.1193,  0.9046],
         [25.7809,  0.1156,  1.1236],
         [ 4.3172,  0.8166,  1.2445


Train Diffusion:  97%|█████████▋| 4862/5001 [3:38:23<06:04,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4863/5001 [3:38:26<06:01,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4864/5001 [3:38:28<05:58,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4865/5001 [3:38:31<05:55,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4866/5001 [3:38:34<05:52,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4867/5001 [3:38:36<05:49,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4868/5001 [3:38:39<05:46,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4869/5001 [3:38:41<05:44,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4870/5001 [3:38:44<05:41,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4871/5001 [3:38:47<05:39,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325581417.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6128, 0.4873, 1.2909],
        [8.7938, 0.5010, 1.3106],
        [8.8214, 0.4865, 1.2770]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.6579,  0.9542,  0.8828],
         [ 0.6015,  0.7545,  1.8680],
         [26.3514,  0.2593,  1.2020],
         ...,
         [ 0.4493,  0.6613,  0.4103],
         [28.4785,  0.7265,  0.5458],
         [ 4.4882,  0.4057,  1.1377]],

        [[ 0.6378,  0.9533,  0.9293],
         [12.7106,  0.5688,  1.1190],
         [ 2.1353,  0.4106,  0.8090],
         ...,
         [ 3.9526,  0.1641,  0.7533],
         [ 0.5791,  0.3094,  1.1020],
         [ 0.6197,  1.8752,  0.8131]],

        [[ 1.6055,  0.8089,  3.6348],
         [ 1.1903,  0.6526,  0.9235],
         [ 0.6708,  0.8215,  1.0301],
         ...,
         [29.8521,  0.0573,  1.3027],
         [ 5.8589,  0.0881,  0.8607],
         [26.4924,  1.1548,  2.0330


Train Diffusion:  97%|█████████▋| 4872/5001 [3:38:49<05:36,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4873/5001 [3:38:52<05:34,  2.62s/it][A
Train Diffusion:  97%|█████████▋| 4874/5001 [3:38:55<05:31,  2.61s/it][A
Train Diffusion:  97%|█████████▋| 4875/5001 [3:38:57<05:28,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4876/5001 [3:39:00<05:25,  2.60s/it][A
Train Diffusion:  98%|█████████▊| 4877/5001 [3:39:02<05:23,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4878/5001 [3:39:05<05:20,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4879/5001 [3:39:08<05:18,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4880/5001 [3:39:10<05:15,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4881/5001 [3:39:13<05:13,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 325630598.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.8832, 0.4776, 1.2626],
        [8.7197, 0.5063, 1.2711],
        [8.8154, 0.4847, 1.3583]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1716e+00, 9.1245e-01, 2.8763e+00],
         [3.7228e+00, 8.1375e-01, 1.3490e+00],
         [1.0221e+00, 4.6898e+00, 1.3928e+00],
         ...,
         [3.4317e+00, 2.1611e-01, 7.6227e-01],
         [3.5012e-01, 4.3642e+00, 9.2208e-01],
         [2.3545e-02, 1.9045e+00, 2.4801e+00]],

        [[1.2978e+00, 8.8596e-01, 6.8236e-01],
         [1.4551e+00, 4.1132e-01, 2.7616e-01],
         [1.4443e+01, 1.4324e+00, 2.8046e-01],
         ...,
         [3.6369e-01, 6.8466e-01, 4.7400e-01],
         [2.9261e+01, 6.2534e-01, 1.1345e+00],
         [5.5534e+00, 1.7848e-01, 1.7769e+00]],

        [[4.4457e-01, 9.5700e-01, 9.9131e-01],
         [3.8762e+00, 6.3017e-01, 1.2087e+00],
         [1.1878e+00, 7.9138e-01, 1.5


Train Diffusion:  98%|█████████▊| 4882/5001 [3:39:15<05:10,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4883/5001 [3:39:18<05:07,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4884/5001 [3:39:21<05:05,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4885/5001 [3:39:23<05:02,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4886/5001 [3:39:26<05:00,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4887/5001 [3:39:28<04:57,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4888/5001 [3:39:31<04:54,  2.60s/it][A
Train Diffusion:  98%|█████████▊| 4889/5001 [3:39:34<04:52,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4890/5001 [3:39:36<04:49,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4891/5001 [3:39:39<04:46,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 331979046.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.5608, 0.5157, 1.2802],
        [8.8252, 0.5046, 1.2991],
        [8.7220, 0.5045, 1.3120]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.9609,  0.9494,  0.7947],
         [ 0.9721,  0.5265,  1.8095],
         [26.4252,  0.2334,  1.1920],
         ...,
         [16.7675,  0.2509,  1.1923],
         [ 2.9938,  0.2785,  0.6873],
         [ 8.2662,  1.9385,  1.5502]],

        [[ 0.4772,  0.9514,  1.0035],
         [11.6129,  0.4873,  1.1888],
         [ 1.8960,  0.5259,  0.8980],
         ...,
         [ 0.3198,  0.5171,  1.1292],
         [42.9817,  0.0816,  1.3467],
         [ 6.5327,  0.2121,  1.0566]],

        [[ 1.4704,  0.8383,  4.7369],
         [ 0.7925,  1.1177,  1.2759],
         [ 0.7339,  0.8952,  0.8238],
         ...,
         [ 1.3469,  0.3584,  0.3540],
         [ 2.9968,  0.1656,  0.3070],
         [10.4036,  2.5927,  2.3727


Train Diffusion:  98%|█████████▊| 4892/5001 [3:39:41<04:43,  2.60s/it][A
Train Diffusion:  98%|█████████▊| 4893/5001 [3:39:44<04:44,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4894/5001 [3:39:47<04:43,  2.65s/it][A
Train Diffusion:  98%|█████████▊| 4895/5001 [3:39:49<04:39,  2.64s/it][A
Train Diffusion:  98%|█████████▊| 4896/5001 [3:39:52<04:38,  2.65s/it][A
Train Diffusion:  98%|█████████▊| 4897/5001 [3:39:55<04:34,  2.64s/it][A
Train Diffusion:  98%|█████████▊| 4898/5001 [3:39:57<04:31,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4899/5001 [3:40:00<04:27,  2.62s/it][A
Train Diffusion:  98%|█████████▊| 4900/5001 [3:40:03<04:24,  2.62s/it][A
Train Diffusion:  98%|█████████▊| 4901/5001 [3:40:05<04:21,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 321103724.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[9.0055, 0.4625, 1.2827],
        [8.7334, 0.4913, 1.2759],
        [8.6871, 0.4923, 1.2939]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6082,  0.8154,  3.5888],
         [ 1.0699,  0.6725,  0.9394],
         [ 0.6918,  0.7332,  1.2800],
         ...,
         [ 0.7512,  0.5681,  0.7353],
         [ 0.4533,  2.2703,  0.6538],
         [ 0.9053,  1.5696,  2.5279]],

        [[ 0.6394,  0.9559,  0.9508],
         [12.7547,  0.5339,  1.0937],
         [ 2.1851,  0.4096,  0.7329],
         ...,
         [ 3.4270,  0.1771,  0.8283],
         [ 0.8032,  0.6939,  1.3790],
         [ 1.3631,  1.7498,  6.4548]],

        [[ 0.6548,  0.9566,  0.8835],
         [ 0.6112,  0.7199,  1.9557],
         [27.3396,  0.2043,  1.2038],
         ...,
         [35.9589,  0.1025,  1.1651],
         [ 5.3875,  0.0379,  0.0423],
         [ 3.7819,  0.2658,  0.8055


Train Diffusion:  98%|█████████▊| 4902/5001 [3:40:08<04:22,  2.65s/it][A
Train Diffusion:  98%|█████████▊| 4903/5001 [3:40:11<04:23,  2.69s/it][A
Train Diffusion:  98%|█████████▊| 4904/5001 [3:40:13<04:18,  2.66s/it][A
Train Diffusion:  98%|█████████▊| 4905/5001 [3:40:16<04:13,  2.64s/it][A
Train Diffusion:  98%|█████████▊| 4906/5001 [3:40:18<04:09,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4907/5001 [3:40:21<04:06,  2.62s/it][A
Train Diffusion:  98%|█████████▊| 4908/5001 [3:40:24<04:03,  2.62s/it][A
Train Diffusion:  98%|█████████▊| 4909/5001 [3:40:26<03:59,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4910/5001 [3:40:29<03:57,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4911/5001 [3:40:31<03:54,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 323328614.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6263, 0.4922, 1.2766],
        [8.7367, 0.5028, 1.3164],
        [8.9541, 0.4946, 1.3249]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.9919e-01, 9.5264e-01, 7.3749e-01],
         [9.8207e+00, 1.0666e+00, 1.4496e+00],
         [1.1525e+00, 8.2189e-01, 1.1410e+00],
         ...,
         [1.6551e+00, 1.0400e-01, 1.1538e+00],
         [5.3563e+00, 3.4991e-02, 2.6184e+00],
         [2.2048e+00, 7.7114e-01, 1.0376e+00]],

        [[1.5187e+00, 8.3047e-01, 6.0021e+00],
         [8.2311e-01, 6.6829e-01, 1.3354e+00],
         [8.6596e-01, 7.7164e-01, 4.5092e-01],
         ...,
         [1.0044e-01, 9.6533e-01, 1.9552e+00],
         [2.2048e-01, 1.0947e+00, 8.5955e+00],
         [2.4575e-01, 2.8219e+00, 2.7901e+00]],

        [[8.8887e-01, 9.5986e-01, 7.8492e-01],
         [8.4570e-01, 5.6770e-01, 1.6384e+00],
         [2.7871e+01, 1.4309e-01, 1.1


Train Diffusion:  98%|█████████▊| 4912/5001 [3:40:34<03:52,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4913/5001 [3:40:37<03:49,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4914/5001 [3:40:39<03:46,  2.61s/it][A
Train Diffusion:  98%|█████████▊| 4915/5001 [3:40:42<03:46,  2.64s/it][A
Train Diffusion:  98%|█████████▊| 4916/5001 [3:40:45<03:43,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4917/5001 [3:40:47<03:40,  2.62s/it][A
Train Diffusion:  98%|█████████▊| 4918/5001 [3:40:50<03:37,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4919/5001 [3:40:53<03:37,  2.66s/it][A
Train Diffusion:  98%|█████████▊| 4920/5001 [3:40:55<03:34,  2.65s/it][A
Train Diffusion:  98%|█████████▊| 4921/5001 [3:40:58<03:31,  2.64s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 326833641.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6712, 0.5133, 1.2857],
        [8.8873, 0.5012, 1.3298],
        [8.7991, 0.5059, 1.3103]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6045e+00, 8.1024e-01, 4.2730e+00],
         [1.1547e+00, 6.2133e-01, 9.9819e-01],
         [6.5496e-01, 7.7166e-01, 1.1805e+00],
         ...,
         [4.4020e-01, 9.4553e-01, 7.9244e-01],
         [3.1022e-04, 5.6583e-01, 2.7958e+00],
         [2.0255e-01, 1.7817e+00, 1.2147e+00]],

        [[6.8269e-01, 9.5649e-01, 8.6672e-01],
         [6.1967e-01, 7.5112e-01, 1.8913e+00],
         [2.4909e+01, 2.9824e-01, 1.2191e+00],
         ...,
         [3.2443e+01, 6.3627e-02, 1.2645e+00],
         [5.8558e+00, 2.7621e-02, 6.3389e+00],
         [2.0516e+00, 1.0470e+00, 1.3732e+00]],

        [[6.1545e-01, 9.5364e-01, 8.5956e-01],
         [1.2180e+01, 6.8048e-01, 9.6594e-01],
         [1.9752e+00, 4.0040e-01, 7.5


Train Diffusion:  98%|█████████▊| 4922/5001 [3:41:00<03:27,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4923/5001 [3:41:03<03:25,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4924/5001 [3:41:06<03:22,  2.63s/it][A
Train Diffusion:  98%|█████████▊| 4925/5001 [3:41:08<03:19,  2.62s/it][A
Train Diffusion:  99%|█████████▊| 4926/5001 [3:41:11<03:16,  2.62s/it][A
Train Diffusion:  99%|█████████▊| 4927/5001 [3:41:14<03:13,  2.61s/it][A
Train Diffusion:  99%|█████████▊| 4928/5001 [3:41:16<03:10,  2.61s/it][A
Train Diffusion:  99%|█████████▊| 4929/5001 [3:41:19<03:07,  2.60s/it][A
Train Diffusion:  99%|█████████▊| 4930/5001 [3:41:21<03:05,  2.61s/it][A
Train Diffusion:  99%|█████████▊| 4931/5001 [3:41:24<03:02,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329386313.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7108, 0.4964, 1.2864],
        [8.6970, 0.5007, 1.3012],
        [8.9166, 0.4904, 1.3220]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.4506e-01, 9.5486e-01, 8.9550e-01],
         [5.9162e-01, 7.7429e-01, 1.8599e+00],
         [2.7660e+01, 2.1218e-01, 1.1999e+00],
         ...,
         [4.0852e-01, 1.7080e+00, 6.2872e-01],
         [2.4483e-06, 6.6095e-01, 2.6856e+00],
         [1.5659e-01, 1.3528e+00, 1.2636e+01]],

        [[1.6082e+00, 8.0816e-01, 3.6807e+00],
         [1.2258e+00, 5.6307e-01, 8.8516e-01],
         [7.1679e-01, 7.1841e-01, 1.3059e+00],
         ...,
         [3.9348e+00, 1.6442e-01, 6.8534e-01],
         [3.0612e+00, 2.2256e+00, 6.3749e+00],
         [8.7186e-01, 3.8312e+00, 1.8739e+00]],

        [[6.5009e-01, 9.5509e-01, 9.1715e-01],
         [1.2202e+01, 6.0533e-01, 1.0331e+00],
         [2.0547e+00, 4.3177e-01, 7.2


Train Diffusion:  99%|█████████▊| 4932/5001 [3:41:27<02:59,  2.60s/it][A
Train Diffusion:  99%|█████████▊| 4933/5001 [3:41:29<02:56,  2.60s/it][A
Train Diffusion:  99%|█████████▊| 4934/5001 [3:41:32<02:54,  2.61s/it][A
Train Diffusion:  99%|█████████▊| 4935/5001 [3:41:34<02:51,  2.60s/it][A
Train Diffusion:  99%|█████████▊| 4936/5001 [3:41:37<02:49,  2.60s/it][A
Train Diffusion:  99%|█████████▊| 4937/5001 [3:41:40<02:46,  2.61s/it][A
Train Diffusion:  99%|█████████▊| 4938/5001 [3:41:42<02:43,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4939/5001 [3:41:45<02:53,  2.80s/it][A
Train Diffusion:  99%|█████████▉| 4940/5001 [3:41:48<02:49,  2.78s/it][A
Train Diffusion:  99%|█████████▉| 4941/5001 [3:41:51<02:43,  2.73s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 315698352.0. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7885, 0.4855, 1.3252],
        [8.7020, 0.4946, 1.2663],
        [8.9126, 0.4787, 1.2818]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.6656e-01, 9.5277e-01, 9.5081e-01],
         [5.5643e-01, 7.8229e-01, 1.5594e+00],
         [2.2765e+01, 2.4187e-01, 1.0756e+00],
         ...,
         [2.1985e-01, 6.3738e-01, 1.6702e+00],
         [5.2405e-01, 6.7668e-01, 6.4882e+00],
         [2.3838e-02, 2.3691e+00, 3.2412e+00]],

        [[7.4841e-01, 9.6156e-01, 1.1460e+00],
         [1.3431e+01, 3.5345e-01, 1.2707e+00],
         [2.7746e+00, 3.1224e-01, 1.0629e+00],
         ...,
         [2.9179e+01, 6.3125e-01, 9.4347e-01],
         [2.6097e+00, 1.1384e+00, 8.3453e-01],
         [3.6595e-01, 1.3537e+00, 7.8987e+00]],

        [[1.5893e+00, 8.0907e-01, 1.2399e+00],
         [1.5364e+00, 4.3229e-01, 6.8970e-01],
         [3.5187e-01, 1.1262e+00, 6.8


Train Diffusion:  99%|█████████▉| 4942/5001 [3:41:54<02:41,  2.74s/it][A
Train Diffusion:  99%|█████████▉| 4943/5001 [3:41:56<02:37,  2.72s/it][A
Train Diffusion:  99%|█████████▉| 4944/5001 [3:41:59<02:33,  2.69s/it][A
Train Diffusion:  99%|█████████▉| 4945/5001 [3:42:01<02:28,  2.66s/it][A
Train Diffusion:  99%|█████████▉| 4946/5001 [3:42:04<02:25,  2.65s/it][A
Train Diffusion:  99%|█████████▉| 4947/5001 [3:42:07<02:22,  2.64s/it][A
Train Diffusion:  99%|█████████▉| 4948/5001 [3:42:09<02:19,  2.63s/it][A
Train Diffusion:  99%|█████████▉| 4949/5001 [3:42:13<02:37,  3.02s/it][A
Train Diffusion:  99%|█████████▉| 4950/5001 [3:42:16<02:28,  2.90s/it][A
Train Diffusion:  99%|█████████▉| 4951/5001 [3:42:18<02:20,  2.81s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324846070.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6341, 0.4941, 1.3256],
        [8.8292, 0.4913, 1.3142],
        [8.8980, 0.5028, 1.3010]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.2458e-01, 9.6431e-01, 1.2117e+00],
         [1.1691e+01, 3.4106e-01, 1.2817e+00],
         [2.8130e+00, 3.2409e-01, 9.6669e-01],
         ...,
         [3.4312e+00, 1.1392e-01, 9.4981e-01],
         [1.0597e-03, 4.3087e-01, 2.0867e+00],
         [1.7652e+01, 1.4105e+00, 1.9507e+00]],

        [[1.5585e+00, 8.1625e-01, 6.8519e-01],
         [1.8179e+00, 3.4566e-01, 6.9468e-01],
         [1.1113e-02, 7.5421e-01, 1.2791e+00],
         ...,
         [2.4217e+01, 6.6206e-02, 1.3662e+00],
         [5.8769e+00, 1.6493e-02, 8.1385e-02],
         [3.4148e+00, 3.4393e-01, 8.9520e-01]],

        [[5.2476e-01, 9.5448e-01, 9.2775e-01],
         [6.9449e-01, 7.8948e-01, 1.4094e+00],
         [1.7730e+01, 6.3620e-02, 1.0


Train Diffusion:  99%|█████████▉| 4952/5001 [3:42:21<02:14,  2.74s/it][A
Train Diffusion:  99%|█████████▉| 4953/5001 [3:42:24<02:09,  2.70s/it][A
Train Diffusion:  99%|█████████▉| 4954/5001 [3:42:26<02:05,  2.66s/it][A
Train Diffusion:  99%|█████████▉| 4955/5001 [3:42:29<02:01,  2.64s/it][A
Train Diffusion:  99%|█████████▉| 4956/5001 [3:42:31<01:58,  2.63s/it][A
Train Diffusion:  99%|█████████▉| 4957/5001 [3:42:34<01:55,  2.62s/it][A
Train Diffusion:  99%|█████████▉| 4958/5001 [3:42:37<01:52,  2.61s/it][A
Train Diffusion:  99%|█████████▉| 4959/5001 [3:42:39<01:49,  2.61s/it][A
Train Diffusion:  99%|█████████▉| 4960/5001 [3:42:42<01:46,  2.61s/it][A
Train Diffusion:  99%|█████████▉| 4961/5001 [3:42:44<01:44,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 334206707.2. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7857, 0.5171, 1.3453],
        [8.7540, 0.4764, 1.3178],
        [8.7550, 0.4971, 1.2833]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.9488e-01, 9.6243e-01, 8.0026e-01],
         [7.3576e-01, 6.4456e-01, 1.7764e+00],
         [2.6800e+01, 2.6158e-01, 1.1928e+00],
         ...,
         [4.5044e-01, 4.8281e-01, 1.1120e+00],
         [1.0912e-06, 3.5671e-01, 3.3354e+00],
         [1.5137e-01, 1.1094e+00, 1.1863e+01]],

        [[1.5706e+00, 8.2462e-01, 5.4455e+00],
         [8.4640e-01, 7.4066e-01, 1.2261e+00],
         [7.2189e-01, 8.2337e-01, 9.5485e-01],
         ...,
         [1.2873e+00, 3.4499e-01, 7.9496e-01],
         [2.8653e+00, 1.1676e-01, 7.0356e+00],
         [2.6587e-01, 2.7262e+00, 1.6522e+00]],

        [[5.4023e-01, 9.5263e-01, 7.9194e-01],
         [1.1023e+01, 8.3697e-01, 1.1716e+00],
         [1.5082e+00, 5.5698e-01, 8.7


Train Diffusion:  99%|█████████▉| 4962/5001 [3:42:47<01:41,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4963/5001 [3:42:50<01:38,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4964/5001 [3:42:52<01:36,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4965/5001 [3:42:55<01:33,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4966/5001 [3:42:57<01:31,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4967/5001 [3:43:00<01:28,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4968/5001 [3:43:02<01:25,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4969/5001 [3:43:05<01:22,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4970/5001 [3:43:08<01:20,  2.60s/it][A
Train Diffusion:  99%|█████████▉| 4971/5001 [3:43:10<01:17,  2.60s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 332833657.6. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6965, 0.4973, 1.2842],
        [8.8616, 0.4872, 1.2787],
        [8.7696, 0.4911, 1.3058]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.7684e-01, 9.5618e-01, 9.7498e-01],
         [1.2292e+01, 5.2977e-01, 1.1189e+00],
         [2.1578e+00, 4.4697e-01, 6.9221e-01],
         ...,
         [2.2322e+01, 3.0652e-02, 1.4827e+00],
         [6.1650e+00, 5.6474e-02, 7.4711e-01],
         [5.7663e-01, 2.3802e+00, 5.4609e-01]],

        [[6.2108e-01, 9.5377e-01, 9.1418e-01],
         [5.7558e-01, 7.8376e-01, 1.8272e+00],
         [2.8256e+01, 1.2962e-01, 1.2006e+00],
         ...,
         [3.7117e-01, 7.6740e-01, 4.5366e-01],
         [1.8644e+01, 6.1685e-01, 3.4441e+00],
         [4.0616e+00, 4.6029e-01, 1.4774e+00]],

        [[1.6045e+00, 8.0764e-01, 3.1124e+00],
         [1.2732e+00, 5.5809e-01, 8.2196e-01],
         [7.3785e-01, 6.9088e-01, 1.2


Train Diffusion:  99%|█████████▉| 4972/5001 [3:43:13<01:15,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4973/5001 [3:43:15<01:12,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4974/5001 [3:43:18<01:09,  2.59s/it][A
Train Diffusion:  99%|█████████▉| 4975/5001 [3:43:21<01:07,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4976/5001 [3:43:23<01:04,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4977/5001 [3:43:26<01:02,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4978/5001 [3:43:28<00:59,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4979/5001 [3:43:31<00:57,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4980/5001 [3:43:34<00:54,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4981/5001 [3:43:36<00:51,  2.59s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 329716166.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6465, 0.5071, 1.2959],
        [9.0460, 0.4912, 1.3114],
        [8.6997, 0.5070, 1.3022]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.9666e-01, 9.5825e-01, 8.5641e-01],
         [6.3221e-01, 7.3692e-01, 1.8727e+00],
         [2.7227e+01, 2.1945e-01, 1.2028e+00],
         ...,
         [3.2503e+01, 1.6473e-01, 1.0937e+00],
         [4.4649e+00, 2.4227e-01, 8.3479e-01],
         [8.2812e-01, 1.5161e+00, 1.6230e+00]],

        [[1.6030e+00, 8.1272e-01, 4.4611e+00],
         [1.1155e+00, 6.2089e-01, 1.0043e+00],
         [6.7803e-01, 7.6031e-01, 1.1510e+00],
         ...,
         [5.5966e-01, 7.0892e-01, 9.8665e-01],
         [3.2254e-01, 1.5099e+01, 5.0082e+00],
         [3.1594e-01, 4.3524e+00, 1.9831e+00]],

        [[6.0371e-01, 9.5396e-01, 8.4569e-01],
         [1.1904e+01, 7.1007e-01, 9.4913e-01],
         [1.8879e+00, 4.2518e-01, 7.6


Train Diffusion: 100%|█████████▉| 4982/5001 [3:43:39<00:49,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4983/5001 [3:43:41<00:46,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4984/5001 [3:43:44<00:43,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4985/5001 [3:43:47<00:41,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4986/5001 [3:43:49<00:39,  2.64s/it][A
Train Diffusion: 100%|█████████▉| 4987/5001 [3:43:52<00:37,  2.64s/it][A
Train Diffusion: 100%|█████████▉| 4988/5001 [3:43:55<00:34,  2.64s/it][A
Train Diffusion: 100%|█████████▉| 4989/5001 [3:43:57<00:31,  2.63s/it][A
Train Diffusion: 100%|█████████▉| 4990/5001 [3:44:00<00:28,  2.62s/it][A
Train Diffusion: 100%|█████████▉| 4991/5001 [3:44:02<00:26,  2.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 324193414.4. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.7647, 0.4818, 1.2621],
        [8.6985, 0.5128, 1.2785],
        [8.7969, 0.4749, 1.3027]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1987,  0.9052,  0.8421],
         [ 1.3448,  0.4271,  1.4641],
         [19.6975,  0.3536,  1.2068],
         ...,
         [ 6.7130,  0.4560,  1.9802],
         [ 1.1190,  0.5215,  0.8123],
         [25.0939,  1.2531,  2.0288]],

        [[ 1.2689,  0.8917,  3.3664],
         [ 1.0271,  1.0307,  1.3662],
         [ 0.7086,  1.0512,  0.5864],
         ...,
         [ 1.2154,  0.2137,  0.5004],
         [ 3.8915,  0.1100,  0.7722],
         [ 0.6003,  1.8829,  0.8345]],

        [[ 0.4437,  0.9539,  1.0193],
         [11.0355,  0.5043,  1.5724],
         [ 1.8704,  0.4087,  0.6368],
         ...,
         [ 0.2473,  0.5465,  1.0783],
         [41.5746,  0.0933,  1.3086],
         [ 5.9820,  0.3418,  1.0661


Train Diffusion: 100%|█████████▉| 4992/5001 [3:44:05<00:23,  2.61s/it][A
Train Diffusion: 100%|█████████▉| 4993/5001 [3:44:08<00:20,  2.60s/it][A
Train Diffusion: 100%|█████████▉| 4994/5001 [3:44:10<00:18,  2.60s/it][A
Train Diffusion: 100%|█████████▉| 4995/5001 [3:44:13<00:15,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4996/5001 [3:44:15<00:12,  2.59s/it][A
Train Diffusion: 100%|█████████▉| 4997/5001 [3:44:18<00:10,  2.64s/it][A
Train Diffusion: 100%|█████████▉| 4998/5001 [3:44:21<00:07,  2.62s/it][A
Train Diffusion: 100%|█████████▉| 4999/5001 [3:44:23<00:05,  2.61s/it][A
Train Diffusion: 100%|█████████▉| 5000/5001 [3:44:26<00:02,  2.61s/it][A
Train Diffusion: 100%|██████████| 5001/5001 [3:44:28<00:00,  2.69s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 320832684.8. Best ELBO loss value is: 210565504.0.

C_PATH mean = tensor([[8.6869, 0.4912, 1.2754],
        [8.7156, 0.4874, 1.2909],
        [8.9660, 0.4817, 1.3114]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.5203,  0.9535,  0.9292],
         [ 0.3857,  0.9490,  1.0134],
         [20.7611,  0.3062,  1.0621],
         ...,
         [32.6309,  0.1232,  1.1992],
         [ 5.0736,  0.1318,  0.8055],
         [ 0.6420,  1.7724,  1.7481]],

        [[ 1.5472,  0.8197,  1.1879],
         [ 1.7845,  0.3387,  0.7105],
         [ 0.7498,  0.8070,  1.2796],
         ...,
         [ 0.3572,  0.6154,  1.0028],
         [38.5484,  0.0912,  1.3430],
         [ 5.7985,  0.3770,  1.2967]],

        [[ 0.8376,  0.9635,  1.2318],
         [12.0549,  0.3310,  1.2820],
         [ 2.9431,  0.3657,  0.8839],
         ...,
         [ 3.0455,  0.2460,  0.8575],
         [ 0.5711,  0.5401,  0.7778],
         [24.4659,  1.2421,  1.9994


