In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [3]:
#Seed every random number generator used in this notebook from one named constant,
#so that changing the seed is a single edit and both libraries always stay in sync.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

In [4]:
#Temperature forcing settings.
temp_ref = 283
temp_rise = 5 #High estimate of 5 celsius temperature rise by 2100.

#Deterministic CON model system parameters.
u_M = 0.002
a_SD = 0.33
a_DS = 0.33
a_M = 0.33
a_MSC = 0.5
k_S_ref = 0.000025
k_D_ref = 0.005
k_M_ref = 0.0002
Ea_S = 75
Ea_D = 50
Ea_M = 50

#Diffusion matrix parameters for SCON: the c_* values go into the 'C' dictionary, the s_* values into the 'SS' dictionary.
c_SOC = 1.0
c_DOC = 0.001
c_MBC = 0.01
s_SOC = 0.001
s_DOC = 0.001
s_MBC = 0.001

#CON system parameters common to both SCON dictionaries; each dictionary then appends its own three diffusion entries.
_scon_shared_params = {
    'u_M': u_M,
    'a_SD': a_SD,
    'a_DS': a_DS,
    'a_M': a_M,
    'a_MSC': a_MSC,
    'k_S_ref': k_S_ref,
    'k_D_ref': k_D_ref,
    'k_M_ref': k_M_ref,
    'Ea_S': Ea_S,
    'Ea_D': Ea_D,
    'Ea_M': Ea_M,
}
SCON_C_params_dict = {**_scon_shared_params, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC}
SCON_SS_params_dict = {**_scon_shared_params, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC}

#System parameters from deterministic AWB model
#u_Q_ref = 0.2
#Q = 0.002
#a_MSA = 0.5
#K_D = 200
#K_U = 1
#V_D_ref = 0.4
#V_U_ref = 0.02
#Ea_V_D = 75
#Ea_V_U = 50
#r_M = 0.0004
#r_E = 0.00001
#r_L = 0.0005

#SAWB diffusion matrix parameters
#c_SOC = 2
#c_DOC = 0.05
#c_MBC = 0.1
#c_EEC = 0.01
#s_SOC = 0.1
#s_DOC = 0.1
#s_MBC = 0.1
#s_EEC = 0.1

#SAWB_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
#SAWB_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

#System parameters from deterministic AWB-ECA model
#u_Q_ref = 0.2
#Q = 0.002
#a_MSA = 0.5
#K_DE = 200
#K_UE = 1
#V_DE_ref = 0.4
#V_UE_ref = 0.02
#Ea_V_DE = 75
#Ea_V_UE = 50
#r_M = 0.0004
#r_E = 0.00001
#r_L = 0.0005

#SAWB-ECA diffusion matrix parameters
#c_SOC = 2
#c_DOC = 0.05
#c_MBC = 0.1
#c_EEC = 0.01
#s_SOC = 0.1
#s_DOC = 0.1
#s_MBC = 0.1
#s_EEC = 0.1

#SAWB_ECA_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
#SAWB_ECA_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

In [5]:
#Set flow NN parameters.

#Index of the CUDA device to use when a GPU is available. A `cuda_id` already present in the
#namespace (set by an earlier cell or import) is honoured; otherwise device 0 is used, so this
#cell does not raise NameError on GPU machines.
cuda_id = globals().get('cuda_id', 0)
devi = torch.device(f'cuda:{cuda_id}' if torch.cuda.is_available() else 'cpu')
dt_flow = 0.1 #Time step of the flow grid.
t = 500 #End time of the flow grid.
n_flow = int(t / dt_flow) + 1 #Number of grid points, counting both end points.
t_span = np.linspace(0, t, n_flow)
#T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
#The tensor is float32 with shape [1, n_flow, 1].
t_span_tensor = torch.tensor(t_span, dtype = torch.float32).reshape(1, n_flow, 1)
l_r = 5e-4 #Learning rate.
niter = 5001 #Total number of training iterations.
piter = 201 #Pretraining iteration threshold.
batch_size = 3 #Number of sets of observation outputs to sample per set of parameters.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
obs_error_scale = 0.1 #Proportion of the mean of observation error standard deviation.

x0_SCON = [58, 0.08, 0.8] #Initial condition means for SCON

In [6]:
#Build the temperature forcing on the flow time grid and show it.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)
print(temp_tensor)

#Build the litter input vectors for the SOC and DOC pools (used by the flow SDE functions) and show both.
i_s_tensor = i_s(t_span_tensor) #Exogenous SOC input function
i_d_tensor = i_d(t_span_tensor) #Exogenous DOC input function
print(i_s_tensor, i_d_tensor, sep = '\n')

tensor([[[283.0000],
         [283.2625],
         [283.5248],
         ...,
         [277.6021],
         [277.7247],
         [277.8533]]])
tensor([[[0.0010],
         [0.0010],
         [0.0010],
         ...,
         [0.0012],
         [0.0012],
         [0.0012]]])
tensor([[[1.0000e-04],
         [1.0000e-04],
         [1.0001e-04],
         ...,
         [1.1754e-04],
         [1.1755e-04],
         [1.1755e-04]]])


In [7]:
def _log_training_progress(label, step, loss_history, best_loss, c_path):
    """Print the 10-iteration moving average and best value of a loss, plus the sampled paths."""
    recent = loss_history[-10:]
    ma_loss = sum(recent) / len(recent)
    print(f"\nMoving average {label} loss at {step} iterations is: {ma_loss}. Best {label} loss value is: {best_loss}.")
    print('\nC_PATH mean =', c_path.mean(-2))
    print('\nC_PATH =', c_path)


def train(DEVICE, L_R, NITER, PRETRAIN_ITER, BATCH_SIZE, SDEFLOW, ObsModel, csv_to_obs_df, DATA_CSV, OBS_ERROR_SCALE, STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, C0, DRIFT_DIFFUSION, PARAMS_DICT):
    """Train a flow network in two phases: L1 pretraining, then negative-ELBO minimisation.

    Iterations with index ``i <= PRETRAIN_ITER`` minimise the summed absolute difference between
    the sampled paths and ``torch.mean(obs_model.mu, -1)``. All later iterations minimise the
    negative ELBO assembled from the initial-condition prior, ``calc_log_lik``, the observation
    model and the flow's ``log_prob``. Gradients are clipped to a norm of 3.0 on every step and the
    learning rate is multiplied by 0.1 every 100000 iterations.

    Parameters
    ----------
    DEVICE : device handed to ``ObsModel`` and ``SDEFLOW`` and used for every tensor created here.
    L_R : learning rate of the Adam optimizer.
    NITER : total number of training iterations.
    PRETRAIN_ITER : last iteration index (inclusive) that uses the L1 pretraining loss; must be < NITER.
    BATCH_SIZE : argument passed to the network on each call; also the length to which each
        parameter value is expanded.
    SDEFLOW : flow class/callable, invoked as
        ``SDEFLOW(DEVICE, obs_model, STATE_DIM, T, DT, N, I_S_TENSOR, I_D_TENSOR, cond_inputs = 3, num_layers = 6)``.
    ObsModel : observation model class/callable, invoked as
        ``ObsModel(DEVICE, obs_times, DT, obs_means, obs_error)``; the result must expose ``mu`` and be callable.
    csv_to_obs_df : callable returning ``(obs_times, obs_means, obs_error)`` for
        ``(DATA_CSV, STATE_DIM, T, OBS_ERROR_SCALE)``.
    DATA_CSV, STATE_DIM, T, DT, N : forwarded unchanged to the callables above.
    OBS_ERROR_SCALE : forwarded to ``csv_to_obs_df``; also scales ``C0`` to give the prior standard deviation.
    T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR : tensors moved to ``DEVICE`` and passed to ``calc_log_lik``.
    TEMP_REF, DRIFT_DIFFUSION, PARAMS_DICT : forwarded unchanged to ``calc_log_lik``.
    C0 : initial-condition means (list of numbers) used as the mean of the Normal prior on ``C_PATH[:, 0, :]``.

    Returns
    -------
    tuple ``(net, ELBO_losses, norm_losses)``: the trained network and the per-iteration loss
    values (Python floats) of the ELBO phase and the pretraining phase respectively.

    Raises
    ------
    ValueError
        If ``PRETRAIN_ITER >= NITER``. Raised before any file is read or model is built.
    """
    #Validate first so that a bad configuration fails before the CSV read and model construction.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")

    #Read-in observation information.
    obs_times, obs_means, obs_error = csv_to_obs_df(DATA_CSV, STATE_DIM, T, OBS_ERROR_SCALE)
    obs_means = LowerBound.apply(obs_means, 1e-6) #Presumably bounds the means from below at 1e-6 -- confirm against LowerBound.
    #Pass observation information to `ObsModel`.
    obs_model = ObsModel(DEVICE, obs_times, DT, obs_means, obs_error)
    #Build the flow from the SDEFLOW argument rather than from a same-named global.
    net = SDEFLOW(DEVICE, obs_model, STATE_DIM, T, DT, N, I_S_TENSOR, I_D_TENSOR, cond_inputs = 3, num_layers = 6).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = L_R)

    #Best losses are tracked as plain floats so that no autograd graph is kept alive between iterations.
    best_loss_norm = 1e15
    best_loss_ELBO = 1e15
    norm_losses = []
    ELBO_losses = []

    C0_tensor = torch.tensor(C0).to(DEVICE) #Convert initial conditions from list to tensor for X0 prior object.
    PARAMS_DICT_TENSOR = {k: torch.tensor(v).expand(BATCH_SIZE).to(DEVICE) for k, v in PARAMS_DICT.items()}
    X0_prior = D.normal.Normal(loc = C0_tensor, scale = OBS_ERROR_SCALE * C0_tensor) #Setting prior noise = observation noise for now.

    #Loop-invariant device transfers, done once instead of on every ELBO iteration.
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)

    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for i in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #For obs_and_flow.py
            #Initial conditions are learned in this version, so C0 is not prepended to C_PATH.
            if i <= PRETRAIN_ITER:
                #Pretraining phase: L1 distance between the sampled paths and the observation means.
                l1_norm_element = C_PATH - torch.mean(obs_model.mu, -1)
                l1_norm = torch.sum(torch.abs(l1_norm_element)).mean()
                l1_value = l1_norm.item()
                best_loss_norm = min(best_loss_norm, l1_value) #A NaN loss never replaces the current best.
                norm_losses.append(l1_value)
                if i % 10 == 0:
                    _log_training_progress('norm', i, norm_losses, best_loss_norm, C_PATH)
                l1_norm.backward()
            else:
                #ELBO phase: -(log prior of initial state + SDE log-likelihood + observation term) + flow log-density.
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev, temp_dev, TEMP_REF, DRIFT_DIFFUSION, PARAMS_DICT)
                neg_ELBO = -X0_prior.log_prob(C_PATH[:, 0, :]).sum(-1).mean() - log_lik.mean() - obs_model(C_PATH, PARAMS_DICT_TENSOR) + log_prob.mean()
                elbo_value = neg_ELBO.item()
                best_loss_ELBO = min(best_loss_ELBO, elbo_value) #A NaN loss never replaces the current best.
                ELBO_losses.append(elbo_value)
                if i % 10 == 0:
                    _log_training_progress('ELBO', i, ELBO_losses, best_loss_ELBO, C_PATH)
                neg_ELBO.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            optimizer.step()
            #Step-decay schedule: shrink the learning rate tenfold every 100000 iterations.
            if i % 100000 == 0 and i > 0:
                optimizer.param_groups[0]['lr'] *= 0.1
            tq.update()
    return net, ELBO_losses, norm_losses

In [8]:
net, ELBO_losses, norm_losses = train(devi, l_r, niter, piter, batch_size, SDEFlow, ObsModel, csv_to_obs_df, 'y_from_x_t_1000_dt_0-01.csv', obs_error_scale, state_dim_SCON, t, dt_flow, n_flow, t_span_tensor, i_s_tensor, i_d_tensor, temp_tensor, temp_ref, x0_SCON, drift_diffusion_SCON_C, SCON_C_params_dict)


Train Diffusion:   0%|          | 0/5001 [00:00<?, ?it/s][A


Moving average norm loss at <built-in function iter> iterations is: 349720.375. Best norm loss value is: 349720.375.

C_PATH mean = tensor([[0.8584, 0.8820, 0.8543],
        [0.8721, 0.8674, 0.8597],
        [0.8598, 0.8649, 0.8632]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0218, 0.6919, 0.8867],
         [1.8316, 0.8502, 0.2385],
         [0.1398, 1.1947, 2.1356],
         ...,
         [0.6374, 0.6841, 0.5368],
         [0.6461, 0.5776, 0.6641],
         [0.5732, 0.6179, 0.5520]],

        [[0.4191, 2.0637, 0.9564],
         [0.4066, 2.0826, 2.2696],
         [0.3793, 0.3986, 1.0027],
         ...,
         [0.7034, 0.9312, 0.5633],
         [0.7227, 1.4726, 0.7203],
         [0.6977, 0.8745, 0.6807]],

        [[0.7853, 0.7400, 0.9096],
         [0.6049, 0.3987, 1.9713],
         [2.9533, 1.6398, 0.8749],
         ...,
         [0.7032, 0.6070, 0.7062],
         [1.2515, 0.5188, 0.7244],
         [1.0447, 0.5262, 0.6705]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 1/5001 [00:08<11:13:29,  8.08s/it][A
Train Diffusion:   0%|          | 2/5001 [00:15<10:50:18,  7.81s/it][A
Train Diffusion:   0%|          | 3/5001 [00:23<10:43:47,  7.73s/it][A
Train Diffusion:   0%|          | 4/5001 [00:30<10:15:20,  7.39s/it][A
Train Diffusion:   0%|          | 5/5001 [00:38<10:30:56,  7.58s/it][A
Train Diffusion:   0%|          | 6/5001 [00:46<10:48:23,  7.79s/it][A
Train Diffusion:   0%|          | 7/5001 [00:54<11:09:18,  8.04s/it][A
Train Diffusion:   0%|          | 8/5001 [01:03<11:26:04,  8.24s/it][A
Train Diffusion:   0%|          | 9/5001 [01:11<11:19:38,  8.17s/it][A
Train Diffusion:   0%|          | 10/5001 [01:19<11:17:51,  8.15s/it][A


Moving average norm loss at <built-in function iter> iterations is: 346174.596875. Best norm loss value is: 343571.9375.

C_PATH mean = tensor([[1.0583, 0.9074, 1.0489],
        [1.0603, 0.9121, 1.0451],
        [1.0624, 0.9084, 1.0512]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7126, 0.9596, 1.3856],
         [1.2284, 0.8711, 1.3277],
         [1.2485, 1.1998, 1.6346],
         ...,
         [0.9405, 0.7277, 1.1347],
         [1.3223, 1.1839, 1.0994],
         [0.9174, 0.8423, 0.9132]],

        [[0.8070, 1.2108, 1.1495],
         [1.3639, 1.2003, 0.8775],
         [1.2844, 1.2492, 1.0914],
         ...,
         [1.0877, 1.1869, 1.0181],
         [1.4925, 0.8095, 0.7377],
         [1.3692, 0.9612, 1.2545]],

        [[0.8563, 1.3298, 1.1038],
         [1.1160, 1.2523, 1.4398],
         [0.8860, 1.7698, 1.0377],
         ...,
         [1.2002, 0.8105, 1.0333],
         [0.3872, 0.1227, 1.0480],
         [1.2891, 1.2712, 0.8279]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 11/5001 [01:27<11:10:56,  8.07s/it][A
Train Diffusion:   0%|          | 12/5001 [01:35<10:59:02,  7.93s/it][A
Train Diffusion:   0%|          | 13/5001 [01:42<10:51:33,  7.84s/it][A
Train Diffusion:   0%|          | 14/5001 [01:51<11:01:18,  7.96s/it][A
Train Diffusion:   0%|          | 15/5001 [01:58<10:58:45,  7.93s/it][A
Train Diffusion:   0%|          | 16/5001 [02:06<11:01:33,  7.96s/it][A
Train Diffusion:   0%|          | 17/5001 [02:16<11:34:02,  8.36s/it][A
Train Diffusion:   0%|          | 18/5001 [02:24<11:28:14,  8.29s/it][A
Train Diffusion:   0%|          | 19/5001 [02:31<11:06:01,  8.02s/it][A
Train Diffusion:   0%|          | 20/5001 [02:39<10:47:43,  7.80s/it][A


Moving average norm loss at <built-in function iter> iterations is: 339102.703125. Best norm loss value is: 334613.46875.

C_PATH mean = tensor([[1.7489, 0.9907, 1.2481],
        [1.7229, 0.9916, 1.2606],
        [1.7083, 0.9823, 1.2529]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6329, 1.3944, 1.5519],
         [1.3432, 1.4237, 1.3028],
         [2.4385, 1.4385, 1.7747],
         ...,
         [1.4981, 0.8883, 1.4220],
         [2.2050, 1.2393, 0.6600],
         [3.2636, 1.1050, 0.8536]],

        [[0.9366, 1.4215, 1.1591],
         [1.2589, 1.2855, 1.4717],
         [1.5211, 1.4290, 1.1466],
         ...,
         [1.4258, 1.1660, 1.0723],
         [1.4814, 1.3765, 1.1801],
         [1.8260, 1.3976, 1.1717]],

        [[0.9841, 1.4339, 1.3914],
         [1.9989, 1.6300, 1.0662],
         [0.6877, 0.1277, 1.3291],
         ...,
         [3.6144, 0.7993, 0.6756],
         [0.2021, 0.0757, 1.3011],
         [1.3821, 0.6736, 0.6880]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 21/5001 [02:48<11:34:21,  8.37s/it][A
Train Diffusion:   0%|          | 22/5001 [02:58<12:17:14,  8.88s/it][A
Train Diffusion:   0%|          | 23/5001 [03:06<11:48:41,  8.54s/it][A
Train Diffusion:   0%|          | 24/5001 [03:15<11:58:08,  8.66s/it][A
Train Diffusion:   0%|          | 25/5001 [03:25<12:33:40,  9.09s/it][A
Train Diffusion:   1%|          | 26/5001 [03:34<12:35:13,  9.11s/it][A
Train Diffusion:   1%|          | 27/5001 [03:42<11:59:11,  8.68s/it][A
Train Diffusion:   1%|          | 28/5001 [03:49<11:28:57,  8.31s/it][A
Train Diffusion:   1%|          | 29/5001 [04:00<12:22:15,  8.96s/it][A
Train Diffusion:   1%|          | 30/5001 [04:09<12:25:40,  9.00s/it][A


Moving average norm loss at <built-in function iter> iterations is: 320710.490625. Best norm loss value is: 307362.28125.

C_PATH mean = tensor([[3.8943, 0.9879, 1.3487],
        [3.8846, 0.9898, 1.3613],
        [4.0028, 0.9501, 1.3795]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 0.7979,  0.8788,  1.1450],
         [ 1.2706,  1.0967,  0.9864],
         [ 0.6079,  0.9982,  0.9475],
         ...,
         [ 2.2537,  1.4551,  1.1013],
         [ 9.7665,  0.5694,  1.7531],
         [ 0.6060,  1.4992,  0.6779]],

        [[ 0.7291,  2.3746,  0.9636],
         [ 1.5800,  5.1591,  0.8235],
         [ 2.8032,  1.6787,  4.2535],
         ...,
         [ 1.5987,  0.1575,  0.8127],
         [ 1.3249,  1.2154,  1.1177],
         [ 0.9828,  0.6280,  1.5416]],

        [[ 1.9692,  1.0697,  1.0117],
         [ 1.3966,  1.0517,  0.9554],
         [ 1.0547,  1.1805,  1.4179],
         ...,
         [11.7193,  1.2630,  0.6617],
         [ 0.8640,  0.6184,  0.7063],
         [ 0.4117,  0.5129,  0.8


Train Diffusion:   1%|          | 31/5001 [04:17<12:14:36,  8.87s/it][A
Train Diffusion:   1%|          | 32/5001 [04:26<12:05:40,  8.76s/it][A
Train Diffusion:   1%|          | 33/5001 [04:36<12:42:14,  9.21s/it][A
Train Diffusion:   1%|          | 34/5001 [04:44<12:16:07,  8.89s/it][A
Train Diffusion:   1%|          | 35/5001 [04:53<12:09:47,  8.82s/it][A
Train Diffusion:   1%|          | 36/5001 [05:02<12:10:22,  8.83s/it][A
Train Diffusion:   1%|          | 37/5001 [05:11<12:06:20,  8.78s/it][A
Train Diffusion:   1%|          | 38/5001 [05:18<11:38:05,  8.44s/it][A
Train Diffusion:   1%|          | 39/5001 [05:27<11:39:48,  8.46s/it][A
Train Diffusion:   1%|          | 40/5001 [05:35<11:35:01,  8.41s/it][A


Moving average norm loss at <built-in function iter> iterations is: 297382.221875. Best norm loss value is: 288198.34375.

C_PATH mean = tensor([[5.2363, 0.8393, 1.3019],
        [5.3068, 0.8234, 1.2776],
        [5.3355, 0.8186, 1.2897]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.2289e+00, 1.1222e+00, 1.0637e+00],
         [6.0614e+00, 5.4964e-01, 4.5981e+00],
         [5.4371e-01, 1.0332e-01, 1.2622e+00],
         ...,
         [7.3722e+00, 5.6821e-01, 1.9616e+00],
         [5.2974e-01, 1.2777e+00, 5.2786e+00],
         [4.1166e+00, 3.7530e-01, 8.2605e-01]],

        [[1.4170e+00, 1.3752e+00, 1.1156e+00],
         [6.9456e-01, 3.2846e-01, 1.0528e+00],
         [2.1298e+00, 1.1916e+00, 1.6441e+00],
         ...,
         [1.4475e+00, 7.1198e-01, 2.6294e-01],
         [2.4836e+01, 2.6921e-01, 2.9131e+00],
         [4.1759e+00, 1.9117e+00, 5.2540e-01]],

        [[5.6833e-01, 7.5334e-01, 1.3996e+00],
         [1.9534e+00, 1.1438e+00, 8.4452e-01],
         [1.4269e+01, 6.0477e-01, 


Train Diffusion:   1%|          | 41/5001 [05:45<12:03:46,  8.76s/it][A
Train Diffusion:   1%|          | 42/5001 [05:53<12:02:43,  8.74s/it][A
Train Diffusion:   1%|          | 43/5001 [06:01<11:45:32,  8.54s/it][A
Train Diffusion:   1%|          | 44/5001 [06:10<11:59:34,  8.71s/it][A
Train Diffusion:   1%|          | 45/5001 [06:19<11:49:35,  8.59s/it][A
Train Diffusion:   1%|          | 46/5001 [06:28<12:09:21,  8.83s/it][A
Train Diffusion:   1%|          | 47/5001 [06:37<12:10:12,  8.84s/it][A
Train Diffusion:   1%|          | 48/5001 [06:45<12:00:10,  8.72s/it][A
Train Diffusion:   1%|          | 49/5001 [06:55<12:25:13,  9.03s/it][A
Train Diffusion:   1%|          | 50/5001 [07:04<12:15:23,  8.91s/it][A


Moving average norm loss at <built-in function iter> iterations is: 279069.03125. Best norm loss value is: 270200.375.

C_PATH mean = tensor([[6.3881, 0.6659, 1.3663],
        [6.4019, 0.6787, 1.3606],
        [6.3743, 0.6806, 1.3667]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.9811e-01, 3.8535e-01, 2.0634e+00],
         [2.3848e+00, 2.0555e+00, 9.6281e-01],
         [8.5339e+00, 5.9864e-01, 1.6359e+00],
         ...,
         [3.5625e+00, 2.4903e+00, 9.0209e-01],
         [4.8873e+00, 3.9260e-01, 6.5795e-01],
         [1.0286e+00, 1.4222e+00, 1.7489e+00]],

        [[1.9843e+00, 8.3776e-01, 2.4086e+00],
         [1.9854e+00, 9.9010e-02, 1.8527e+00],
         [1.9494e+00, 4.7822e-02, 1.4153e+00],
         ...,
         [1.4328e+00, 7.1639e-01, 1.1784e+00],
         [2.7590e+01, 1.9740e-01, 5.8527e-01],
         [1.7223e+01, 3.0642e-01, 4.1275e+00]],

        [[1.7407e+00, 9.1339e-01, 1.3583e+00],
         [8.1223e+00, 6.3578e-01, 1.0371e+00],
         [5.7407e+00, 5.4522e-01, 8.0


Train Diffusion:   1%|          | 51/5001 [07:12<12:08:38,  8.83s/it][A
Train Diffusion:   1%|          | 52/5001 [07:21<12:03:35,  8.77s/it][A
Train Diffusion:   1%|          | 53/5001 [07:29<11:47:32,  8.58s/it][A
Train Diffusion:   1%|          | 54/5001 [07:37<11:32:57,  8.40s/it][A
Train Diffusion:   1%|          | 55/5001 [07:45<11:22:17,  8.28s/it][A
Train Diffusion:   1%|          | 56/5001 [07:53<11:17:20,  8.22s/it][A
Train Diffusion:   1%|          | 57/5001 [08:02<11:20:38,  8.26s/it][A
Train Diffusion:   1%|          | 58/5001 [08:10<11:22:28,  8.28s/it][A
Train Diffusion:   1%|          | 59/5001 [08:18<11:18:59,  8.24s/it][A
Train Diffusion:   1%|          | 60/5001 [08:27<11:36:54,  8.46s/it][A


Moving average norm loss at <built-in function iter> iterations is: 264234.059375. Best norm loss value is: 260391.28125.

C_PATH mean = tensor([[7.2446, 0.6085, 1.3698],
        [7.1262, 0.6585, 1.3973],
        [7.1803, 0.6147, 1.3798]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6534e+00, 5.3436e-01, 1.5654e+00],
         [4.4281e+00, 1.1613e+00, 4.7913e-01],
         [4.5037e-01, 8.8695e-01, 6.3569e-01],
         ...,
         [1.9529e+01, 2.6540e-01, 5.5878e-01],
         [5.0138e-01, 1.1459e-02, 1.2791e+00],
         [8.1999e+00, 3.6989e-01, 9.8499e-01]],

        [[1.8407e+00, 4.9898e-01, 2.3520e+00],
         [7.2540e+00, 1.2828e+00, 4.4355e-01],
         [1.8405e-01, 2.7909e-01, 6.6136e-01],
         ...,
         [4.2536e+00, 1.3459e+00, 9.7237e-01],
         [2.0662e+01, 1.9419e-01, 8.8480e-01],
         [8.5442e-01, 6.9764e-01, 9.2771e-01]],

        [[8.5533e-01, 2.2278e-01, 2.4865e+00],
         [2.7838e+00, 5.0948e-01, 9.5963e-01],
         [5.9212e+00, 5.5798e-01, 


Train Diffusion:   1%|          | 61/5001 [08:36<11:48:47,  8.61s/it][A
Train Diffusion:   1%|          | 62/5001 [08:45<12:01:14,  8.76s/it][A
Train Diffusion:   1%|▏         | 63/5001 [08:53<11:47:29,  8.60s/it][A
Train Diffusion:   1%|▏         | 64/5001 [09:01<11:24:05,  8.31s/it][A
Train Diffusion:   1%|▏         | 65/5001 [09:09<11:14:33,  8.20s/it][A
Train Diffusion:   1%|▏         | 66/5001 [09:16<10:57:27,  7.99s/it][A
Train Diffusion:   1%|▏         | 67/5001 [09:25<11:09:58,  8.15s/it][A
Train Diffusion:   1%|▏         | 68/5001 [09:33<11:00:35,  8.03s/it][A
Train Diffusion:   1%|▏         | 69/5001 [09:41<10:56:24,  7.99s/it][A
Train Diffusion:   1%|▏         | 70/5001 [09:48<10:44:13,  7.84s/it][A


Moving average norm loss at <built-in function iter> iterations is: 252792.1375. Best norm loss value is: 246192.5625.

C_PATH mean = tensor([[7.9601, 0.5538, 1.4435],
        [7.9201, 0.5730, 1.4662],
        [8.1723, 0.5506, 1.4333]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.1022e-01, 2.8780e-01, 2.6196e+00],
         [5.1306e+00, 4.7336e-01, 1.4370e-01],
         [1.9057e+01, 1.8046e-01, 1.2809e+00],
         ...,
         [5.5728e+00, 4.3871e-01, 9.2195e-01],
         [3.2080e+01, 1.2088e-01, 8.0193e-01],
         [1.2855e+01, 2.6902e-01, 4.8086e+00]],

        [[1.7787e+00, 4.7850e-01, 1.5484e+00],
         [3.7938e+00, 7.0100e-01, 1.9369e+00],
         [7.7969e+00, 1.8353e+00, 2.2233e+00],
         ...,
         [1.3302e+01, 1.5437e-01, 9.4673e-01],
         [6.2400e-01, 1.2030e-02, 1.2413e+00],
         [7.4150e+00, 3.3288e-01, 1.5165e+00]],

        [[2.1528e+00, 4.3618e-01, 3.4590e+00],
         [2.7069e-02, 8.8004e-02, 1.4132e+00],
         [7.4321e+00, 9.5872e-03, 1.1


Train Diffusion:   1%|▏         | 71/5001 [09:56<10:40:18,  7.79s/it][A
Train Diffusion:   1%|▏         | 72/5001 [10:03<10:31:31,  7.69s/it][A
Train Diffusion:   1%|▏         | 73/5001 [10:11<10:29:00,  7.66s/it][A
Train Diffusion:   1%|▏         | 74/5001 [10:18<10:26:37,  7.63s/it][A
Train Diffusion:   1%|▏         | 75/5001 [10:26<10:37:07,  7.76s/it][A
Train Diffusion:   2%|▏         | 76/5001 [10:35<10:49:46,  7.92s/it][A
Train Diffusion:   2%|▏         | 77/5001 [10:42<10:45:07,  7.86s/it][A
Train Diffusion:   2%|▏         | 78/5001 [10:50<10:43:21,  7.84s/it][A
Train Diffusion:   2%|▏         | 79/5001 [10:59<11:05:43,  8.12s/it][A
Train Diffusion:   2%|▏         | 80/5001 [11:07<11:05:03,  8.11s/it][A


Moving average norm loss at <built-in function iter> iterations is: 239106.796875. Best norm loss value is: 232926.5625.

C_PATH mean = tensor([[9.0169, 0.5181, 1.3713],
        [8.9491, 0.5297, 1.3956],
        [8.7207, 0.5249, 1.3432]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.8614e-01, 1.6229e-01, 2.6760e+00],
         [5.3282e+00, 4.0905e-01, 1.5473e+00],
         [2.1155e+01, 3.4591e-01, 6.9713e-01],
         ...,
         [6.2706e+00, 7.9174e-04, 1.5943e+00],
         [1.5562e+01, 1.2624e-01, 8.4156e-01],
         [1.4346e+01, 4.3826e-01, 2.0043e+00]],

        [[1.9398e+00, 3.2998e-01, 1.1753e+00],
         [5.5811e+00, 4.7709e-01, 1.2074e+00],
         [2.2167e+00, 9.9512e-02, 7.9184e-01],
         ...,
         [1.9578e+01, 1.5230e-01, 8.3318e-01],
         [4.1341e-01, 2.7388e-02, 1.2719e+00],
         [1.1035e+01, 1.8652e-01, 2.4329e-01]],

        [[2.7039e+00, 2.8527e-01, 3.3155e+00],
         [7.4441e-02, 6.6767e-02, 9.7292e-01],
         [7.0690e+00, 9.3006e-01, 7


Train Diffusion:   2%|▏         | 81/5001 [11:16<11:16:18,  8.25s/it][A
Train Diffusion:   2%|▏         | 82/5001 [11:24<11:17:25,  8.26s/it][A
Train Diffusion:   2%|▏         | 83/5001 [11:32<11:17:52,  8.27s/it][A
Train Diffusion:   2%|▏         | 84/5001 [11:41<11:31:09,  8.43s/it][A
Train Diffusion:   2%|▏         | 85/5001 [11:49<11:17:53,  8.27s/it][A
Train Diffusion:   2%|▏         | 86/5001 [11:58<11:31:56,  8.45s/it][A
Train Diffusion:   2%|▏         | 87/5001 [12:05<11:07:35,  8.15s/it][A
Train Diffusion:   2%|▏         | 88/5001 [12:13<10:57:39,  8.03s/it][A
Train Diffusion:   2%|▏         | 89/5001 [12:21<10:44:17,  7.87s/it][A
Train Diffusion:   2%|▏         | 90/5001 [12:28<10:43:49,  7.87s/it][A


Moving average norm loss at <built-in function iter> iterations is: 225560.2375. Best norm loss value is: 218500.953125.

C_PATH mean = tensor([[9.8946, 0.4306, 1.3933],
        [9.7291, 0.4983, 1.4254],
        [9.6790, 0.4940, 1.4439]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.8426e+00, 2.6639e-01, 2.5658e+00],
         [1.5839e-01, 1.2871e-01, 1.2107e+00],
         [1.1072e+01, 3.1334e+00, 3.0116e-01],
         ...,
         [1.9909e+01, 1.8974e-01, 7.1271e-01],
         [1.6158e+00, 7.3192e-01, 7.4169e-01],
         [6.5607e+00, 4.9692e-01, 2.9656e+00]],

        [[6.5476e-01, 2.4172e-01, 2.6061e+00],
         [6.1362e+00, 3.9648e-01, 8.2923e-01],
         [1.0567e+01, 1.4393e-01, 2.1602e+00],
         ...,
         [1.1691e+01, 3.1302e-01, 7.6225e-01],
         [1.7137e-02, 4.1938e-02, 3.9307e+00],
         [1.3292e+01, 2.8345e-01, 1.8236e+00]],

        [[2.7500e+00, 2.7064e-01, 1.3064e+00],
         [6.2282e+00, 4.8072e-01, 2.2079e+00],
         [1.3894e+01, 1.4698e-01, 8


Train Diffusion:   2%|▏         | 91/5001 [12:37<11:04:14,  8.12s/it][A
Train Diffusion:   2%|▏         | 92/5001 [12:44<10:44:48,  7.88s/it][A
Train Diffusion:   2%|▏         | 93/5001 [12:52<10:29:40,  7.70s/it][A
Train Diffusion:   2%|▏         | 94/5001 [13:00<10:44:09,  7.88s/it][A
Train Diffusion:   2%|▏         | 95/5001 [13:09<11:05:05,  8.13s/it][A
Train Diffusion:   2%|▏         | 96/5001 [13:16<10:51:45,  7.97s/it][A
Train Diffusion:   2%|▏         | 97/5001 [13:24<10:47:17,  7.92s/it][A
Train Diffusion:   2%|▏         | 98/5001 [13:32<10:55:49,  8.03s/it][A
Train Diffusion:   2%|▏         | 99/5001 [13:40<10:57:01,  8.04s/it][A
Train Diffusion:   2%|▏         | 100/5001 [13:49<11:09:00,  8.19s/it][A


Moving average norm loss at <built-in function iter> iterations is: 211834.4546875. Best norm loss value is: 205481.53125.

C_PATH mean = tensor([[10.8095,  0.4078,  1.3757],
        [10.9429,  0.4361,  1.4217],
        [10.8066,  0.4345,  1.3722]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.7092e+00, 2.4610e-01, 1.5145e+00],
         [2.2064e-01, 3.3714e+00, 4.0714e-01],
         [5.9276e+00, 3.7473e+00, 3.6585e-01],
         ...,
         [9.0547e+00, 3.0076e-01, 1.2704e+00],
         [2.7178e+01, 1.5438e-01, 8.6966e-01],
         [2.5991e+01, 1.2453e-01, 2.4578e+00]],

        [[3.6191e+00, 2.1684e-01, 1.0801e+00],
         [8.0899e+00, 1.5213e-01, 6.9288e+00],
         [2.9402e-01, 1.8694e-01, 1.3694e+00],
         ...,
         [3.5203e+00, 1.0119e+00, 8.9646e-01],
         [6.3757e-01, 3.9513e-01, 1.0390e+00],
         [5.7785e-01, 6.0700e-01, 1.6394e+00]],

        [[5.9844e-01, 1.5959e-01, 2.2953e+00],
         [8.1653e+00, 2.7460e-01, 7.4370e-01],
         [3.1623e+01, 7.


Train Diffusion:   2%|▏         | 101/5001 [13:57<10:51:27,  7.98s/it][A
Train Diffusion:   2%|▏         | 102/5001 [14:04<10:51:23,  7.98s/it][A
Train Diffusion:   2%|▏         | 103/5001 [14:13<11:00:20,  8.09s/it][A
Train Diffusion:   2%|▏         | 104/5001 [14:21<11:08:42,  8.19s/it][A
Train Diffusion:   2%|▏         | 105/5001 [14:29<11:06:42,  8.17s/it][A
Train Diffusion:   2%|▏         | 106/5001 [14:38<11:27:43,  8.43s/it][A
Train Diffusion:   2%|▏         | 107/5001 [14:46<10:59:47,  8.09s/it][A
Train Diffusion:   2%|▏         | 108/5001 [14:54<10:59:28,  8.09s/it][A
Train Diffusion:   2%|▏         | 109/5001 [15:02<11:10:00,  8.22s/it][A
Train Diffusion:   2%|▏         | 110/5001 [15:11<11:31:54,  8.49s/it][A


Moving average norm loss at <built-in function iter> iterations is: 198447.2921875. Best norm loss value is: 192796.453125.

C_PATH mean = tensor([[11.6421,  0.4234,  1.4543],
        [11.5934,  0.4097,  1.4103],
        [11.7027,  0.4425,  1.3642]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.0908e+00, 2.7613e-01, 3.6639e+00],
         [6.0489e-01, 3.8771e-01, 5.6625e-01],
         [8.8293e+00, 1.0592e+00, 1.2387e+00],
         ...,
         [7.5771e+00, 4.1382e-04, 1.3580e+00],
         [1.0519e+01, 6.3627e-02, 9.1370e-01],
         [1.0417e+00, 2.0903e+00, 3.3311e-01]],

        [[4.2328e+00, 2.0898e-01, 2.0924e+00],
         [1.2164e+01, 1.2233e-01, 2.4024e+00],
         [5.9586e-01, 1.2380e-01, 1.4641e+00],
         ...,
         [2.4945e+01, 8.3433e-02, 9.4940e-01],
         [5.1559e-01, 2.5938e-01, 5.8753e-01],
         [2.3993e+00, 4.2904e+00, 2.1870e+00]],

        [[7.3795e-01, 3.0025e-01, 2.1615e+00],
         [9.7939e+00, 3.7174e-01, 1.0473e+00],
         [3.1390e+01, 9


Train Diffusion:   2%|▏         | 111/5001 [15:19<11:11:32,  8.24s/it][A
Train Diffusion:   2%|▏         | 112/5001 [15:26<10:48:46,  7.96s/it][A
Train Diffusion:   2%|▏         | 113/5001 [15:34<10:31:15,  7.75s/it][A
Train Diffusion:   2%|▏         | 114/5001 [15:42<10:44:55,  7.92s/it][A
Train Diffusion:   2%|▏         | 115/5001 [15:51<11:01:42,  8.13s/it][A
Train Diffusion:   2%|▏         | 116/5001 [15:59<11:03:42,  8.15s/it][A
Train Diffusion:   2%|▏         | 117/5001 [16:07<11:05:54,  8.18s/it][A
Train Diffusion:   2%|▏         | 118/5001 [16:16<11:21:14,  8.37s/it][A
Train Diffusion:   2%|▏         | 119/5001 [16:24<11:25:27,  8.42s/it][A
Train Diffusion:   2%|▏         | 120/5001 [16:32<11:14:47,  8.29s/it][A


Moving average norm loss at <built-in function iter> iterations is: 182595.4796875. Best norm loss value is: 175042.65625.

C_PATH mean = tensor([[12.8157,  0.3853,  1.3940],
        [12.6527,  0.4034,  1.3679],
        [12.7309,  0.3853,  1.4278]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0419e-01, 1.2910e-01, 1.6141e+00],
         [1.3082e+01, 2.7873e-01, 1.1133e+00],
         [2.9490e+01, 6.1794e-02, 1.2559e+00],
         ...,
         [6.4082e+00, 3.1213e-01, 1.1125e+00],
         [9.9844e+00, 8.4041e-02, 3.0745e+00],
         [9.6566e+00, 9.9400e-01, 5.7434e-01]],

        [[3.1047e+00, 2.2743e-01, 3.4692e+00],
         [7.1277e-01, 7.7966e-01, 6.0501e-01],
         [1.0021e+01, 8.2934e-02, 9.0479e-01],
         ...,
         [2.4172e+01, 9.0180e-02, 8.3100e-01],
         [2.6431e+00, 8.9718e-01, 8.9936e-01],
         [5.0705e-01, 1.4207e-01, 1.5568e+00]],

        [[4.8262e+00, 1.9614e-01, 2.4945e+00],
         [7.8219e+00, 1.5403e-01, 2.4985e+00],
         [1.8704e+00, 1.


Train Diffusion:   2%|▏         | 121/5001 [16:40<10:57:11,  8.08s/it][A
Train Diffusion:   2%|▏         | 122/5001 [16:47<10:40:50,  7.88s/it][A
Train Diffusion:   2%|▏         | 123/5001 [16:55<10:33:35,  7.79s/it][A
Train Diffusion:   2%|▏         | 124/5001 [17:03<10:50:14,  8.00s/it][A
Train Diffusion:   2%|▏         | 125/5001 [17:12<10:55:16,  8.06s/it][A
Train Diffusion:   3%|▎         | 126/5001 [17:19<10:41:18,  7.89s/it][A
Train Diffusion:   3%|▎         | 127/5001 [17:27<10:41:32,  7.90s/it][A
Train Diffusion:   3%|▎         | 128/5001 [17:36<11:10:56,  8.26s/it][A
Train Diffusion:   3%|▎         | 129/5001 [17:45<11:13:20,  8.29s/it][A
Train Diffusion:   3%|▎         | 130/5001 [17:54<11:29:57,  8.50s/it][A


Moving average norm loss at <built-in function iter> iterations is: 164201.11875. Best norm loss value is: 155669.3125.

C_PATH mean = tensor([[13.9094,  0.2947,  1.3883],
        [13.8329,  0.3409,  1.3784],
        [13.8287,  0.3068,  1.3554]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.9917e+00, 1.3638e-01, 3.3688e+00],
         [1.5195e+01, 1.7487e-01, 3.5148e-01],
         [7.9090e-01, 3.7080e+00, 7.0430e-01],
         ...,
         [2.5450e+01, 8.2919e-02, 1.4272e+00],
         [1.4513e+01, 4.4574e-02, 1.7671e+00],
         [1.5762e+00, 8.0067e-01, 2.4395e-02]],

        [[6.2267e-01, 3.9644e-01, 2.4639e+00],
         [7.1623e+00, 5.3158e-01, 6.7234e-01],
         [2.1183e+01, 8.8349e-02, 1.3165e+00],
         ...,
         [7.2145e+00, 3.6095e-03, 1.4553e+00],
         [3.1304e+01, 8.2406e-02, 1.2017e+00],
         [1.6401e+01, 5.5287e-02, 3.1652e+00]],

        [[2.1448e+00, 1.6972e-01, 1.9668e+00],
         [9.5835e-02, 8.7500e-02, 8.7888e+00],
         [2.4228e-01, 7.641


Train Diffusion:   3%|▎         | 131/5001 [18:01<11:05:35,  8.20s/it][A
Train Diffusion:   3%|▎         | 132/5001 [18:08<10:42:52,  7.92s/it][A
Train Diffusion:   3%|▎         | 133/5001 [18:16<10:25:45,  7.71s/it][A
Train Diffusion:   3%|▎         | 134/5001 [18:23<10:27:19,  7.73s/it][A
Train Diffusion:   3%|▎         | 135/5001 [18:32<10:42:46,  7.93s/it][A
Train Diffusion:   3%|▎         | 136/5001 [18:41<11:16:46,  8.35s/it][A
Train Diffusion:   3%|▎         | 137/5001 [18:51<11:50:15,  8.76s/it][A
Train Diffusion:   3%|▎         | 138/5001 [18:59<11:30:44,  8.52s/it][A
Train Diffusion:   3%|▎         | 139/5001 [19:08<11:49:47,  8.76s/it][A
Train Diffusion:   3%|▎         | 140/5001 [19:18<12:19:24,  9.13s/it][A


Moving average norm loss at <built-in function iter> iterations is: 140937.58515625. Best norm loss value is: 130607.1328125.

C_PATH mean = tensor([[15.3895,  0.2762,  1.3431],
        [15.4107,  0.2540,  1.3380],
        [15.3158,  0.2691,  1.3428]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.0328e+00, 8.6974e-02, 4.1908e+00],
         [1.2782e+01, 5.9657e+00, 2.9925e-01],
         [1.6115e+00, 7.4054e-01, 2.0241e+00],
         ...,
         [1.4043e+01, 2.9823e-01, 4.5645e-01],
         [1.7979e+01, 6.8614e-01, 1.1285e+00],
         [1.1575e+01, 1.1325e-02, 2.9124e-01]],

        [[2.0345e-01, 4.4728e-02, 7.7832e-01],
         [1.5176e+00, 2.0217e+00, 1.6033e-01],
         [9.6078e+00, 2.1549e-01, 1.0065e+00],
         ...,
         [2.7917e+01, 7.4058e-02, 1.2825e+00],
         [1.0372e+01, 1.7058e-02, 1.7824e+00],
         [1.7847e+01, 1.8215e-01, 2.7301e-01]],

        [[5.4763e+00, 8.6939e-02, 2.5059e+00],
         [5.2802e+00, 5.8312e-02, 1.0436e+01],
         [5.9826e-01,


Train Diffusion:   3%|▎         | 141/5001 [19:27<12:06:24,  8.97s/it][A
Train Diffusion:   3%|▎         | 142/5001 [19:36<12:05:05,  8.95s/it][A
Train Diffusion:   3%|▎         | 143/5001 [19:44<11:58:13,  8.87s/it][A
Train Diffusion:   3%|▎         | 144/5001 [19:54<12:28:37,  9.25s/it][A
Train Diffusion:   3%|▎         | 145/5001 [20:02<11:58:45,  8.88s/it][A
Train Diffusion:   3%|▎         | 146/5001 [20:11<11:46:08,  8.73s/it][A
Train Diffusion:   3%|▎         | 147/5001 [20:19<11:43:03,  8.69s/it][A
Train Diffusion:   3%|▎         | 148/5001 [20:28<11:37:00,  8.62s/it][A
Train Diffusion:   3%|▎         | 149/5001 [20:36<11:19:04,  8.40s/it][A
Train Diffusion:   3%|▎         | 150/5001 [20:44<11:08:25,  8.27s/it][A


Moving average norm loss at <built-in function iter> iterations is: 115561.12421875. Best norm loss value is: 104676.421875.

C_PATH mean = tensor([[17.0292,  0.2352,  1.3090],
        [17.0703,  0.2053,  1.3135],
        [17.0139,  0.2259,  1.2931]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[6.3969e+00, 8.2004e-02, 3.9028e+00],
         [1.0083e+01, 1.5723e+00, 3.9345e-01],
         [1.8164e+01, 9.7433e-02, 1.1138e+00],
         ...,
         [1.9391e+01, 4.5590e-01, 8.9078e-01],
         [2.2151e+00, 7.6981e-02, 1.3338e+00],
         [2.3042e+01, 1.5480e-01, 6.6575e-01]],

        [[1.7442e-01, 2.5616e-02, 6.4385e-01],
         [5.1590e-01, 3.8646e-02, 2.5388e+00],
         [6.8741e-03, 6.1343e-02, 4.7340e-01],
         ...,
         [1.7120e+01, 4.1661e-01, 2.0652e+00],
         [1.5257e+00, 5.1406e-01, 9.1067e-01],
         [8.1080e+00, 1.2007e-01, 8.5304e-02]],

        [[5.6049e+00, 8.4284e-02, 3.1290e+00],
         [1.6098e+01, 3.8214e-01, 1.0035e+00],
         [1.6414e+01, 


Train Diffusion:   3%|▎         | 151/5001 [20:53<11:25:14,  8.48s/it][A
Train Diffusion:   3%|▎         | 152/5001 [21:01<11:13:40,  8.34s/it][A
Train Diffusion:   3%|▎         | 153/5001 [21:08<11:00:43,  8.18s/it][A
Train Diffusion:   3%|▎         | 154/5001 [21:20<12:17:16,  9.13s/it][A
Train Diffusion:   3%|▎         | 155/5001 [21:28<11:49:14,  8.78s/it][A
Train Diffusion:   3%|▎         | 156/5001 [21:36<11:29:55,  8.54s/it][A
Train Diffusion:   3%|▎         | 157/5001 [21:44<11:12:01,  8.32s/it][A
Train Diffusion:   3%|▎         | 158/5001 [21:52<11:12:45,  8.33s/it][A
Train Diffusion:   3%|▎         | 159/5001 [22:02<11:46:32,  8.76s/it][A
Train Diffusion:   3%|▎         | 160/5001 [22:10<11:46:14,  8.75s/it][A


Moving average norm loss at <built-in function iter> iterations is: 95340.4578125. Best norm loss value is: 88067.921875.

C_PATH mean = tensor([[18.2286,  0.1866,  1.3298],
        [18.2311,  0.1879,  1.3269],
        [18.1812,  0.2108,  1.3267]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1437e+00, 8.0392e-02, 4.5884e+00],
         [1.1285e+01, 1.8018e+00, 1.0352e+00],
         [1.2449e+01, 6.2593e-02, 1.1554e+00],
         ...,
         [6.3888e+00, 1.6818e+00, 1.0599e+00],
         [1.9877e+01, 4.1234e-01, 1.2941e+00],
         [1.7983e+00, 3.2254e-01, 1.3333e+00]],

        [[6.7852e+00, 7.1466e-02, 4.0079e+00],
         [1.0943e+01, 5.0802e-02, 1.4443e+00],
         [9.8613e+00, 2.8284e-01, 1.6676e+00],
         ...,
         [2.0717e+01, 2.8170e-03, 1.3479e+00],
         [2.3445e+00, 9.0473e-04, 6.0674e-02],
         [8.0468e+00, 8.4204e-01, 9.5550e-01]],

        [[2.6688e-01, 1.7800e-02, 4.9252e-01],
         [6.0698e+00, 3.3633e-01, 4.1651e-01],
         [2.6158e+01, 1.0


Train Diffusion:   3%|▎         | 161/5001 [22:19<11:30:40,  8.56s/it][A
Train Diffusion:   3%|▎         | 162/5001 [22:27<11:33:57,  8.60s/it][A
Train Diffusion:   3%|▎         | 163/5001 [22:36<11:35:33,  8.63s/it][A
Train Diffusion:   3%|▎         | 164/5001 [22:45<11:39:02,  8.67s/it][A
Train Diffusion:   3%|▎         | 165/5001 [22:53<11:38:37,  8.67s/it][A
Train Diffusion:   3%|▎         | 166/5001 [23:02<11:35:44,  8.63s/it][A
Train Diffusion:   3%|▎         | 167/5001 [23:11<11:46:10,  8.77s/it][A
Train Diffusion:   3%|▎         | 168/5001 [23:20<12:01:18,  8.95s/it][A
Train Diffusion:   3%|▎         | 169/5001 [23:28<11:38:25,  8.67s/it][A
Train Diffusion:   3%|▎         | 170/5001 [23:37<11:35:41,  8.64s/it][A


Moving average norm loss at <built-in function iter> iterations is: 82162.55390625. Best norm loss value is: 75171.609375.

C_PATH mean = tensor([[18.9771,  0.1915,  1.3108],
        [18.9928,  0.2054,  1.3076],
        [18.9480,  0.1941,  1.2984]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.1503e+00, 7.3034e-02, 3.5270e+00],
         [2.0799e+00, 1.7797e-01, 1.0517e+01],
         [1.4201e-01, 7.4431e-02, 1.1066e+00],
         ...,
         [1.5677e+01, 4.7646e-03, 1.0092e+00],
         [2.2087e+01, 1.2643e-01, 1.1800e+00],
         [1.6391e+01, 9.4255e-03, 3.6087e-01]],

        [[6.3062e+00, 6.3478e-02, 4.3696e+00],
         [1.6829e+01, 1.6338e+00, 8.0661e-01],
         [3.0260e+00, 1.9956e+00, 1.1279e+00],
         ...,
         [2.4938e+01, 1.1168e-01, 1.3800e+00],
         [2.4931e+01, 8.9087e-02, 1.2526e+00],
         [2.1667e+01, 1.1008e-01, 1.8167e+00]],

        [[4.8886e-01, 3.7147e-02, 1.3348e+00],
         [6.4765e-01, 4.2121e-01, 8.0208e-03],
         [8.6786e+00, 2.


Train Diffusion:   3%|▎         | 171/5001 [23:46<11:35:26,  8.64s/it][A
Train Diffusion:   3%|▎         | 172/5001 [23:53<11:06:23,  8.28s/it][A
Train Diffusion:   3%|▎         | 173/5001 [24:01<10:54:10,  8.13s/it][A
Train Diffusion:   3%|▎         | 174/5001 [24:08<10:42:54,  7.99s/it][A
Train Diffusion:   3%|▎         | 175/5001 [24:17<10:52:43,  8.12s/it][A
Train Diffusion:   4%|▎         | 176/5001 [24:26<11:05:52,  8.28s/it][A
Train Diffusion:   4%|▎         | 177/5001 [24:34<10:59:23,  8.20s/it][A
Train Diffusion:   4%|▎         | 178/5001 [24:42<11:08:43,  8.32s/it][A
Train Diffusion:   4%|▎         | 179/5001 [24:50<10:53:34,  8.13s/it][A
Train Diffusion:   4%|▎         | 180/5001 [24:57<10:32:08,  7.87s/it][A


Moving average norm loss at <built-in function iter> iterations is: 67134.17421875. Best norm loss value is: 60837.09375.

C_PATH mean = tensor([[19.9804,  0.1887,  1.3120],
        [19.9507,  0.1962,  1.3201],
        [19.9375,  0.1942,  1.3323]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[7.2690e+00, 4.8627e-02, 3.2371e+00],
         [1.4927e+01, 5.9647e-03, 4.6941e+00],
         [3.8240e+00, 2.6345e-02, 1.0576e+00],
         ...,
         [2.1205e+01, 3.3023e-03, 1.2919e+00],
         [6.0845e+00, 4.8616e-04, 1.0547e+00],
         [2.1771e+01, 1.7657e-01, 1.5305e+00]],

        [[4.1621e+00, 6.4143e-01, 5.5863e+00],
         [3.4230e+00, 1.6400e+01, 2.2721e-01],
         [4.3759e+00, 1.0350e+00, 8.3220e+00],
         ...,
         [1.3355e+01, 1.6137e+00, 4.7014e-01],
         [2.4596e+01, 6.4225e-02, 1.5483e+00],
         [2.2343e+01, 1.0341e-01, 1.9070e-01]],

        [[9.8219e-01, 1.0260e-02, 3.3191e-01],
         [5.3019e-01, 4.3222e+00, 2.3707e-01],
         [4.1115e+00, 3.8


Train Diffusion:   4%|▎         | 181/5001 [25:04<10:14:39,  7.65s/it][A
Train Diffusion:   4%|▎         | 182/5001 [25:12<10:22:32,  7.75s/it][A
Train Diffusion:   4%|▎         | 183/5001 [25:21<10:52:48,  8.13s/it][A
Train Diffusion:   4%|▎         | 184/5001 [25:29<10:43:22,  8.01s/it][A
Train Diffusion:   4%|▎         | 185/5001 [25:37<10:48:54,  8.08s/it][A
Train Diffusion:   4%|▎         | 186/5001 [25:46<11:05:19,  8.29s/it][A
Train Diffusion:   4%|▎         | 187/5001 [25:56<11:46:36,  8.81s/it][A
Train Diffusion:   4%|▍         | 188/5001 [26:03<11:14:39,  8.41s/it][A
Train Diffusion:   4%|▍         | 189/5001 [26:11<11:03:59,  8.28s/it][A
Train Diffusion:   4%|▍         | 190/5001 [26:20<11:08:31,  8.34s/it][A


Moving average norm loss at <built-in function iter> iterations is: 55781.902734375. Best norm loss value is: 50242.6015625.

C_PATH mean = tensor([[20.8495,  0.1670,  1.3255],
        [20.7568,  0.1595,  1.3239],
        [20.7572,  0.1648,  1.3306]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[5.9015e+00, 6.8585e-02, 5.6539e+00],
         [1.3789e+01, 9.4066e+00, 4.7412e-01],
         [2.8935e+00, 1.4291e+00, 1.5250e+00],
         ...,
         [1.9805e+01, 4.1387e-03, 1.0938e+00],
         [2.4813e+01, 8.6564e-02, 1.0971e+00],
         [2.1549e+01, 5.4352e-03, 8.9335e-01]],

        [[1.8102e+00, 8.8361e-03, 7.4288e-01],
         [6.8349e-01, 2.9960e+00, 1.6929e-01],
         [9.5922e+00, 2.4677e-01, 1.2671e+00],
         ...,
         [2.2258e+01, 8.8811e-02, 1.3205e+00],
         [2.2107e+01, 8.8499e-04, 1.1917e+00],
         [2.3360e+01, 1.4166e-01, 1.2147e+00]],

        [[6.5133e+00, 4.4457e-02, 3.6089e+00],
         [1.4601e+01, 1.4008e-02, 1.3461e+01],
         [1.0044e+00, 


Train Diffusion:   4%|▍         | 191/5001 [26:29<11:29:19,  8.60s/it][A
Train Diffusion:   4%|▍         | 192/5001 [26:37<11:21:21,  8.50s/it][A
Train Diffusion:   4%|▍         | 193/5001 [26:45<10:56:26,  8.19s/it][A
Train Diffusion:   4%|▍         | 194/5001 [26:52<10:40:58,  8.00s/it][A
Train Diffusion:   4%|▍         | 195/5001 [27:01<10:46:58,  8.08s/it][A
Train Diffusion:   4%|▍         | 196/5001 [27:09<10:49:12,  8.11s/it][A
Train Diffusion:   4%|▍         | 197/5001 [27:16<10:35:52,  7.94s/it][A
Train Diffusion:   4%|▍         | 198/5001 [27:25<10:50:21,  8.12s/it][A
Train Diffusion:   4%|▍         | 199/5001 [27:34<11:01:35,  8.27s/it][A
Train Diffusion:   4%|▍         | 200/5001 [27:41<10:33:35,  7.92s/it][A


Moving average norm loss at <built-in function iter> iterations is: 47692.557421875. Best norm loss value is: 43205.9921875.

C_PATH mean = tensor([[21.1485,  0.1682,  1.3701],
        [21.2622,  0.1786,  1.2948],
        [21.2247,  0.1540,  1.3189]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.6849e+00, 9.1445e-01, 2.5714e+00],
         [9.7106e+00, 3.5456e-01, 7.2882e-01],
         [2.7924e+01, 6.8831e-02, 1.1027e+00],
         ...,
         [2.2891e+01, 1.1189e-01, 1.0608e+00],
         [2.4739e+01, 1.0818e-01, 1.0784e+00],
         [2.1252e+01, 1.2126e-02, 1.3549e+00]],

        [[4.3312e+00, 3.1212e-02, 3.7152e+00],
         [3.3952e+00, 3.4500e+00, 6.7437e-01],
         [9.3037e+00, 8.7510e+00, 5.0636e-01],
         ...,
         [1.9556e+01, 1.2838e-02, 1.3387e+00],
         [2.1532e+01, 8.2632e-02, 4.7698e-01],
         [2.2618e+01, 6.5285e-01, 1.4842e+00]],

        [[7.5000e+00, 5.9046e-02, 2.8154e+00],
         [2.0382e+01, 6.8669e-03, 1.8242e+00],
         [1.3091e+01, 


Train Diffusion:   4%|▍         | 201/5001 [27:48<10:30:32,  7.88s/it][A
Train Diffusion:   4%|▍         | 202/5001 [27:56<10:33:20,  7.92s/it][A
Train Diffusion:   4%|▍         | 203/5001 [28:04<10:35:17,  7.94s/it][A
Train Diffusion:   4%|▍         | 204/5001 [28:13<10:49:33,  8.12s/it][A
Train Diffusion:   4%|▍         | 205/5001 [28:22<11:06:46,  8.34s/it][A
Train Diffusion:   4%|▍         | 206/5001 [28:30<11:10:07,  8.39s/it][A
Train Diffusion:   4%|▍         | 207/5001 [28:39<11:16:12,  8.46s/it][A
Train Diffusion:   4%|▍         | 208/5001 [28:46<10:52:22,  8.17s/it][A
Train Diffusion:   4%|▍         | 209/5001 [28:55<11:01:50,  8.29s/it][A
Train Diffusion:   4%|▍         | 210/5001 [29:03<10:59:50,  8.26s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 43304775.777777776. Best ELBO loss value is: 33199604.0.

C_PATH mean = tensor([[19.2791,  0.2103,  1.4310],
        [19.1896,  0.2089,  1.4224],
        [19.2973,  0.1888,  1.4395]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.6257e+00, 7.6475e-02, 1.2488e+00],
         [5.0732e+00, 1.9941e-01, 8.0901e-01],
         [1.3729e+01, 1.7697e-01, 8.1320e+00],
         ...,
         [1.8595e+01, 3.4760e-03, 1.9232e-01],
         [2.3760e+01, 8.3453e-02, 8.5330e-01],
         [2.1848e+01, 7.9191e-02, 1.4035e+00]],

        [[3.7387e+00, 7.8751e-01, 1.5981e+00],
         [1.3755e+00, 4.6189e+00, 4.7291e-01],
         [1.3052e+01, 2.0597e+00, 1.9260e+00],
         ...,
         [2.1507e+01, 4.1213e-01, 1.5978e+00],
         [2.1451e+01, 1.9655e-01, 2.4350e+00],
         [1.8523e+01, 1.4992e-02, 1.2004e-01]],

        [[4.4560e+00, 1.3604e-01, 2.9175e+00],
         [1.9610e+01, 1.2192e-02, 6.1777e+00],
         [1.0925e+01, 


Train Diffusion:   4%|▍         | 211/5001 [29:12<11:02:35,  8.30s/it][A
Train Diffusion:   4%|▍         | 212/5001 [29:20<10:56:46,  8.23s/it][A
Train Diffusion:   4%|▍         | 213/5001 [29:28<10:49:33,  8.14s/it][A
Train Diffusion:   4%|▍         | 214/5001 [29:36<10:51:50,  8.17s/it][A
Train Diffusion:   4%|▍         | 215/5001 [29:45<11:14:29,  8.46s/it][A
Train Diffusion:   4%|▍         | 216/5001 [29:52<10:46:06,  8.10s/it][A
Train Diffusion:   4%|▍         | 217/5001 [29:59<10:24:00,  7.83s/it][A
Train Diffusion:   4%|▍         | 218/5001 [30:07<10:26:48,  7.86s/it][A
Train Diffusion:   4%|▍         | 219/5001 [30:15<10:26:43,  7.86s/it][A
Train Diffusion:   4%|▍         | 220/5001 [30:23<10:13:06,  7.69s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 28135700.4. Best ELBO loss value is: 24346608.0.

C_PATH mean = tensor([[18.0083,  0.2428,  1.5096],
        [17.9276,  0.2424,  1.4799],
        [18.1391,  0.2249,  1.4776]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.4422e+00, 8.4070e-02, 4.7201e-01],
         [1.2787e+00, 1.1540e-01, 1.1127e-01],
         [2.4915e+00, 1.3257e+00, 2.1055e-01],
         ...,
         [5.3426e+00, 9.0520e-01, 1.2393e+00],
         [2.6250e+01, 1.1357e+00, 2.8052e+00],
         [1.3546e+00, 5.5042e-01, 1.4223e+00]],

        [[4.5975e+00, 1.9878e-01, 7.2074e-01],
         [1.7044e+01, 3.1361e+00, 2.9526e+00],
         [4.2650e+00, 3.4550e+00, 4.8416e+00],
         ...,
         [2.5967e+01, 9.9823e-02, 1.6121e+00],
         [3.5395e+00, 3.8424e-04, 1.2172e-01],
         [1.2794e+01, 2.3532e-01, 3.0194e+00]],

        [[4.4189e+00, 1.5122e-01, 4.0457e-01],
         [8.4686e+00, 1.3090e-01, 1.5651e+00],
         [9.6385e-02, 3.5056e-


Train Diffusion:   4%|▍         | 221/5001 [30:30<10:06:19,  7.61s/it][A
Train Diffusion:   4%|▍         | 222/5001 [30:39<10:35:31,  7.98s/it][A
Train Diffusion:   4%|▍         | 223/5001 [30:48<10:59:09,  8.28s/it][A
Train Diffusion:   4%|▍         | 224/5001 [30:55<10:43:48,  8.09s/it][A
Train Diffusion:   4%|▍         | 225/5001 [31:03<10:37:26,  8.01s/it][A
Train Diffusion:   5%|▍         | 226/5001 [31:11<10:25:32,  7.86s/it][A
Train Diffusion:   5%|▍         | 227/5001 [31:20<10:48:19,  8.15s/it][A
Train Diffusion:   5%|▍         | 228/5001 [31:28<10:53:44,  8.22s/it][A
Train Diffusion:   5%|▍         | 229/5001 [31:35<10:34:20,  7.98s/it][A
Train Diffusion:   5%|▍         | 230/5001 [31:43<10:15:39,  7.74s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 26685096.6. Best ELBO loss value is: 18317102.0.

C_PATH mean = tensor([[17.9513,  0.1689,  1.5507],
        [17.9135,  0.1571,  1.5324],
        [18.2440,  0.1442,  1.5894]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.1703e+00, 4.2880e-01, 1.7082e-01],
         [5.5694e+00, 2.6340e+00, 6.4529e-01],
         [6.2274e+00, 9.3534e-01, 1.8384e+00],
         ...,
         [2.2045e-01, 9.2684e-03, 2.2200e+00],
         [2.5339e+01, 9.8452e-02, 1.5696e+00],
         [3.0360e+01, 5.4280e-02, 4.3495e+00]],

        [[2.1519e+00, 8.1597e-02, 5.7366e-01],
         [3.7466e+00, 1.7302e+00, 5.9138e-02],
         [8.3205e+00, 1.2312e-01, 1.5213e+00],
         ...,
         [2.7639e+01, 4.4871e-01, 2.1589e+00],
         [1.5760e+00, 4.4391e-01, 8.5409e-01],
         [5.5669e-01, 4.2278e+00, 5.3793e-02]],

        [[4.8998e+00, 1.6384e-01, 5.5813e-01],
         [1.2904e+01, 2.1470e-02, 6.0373e+00],
         [3.9701e+00, 9.7657e-


Train Diffusion:   5%|▍         | 231/5001 [31:52<10:52:56,  8.21s/it][A
Train Diffusion:   5%|▍         | 232/5001 [32:00<10:51:41,  8.20s/it][A
Train Diffusion:   5%|▍         | 233/5001 [32:08<10:46:16,  8.13s/it][A
Train Diffusion:   5%|▍         | 234/5001 [32:16<10:41:07,  8.07s/it][A
Train Diffusion:   5%|▍         | 235/5001 [32:23<10:27:41,  7.90s/it][A
Train Diffusion:   5%|▍         | 236/5001 [32:31<10:25:02,  7.87s/it][A
Train Diffusion:   5%|▍         | 237/5001 [32:40<10:40:27,  8.07s/it][A
Train Diffusion:   5%|▍         | 238/5001 [32:48<10:39:29,  8.06s/it][A
Train Diffusion:   5%|▍         | 239/5001 [32:55<10:25:55,  7.89s/it][A
Train Diffusion:   5%|▍         | 240/5001 [33:04<10:35:11,  8.00s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 18929880.1. Best ELBO loss value is: 16543021.0.

C_PATH mean = tensor([[17.8520,  0.1521,  1.3822],
        [18.1220,  0.1499,  1.4280],
        [17.7744,  0.1509,  1.4236]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.3286e+00, 2.5325e-01, 3.0367e-01],
         [1.2209e+01, 1.0006e+00, 1.0910e+00],
         [1.9421e+00, 1.2796e+00, 1.4858e+00],
         ...,
         [1.4074e+01, 2.2976e-01, 1.0410e+00],
         [2.2743e+00, 6.4440e-03, 9.5145e-01],
         [2.1476e+01, 1.1391e-01, 1.7759e+00]],

        [[2.0394e+00, 7.0600e-02, 5.4943e-01],
         [2.7097e+00, 2.2959e+00, 1.1888e-01],
         [7.3153e+00, 2.7874e-01, 9.0677e-01],
         ...,
         [2.0927e+01, 1.3283e-01, 1.6008e+00],
         [2.7579e+01, 9.1385e-02, 1.9463e+00],
         [1.0579e+01, 1.0448e-01, 1.1041e+00]],

        [[4.4609e+00, 1.5291e-01, 3.1502e-01],
         [8.7785e+00, 2.0014e-02, 4.2226e+00],
         [6.8805e-01, 4.2608e-


Train Diffusion:   5%|▍         | 241/5001 [33:12<10:50:44,  8.20s/it][A
Train Diffusion:   5%|▍         | 242/5001 [33:21<10:52:17,  8.22s/it][A
Train Diffusion:   5%|▍         | 243/5001 [33:29<11:08:15,  8.43s/it][A
Train Diffusion:   5%|▍         | 244/5001 [33:38<11:21:53,  8.60s/it][A
Train Diffusion:   5%|▍         | 245/5001 [33:48<11:40:54,  8.84s/it][A
Train Diffusion:   5%|▍         | 246/5001 [33:55<11:09:42,  8.45s/it][A
Train Diffusion:   5%|▍         | 247/5001 [34:03<10:52:53,  8.24s/it][A
Train Diffusion:   5%|▍         | 248/5001 [34:12<11:10:57,  8.47s/it][A
Train Diffusion:   5%|▍         | 249/5001 [34:22<11:35:58,  8.79s/it][A
Train Diffusion:   5%|▍         | 250/5001 [34:30<11:19:18,  8.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 13139548.6. Best ELBO loss value is: 9857564.0.

C_PATH mean = tensor([[17.7844,  0.1213,  1.0488],
        [17.8311,  0.1204,  1.0451],
        [17.5614,  0.1262,  1.0634]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.5929e+00, 1.2671e-01, 5.9082e-01],
         [4.1029e-01, 7.8331e-01, 1.1530e+00],
         [1.1492e-01, 1.0902e+00, 1.0996e+00],
         ...,
         [2.3251e+01, 8.3844e-02, 1.3662e+00],
         [2.3214e+01, 3.8357e-03, 4.1888e+00],
         [1.5746e+01, 1.3501e-01, 4.4722e-01]],

        [[2.5141e+00, 6.0532e-01, 4.3927e-01],
         [2.0383e+00, 3.7306e-01, 3.6429e-01],
         [2.7376e+01, 3.8222e-02, 1.1802e+00],
         ...,
         [1.1796e+01, 4.9200e-03, 4.6339e-01],
         [2.8962e+01, 7.3605e-02, 8.9608e-01],
         [2.7995e+01, 8.7303e-03, 2.6979e+00]],

        [[4.4840e+00, 1.5070e-01, 8.4318e-01],
         [1.9461e+01, 6.6236e-02, 3.4742e+00],
         [5.4261e-01, 1.3555e-0


Train Diffusion:   5%|▌         | 251/5001 [34:39<11:28:44,  8.70s/it][A
Train Diffusion:   5%|▌         | 252/5001 [34:47<11:10:25,  8.47s/it][A
Train Diffusion:   5%|▌         | 253/5001 [34:54<10:54:39,  8.27s/it][A
Train Diffusion:   5%|▌         | 254/5001 [35:02<10:44:07,  8.14s/it][A
Train Diffusion:   5%|▌         | 255/5001 [35:10<10:44:30,  8.15s/it][A
Train Diffusion:   5%|▌         | 256/5001 [35:19<11:04:30,  8.40s/it][A
Train Diffusion:   5%|▌         | 257/5001 [35:29<11:30:11,  8.73s/it][A
Train Diffusion:   5%|▌         | 258/5001 [35:38<11:36:53,  8.82s/it][A
Train Diffusion:   5%|▌         | 259/5001 [35:45<11:04:40,  8.41s/it][A
Train Diffusion:   5%|▌         | 260/5001 [35:53<10:43:48,  8.15s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 11478312.8. Best ELBO loss value is: 8856316.0.

C_PATH mean = tensor([[17.6494,  0.1174,  0.9181],
        [17.6467,  0.1256,  0.9126],
        [17.7322,  0.1118,  0.9121]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.8238e+00, 5.3558e-01, 3.9050e-01],
         [3.5718e-01, 7.7741e-01, 2.7340e-01],
         [2.8312e-01, 1.8548e+00, 5.1329e-01],
         ...,
         [1.7109e+01, 1.2958e-03, 6.1647e-01],
         [2.9613e+01, 4.0164e-02, 9.7366e-01],
         [2.5250e+01, 1.6919e-02, 1.1630e+00]],

        [[2.4270e+00, 1.0713e-01, 6.1695e-01],
         [6.8737e-01, 5.3558e-01, 7.0122e-01],
         [2.6575e+00, 8.5858e-01, 7.4608e-01],
         ...,
         [1.8040e+01, 4.0800e-02, 2.6730e+00],
         [1.6232e+01, 6.0043e-02, 8.0736e-01],
         [1.5605e+01, 3.4511e-02, 4.6359e-01]],

        [[4.1923e+00, 1.4838e-01, 8.9681e-01],
         [2.0138e+01, 2.0524e-01, 1.2981e+00],
         [1.7971e+00, 2.5203e+0


Train Diffusion:   5%|▌         | 261/5001 [36:01<10:34:34,  8.03s/it][A
Train Diffusion:   5%|▌         | 262/5001 [36:09<10:31:58,  8.00s/it][A
Train Diffusion:   5%|▌         | 263/5001 [36:16<10:17:00,  7.81s/it][A
Train Diffusion:   5%|▌         | 264/5001 [36:25<10:52:30,  8.26s/it][A
Train Diffusion:   5%|▌         | 265/5001 [36:34<10:51:40,  8.26s/it][A
Train Diffusion:   5%|▌         | 266/5001 [36:41<10:29:45,  7.98s/it][A
Train Diffusion:   5%|▌         | 267/5001 [36:49<10:28:04,  7.96s/it][A
Train Diffusion:   5%|▌         | 268/5001 [36:56<10:16:14,  7.81s/it][A
Train Diffusion:   5%|▌         | 269/5001 [37:04<10:14:23,  7.79s/it][A
Train Diffusion:   5%|▌         | 270/5001 [37:13<10:40:10,  8.12s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 9649660.65. Best ELBO loss value is: 8109782.5.

C_PATH mean = tensor([[17.5323,  0.0997,  0.8654],
        [17.8017,  0.1001,  0.8544],
        [17.5941,  0.0894,  0.8733]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.1998e+00, 5.3461e-02, 7.0088e-01],
         [4.6973e+00, 2.4069e-01, 6.8071e-01],
         [3.1297e+01, 4.2675e-02, 1.2900e+00],
         ...,
         [1.7985e+00, 8.7492e-01, 3.6098e-01],
         [1.6021e+01, 1.0181e-03, 5.8557e-01],
         [2.5992e+01, 4.4100e-02, 1.3517e+00]],

        [[3.2774e+00, 1.9455e-01, 1.6131e-01],
         [2.0179e+00, 3.7508e-01, 1.8040e+00],
         [8.3799e-02, 4.1347e-01, 1.9369e+00],
         ...,
         [2.3593e+01, 4.1261e-04, 6.3636e-01],
         [2.6533e+01, 4.6134e-02, 8.4738e-01],
         [1.8307e+01, 7.3049e-02, 1.0886e+00]],

        [[3.7609e+00, 1.4249e-01, 6.4094e-01],
         [1.3572e+01, 3.3454e-02, 5.2911e+00],
         [5.7686e+00, 2.6985e-0


Train Diffusion:   5%|▌         | 271/5001 [37:21<10:42:07,  8.15s/it][A
Train Diffusion:   5%|▌         | 272/5001 [37:30<10:48:39,  8.23s/it][A
Train Diffusion:   5%|▌         | 273/5001 [37:38<10:57:46,  8.35s/it][A
Train Diffusion:   5%|▌         | 274/5001 [37:47<11:11:47,  8.53s/it][A
Train Diffusion:   5%|▌         | 275/5001 [37:55<11:00:53,  8.39s/it][A
Train Diffusion:   6%|▌         | 276/5001 [38:03<10:48:39,  8.24s/it][A
Train Diffusion:   6%|▌         | 277/5001 [38:11<10:42:01,  8.15s/it][A
Train Diffusion:   6%|▌         | 278/5001 [38:19<10:46:58,  8.22s/it][A
Train Diffusion:   6%|▌         | 279/5001 [38:29<11:12:13,  8.54s/it][A
Train Diffusion:   6%|▌         | 280/5001 [38:37<10:57:45,  8.36s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 10354200.1. Best ELBO loss value is: 7814746.0.

C_PATH mean = tensor([[17.3490,  0.1114,  0.8390],
        [16.9208,  0.1192,  0.7832],
        [16.7434,  0.1166,  0.8188]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.2708e+00, 8.1379e-02, 8.6293e-01],
         [3.3336e+00, 2.3857e-01, 5.5921e-01],
         [2.6965e+01, 3.9335e-02, 1.0244e+00],
         ...,
         [1.9420e+01, 7.4655e-02, 1.2640e+00],
         [2.4279e+01, 5.4134e-02, 8.0306e-01],
         [1.5896e+01, 3.3319e-02, 1.2254e+00]],

        [[3.7777e+00, 1.8427e-01, 6.2767e-01],
         [1.4343e+01, 2.8326e-02, 6.3611e+00],
         [7.8072e+00, 3.9797e-02, 3.4917e+00],
         ...,
         [1.9248e+01, 5.3704e-02, 6.4606e-01],
         [9.0635e+00, 3.9828e-03, 5.9382e-01],
         [2.4376e+01, 4.7805e-02, 1.0586e+00]],

        [[3.0258e+00, 3.0125e-01, 1.6954e-01],
         [9.9937e-01, 2.5861e-01, 3.0475e-01],
         [4.8229e-01, 5.6196e-0


Train Diffusion:   6%|▌         | 281/5001 [38:45<10:47:45,  8.23s/it][A
Train Diffusion:   6%|▌         | 282/5001 [38:53<10:40:35,  8.14s/it][A
Train Diffusion:   6%|▌         | 283/5001 [39:01<10:52:44,  8.30s/it][A
Train Diffusion:   6%|▌         | 284/5001 [39:10<11:00:29,  8.40s/it][A
Train Diffusion:   6%|▌         | 285/5001 [39:18<10:57:51,  8.37s/it][A
Train Diffusion:   6%|▌         | 286/5001 [39:26<10:54:41,  8.33s/it][A
Train Diffusion:   6%|▌         | 287/5001 [39:34<10:37:11,  8.11s/it][A
Train Diffusion:   6%|▌         | 288/5001 [39:42<10:26:17,  7.97s/it][A
Train Diffusion:   6%|▌         | 289/5001 [39:49<10:22:15,  7.92s/it][A
Train Diffusion:   6%|▌         | 290/5001 [39:58<10:32:49,  8.06s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 8446026.45. Best ELBO loss value is: 7181231.0.

C_PATH mean = tensor([[16.9414,  0.1062,  0.8055],
        [16.9470,  0.1015,  0.8137],
        [16.8867,  0.1072,  0.8305]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.2460e+00, 7.5487e-02, 8.5689e-01],
         [3.5839e+00, 1.7920e-01, 6.5313e-01],
         [2.9816e+01, 3.7835e-02, 1.0036e+00],
         ...,
         [1.3219e+01, 1.5770e-02, 5.9808e-01],
         [6.7181e+00, 2.1281e-01, 7.8778e-01],
         [2.8696e+01, 7.1061e-03, 3.6297e-01]],

        [[3.4789e+00, 1.9664e-01, 5.5526e-01],
         [1.4358e+01, 5.5180e-02, 3.1544e+00],
         [9.6610e-01, 1.7350e-01, 2.8531e+00],
         ...,
         [2.1338e+01, 1.1917e-01, 8.4555e-01],
         [1.5060e+01, 1.0955e-01, 4.7812e-01],
         [2.9377e+00, 2.7407e-01, 5.0556e-01]],

        [[2.9827e+00, 2.5313e-01, 1.7007e-01],
         [2.9676e-01, 6.7634e-01, 1.5648e+00],
         [2.2902e-01, 7.3311e-0


Train Diffusion:   6%|▌         | 291/5001 [40:06<10:45:18,  8.22s/it][A
Train Diffusion:   6%|▌         | 292/5001 [40:15<10:56:55,  8.37s/it][A
Train Diffusion:   6%|▌         | 293/5001 [40:22<10:30:53,  8.04s/it][A
Train Diffusion:   6%|▌         | 294/5001 [40:37<13:06:46, 10.03s/it][A
Train Diffusion:   6%|▌         | 295/5001 [40:47<13:09:17, 10.06s/it][A
Train Diffusion:   6%|▌         | 296/5001 [40:56<12:39:54,  9.69s/it][A
Train Diffusion:   6%|▌         | 297/5001 [41:05<12:11:35,  9.33s/it][A
Train Diffusion:   6%|▌         | 298/5001 [41:14<12:03:35,  9.23s/it][A
Train Diffusion:   6%|▌         | 299/5001 [41:22<11:53:29,  9.10s/it][A
Train Diffusion:   6%|▌         | 300/5001 [41:33<12:29:58,  9.57s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 10735585.7. Best ELBO loss value is: 7181231.0.

C_PATH mean = tensor([[15.8762,  0.1433,  0.7567],
        [16.0465,  0.1425,  0.7519],
        [16.1674,  0.1434,  0.7738]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.2553e+00, 1.7361e-01, 4.3934e-01],
         [1.3505e+01, 7.6154e-02, 2.6022e+00],
         [3.3625e-01, 3.9830e-02, 2.5482e+00],
         ...,
         [1.1189e+01, 2.1252e-01, 7.1207e-01],
         [3.2829e+01, 8.2550e-01, 1.1080e+00],
         [2.4476e+00, 3.6338e-01, 3.7947e-01]],

        [[2.7642e+00, 1.8606e-01, 1.3504e-01],
         [2.6886e-01, 2.9706e-01, 1.8445e+00],
         [3.7498e-01, 3.9900e-01, 5.8638e+00],
         ...,
         [2.7190e+01, 4.6457e-02, 6.8694e-01],
         [5.6300e+00, 3.2080e-02, 9.0453e-01],
         [4.0676e+00, 4.5803e-01, 2.7088e+00]],

        [[1.8697e+00, 5.3851e-02, 7.3652e-01],
         [3.3879e+00, 2.1025e-01, 5.9329e-01],
         [2.7243e+01, 3.3471e-0


Train Diffusion:   6%|▌         | 301/5001 [41:42<12:23:59,  9.50s/it][A
Train Diffusion:   6%|▌         | 302/5001 [41:52<12:32:19,  9.61s/it][A
Train Diffusion:   6%|▌         | 303/5001 [42:01<12:15:22,  9.39s/it][A
Train Diffusion:   6%|▌         | 304/5001 [42:09<11:39:44,  8.94s/it][A
Train Diffusion:   6%|▌         | 305/5001 [42:18<11:38:05,  8.92s/it][A
Train Diffusion:   6%|▌         | 306/5001 [42:26<11:15:16,  8.63s/it][A
Train Diffusion:   6%|▌         | 307/5001 [42:34<11:06:10,  8.52s/it][A
Train Diffusion:   6%|▌         | 308/5001 [42:44<11:36:54,  8.91s/it][A
Train Diffusion:   6%|▌         | 309/5001 [42:54<11:55:10,  9.15s/it][A
Train Diffusion:   6%|▌         | 310/5001 [43:06<13:08:16, 10.08s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 9189234.35. Best ELBO loss value is: 7181231.0.

C_PATH mean = tensor([[16.0364,  0.1102,  0.8483],
        [15.9511,  0.1135,  0.8327],
        [15.8644,  0.1085,  0.8351]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 2.2599,  0.1045,  0.8181],
         [ 2.0140,  0.3917,  0.1789],
         [13.4841,  0.0645,  0.7531],
         ...,
         [26.6035,  0.0451,  0.6262],
         [10.5356,  0.0454,  2.0889],
         [ 0.2370,  0.3785,  0.3262]],

        [[ 3.6171,  0.2340,  0.5979],
         [13.3405,  0.0356,  5.7119],
         [ 7.5111,  0.0467,  2.9013],
         ...,
         [ 0.2739,  0.0509,  0.4124],
         [ 1.6860,  0.2309,  0.8203],
         [ 3.5906,  0.4080,  1.5485]],

        [[ 2.9602,  0.2804,  0.2018],
         [ 0.9143,  0.3038,  0.2332],
         [ 2.8750,  0.4142,  0.2019],
         ...,
         [13.4416,  0.1283,  0.8347],
         [25.4313,  0.3856,  0.8522],
         [ 1.1744,  0.1675,  


Train Diffusion:   6%|▌         | 311/5001 [43:21<15:02:28, 11.55s/it][A
Train Diffusion:   6%|▌         | 312/5001 [43:33<15:21:49, 11.80s/it][A
Train Diffusion:   6%|▋         | 313/5001 [43:46<15:38:17, 12.01s/it][A
Train Diffusion:   6%|▋         | 314/5001 [43:55<14:39:36, 11.26s/it][A
Train Diffusion:   6%|▋         | 315/5001 [44:03<13:12:21, 10.15s/it][A
Train Diffusion:   6%|▋         | 316/5001 [44:10<12:15:11,  9.42s/it][A
Train Diffusion:   6%|▋         | 317/5001 [44:21<12:35:30,  9.68s/it][A
Train Diffusion:   6%|▋         | 318/5001 [44:30<12:34:16,  9.66s/it][A
Train Diffusion:   6%|▋         | 319/5001 [44:44<14:05:20, 10.83s/it][A
Train Diffusion:   6%|▋         | 320/5001 [44:55<14:19:13, 11.01s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 9054518.3. Best ELBO loss value is: 7181231.0.

C_PATH mean = tensor([[15.3575,  0.1312,  0.8556],
        [15.5802,  0.1255,  0.8632],
        [15.5558,  0.1252,  0.8375]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.7972e+00, 4.1487e-01, 3.2385e-01],
         [2.3839e-01, 1.1606e+00, 9.1558e-02],
         [3.0876e+00, 2.2519e-01, 3.8816e-01],
         ...,
         [1.4915e+01, 1.0975e-02, 7.2592e-01],
         [2.6790e+01, 4.4341e-02, 9.3950e-01],
         [2.3144e+01, 3.3855e-02, 5.5258e-01]],

        [[2.3016e+00, 1.6053e-01, 8.7334e-01],
         [8.6423e-01, 5.7377e-01, 1.1201e+00],
         [7.3518e-01, 7.8944e-01, 1.5562e+00],
         ...,
         [1.7660e+01, 6.1840e-02, 7.1291e-01],
         [6.6313e+00, 1.1158e-01, 3.1756e-01],
         [1.4641e+01, 8.7929e-02, 4.7363e-01]],

        [[3.7498e+00, 2.2860e-01, 5.0148e-01],
         [1.3585e+01, 1.8534e-01, 1.0613e+00],
         [6.8030e-01, 1.0374e+00


Train Diffusion:   6%|▋         | 321/5001 [45:06<14:21:28, 11.04s/it][A
Train Diffusion:   6%|▋         | 322/5001 [45:16<13:51:25, 10.66s/it][A
Train Diffusion:   6%|▋         | 323/5001 [45:27<13:44:58, 10.58s/it][A
Train Diffusion:   6%|▋         | 324/5001 [45:37<13:42:37, 10.55s/it][A
Train Diffusion:   6%|▋         | 325/5001 [45:46<13:00:47, 10.02s/it][A
Train Diffusion:   7%|▋         | 326/5001 [45:54<12:06:47,  9.33s/it][A
Train Diffusion:   7%|▋         | 327/5001 [46:01<11:17:14,  8.69s/it][A
Train Diffusion:   7%|▋         | 328/5001 [46:08<10:44:21,  8.27s/it][A
Train Diffusion:   7%|▋         | 329/5001 [46:24<13:40:29, 10.54s/it][A
Train Diffusion:   7%|▋         | 330/5001 [46:38<15:00:03, 11.56s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 6946483.95. Best ELBO loss value is: 5686509.0.

C_PATH mean = tensor([[14.9415,  0.1469,  0.8178],
        [14.7044,  0.1399,  0.8333],
        [14.8624,  0.1477,  0.8285]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.4796e+00, 2.7982e-01, 6.0795e-01],
         [1.2460e+01, 2.4704e-01, 7.7412e-01],
         [1.0011e+00, 8.9196e-01, 1.0053e+00],
         ...,
         [7.0245e-01, 7.3738e-01, 3.0118e-01],
         [2.4135e-01, 9.0720e-03, 2.6835e-01],
         [1.1202e+01, 2.0554e-01, 2.5006e+00]],

        [[2.7711e+00, 2.4537e-01, 1.8984e-01],
         [2.3073e-01, 1.8961e-01, 1.4877e+00],
         [1.2345e-01, 4.4113e-01, 6.9502e-01],
         ...,
         [2.3530e+01, 1.2784e-02, 6.6727e-01],
         [2.6693e+01, 5.8580e-02, 9.9262e-01],
         [2.4671e+01, 4.5322e-02, 1.6529e+00]],

        [[2.1440e+00, 2.0332e-01, 8.2833e-01],
         [6.8515e-01, 9.2620e-01, 3.5665e-01],
         [1.5476e+00, 5.6623e-0


Train Diffusion:   7%|▋         | 331/5001 [46:53<16:11:20, 12.48s/it][A
Train Diffusion:   7%|▋         | 332/5001 [47:06<16:40:34, 12.86s/it][A
Train Diffusion:   7%|▋         | 333/5001 [47:17<15:56:28, 12.29s/it][A
Train Diffusion:   7%|▋         | 334/5001 [47:30<16:04:30, 12.40s/it][A
Train Diffusion:   7%|▋         | 335/5001 [47:42<16:03:24, 12.39s/it][A
Train Diffusion:   7%|▋         | 336/5001 [47:55<16:10:02, 12.48s/it][A
Train Diffusion:   7%|▋         | 337/5001 [48:07<15:55:20, 12.29s/it][A
Train Diffusion:   7%|▋         | 338/5001 [48:23<17:34:25, 13.57s/it][A
Train Diffusion:   7%|▋         | 339/5001 [48:38<18:09:22, 14.02s/it][A
Train Diffusion:   7%|▋         | 340/5001 [48:51<17:38:21, 13.62s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 7265545.6. Best ELBO loss value is: 5686509.0.

C_PATH mean = tensor([[14.2522,  0.2262,  0.8761],
        [14.3664,  0.2215,  0.8640],
        [14.2932,  0.2133,  0.8821]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.6783e+00, 4.4440e-01, 1.6612e+00],
         [1.6296e+00, 2.4271e-01, 2.3072e-01],
         [1.8097e+01, 1.8697e-01, 7.9929e-01],
         ...,
         [1.9649e+01, 7.6716e-02, 7.9926e-01],
         [2.0941e+01, 9.5566e-03, 1.4201e+00],
         [1.8745e+01, 1.1695e-01, 8.8114e-01]],

        [[3.4997e+00, 4.4939e-01, 6.6103e-01],
         [1.2940e+01, 3.1644e-01, 1.5588e+00],
         [2.0771e-01, 1.4149e-01, 1.5861e+00],
         ...,
         [7.6494e+00, 4.2533e-02, 8.2821e-01],
         [2.2951e+01, 1.3202e-01, 4.5178e-01],
         [2.7463e+01, 2.1701e-02, 6.7569e-01]],

        [[3.1049e+00, 3.6389e-01, 3.6615e-01],
         [2.3885e-01, 5.2138e-01, 1.8402e+00],
         [2.6700e-01, 4.9126e-01


Train Diffusion:   7%|▋         | 341/5001 [49:04<17:15:37, 13.33s/it][A
Train Diffusion:   7%|▋         | 342/5001 [49:15<16:32:50, 12.79s/it][A
Train Diffusion:   7%|▋         | 343/5001 [49:25<15:26:59, 11.94s/it][A
Train Diffusion:   7%|▋         | 344/5001 [49:36<14:48:09, 11.44s/it][A
Train Diffusion:   7%|▋         | 345/5001 [49:46<14:30:53, 11.22s/it][A
Train Diffusion:   7%|▋         | 346/5001 [49:56<14:02:11, 10.86s/it][A
Train Diffusion:   7%|▋         | 347/5001 [50:07<14:10:04, 10.96s/it][A
Train Diffusion:   7%|▋         | 348/5001 [50:22<15:24:07, 11.92s/it][A
Train Diffusion:   7%|▋         | 349/5001 [50:37<16:36:24, 12.85s/it][A
Train Diffusion:   7%|▋         | 350/5001 [50:53<17:48:21, 13.78s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 8576050.3. Best ELBO loss value is: 5686509.0.

C_PATH mean = tensor([[15.2106,  0.1225,  0.8668],
        [15.1580,  0.1245,  0.8955],
        [15.1621,  0.1266,  0.9108]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.2143e+00, 4.4558e-01, 3.5773e-01],
         [3.8736e+00, 5.3892e-01, 1.7706e+00],
         [5.1651e-01, 6.7562e-01, 1.8490e+00],
         ...,
         [9.8014e+00, 3.0969e-02, 1.0776e+00],
         [2.4028e+01, 1.2300e-02, 1.8790e+00],
         [1.1990e+01, 3.8323e-02, 7.6624e-01]],

        [[3.5295e+00, 3.2162e-01, 4.1325e-01],
         [9.4553e+00, 1.1200e-01, 3.4005e+00],
         [6.0408e-01, 7.1606e-01, 1.4224e+00],
         ...,
         [5.9605e-01, 1.2918e+00, 6.5019e-01],
         [3.6092e-01, 3.6526e-01, 6.5904e-01],
         [2.0372e+00, 8.3427e-02, 1.1439e+00]],

        [[2.0543e+00, 2.9230e-01, 4.1863e-01],
         [3.1546e+00, 1.4073e-01, 4.0136e-01],
         [2.6752e+01, 2.2838e-02


Train Diffusion:   7%|▋         | 351/5001 [51:02<16:12:43, 12.55s/it][A
Train Diffusion:   7%|▋         | 352/5001 [51:10<14:29:38, 11.22s/it][A
Train Diffusion:   7%|▋         | 353/5001 [51:18<13:04:51, 10.13s/it][A
Train Diffusion:   7%|▋         | 354/5001 [51:26<12:08:02,  9.40s/it][A
Train Diffusion:   7%|▋         | 355/5001 [51:34<11:40:51,  9.05s/it][A
Train Diffusion:   7%|▋         | 356/5001 [51:44<12:16:51,  9.52s/it][A
Train Diffusion:   7%|▋         | 357/5001 [51:57<13:14:48, 10.27s/it][A
Train Diffusion:   7%|▋         | 358/5001 [52:10<14:39:11, 11.36s/it][A
Train Diffusion:   7%|▋         | 359/5001 [52:23<15:09:41, 11.76s/it][A
Train Diffusion:   7%|▋         | 360/5001 [52:37<16:06:18, 12.49s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 7359668.5. Best ELBO loss value is: 5686509.0.

C_PATH mean = tensor([[14.0418,  0.1303,  1.1855],
        [14.0465,  0.1260,  1.1862],
        [14.0013,  0.1280,  1.1994]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.4590e+00, 2.6238e-01, 5.1767e-01],
         [1.2656e+01, 2.6223e-01, 2.3287e+00],
         [1.6734e+00, 1.1482e+00, 2.9239e+00],
         ...,
         [1.3994e+01, 2.8199e-01, 7.7773e-01],
         [2.0100e+00, 3.2345e-01, 5.7751e-01],
         [4.4848e-01, 3.8977e-01, 4.0679e-01]],

        [[2.0254e+00, 2.7684e-01, 5.5013e-01],
         [9.9190e-01, 3.3322e-01, 6.6665e-01],
         [2.6702e+00, 2.9034e-01, 8.6728e-01],
         ...,
         [4.0363e+00, 3.9523e-02, 1.2391e+00],
         [2.5386e+01, 3.1767e-02, 1.0300e+00],
         [2.5567e+01, 1.4189e-02, 1.2284e+00]],

        [[2.9023e+00, 4.3423e-01, 3.3947e-01],
         [2.5635e-01, 7.1864e-01, 1.8490e-01],
         [4.2653e-02, 4.6395e-01


Train Diffusion:   7%|▋         | 361/5001 [52:49<15:38:26, 12.14s/it][A
Train Diffusion:   7%|▋         | 362/5001 [53:02<15:55:44, 12.36s/it][A
Train Diffusion:   7%|▋         | 363/5001 [53:13<15:37:31, 12.13s/it][A
Train Diffusion:   7%|▋         | 364/5001 [53:26<16:02:39, 12.46s/it][A
Train Diffusion:   7%|▋         | 365/5001 [53:38<15:46:35, 12.25s/it][A
Train Diffusion:   7%|▋         | 366/5001 [53:56<17:51:51, 13.88s/it][A
Train Diffusion:   7%|▋         | 367/5001 [54:08<17:12:04, 13.36s/it][A
Train Diffusion:   7%|▋         | 368/5001 [54:17<15:29:52, 12.04s/it][A
Train Diffusion:   7%|▋         | 369/5001 [54:25<14:07:57, 10.98s/it][A
Train Diffusion:   7%|▋         | 370/5001 [54:36<13:47:53, 10.73s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 6312947.6. Best ELBO loss value is: 5408053.5.

C_PATH mean = tensor([[14.0906,  0.1057,  0.7560],
        [13.8204,  0.1150,  0.7559],
        [13.9227,  0.1143,  0.7630]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.7449e+00, 1.8919e-01, 2.9070e-01],
         [4.5944e-01, 5.3951e-01, 9.1350e-01],
         [4.9002e-01, 9.7796e-01, 6.1764e-01],
         ...,
         [3.5195e+00, 4.0470e-01, 1.1285e+00],
         [1.9090e+01, 1.8352e-03, 7.3643e-01],
         [2.2579e+01, 3.4106e-02, 1.4704e+00]],

        [[2.3451e+00, 7.4441e-01, 2.5377e-01],
         [4.1585e-01, 2.3644e+00, 6.4399e-02],
         [2.5002e+00, 2.6886e-01, 4.4486e-01],
         ...,
         [4.7013e-01, 4.5815e-02, 8.3362e-01],
         [2.3575e+01, 5.3534e-02, 9.3494e-01],
         [2.1683e+01, 1.4846e-02, 1.2874e+00]],

        [[4.3838e+00, 2.1638e-01, 3.4414e-01],
         [1.3942e+01, 1.2608e-01, 1.4331e+00],
         [1.3868e-01, 1.5745e-01


Train Diffusion:   7%|▋         | 371/5001 [54:44<13:05:53, 10.18s/it][A
Train Diffusion:   7%|▋         | 372/5001 [54:54<13:02:21, 10.14s/it][A
Train Diffusion:   7%|▋         | 373/5001 [55:07<14:03:59, 10.94s/it][A
Train Diffusion:   7%|▋         | 374/5001 [55:18<13:55:11, 10.83s/it][A
Train Diffusion:   7%|▋         | 375/5001 [55:28<13:44:39, 10.70s/it][A
Train Diffusion:   8%|▊         | 376/5001 [55:38<13:16:35, 10.33s/it][A
Train Diffusion:   8%|▊         | 377/5001 [55:47<12:48:20,  9.97s/it][A
Train Diffusion:   8%|▊         | 378/5001 [55:55<12:04:21,  9.40s/it][A
Train Diffusion:   8%|▊         | 379/5001 [56:03<11:41:05,  9.10s/it][A
Train Diffusion:   8%|▊         | 380/5001 [56:14<12:07:47,  9.45s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 4819792.75. Best ELBO loss value is: 3594019.0.

C_PATH mean = tensor([[13.6798,  0.0996,  0.4822],
        [13.8162,  0.1014,  0.4886],
        [13.6700,  0.1007,  0.4812]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[4.3101e+00, 2.2309e-01, 2.6946e-01],
         [9.6143e+00, 6.5946e-02, 2.4241e+00],
         [1.3083e-01, 6.6847e-02, 9.7337e-01],
         ...,
         [6.8823e-01, 2.0054e-01, 5.4256e-01],
         [1.0959e+00, 2.4092e-01, 4.0574e-01],
         [1.0728e+01, 1.1825e-01, 1.1901e+00]],

        [[1.9733e+00, 1.5003e-01, 4.4261e-01],
         [5.2920e-01, 1.1497e+00, 4.7481e-01],
         [1.3281e+00, 5.2237e-01, 2.4984e-01],
         ...,
         [1.6813e+01, 9.9786e-02, 6.4618e-01],
         [2.0559e+01, 2.7319e-03, 9.7330e-01],
         [2.0668e+01, 4.3499e-02, 1.1330e+00]],

        [[2.7201e+00, 4.2206e-01, 2.1012e-01],
         [7.2748e-01, 4.1081e-01, 1.6572e-01],
         [7.5504e+00, 1.0701e-0


Train Diffusion:   8%|▊         | 381/5001 [56:25<12:43:06,  9.91s/it][A
Train Diffusion:   8%|▊         | 382/5001 [56:36<13:27:44, 10.49s/it][A
Train Diffusion:   8%|▊         | 383/5001 [56:48<13:56:42, 10.87s/it][A
Train Diffusion:   8%|▊         | 384/5001 [56:56<12:57:21, 10.10s/it][A
Train Diffusion:   8%|▊         | 385/5001 [57:04<12:04:40,  9.42s/it][A
Train Diffusion:   8%|▊         | 386/5001 [57:12<11:16:33,  8.80s/it][A
Train Diffusion:   8%|▊         | 387/5001 [57:19<10:42:59,  8.36s/it][A
Train Diffusion:   8%|▊         | 388/5001 [57:26<10:19:21,  8.06s/it][A
Train Diffusion:   8%|▊         | 389/5001 [57:34<10:07:49,  7.91s/it][A
Train Diffusion:   8%|▊         | 390/5001 [57:41<9:55:31,  7.75s/it] [A


Moving average ELBO loss at <built-in function iter> iterations is: 3381011.5. Best ELBO loss value is: 2930815.0.

C_PATH mean = tensor([[12.7015,  0.1186,  0.5001],
        [13.1638,  0.1144,  0.5107],
        [12.8562,  0.1195,  0.5081]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.6459e+00, 2.3865e-01, 5.0582e-01],
         [2.0433e+00, 1.7419e+00, 1.6997e-01],
         [4.2330e+00, 5.0565e-01, 3.0199e-01],
         ...,
         [1.3904e+01, 1.3793e-01, 4.0601e-01],
         [1.2548e+01, 4.1488e-02, 2.9177e-01],
         [8.8091e+00, 1.0892e-01, 6.1591e-01]],

        [[3.2301e+00, 4.2276e-01, 3.2881e-01],
         [4.5068e+00, 8.1336e-01, 6.7339e-01],
         [6.2823e-01, 5.7771e-01, 8.9048e-01],
         ...,
         [1.6602e+01, 4.7840e-02, 6.8865e-01],
         [2.0575e+01, 5.7732e-03, 8.0906e-01],
         [1.8305e+01, 5.3544e-02, 5.5349e-01]],

        [[3.3437e+00, 3.0985e-01, 3.1578e-01],
         [2.1764e+00, 1.0401e-01, 2.3598e+00],
         [3.4699e-01, 2.5564e-01


Train Diffusion:   8%|▊         | 391/5001 [57:49<9:47:40,  7.65s/it][A
Train Diffusion:   8%|▊         | 392/5001 [57:56<9:40:12,  7.55s/it][A
Train Diffusion:   8%|▊         | 393/5001 [58:03<9:34:07,  7.48s/it][A
Train Diffusion:   8%|▊         | 394/5001 [58:12<10:06:26,  7.90s/it][A
Train Diffusion:   8%|▊         | 395/5001 [58:21<10:31:58,  8.23s/it][A
Train Diffusion:   8%|▊         | 396/5001 [58:31<11:15:11,  8.80s/it][A
Train Diffusion:   8%|▊         | 397/5001 [58:41<11:39:36,  9.12s/it][A
Train Diffusion:   8%|▊         | 398/5001 [58:57<14:20:16, 11.21s/it][A
Train Diffusion:   8%|▊         | 399/5001 [59:06<13:28:42, 10.54s/it][A
Train Diffusion:   8%|▊         | 400/5001 [59:16<13:02:42, 10.21s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 3367112.35. Best ELBO loss value is: 2901916.0.

C_PATH mean = tensor([[11.6782,  0.1268,  0.6088],
        [11.6270,  0.1322,  0.6130],
        [11.4276,  0.1305,  0.6051]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4627,  0.2670,  0.6027],
         [ 0.9096,  0.3996,  0.1723],
         [ 6.0408,  0.1845,  0.3041],
         ...,
         [11.8660,  0.1285,  0.5840],
         [ 3.9140,  0.1833,  0.4235],
         [21.2896,  0.0521,  1.0508]],

        [[ 2.8570,  0.1997,  0.3673],
         [ 8.0859,  0.3058,  0.5750],
         [ 0.2525,  0.1877,  1.2226],
         ...,
         [ 3.0452,  0.1359,  0.7957],
         [ 8.6615,  0.0815,  0.4450],
         [ 1.6619,  0.1070,  0.4488]],

        [[ 2.1124,  0.3224,  0.1853],
         [ 0.1270,  0.1717,  1.0175],
         [ 0.1798,  0.3412,  1.0410],
         ...,
         [14.8737,  0.5903,  0.7925],
         [ 1.0829,  0.1575,  1.3909],
         [15.8400,  0.0473,  


Train Diffusion:   8%|▊         | 401/5001 [59:25<12:35:08,  9.85s/it][A
Train Diffusion:   8%|▊         | 402/5001 [59:33<11:53:17,  9.31s/it][A
Train Diffusion:   8%|▊         | 403/5001 [59:41<11:32:11,  9.03s/it][A
Train Diffusion:   8%|▊         | 404/5001 [59:50<11:28:14,  8.98s/it][A
Train Diffusion:   8%|▊         | 405/5001 [59:59<11:16:33,  8.83s/it][A
Train Diffusion:   8%|▊         | 406/5001 [1:00:09<12:01:56,  9.43s/it][A
Train Diffusion:   8%|▊         | 407/5001 [1:00:20<12:33:34,  9.84s/it][A
Train Diffusion:   8%|▊         | 408/5001 [1:00:31<12:57:20, 10.15s/it][A
Train Diffusion:   8%|▊         | 409/5001 [1:00:44<14:03:33, 11.02s/it][A
Train Diffusion:   8%|▊         | 410/5001 [1:00:52<12:52:31, 10.10s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 2785686.4. Best ELBO loss value is: 2310129.5.

C_PATH mean = tensor([[10.7372,  0.1397,  0.5381],
        [10.8103,  0.1379,  0.5356],
        [10.8834,  0.1366,  0.5492]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5244e+00, 2.1497e-01, 6.7618e-01],
         [1.0767e+00, 1.4323e-01, 2.0901e-01],
         [1.1628e+01, 1.2767e-01, 2.3402e-01],
         ...,
         [1.4318e+01, 8.1360e-02, 5.1071e-01],
         [5.8063e+00, 1.1475e-01, 3.2292e-01],
         [3.5910e+00, 1.7119e-01, 4.5686e-01]],

        [[1.9174e+00, 2.3986e-01, 2.7443e-01],
         [1.4729e+00, 4.7317e-01, 3.0836e-01],
         [1.9644e-01, 2.0532e-01, 7.1770e-01],
         ...,
         [1.3566e+01, 1.1512e-01, 7.7945e-01],
         [1.8205e+01, 1.4512e-02, 1.2026e+00],
         [1.5339e+01, 5.9986e-02, 5.7794e-01]],

        [[2.0227e+00, 2.1076e-01, 2.6893e-01],
         [2.6833e+00, 1.3186e-01, 1.8962e+00],
         [1.2624e+00, 2.7005e-01


Train Diffusion:   8%|▊         | 411/5001 [1:00:59<11:48:51,  9.27s/it][A
Train Diffusion:   8%|▊         | 412/5001 [1:01:07<11:03:01,  8.67s/it][A
Train Diffusion:   8%|▊         | 413/5001 [1:01:14<10:28:37,  8.22s/it][A
Train Diffusion:   8%|▊         | 414/5001 [1:01:21<10:04:06,  7.90s/it][A
Train Diffusion:   8%|▊         | 415/5001 [1:01:28<9:49:24,  7.71s/it] [A
Train Diffusion:   8%|▊         | 416/5001 [1:01:36<9:50:26,  7.73s/it][A
Train Diffusion:   8%|▊         | 417/5001 [1:01:44<9:54:16,  7.78s/it][A
Train Diffusion:   8%|▊         | 418/5001 [1:01:54<10:40:11,  8.38s/it][A
Train Diffusion:   8%|▊         | 419/5001 [1:02:02<10:32:42,  8.29s/it][A
Train Diffusion:   8%|▊         | 420/5001 [1:02:10<10:22:09,  8.15s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 2344917.2125. Best ELBO loss value is: 2060022.375.

C_PATH mean = tensor([[10.4968,  0.1784,  0.4856],
        [10.5457,  0.1777,  0.4798],
        [10.5585,  0.1820,  0.4807]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.9617,  0.2145,  0.2119],
         [ 0.6975,  0.3124,  0.2406],
         [ 0.2972,  0.3172,  0.3033],
         ...,
         [ 2.6983,  0.3175,  0.4252],
         [ 0.6896,  0.1529,  1.2680],
         [ 4.1410,  0.1644,  0.7745]],

        [[ 2.1999,  0.2118,  0.2613],
         [ 4.3744,  0.1342,  2.1299],
         [ 1.1634,  0.3050,  1.0388],
         ...,
         [14.2595,  0.1796,  0.8780],
         [16.2296,  0.2104,  0.8253],
         [ 8.0646,  0.2409,  0.7534]],

        [[ 1.4206,  0.1355,  0.2913],
         [ 0.9494,  0.3808,  0.1806],
         [ 4.3236,  0.1680,  0.1290],
         ...,
         [15.4778,  0.0961,  0.5396],
         [22.0428,  0.1998,  0.3997],
         [20.0142,  0.114


Train Diffusion:   8%|▊         | 421/5001 [1:02:18<10:29:58,  8.25s/it][A
Train Diffusion:   8%|▊         | 422/5001 [1:02:26<10:16:02,  8.07s/it][A
Train Diffusion:   8%|▊         | 423/5001 [1:02:35<10:36:39,  8.34s/it][A
Train Diffusion:   8%|▊         | 424/5001 [1:02:45<11:14:36,  8.84s/it][A
Train Diffusion:   8%|▊         | 425/5001 [1:02:54<11:35:28,  9.12s/it][A
Train Diffusion:   9%|▊         | 426/5001 [1:03:04<11:56:05,  9.39s/it][A
Train Diffusion:   9%|▊         | 427/5001 [1:03:13<11:45:12,  9.25s/it][A
Train Diffusion:   9%|▊         | 428/5001 [1:03:22<11:41:38,  9.21s/it][A
Train Diffusion:   9%|▊         | 429/5001 [1:03:31<11:36:24,  9.14s/it][A
Train Diffusion:   9%|▊         | 430/5001 [1:03:46<13:39:41, 10.76s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 2074062.1. Best ELBO loss value is: 1822407.25.

C_PATH mean = tensor([[9.2638, 0.1603, 0.5428],
        [9.2218, 0.1596, 0.5538],
        [9.3215, 0.1631, 0.5516]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 2.3947,  0.2528,  0.4173],
         [ 7.0410,  0.0913,  3.7200],
         [ 4.0714,  0.3256,  1.6134],
         ...,
         [ 7.2837,  0.2481,  0.5532],
         [ 4.2689,  0.0333,  0.5416],
         [11.3915,  0.0970,  0.8793]],

        [[ 1.2803,  0.2205,  0.4549],
         [ 0.9514,  0.2550,  0.3564],
         [ 6.6956,  0.1469,  0.3037],
         ...,
         [10.9194,  0.1076,  0.9745],
         [12.8132,  0.3213,  0.6499],
         [ 6.2323,  0.1097,  0.3087]],

        [[ 1.6908,  0.4623,  0.2123],
         [ 0.6123,  0.3833,  0.2176],
         [ 1.5316,  0.3388,  0.4051],
         ...,
         [10.0433,  0.0210,  0.2803],
         [14.9185,  0.0937,  0.5613],
         [ 8.5634,  0.2250,  0.3619]]]


Train Diffusion:   9%|▊         | 431/5001 [1:03:56<13:26:22, 10.59s/it][A
Train Diffusion:   9%|▊         | 432/5001 [1:04:05<12:46:00, 10.06s/it][A
Train Diffusion:   9%|▊         | 433/5001 [1:04:15<12:37:54,  9.96s/it][A
Train Diffusion:   9%|▊         | 434/5001 [1:04:24<12:23:43,  9.77s/it][A
Train Diffusion:   9%|▊         | 435/5001 [1:04:32<11:34:31,  9.13s/it][A
Train Diffusion:   9%|▊         | 436/5001 [1:04:41<11:30:20,  9.07s/it][A
Train Diffusion:   9%|▊         | 437/5001 [1:04:52<12:32:41,  9.90s/it][A
Train Diffusion:   9%|▉         | 438/5001 [1:05:04<12:59:37, 10.25s/it][A
Train Diffusion:   9%|▉         | 439/5001 [1:05:15<13:36:22, 10.74s/it][A
Train Diffusion:   9%|▉         | 440/5001 [1:05:26<13:36:25, 10.74s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 1998088.7125. Best ELBO loss value is: 1726660.625.

C_PATH mean = tensor([[9.3499, 0.1857, 0.3678],
        [9.2915, 0.1908, 0.3684],
        [9.2770, 0.1859, 0.3684]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.6591,  0.3275,  0.4603],
         [ 0.2450,  0.5038,  0.8936],
         [ 0.1472,  0.4740,  0.9531],
         ...,
         [ 7.6502,  0.0227,  1.0414],
         [ 3.9716,  0.1487,  0.1287],
         [14.5619,  0.0811,  0.2523]],

        [[ 2.8878,  0.2765,  0.2317],
         [ 8.2216,  0.1799,  0.9091],
         [ 0.2553,  0.2751,  1.4609],
         ...,
         [11.3978,  0.2027,  0.4504],
         [ 0.7924,  0.4216,  0.2224],
         [ 9.9846,  0.1896,  0.6000]],

        [[ 1.7395,  0.6236,  0.4320],
         [ 0.4428,  0.4614,  0.2321],
         [ 5.5148,  0.1231,  0.1781],
         ...,
         [ 3.9706,  0.2465,  0.1730],
         [ 9.3302,  0.0949,  0.6684],
         [12.8809,  0.1016,  0.194


Train Diffusion:   9%|▉         | 441/5001 [1:05:35<13:00:16, 10.27s/it][A
Train Diffusion:   9%|▉         | 442/5001 [1:05:44<12:14:13,  9.66s/it][A
Train Diffusion:   9%|▉         | 443/5001 [1:05:52<11:45:12,  9.28s/it][A
Train Diffusion:   9%|▉         | 444/5001 [1:06:01<11:43:05,  9.26s/it][A
Train Diffusion:   9%|▉         | 445/5001 [1:06:10<11:34:42,  9.15s/it][A
Train Diffusion:   9%|▉         | 446/5001 [1:06:19<11:37:17,  9.19s/it][A
Train Diffusion:   9%|▉         | 447/5001 [1:06:28<11:25:28,  9.03s/it][A
Train Diffusion:   9%|▉         | 448/5001 [1:06:36<11:03:23,  8.74s/it][A
Train Diffusion:   9%|▉         | 449/5001 [1:06:44<10:44:03,  8.49s/it][A
Train Diffusion:   9%|▉         | 450/5001 [1:06:52<10:42:07,  8.47s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 1808491.2. Best ELBO loss value is: 1584176.0.

C_PATH mean = tensor([[8.6165, 0.1816, 0.3618],
        [8.5901, 0.1841, 0.3663],
        [8.5509, 0.1836, 0.3697]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.4756,  0.4648,  0.4886],
         [ 0.4609,  1.1130,  0.1246],
         [ 2.9444,  0.3777,  0.1739],
         ...,
         [12.7574,  0.2561,  0.4640],
         [ 0.4458,  0.3203,  0.4997],
         [ 0.3752,  0.2270,  0.3002]],

        [[ 2.4444,  0.4113,  0.3413],
         [ 7.8240,  0.1992,  0.8147],
         [ 0.1865,  0.5553,  0.7013],
         ...,
         [ 6.3267,  0.0327,  0.3798],
         [ 4.3132,  0.2101,  0.1455],
         [15.8738,  0.1543,  0.3257]],

        [[ 1.7740,  0.6267,  0.2415],
         [ 0.2600,  0.4774,  0.7108],
         [ 0.0820,  0.4245,  0.8675],
         ...,
         [ 2.7494,  0.2913,  0.1739],
         [ 7.7244,  0.0642,  0.4085],
         [ 9.9451,  0.1199,  0.3768]]],


Train Diffusion:   9%|▉         | 451/5001 [1:07:06<12:32:17,  9.92s/it][A
Train Diffusion:   9%|▉         | 452/5001 [1:07:15<12:18:23,  9.74s/it][A
Train Diffusion:   9%|▉         | 453/5001 [1:07:23<11:47:22,  9.33s/it][A
Train Diffusion:   9%|▉         | 454/5001 [1:07:32<11:19:23,  8.96s/it][A
Train Diffusion:   9%|▉         | 455/5001 [1:07:43<12:09:16,  9.63s/it][A
Train Diffusion:   9%|▉         | 456/5001 [1:07:56<13:32:42, 10.73s/it][A
Train Diffusion:   9%|▉         | 457/5001 [1:08:09<14:27:15, 11.45s/it][A
Train Diffusion:   9%|▉         | 458/5001 [1:08:18<13:39:42, 10.83s/it][A
Train Diffusion:   9%|▉         | 459/5001 [1:08:28<13:09:11, 10.43s/it][A
Train Diffusion:   9%|▉         | 460/5001 [1:08:36<12:04:51,  9.58s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 1464426.125. Best ELBO loss value is: 1053276.75.

C_PATH mean = tensor([[7.3390, 0.1838, 0.4123],
        [7.2760, 0.1850, 0.4189],
        [7.3835, 0.1839, 0.4116]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.1396,  0.2028,  0.3592],
         [ 0.4041,  0.5196,  0.2063],
         [ 0.4812,  0.5429,  0.3329],
         ...,
         [ 0.4413,  0.2107,  0.5288],
         [ 0.0910,  0.0541,  0.3440],
         [ 2.1995,  0.1463,  0.4145]],

        [[ 2.1316,  0.2757,  0.3571],
         [ 4.2226,  0.1462,  1.6235],
         [ 0.0851,  0.3364,  0.9669],
         ...,
         [ 0.3187,  0.2002,  0.6657],
         [ 8.9590,  0.1737,  0.3169],
         [11.6706,  0.1361,  0.8533]],

        [[ 1.7931,  0.3432,  0.3307],
         [ 1.0589,  0.6506,  0.3214],
         [ 0.7968,  0.3776,  0.3790],
         ...,
         [11.7021,  0.1286,  0.3148],
         [ 8.7873,  0.2014,  0.4269],
         [ 0.2598,  0.2656,  0.1637]


Train Diffusion:   9%|▉         | 461/5001 [1:08:44<11:42:01,  9.28s/it][A
Train Diffusion:   9%|▉         | 462/5001 [1:08:53<11:26:22,  9.07s/it][A
Train Diffusion:   9%|▉         | 463/5001 [1:09:01<10:57:10,  8.69s/it][A
Train Diffusion:   9%|▉         | 464/5001 [1:09:09<10:44:42,  8.53s/it][A
Train Diffusion:   9%|▉         | 465/5001 [1:09:16<10:28:30,  8.31s/it][A
Train Diffusion:   9%|▉         | 466/5001 [1:09:24<10:18:07,  8.18s/it][A
Train Diffusion:   9%|▉         | 467/5001 [1:09:32<10:07:33,  8.04s/it][A
Train Diffusion:   9%|▉         | 468/5001 [1:09:40<10:02:23,  7.97s/it][A
Train Diffusion:   9%|▉         | 469/5001 [1:09:48<10:03:14,  7.99s/it][A
Train Diffusion:   9%|▉         | 470/5001 [1:09:56<9:57:30,  7.91s/it] [A


Moving average ELBO loss at <built-in function iter> iterations is: 1334031.35. Best ELBO loss value is: 1053276.75.

C_PATH mean = tensor([[6.1489, 0.2235, 0.5748],
        [6.0747, 0.2220, 0.5760],
        [6.3349, 0.2250, 0.5763]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.9744,  0.2623,  0.4407],
         [ 4.2249,  0.2279,  0.8895],
         [ 0.2506,  0.4918,  0.9488],
         ...,
         [ 4.0455,  0.1992,  0.8565],
         [ 1.9430,  0.1592,  0.4868],
         [10.3169,  0.1733,  0.5796]],

        [[ 1.1862,  0.3467,  0.5120],
         [ 0.7827,  0.1920,  0.4719],
         [11.1247,  0.1247,  0.5885],
         ...,
         [ 0.3663,  0.2395,  0.8051],
         [ 4.5985,  0.1371,  0.4966],
         [ 4.9597,  0.1679,  0.2609]],

        [[ 1.8247,  0.2836,  0.3640],
         [ 0.7229,  0.2074,  0.8399],
         [ 0.0895,  0.6044,  1.1398],
         ...,
         [11.2044,  0.2606,  0.5150],
         [ 7.2484,  0.0897,  0.9232],
         [10.4349,  0.0980,  0.8845]]


Train Diffusion:   9%|▉         | 471/5001 [1:10:04<10:06:11,  8.03s/it][A
Train Diffusion:   9%|▉         | 472/5001 [1:10:12<10:01:00,  7.96s/it][A
Train Diffusion:   9%|▉         | 473/5001 [1:10:20<10:01:03,  7.96s/it][A
Train Diffusion:   9%|▉         | 474/5001 [1:10:29<10:25:53,  8.30s/it][A
Train Diffusion:   9%|▉         | 475/5001 [1:10:37<10:28:22,  8.33s/it][A
Train Diffusion:  10%|▉         | 476/5001 [1:10:45<10:11:51,  8.11s/it][A
Train Diffusion:  10%|▉         | 477/5001 [1:10:52<10:01:49,  7.98s/it][A
Train Diffusion:  10%|▉         | 478/5001 [1:11:00<9:58:18,  7.94s/it] [A
Train Diffusion:  10%|▉         | 479/5001 [1:11:08<10:02:28,  7.99s/it][A
Train Diffusion:  10%|▉         | 480/5001 [1:11:16<9:52:06,  7.86s/it] [A


Moving average ELBO loss at <built-in function iter> iterations is: 888369.75625. Best ELBO loss value is: 668511.5.

C_PATH mean = tensor([[5.2803, 0.2352, 0.4367],
        [5.3334, 0.2357, 0.4393],
        [5.3461, 0.2339, 0.4354]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 1.3997,  0.5215,  0.2625],
         [ 0.9437,  0.3812,  0.2480],
         [ 3.3708,  0.2828,  0.3934],
         ...,
         [ 6.7652,  0.2435,  0.3954],
         [10.1248,  0.1995,  0.3026],
         [ 3.4236,  0.2724,  0.4333]],

        [[ 1.2069,  0.2401,  0.4067],
         [ 0.5341,  0.5816,  0.2838],
         [ 1.1830,  0.4300,  0.2805],
         ...,
         [ 5.1178,  0.1867,  0.4676],
         [ 0.2637,  0.0716,  0.4120],
         [ 5.1365,  0.1733,  0.4249]],

        [[ 1.9300,  0.3652,  0.4510],
         [ 4.5143,  0.1869,  1.1607],
         [ 2.2633,  0.2421,  0.4821],
         ...,
         [ 5.0676,  0.1449,  0.2988],
         [ 6.6036,  0.2220,  0.3876],
         [ 5.7593,  0.2100,  0.4379]]


Train Diffusion:  10%|▉         | 481/5001 [1:11:24<9:46:10,  7.78s/it][A
Train Diffusion:  10%|▉         | 482/5001 [1:11:31<9:45:24,  7.77s/it][A
Train Diffusion:  10%|▉         | 483/5001 [1:11:39<9:43:04,  7.74s/it][A
Train Diffusion:  10%|▉         | 484/5001 [1:11:47<9:44:57,  7.77s/it][A
Train Diffusion:  10%|▉         | 485/5001 [1:11:55<9:43:06,  7.75s/it][A
Train Diffusion:  10%|▉         | 486/5001 [1:12:03<9:52:35,  7.87s/it][A
Train Diffusion:  10%|▉         | 487/5001 [1:12:11<10:04:49,  8.04s/it][A
Train Diffusion:  10%|▉         | 488/5001 [1:12:19<9:52:24,  7.88s/it] [A
Train Diffusion:  10%|▉         | 489/5001 [1:12:26<9:46:56,  7.81s/it][A
Train Diffusion:  10%|▉         | 490/5001 [1:12:34<9:52:02,  7.87s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 892910.56875. Best ELBO loss value is: 653497.0.

C_PATH mean = tensor([[5.1288, 0.1489, 0.3866],
        [5.1473, 0.1486, 0.3888],
        [5.0694, 0.1494, 0.3866]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2448, 0.4369, 0.3045],
         [0.2855, 0.5094, 0.1821],
         [2.0233, 0.2042, 0.2247],
         ...,
         [6.6031, 0.1825, 0.3652],
         [5.5567, 0.1153, 0.3049],
         [7.6871, 0.1341, 0.3522]],

        [[1.3246, 0.2751, 0.2736],
         [0.2294, 0.2037, 0.4630],
         [0.0978, 0.3667, 0.2831],
         ...,
         [7.1545, 0.0770, 0.3828],
         [7.4446, 0.0541, 0.3953],
         [7.0810, 0.0906, 0.2269]],

        [[1.8142, 0.3134, 0.4390],
         [5.9014, 0.2049, 0.4789],
         [0.3092, 0.3388, 0.5575],
         ...,
         [3.7557, 0.1339, 0.4037],
         [5.6334, 0.0893, 0.2833],
         [6.2726, 0.1139, 0.4043]]], grad_fn=<AddBackward0>)



Train Diffusion:  10%|▉         | 491/5001 [1:12:42<9:47:02,  7.81s/it][A
Train Diffusion:  10%|▉         | 492/5001 [1:12:50<9:43:32,  7.76s/it][A
Train Diffusion:  10%|▉         | 493/5001 [1:12:57<9:38:03,  7.69s/it][A
Train Diffusion:  10%|▉         | 494/5001 [1:13:05<9:37:06,  7.68s/it][A
Train Diffusion:  10%|▉         | 495/5001 [1:13:12<9:33:25,  7.64s/it][A
Train Diffusion:  10%|▉         | 496/5001 [1:13:20<9:29:40,  7.59s/it][A
Train Diffusion:  10%|▉         | 497/5001 [1:13:27<9:29:32,  7.59s/it][A
Train Diffusion:  10%|▉         | 498/5001 [1:13:35<9:31:27,  7.61s/it][A
Train Diffusion:  10%|▉         | 499/5001 [1:13:46<10:44:34,  8.59s/it][A
Train Diffusion:  10%|▉         | 500/5001 [1:13:57<11:32:47,  9.24s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 862166.08125. Best ELBO loss value is: 653497.0.

C_PATH mean = tensor([[4.8477, 0.0972, 0.4241],
        [4.8760, 0.0972, 0.4198],
        [4.8258, 0.0985, 0.4243]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2712, 0.3382, 0.3176],
         [0.1902, 0.5347, 0.2484],
         [0.2939, 0.5386, 0.2231],
         ...,
         [0.9085, 0.1598, 0.3444],
         [3.3134, 0.0936, 0.3652],
         [5.5002, 0.0769, 0.4691]],

        [[1.8291, 0.2609, 0.3878],
         [4.9693, 0.0613, 1.0745],
         [0.1603, 0.2687, 0.7462],
         ...,
         [8.1417, 0.1230, 0.2586],
         [0.6432, 0.0148, 0.5194],
         [6.9677, 0.0564, 0.5312]],

        [[1.0931, 0.2480, 0.3250],
         [0.6396, 0.2267, 0.4696],
         [7.4125, 0.0764, 0.3490],
         ...,
         [3.7864, 0.0460, 0.3403],
         [1.1178, 0.1937, 0.3941],
         [7.1460, 0.0302, 0.3551]]], grad_fn=<AddBackward0>)



Train Diffusion:  10%|█         | 501/5001 [1:14:06<11:29:19,  9.19s/it][A
Train Diffusion:  10%|█         | 502/5001 [1:14:16<11:51:31,  9.49s/it][A
Train Diffusion:  10%|█         | 503/5001 [1:14:25<11:48:10,  9.45s/it][A
Train Diffusion:  10%|█         | 504/5001 [1:14:43<15:01:23, 12.03s/it][A
Train Diffusion:  10%|█         | 505/5001 [1:14:53<14:04:55, 11.28s/it][A
Train Diffusion:  10%|█         | 506/5001 [1:15:01<12:48:15, 10.25s/it][A
Train Diffusion:  10%|█         | 507/5001 [1:15:10<12:34:09, 10.07s/it][A
Train Diffusion:  10%|█         | 508/5001 [1:15:19<12:04:13,  9.67s/it][A
Train Diffusion:  10%|█         | 509/5001 [1:15:29<12:00:47,  9.63s/it][A
Train Diffusion:  10%|█         | 510/5001 [1:15:41<12:55:35, 10.36s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 565929.175. Best ELBO loss value is: 468151.1875.

C_PATH mean = tensor([[4.7665, 0.0783, 0.3264],
        [4.6145, 0.0800, 0.3267],
        [4.7162, 0.0788, 0.3228]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2559, 0.2991, 0.2125],
         [0.3740, 0.2474, 0.2704],
         [1.5984, 0.1178, 0.3381],
         ...,
         [4.5301, 0.0374, 0.2175],
         [7.1576, 0.0248, 0.2081],
         [9.1853, 0.0841, 0.7180]],

        [[2.0352, 0.1736, 0.2583],
         [3.4382, 0.0675, 1.9823],
         [1.4500, 0.1280, 0.6716],
         ...,
         [4.2624, 0.1047, 0.3413],
         [0.1875, 0.0505, 0.2141],
         [1.3303, 0.0481, 0.2822]],

        [[1.0958, 0.1550, 0.2252],
         [0.4562, 0.1324, 0.2929],
         [4.8400, 0.0747, 0.3126],
         ...,
         [6.0522, 0.0343, 0.2802],
         [8.3591, 0.0946, 0.4828],
         [0.5184, 0.1595, 0.1131]]], grad_fn=<AddBackward0>)



Train Diffusion:  10%|█         | 511/5001 [1:15:54<14:05:00, 11.29s/it][A
Train Diffusion:  10%|█         | 512/5001 [1:16:16<18:07:52, 14.54s/it][A
Train Diffusion:  10%|█         | 513/5001 [1:16:28<17:05:39, 13.71s/it][A
Train Diffusion:  10%|█         | 514/5001 [1:16:38<15:30:55, 12.45s/it][A
Train Diffusion:  10%|█         | 515/5001 [1:16:46<14:03:53, 11.29s/it][A
Train Diffusion:  10%|█         | 516/5001 [1:16:54<12:46:09, 10.25s/it][A
Train Diffusion:  10%|█         | 517/5001 [1:17:02<11:57:19,  9.60s/it][A
Train Diffusion:  10%|█         | 518/5001 [1:17:10<11:14:02,  9.02s/it][A
Train Diffusion:  10%|█         | 519/5001 [1:17:19<11:24:00,  9.16s/it][A
Train Diffusion:  10%|█         | 520/5001 [1:17:28<11:07:05,  8.93s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 389591.128125. Best ELBO loss value is: 341938.125.

C_PATH mean = tensor([[4.0664, 0.1164, 0.3112],
        [4.0099, 0.1167, 0.3102],
        [3.9709, 0.1163, 0.3089]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2840, 0.4581, 0.1327],
         [0.4745, 0.8516, 0.1513],
         [1.0722, 0.1615, 0.2049],
         ...,
         [2.8390, 0.1486, 0.3195],
         [3.7253, 0.0831, 0.2963],
         [2.8373, 0.0849, 0.3094]],

        [[1.8772, 0.1755, 0.2426],
         [3.5843, 0.1291, 1.4712],
         [0.0450, 0.1926, 0.5793],
         ...,
         [6.4129, 0.0679, 0.2626],
         [7.0491, 0.0986, 0.1752],
         [8.0886, 0.1304, 0.2606]],

        [[1.0277, 0.1456, 0.1889],
         [0.1846, 0.4494, 0.0804],
         [0.8269, 0.3051, 0.3197],
         ...,
         [0.9004, 0.1849, 0.2657],
         [4.0126, 0.1077, 0.3129],
         [2.1278, 0.0895, 0.2429]]], grad_fn=<AddBackward0>)



Train Diffusion:  10%|█         | 521/5001 [1:17:37<11:24:23,  9.17s/it][A
Train Diffusion:  10%|█         | 522/5001 [1:17:47<11:26:28,  9.20s/it][A
Train Diffusion:  10%|█         | 523/5001 [1:17:56<11:28:13,  9.22s/it][A
Train Diffusion:  10%|█         | 524/5001 [1:18:05<11:20:46,  9.12s/it][A
Train Diffusion:  10%|█         | 525/5001 [1:18:13<11:03:44,  8.90s/it][A
Train Diffusion:  11%|█         | 526/5001 [1:18:24<11:38:43,  9.37s/it][A
Train Diffusion:  11%|█         | 527/5001 [1:18:35<12:12:16,  9.82s/it][A
Train Diffusion:  11%|█         | 528/5001 [1:18:43<11:51:07,  9.54s/it][A
Train Diffusion:  11%|█         | 529/5001 [1:18:51<11:14:19,  9.05s/it][A
Train Diffusion:  11%|█         | 530/5001 [1:18:59<10:46:28,  8.68s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 281837.2609375. Best ELBO loss value is: 248189.96875.

C_PATH mean = tensor([[3.2620, 0.1439, 0.2472],
        [3.1848, 0.1452, 0.2449],
        [3.2482, 0.1440, 0.2438]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3823, 0.2374, 0.3022],
         [0.3023, 0.1328, 0.5546],
         [0.0765, 0.3617, 0.5130],
         ...,
         [4.4348, 0.0861, 0.1958],
         [7.3883, 0.1092, 0.1280],
         [5.4541, 0.1680, 0.1913]],

        [[1.5147, 0.2263, 0.3176],
         [2.0981, 0.1624, 0.5151],
         [0.1457, 0.2092, 0.4471],
         ...,
         [0.5389, 0.2078, 0.2387],
         [2.6860, 0.1393, 0.2706],
         [0.2916, 0.1769, 0.1974]],

        [[0.8515, 0.2245, 0.3001],
         [0.4126, 0.1731, 0.2282],
         [4.0252, 0.1238, 0.1209],
         ...,
         [3.0986, 0.1669, 0.2349],
         [3.2775, 0.0736, 0.2356],
         [5.1185, 0.0894, 0.1864]]], grad_fn=<AddBackward0>)



Train Diffusion:  11%|█         | 531/5001 [1:19:07<10:34:13,  8.51s/it][A
Train Diffusion:  11%|█         | 532/5001 [1:19:15<10:19:19,  8.32s/it][A
Train Diffusion:  11%|█         | 533/5001 [1:19:23<10:14:57,  8.26s/it][A
Train Diffusion:  11%|█         | 534/5001 [1:19:31<10:06:17,  8.14s/it][A
Train Diffusion:  11%|█         | 535/5001 [1:19:39<9:59:19,  8.05s/it] [A
Train Diffusion:  11%|█         | 536/5001 [1:19:47<10:03:23,  8.11s/it][A
Train Diffusion:  11%|█         | 537/5001 [1:19:55<9:57:23,  8.03s/it] [A
Train Diffusion:  11%|█         | 538/5001 [1:20:03<9:56:16,  8.02s/it][A
Train Diffusion:  11%|█         | 539/5001 [1:20:11<9:49:22,  7.93s/it][A
Train Diffusion:  11%|█         | 540/5001 [1:20:19<9:55:51,  8.01s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 224607.421875. Best ELBO loss value is: 197764.765625.

C_PATH mean = tensor([[2.2025, 0.1604, 0.2933],
        [2.2468, 0.1609, 0.2948],
        [2.1826, 0.1607, 0.2928]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7726, 0.1166, 0.2490],
         [0.4499, 0.1429, 0.1639],
         [4.5016, 0.1303, 0.2400],
         ...,
         [3.5847, 0.1467, 0.3173],
         [3.6001, 0.1626, 0.2038],
         [3.7493, 0.1415, 0.2590]],

        [[1.2495, 0.1977, 0.2840],
         [0.3795, 0.1760, 0.4156],
         [0.1282, 0.3376, 0.3350],
         ...,
         [0.9237, 0.2559, 0.2489],
         [3.0606, 0.1274, 0.3066],
         [3.1520, 0.1357, 0.3192]],

        [[1.3958, 0.1979, 0.3426],
         [1.2290, 0.1524, 0.4955],
         [0.3905, 0.2779, 0.3702],
         ...,
         [1.2038, 0.1647, 0.2569],
         [0.3987, 0.1171, 0.2111],
         [1.5251, 0.1176, 0.2268]]], grad_fn=<AddBackward0>)



Train Diffusion:  11%|█         | 541/5001 [1:20:28<10:11:13,  8.22s/it][A
Train Diffusion:  11%|█         | 542/5001 [1:20:37<10:39:22,  8.60s/it][A
Train Diffusion:  11%|█         | 543/5001 [1:20:45<10:30:05,  8.48s/it][A
Train Diffusion:  11%|█         | 544/5001 [1:20:53<10:06:12,  8.16s/it][A
Train Diffusion:  11%|█         | 545/5001 [1:21:01<10:06:12,  8.16s/it][A
Train Diffusion:  11%|█         | 546/5001 [1:21:09<10:08:23,  8.19s/it][A
Train Diffusion:  11%|█         | 547/5001 [1:21:17<9:51:27,  7.97s/it] [A
Train Diffusion:  11%|█         | 548/5001 [1:21:25<9:53:53,  8.00s/it][A
Train Diffusion:  11%|█         | 549/5001 [1:21:33<9:56:40,  8.04s/it][A
Train Diffusion:  11%|█         | 550/5001 [1:21:42<10:15:47,  8.30s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 205942.25. Best ELBO loss value is: 176917.375.

C_PATH mean = tensor([[1.8055, 0.1638, 0.2152],
        [1.7710, 0.1643, 0.2161],
        [1.7360, 0.1636, 0.2189]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.5646, 0.1974, 0.2664],
         [1.2407, 0.1199, 0.4436],
         [0.3801, 0.1606, 0.2003],
         ...,
         [2.1347, 0.1053, 0.2556],
         [2.6109, 0.2040, 0.1045],
         [3.0529, 0.1200, 0.2027]],

        [[0.8521, 0.1537, 0.3239],
         [0.3438, 0.1835, 0.1775],
         [3.2221, 0.1482, 0.2208],
         ...,
         [1.4562, 0.2264, 0.1443],
         [1.9225, 0.0971, 0.2482],
         [2.4502, 0.1455, 0.1852]],

        [[1.0802, 0.1829, 0.2448],
         [0.1793, 0.2771, 0.2087],
         [0.1890, 0.3197, 0.2238],
         ...,
         [1.6590, 0.1402, 0.2665],
         [1.1813, 0.1237, 0.1796],
         [1.6246, 0.1050, 0.3004]]], grad_fn=<AddBackward0>)



Train Diffusion:  11%|█         | 551/5001 [1:21:50<10:08:50,  8.21s/it][A
Train Diffusion:  11%|█         | 552/5001 [1:21:57<9:46:27,  7.91s/it] [A
Train Diffusion:  11%|█         | 553/5001 [1:22:04<9:29:03,  7.68s/it][A
Train Diffusion:  11%|█         | 554/5001 [1:22:12<9:26:21,  7.64s/it][A
Train Diffusion:  11%|█         | 555/5001 [1:22:19<9:22:40,  7.59s/it][A
Train Diffusion:  11%|█         | 556/5001 [1:22:27<9:39:22,  7.82s/it][A
Train Diffusion:  11%|█         | 557/5001 [1:22:42<12:06:09,  9.80s/it][A
Train Diffusion:  11%|█         | 558/5001 [1:22:52<12:08:10,  9.83s/it][A
Train Diffusion:  11%|█         | 559/5001 [1:23:02<12:19:01,  9.98s/it][A
Train Diffusion:  11%|█         | 560/5001 [1:23:12<12:16:14,  9.95s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 169623.8421875. Best ELBO loss value is: 130737.9375.

C_PATH mean = tensor([[1.6276, 0.1233, 0.2358],
        [1.6251, 0.1221, 0.2343],
        [1.6517, 0.1225, 0.2364]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.8125, 0.1622, 0.2141],
         [0.3245, 0.1617, 0.2137],
         [2.5930, 0.0998, 0.2190],
         ...,
         [1.6856, 0.1323, 0.2110],
         [1.7297, 0.1048, 0.1579],
         [0.2250, 0.1271, 0.1409]],

        [[1.6048, 0.1258, 0.2117],
         [1.6584, 0.1179, 0.5861],
         [0.0691, 0.1590, 0.2591],
         ...,
         [0.7777, 0.1167, 0.1692],
         [1.0023, 0.0897, 0.2285],
         [1.9848, 0.0704, 0.2478]],

        [[0.9418, 0.1772, 0.2051],
         [0.1327, 0.2459, 0.1933],
         [0.2105, 0.3058, 0.2025],
         ...,
         [1.6006, 0.0934, 0.2380],
         [3.0276, 0.1007, 0.1238],
         [2.8825, 0.1288, 0.1960]]], grad_fn=<AddBackward0>)



Train Diffusion:  11%|█         | 561/5001 [1:23:19<11:18:21,  9.17s/it][A
Train Diffusion:  11%|█         | 562/5001 [1:23:27<10:50:02,  8.79s/it][A
Train Diffusion:  11%|█▏        | 563/5001 [1:23:35<10:17:10,  8.34s/it][A
Train Diffusion:  11%|█▏        | 564/5001 [1:23:43<10:14:58,  8.32s/it][A
Train Diffusion:  11%|█▏        | 565/5001 [1:23:50<9:54:01,  8.03s/it] [A
Train Diffusion:  11%|█▏        | 566/5001 [1:23:58<9:46:51,  7.94s/it][A
Train Diffusion:  11%|█▏        | 567/5001 [1:24:05<9:37:24,  7.81s/it][A
Train Diffusion:  11%|█▏        | 568/5001 [1:24:13<9:34:30,  7.78s/it][A
Train Diffusion:  11%|█▏        | 569/5001 [1:24:21<9:31:23,  7.74s/it][A
Train Diffusion:  11%|█▏        | 570/5001 [1:24:28<9:26:46,  7.67s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 106494.2328125. Best ELBO loss value is: 87741.7109375.

C_PATH mean = tensor([[1.0996, 0.1178, 0.1974],
        [1.0657, 0.1174, 0.1968],
        [1.0863, 0.1175, 0.1965]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6841, 0.1203, 0.1897],
         [0.1862, 0.1315, 0.1758],
         [1.2560, 0.1196, 0.1482],
         ...,
         [1.7098, 0.1097, 0.1910],
         [1.6117, 0.0965, 0.1843],
         [1.4433, 0.1051, 0.1767]],

        [[1.1718, 0.1156, 0.2006],
         [0.2294, 0.1201, 0.4199],
         [0.0926, 0.2013, 0.3529],
         ...,
         [0.6465, 0.1007, 0.1705],
         [1.3053, 0.1003, 0.1815],
         [1.4131, 0.1147, 0.1516]],

        [[1.1585, 0.1238, 0.2009],
         [0.6445, 0.1165, 0.2484],
         [0.1225, 0.2001, 0.2659],
         ...,
         [1.0513, 0.1140, 0.2095],
         [1.2064, 0.1132, 0.1723],
         [1.0898, 0.0938, 0.1610]]], grad_fn=<AddBackward0>)



Train Diffusion:  11%|█▏        | 571/5001 [1:24:36<9:35:56,  7.80s/it][A
Train Diffusion:  11%|█▏        | 572/5001 [1:24:44<9:34:10,  7.78s/it][A
Train Diffusion:  11%|█▏        | 573/5001 [1:24:52<9:26:51,  7.68s/it][A
Train Diffusion:  11%|█▏        | 574/5001 [1:24:59<9:28:59,  7.71s/it][A
Train Diffusion:  11%|█▏        | 575/5001 [1:25:07<9:30:17,  7.73s/it][A
Train Diffusion:  12%|█▏        | 576/5001 [1:25:14<9:19:01,  7.58s/it][A
Train Diffusion:  12%|█▏        | 577/5001 [1:25:22<9:18:05,  7.57s/it][A
Train Diffusion:  12%|█▏        | 578/5001 [1:25:29<9:18:11,  7.57s/it][A
Train Diffusion:  12%|█▏        | 579/5001 [1:25:37<9:19:31,  7.59s/it][A
Train Diffusion:  12%|█▏        | 580/5001 [1:25:45<9:20:30,  7.61s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 154610.3703125. Best ELBO loss value is: 87741.7109375.

C_PATH mean = tensor([[0.8012, 0.0944, 0.1815],
        [0.7954, 0.0942, 0.1817],
        [0.8178, 0.0939, 0.1824]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2401, 0.1060, 0.2210],
         [0.8395, 0.0859, 0.2983],
         [0.0770, 0.1894, 0.2518],
         ...,
         [1.1912, 0.0618, 0.1809],
         [0.9563, 0.0998, 0.1522],
         [1.0724, 0.0601, 0.1979]],

        [[0.9986, 0.1161, 0.1760],
         [0.1145, 0.1208, 0.2091],
         [0.0497, 0.1617, 0.3143],
         ...,
         [0.2896, 0.1304, 0.1985],
         [0.9151, 0.0673, 0.1459],
         [1.1089, 0.0996, 0.1381]],

        [[0.6649, 0.1134, 0.1575],
         [0.1552, 0.0786, 0.1150],
         [2.4941, 0.0871, 0.1480],
         ...,
         [1.1851, 0.1119, 0.1839],
         [0.7178, 0.0859, 0.2035],
         [0.4270, 0.0736, 0.1542]]], grad_fn=<AddBackward0>)



Train Diffusion:  12%|█▏        | 581/5001 [1:25:52<9:14:21,  7.53s/it][A
Train Diffusion:  12%|█▏        | 582/5001 [1:26:00<9:24:22,  7.66s/it][A
Train Diffusion:  12%|█▏        | 583/5001 [1:26:08<9:23:00,  7.65s/it][A
Train Diffusion:  12%|█▏        | 584/5001 [1:26:15<9:13:29,  7.52s/it][A
Train Diffusion:  12%|█▏        | 585/5001 [1:26:23<9:16:44,  7.56s/it][A
Train Diffusion:  12%|█▏        | 586/5001 [1:26:30<9:18:43,  7.59s/it][A
Train Diffusion:  12%|█▏        | 587/5001 [1:26:39<9:36:40,  7.84s/it][A
Train Diffusion:  12%|█▏        | 588/5001 [1:26:46<9:24:48,  7.68s/it][A
Train Diffusion:  12%|█▏        | 589/5001 [1:26:54<9:22:50,  7.65s/it][A
Train Diffusion:  12%|█▏        | 590/5001 [1:27:01<9:25:28,  7.69s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 142879.6875. Best ELBO loss value is: 87741.7109375.

C_PATH mean = tensor([[0.8478, 0.0538, 0.1491],
        [0.8439, 0.0540, 0.1485],
        [0.8631, 0.0525, 0.1484]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0889, 0.0988, 0.1854],
         [0.3125, 0.0965, 0.1541],
         [0.1389, 0.1969, 0.5129],
         ...,
         [0.1231, 0.0326, 0.0959],
         [0.4478, 0.0338, 0.1157],
         [1.3342, 0.0395, 0.1619]],

        [[0.6942, 0.0810, 0.1589],
         [0.0802, 0.1842, 0.0479],
         [0.4536, 0.1015, 0.2941],
         ...,
         [2.1396, 0.0605, 0.1464],
         [0.0829, 0.0443, 0.1619],
         [0.5528, 0.0355, 0.0945]],

        [[1.2303, 0.0755, 0.1873],
         [0.5254, 0.0654, 0.3743],
         [0.0457, 0.1108, 0.2712],
         ...,
         [0.4222, 0.0772, 0.1802],
         [1.0841, 0.0611, 0.1204],
         [0.5601, 0.0658, 0.1461]]], grad_fn=<AddBackward0>)



Train Diffusion:  12%|█▏        | 591/5001 [1:27:09<9:19:46,  7.62s/it][A
Train Diffusion:  12%|█▏        | 592/5001 [1:27:17<9:27:53,  7.73s/it][A
Train Diffusion:  12%|█▏        | 593/5001 [1:27:24<9:20:37,  7.63s/it][A
Train Diffusion:  12%|█▏        | 594/5001 [1:27:31<9:09:13,  7.48s/it][A
Train Diffusion:  12%|█▏        | 595/5001 [1:27:39<9:08:03,  7.46s/it][A
Train Diffusion:  12%|█▏        | 596/5001 [1:27:46<9:12:39,  7.53s/it][A
Train Diffusion:  12%|█▏        | 597/5001 [1:27:54<9:14:50,  7.56s/it][A
Train Diffusion:  12%|█▏        | 598/5001 [1:28:02<9:13:24,  7.54s/it][A
Train Diffusion:  12%|█▏        | 599/5001 [1:28:10<9:25:43,  7.71s/it][A
Train Diffusion:  12%|█▏        | 600/5001 [1:28:17<9:28:27,  7.75s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 105367.059375. Best ELBO loss value is: 87741.7109375.

C_PATH mean = tensor([[0.8313, 0.0430, 0.1290],
        [0.8201, 0.0426, 0.1300],
        [0.8138, 0.0436, 0.1297]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6899, 0.0589, 0.1312],
         [0.1649, 0.0536, 0.0982],
         [1.7944, 0.0337, 0.1403],
         ...,
         [0.3054, 0.0196, 0.0611],
         [0.4264, 0.0708, 0.0935],
         [1.6908, 0.0250, 0.1258]],

        [[1.2624, 0.0545, 0.1394],
         [0.5406, 0.0383, 0.2740],
         [0.1103, 0.0615, 0.1931],
         ...,
         [0.9404, 0.0758, 0.1402],
         [0.1089, 0.1009, 0.1482],
         [0.9121, 0.0350, 0.1724]],

        [[0.9929, 0.0850, 0.1005],
         [0.1226, 0.1059, 0.1113],
         [0.0671, 0.1806, 0.1698],
         ...,
         [1.1625, 0.0489, 0.1258],
         [0.1000, 0.0575, 0.1335],
         [0.1587, 0.0400, 0.1425]]], grad_fn=<AddBackward0>)



Train Diffusion:  12%|█▏        | 601/5001 [1:28:25<9:29:18,  7.76s/it][A
Train Diffusion:  12%|█▏        | 602/5001 [1:28:33<9:30:07,  7.78s/it][A
Train Diffusion:  12%|█▏        | 603/5001 [1:28:40<9:21:46,  7.66s/it][A
Train Diffusion:  12%|█▏        | 604/5001 [1:28:49<9:43:54,  7.97s/it][A
Train Diffusion:  12%|█▏        | 605/5001 [1:28:57<9:40:01,  7.92s/it][A
Train Diffusion:  12%|█▏        | 606/5001 [1:29:05<9:32:37,  7.82s/it][A
Train Diffusion:  12%|█▏        | 607/5001 [1:29:12<9:26:32,  7.74s/it][A
Train Diffusion:  12%|█▏        | 608/5001 [1:29:21<9:41:57,  7.95s/it][A
Train Diffusion:  12%|█▏        | 609/5001 [1:29:29<9:58:09,  8.17s/it][A
Train Diffusion:  12%|█▏        | 610/5001 [1:29:37<9:44:34,  7.99s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 84686.4484375. Best ELBO loss value is: 67461.390625.

C_PATH mean = tensor([[0.5398, 0.0411, 0.0989],
        [0.5292, 0.0420, 0.0992],
        [0.5425, 0.0409, 0.0989]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6708, 0.0745, 0.0898],
         [0.0907, 0.1027, 0.0703],
         [0.5181, 0.0434, 0.0455],
         ...,
         [0.3295, 0.0407, 0.1197],
         [0.1595, 0.0348, 0.1242],
         [0.1955, 0.0284, 0.1069]],

        [[1.2608, 0.0425, 0.0684],
         [0.4767, 0.0501, 0.1612],
         [0.0333, 0.0383, 0.2019],
         ...,
         [0.3981, 0.0603, 0.0846],
         [0.7838, 0.0349, 0.0824],
         [0.6550, 0.0282, 0.0628]],

        [[0.7268, 0.0605, 0.1044],
         [0.0509, 0.0620, 0.1043],
         [0.0446, 0.0817, 0.1197],
         ...,
         [0.8853, 0.0425, 0.1101],
         [1.0738, 0.0443, 0.0577],
         [1.1598, 0.0482, 0.2190]]], grad_fn=<AddBackward0>)



Train Diffusion:  12%|█▏        | 611/5001 [1:29:46<10:21:49,  8.50s/it][A
Train Diffusion:  12%|█▏        | 612/5001 [1:29:55<10:19:17,  8.47s/it][A
Train Diffusion:  12%|█▏        | 613/5001 [1:30:03<10:01:55,  8.23s/it][A
Train Diffusion:  12%|█▏        | 614/5001 [1:30:10<9:52:43,  8.11s/it] [A
Train Diffusion:  12%|█▏        | 615/5001 [1:30:18<9:43:20,  7.98s/it][A
Train Diffusion:  12%|█▏        | 616/5001 [1:30:26<9:37:14,  7.90s/it][A
Train Diffusion:  12%|█▏        | 617/5001 [1:30:33<9:25:53,  7.74s/it][A
Train Diffusion:  12%|█▏        | 618/5001 [1:30:40<9:13:50,  7.58s/it][A
Train Diffusion:  12%|█▏        | 619/5001 [1:30:48<9:14:51,  7.60s/it][A
Train Diffusion:  12%|█▏        | 620/5001 [1:30:56<9:19:19,  7.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 61636.84296875. Best ELBO loss value is: 50696.7734375.

C_PATH mean = tensor([[0.2805, 0.0542, 0.0798],
        [0.2750, 0.0541, 0.0806],
        [0.2789, 0.0546, 0.0802]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6945, 0.0991, 0.0410],
         [0.1940, 0.0647, 0.0810],
         [0.4689, 0.0623, 0.0993],
         ...,
         [0.0608, 0.0501, 0.0929],
         [0.6006, 0.0603, 0.0376],
         [0.3324, 0.0232, 0.0227]],

        [[0.5666, 0.0593, 0.1267],
         [0.0532, 0.0755, 0.0668],
         [0.0986, 0.0523, 0.0812],
         ...,
         [0.4451, 0.0393, 0.0615],
         [0.1608, 0.0403, 0.0792],
         [0.3976, 0.0451, 0.0958]],

        [[1.0942, 0.0568, 0.0802],
         [0.2149, 0.0535, 0.1344],
         [0.0457, 0.1012, 0.1302],
         ...,
         [0.2060, 0.0756, 0.0883],
         [0.2305, 0.0550, 0.1041],
         [0.1475, 0.0394, 0.0458]]], grad_fn=<AddBackward0>)



Train Diffusion:  12%|█▏        | 621/5001 [1:31:03<9:07:30,  7.50s/it][A
Train Diffusion:  12%|█▏        | 622/5001 [1:31:11<9:15:07,  7.61s/it][A
Train Diffusion:  12%|█▏        | 623/5001 [1:31:19<9:27:47,  7.78s/it][A
Train Diffusion:  12%|█▏        | 624/5001 [1:31:27<9:27:43,  7.78s/it][A
Train Diffusion:  12%|█▏        | 625/5001 [1:31:34<9:22:22,  7.71s/it][A
Train Diffusion:  13%|█▎        | 626/5001 [1:31:42<9:16:10,  7.63s/it][A
Train Diffusion:  13%|█▎        | 627/5001 [1:31:50<9:33:14,  7.86s/it][A
Train Diffusion:  13%|█▎        | 628/5001 [1:31:59<9:49:41,  8.09s/it][A
Train Diffusion:  13%|█▎        | 629/5001 [1:32:07<10:03:33,  8.28s/it][A
Train Diffusion:  13%|█▎        | 630/5001 [1:32:15<9:51:29,  8.12s/it] [A


Moving average ELBO loss at <built-in function iter> iterations is: 44806.6109375. Best ELBO loss value is: 40030.62109375.

C_PATH mean = tensor([[0.1836, 0.0569, 0.0683],
        [0.1828, 0.0565, 0.0682],
        [0.1829, 0.0564, 0.0680]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.8496, 0.0691, 0.0677],
         [0.1698, 0.0526, 0.1112],
         [0.0354, 0.0361, 0.0955],
         ...,
         [0.0765, 0.0441, 0.0346],
         [0.2298, 0.0444, 0.0564],
         [0.0777, 0.0307, 0.0030]],

        [[0.6969, 0.0727, 0.0941],
         [0.0537, 0.0622, 0.1281],
         [0.0363, 0.1002, 0.0839],
         ...,
         [0.1700, 0.0552, 0.0409],
         [0.6668, 0.0683, 0.0345],
         [0.0779, 0.0234, 0.0660]],

        [[0.5007, 0.0710, 0.0834],
         [0.0761, 0.0826, 0.0604],
         [0.3032, 0.0548, 0.0449],
         ...,
         [0.0682, 0.0493, 0.0764],
         [0.0332, 0.0199, 0.0603],
         [0.2936, 0.0626, 0.0309]]], grad_fn=<AddBackward0>)



Train Diffusion:  13%|█▎        | 631/5001 [1:32:23<9:42:29,  8.00s/it][A
Train Diffusion:  13%|█▎        | 632/5001 [1:32:35<11:09:14,  9.19s/it][A
Train Diffusion:  13%|█▎        | 633/5001 [1:32:43<10:55:17,  9.00s/it][A
Train Diffusion:  13%|█▎        | 634/5001 [1:32:54<11:38:45,  9.60s/it][A
Train Diffusion:  13%|█▎        | 635/5001 [1:33:02<10:54:33,  9.00s/it][A
Train Diffusion:  13%|█▎        | 636/5001 [1:33:10<10:26:30,  8.61s/it][A
Train Diffusion:  13%|█▎        | 637/5001 [1:33:18<10:10:17,  8.39s/it][A
Train Diffusion:  13%|█▎        | 638/5001 [1:33:27<10:42:29,  8.84s/it][A
Train Diffusion:  13%|█▎        | 639/5001 [1:33:36<10:37:57,  8.78s/it][A
Train Diffusion:  13%|█▎        | 640/5001 [1:33:45<10:39:06,  8.79s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 36723.5328125. Best ELBO loss value is: 33530.69140625.

C_PATH mean = tensor([[0.1343, 0.0545, 0.0704],
        [0.1373, 0.0550, 0.0714],
        [0.1363, 0.0549, 0.0711]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7843, 0.0538, 0.0635],
         [0.0506, 0.0360, 0.0857],
         [0.0310, 0.0564, 0.0945],
         ...,
         [0.0177, 0.0332, 0.0506],
         [0.0330, 0.0180, 0.0452],
         [0.0318, 0.0214, 0.0201]],

        [[0.8421, 0.0694, 0.0871],
         [0.0888, 0.0596, 0.1196],
         [0.0497, 0.0715, 0.0775],
         ...,
         [0.0517, 0.0574, 0.0643],
         [0.1017, 0.0595, 0.0380],
         [0.2083, 0.0316, 0.1268]],

        [[0.4916, 0.0522, 0.0783],
         [0.0969, 0.0555, 0.0283],
         [0.5010, 0.0486, 0.0837],
         ...,
         [0.1739, 0.0570, 0.0565],
         [0.0932, 0.0531, 0.0511],
         [0.0652, 0.0406, 0.0539]]], grad_fn=<AddBackward0>)



Train Diffusion:  13%|█▎        | 641/5001 [1:33:56<11:34:48,  9.56s/it][A
Train Diffusion:  13%|█▎        | 642/5001 [1:34:20<16:34:09, 13.68s/it][A
Train Diffusion:  13%|█▎        | 643/5001 [1:34:30<15:22:19, 12.70s/it][A
Train Diffusion:  13%|█▎        | 644/5001 [1:34:39<14:06:34, 11.66s/it][A
Train Diffusion:  13%|█▎        | 645/5001 [1:34:47<12:45:54, 10.55s/it][A
Train Diffusion:  13%|█▎        | 646/5001 [1:34:55<11:42:09,  9.67s/it][A
Train Diffusion:  13%|█▎        | 647/5001 [1:35:05<11:50:49,  9.80s/it][A
Train Diffusion:  13%|█▎        | 648/5001 [1:35:16<12:27:48, 10.31s/it][A
Train Diffusion:  13%|█▎        | 649/5001 [1:35:28<12:57:44, 10.72s/it][A
Train Diffusion:  13%|█▎        | 650/5001 [1:35:37<12:24:17, 10.26s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 30208.291015625. Best ELBO loss value is: 28329.7109375.

C_PATH mean = tensor([[0.0775, 0.0586, 0.0627],
        [0.0780, 0.0586, 0.0633],
        [0.0772, 0.0584, 0.0628]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.4612, 0.0688, 0.0316],
         [0.0839, 0.0508, 0.0576],
         [0.0904, 0.0495, 0.0855],
         ...,
         [0.0286, 0.0320, 0.0463],
         [0.0773, 0.0567, 0.0387],
         [0.1844, 0.0556, 0.0800]],

        [[0.7721, 0.0329, 0.0998],
         [0.0318, 0.0585, 0.0416],
         [0.0467, 0.0579, 0.0368],
         ...,
         [0.0811, 0.0824, 0.0594],
         [0.1624, 0.0658, 0.0459],
         [0.0956, 0.0438, 0.0478]],

        [[0.8429, 0.0320, 0.0685],
         [0.0678, 0.0367, 0.0884],
         [0.0197, 0.0702, 0.0716],
         ...,
         [0.1227, 0.0803, 0.0757],
         [0.0370, 0.0598, 0.0644],
         [0.0425, 0.0433, 0.0164]]], grad_fn=<AddBackward0>)



Train Diffusion:  13%|█▎        | 651/5001 [1:35:47<12:17:52, 10.18s/it][A
Train Diffusion:  13%|█▎        | 652/5001 [1:35:59<12:47:14, 10.59s/it][A
Train Diffusion:  13%|█▎        | 653/5001 [1:36:11<13:27:45, 11.15s/it][A
Train Diffusion:  13%|█▎        | 654/5001 [1:36:22<13:15:25, 10.98s/it][A
Train Diffusion:  13%|█▎        | 655/5001 [1:36:32<12:56:29, 10.72s/it][A
Train Diffusion:  13%|█▎        | 656/5001 [1:36:42<12:31:41, 10.38s/it][A
Train Diffusion:  13%|█▎        | 657/5001 [1:36:50<11:58:20,  9.92s/it][A
Train Diffusion:  13%|█▎        | 658/5001 [1:37:00<11:44:39,  9.74s/it][A
Train Diffusion:  13%|█▎        | 659/5001 [1:37:11<12:14:10, 10.15s/it][A
Train Diffusion:  13%|█▎        | 660/5001 [1:37:20<11:57:36,  9.92s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 26614.293359375. Best ELBO loss value is: 24315.515625.

C_PATH mean = tensor([[0.0683, 0.0535, 0.0574],
        [0.0678, 0.0534, 0.0573],
        [0.0676, 0.0537, 0.0576]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.4556, 0.0345, 0.0601],
         [0.0388, 0.0385, 0.0387],
         [0.0284, 0.0350, 0.0718],
         ...,
         [0.0323, 0.0672, 0.0353],
         [0.1019, 0.0480, 0.0271],
         [0.0539, 0.0531, 0.0351]],

        [[0.6047, 0.0569, 0.0451],
         [0.0592, 0.0594, 0.0603],
         [0.0407, 0.0692, 0.0487],
         ...,
         [0.1177, 0.0608, 0.0599],
         [0.0838, 0.0810, 0.0570],
         [0.0720, 0.0303, 0.1662]],

        [[0.8579, 0.0571, 0.0705],
         [0.0892, 0.0582, 0.0693],
         [0.0824, 0.0533, 0.0709],
         ...,
         [0.0494, 0.0303, 0.0632],
         [0.0355, 0.0274, 0.0493],
         [0.0810, 0.0450, 0.0074]]], grad_fn=<AddBackward0>)



Train Diffusion:  13%|█▎        | 661/5001 [1:37:29<11:29:38,  9.53s/it][A
Train Diffusion:  13%|█▎        | 662/5001 [1:37:38<11:10:48,  9.28s/it][A
Train Diffusion:  13%|█▎        | 663/5001 [1:37:45<10:40:49,  8.86s/it][A
Train Diffusion:  13%|█▎        | 664/5001 [1:37:54<10:40:00,  8.85s/it][A
Train Diffusion:  13%|█▎        | 665/5001 [1:38:03<10:44:43,  8.92s/it][A
Train Diffusion:  13%|█▎        | 666/5001 [1:38:11<10:27:00,  8.68s/it][A
Train Diffusion:  13%|█▎        | 667/5001 [1:38:20<10:22:23,  8.62s/it][A
Train Diffusion:  13%|█▎        | 668/5001 [1:38:28<10:20:26,  8.59s/it][A
Train Diffusion:  13%|█▎        | 669/5001 [1:38:36<9:56:10,  8.26s/it] [A
Train Diffusion:  13%|█▎        | 670/5001 [1:38:44<9:41:31,  8.06s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 24701.2337890625. Best ELBO loss value is: 23524.86328125.

C_PATH mean = tensor([[0.0624, 0.0534, 0.0564],
        [0.0633, 0.0537, 0.0567],
        [0.0630, 0.0532, 0.0563]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.7903, 0.0575, 0.0422],
         [0.0811, 0.0507, 0.0734],
         [0.0265, 0.0577, 0.0521],
         ...,
         [0.0376, 0.0422, 0.0687],
         [0.0474, 0.0462, 0.0507],
         [0.1039, 0.0537, 0.0995]],

        [[0.3868, 0.0374, 0.0747],
         [0.0286, 0.0482, 0.0418],
         [0.0489, 0.0410, 0.0685],
         ...,
         [0.0508, 0.0815, 0.0329],
         [0.0815, 0.0626, 0.0497],
         [0.0384, 0.0469, 0.0087]],

        [[0.6480, 0.0387, 0.0789],
         [0.0443, 0.0639, 0.0476],
         [0.0830, 0.0474, 0.0543],
         ...,
         [0.1401, 0.0454, 0.0665],
         [0.0417, 0.0499, 0.0422],
         [0.0682, 0.0380, 0.1043]]], grad_fn=<AddBackward0>)



Train Diffusion:  13%|█▎        | 671/5001 [1:38:51<9:29:01,  7.88s/it][A
Train Diffusion:  13%|█▎        | 672/5001 [1:39:00<9:43:14,  8.08s/it][A
Train Diffusion:  13%|█▎        | 673/5001 [1:39:11<10:37:52,  8.84s/it][A


KeyboardInterrupt: 