In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [18]:
# Fix the global torch RNG seed so repeated runs draw the same samples.
torch.manual_seed(0)

# Use the selected CUDA device when one is available, otherwise fall back to the CPU.
# NOTE(review): `cuda_id` is not defined in any visible cell; presumably it is provided by
# one of the star imports. It is only evaluated on machines where CUDA is available — confirm.
devi = torch.device(f"cuda:{cuda_id}" if torch.cuda.is_available() else "cpu")

# --- Time grid used by the flow / SDE discretization ---
t = 250  # Simulation run for T hours.
dt_flow = 0.2  # SDE discretization timestep.
n = int(t / dt_flow) + 1  # Number of grid points, both endpoints included.
t_span = np.linspace(0, t, n)
# T_span needs to be converted to tensor object (shape [1, n, 1]).
# Additionally, facilitates conversion of I_S and I_D to tensor objects.
t_span_tensor = torch.Tensor(t_span).reshape(1, n, 1)

# --- Optimization settings (passed to train() as NITER / PRETRAIN_ITER / learning rates) ---
niter, piter = 2200, 50
pretrain_lr, train_lr = 1e-2, 1e-3
batch_size = 10  # Number of sets of observation outputs to sample per set of parameters.

# --- State dimensions. CO2 is not included in STATE_DIM, because CO2 is an observation. ---
state_dim_SCON = 3
state_dim_SAWB = 4

In [19]:
# Temperature forcing settings (passed to temp_gen() and to the drift/diffusion function).
temp_ref = 283  # Reference temperature. NOTE(review): presumably Kelvin — confirm.
temp_rise = 5  # High estimate of 5 celsius temperature rise by 2100.

# System parameters from deterministic CON model
u_M = 2e-3
a_SD = a_DS = a_M = 0.33
a_MSC = 0.5
k_S_ref = 2.5e-5
k_D_ref = 5e-3
k_M_ref = 2e-4
Ea_S = 75
Ea_D = Ea_M = 50

# SCON diffusion matrix parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.05

# Bundle every SCON-C parameter under its own name, in the same key order as before.
SCON_C_params_dict = dict(
    u_M=u_M,
    a_SD=a_SD,
    a_DS=a_DS,
    a_M=a_M,
    a_MSC=a_MSC,
    k_S_ref=k_S_ref,
    k_D_ref=k_D_ref,
    k_M_ref=k_M_ref,
    Ea_S=Ea_S,
    Ea_D=Ea_D,
    Ea_M=Ea_M,
    c_SOC=c_SOC,
    c_DOC=c_DOC,
    c_MBC=c_MBC,
)

In [20]:
# Relative error scale; also passed to csv_to_obs_df() when loading the observations.
obs_error_scale = 0.1

# Prior mean of the initial SCON state (one entry per state variable).
x0_SCON = [37, 0.1, 0.9]
x0_SCON_tensor = torch.tensor(x0_SCON)

# Gaussian prior on the initial state. The scale_tril factor is diagonal, with each
# diagonal entry equal to obs_error_scale times the matching prior mean.
x0_prior_SCON = D.MultivariateNormal(
    loc=x0_SCON_tensor,
    scale_tril=torch.eye(state_dim_SCON) * obs_error_scale * x0_SCON_tensor,
)

In [21]:
# Load the observations from CSV: returns the observation times, the observed means
# and the observation error passed on to ObsModel below.
obs_times, obs_means_CON, obs_error_CON = csv_to_obs_df(
    'y_from_x_t_250_dt_0-01.csv',
    state_dim_SCON,
    t,
    obs_error_scale,
)

In [22]:
# Temperature forcing function evaluated on the flow time grid.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)

# SOC and DOC pool litter input vectors for use in flow SDE functions:
# i_s -> exogenous SOC input function, i_d -> exogenous DOC input function.
i_s_tensor, i_d_tensor = i_s(t_span_tensor), i_d(t_span_tensor)

In [23]:
# Observation model built from the loaded observation times, means and error scale,
# using the same device and timestep as the flow.
obs_model_CON_noCO2 = ObsModel(
    DEVICE = devi,
    TIMES = obs_times,
    DT = dt_flow,
    MU = obs_means_CON,
    SCALE = obs_error_CON,
)

In [24]:
def calc_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT):
    """Log-density of sampled state paths under the Euler-Maruyama discretized SDE.

    Note: this definition shadows the `calc_log_lik` imported from `training`
    at the top of the notebook.

    Args:
        C_PATH: state paths indexed as [batch, time, state].
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR: time grid and forcing
            tensors indexed as [*, time, *]; the last time point is dropped before
            they are handed to DRIFT_DIFFUSION.
        DT: discretization timestep.
        TEMP_REF: reference temperature, forwarded to DRIFT_DIFFUSION.
        DRIFT_DIFFUSION: callable returning `(drift, diffusion_sqrt)` for the given
            states; `diffusion_sqrt` is used as a `scale_tril` factor.
        X0_PRIOR: distribution whose `log_prob` scores the initial state.
        PARAMS_DICT: model parameters, forwarded to DRIFT_DIFFUSION.

    Returns:
        Tensor with one log-density per path: log p(x|x0, theta) + log p(x0|theta).
    """
    # States at steps 0..N-2 drive the transitions into steps 1..N-1.
    current_states = C_PATH[:, :-1, :]
    next_states = C_PATH[:, 1:, :]

    drift, diffusion_sqrt = DRIFT_DIFFUSION(
        current_states,
        T_SPAN_TENSOR[:, :-1, :],
        I_S_TENSOR[:, :-1, :],
        I_D_TENSOR[:, :-1, :],
        TEMP_TENSOR[:, :-1, :],
        TEMP_REF,
        PARAMS_DICT,
    )

    # One-step Euler-Maruyama transition: N(x_t + drift * dt, diffusion * dt).
    transition = D.MultivariateNormal(
        loc = current_states + drift * DT,
        scale_tril = diffusion_sqrt * math.sqrt(DT),
    )

    # log p(x|x0, theta): sum the per-step transition log-densities over time.
    path_ll = transition.log_prob(next_states).sum(-1)
    # log p(x0|theta): add the prior log-density of the initial state.
    path_ll += X0_PRIOR.log_prob(C_PATH[:, 0, :])

    return path_ll # (batch_size, )

In [25]:
def train(DEVICE, PRETRAIN_LR, TRAIN_LR, NITER, PRETRAIN_ITER, BATCH_SIZE, OBS_MODEL,
          STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF,
          DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT,
          LEARN_PARAMS = False, LR_DECAY = 0.1, DECAY_STEP_SIZE = 1000, PRINT_EVERY = 500):
    """Fit an SDEFlow in two phases: L1 pretraining, then ELBO-loss minimization.

    For the first PRETRAIN_ITER iterations the flow is trained with an L1 loss between
    the sampled paths and the time-mean of the observation means. Afterwards the loss is
    `-log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)`.

    Args:
        DEVICE: torch device the flow and the forcing tensors are moved to.
        PRETRAIN_LR, TRAIN_LR: Adam learning rates for the pretraining and ELBO phases.
        NITER: total number of iterations (pretraining included).
        PRETRAIN_ITER: number of pretraining iterations; must be < NITER.
        BATCH_SIZE: number of paths sampled from the flow per iteration.
        OBS_MODEL: observation model; called as `OBS_MODEL(C_PATH, theta_dict)` and read
            through its `mu` attribute.
        STATE_DIM, T, DT, N: forwarded to SDEFlow; DT is also the likelihood timestep.
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR: time grid and forcing tensors
            passed to calc_log_lik.
        TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR: forwarded to calc_log_lik.
        PARAMS_DICT: fixed model parameters, or the initialization of the MeanField
            posterior when LEARN_PARAMS is True.
        LEARN_PARAMS: sample theta from a MeanField posterior instead of using PARAMS_DICT.
        LR_DECAY, DECAY_STEP_SIZE: after pretraining, the learning rate is multiplied by
            LR_DECAY at every iteration index that is a multiple of DECAY_STEP_SIZE.
        PRINT_EVERY: print diagnostics every PRINT_EVERY iterations.

    Returns:
        (net, ELBO_losses): the trained flow and the per-iteration ELBO-loss values.

    Raises:
        ValueError: if PRETRAIN_ITER >= NITER.
    """
    # Validate before building the model so that bad arguments fail fast.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")

    net = SDEFlow(DEVICE, OBS_MODEL, STATE_DIM, T, DT, N, num_layers = 7).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = PRETRAIN_LR)

    if LEARN_PARAMS:
        # NOTE(review): theta_post's parameters are not handed to the optimizer, so if
        # MeanField holds trainable parameters they are never updated — confirm intent.
        theta_post = MeanField(PARAMS_DICT)
        theta_prior = D.normal.Normal(torch.zeros_like(theta_post.means),
                                      torch.ones_like(theta_post.std))

    # Move the loop-invariant inputs to DEVICE once. TEMP_TENSOR was previously the only
    # one left on its original device, which breaks on CUDA.
    # (assumes TEMP_TENSOR is a torch tensor like the other *_TENSOR arguments)
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)

    # Best losses are tracked as plain floats so no autograd graph is kept alive.
    best_loss_norm = 1e10
    best_loss_ELBO = 1e10
    norm_losses = []
    ELBO_losses = []

    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for it in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #Obtain paths with solutions at times after t0.

            if it < PRETRAIN_ITER:
                # Pretraining: pull every sampled state towards the time-mean of the
                # observation means. Sliced by STATE_DIM rather than a hard-coded 3
                # (identical for the 3-state SCON model).
                l1_norm_element = C_PATH - torch.mean(OBS_MODEL.mu[:STATE_DIM], -1)
                l1_norm = torch.sum(torch.abs(l1_norm_element))
                l1_value = l1_norm.item()
                best_loss_norm = min(best_loss_norm, l1_value)
                norm_losses.append(l1_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average norm loss at {it + 1} iterations is: {sum(norm_losses[-10:]) / len(norm_losses[-10:])}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()

            else:
                if LEARN_PARAMS:
                    theta_dict, theta, log_q_theta = theta_post()
                    log_p_theta = theta_prior.log_prob(theta).sum(-1)
                else:
                    # Fixed parameters: the theta terms contribute nothing to the loss.
                    theta_dict = PARAMS_DICT
                    log_q_theta, log_p_theta = torch.zeros(2)
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev,
                                       temp_dev, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, theta_dict)

                # - log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)
                ELBO = -log_p_theta.mean() + log_q_theta.mean() - log_lik.mean() - OBS_MODEL(C_PATH, theta_dict) + log_prob.mean()
                elbo_value = ELBO.item()
                best_loss_ELBO = min(best_loss_ELBO, elbo_value)
                ELBO_losses.append(elbo_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average ELBO loss at {it + 1} iterations is: {sum(ELBO_losses[-10:]) / len(ELBO_losses[-10:])}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\n C_PATH =', C_PATH)
                    print(theta_dict)
                ELBO.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            # Switch to the training learning rate on the first ELBO iteration, then
            # decay it at every multiple of DECAY_STEP_SIZE after pretraining.
            if it == PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] = TRAIN_LR
            elif it % DECAY_STEP_SIZE == 0 and it > PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] *= LR_DECAY
            optimizer.step()
            tq.update()

    return net, ELBO_losses

In [26]:
# Train the SCON-C flow with batch size 10 and dt_flow = 0.2.
# Returns the fitted flow and its ELBO-loss history.
net_batch_10_dt_flow_0_2, elbo_hist_batch_10_dt_flow_0_2 = train(
    DEVICE = devi,
    PRETRAIN_LR = pretrain_lr,
    TRAIN_LR = train_lr,
    NITER = niter,
    PRETRAIN_ITER = piter,
    BATCH_SIZE = batch_size,
    OBS_MODEL = obs_model_CON_noCO2,
    STATE_DIM = state_dim_SCON,
    T = t,
    DT = dt_flow,
    N = n,
    T_SPAN_TENSOR = t_span_tensor,
    I_S_TENSOR = i_s_tensor,
    I_D_TENSOR = i_d_tensor,
    TEMP_TENSOR = temp_tensor,
    TEMP_REF = temp_ref,
    DRIFT_DIFFUSION = drift_diffusion_SCON_C,
    X0_PRIOR = x0_prior_SCON,
    PARAMS_DICT = SCON_C_params_dict,
    LR_DECAY = 0.1,
    DECAY_STEP_SIZE = 5000,
    PRINT_EVERY = 20,
)


Train Diffusion:   0%|          | 0/2200 [00:00<?, ?it/s][A
Train Diffusion:   0%|          | 1/2200 [00:06<3:59:16,  6.53s/it][A
Train Diffusion:   0%|          | 2/2200 [00:12<3:56:07,  6.45s/it][A
Train Diffusion:   0%|          | 3/2200 [00:19<3:54:17,  6.40s/it][A
Train Diffusion:   0%|          | 4/2200 [00:25<3:54:24,  6.40s/it][A
Train Diffusion:   0%|          | 5/2200 [00:31<3:53:02,  6.37s/it][A
Train Diffusion:   0%|          | 6/2200 [00:38<3:52:37,  6.36s/it][A
Train Diffusion:   0%|          | 7/2200 [00:45<3:57:43,  6.50s/it][A
Train Diffusion:   0%|          | 8/2200 [00:52<4:03:11,  6.66s/it][A
Train Diffusion:   0%|          | 9/2200 [00:58<4:00:58,  6.60s/it][A
Train Diffusion:   0%|          | 10/2200 [01:04<3:57:56,  6.52s/it][A
Train Diffusion:   0%|          | 11/2200 [01:11<3:54:45,  6.43s/it][A
Train Diffusion:   1%|          | 12/2200 [01:17<3:55:58,  6.47s/it][A
Train Diffusion:   1%|          | 13/2200 [01:24<3:59:28,  6.57s/it][A
Train Diffu

Moving average norm loss at 20 iterations is: 381897.159375. Best norm loss value is: 379952.25.

C_PATH mean = tensor([[1.7509, 1.7300, 1.7481],
        [1.7526, 1.7299, 1.7384],
        [1.7506, 1.7317, 1.7354],
        [1.7515, 1.7240, 1.7422],
        [1.7556, 1.7319, 1.7437],
        [1.7487, 1.7273, 1.7426],
        [1.7550, 1.7302, 1.7380],
        [1.7465, 1.7366, 1.7461],
        [1.7515, 1.7347, 1.7477],
        [1.7488, 1.7269, 1.7395]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2846, 2.0741, 1.7830],
         [1.5008, 1.5309, 1.5655],
         [1.5080, 1.4883, 1.4751],
         ...,
         [1.7999, 2.0723, 1.9963],
         [1.8099, 1.9339, 1.8996],
         [1.9000, 1.9868, 1.8119]],

        [[2.2386, 1.5710, 1.6057],
         [1.5871, 1.5022, 1.5390],
         [1.5120, 1.5168, 1.4998],
         ...,
         [2.1553, 2.1221, 1.9819],
         [1.6007, 1.8264, 2.1494],
         [1.8871, 1.9030, 2.1736]],

        [[1.2243, 1.5896, 1.5379],
         [1.5409, 1.5224,


Train Diffusion:   1%|          | 20/2200 [02:12<4:05:15,  6.75s/it][A
Train Diffusion:   1%|          | 21/2200 [02:19<4:04:05,  6.72s/it][A
Train Diffusion:   1%|          | 22/2200 [02:25<4:02:53,  6.69s/it][A
Train Diffusion:   1%|          | 23/2200 [02:32<4:01:22,  6.65s/it][A
Train Diffusion:   1%|          | 24/2200 [02:38<3:59:55,  6.62s/it][A
Train Diffusion:   1%|          | 25/2200 [02:45<3:59:14,  6.60s/it][A
Train Diffusion:   1%|          | 26/2200 [02:51<3:58:07,  6.57s/it][A
Train Diffusion:   1%|          | 27/2200 [02:58<3:55:48,  6.51s/it][A
Train Diffusion:   1%|▏         | 28/2200 [03:04<3:54:20,  6.47s/it][A
Train Diffusion:   1%|▏         | 29/2200 [03:11<3:55:01,  6.50s/it][A
Train Diffusion:   1%|▏         | 30/2200 [03:17<3:57:55,  6.58s/it][A
Train Diffusion:   1%|▏         | 31/2200 [03:24<3:54:16,  6.48s/it][A
Train Diffusion:   1%|▏         | 32/2200 [03:30<3:51:13,  6.40s/it][A
Train Diffusion:   2%|▏         | 33/2200 [03:36<3:49:15,  6.35

Moving average norm loss at 40 iterations is: 367635.809375. Best norm loss value is: 335138.8125.

C_PATH mean = tensor([[6.3320, 1.3909, 2.1964],
        [6.4895, 1.2605, 2.1353],
        [6.6678, 1.3695, 2.1205],
        [6.6565, 1.3231, 2.1691],
        [6.7758, 1.1778, 2.1829],
        [6.6315, 1.3833, 2.2206],
        [6.1893, 1.2581, 2.1604],
        [6.6728, 1.2616, 2.1515],
        [6.3876, 1.2707, 2.1179],
        [6.4209, 1.2913, 2.2077]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.4397e+00, 9.6461e-01, 7.9647e-01],
         [1.3439e+01, 3.0523e-01, 1.1216e+01],
         [2.7091e+00, 8.2507e-01, 1.2765e+00],
         ...,
         [6.6265e-01, 8.3804e-01, 1.0045e+00],
         [9.9052e-01, 5.4575e+00, 2.9963e-01],
         [3.0741e-01, 1.8066e+00, 3.4728e+00]],

        [[4.1695e+00, 5.6732e-01, 9.8999e-01],
         [2.8062e+01, 1.3180e+00, 6.7675e-01],
         [1.6098e+00, 8.3818e-01, 9.3220e-01],
         ...,
         [1.6359e+01, 2.3367e-01, 9.1332e-01],
         [


Train Diffusion:   2%|▏         | 40/2200 [04:19<3:40:09,  6.12s/it][A
Train Diffusion:   2%|▏         | 41/2200 [04:25<3:39:06,  6.09s/it][A
Train Diffusion:   2%|▏         | 42/2200 [04:31<3:39:10,  6.09s/it][A
Train Diffusion:   2%|▏         | 43/2200 [04:38<3:42:33,  6.19s/it][A
Train Diffusion:   2%|▏         | 44/2200 [04:44<3:43:49,  6.23s/it][A
Train Diffusion:   2%|▏         | 45/2200 [04:50<3:44:59,  6.26s/it][A
Train Diffusion:   2%|▏         | 46/2200 [04:57<3:44:44,  6.26s/it][A
Train Diffusion:   2%|▏         | 47/2200 [05:03<3:46:25,  6.31s/it][A
Train Diffusion:   2%|▏         | 48/2200 [05:09<3:46:22,  6.31s/it][A
Train Diffusion:   2%|▏         | 49/2200 [05:16<3:46:57,  6.33s/it][A
Train Diffusion:   2%|▏         | 50/2200 [05:22<3:47:30,  6.35s/it][A
Train Diffusion:   2%|▏         | 51/2200 [05:28<3:48:11,  6.37s/it][A
Train Diffusion:   2%|▏         | 52/2200 [05:35<3:51:07,  6.46s/it][A
Train Diffusion:   2%|▏         | 53/2200 [05:42<3:52:25,  6.50

Moving average ELBO loss at 60 iterations is: 2606665.05. Best ELBO loss value is: 2282392.25.

C_PATH mean = tensor([[22.6426,  1.1864,  2.5462],
        [23.4201,  1.1508,  2.5246],
        [23.2106,  1.1936,  2.5673],
        [23.3048,  1.1535,  2.5065],
        [21.6320,  1.1794,  2.4487],
        [23.8005,  1.1031,  2.4560],
        [22.5662,  1.1951,  2.5730],
        [22.9441,  1.1487,  2.5584],
        [22.3688,  1.1881,  2.4977],
        [23.3800,  1.1716,  2.5407]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.8463e+00, 9.1406e+00, 2.9260e+00],
         [1.5318e+01, 5.2423e+00, 2.1511e+00],
         [2.4183e+01, 6.1135e-01, 2.5431e+00],
         ...,
         [7.3541e+00, 3.4934e+00, 9.1423e+00],
         [1.0290e+01, 7.8771e+00, 3.2930e+00],
         [1.6590e+01, 8.1686e+00, 2.1880e+00]],

        [[6.8091e-02, 4.4374e-02, 7.4708e-02],
         [1.1966e-01, 3.9059e-01, 2.1179e-01],
         [2.3240e-06, 9.3089e-02, 1.1356e+00],
         ...,
         [2.0165e+01, 1.4422e+


Train Diffusion:   3%|▎         | 60/2200 [06:27<3:49:58,  6.45s/it][A
Train Diffusion:   3%|▎         | 61/2200 [06:33<3:49:08,  6.43s/it][A
Train Diffusion:   3%|▎         | 62/2200 [06:40<3:47:45,  6.39s/it][A
Train Diffusion:   3%|▎         | 63/2200 [06:46<3:45:32,  6.33s/it][A
Train Diffusion:   3%|▎         | 64/2200 [06:52<3:43:56,  6.29s/it][A
Train Diffusion:   3%|▎         | 65/2200 [06:58<3:41:26,  6.22s/it][A
Train Diffusion:   3%|▎         | 66/2200 [07:04<3:39:51,  6.18s/it][A
Train Diffusion:   3%|▎         | 67/2200 [07:10<3:39:16,  6.17s/it][A
Train Diffusion:   3%|▎         | 68/2200 [07:16<3:39:00,  6.16s/it][A
Train Diffusion:   3%|▎         | 69/2200 [07:23<3:38:09,  6.14s/it][A
Train Diffusion:   3%|▎         | 70/2200 [07:29<3:36:42,  6.10s/it][A
Train Diffusion:   3%|▎         | 71/2200 [07:35<3:36:01,  6.09s/it][A
Train Diffusion:   3%|▎         | 72/2200 [07:41<3:35:27,  6.07s/it][A
Train Diffusion:   3%|▎         | 73/2200 [07:47<3:36:40,  6.11

Moving average ELBO loss at 80 iterations is: 928298.1875. Best ELBO loss value is: 748321.3125.

C_PATH mean = tensor([[21.2103,  0.8375,  2.9007],
        [20.2048,  0.8316,  2.8085],
        [22.6897,  0.8363,  2.9731],
        [21.0890,  0.8393,  2.8524],
        [21.6750,  0.8628,  2.9157],
        [20.8740,  0.8024,  2.8387],
        [22.1510,  0.8241,  2.9529],
        [20.8311,  0.8214,  2.8638],
        [20.4057,  0.8469,  2.8482],
        [19.9589,  0.8330,  2.7883]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[5.4478e+00, 1.2825e+00, 5.2964e+00],
         [5.2964e+01, 4.3627e-01, 1.2959e+00],
         [1.7343e+01, 9.6390e-01, 4.4344e+00],
         ...,
         [1.0000e-06, 4.4255e-02, 5.4610e-01],
         [1.0004e-06, 5.0964e-02, 2.2593e-01],
         [3.9436e-04, 5.7593e-02, 6.3467e-01]],

        [[1.2740e+00, 4.8670e+00, 3.1807e+00],
         [4.3762e+00, 4.4929e+00, 2.7203e+00],
         [8.7950e+00, 4.8588e-02, 2.2728e+00],
         ...,
         [2.1786e+01, 9.2181


Train Diffusion:   4%|▎         | 80/2200 [08:30<3:42:50,  6.31s/it][A
Train Diffusion:   4%|▎         | 81/2200 [08:37<3:44:31,  6.36s/it][A
Train Diffusion:   4%|▎         | 82/2200 [08:43<3:45:29,  6.39s/it][A
Train Diffusion:   4%|▍         | 83/2200 [08:50<3:47:41,  6.45s/it][A
Train Diffusion:   4%|▍         | 84/2200 [08:56<3:47:13,  6.44s/it][A
Train Diffusion:   4%|▍         | 85/2200 [09:03<3:46:18,  6.42s/it][A
Train Diffusion:   4%|▍         | 86/2200 [09:09<3:45:24,  6.40s/it][A
Train Diffusion:   4%|▍         | 87/2200 [09:16<3:46:51,  6.44s/it][A
Train Diffusion:   4%|▍         | 88/2200 [09:22<3:47:57,  6.48s/it][A
Train Diffusion:   4%|▍         | 89/2200 [09:29<3:47:27,  6.46s/it][A
Train Diffusion:   4%|▍         | 90/2200 [09:35<3:48:26,  6.50s/it][A
Train Diffusion:   4%|▍         | 91/2200 [09:42<3:49:21,  6.53s/it][A
Train Diffusion:   4%|▍         | 92/2200 [09:48<3:49:36,  6.54s/it][A
Train Diffusion:   4%|▍         | 93/2200 [09:55<3:49:37,  6.54

Moving average ELBO loss at 100 iterations is: 405725.234375. Best ELBO loss value is: 333198.5625.

C_PATH mean = tensor([[18.6972,  0.4762,  2.1785],
        [18.5618,  0.4760,  2.1723],
        [17.5926,  0.4747,  2.1488],
        [18.6742,  0.4632,  2.1952],
        [18.3430,  0.4776,  2.1812],
        [18.6263,  0.4724,  2.1910],
        [18.9116,  0.4658,  2.1785],
        [18.4229,  0.4839,  2.1785],
        [18.4132,  0.4690,  2.1550],
        [17.3069,  0.4755,  2.1323]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[3.0675e-01, 6.6739e-01, 6.3197e-01],
         [5.8331e-01, 4.8807e-01, 8.6350e-01],
         [1.5544e-03, 1.2400e-01, 6.7375e-01],
         ...,
         [2.0785e-06, 1.4646e-01, 1.3686e+00],
         [1.4813e-05, 4.2149e-01, 5.8110e-01],
         [6.4378e+00, 8.6389e-01, 1.7404e+00]],

        [[2.6421e+00, 9.4153e-01, 1.7439e+00],
         [2.9435e+01, 7.3762e-01, 1.2465e+00],
         [2.1307e+01, 5.8162e-01, 3.0894e+00],
         ...,
         [8.8115e+00, 1.4


Train Diffusion:   5%|▍         | 100/2200 [10:41<3:49:04,  6.55s/it][A
Train Diffusion:   5%|▍         | 101/2200 [10:48<3:50:04,  6.58s/it][A
Train Diffusion:   5%|▍         | 102/2200 [10:54<3:45:37,  6.45s/it][A
Train Diffusion:   5%|▍         | 103/2200 [11:00<3:41:20,  6.33s/it][A
Train Diffusion:   5%|▍         | 104/2200 [11:06<3:38:21,  6.25s/it][A
Train Diffusion:   5%|▍         | 105/2200 [11:12<3:36:44,  6.21s/it][A
Train Diffusion:   5%|▍         | 106/2200 [11:18<3:34:57,  6.16s/it][A
Train Diffusion:   5%|▍         | 107/2200 [11:24<3:34:48,  6.16s/it][A
Train Diffusion:   5%|▍         | 108/2200 [11:30<3:33:46,  6.13s/it][A
Train Diffusion:   5%|▍         | 109/2200 [11:36<3:33:25,  6.12s/it][A
Train Diffusion:   5%|▌         | 110/2200 [11:42<3:32:41,  6.11s/it][A
Train Diffusion:   5%|▌         | 111/2200 [11:48<3:32:20,  6.10s/it][A
Train Diffusion:   5%|▌         | 112/2200 [11:55<3:33:02,  6.12s/it][A
Train Diffusion:   5%|▌         | 113/2200 [12:01<

Moving average ELBO loss at 120 iterations is: 172039.865625. Best ELBO loss value is: 136320.28125.

C_PATH mean = tensor([[14.4075,  0.2932,  1.6013],
        [13.6704,  0.2939,  1.5801],
        [13.2257,  0.2980,  1.5763],
        [14.1569,  0.2958,  1.6055],
        [13.9968,  0.2950,  1.5881],
        [13.9633,  0.2948,  1.5753],
        [14.1481,  0.2936,  1.5851],
        [13.6200,  0.2979,  1.5553],
        [14.2691,  0.2991,  1.5740],
        [14.0236,  0.2918,  1.5949]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[2.6841e+00, 6.6328e-01, 1.2276e+00],
         [1.8881e+01, 4.1030e-01, 1.2105e+00],
         [1.1803e+01, 3.4389e-01, 1.5251e+00],
         ...,
         [1.5238e+01, 3.1235e-01, 1.2744e+00],
         [2.1098e+01, 5.0574e-01, 1.5419e+00],
         [1.2094e+01, 6.3968e-01, 1.2775e+00]],

        [[2.5404e+00, 5.1238e-01, 1.4042e+00],
         [1.8354e+01, 7.1051e-02, 2.5924e+00],
         [2.9568e-05, 4.3158e-02, 1.0565e+00],
         ...,
         [1.5104e+01, 3.


Train Diffusion:   5%|▌         | 120/2200 [12:45<3:37:57,  6.29s/it][A
Train Diffusion:   6%|▌         | 121/2200 [12:52<3:37:36,  6.28s/it][A
Train Diffusion:   6%|▌         | 122/2200 [12:58<3:40:02,  6.35s/it][A
Train Diffusion:   6%|▌         | 123/2200 [13:04<3:40:21,  6.37s/it][A
Train Diffusion:   6%|▌         | 124/2200 [13:11<3:39:19,  6.34s/it][A
Train Diffusion:   6%|▌         | 125/2200 [13:17<3:39:49,  6.36s/it][A
Train Diffusion:   6%|▌         | 126/2200 [13:23<3:39:38,  6.35s/it][A
Train Diffusion:   6%|▌         | 127/2200 [13:30<3:40:09,  6.37s/it][A
Train Diffusion:   6%|▌         | 128/2200 [13:36<3:40:06,  6.37s/it][A
Train Diffusion:   6%|▌         | 129/2200 [13:42<3:38:14,  6.32s/it][A
Train Diffusion:   6%|▌         | 130/2200 [13:49<3:37:35,  6.31s/it][A
Train Diffusion:   6%|▌         | 131/2200 [13:55<3:37:57,  6.32s/it][A
Train Diffusion:   6%|▌         | 132/2200 [14:01<3:37:28,  6.31s/it][A
Train Diffusion:   6%|▌         | 133/2200 [14:08<

Moving average ELBO loss at 140 iterations is: 70941.705859375. Best ELBO loss value is: 55693.265625.

C_PATH mean = tensor([[8.4909, 0.2322, 1.3842],
        [8.6126, 0.2315, 1.4038],
        [8.3128, 0.2280, 1.3805],
        [8.8312, 0.2279, 1.4136],
        [8.5789, 0.2304, 1.3900],
        [8.1386, 0.2244, 1.3720],
        [8.6707, 0.2312, 1.3992],
        [8.4485, 0.2277, 1.3836],
        [8.8636, 0.2287, 1.3971],
        [8.8264, 0.2331, 1.3987]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[3.3116e-01, 1.9034e-01, 6.6194e-01],
         [5.2791e-01, 3.3287e-01, 4.9025e-01],
         [1.1203e-02, 7.0627e-02, 8.1880e-01],
         ...,
         [7.1393e+00, 3.1421e-01, 1.0703e+00],
         [1.3220e+01, 2.4065e-01, 1.3063e+00],
         [1.0107e+01, 2.2872e-01, 1.1768e+00]],

        [[1.3111e+00, 5.5984e-01, 7.0233e-01],
         [5.2763e+00, 3.3115e-01, 1.2790e+00],
         [8.6717e+00, 2.4704e-01, 1.7831e+00],
         ...,
         [1.5176e-03, 2.2903e-02, 1.1611e+00],
     


Train Diffusion:   6%|▋         | 140/2200 [14:53<3:39:17,  6.39s/it][A
Train Diffusion:   6%|▋         | 141/2200 [14:59<3:40:05,  6.41s/it][A
Train Diffusion:   6%|▋         | 142/2200 [15:06<3:38:40,  6.38s/it][A
Train Diffusion:   6%|▋         | 143/2200 [15:12<3:39:12,  6.39s/it][A
Train Diffusion:   7%|▋         | 144/2200 [15:18<3:39:33,  6.41s/it][A
Train Diffusion:   7%|▋         | 145/2200 [15:25<3:37:57,  6.36s/it][A
Train Diffusion:   7%|▋         | 146/2200 [15:31<3:40:38,  6.45s/it][A
Train Diffusion:   7%|▋         | 147/2200 [15:38<3:38:51,  6.40s/it][A
Train Diffusion:   7%|▋         | 148/2200 [15:44<3:35:59,  6.32s/it][A
Train Diffusion:   7%|▋         | 149/2200 [15:50<3:35:21,  6.30s/it][A
Train Diffusion:   7%|▋         | 150/2200 [15:56<3:34:35,  6.28s/it][A
Train Diffusion:   7%|▋         | 151/2200 [16:03<3:35:41,  6.32s/it][A
Train Diffusion:   7%|▋         | 152/2200 [16:09<3:35:23,  6.31s/it][A
Train Diffusion:   7%|▋         | 153/2200 [16:15<

Moving average ELBO loss at 160 iterations is: 34251.690234375. Best ELBO loss value is: 30379.92578125.

C_PATH mean = tensor([[5.1418, 0.2102, 0.9719],
        [5.0909, 0.2075, 0.9695],
        [5.3787, 0.2130, 0.9810],
        [5.3287, 0.2115, 0.9842],
        [5.1445, 0.2133, 0.9718],
        [5.4253, 0.2088, 0.9814],
        [5.5345, 0.2101, 0.9930],
        [5.1136, 0.2079, 0.9577],
        [5.3332, 0.2130, 0.9813],
        [5.4441, 0.2150, 0.9839]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.0238, 0.3142, 0.4290],
         [2.8941, 0.2354, 0.7835],
         [4.5574, 0.2218, 1.0607],
         ...,
         [6.4011, 0.1711, 0.9319],
         [8.1258, 0.2766, 1.0149],
         [5.2904, 0.2338, 0.8515]],

        [[1.3311, 0.2426, 0.8799],
         [6.0681, 0.2547, 1.0076],
         [5.1258, 0.2129, 1.0753],
         ...,
         [7.8735, 0.2144, 0.8883],
         [8.3336, 0.2644, 1.0127],
         [5.4470, 0.2291, 0.8715]],

        [[1.5260, 0.1686, 1.2227],
         [7.5057


Train Diffusion:   7%|▋         | 160/2200 [16:59<3:29:46,  6.17s/it][A
Train Diffusion:   7%|▋         | 161/2200 [17:05<3:29:24,  6.16s/it][A
Train Diffusion:   7%|▋         | 162/2200 [17:11<3:30:03,  6.18s/it][A
Train Diffusion:   7%|▋         | 163/2200 [17:17<3:29:59,  6.19s/it][A
Train Diffusion:   7%|▋         | 164/2200 [17:24<3:29:13,  6.17s/it][A
Train Diffusion:   8%|▊         | 165/2200 [17:30<3:30:32,  6.21s/it][A
Train Diffusion:   8%|▊         | 166/2200 [17:36<3:30:38,  6.21s/it][A
Train Diffusion:   8%|▊         | 167/2200 [17:43<3:33:16,  6.29s/it][A
Train Diffusion:   8%|▊         | 168/2200 [17:49<3:33:11,  6.30s/it][A
Train Diffusion:   8%|▊         | 169/2200 [17:55<3:33:08,  6.30s/it][A
Train Diffusion:   8%|▊         | 170/2200 [18:01<3:33:34,  6.31s/it][A
Train Diffusion:   8%|▊         | 171/2200 [18:08<3:32:48,  6.29s/it][A
Train Diffusion:   8%|▊         | 172/2200 [18:14<3:34:14,  6.34s/it][A
Train Diffusion:   8%|▊         | 173/2200 [18:20<

Moving average ELBO loss at 180 iterations is: 17824.825. Best ELBO loss value is: 14595.390625.

C_PATH mean = tensor([[2.0992, 0.3612, 0.7676],
        [2.1479, 0.3660, 0.7708],
        [2.1098, 0.3688, 0.7751],
        [2.2537, 0.3650, 0.7865],
        [2.2998, 0.3709, 0.7881],
        [2.2345, 0.3632, 0.7818],
        [2.3006, 0.3682, 0.7896],
        [2.2473, 0.3676, 0.7835],
        [2.1525, 0.3656, 0.7804],
        [2.2362, 0.3651, 0.7869]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.0881, 0.2544, 0.7317],
         [3.1555, 0.4074, 0.8248],
         [3.1761, 0.3866, 0.9660],
         ...,
         [3.0496, 0.3397, 0.7034],
         [3.2536, 0.4199, 0.8501],
         [2.2399, 0.4072, 0.7562]],

        [[1.7745, 0.1575, 0.5346],
         [5.1732, 0.2626, 0.6926],
         [1.0496, 0.3909, 0.7171],
         ...,
         [0.5635, 0.4297, 0.6711],
         [0.6870, 0.6099, 0.5840],
         [0.8963, 0.5078, 0.7171]],

        [[0.5612, 0.2943, 0.5154],
         [0.7939, 0.3764


Train Diffusion:   8%|▊         | 180/2200 [19:04<3:30:13,  6.24s/it][A
Train Diffusion:   8%|▊         | 181/2200 [19:11<3:30:02,  6.24s/it][A
Train Diffusion:   8%|▊         | 182/2200 [19:17<3:30:11,  6.25s/it][A
Train Diffusion:   8%|▊         | 183/2200 [19:23<3:30:34,  6.26s/it][A
Train Diffusion:   8%|▊         | 184/2200 [19:30<3:31:58,  6.31s/it][A
Train Diffusion:   8%|▊         | 185/2200 [19:36<3:34:19,  6.38s/it][A
Train Diffusion:   8%|▊         | 186/2200 [19:43<3:36:10,  6.44s/it][A
Train Diffusion:   8%|▊         | 187/2200 [19:49<3:37:51,  6.49s/it][A
Train Diffusion:   9%|▊         | 188/2200 [19:56<3:40:43,  6.58s/it][A
Train Diffusion:   9%|▊         | 189/2200 [20:03<3:40:05,  6.57s/it][A
Train Diffusion:   9%|▊         | 190/2200 [20:09<3:40:02,  6.57s/it][A
Train Diffusion:   9%|▊         | 191/2200 [20:16<3:39:53,  6.57s/it][A
Train Diffusion:   9%|▊         | 192/2200 [20:22<3:40:13,  6.58s/it][A
Train Diffusion:   9%|▉         | 193/2200 [20:29<

Moving average ELBO loss at 200 iterations is: 9777.80048828125. Best ELBO loss value is: 9176.6689453125.

C_PATH mean = tensor([[0.9540, 0.5587, 0.6476],
        [0.9545, 0.5593, 0.6450],
        [0.9304, 0.5601, 0.6428],
        [0.9405, 0.5610, 0.6459],
        [0.9206, 0.5609, 0.6412],
        [0.9195, 0.5583, 0.6422],
        [0.9576, 0.5584, 0.6463],
        [0.9562, 0.5589, 0.6447],
        [0.9603, 0.5607, 0.6479],
        [0.9183, 0.5583, 0.6419]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.8728, 0.1689, 0.2078],
         [0.8862, 0.2977, 0.4464],
         [0.7646, 0.3451, 0.3966],
         ...,
         [1.5064, 0.7729, 0.6858],
         [1.4750, 0.7302, 0.7175],
         [0.9299, 0.6009, 0.8845]],

        [[0.5603, 0.1810, 0.2675],
         [0.3859, 0.2976, 0.3765],
         [0.9110, 0.3296, 0.3789],
         ...,
         [1.2094, 0.6355, 0.7421],
         [0.9719, 0.6798, 0.6903],
         [0.8844, 0.6722, 0.7335]],

        [[0.8236, 0.1702, 0.2782],
         [0.67


Train Diffusion:   9%|▉         | 200/2200 [21:15<3:39:15,  6.58s/it][A
Train Diffusion:   9%|▉         | 201/2200 [21:22<3:38:27,  6.56s/it][A
Train Diffusion:   9%|▉         | 202/2200 [21:28<3:35:45,  6.48s/it][A
Train Diffusion:   9%|▉         | 203/2200 [21:34<3:33:30,  6.41s/it][A
Train Diffusion:   9%|▉         | 204/2200 [21:40<3:30:59,  6.34s/it][A
Train Diffusion:   9%|▉         | 205/2200 [21:47<3:29:52,  6.31s/it][A
Train Diffusion:   9%|▉         | 206/2200 [21:53<3:29:49,  6.31s/it][A
Train Diffusion:   9%|▉         | 207/2200 [21:59<3:29:09,  6.30s/it][A
Train Diffusion:   9%|▉         | 208/2200 [22:05<3:29:05,  6.30s/it][A
Train Diffusion:  10%|▉         | 209/2200 [22:12<3:28:07,  6.27s/it][A
Train Diffusion:  10%|▉         | 210/2200 [22:18<3:28:18,  6.28s/it][A
Train Diffusion:  10%|▉         | 211/2200 [22:24<3:29:21,  6.32s/it][A
Train Diffusion:  10%|▉         | 212/2200 [22:31<3:30:02,  6.34s/it][A
Train Diffusion:  10%|▉         | 213/2200 [22:37<

Moving average ELBO loss at 220 iterations is: 7722.9373046875. Best ELBO loss value is: 7489.78515625.

C_PATH mean = tensor([[0.7448, 0.5621, 0.6369],
        [0.7710, 0.5656, 0.6383],
        [0.7622, 0.5704, 0.6389],
        [0.7880, 0.5681, 0.6467],
        [0.7613, 0.5628, 0.6375],
        [0.7744, 0.5677, 0.6446],
        [0.8080, 0.5696, 0.6501],
        [0.7881, 0.5688, 0.6481],
        [0.7756, 0.5676, 0.6443],
        [0.7690, 0.5672, 0.6406]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.5491, 0.1426, 0.3107],
         [0.2880, 0.2116, 0.2010],
         [0.1326, 0.4347, 0.1405],
         ...,
         [0.6885, 0.6555, 0.5759],
         [0.5292, 0.7447, 0.7011],
         [0.6946, 0.6974, 0.7879]],

        [[0.9452, 0.1740, 0.2369],
         [0.1953, 0.2239, 0.2602],
         [0.1255, 0.1315, 0.1703],
         ...,
         [1.9686, 0.6765, 0.7435],
         [1.3828, 0.6842, 1.1796],
         [0.9500, 0.7051, 0.9842]],

        [[0.3059, 0.0920, 0.0638],
         [0.1742,


Train Diffusion:  10%|█         | 220/2200 [23:21<3:30:01,  6.36s/it][A
Train Diffusion:  10%|█         | 221/2200 [23:28<3:29:16,  6.34s/it][A
Train Diffusion:  10%|█         | 222/2200 [23:34<3:30:32,  6.39s/it][A
Train Diffusion:  10%|█         | 223/2200 [23:41<3:30:43,  6.40s/it][A
Train Diffusion:  10%|█         | 224/2200 [23:47<3:28:33,  6.33s/it][A
Train Diffusion:  10%|█         | 225/2200 [23:53<3:28:52,  6.35s/it][A
Train Diffusion:  10%|█         | 226/2200 [23:59<3:28:05,  6.32s/it][A
Train Diffusion:  10%|█         | 227/2200 [24:06<3:26:48,  6.29s/it][A
Train Diffusion:  10%|█         | 228/2200 [24:12<3:29:12,  6.37s/it][A
Train Diffusion:  10%|█         | 229/2200 [24:18<3:27:46,  6.33s/it][A
Train Diffusion:  10%|█         | 230/2200 [24:25<3:29:43,  6.39s/it][A
Train Diffusion:  10%|█         | 231/2200 [24:31<3:29:15,  6.38s/it][A
Train Diffusion:  11%|█         | 232/2200 [24:38<3:31:09,  6.44s/it][A
Train Diffusion:  11%|█         | 233/2200 [24:44<

Moving average ELBO loss at 240 iterations is: 6912.746533203125. Best ELBO loss value is: 6796.60693359375.

C_PATH mean = tensor([[0.7607, 0.6113, 0.6622],
        [0.7748, 0.6157, 0.6686],
        [0.7559, 0.6125, 0.6636],
        [0.7821, 0.6117, 0.6647],
        [0.7889, 0.6096, 0.6638],
        [0.7527, 0.6115, 0.6616],
        [0.7659, 0.6142, 0.6598],
        [0.7960, 0.6143, 0.6680],
        [0.7542, 0.6161, 0.6577],
        [0.7806, 0.6156, 0.6609]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.6955, 0.1358, 0.1902],
         [0.3328, 0.1758, 0.2241],
         [0.2257, 0.1705, 0.3760],
         ...,
         [1.2476, 0.7396, 0.7866],
         [0.8702, 0.8026, 0.6960],
         [0.8322, 0.7039, 0.8041]],

        [[0.5001, 0.1447, 0.1363],
         [0.1278, 0.1694, 0.2021],
         [0.1775, 0.2689, 0.3177],
         ...,
         [0.7698, 0.7235, 0.7325],
         [0.8993, 0.7447, 0.8497],
         [0.9793, 0.7912, 1.0751]],

        [[0.6754, 0.1067, 0.1707],
         [0.


Train Diffusion:  11%|█         | 240/2200 [25:28<3:25:51,  6.30s/it][A
Train Diffusion:  11%|█         | 241/2200 [25:35<3:27:08,  6.34s/it][A
Train Diffusion:  11%|█         | 242/2200 [25:41<3:28:01,  6.37s/it][A
Train Diffusion:  11%|█         | 243/2200 [25:48<3:29:34,  6.43s/it][A
Train Diffusion:  11%|█         | 244/2200 [25:54<3:27:56,  6.38s/it][A
Train Diffusion:  11%|█         | 245/2200 [26:00<3:26:22,  6.33s/it][A
Train Diffusion:  11%|█         | 246/2200 [26:06<3:26:01,  6.33s/it][A
Train Diffusion:  11%|█         | 247/2200 [26:13<3:27:30,  6.37s/it][A
Train Diffusion:  11%|█▏        | 248/2200 [26:19<3:27:06,  6.37s/it][A
Train Diffusion:  11%|█▏        | 249/2200 [26:26<3:29:27,  6.44s/it][A
Train Diffusion:  11%|█▏        | 250/2200 [26:32<3:31:19,  6.50s/it][A
Train Diffusion:  11%|█▏        | 251/2200 [26:39<3:31:08,  6.50s/it][A
Train Diffusion:  11%|█▏        | 252/2200 [26:45<3:29:08,  6.44s/it][A
Train Diffusion:  12%|█▏        | 253/2200 [26:52<

Moving average ELBO loss at 260 iterations is: 6627.6359375. Best ELBO loss value is: 6560.84814453125.

C_PATH mean = tensor([[0.7748, 0.6348, 0.6814],
        [0.8016, 0.6319, 0.6827],
        [0.8116, 0.6327, 0.6836],
        [0.7953, 0.6343, 0.6855],
        [0.7842, 0.6377, 0.6814],
        [0.7937, 0.6349, 0.6839],
        [0.7920, 0.6376, 0.6853],
        [0.7632, 0.6335, 0.6802],
        [0.7854, 0.6354, 0.6843],
        [0.7839, 0.6329, 0.6830]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.1174, 0.1476, 0.1131],
         [0.4180, 0.1844, 0.1547],
         [0.1502, 0.1272, 0.1829],
         ...,
         [0.9842, 0.7586, 0.6683],
         [1.0817, 0.7369, 0.6921],
         [0.5693, 0.6629, 0.7416]],

        [[0.2278, 0.1505, 0.0737],
         [0.0858, 0.1873, 0.1102],
         [0.2475, 0.1200, 0.0882],
         ...,
         [0.9018, 0.7324, 0.7657],
         [0.5877, 0.7910, 0.6321],
         [0.6023, 0.8826, 0.8222]],

        [[1.0084, 0.1345, 0.1495],
         [0.5000,


Train Diffusion:  12%|█▏        | 260/2200 [27:35<3:22:31,  6.26s/it][A
Train Diffusion:  12%|█▏        | 261/2200 [27:43<3:30:53,  6.53s/it][A
Train Diffusion:  12%|█▏        | 262/2200 [27:49<3:28:06,  6.44s/it][A
Train Diffusion:  12%|█▏        | 263/2200 [27:55<3:26:23,  6.39s/it][A
Train Diffusion:  12%|█▏        | 264/2200 [28:01<3:25:34,  6.37s/it][A
Train Diffusion:  12%|█▏        | 265/2200 [28:08<3:23:43,  6.32s/it][A
Train Diffusion:  12%|█▏        | 266/2200 [28:14<3:22:30,  6.28s/it][A
Train Diffusion:  12%|█▏        | 267/2200 [28:20<3:22:17,  6.28s/it][A
Train Diffusion:  12%|█▏        | 268/2200 [28:26<3:21:23,  6.25s/it][A
Train Diffusion:  12%|█▏        | 269/2200 [28:33<3:24:12,  6.35s/it][A
Train Diffusion:  12%|█▏        | 270/2200 [28:39<3:24:43,  6.36s/it][A
Train Diffusion:  12%|█▏        | 271/2200 [28:45<3:23:46,  6.34s/it][A
Train Diffusion:  12%|█▏        | 272/2200 [28:52<3:23:27,  6.33s/it][A
Train Diffusion:  12%|█▏        | 273/2200 [28:58<

Moving average ELBO loss at 280 iterations is: 6486.50146484375. Best ELBO loss value is: 6448.4609375.

C_PATH mean = tensor([[0.8013, 0.6522, 0.7005],
        [0.7832, 0.6507, 0.6971],
        [0.7886, 0.6528, 0.7005],
        [0.8213, 0.6473, 0.7031],
        [0.8191, 0.6532, 0.7030],
        [0.7935, 0.6486, 0.7041],
        [0.8091, 0.6508, 0.6994],
        [0.7965, 0.6503, 0.7048],
        [0.7775, 0.6475, 0.7036],
        [0.8266, 0.6481, 0.7057]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.5618, 0.1408, 0.1691],
         [0.2884, 0.1651, 0.2113],
         [0.5551, 0.2187, 0.3218],
         ...,
         [1.0477, 0.7710, 0.7854],
         [1.0863, 0.7917, 0.7394],
         [0.6122, 0.7606, 0.8268]],

        [[0.7897, 0.1345, 0.1661],
         [0.2233, 0.1825, 0.2105],
         [0.0955, 0.1725, 0.3082],
         ...,
         [0.6400, 0.7388, 0.7398],
         [0.9747, 0.7892, 0.9553],
         [0.9305, 0.7714, 1.1911]],

        [[0.3025, 0.1080, 0.0823],
         [0.2940,


Train Diffusion:  13%|█▎        | 280/2200 [29:43<3:26:56,  6.47s/it][A
Train Diffusion:  13%|█▎        | 281/2200 [29:50<3:28:37,  6.52s/it][A
Train Diffusion:  13%|█▎        | 282/2200 [29:56<3:29:16,  6.55s/it][A
Train Diffusion:  13%|█▎        | 283/2200 [30:03<3:29:15,  6.55s/it][A
Train Diffusion:  13%|█▎        | 284/2200 [30:09<3:29:09,  6.55s/it][A
Train Diffusion:  13%|█▎        | 285/2200 [30:16<3:28:59,  6.55s/it][A
Train Diffusion:  13%|█▎        | 286/2200 [30:22<3:29:53,  6.58s/it][A
Train Diffusion:  13%|█▎        | 287/2200 [30:29<3:29:40,  6.58s/it][A
Train Diffusion:  13%|█▎        | 288/2200 [30:36<3:29:35,  6.58s/it][A
Train Diffusion:  13%|█▎        | 289/2200 [30:42<3:29:07,  6.57s/it][A
Train Diffusion:  13%|█▎        | 290/2200 [30:49<3:28:58,  6.56s/it][A
Train Diffusion:  13%|█▎        | 291/2200 [30:55<3:28:27,  6.55s/it][A
Train Diffusion:  13%|█▎        | 292/2200 [31:02<3:26:12,  6.48s/it][A
Train Diffusion:  13%|█▎        | 293/2200 [31:08<

Moving average ELBO loss at 300 iterations is: 6382.628857421875. Best ELBO loss value is: 6347.2890625.

C_PATH mean = tensor([[0.8178, 0.6607, 0.7145],
        [0.8130, 0.6516, 0.7154],
        [0.8038, 0.6636, 0.7069],
        [0.8009, 0.6534, 0.7221],
        [0.8123, 0.6550, 0.7142],
        [0.8053, 0.6540, 0.7133],
        [0.7959, 0.6566, 0.7186],
        [0.8151, 0.6581, 0.7231],
        [0.8274, 0.6538, 0.7166],
        [0.8178, 0.6557, 0.7059]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.8951, 0.1327, 0.0556],
         [0.3663, 0.1877, 0.1795],
         [0.4702, 0.2402, 0.3021],
         ...,
         [1.0629, 0.6963, 0.7984],
         [0.4855, 0.8263, 0.9257],
         [0.6603, 0.7554, 0.8551]],

        [[0.1841, 0.1200, 0.2719],
         [0.2459, 0.2233, 0.2730],
         [0.0691, 0.1905, 0.1974],
         ...,
         [1.0094, 0.7532, 0.8085],
         [0.9726, 0.7166, 0.9221],
         [1.0436, 0.7542, 1.0489]],

        [[0.3712, 0.1421, 0.0682],
         [0.1861


Train Diffusion:  14%|█▎        | 300/2200 [31:53<3:21:24,  6.36s/it][A
Train Diffusion:  14%|█▎        | 301/2200 [31:59<3:22:48,  6.41s/it][A
Train Diffusion:  14%|█▎        | 302/2200 [32:05<3:21:45,  6.38s/it][A
Train Diffusion:  14%|█▍        | 303/2200 [32:12<3:20:25,  6.34s/it][A
Train Diffusion:  14%|█▍        | 304/2200 [32:18<3:20:53,  6.36s/it][A
Train Diffusion:  14%|█▍        | 305/2200 [32:24<3:20:30,  6.35s/it][A
Train Diffusion:  14%|█▍        | 306/2200 [32:31<3:22:10,  6.40s/it][A
Train Diffusion:  14%|█▍        | 307/2200 [32:37<3:20:33,  6.36s/it][A
Train Diffusion:  14%|█▍        | 308/2200 [32:44<3:20:26,  6.36s/it][A
Train Diffusion:  14%|█▍        | 309/2200 [32:50<3:22:05,  6.41s/it][A
Train Diffusion:  14%|█▍        | 310/2200 [32:57<3:22:50,  6.44s/it][A
Train Diffusion:  14%|█▍        | 311/2200 [33:03<3:23:55,  6.48s/it][A
Train Diffusion:  14%|█▍        | 312/2200 [33:10<3:24:30,  6.50s/it][A
Train Diffusion:  14%|█▍        | 313/2200 [33:16<

Moving average ELBO loss at 320 iterations is: 6316.966162109375. Best ELBO loss value is: 6287.9931640625.

C_PATH mean = tensor([[0.8204, 0.6634, 0.7189],
        [0.8389, 0.6599, 0.7209],
        [0.8410, 0.6629, 0.7235],
        [0.8263, 0.6624, 0.7286],
        [0.8103, 0.6599, 0.7155],
        [0.8155, 0.6582, 0.7235],
        [0.8259, 0.6633, 0.7182],
        [0.8076, 0.6597, 0.7228],
        [0.8082, 0.6651, 0.7149],
        [0.8059, 0.6622, 0.7145]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.5933, 0.1175, 0.0518],
         [0.4491, 0.1880, 0.1759],
         [0.9695, 0.2706, 0.2021],
         ...,
         [0.9457, 0.7615, 0.7776],
         [0.7671, 0.8217, 0.8101],
         [0.7691, 0.8064, 0.8295]],

        [[0.6279, 0.1189, 0.1458],
         [0.1803, 0.1575, 0.2136],
         [0.2477, 0.1946, 0.2418],
         ...,
         [1.8801, 0.7368, 0.7169],
         [1.5196, 0.7060, 0.5357],
         [0.9958, 0.7804, 0.8424]],

        [[0.4875, 0.1117, 0.1279],
         [0.1


Train Diffusion:  15%|█▍        | 320/2200 [34:02<3:26:11,  6.58s/it][A
Train Diffusion:  15%|█▍        | 321/2200 [34:09<3:26:52,  6.61s/it][A
Train Diffusion:  15%|█▍        | 322/2200 [34:15<3:26:03,  6.58s/it][A
Train Diffusion:  15%|█▍        | 323/2200 [34:22<3:22:58,  6.49s/it][A
Train Diffusion:  15%|█▍        | 324/2200 [34:28<3:21:47,  6.45s/it][A
Train Diffusion:  15%|█▍        | 325/2200 [34:34<3:21:45,  6.46s/it][A
Train Diffusion:  15%|█▍        | 326/2200 [34:41<3:21:00,  6.44s/it][A
Train Diffusion:  15%|█▍        | 327/2200 [34:47<3:22:29,  6.49s/it][A
Train Diffusion:  15%|█▍        | 328/2200 [34:54<3:22:57,  6.50s/it][A
Train Diffusion:  15%|█▍        | 329/2200 [35:01<3:22:58,  6.51s/it][A
Train Diffusion:  15%|█▌        | 330/2200 [35:07<3:20:39,  6.44s/it][A
Train Diffusion:  15%|█▌        | 331/2200 [35:13<3:17:29,  6.34s/it][A
Train Diffusion:  15%|█▌        | 332/2200 [35:19<3:16:03,  6.30s/it][A
Train Diffusion:  15%|█▌        | 333/2200 [35:26<

Moving average ELBO loss at 340 iterations is: 6249.1599609375. Best ELBO loss value is: 6231.70068359375.

C_PATH mean = tensor([[0.8381, 0.6650, 0.7232],
        [0.8222, 0.6676, 0.7263],
        [0.8292, 0.6681, 0.7259],
        [0.8059, 0.6658, 0.7271],
        [0.8271, 0.6619, 0.7184],
        [0.8299, 0.6670, 0.7262],
        [0.8374, 0.6637, 0.7185],
        [0.8553, 0.6642, 0.7184],
        [0.8146, 0.6593, 0.7269],
        [0.8276, 0.6656, 0.7225]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.7692, 0.1109, 0.1198],
         [0.3382, 0.1514, 0.2254],
         [0.3571, 0.1408, 0.2540],
         ...,
         [0.8274, 0.8375, 0.8014],
         [1.0790, 0.7478, 0.8373],
         [0.8597, 0.7309, 0.8868]],

        [[0.8631, 0.1219, 0.0855],
         [0.2523, 0.2069, 0.1537],
         [0.3146, 0.2731, 0.2032],
         ...,
         [1.0782, 0.7798, 0.7763],
         [1.1333, 0.8429, 0.9287],
         [0.9550, 0.8126, 1.2766]],

        [[0.3160, 0.1077, 0.1803],
         [0.31


Train Diffusion:  15%|█▌        | 340/2200 [36:11<3:19:22,  6.43s/it][A
Train Diffusion:  16%|█▌        | 341/2200 [36:17<3:17:18,  6.37s/it][A
Train Diffusion:  16%|█▌        | 342/2200 [36:23<3:17:37,  6.38s/it][A
Train Diffusion:  16%|█▌        | 343/2200 [36:30<3:16:41,  6.36s/it][A
Train Diffusion:  16%|█▌        | 344/2200 [36:36<3:16:33,  6.35s/it][A
Train Diffusion:  16%|█▌        | 345/2200 [36:43<3:18:25,  6.42s/it][A
Train Diffusion:  16%|█▌        | 346/2200 [36:49<3:19:05,  6.44s/it][A
Train Diffusion:  16%|█▌        | 347/2200 [36:55<3:17:45,  6.40s/it][A
Train Diffusion:  16%|█▌        | 348/2200 [37:01<3:13:54,  6.28s/it][A
Train Diffusion:  16%|█▌        | 349/2200 [37:08<3:13:49,  6.28s/it][A
Train Diffusion:  16%|█▌        | 350/2200 [37:14<3:14:57,  6.32s/it][A
Train Diffusion:  16%|█▌        | 351/2200 [37:20<3:14:37,  6.32s/it][A
Train Diffusion:  16%|█▌        | 352/2200 [37:27<3:16:38,  6.38s/it][A
Train Diffusion:  16%|█▌        | 353/2200 [37:33<

Moving average ELBO loss at 360 iterations is: 6186.793896484375. Best ELBO loss value is: 6152.04638671875.

C_PATH mean = tensor([[0.8290, 0.6635, 0.7334],
        [0.8587, 0.6633, 0.7274],
        [0.8353, 0.6673, 0.7207],
        [0.8579, 0.6677, 0.7319],
        [0.8535, 0.6634, 0.7293],
        [0.8364, 0.6664, 0.7333],
        [0.8238, 0.6663, 0.7297],
        [0.8457, 0.6653, 0.7358],
        [0.8247, 0.6612, 0.7264],
        [0.8187, 0.6604, 0.7358]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.4392, 0.1139, 0.1573],
         [0.1877, 0.2565, 0.2060],
         [0.1666, 0.3705, 0.1771],
         ...,
         [0.6085, 0.7366, 0.7251],
         [0.8050, 0.7311, 0.5726],
         [0.7544, 0.7545, 0.8485]],

        [[0.4379, 0.1222, 0.1181],
         [0.1619, 0.1705, 0.1185],
         [0.0633, 0.1228, 0.0785],
         ...,
         [1.3933, 0.6798, 0.8623],
         [1.9358, 0.6568, 0.7400],
         [1.1301, 0.8228, 0.8517]],

        [[0.1344, 0.1065, 0.1756],
         [0.


Train Diffusion:  16%|█▋        | 360/2200 [38:17<3:14:41,  6.35s/it][A
Train Diffusion:  16%|█▋        | 361/2200 [38:24<3:13:51,  6.33s/it][A
Train Diffusion:  16%|█▋        | 362/2200 [38:30<3:14:30,  6.35s/it][A
Train Diffusion:  16%|█▋        | 363/2200 [38:36<3:13:51,  6.33s/it][A
Train Diffusion:  17%|█▋        | 364/2200 [38:43<3:12:41,  6.30s/it][A
Train Diffusion:  17%|█▋        | 365/2200 [38:49<3:12:34,  6.30s/it][A
Train Diffusion:  17%|█▋        | 366/2200 [38:55<3:13:38,  6.34s/it][A
Train Diffusion:  17%|█▋        | 367/2200 [39:02<3:15:16,  6.39s/it][A
Train Diffusion:  17%|█▋        | 368/2200 [39:08<3:14:43,  6.38s/it][A
Train Diffusion:  17%|█▋        | 369/2200 [39:15<3:14:50,  6.38s/it][A
Train Diffusion:  17%|█▋        | 370/2200 [39:21<3:14:06,  6.36s/it][A
Train Diffusion:  17%|█▋        | 371/2200 [39:28<3:16:10,  6.44s/it][A
Train Diffusion:  17%|█▋        | 372/2200 [39:34<3:18:27,  6.51s/it][A
Train Diffusion:  17%|█▋        | 373/2200 [39:41<

Moving average ELBO loss at 380 iterations is: 6112.461767578125. Best ELBO loss value is: 6055.20556640625.

C_PATH mean = tensor([[0.8559, 0.6688, 0.7370],
        [0.8336, 0.6695, 0.7375],
        [0.8844, 0.6658, 0.7468],
        [0.8604, 0.6689, 0.7387],
        [0.8365, 0.6711, 0.7437],
        [0.8439, 0.6655, 0.7428],
        [0.8643, 0.6657, 0.7356],
        [0.8336, 0.6685, 0.7408],
        [0.8510, 0.6672, 0.7293],
        [0.8528, 0.6648, 0.7417]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.4850, 0.1150, 0.1847],
         [0.1964, 0.1533, 0.2291],
         [0.0710, 0.2472, 0.1477],
         ...,
         [1.2264, 0.7663, 0.7184],
         [1.4736, 0.8132, 0.6753],
         [0.7198, 0.7379, 0.8646]],

        [[0.2134, 0.1081, 0.1082],
         [0.3527, 0.1493, 0.1594],
         [0.6363, 0.1052, 0.2639],
         ...,
         [1.2164, 0.7509, 0.8835],
         [1.9215, 0.7628, 1.0430],
         [1.3456, 0.7937, 1.2844]],

        [[1.1548, 0.1104, 0.1644],
         [1.


Train Diffusion:  17%|█▋        | 380/2200 [40:27<3:20:18,  6.60s/it][A
Train Diffusion:  17%|█▋        | 381/2200 [40:34<3:20:34,  6.62s/it][A
Train Diffusion:  17%|█▋        | 382/2200 [40:40<3:19:48,  6.59s/it][A
Train Diffusion:  17%|█▋        | 383/2200 [40:47<3:19:38,  6.59s/it][A
Train Diffusion:  17%|█▋        | 384/2200 [40:54<3:19:45,  6.60s/it][A
Train Diffusion:  18%|█▊        | 385/2200 [41:00<3:19:12,  6.59s/it][A
Train Diffusion:  18%|█▊        | 386/2200 [41:07<3:18:07,  6.55s/it][A
Train Diffusion:  18%|█▊        | 387/2200 [41:13<3:15:20,  6.46s/it][A
Train Diffusion:  18%|█▊        | 388/2200 [41:19<3:14:35,  6.44s/it][A
Train Diffusion:  18%|█▊        | 389/2200 [41:26<3:14:54,  6.46s/it][A
Train Diffusion:  18%|█▊        | 390/2200 [41:32<3:14:39,  6.45s/it][A
Train Diffusion:  18%|█▊        | 391/2200 [41:39<3:15:00,  6.47s/it][A
Train Diffusion:  18%|█▊        | 392/2200 [41:45<3:16:00,  6.50s/it][A
Train Diffusion:  18%|█▊        | 393/2200 [41:52<

Moving average ELBO loss at 400 iterations is: 5946.526611328125. Best ELBO loss value is: 5903.1806640625.

C_PATH mean = tensor([[0.8670, 0.6718, 0.7456],
        [0.8780, 0.6711, 0.7514],
        [0.8605, 0.6724, 0.7436],
        [0.8382, 0.6712, 0.7429],
        [0.8410, 0.6729, 0.7376],
        [0.8555, 0.6681, 0.7492],
        [0.8520, 0.6692, 0.7443],
        [0.8394, 0.6734, 0.7483],
        [0.8563, 0.6697, 0.7564],
        [0.8668, 0.6728, 0.7446]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.3399, 0.1126, 0.0901],
         [0.1011, 0.1417, 0.1923],
         [0.3213, 0.1482, 0.1606],
         ...,
         [0.7036, 0.7824, 0.6795],
         [1.0053, 0.7269, 0.7676],
         [1.0473, 0.7684, 0.9202]],

        [[0.2468, 0.1025, 0.1050],
         [0.2732, 0.1168, 0.2108],
         [0.1594, 0.1093, 0.1944],
         ...,
         [0.8873, 0.7517, 0.8461],
         [1.3084, 0.7299, 0.9648],
         [1.1687, 0.7497, 1.1028]],

        [[0.1062, 0.1168, 0.1796],
         [0.1


Train Diffusion:  18%|█▊        | 400/2200 [42:38<3:17:39,  6.59s/it][A
Train Diffusion:  18%|█▊        | 401/2200 [42:45<3:17:07,  6.57s/it][A
Train Diffusion:  18%|█▊        | 402/2200 [42:51<3:15:39,  6.53s/it][A
Train Diffusion:  18%|█▊        | 403/2200 [42:57<3:13:32,  6.46s/it][A
Train Diffusion:  18%|█▊        | 404/2200 [43:03<3:11:26,  6.40s/it][A
Train Diffusion:  18%|█▊        | 405/2200 [43:10<3:10:53,  6.38s/it][A
Train Diffusion:  18%|█▊        | 406/2200 [43:16<3:10:29,  6.37s/it][A
Train Diffusion:  18%|█▊        | 407/2200 [43:23<3:10:40,  6.38s/it][A
Train Diffusion:  19%|█▊        | 408/2200 [43:29<3:11:23,  6.41s/it][A
Train Diffusion:  19%|█▊        | 409/2200 [43:36<3:12:15,  6.44s/it][A
Train Diffusion:  19%|█▊        | 410/2200 [43:42<3:13:20,  6.48s/it][A
Train Diffusion:  19%|█▊        | 411/2200 [43:49<3:14:13,  6.51s/it][A
Train Diffusion:  19%|█▊        | 412/2200 [43:55<3:14:47,  6.54s/it][A
Train Diffusion:  19%|█▉        | 413/2200 [44:02<

Moving average ELBO loss at 420 iterations is: 5707.516796875. Best ELBO loss value is: 5631.11767578125.

C_PATH mean = tensor([[0.8671, 0.6776, 0.7530],
        [0.8623, 0.6746, 0.7576],
        [0.8518, 0.6801, 0.7465],
        [0.8669, 0.6810, 0.7538],
        [0.8722, 0.6788, 0.7494],
        [0.8711, 0.6758, 0.7547],
        [0.8548, 0.6849, 0.7454],
        [0.8703, 0.6761, 0.7490],
        [0.8765, 0.6766, 0.7590],
        [0.8844, 0.6769, 0.7564]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.1507, 0.1284, 0.1656],
         [0.4585, 0.2847, 0.3705],
         [0.9130, 0.3029, 0.3080],
         ...,
         [0.7621, 0.7602, 0.7239],
         [0.6384, 0.8017, 0.6038],
         [0.7499, 0.7843, 0.9872]],

        [[0.8068, 0.1288, 0.2255],
         [0.3873, 0.2222, 0.2723],
         [0.3507, 0.3127, 0.3235],
         ...,
         [0.8594, 0.7822, 0.7766],
         [1.1446, 0.7786, 0.8427],
         [1.0732, 0.6791, 1.3406]],

        [[0.2814, 0.1108, 0.3113],
         [0.341


Train Diffusion:  19%|█▉        | 420/2200 [44:47<3:10:54,  6.44s/it][A
Train Diffusion:  19%|█▉        | 421/2200 [44:53<3:11:15,  6.45s/it][A
Train Diffusion:  19%|█▉        | 422/2200 [45:00<3:10:55,  6.44s/it][A
Train Diffusion:  19%|█▉        | 423/2200 [45:06<3:10:15,  6.42s/it][A
Train Diffusion:  19%|█▉        | 424/2200 [45:12<3:10:57,  6.45s/it][A
Train Diffusion:  19%|█▉        | 425/2200 [45:19<3:10:29,  6.44s/it][A
Train Diffusion:  19%|█▉        | 426/2200 [45:25<3:09:25,  6.41s/it][A
Train Diffusion:  19%|█▉        | 427/2200 [45:32<3:11:03,  6.47s/it][A
Train Diffusion:  19%|█▉        | 428/2200 [45:38<3:12:01,  6.50s/it][A
Train Diffusion:  20%|█▉        | 429/2200 [45:45<3:12:22,  6.52s/it][A
Train Diffusion:  20%|█▉        | 430/2200 [45:51<3:10:46,  6.47s/it][A
Train Diffusion:  20%|█▉        | 431/2200 [45:58<3:11:51,  6.51s/it][A
Train Diffusion:  20%|█▉        | 432/2200 [46:04<3:11:19,  6.49s/it][A
Train Diffusion:  20%|█▉        | 433/2200 [46:11<

Moving average ELBO loss at 440 iterations is: 5552.473583984375. Best ELBO loss value is: 5529.6142578125.

C_PATH mean = tensor([[0.8598, 0.6820, 0.7552],
        [0.8729, 0.6819, 0.7599],
        [0.8810, 0.6852, 0.7643],
        [0.8707, 0.6883, 0.7552],
        [0.8589, 0.6791, 0.7572],
        [0.8654, 0.6844, 0.7641],
        [0.8933, 0.6844, 0.7605],
        [0.8612, 0.6856, 0.7544],
        [0.8975, 0.6815, 0.7560],
        [0.8540, 0.6854, 0.7598]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.9374, 0.1156, 0.2132],
         [0.3483, 0.2259, 0.2752],
         [0.0589, 0.1817, 0.1677],
         ...,
         [1.0015, 0.7564, 0.8188],
         [0.9088, 0.7586, 0.9601],
         [0.8215, 0.7611, 1.1266]],

        [[0.1107, 0.0905, 0.1947],
         [0.6612, 0.2323, 0.3222],
         [0.4992, 0.2391, 0.5101],
         ...,
         [1.4487, 0.7803, 0.7201],
         [1.3602, 0.7943, 0.8505],
         [1.1397, 0.7507, 1.1923]],

        [[0.2232, 0.1123, 0.3049],
         [0.3


Train Diffusion:  20%|██        | 440/2200 [46:56<3:10:53,  6.51s/it][A
Train Diffusion:  20%|██        | 441/2200 [47:04<3:18:40,  6.78s/it][A
Train Diffusion:  20%|██        | 442/2200 [47:11<3:19:12,  6.80s/it][A
Train Diffusion:  20%|██        | 443/2200 [47:17<3:19:28,  6.81s/it][A
Train Diffusion:  20%|██        | 444/2200 [47:25<3:21:42,  6.89s/it][A
Train Diffusion:  20%|██        | 445/2200 [47:32<3:28:54,  7.14s/it][A
Train Diffusion:  20%|██        | 446/2200 [47:44<4:12:14,  8.63s/it][A
Train Diffusion:  20%|██        | 447/2200 [47:52<4:05:58,  8.42s/it][A
Train Diffusion:  20%|██        | 448/2200 [48:00<3:59:46,  8.21s/it][A
Train Diffusion:  20%|██        | 449/2200 [48:07<3:46:08,  7.75s/it][A
Train Diffusion:  20%|██        | 450/2200 [48:14<3:40:07,  7.55s/it][A
Train Diffusion:  20%|██        | 451/2200 [48:21<3:33:39,  7.33s/it][A
Train Diffusion:  21%|██        | 452/2200 [48:27<3:27:24,  7.12s/it][A
Train Diffusion:  21%|██        | 453/2200 [48:34<

Moving average ELBO loss at 460 iterations is: 5474.337353515625. Best ELBO loss value is: 5455.27490234375.

C_PATH mean = tensor([[0.8815, 0.6771, 0.7561],
        [0.8927, 0.6803, 0.7686],
        [0.8593, 0.6859, 0.7651],
        [0.8569, 0.6806, 0.7654],
        [0.8720, 0.6826, 0.7618],
        [0.8707, 0.6816, 0.7619],
        [0.8691, 0.6840, 0.7663],
        [0.8545, 0.6864, 0.7629],
        [0.8582, 0.6827, 0.7652],
        [0.8487, 0.6842, 0.7716]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.8058, 0.1129, 0.3535],
         [0.4022, 0.2521, 0.3704],
         [0.1709, 0.2965, 0.3333],
         ...,
         [0.8264, 0.8092, 0.7692],
         [0.9131, 0.9068, 0.7835],
         [0.8787, 0.8058, 1.0343]],

        [[0.3167, 0.1056, 0.0856],
         [0.3769, 0.1885, 0.1620],
         [0.1843, 0.1000, 0.2699],
         ...,
         [0.9531, 0.7050, 0.7557],
         [1.5891, 0.7715, 1.0761],
         [1.3477, 0.7411, 1.2108]],

        [[0.9832, 0.1131, 0.2040],
         [0.


Train Diffusion:  21%|██        | 460/2200 [49:20<3:09:39,  6.54s/it][A
Train Diffusion:  21%|██        | 461/2200 [49:27<3:08:29,  6.50s/it][A
Train Diffusion:  21%|██        | 462/2200 [49:33<3:07:28,  6.47s/it][A
Train Diffusion:  21%|██        | 463/2200 [49:40<3:07:17,  6.47s/it][A
Train Diffusion:  21%|██        | 464/2200 [49:46<3:06:57,  6.46s/it][A
Train Diffusion:  21%|██        | 465/2200 [49:53<3:06:58,  6.47s/it][A
Train Diffusion:  21%|██        | 466/2200 [49:59<3:06:35,  6.46s/it][A
Train Diffusion:  21%|██        | 467/2200 [50:05<3:06:01,  6.44s/it][A
Train Diffusion:  21%|██▏       | 468/2200 [50:12<3:05:27,  6.42s/it][A
Train Diffusion:  21%|██▏       | 469/2200 [50:18<3:04:58,  6.41s/it][A
Train Diffusion:  21%|██▏       | 470/2200 [50:25<3:04:59,  6.42s/it][A
Train Diffusion:  21%|██▏       | 471/2200 [50:31<3:05:36,  6.44s/it][A
Train Diffusion:  21%|██▏       | 472/2200 [50:38<3:05:20,  6.44s/it][A
Train Diffusion:  22%|██▏       | 473/2200 [50:44<

Moving average ELBO loss at 480 iterations is: 5415.39453125. Best ELBO loss value is: 5396.900390625.

C_PATH mean = tensor([[0.8844, 0.6809, 0.7605],
        [0.8570, 0.6823, 0.7705],
        [0.8549, 0.6786, 0.7647],
        [0.8575, 0.6817, 0.7632],
        [0.8571, 0.6777, 0.7637],
        [0.8993, 0.6760, 0.7656],
        [0.8661, 0.6869, 0.7701],
        [0.8750, 0.6751, 0.7607],
        [0.8677, 0.6839, 0.7574],
        [0.8702, 0.6779, 0.7580]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.2750, 0.1207, 0.4897],
         [0.4477, 0.2500, 0.3332],
         [0.3466, 0.2772, 0.3299],
         ...,
         [1.2724, 0.7972, 0.9539],
         [1.8289, 0.6997, 1.1542],
         [0.8813, 0.6338, 1.2637]],

        [[0.1065, 0.1270, 0.1614],
         [0.1578, 0.1630, 0.2131],
         [0.1984, 0.1393, 0.2203],
         ...,
         [1.1205, 0.7101, 0.7385],
         [0.6682, 0.7839, 0.8265],
         [0.5827, 0.7516, 1.1330]],

        [[0.1520, 0.1259, 0.3328],
         [0.1351, 


Train Diffusion:  22%|██▏       | 480/2200 [51:32<3:20:28,  6.99s/it][A
Train Diffusion:  22%|██▏       | 481/2200 [51:40<3:31:31,  7.38s/it][A
Train Diffusion:  22%|██▏       | 482/2200 [51:56<4:50:26, 10.14s/it][A
Train Diffusion:  22%|██▏       | 483/2200 [52:06<4:41:43,  9.84s/it][A
Train Diffusion:  22%|██▏       | 484/2200 [52:13<4:17:25,  9.00s/it][A
Train Diffusion:  22%|██▏       | 485/2200 [52:20<4:01:46,  8.46s/it][A
Train Diffusion:  22%|██▏       | 486/2200 [52:26<3:45:20,  7.89s/it][A
Train Diffusion:  22%|██▏       | 487/2200 [52:33<3:36:02,  7.57s/it][A
Train Diffusion:  22%|██▏       | 488/2200 [52:40<3:32:08,  7.43s/it][A
Train Diffusion:  22%|██▏       | 489/2200 [52:47<3:23:05,  7.12s/it][A
Train Diffusion:  22%|██▏       | 490/2200 [52:53<3:17:50,  6.94s/it][A
Train Diffusion:  22%|██▏       | 491/2200 [53:00<3:16:55,  6.91s/it][A
Train Diffusion:  22%|██▏       | 492/2200 [53:07<3:19:53,  7.02s/it][A
Train Diffusion:  22%|██▏       | 493/2200 [53:14<

Moving average ELBO loss at 500 iterations is: 5383.1087890625. Best ELBO loss value is: 5350.005859375.

C_PATH mean = tensor([[0.8832, 0.6781, 0.7631],
        [0.8770, 0.6864, 0.7706],
        [0.8741, 0.6818, 0.7708],
        [0.8776, 0.6847, 0.7737],
        [0.8722, 0.6822, 0.7650],
        [0.8605, 0.6853, 0.7667],
        [0.8358, 0.6869, 0.7681],
        [0.8913, 0.6780, 0.7726],
        [0.8736, 0.6784, 0.7827],
        [0.8672, 0.6845, 0.7711]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.2886, 0.1291, 0.2730],
         [0.3985, 0.1951, 0.1269],
         [0.2795, 0.1109, 0.1156],
         ...,
         [0.8358, 0.7389, 0.7920],
         [0.4154, 0.7725, 0.9467],
         [0.8847, 0.7454, 1.1434]],

        [[0.0831, 0.1295, 0.1680],
         [0.3377, 0.1788, 0.2104],
         [0.3325, 0.2414, 0.3685],
         ...,
         [0.5718, 0.7805, 0.7527],
         [1.3444, 0.7454, 0.6835],
         [1.1007, 0.8214, 0.9117]],

        [[0.3521, 0.1215, 0.4507],
         [0.2063


Train Diffusion:  23%|██▎       | 500/2200 [54:04<3:20:52,  7.09s/it][A
Train Diffusion:  23%|██▎       | 501/2200 [54:10<3:15:00,  6.89s/it][A
Train Diffusion:  23%|██▎       | 502/2200 [54:17<3:13:21,  6.83s/it][A
Train Diffusion:  23%|██▎       | 503/2200 [54:24<3:10:27,  6.73s/it][A
Train Diffusion:  23%|██▎       | 504/2200 [54:30<3:08:18,  6.66s/it][A
Train Diffusion:  23%|██▎       | 505/2200 [54:37<3:07:05,  6.62s/it][A
Train Diffusion:  23%|██▎       | 506/2200 [54:43<3:04:59,  6.55s/it][A
Train Diffusion:  23%|██▎       | 507/2200 [54:50<3:04:48,  6.55s/it][A
Train Diffusion:  23%|██▎       | 508/2200 [54:56<3:04:17,  6.54s/it][A
Train Diffusion:  23%|██▎       | 509/2200 [55:03<3:07:03,  6.64s/it][A
Train Diffusion:  23%|██▎       | 510/2200 [55:10<3:08:11,  6.68s/it][A
Train Diffusion:  23%|██▎       | 511/2200 [55:16<3:06:50,  6.64s/it][A
Train Diffusion:  23%|██▎       | 512/2200 [55:23<3:05:36,  6.60s/it][A
Train Diffusion:  23%|██▎       | 513/2200 [55:29<

Moving average ELBO loss at 520 iterations is: 5325.770849609375. Best ELBO loss value is: 5306.2890625.

C_PATH mean = tensor([[0.8774, 0.6818, 0.7765],
        [0.8725, 0.6809, 0.7685],
        [0.8929, 0.6776, 0.7715],
        [0.8624, 0.6850, 0.7724],
        [0.8577, 0.6868, 0.7742],
        [0.8925, 0.6824, 0.7692],
        [0.8816, 0.6817, 0.7692],
        [0.8924, 0.6791, 0.7698],
        [0.8788, 0.6823, 0.7713],
        [0.8820, 0.6828, 0.7739]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.5672, 0.1108, 0.4574],
         [0.3454, 0.2201, 0.4319],
         [0.1618, 0.2590, 0.4079],
         ...,
         [1.0346, 0.7301, 0.7466],
         [1.5665, 0.6879, 0.9213],
         [1.1906, 0.5879, 1.0927]],

        [[0.1247, 0.1040, 0.3513],
         [0.3107, 0.2689, 0.2547],
         [0.3084, 0.2699, 0.3508],
         ...,
         [0.4899, 0.7891, 0.9713],
         [0.5551, 0.8232, 0.6785],
         [0.5590, 0.8031, 0.8745]],

        [[1.9680, 0.1045, 0.2901],
         [0.9602


Train Diffusion:  24%|██▎       | 520/2200 [56:15<3:05:42,  6.63s/it][A
Train Diffusion:  24%|██▎       | 521/2200 [56:22<3:04:34,  6.60s/it][A
Train Diffusion:  24%|██▎       | 522/2200 [56:28<3:02:56,  6.54s/it][A
Train Diffusion:  24%|██▍       | 523/2200 [56:35<3:05:27,  6.64s/it][A
Train Diffusion:  24%|██▍       | 524/2200 [56:42<3:04:48,  6.62s/it][A
Train Diffusion:  24%|██▍       | 525/2200 [56:48<3:04:01,  6.59s/it][A
Train Diffusion:  24%|██▍       | 526/2200 [56:55<3:02:48,  6.55s/it][A
Train Diffusion:  24%|██▍       | 527/2200 [57:01<3:03:23,  6.58s/it][A
Train Diffusion:  24%|██▍       | 528/2200 [57:08<3:04:38,  6.63s/it][A
Train Diffusion:  24%|██▍       | 529/2200 [57:15<3:03:52,  6.60s/it][A
Train Diffusion:  24%|██▍       | 530/2200 [57:21<3:03:34,  6.60s/it][A
Train Diffusion:  24%|██▍       | 531/2200 [57:28<3:02:10,  6.55s/it][A
Train Diffusion:  24%|██▍       | 532/2200 [57:34<3:01:21,  6.52s/it][A
Train Diffusion:  24%|██▍       | 533/2200 [57:40<

Moving average ELBO loss at 540 iterations is: 5285.183447265625. Best ELBO loss value is: 5260.60107421875.

C_PATH mean = tensor([[0.8708, 0.6811, 0.7777],
        [0.8654, 0.6842, 0.7719],
        [0.9087, 0.6823, 0.7808],
        [0.8716, 0.6841, 0.7798],
        [0.8774, 0.6852, 0.7874],
        [0.8655, 0.6846, 0.7768],
        [0.9043, 0.6842, 0.7861],
        [0.9128, 0.6783, 0.7800],
        [0.8927, 0.6855, 0.7791],
        [0.8756, 0.6818, 0.7805]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.9409, 0.1087, 0.6126],
         [0.6742, 0.2575, 0.3585],
         [0.1474, 0.2917, 0.5155],
         ...,
         [1.6317, 0.7905, 0.9508],
         [0.8136, 0.7431, 0.7542],
         [0.6117, 0.7026, 0.9960]],

        [[0.7211, 0.1141, 0.5061],
         [0.4131, 0.2402, 0.2511],
         [0.2492, 0.1627, 0.2706],
         ...,
         [0.8761, 0.7966, 0.8043],
         [0.3279, 0.8379, 0.9946],
         [1.1622, 0.7738, 1.1480]],

        [[0.2008, 0.0987, 0.5400],
         [0.


Train Diffusion:  25%|██▍       | 540/2200 [58:26<2:59:06,  6.47s/it][A
Train Diffusion:  25%|██▍       | 541/2200 [58:32<3:00:39,  6.53s/it][A
Train Diffusion:  25%|██▍       | 542/2200 [58:41<3:17:57,  7.16s/it][A
Train Diffusion:  25%|██▍       | 543/2200 [58:48<3:13:49,  7.02s/it][A
Train Diffusion:  25%|██▍       | 544/2200 [58:55<3:19:09,  7.22s/it][A
Train Diffusion:  25%|██▍       | 545/2200 [59:03<3:22:58,  7.36s/it][A
Train Diffusion:  25%|██▍       | 546/2200 [59:10<3:18:22,  7.20s/it][A
Train Diffusion:  25%|██▍       | 547/2200 [59:17<3:15:56,  7.11s/it][A
Train Diffusion:  25%|██▍       | 548/2200 [59:24<3:15:25,  7.10s/it][A
Train Diffusion:  25%|██▍       | 549/2200 [59:31<3:15:00,  7.09s/it][A
Train Diffusion:  25%|██▌       | 550/2200 [59:38<3:15:13,  7.10s/it][A
Train Diffusion:  25%|██▌       | 551/2200 [59:45<3:12:14,  7.00s/it][A
Train Diffusion:  25%|██▌       | 552/2200 [59:51<3:09:02,  6.88s/it][A
Train Diffusion:  25%|██▌       | 553/2200 [59:58<

Moving average ELBO loss at 560 iterations is: 5244.442724609375. Best ELBO loss value is: 5218.9892578125.

C_PATH mean = tensor([[0.8702, 0.6871, 0.7765],
        [0.8762, 0.6862, 0.7765],
        [0.8937, 0.6865, 0.7812],
        [0.8810, 0.6850, 0.7850],
        [0.8726, 0.6867, 0.7770],
        [0.8908, 0.6796, 0.7769],
        [0.9040, 0.6795, 0.7794],
        [0.8948, 0.6814, 0.7855],
        [0.8913, 0.6841, 0.7855],
        [0.8842, 0.6816, 0.7775]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.1248, 0.1237, 0.6250],
         [0.5228, 0.2769, 0.3156],
         [0.0637, 0.2845, 0.1695],
         ...,
         [0.5724, 0.8120, 0.8139],
         [0.7601, 0.8302, 0.8575],
         [0.7084, 0.7742, 0.9241]],

        [[0.5501, 0.1153, 0.6046],
         [0.6855, 0.2710, 0.2204],
         [0.4230, 0.1829, 0.2454],
         ...,
         [0.9454, 0.7189, 0.6588],
         [0.6568, 0.6422, 0.6898],
         [0.9906, 0.7149, 1.0624]],

        [[0.1573, 0.1191, 0.4706],
         [0.4


Train Diffusion:  25%|██▌       | 560/2200 [1:00:44<2:58:05,  6.52s/it][A
Train Diffusion:  26%|██▌       | 561/2200 [1:00:51<3:03:57,  6.73s/it][A
Train Diffusion:  26%|██▌       | 562/2200 [1:00:58<3:07:27,  6.87s/it][A
Train Diffusion:  26%|██▌       | 563/2200 [1:01:05<3:08:12,  6.90s/it][A
Train Diffusion:  26%|██▌       | 564/2200 [1:01:12<3:06:52,  6.85s/it][A
Train Diffusion:  26%|██▌       | 565/2200 [1:01:19<3:05:53,  6.82s/it][A
Train Diffusion:  26%|██▌       | 566/2200 [1:01:26<3:06:43,  6.86s/it][A
Train Diffusion:  26%|██▌       | 567/2200 [1:01:32<3:03:25,  6.74s/it][A
Train Diffusion:  26%|██▌       | 568/2200 [1:01:39<3:00:14,  6.63s/it][A
Train Diffusion:  26%|██▌       | 569/2200 [1:01:45<2:58:24,  6.56s/it][A
Train Diffusion:  26%|██▌       | 570/2200 [1:01:51<2:57:05,  6.52s/it][A
Train Diffusion:  26%|██▌       | 571/2200 [1:01:58<2:57:06,  6.52s/it][A
Train Diffusion:  26%|██▌       | 572/2200 [1:02:04<2:56:38,  6.51s/it][A
Train Diffusion:  26%|██

Moving average ELBO loss at 580 iterations is: 5202.54814453125. Best ELBO loss value is: 5183.0703125.

C_PATH mean = tensor([[0.8729, 0.6892, 0.7857],
        [0.8814, 0.6896, 0.7876],
        [0.8763, 0.6861, 0.7854],
        [0.9189, 0.6813, 0.7967],
        [0.8831, 0.6854, 0.7890],
        [0.8968, 0.6801, 0.7873],
        [0.8964, 0.6822, 0.7796],
        [0.8882, 0.6829, 0.7869],
        [0.8983, 0.6843, 0.7815],
        [0.9028, 0.6841, 0.7790]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.9379, 0.1066, 0.4891],
         [0.6252, 0.2398, 0.2596],
         [0.9066, 0.1842, 0.2942],
         ...,
         [1.2350, 0.6791, 0.7713],
         [0.5604, 0.7534, 0.7628],
         [0.3960, 0.7434, 1.0549]],

        [[0.6720, 0.1232, 0.6264],
         [0.4095, 0.2794, 0.3930],
         [0.1202, 0.3684, 0.3820],
         ...,
         [1.4995, 0.7511, 1.0251],
         [1.0454, 0.7205, 0.7583],
         [0.7662, 0.7513, 0.9442]],

        [[0.9168, 0.1259, 0.6400],
         [0.4604,


Train Diffusion:  26%|██▋       | 580/2200 [1:02:56<2:51:40,  6.36s/it][A
Train Diffusion:  26%|██▋       | 581/2200 [1:03:02<2:52:12,  6.38s/it][A
Train Diffusion:  26%|██▋       | 582/2200 [1:03:09<2:53:18,  6.43s/it][A
Train Diffusion:  26%|██▋       | 583/2200 [1:03:15<2:54:46,  6.49s/it][A
Train Diffusion:  27%|██▋       | 584/2200 [1:03:22<2:55:44,  6.52s/it][A
Train Diffusion:  27%|██▋       | 585/2200 [1:03:28<2:55:42,  6.53s/it][A
Train Diffusion:  27%|██▋       | 586/2200 [1:03:35<2:54:01,  6.47s/it][A
Train Diffusion:  27%|██▋       | 587/2200 [1:03:41<2:53:49,  6.47s/it][A
Train Diffusion:  27%|██▋       | 588/2200 [1:03:48<2:54:00,  6.48s/it][A
Train Diffusion:  27%|██▋       | 589/2200 [1:03:54<2:54:48,  6.51s/it][A
Train Diffusion:  27%|██▋       | 590/2200 [1:04:01<2:55:03,  6.52s/it][A
Train Diffusion:  27%|██▋       | 591/2200 [1:04:07<2:54:33,  6.51s/it][A
Train Diffusion:  27%|██▋       | 592/2200 [1:04:14<2:54:42,  6.52s/it][A
Train Diffusion:  27%|██

Moving average ELBO loss at 600 iterations is: 5162.005615234375. Best ELBO loss value is: 5141.138671875.

C_PATH mean = tensor([[0.8888, 0.6895, 0.7936],
        [0.8725, 0.6849, 0.7883],
        [0.8999, 0.6806, 0.7912],
        [0.8996, 0.6812, 0.7848],
        [0.8893, 0.6821, 0.7857],
        [0.8895, 0.6888, 0.7911],
        [0.8999, 0.6846, 0.7854],
        [0.8912, 0.6843, 0.7863],
        [0.8931, 0.6869, 0.8003],
        [0.9009, 0.6852, 0.7887]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.0832, 0.1052, 0.6099],
         [0.5231, 0.1590, 0.3148],
         [0.5075, 0.1928, 0.4449],
         ...,
         [1.4084, 0.7302, 0.9108],
         [1.0869, 0.6960, 0.8420],
         [0.7386, 0.6388, 0.9843]],

        [[0.3727, 0.1079, 0.4502],
         [0.4532, 0.2141, 0.1463],
         [0.3020, 0.2721, 0.2185],
         ...,
         [0.7886, 0.7281, 0.8238],
         [1.1017, 0.6725, 0.6710],
         [1.2675, 0.7020, 0.9328]],

        [[0.3339, 0.1282, 0.7558],
         [0.30


Train Diffusion:  27%|██▋       | 600/2200 [1:05:11<3:14:32,  7.30s/it][A
Train Diffusion:  27%|██▋       | 601/2200 [1:05:18<3:11:50,  7.20s/it][A
Train Diffusion:  27%|██▋       | 602/2200 [1:05:25<3:13:12,  7.25s/it][A
Train Diffusion:  27%|██▋       | 603/2200 [1:05:32<3:14:43,  7.32s/it][A
Train Diffusion:  27%|██▋       | 604/2200 [1:05:39<3:12:07,  7.22s/it][A
Train Diffusion:  28%|██▊       | 605/2200 [1:05:46<3:08:20,  7.09s/it][A
Train Diffusion:  28%|██▊       | 606/2200 [1:05:53<3:05:05,  6.97s/it][A
Train Diffusion:  28%|██▊       | 607/2200 [1:05:59<3:02:09,  6.86s/it][A
Train Diffusion:  28%|██▊       | 608/2200 [1:06:06<2:59:44,  6.77s/it][A
Train Diffusion:  28%|██▊       | 609/2200 [1:06:13<2:57:41,  6.70s/it][A
Train Diffusion:  28%|██▊       | 610/2200 [1:06:19<2:56:09,  6.65s/it][A
Train Diffusion:  28%|██▊       | 611/2200 [1:06:26<2:57:53,  6.72s/it][A
Train Diffusion:  28%|██▊       | 612/2200 [1:06:32<2:55:50,  6.64s/it][A
Train Diffusion:  28%|██

Moving average ELBO loss at 620 iterations is: 5140.775341796875. Best ELBO loss value is: 5102.359375.

C_PATH mean = tensor([[0.9076, 0.6885, 0.7923],
        [0.9206, 0.6882, 0.7931],
        [0.8950, 0.6845, 0.7923],
        [0.9021, 0.6890, 0.7954],
        [0.8983, 0.6856, 0.7810],
        [0.9023, 0.6858, 0.7905],
        [0.8931, 0.6880, 0.7868],
        [0.9069, 0.6896, 0.7946],
        [0.8920, 0.6851, 0.7986],
        [0.8891, 0.6887, 0.7782]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.6284, 0.1119, 0.3781],
         [0.5349, 0.2161, 0.1074],
         [0.3078, 0.1426, 0.1514],
         ...,
         [0.9511, 0.7494, 0.9399],
         [1.2854, 0.7774, 0.7927],
         [1.0204, 0.7690, 1.0822]],

        [[0.2394, 0.0956, 0.5365],
         [0.3374, 0.1806, 0.3762],
         [0.8202, 0.1825, 0.2962],
         ...,
         [0.7392, 0.7533, 0.8606],
         [0.6072, 0.6843, 0.7416],
         [0.5983, 0.6609, 1.0186]],

        [[0.4237, 0.1087, 0.7670],
         [0.4225,


Train Diffusion:  28%|██▊       | 620/2200 [1:07:24<2:51:00,  6.49s/it][A
Train Diffusion:  28%|██▊       | 621/2200 [1:07:31<2:50:19,  6.47s/it][A
Train Diffusion:  28%|██▊       | 622/2200 [1:07:37<2:50:18,  6.48s/it][A
Train Diffusion:  28%|██▊       | 623/2200 [1:07:44<2:49:41,  6.46s/it][A
Train Diffusion:  28%|██▊       | 624/2200 [1:07:50<2:50:08,  6.48s/it][A
Train Diffusion:  28%|██▊       | 625/2200 [1:07:57<2:49:33,  6.46s/it][A
Train Diffusion:  28%|██▊       | 626/2200 [1:08:03<2:49:04,  6.45s/it][A
Train Diffusion:  28%|██▊       | 627/2200 [1:08:09<2:49:29,  6.47s/it][A
Train Diffusion:  29%|██▊       | 628/2200 [1:08:16<2:48:52,  6.45s/it][A
Train Diffusion:  29%|██▊       | 629/2200 [1:08:22<2:48:42,  6.44s/it][A
Train Diffusion:  29%|██▊       | 630/2200 [1:08:29<2:48:01,  6.42s/it][A
Train Diffusion:  29%|██▊       | 631/2200 [1:08:35<2:48:22,  6.44s/it][A
Train Diffusion:  29%|██▊       | 632/2200 [1:08:42<2:49:42,  6.49s/it][A
Train Diffusion:  29%|██

Moving average ELBO loss at 640 iterations is: 5094.710400390625. Best ELBO loss value is: 5072.2041015625.

C_PATH mean = tensor([[0.9072, 0.6830, 0.7985],
        [0.9020, 0.6852, 0.8031],
        [0.8983, 0.6851, 0.8026],
        [0.8961, 0.6878, 0.8000],
        [0.8843, 0.6873, 0.7998],
        [0.9050, 0.6832, 0.7959],
        [0.9034, 0.6917, 0.7929],
        [0.8833, 0.6874, 0.7964],
        [0.8900, 0.6900, 0.8093],
        [0.9217, 0.6868, 0.7995]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.9328, 0.1206, 0.8430],
         [0.4224, 0.2555, 0.3244],
         [0.0493, 0.2473, 0.3319],
         ...,
         [1.7381, 0.7547, 1.0062],
         [0.8364, 0.7780, 1.0261],
         [0.7151, 0.8014, 0.9226]],

        [[1.4524, 0.1151, 0.5717],
         [0.8188, 0.2587, 0.4090],
         [0.3979, 0.2349, 0.4071],
         ...,
         [1.4102, 0.7284, 0.9616],
         [1.2667, 0.8121, 0.9881],
         [1.0389, 0.7507, 1.1736]],

        [[0.2534, 0.1009, 0.5235],
         [0.5


Train Diffusion:  29%|██▉       | 640/2200 [1:09:33<2:47:23,  6.44s/it][A
Train Diffusion:  29%|██▉       | 641/2200 [1:09:40<2:46:53,  6.42s/it][A
Train Diffusion:  29%|██▉       | 642/2200 [1:09:46<2:46:27,  6.41s/it][A
Train Diffusion:  29%|██▉       | 643/2200 [1:09:53<2:46:07,  6.40s/it][A
Train Diffusion:  29%|██▉       | 644/2200 [1:09:59<2:46:07,  6.41s/it][A
Train Diffusion:  29%|██▉       | 645/2200 [1:10:05<2:45:40,  6.39s/it][A
Train Diffusion:  29%|██▉       | 646/2200 [1:10:12<2:45:15,  6.38s/it][A
Train Diffusion:  29%|██▉       | 647/2200 [1:10:18<2:45:15,  6.38s/it][A
Train Diffusion:  29%|██▉       | 648/2200 [1:10:25<2:45:12,  6.39s/it][A
Train Diffusion:  30%|██▉       | 649/2200 [1:10:31<2:44:49,  6.38s/it][A
Train Diffusion:  30%|██▉       | 650/2200 [1:10:37<2:45:41,  6.41s/it][A
Train Diffusion:  30%|██▉       | 651/2200 [1:10:44<2:44:44,  6.38s/it][A
Train Diffusion:  30%|██▉       | 652/2200 [1:10:50<2:45:24,  6.41s/it][A
Train Diffusion:  30%|██

Moving average ELBO loss at 660 iterations is: 5068.413427734375. Best ELBO loss value is: 5051.9912109375.

C_PATH mean = tensor([[0.9231, 0.6859, 0.8106],
        [0.8866, 0.6846, 0.8066],
        [0.9009, 0.6883, 0.8070],
        [0.9150, 0.6859, 0.7989],
        [0.9071, 0.6861, 0.8038],
        [0.8962, 0.6907, 0.8075],
        [0.9110, 0.6866, 0.8035],
        [0.9296, 0.6863, 0.8064],
        [0.9009, 0.6880, 0.8009],
        [0.8937, 0.6890, 0.8160]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.6981, 0.1003, 0.7604],
         [0.6336, 0.2134, 0.3958],
         [0.0671, 0.2493, 0.2256],
         ...,
         [1.1404, 0.7119, 1.0184],
         [0.7023, 0.6851, 0.8751],
         [0.7186, 0.7281, 1.0289]],

        [[0.6410, 0.1039, 0.4794],
         [0.4167, 0.2581, 0.1293],
         [0.1965, 0.2155, 0.3649],
         ...,
         [0.9586, 0.7277, 0.9080],
         [1.0275, 0.7788, 0.9048],
         [0.7546, 0.7207, 1.0722]],

        [[0.0266, 0.0924, 0.5124],
         [0.4


Train Diffusion:  30%|███       | 660/2200 [1:11:41<2:43:01,  6.35s/it][A
Train Diffusion:  30%|███       | 661/2200 [1:11:47<2:43:39,  6.38s/it][A
Train Diffusion:  30%|███       | 662/2200 [1:11:54<2:43:17,  6.37s/it][A
Train Diffusion:  30%|███       | 663/2200 [1:12:00<2:42:44,  6.35s/it][A
Train Diffusion:  30%|███       | 664/2200 [1:12:06<2:41:06,  6.29s/it][A
Train Diffusion:  30%|███       | 665/2200 [1:12:12<2:39:16,  6.23s/it][A
Train Diffusion:  30%|███       | 666/2200 [1:12:18<2:38:10,  6.19s/it][A
Train Diffusion:  30%|███       | 667/2200 [1:12:24<2:37:21,  6.16s/it][A
Train Diffusion:  30%|███       | 668/2200 [1:12:31<2:36:48,  6.14s/it][A
Train Diffusion:  30%|███       | 669/2200 [1:12:37<2:37:59,  6.19s/it][A
Train Diffusion:  30%|███       | 670/2200 [1:12:43<2:37:08,  6.16s/it][A
Train Diffusion:  30%|███       | 671/2200 [1:12:49<2:36:16,  6.13s/it][A
Train Diffusion:  31%|███       | 672/2200 [1:12:55<2:36:00,  6.13s/it][A
Train Diffusion:  31%|██

Moving average ELBO loss at 680 iterations is: 5031.603759765625. Best ELBO loss value is: 5003.5029296875.

C_PATH mean = tensor([[0.9154, 0.6853, 0.8134],
        [0.9364, 0.6857, 0.8139],
        [0.9034, 0.6903, 0.7978],
        [0.8974, 0.6907, 0.8016],
        [0.8962, 0.6880, 0.8036],
        [0.9090, 0.6898, 0.8140],
        [0.8728, 0.6887, 0.7996],
        [0.9204, 0.6889, 0.8048],
        [0.8927, 0.6890, 0.8039],
        [0.9028, 0.6909, 0.8099]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.8518, 0.1171, 0.5821],
         [0.5279, 0.3249, 0.2482],
         [0.0596, 0.2652, 0.2972],
         ...,
         [0.8229, 0.6907, 0.9277],
         [1.0690, 0.7821, 1.0208],
         [0.6516, 0.7496, 1.3819]],

        [[1.2442, 0.1170, 0.6421],
         [0.5977, 0.2705, 0.4661],
         [0.6028, 0.3115, 0.3664],
         ...,
         [1.1111, 0.7304, 0.8593],
         [1.8810, 0.7547, 0.8515],
         [1.3056, 0.7316, 1.2126]],

        [[1.6185, 0.1092, 0.5260],
         [0.4


Train Diffusion:  31%|███       | 680/2200 [1:13:44<2:35:06,  6.12s/it][A
Train Diffusion:  31%|███       | 681/2200 [1:13:50<2:34:47,  6.11s/it][A
Train Diffusion:  31%|███       | 682/2200 [1:13:56<2:35:05,  6.13s/it][A
Train Diffusion:  31%|███       | 683/2200 [1:14:03<2:34:36,  6.11s/it][A
Train Diffusion:  31%|███       | 684/2200 [1:14:09<2:34:01,  6.10s/it][A
Train Diffusion:  31%|███       | 685/2200 [1:14:15<2:34:28,  6.12s/it][A
Train Diffusion:  31%|███       | 686/2200 [1:14:21<2:34:25,  6.12s/it][A
Train Diffusion:  31%|███       | 687/2200 [1:14:27<2:37:43,  6.25s/it][A
Train Diffusion:  31%|███▏      | 688/2200 [1:14:33<2:35:59,  6.19s/it][A
Train Diffusion:  31%|███▏      | 689/2200 [1:14:40<2:35:16,  6.17s/it][A
Train Diffusion:  31%|███▏      | 690/2200 [1:14:46<2:35:27,  6.18s/it][A
Train Diffusion:  31%|███▏      | 691/2200 [1:14:52<2:34:48,  6.16s/it][A
Train Diffusion:  31%|███▏      | 692/2200 [1:14:58<2:34:07,  6.13s/it][A
Train Diffusion:  32%|██

Moving average ELBO loss at 700 iterations is: 5000.535546875. Best ELBO loss value is: 4967.2255859375.

C_PATH mean = tensor([[0.9202, 0.6853, 0.8083],
        [0.8938, 0.6913, 0.8068],
        [0.9406, 0.6883, 0.8166],
        [0.9117, 0.6908, 0.8145],
        [0.8875, 0.6929, 0.8116],
        [0.9121, 0.6850, 0.8119],
        [0.9217, 0.6853, 0.8181],
        [0.8982, 0.6847, 0.8098],
        [0.9191, 0.6927, 0.8166],
        [0.9306, 0.6899, 0.8158]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.1378, 0.1073, 0.5527],
         [0.4970, 0.1753, 0.1295],
         [0.3064, 0.2164, 0.1076],
         ...,
         [1.2045, 0.6915, 0.9533],
         [0.9624, 0.6928, 0.8501],
         [0.6355, 0.6644, 1.0691]],

        [[0.6899, 0.1313, 0.7217],
         [0.6417, 0.2539, 0.4082],
         [1.1919, 0.2134, 0.2717],
         ...,
         [1.2398, 0.6838, 0.9172],
         [0.6715, 0.6915, 1.1725],
         [0.8418, 0.6667, 1.2865]],

        [[2.3466, 0.1247, 0.8054],
         [0.6012


Train Diffusion:  32%|███▏      | 700/2200 [1:15:47<2:31:42,  6.07s/it][A
Train Diffusion:  32%|███▏      | 701/2200 [1:15:53<2:32:22,  6.10s/it][A
Train Diffusion:  32%|███▏      | 702/2200 [1:15:59<2:31:52,  6.08s/it][A
Train Diffusion:  32%|███▏      | 703/2200 [1:16:05<2:31:44,  6.08s/it][A
Train Diffusion:  32%|███▏      | 704/2200 [1:16:11<2:31:30,  6.08s/it][A
Train Diffusion:  32%|███▏      | 705/2200 [1:16:17<2:31:17,  6.07s/it][A
Train Diffusion:  32%|███▏      | 706/2200 [1:16:23<2:32:02,  6.11s/it][A
Train Diffusion:  32%|███▏      | 707/2200 [1:16:29<2:31:33,  6.09s/it][A
Train Diffusion:  32%|███▏      | 708/2200 [1:16:35<2:31:28,  6.09s/it][A
Train Diffusion:  32%|███▏      | 709/2200 [1:16:41<2:31:26,  6.09s/it][A
Train Diffusion:  32%|███▏      | 710/2200 [1:16:48<2:31:11,  6.09s/it][A
Train Diffusion:  32%|███▏      | 711/2200 [1:16:54<2:31:39,  6.11s/it][A
Train Diffusion:  32%|███▏      | 712/2200 [1:17:00<2:31:18,  6.10s/it][A
Train Diffusion:  32%|██

Moving average ELBO loss at 720 iterations is: 4955.988037109375. Best ELBO loss value is: 4928.5859375.

C_PATH mean = tensor([[0.9201, 0.6964, 0.8032],
        [0.9236, 0.6886, 0.8126],
        [0.9094, 0.6967, 0.8180],
        [0.9287, 0.6917, 0.8133],
        [0.9347, 0.6870, 0.8140],
        [0.9225, 0.6929, 0.8146],
        [0.9086, 0.6958, 0.8041],
        [0.9332, 0.6897, 0.8222],
        [0.9300, 0.6888, 0.8138],
        [0.9120, 0.6902, 0.8074]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.3628, 0.0872, 0.8697],
         [0.6410, 0.1793, 0.3744],
         [0.8356, 0.1416, 0.4292],
         ...,
         [1.5217, 0.7244, 0.8031],
         [1.0825, 0.7133, 0.8472],
         [0.9988, 0.6968, 1.1053]],

        [[0.9914, 0.1174, 0.6392],
         [0.8116, 0.2022, 0.2230],
         [0.3797, 0.3404, 0.4148],
         ...,
         [1.2651, 0.7100, 0.9108],
         [0.2870, 0.6941, 0.9080],
         [0.8626, 0.7314, 0.9668]],

        [[1.4068, 0.1178, 0.6141],
         [0.7333


Train Diffusion:  33%|███▎      | 720/2200 [1:17:50<2:33:24,  6.22s/it][A
Train Diffusion:  33%|███▎      | 721/2200 [1:17:56<2:32:14,  6.18s/it][A
Train Diffusion:  33%|███▎      | 722/2200 [1:18:02<2:32:24,  6.19s/it][A
Train Diffusion:  33%|███▎      | 723/2200 [1:18:08<2:31:31,  6.16s/it][A
Train Diffusion:  33%|███▎      | 724/2200 [1:18:14<2:30:43,  6.13s/it][A
Train Diffusion:  33%|███▎      | 725/2200 [1:18:20<2:30:19,  6.11s/it][A
Train Diffusion:  33%|███▎      | 726/2200 [1:18:27<2:30:00,  6.11s/it][A
Train Diffusion:  33%|███▎      | 727/2200 [1:18:33<2:30:19,  6.12s/it][A
Train Diffusion:  33%|███▎      | 728/2200 [1:18:39<2:30:44,  6.14s/it][A
Train Diffusion:  33%|███▎      | 729/2200 [1:18:45<2:30:37,  6.14s/it][A
Train Diffusion:  33%|███▎      | 730/2200 [1:18:51<2:30:43,  6.15s/it][A
Train Diffusion:  33%|███▎      | 731/2200 [1:18:57<2:30:35,  6.15s/it][A
Train Diffusion:  33%|███▎      | 732/2200 [1:19:04<2:30:44,  6.16s/it][A
Train Diffusion:  33%|██

Moving average ELBO loss at 740 iterations is: 4926.998974609375. Best ELBO loss value is: 4907.310546875.

C_PATH mean = tensor([[0.8982, 0.6930, 0.8072],
        [0.9016, 0.6944, 0.8134],
        [0.9148, 0.6950, 0.8132],
        [0.9193, 0.6916, 0.8171],
        [0.9591, 0.6926, 0.8199],
        [0.9356, 0.6861, 0.8167],
        [0.9314, 0.6874, 0.8102],
        [0.9390, 0.6929, 0.8136],
        [0.9591, 0.6859, 0.8249],
        [0.9114, 0.6897, 0.8194]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.0600, 0.1215, 0.7828],
         [0.2771, 0.2477, 0.2746],
         [0.1003, 0.1734, 0.3283],
         ...,
         [0.5177, 0.7364, 0.8831],
         [0.3935, 0.6982, 1.0303],
         [0.5953, 0.7356, 1.1611]],

        [[0.3038, 0.1242, 0.6288],
         [0.6192, 0.2482, 0.3080],
         [0.4448, 0.2538, 0.3060],
         ...,
         [1.0319, 0.6629, 0.8509],
         [0.6499, 0.7217, 0.9647],
         [0.9955, 0.7158, 1.1117]],

        [[1.0093, 0.0938, 0.6071],
         [0.57


Train Diffusion:  34%|███▎      | 740/2200 [1:19:53<2:30:05,  6.17s/it][A
Train Diffusion:  34%|███▎      | 741/2200 [1:19:59<2:30:05,  6.17s/it][A
Train Diffusion:  34%|███▎      | 742/2200 [1:20:05<2:29:47,  6.16s/it][A
Train Diffusion:  34%|███▍      | 743/2200 [1:20:11<2:29:39,  6.16s/it][A
Train Diffusion:  34%|███▍      | 744/2200 [1:20:17<2:28:56,  6.14s/it][A
Train Diffusion:  34%|███▍      | 745/2200 [1:20:24<2:28:34,  6.13s/it][A
Train Diffusion:  34%|███▍      | 746/2200 [1:20:30<2:27:59,  6.11s/it][A
Train Diffusion:  34%|███▍      | 747/2200 [1:20:36<2:28:01,  6.11s/it][A
Train Diffusion:  34%|███▍      | 748/2200 [1:20:42<2:29:19,  6.17s/it][A
Train Diffusion:  34%|███▍      | 749/2200 [1:20:48<2:28:31,  6.14s/it][A
Train Diffusion:  34%|███▍      | 750/2200 [1:20:54<2:28:45,  6.16s/it][A
Train Diffusion:  34%|███▍      | 751/2200 [1:21:00<2:28:24,  6.15s/it][A
Train Diffusion:  34%|███▍      | 752/2200 [1:21:07<2:28:14,  6.14s/it][A
Train Diffusion:  34%|██

Moving average ELBO loss at 760 iterations is: 4885.827685546875. Best ELBO loss value is: 4869.572265625.

C_PATH mean = tensor([[0.9088, 0.6983, 0.8203],
        [0.9309, 0.6951, 0.8191],
        [0.9532, 0.6832, 0.8335],
        [0.9075, 0.6946, 0.8062],
        [0.9312, 0.6935, 0.8230],
        [0.9191, 0.6961, 0.8250],
        [0.9339, 0.6907, 0.8256],
        [0.9175, 0.6992, 0.8241],
        [0.9063, 0.6903, 0.8075],
        [0.9471, 0.6946, 0.8262]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.4111, 0.1085, 0.6908],
         [0.5255, 0.2473, 0.2769],
         [0.0517, 0.1959, 0.1808],
         ...,
         [0.5032, 0.7365, 0.6797],
         [0.4526, 0.8504, 1.0927],
         [0.6125, 0.8477, 1.3051]],

        [[0.1908, 0.0845, 0.6686],
         [0.4911, 0.2004, 0.3335],
         [0.8803, 0.2014, 0.2803],
         ...,
         [1.1714, 0.6888, 0.8527],
         [1.3793, 0.6660, 0.9541],
         [1.0584, 0.6469, 1.2150]],

        [[0.5749, 0.1050, 0.8059],
         [0.50


Train Diffusion:  35%|███▍      | 760/2200 [1:21:56<2:27:50,  6.16s/it][A
Train Diffusion:  35%|███▍      | 761/2200 [1:22:02<2:27:10,  6.14s/it][A
Train Diffusion:  35%|███▍      | 762/2200 [1:22:08<2:26:38,  6.12s/it][A
Train Diffusion:  35%|███▍      | 763/2200 [1:22:14<2:26:11,  6.10s/it][A
Train Diffusion:  35%|███▍      | 764/2200 [1:22:20<2:25:51,  6.09s/it][A
Train Diffusion:  35%|███▍      | 765/2200 [1:22:26<2:25:36,  6.09s/it][A
Train Diffusion:  35%|███▍      | 766/2200 [1:22:32<2:26:32,  6.13s/it][A
Train Diffusion:  35%|███▍      | 767/2200 [1:22:39<2:26:45,  6.14s/it][A
Train Diffusion:  35%|███▍      | 768/2200 [1:22:45<2:26:32,  6.14s/it][A
Train Diffusion:  35%|███▍      | 769/2200 [1:22:51<2:26:24,  6.14s/it][A
Train Diffusion:  35%|███▌      | 770/2200 [1:22:57<2:26:12,  6.13s/it][A
Train Diffusion:  35%|███▌      | 771/2200 [1:23:03<2:26:52,  6.17s/it][A
Train Diffusion:  35%|███▌      | 772/2200 [1:23:09<2:26:08,  6.14s/it][A
Train Diffusion:  35%|██

Moving average ELBO loss at 780 iterations is: 4851.582373046875. Best ELBO loss value is: 4830.892578125.

C_PATH mean = tensor([[0.9211, 0.6903, 0.8321],
        [0.9423, 0.6879, 0.8377],
        [0.9367, 0.6890, 0.8251],
        [0.9727, 0.6903, 0.8195],
        [0.9236, 0.6904, 0.8152],
        [0.9278, 0.6949, 0.8187],
        [0.9471, 0.6917, 0.8334],
        [0.9363, 0.6972, 0.8328],
        [0.9293, 0.6851, 0.8231],
        [0.9671, 0.6985, 0.8212]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.1492, 0.1142, 0.7504],
         [0.5586, 0.2249, 0.4284],
         [0.7155, 0.1900, 0.2998],
         ...,
         [0.8140, 0.7812, 0.9466],
         [0.7646, 0.7997, 0.8532],
         [0.9526, 0.7797, 1.1909]],

        [[0.6475, 0.1032, 0.5272],
         [0.4778, 0.1925, 0.4126],
         [0.5632, 0.1995, 0.5249],
         ...,
         [1.1645, 0.6825, 0.8740],
         [0.8781, 0.6488, 0.9368],
         [0.6922, 0.6362, 0.9957]],

        [[0.9847, 0.1024, 0.6112],
         [0.75


Train Diffusion:  35%|███▌      | 780/2200 [1:23:58<2:24:41,  6.11s/it][A
Train Diffusion:  36%|███▌      | 781/2200 [1:24:04<2:25:05,  6.13s/it][A
Train Diffusion:  36%|███▌      | 782/2200 [1:24:10<2:24:57,  6.13s/it][A
Train Diffusion:  36%|███▌      | 783/2200 [1:24:17<2:25:16,  6.15s/it][A
Train Diffusion:  36%|███▌      | 784/2200 [1:24:23<2:24:56,  6.14s/it][A
Train Diffusion:  36%|███▌      | 785/2200 [1:24:29<2:26:55,  6.23s/it][A
Train Diffusion:  36%|███▌      | 786/2200 [1:24:35<2:26:02,  6.20s/it][A
Train Diffusion:  36%|███▌      | 787/2200 [1:24:41<2:25:30,  6.18s/it][A
Train Diffusion:  36%|███▌      | 788/2200 [1:24:48<2:25:56,  6.20s/it][A
Train Diffusion:  36%|███▌      | 789/2200 [1:24:54<2:25:35,  6.19s/it][A
Train Diffusion:  36%|███▌      | 790/2200 [1:25:00<2:24:44,  6.16s/it][A
Train Diffusion:  36%|███▌      | 791/2200 [1:25:06<2:24:16,  6.14s/it][A
Train Diffusion:  36%|███▌      | 792/2200 [1:25:12<2:23:51,  6.13s/it][A
Train Diffusion:  36%|██

Moving average ELBO loss at 800 iterations is: 4826.465673828125. Best ELBO loss value is: 4798.7373046875.

C_PATH mean = tensor([[0.9500, 0.6900, 0.8168],
        [0.9597, 0.6954, 0.8360],
        [0.9638, 0.6844, 0.8433],
        [0.9250, 0.6924, 0.8383],
        [0.9544, 0.6903, 0.8366],
        [0.9403, 0.6886, 0.8294],
        [0.9285, 0.6928, 0.8328],
        [0.9476, 0.6941, 0.8213],
        [0.9274, 0.6928, 0.8368],
        [0.9440, 0.6918, 0.8283]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.3722, 0.1062, 0.7330],
         [0.9287, 0.2167, 0.5685],
         [0.6970, 0.2573, 0.3420],
         ...,
         [1.0424, 0.6745, 0.7050],
         [1.2758, 0.7469, 0.8764],
         [1.2214, 0.6846, 1.2650]],

        [[0.1644, 0.0957, 0.6734],
         [0.5606, 0.2028, 0.3077],
         [0.0910, 0.2076, 0.2954],
         ...,
         [1.2920, 0.6444, 0.9780],
         [0.8592, 0.6792, 0.8141],
         [0.8779, 0.6937, 0.9706]],

        [[0.3726, 0.0907, 0.8717],
         [0.4


Train Diffusion:  36%|███▋      | 800/2200 [1:26:02<2:24:12,  6.18s/it][A
Train Diffusion:  36%|███▋      | 801/2200 [1:26:08<2:23:57,  6.17s/it][A
Train Diffusion:  36%|███▋      | 802/2200 [1:26:14<2:23:24,  6.16s/it][A
Train Diffusion:  36%|███▋      | 803/2200 [1:26:20<2:23:02,  6.14s/it][A
Train Diffusion:  37%|███▋      | 804/2200 [1:26:26<2:23:54,  6.19s/it][A
Train Diffusion:  37%|███▋      | 805/2200 [1:26:32<2:23:30,  6.17s/it][A
Train Diffusion:  37%|███▋      | 806/2200 [1:26:39<2:23:18,  6.17s/it][A
Train Diffusion:  37%|███▋      | 807/2200 [1:26:45<2:22:55,  6.16s/it][A
Train Diffusion:  37%|███▋      | 808/2200 [1:26:51<2:22:51,  6.16s/it][A
Train Diffusion:  37%|███▋      | 809/2200 [1:26:57<2:23:44,  6.20s/it][A
Train Diffusion:  37%|███▋      | 810/2200 [1:27:03<2:23:04,  6.18s/it][A
Train Diffusion:  37%|███▋      | 811/2200 [1:27:09<2:22:13,  6.14s/it][A
Train Diffusion:  37%|███▋      | 812/2200 [1:27:16<2:23:13,  6.19s/it][A
Train Diffusion:  37%|██

Moving average ELBO loss at 820 iterations is: 4778.03408203125. Best ELBO loss value is: 4757.9677734375.

C_PATH mean = tensor([[0.9618, 0.6869, 0.8366],
        [0.9672, 0.6950, 0.8449],
        [0.9227, 0.6929, 0.8356],
        [0.9480, 0.6883, 0.8324],
        [0.9647, 0.6935, 0.8355],
        [0.9182, 0.6939, 0.8263],
        [0.9496, 0.6975, 0.8269],
        [0.9668, 0.6985, 0.8431],
        [0.9455, 0.6916, 0.8296],
        [0.9590, 0.6933, 0.8416]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.0608, 0.1075, 0.7157],
         [0.3692, 0.2021, 0.1773],
         [0.2132, 0.1788, 0.2324],
         ...,
         [0.5475, 0.7365, 0.8603],
         [0.4168, 0.8025, 0.8093],
         [0.6174, 0.7529, 1.1022]],

        [[1.0013, 0.1018, 0.6646],
         [0.3416, 0.1729, 0.2767],
         [1.2542, 0.1842, 0.3201],
         ...,
         [0.9243, 0.7526, 0.8686],
         [1.0988, 0.7557, 0.7235],
         [0.7777, 0.7623, 1.0473]],

        [[1.3448, 0.1153, 0.6702],
         [0.73


Train Diffusion:  37%|███▋      | 820/2200 [1:28:05<2:21:14,  6.14s/it][A
Train Diffusion:  37%|███▋      | 821/2200 [1:28:11<2:21:14,  6.15s/it][A
Train Diffusion:  37%|███▋      | 822/2200 [1:28:17<2:21:05,  6.14s/it][A
Train Diffusion:  37%|███▋      | 823/2200 [1:28:23<2:20:42,  6.13s/it][A
Train Diffusion:  37%|███▋      | 824/2200 [1:28:29<2:20:30,  6.13s/it][A
Train Diffusion:  38%|███▊      | 825/2200 [1:28:35<2:20:11,  6.12s/it][A
Train Diffusion:  38%|███▊      | 826/2200 [1:28:42<2:20:27,  6.13s/it][A
Train Diffusion:  38%|███▊      | 827/2200 [1:28:48<2:20:31,  6.14s/it][A
Train Diffusion:  38%|███▊      | 828/2200 [1:28:54<2:20:45,  6.16s/it][A
Train Diffusion:  38%|███▊      | 829/2200 [1:29:00<2:20:23,  6.14s/it][A
Train Diffusion:  38%|███▊      | 830/2200 [1:29:06<2:20:44,  6.16s/it][A
Train Diffusion:  38%|███▊      | 831/2200 [1:29:12<2:20:37,  6.16s/it][A
Train Diffusion:  38%|███▊      | 832/2200 [1:29:18<2:20:11,  6.15s/it][A
Train Diffusion:  38%|██

Moving average ELBO loss at 840 iterations is: 4751.2279296875. Best ELBO loss value is: 4721.4462890625.

C_PATH mean = tensor([[0.9709, 0.6966, 0.8364],
        [0.9672, 0.6943, 0.8429],
        [0.9689, 0.6983, 0.8434],
        [0.9726, 0.6975, 0.8484],
        [0.9678, 0.6938, 0.8373],
        [0.9412, 0.6973, 0.8332],
        [0.9576, 0.6964, 0.8517],
        [0.9291, 0.6990, 0.8496],
        [0.9607, 0.6964, 0.8350],
        [0.9493, 0.6929, 0.8429]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.1902, 0.1098, 0.6172],
         [0.3649, 0.2480, 0.1716],
         [0.1772, 0.3077, 0.3130],
         ...,
         [1.6429, 0.8117, 0.7779],
         [0.6442, 0.8194, 0.6556],
         [0.7407, 0.8564, 0.8534]],

        [[2.7860, 0.1201, 0.6164],
         [1.0040, 0.2538, 0.2905],
         [0.1443, 0.3259, 0.3577],
         ...,
         [0.7667, 0.7446, 0.9955],
         [1.0104, 0.7856, 1.0347],
         [1.0426, 0.7427, 1.2863]],

        [[0.3183, 0.0939, 0.6772],
         [0.369


Train Diffusion:  38%|███▊      | 840/2200 [1:30:07<2:17:56,  6.09s/it][A
Train Diffusion:  38%|███▊      | 841/2200 [1:30:14<2:19:29,  6.16s/it][A
Train Diffusion:  38%|███▊      | 842/2200 [1:30:20<2:19:17,  6.15s/it][A
Train Diffusion:  38%|███▊      | 843/2200 [1:30:26<2:18:34,  6.13s/it][A
Train Diffusion:  38%|███▊      | 844/2200 [1:30:32<2:18:02,  6.11s/it][A
Train Diffusion:  38%|███▊      | 845/2200 [1:30:38<2:17:48,  6.10s/it][A
Train Diffusion:  38%|███▊      | 846/2200 [1:30:44<2:17:52,  6.11s/it][A
Train Diffusion:  38%|███▊      | 847/2200 [1:30:50<2:17:54,  6.12s/it][A
Train Diffusion:  39%|███▊      | 848/2200 [1:30:56<2:17:37,  6.11s/it][A
Train Diffusion:  39%|███▊      | 849/2200 [1:31:02<2:17:14,  6.09s/it][A
Train Diffusion:  39%|███▊      | 850/2200 [1:31:08<2:16:49,  6.08s/it][A
Train Diffusion:  39%|███▊      | 851/2200 [1:31:14<2:16:27,  6.07s/it][A
Train Diffusion:  39%|███▊      | 852/2200 [1:31:21<2:17:04,  6.10s/it][A
Train Diffusion:  39%|██

Moving average ELBO loss at 860 iterations is: 4705.10888671875. Best ELBO loss value is: 4685.4072265625.

C_PATH mean = tensor([[0.9792, 0.6921, 0.8511],
        [0.9741, 0.7001, 0.8356],
        [0.9730, 0.6967, 0.8563],
        [0.9857, 0.6902, 0.8572],
        [0.9882, 0.6964, 0.8556],
        [0.9628, 0.6981, 0.8505],
        [0.9521, 0.6988, 0.8548],
        [0.9556, 0.6997, 0.8435],
        [0.9737, 0.7035, 0.8645],
        [0.9407, 0.6968, 0.8484]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.1537, 0.1125, 0.6262],
         [0.2903, 0.1740, 0.2509],
         [0.6038, 0.2059, 0.3299],
         ...,
         [1.8134, 0.8012, 0.8791],
         [1.2043, 0.7901, 0.7144],
         [0.4974, 0.7459, 1.2105]],

        [[1.0449, 0.1092, 0.6650],
         [0.4579, 0.2345, 0.5553],
         [0.1344, 0.2861, 0.6112],
         ...,
         [0.5164, 0.7865, 0.8176],
         [0.3401, 0.7860, 0.8696],
         [1.0084, 0.7200, 0.9871]],

        [[1.0219, 0.1007, 0.5597],
         [0.30


Train Diffusion:  39%|███▉      | 860/2200 [1:32:11<2:22:39,  6.39s/it][A
Train Diffusion:  39%|███▉      | 861/2200 [1:32:18<2:22:27,  6.38s/it][A
Train Diffusion:  39%|███▉      | 862/2200 [1:32:24<2:22:02,  6.37s/it][A
Train Diffusion:  39%|███▉      | 863/2200 [1:32:30<2:20:45,  6.32s/it][A
Train Diffusion:  39%|███▉      | 864/2200 [1:32:37<2:21:01,  6.33s/it][A
Train Diffusion:  39%|███▉      | 865/2200 [1:32:43<2:21:40,  6.37s/it][A
Train Diffusion:  39%|███▉      | 866/2200 [1:32:49<2:21:41,  6.37s/it][A
Train Diffusion:  39%|███▉      | 867/2200 [1:32:56<2:21:29,  6.37s/it][A
Train Diffusion:  39%|███▉      | 868/2200 [1:33:02<2:20:26,  6.33s/it][A
Train Diffusion:  40%|███▉      | 869/2200 [1:33:08<2:20:34,  6.34s/it][A
Train Diffusion:  40%|███▉      | 870/2200 [1:33:15<2:20:28,  6.34s/it][A
Train Diffusion:  40%|███▉      | 871/2200 [1:33:21<2:20:53,  6.36s/it][A
Train Diffusion:  40%|███▉      | 872/2200 [1:33:27<2:20:03,  6.33s/it][A
Train Diffusion:  40%|██

Moving average ELBO loss at 880 iterations is: 4682.349609375. Best ELBO loss value is: 4659.3447265625.

C_PATH mean = tensor([[0.9777, 0.7041, 0.8542],
        [0.9410, 0.7038, 0.8504],
        [0.9515, 0.7007, 0.8568],
        [0.9543, 0.7008, 0.8492],
        [0.9518, 0.7030, 0.8564],
        [0.9898, 0.6912, 0.8525],
        [0.9970, 0.6984, 0.8648],
        [0.9743, 0.7001, 0.8612],
        [0.9522, 0.7042, 0.8550],
        [0.9698, 0.7023, 0.8539]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.4193, 0.0890, 0.5992],
         [0.6080, 0.1391, 0.3450],
         [0.6009, 0.2879, 0.4096],
         ...,
         [0.6928, 0.7244, 1.0450],
         [0.9299, 0.8277, 0.8136],
         [0.7706, 0.8437, 1.1008]],

        [[2.3027, 0.1063, 0.7868],
         [0.5807, 0.2166, 0.5654],
         [0.5862, 0.2097, 0.3552],
         ...,
         [1.0718, 0.8468, 1.0847],
         [1.8430, 0.8628, 1.1993],
         [1.4027, 0.7702, 1.4229]],

        [[0.4162, 0.0922, 0.6943],
         [0.9412


Train Diffusion:  40%|████      | 880/2200 [1:34:20<2:19:30,  6.34s/it][A
Train Diffusion:  40%|████      | 881/2200 [1:34:26<2:20:34,  6.39s/it][A
Train Diffusion:  40%|████      | 882/2200 [1:34:33<2:21:49,  6.46s/it][A
Train Diffusion:  40%|████      | 883/2200 [1:34:39<2:21:41,  6.45s/it][A
Train Diffusion:  40%|████      | 884/2200 [1:34:46<2:20:38,  6.41s/it][A
Train Diffusion:  40%|████      | 885/2200 [1:34:52<2:19:03,  6.35s/it][A
Train Diffusion:  40%|████      | 886/2200 [1:34:58<2:19:22,  6.36s/it][A
Train Diffusion:  40%|████      | 887/2200 [1:35:05<2:19:17,  6.37s/it][A
Train Diffusion:  40%|████      | 888/2200 [1:35:11<2:17:58,  6.31s/it][A
Train Diffusion:  40%|████      | 889/2200 [1:35:17<2:16:59,  6.27s/it][A
Train Diffusion:  40%|████      | 890/2200 [1:35:23<2:16:10,  6.24s/it][A
Train Diffusion:  40%|████      | 891/2200 [1:35:30<2:17:34,  6.31s/it][A
Train Diffusion:  41%|████      | 892/2200 [1:35:37<2:21:27,  6.49s/it][A
Train Diffusion:  41%|██

Moving average ELBO loss at 900 iterations is: 4649.317822265625. Best ELBO loss value is: 4624.95751953125.

C_PATH mean = tensor([[0.9859, 0.6974, 0.8614],
        [0.9783, 0.7031, 0.8703],
        [0.9873, 0.7021, 0.8719],
        [0.9540, 0.7019, 0.8533],
        [0.9650, 0.7041, 0.8636],
        [0.9861, 0.7029, 0.8598],
        [0.9767, 0.6975, 0.8626],
        [0.9712, 0.7007, 0.8546],
        [0.9749, 0.6937, 0.8735],
        [0.9796, 0.6941, 0.8517]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.0478, 0.0941, 0.6473],
         [0.4223, 0.1989, 0.3071],
         [0.5899, 0.2375, 0.3191],
         ...,
         [1.5303, 0.8270, 1.0404],
         [1.2072, 0.8229, 1.0078],
         [0.9071, 0.8192, 1.3427]],

        [[0.7954, 0.0944, 0.6492],
         [0.6459, 0.1729, 0.2688],
         [0.8953, 0.1956, 0.4172],
         ...,
         [0.7639, 0.7980, 0.7907],
         [1.3860, 0.7117, 0.9491],
         [1.1636, 0.6981, 1.2481]],

        [[0.5027, 0.1025, 0.7889],
         [0.


Train Diffusion:  41%|████      | 900/2200 [1:36:27<2:15:01,  6.23s/it][A
Train Diffusion:  41%|████      | 901/2200 [1:36:33<2:15:03,  6.24s/it][A
Train Diffusion:  41%|████      | 902/2200 [1:36:39<2:14:52,  6.23s/it][A
Train Diffusion:  41%|████      | 903/2200 [1:36:46<2:15:41,  6.28s/it][A
Train Diffusion:  41%|████      | 904/2200 [1:36:52<2:14:14,  6.22s/it][A
Train Diffusion:  41%|████      | 905/2200 [1:36:58<2:14:11,  6.22s/it][A
Train Diffusion:  41%|████      | 906/2200 [1:37:04<2:14:05,  6.22s/it][A
Train Diffusion:  41%|████      | 907/2200 [1:37:10<2:13:33,  6.20s/it][A
Train Diffusion:  41%|████▏     | 908/2200 [1:37:17<2:14:24,  6.24s/it][A
Train Diffusion:  41%|████▏     | 909/2200 [1:37:23<2:13:18,  6.20s/it][A
Train Diffusion:  41%|████▏     | 910/2200 [1:37:29<2:12:47,  6.18s/it][A
Train Diffusion:  41%|████▏     | 911/2200 [1:37:35<2:12:46,  6.18s/it][A
Train Diffusion:  41%|████▏     | 912/2200 [1:37:41<2:13:42,  6.23s/it][A
Train Diffusion:  42%|██

Moving average ELBO loss at 920 iterations is: 4616.016455078125. Best ELBO loss value is: 4600.20751953125.

C_PATH mean = tensor([[0.9951, 0.7021, 0.8619],
        [0.9847, 0.6987, 0.8625],
        [0.9822, 0.7008, 0.8462],
        [0.9815, 0.7016, 0.8580],
        [1.0253, 0.6968, 0.8686],
        [1.0081, 0.7016, 0.8543],
        [0.9920, 0.7028, 0.8713],
        [0.9851, 0.7007, 0.8600],
        [0.9830, 0.7000, 0.8589],
        [1.0209, 0.7000, 0.8588]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.4019, 0.1052, 0.7630],
         [0.8320, 0.1863, 0.4592],
         [1.6877, 0.1651, 0.4170],
         ...,
         [1.5806, 0.7952, 0.8772],
         [1.3910, 0.6741, 0.9014],
         [1.3555, 0.6273, 1.1929]],

        [[0.3587, 0.1134, 0.5395],
         [0.3616, 0.2397, 0.1486],
         [0.3412, 0.2175, 0.4666],
         ...,
         [0.8662, 0.8075, 0.9574],
         [0.6729, 0.7383, 1.1669],
         [1.1003, 0.6979, 1.2733]],

        [[1.7568, 0.1183, 0.4866],
         [0.


Train Diffusion:  42%|████▏     | 920/2200 [1:38:31<2:13:00,  6.23s/it][A
Train Diffusion:  42%|████▏     | 921/2200 [1:38:37<2:12:42,  6.23s/it][A
Train Diffusion:  42%|████▏     | 922/2200 [1:38:43<2:12:35,  6.22s/it][A
Train Diffusion:  42%|████▏     | 923/2200 [1:38:50<2:14:24,  6.31s/it][A
Train Diffusion:  42%|████▏     | 924/2200 [1:38:56<2:15:07,  6.35s/it][A
Train Diffusion:  42%|████▏     | 925/2200 [1:39:03<2:14:39,  6.34s/it][A
Train Diffusion:  42%|████▏     | 926/2200 [1:39:09<2:13:52,  6.30s/it][A
Train Diffusion:  42%|████▏     | 927/2200 [1:39:15<2:13:02,  6.27s/it][A
Train Diffusion:  42%|████▏     | 928/2200 [1:39:21<2:11:53,  6.22s/it][A
Train Diffusion:  42%|████▏     | 929/2200 [1:39:28<2:12:01,  6.23s/it][A
Train Diffusion:  42%|████▏     | 930/2200 [1:39:34<2:11:46,  6.23s/it][A
Train Diffusion:  42%|████▏     | 931/2200 [1:39:40<2:11:46,  6.23s/it][A
Train Diffusion:  42%|████▏     | 932/2200 [1:39:46<2:10:54,  6.19s/it][A
Train Diffusion:  42%|██

Moving average ELBO loss at 940 iterations is: 4573.830078125. Best ELBO loss value is: 4537.015625.

C_PATH mean = tensor([[0.9720, 0.7011, 0.8701],
        [0.9797, 0.7010, 0.8676],
        [0.9686, 0.7031, 0.8685],
        [0.9929, 0.7022, 0.8762],
        [1.0270, 0.7027, 0.8886],
        [1.0186, 0.6980, 0.8796],
        [0.9895, 0.6966, 0.8727],
        [0.9923, 0.7000, 0.8641],
        [0.9980, 0.6973, 0.8818],
        [1.0013, 0.7049, 0.8863]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.7013, 0.1137, 0.8262],
         [0.3477, 0.1662, 0.5041],
         [0.0748, 0.2155, 0.3579],
         ...,
         [1.2843, 0.6770, 0.9923],
         [1.5421, 0.6975, 1.1445],
         [0.7661, 0.7053, 1.3803]],

        [[1.1341, 0.1064, 0.7339],
         [0.6919, 0.1434, 0.3457],
         [0.5444, 0.2187, 0.4179],
         ...,
         [1.1929, 0.7598, 0.8078],
         [2.0894, 0.7230, 1.0228],
         [1.5724, 0.6604, 1.2396]],

        [[1.0384, 0.1078, 0.7117],
         [0.8690, 0.


Train Diffusion:  43%|████▎     | 940/2200 [1:40:36<2:10:40,  6.22s/it][A
Train Diffusion:  43%|████▎     | 941/2200 [1:40:42<2:10:14,  6.21s/it][A
Train Diffusion:  43%|████▎     | 942/2200 [1:40:48<2:09:54,  6.20s/it][A
Train Diffusion:  43%|████▎     | 943/2200 [1:40:54<2:09:31,  6.18s/it][A
Train Diffusion:  43%|████▎     | 944/2200 [1:41:00<2:09:18,  6.18s/it][A
Train Diffusion:  43%|████▎     | 945/2200 [1:41:07<2:10:04,  6.22s/it][A
Train Diffusion:  43%|████▎     | 946/2200 [1:41:13<2:09:45,  6.21s/it][A
Train Diffusion:  43%|████▎     | 947/2200 [1:41:19<2:09:21,  6.19s/it][A
Train Diffusion:  43%|████▎     | 948/2200 [1:41:25<2:09:06,  6.19s/it][A
Train Diffusion:  43%|████▎     | 949/2200 [1:41:31<2:08:44,  6.17s/it][A
Train Diffusion:  43%|████▎     | 950/2200 [1:41:38<2:09:16,  6.20s/it][A
Train Diffusion:  43%|████▎     | 951/2200 [1:41:44<2:08:22,  6.17s/it][A
Train Diffusion:  43%|████▎     | 952/2200 [1:41:50<2:07:18,  6.12s/it][A
Train Diffusion:  43%|██

Moving average ELBO loss at 960 iterations is: 4546.566015625. Best ELBO loss value is: 4517.34326171875.

C_PATH mean = tensor([[1.0164, 0.7006, 0.8716],
        [0.9971, 0.6960, 0.8873],
        [1.0276, 0.6988, 0.8691],
        [0.9928, 0.6965, 0.8811],
        [1.0108, 0.6943, 0.8795],
        [1.0124, 0.7093, 0.8765],
        [1.0362, 0.6912, 0.8787],
        [1.0000, 0.7043, 0.8715],
        [1.0105, 0.7021, 0.8779],
        [1.0334, 0.6957, 0.8716]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.3171, 0.1222, 0.6330],
         [0.3417, 0.2184, 0.2385],
         [0.0722, 0.2853, 0.4183],
         ...,
         [1.4362, 0.7183, 0.8166],
         [0.7018, 0.7018, 0.8261],
         [1.1081, 0.6626, 1.0803]],

        [[0.6139, 0.1074, 0.7225],
         [0.8008, 0.1872, 0.4123],
         [1.1494, 0.1374, 0.3020],
         ...,
         [0.2213, 0.6151, 0.9146],
         [1.2368, 0.6351, 0.7589],
         [1.0146, 0.6483, 0.9926]],

        [[2.1801, 0.1202, 0.6883],
         [0.586


Train Diffusion:  44%|████▎     | 960/2200 [1:42:38<2:03:58,  6.00s/it][A
Train Diffusion:  44%|████▎     | 961/2200 [1:42:44<2:04:36,  6.03s/it][A
Train Diffusion:  44%|████▎     | 962/2200 [1:42:50<2:04:24,  6.03s/it][A
Train Diffusion:  44%|████▍     | 963/2200 [1:42:56<2:04:15,  6.03s/it][A
Train Diffusion:  44%|████▍     | 964/2200 [1:43:02<2:04:29,  6.04s/it][A
Train Diffusion:  44%|████▍     | 965/2200 [1:43:08<2:04:11,  6.03s/it][A
Train Diffusion:  44%|████▍     | 966/2200 [1:43:14<2:04:29,  6.05s/it][A
Train Diffusion:  44%|████▍     | 967/2200 [1:43:20<2:04:23,  6.05s/it][A
Train Diffusion:  44%|████▍     | 968/2200 [1:43:26<2:03:50,  6.03s/it][A
Train Diffusion:  44%|████▍     | 969/2200 [1:43:32<2:03:43,  6.03s/it][A
Train Diffusion:  44%|████▍     | 970/2200 [1:43:38<2:03:24,  6.02s/it][A
Train Diffusion:  44%|████▍     | 971/2200 [1:43:44<2:03:07,  6.01s/it][A
Train Diffusion:  44%|████▍     | 972/2200 [1:43:50<2:03:36,  6.04s/it][A
Train Diffusion:  44%|██

Moving average ELBO loss at 980 iterations is: 4515.89365234375. Best ELBO loss value is: 4491.6015625.

C_PATH mean = tensor([[1.0150, 0.6991, 0.8879],
        [1.0098, 0.6971, 0.8817],
        [1.0341, 0.7056, 0.8883],
        [1.0127, 0.7048, 0.8693],
        [1.0009, 0.7007, 0.8866],
        [1.0019, 0.7039, 0.8858],
        [1.0071, 0.6999, 0.8826],
        [1.0407, 0.7042, 0.8895],
        [1.0029, 0.7080, 0.8800],
        [1.0226, 0.7062, 0.8837]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.7993, 0.1074, 0.7032],
         [0.5745, 0.1822, 0.4520],
         [0.9826, 0.2169, 0.5558],
         ...,
         [0.9822, 0.7667, 0.9358],
         [1.4896, 0.8273, 1.0053],
         [1.6721, 0.8074, 1.2848]],

        [[2.6140, 0.1315, 0.5901],
         [0.8653, 0.2670, 0.2735],
         [0.0612, 0.2464, 0.3596],
         ...,
         [0.7392, 0.6983, 0.7571],
         [0.6290, 0.7219, 0.6862],
         [0.3536, 0.7551, 1.0868]],

        [[0.2645, 0.0987, 0.7487],
         [0.3585,


Train Diffusion:  45%|████▍     | 980/2200 [1:44:39<2:05:45,  6.18s/it][A
Train Diffusion:  45%|████▍     | 981/2200 [1:44:45<2:04:50,  6.14s/it][A
Train Diffusion:  45%|████▍     | 982/2200 [1:44:51<2:03:59,  6.11s/it][A
Train Diffusion:  45%|████▍     | 983/2200 [1:44:57<2:03:09,  6.07s/it][A
Train Diffusion:  45%|████▍     | 984/2200 [1:45:03<2:02:21,  6.04s/it][A
Train Diffusion:  45%|████▍     | 985/2200 [1:45:09<2:02:44,  6.06s/it][A
Train Diffusion:  45%|████▍     | 986/2200 [1:45:16<2:02:55,  6.08s/it][A
Train Diffusion:  45%|████▍     | 987/2200 [1:45:22<2:02:45,  6.07s/it][A
Train Diffusion:  45%|████▍     | 988/2200 [1:45:28<2:03:30,  6.11s/it][A
Train Diffusion:  45%|████▍     | 989/2200 [1:45:34<2:03:03,  6.10s/it][A
Train Diffusion:  45%|████▌     | 990/2200 [1:45:40<2:03:02,  6.10s/it][A
Train Diffusion:  45%|████▌     | 991/2200 [1:45:46<2:03:08,  6.11s/it][A
Train Diffusion:  45%|████▌     | 992/2200 [1:45:52<2:02:51,  6.10s/it][A
Train Diffusion:  45%|██

Moving average ELBO loss at 1000 iterations is: 4475.12041015625. Best ELBO loss value is: 4451.67041015625.

C_PATH mean = tensor([[1.0620, 0.6961, 0.8939],
        [1.0066, 0.7067, 0.8848],
        [1.0286, 0.6982, 0.8898],
        [1.0220, 0.7058, 0.8889],
        [1.0067, 0.7050, 0.8881],
        [1.0198, 0.6996, 0.9047],
        [1.0464, 0.7011, 0.8901],
        [1.0303, 0.6940, 0.8972],
        [1.0547, 0.7040, 0.8929],
        [1.0049, 0.6949, 0.8824]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.0789, 0.1146, 0.9710],
         [0.7451, 0.1724, 0.7680],
         [0.1362, 0.3126, 0.4880],
         ...,
         [1.2428, 0.6666, 0.9143],
         [0.3183, 0.6162, 0.9763],
         [0.6700, 0.6720, 1.1119]],

        [[0.0976, 0.1162, 0.7813],
         [0.4774, 0.2305, 0.3602],
         [0.4819, 0.2298, 0.2556],
         ...,
         [0.7056, 0.7768, 0.8964],
         [0.6390, 0.8067, 0.6439],
         [0.7121, 0.8295, 1.0269]],

        [[0.0610, 0.1112, 0.8360],
         [0.


Train Diffusion:  45%|████▌     | 1000/2200 [1:46:41<2:01:43,  6.09s/it][A
Train Diffusion:  46%|████▌     | 1001/2200 [1:46:47<2:01:56,  6.10s/it][A
Train Diffusion:  46%|████▌     | 1002/2200 [1:46:54<2:01:33,  6.09s/it][A
Train Diffusion:  46%|████▌     | 1003/2200 [1:47:00<2:01:36,  6.10s/it][A
Train Diffusion:  46%|████▌     | 1004/2200 [1:47:06<2:02:16,  6.13s/it][A
Train Diffusion:  46%|████▌     | 1005/2200 [1:47:12<2:01:56,  6.12s/it][A
Train Diffusion:  46%|████▌     | 1006/2200 [1:47:18<2:02:20,  6.15s/it][A
Train Diffusion:  46%|████▌     | 1007/2200 [1:47:24<2:02:07,  6.14s/it][A
Train Diffusion:  46%|████▌     | 1008/2200 [1:47:30<2:01:58,  6.14s/it][A
Train Diffusion:  46%|████▌     | 1009/2200 [1:47:37<2:02:28,  6.17s/it][A
Train Diffusion:  46%|████▌     | 1010/2200 [1:47:43<2:01:59,  6.15s/it][A
Train Diffusion:  46%|████▌     | 1011/2200 [1:47:49<2:01:49,  6.15s/it][A
Train Diffusion:  46%|████▌     | 1012/2200 [1:47:55<2:00:58,  6.11s/it][A
Train Diffu

Moving average ELBO loss at 1020 iterations is: 4438.852978515625. Best ELBO loss value is: 4423.57470703125.

C_PATH mean = tensor([[1.0179, 0.7045, 0.8987],
        [1.0530, 0.7029, 0.8942],
        [1.0658, 0.7001, 0.8930],
        [1.0157, 0.7006, 0.8975],
        [1.0419, 0.7048, 0.8945],
        [1.0549, 0.7021, 0.9073],
        [1.0247, 0.6995, 0.8949],
        [1.0403, 0.7036, 0.8932],
        [1.0280, 0.6980, 0.8991],
        [1.0685, 0.6983, 0.9122]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.2461, 0.1115, 0.9435],
         [0.2376, 0.1942, 0.3308],
         [0.1536, 0.3264, 0.3226],
         ...,
         [1.5945, 0.6716, 0.9396],
         [1.1922, 0.6355, 1.1884],
         [1.3313, 0.5952, 1.4063]],

        [[0.7973, 0.1048, 0.7403],
         [0.3411, 0.1428, 0.3757],
         [0.0893, 0.1288, 0.2792],
         ...,
         [1.2468, 0.6907, 0.8070],
         [0.4629, 0.7544, 1.0011],
         [0.5249, 0.7677, 1.4059]],

        [[0.0519, 0.1044, 0.7390],
         [0


Train Diffusion:  46%|████▋     | 1020/2200 [1:48:44<1:59:41,  6.09s/it][A
Train Diffusion:  46%|████▋     | 1021/2200 [1:48:50<1:59:18,  6.07s/it][A
Train Diffusion:  46%|████▋     | 1022/2200 [1:48:56<1:58:47,  6.05s/it][A
Train Diffusion:  46%|████▋     | 1023/2200 [1:49:02<1:58:15,  6.03s/it][A
Train Diffusion:  47%|████▋     | 1024/2200 [1:49:08<1:57:52,  6.01s/it][A
Train Diffusion:  47%|████▋     | 1025/2200 [1:49:14<1:58:16,  6.04s/it][A
Train Diffusion:  47%|████▋     | 1026/2200 [1:49:20<1:58:03,  6.03s/it][A
Train Diffusion:  47%|████▋     | 1027/2200 [1:49:26<1:58:25,  6.06s/it][A
Train Diffusion:  47%|████▋     | 1028/2200 [1:49:33<2:00:53,  6.19s/it][A
Train Diffusion:  47%|████▋     | 1029/2200 [1:49:39<2:02:58,  6.30s/it][A
Train Diffusion:  47%|████▋     | 1030/2200 [1:49:46<2:04:15,  6.37s/it][A
Train Diffusion:  47%|████▋     | 1031/2200 [1:49:52<2:05:26,  6.44s/it][A
Train Diffusion:  47%|████▋     | 1032/2200 [1:49:59<2:04:56,  6.42s/it][A
Train Diffu

Moving average ELBO loss at 1040 iterations is: 4409.9630859375. Best ELBO loss value is: 4388.150390625.

C_PATH mean = tensor([[1.0607, 0.7014, 0.9078],
        [1.0599, 0.7040, 0.8988],
        [1.0261, 0.7064, 0.9018],
        [1.0395, 0.7068, 0.9135],
        [1.0480, 0.7027, 0.9076],
        [1.0318, 0.7085, 0.9085],
        [1.0353, 0.7066, 0.9135],
        [1.0232, 0.7018, 0.9043],
        [1.0201, 0.7072, 0.9125],
        [1.0466, 0.7005, 0.9169]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.3983, 0.1096, 0.7981],
         [0.9045, 0.2431, 0.2234],
         [0.8104, 0.2050, 0.2159],
         ...,
         [0.9180, 0.7762, 0.8359],
         [1.0064, 0.6944, 0.9275],
         [1.0720, 0.6144, 1.2477]],

        [[0.0513, 0.1202, 0.7581],
         [0.2206, 0.2490, 0.6866],
         [0.1085, 0.2835, 0.3462],
         ...,
         [1.4231, 0.6388, 1.0812],
         [0.5939, 0.7172, 1.0676],
         [0.8570, 0.7786, 1.1188]],

        [[1.3944, 0.1097, 0.5833],
         [0.695


Train Diffusion:  47%|████▋     | 1040/2200 [1:50:49<1:58:48,  6.15s/it][A
Train Diffusion:  47%|████▋     | 1041/2200 [1:50:55<1:59:05,  6.17s/it][A
Train Diffusion:  47%|████▋     | 1042/2200 [1:51:01<1:59:03,  6.17s/it][A
Train Diffusion:  47%|████▋     | 1043/2200 [1:51:07<1:58:21,  6.14s/it][A
Train Diffusion:  47%|████▋     | 1044/2200 [1:51:13<1:57:40,  6.11s/it][A
Train Diffusion:  48%|████▊     | 1045/2200 [1:51:19<1:57:15,  6.09s/it][A
Train Diffusion:  48%|████▊     | 1046/2200 [1:51:25<1:57:02,  6.09s/it][A
Train Diffusion:  48%|████▊     | 1047/2200 [1:51:31<1:57:16,  6.10s/it][A
Train Diffusion:  48%|████▊     | 1048/2200 [1:51:37<1:56:51,  6.09s/it][A
Train Diffusion:  48%|████▊     | 1049/2200 [1:51:43<1:56:25,  6.07s/it][A
Train Diffusion:  48%|████▊     | 1050/2200 [1:51:50<1:56:36,  6.08s/it][A
Train Diffusion:  48%|████▊     | 1051/2200 [1:51:56<1:56:26,  6.08s/it][A
Train Diffusion:  48%|████▊     | 1052/2200 [1:52:02<1:56:57,  6.11s/it][A
Train Diffu

Moving average ELBO loss at 1060 iterations is: 4367.494921875. Best ELBO loss value is: 4346.404296875.

C_PATH mean = tensor([[1.0656, 0.7103, 0.9218],
        [1.0593, 0.7039, 0.9169],
        [1.0546, 0.6977, 0.9179],
        [1.0304, 0.7029, 0.9350],
        [1.0882, 0.7050, 0.9182],
        [1.0643, 0.7064, 0.9177],
        [1.0714, 0.7026, 0.9206],
        [1.0714, 0.7075, 0.9216],
        [1.0453, 0.7080, 0.9199],
        [1.0377, 0.7034, 0.9208]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.7399, 0.1188, 0.7300],
         [0.7526, 0.2155, 0.5250],
         [0.7582, 0.3072, 0.5096],
         ...,
         [1.2437, 0.6846, 0.9822],
         [0.6395, 0.7820, 0.9980],
         [0.4526, 0.7900, 1.2504]],

        [[1.4346, 0.1085, 0.8370],
         [0.6468, 0.2057, 0.5115],
         [0.6590, 0.2184, 0.4144],
         ...,
         [0.6699, 0.6901, 0.7852],
         [0.5261, 0.7386, 0.7834],
         [0.4060, 0.7774, 1.0837]],

        [[0.8716, 0.1081, 0.6261],
         [0.8701


Train Diffusion:  48%|████▊     | 1060/2200 [1:53:08<2:44:37,  8.66s/it][A
Train Diffusion:  48%|████▊     | 1061/2200 [1:53:15<2:35:39,  8.20s/it][A
Train Diffusion:  48%|████▊     | 1062/2200 [1:53:23<2:33:38,  8.10s/it][A
Train Diffusion:  48%|████▊     | 1063/2200 [1:53:29<2:23:53,  7.59s/it][A
Train Diffusion:  48%|████▊     | 1064/2200 [1:53:36<2:19:07,  7.35s/it][A
Train Diffusion:  48%|████▊     | 1065/2200 [1:53:43<2:15:54,  7.18s/it][A
Train Diffusion:  48%|████▊     | 1066/2200 [1:53:51<2:21:58,  7.51s/it][A
Train Diffusion:  48%|████▊     | 1067/2200 [1:53:59<2:25:27,  7.70s/it][A
Train Diffusion:  49%|████▊     | 1068/2200 [1:54:06<2:20:15,  7.43s/it][A
Train Diffusion:  49%|████▊     | 1069/2200 [1:54:20<2:55:09,  9.29s/it][A
Train Diffusion:  49%|████▊     | 1070/2200 [1:54:32<3:14:56, 10.35s/it][A
Train Diffusion:  49%|████▊     | 1071/2200 [1:54:39<2:53:10,  9.20s/it][A
Train Diffusion:  49%|████▊     | 1072/2200 [1:54:47<2:47:54,  8.93s/it][A
Train Diffu

Moving average ELBO loss at 1080 iterations is: 4321.33974609375. Best ELBO loss value is: 4299.826171875.

C_PATH mean = tensor([[1.0899, 0.7064, 0.9290],
        [1.0356, 0.7087, 0.9193],
        [1.1165, 0.7035, 0.9351],
        [1.0654, 0.7038, 0.9324],
        [1.0334, 0.7090, 0.9239],
        [1.0809, 0.7044, 0.9353],
        [1.0746, 0.7085, 0.9262],
        [1.0550, 0.7013, 0.9226],
        [1.0518, 0.7051, 0.9183],
        [1.0827, 0.7078, 0.9376]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.4112, 0.1042, 0.6853],
         [0.4279, 0.1917, 0.6519],
         [0.3279, 0.2261, 0.4890],
         ...,
         [1.2623, 0.6802, 0.9770],
         [0.3002, 0.6938, 1.3002],
         [0.9625, 0.6835, 1.4644]],

        [[0.1909, 0.1236, 0.6982],
         [0.7217, 0.2819, 0.2379],
         [0.1748, 0.3556, 0.3072],
         ...,
         [1.2148, 0.6896, 0.9126],
         [0.9130, 0.7051, 0.6422],
         [1.0911, 0.7057, 0.8080]],

        [[0.3703, 0.1038, 0.7894],
         [0.74


Train Diffusion:  49%|████▉     | 1080/2200 [1:55:48<2:10:02,  6.97s/it][A
Train Diffusion:  49%|████▉     | 1081/2200 [1:55:54<2:06:16,  6.77s/it][A
Train Diffusion:  49%|████▉     | 1082/2200 [1:56:01<2:03:34,  6.63s/it][A
Train Diffusion:  49%|████▉     | 1083/2200 [1:56:07<2:01:29,  6.53s/it][A
Train Diffusion:  49%|████▉     | 1084/2200 [1:56:13<2:00:12,  6.46s/it][A
Train Diffusion:  49%|████▉     | 1085/2200 [1:56:20<1:59:06,  6.41s/it][A
Train Diffusion:  49%|████▉     | 1086/2200 [1:56:26<1:59:14,  6.42s/it][A
Train Diffusion:  49%|████▉     | 1087/2200 [1:56:32<1:59:19,  6.43s/it][A
Train Diffusion:  49%|████▉     | 1088/2200 [1:56:39<1:59:28,  6.45s/it][A
Train Diffusion:  50%|████▉     | 1089/2200 [1:56:46<2:00:13,  6.49s/it][A
Train Diffusion:  50%|████▉     | 1090/2200 [1:56:52<2:01:31,  6.57s/it][A
Train Diffusion:  50%|████▉     | 1091/2200 [1:56:59<2:01:45,  6.59s/it][A
Train Diffusion:  50%|████▉     | 1092/2200 [1:57:06<2:02:22,  6.63s/it][A
Train Diffu

Moving average ELBO loss at 1100 iterations is: 4301.49404296875. Best ELBO loss value is: 4270.083984375.

C_PATH mean = tensor([[1.0947, 0.7052, 0.9360],
        [1.0885, 0.7134, 0.9307],
        [1.0798, 0.7124, 0.9346],
        [1.0785, 0.7133, 0.9319],
        [1.0310, 0.7141, 0.9320],
        [1.0890, 0.7092, 0.9328],
        [1.0940, 0.7057, 0.9418],
        [1.1239, 0.7065, 0.9335],
        [1.0560, 0.7087, 0.9240],
        [1.0889, 0.7130, 0.9398]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[2.0304, 0.1141, 0.7947],
         [0.9177, 0.1531, 0.4808],
         [1.6089, 0.1732, 0.3737],
         ...,
         [1.7124, 0.7848, 1.2318],
         [0.9063, 0.8327, 1.2900],
         [1.3916, 0.8354, 1.2667]],

        [[0.7000, 0.1054, 0.7724],
         [0.7712, 0.2003, 0.4483],
         [0.6691, 0.2978, 0.5712],
         ...,
         [0.7593, 0.7682, 1.1169],
         [1.5475, 0.7159, 1.1815],
         [1.3073, 0.7804, 1.3099]],

        [[0.0946, 0.1092, 0.6930],
         [0.35


Train Diffusion:  50%|█████     | 1100/2200 [1:57:58<2:00:18,  6.56s/it][A
Train Diffusion:  50%|█████     | 1101/2200 [1:58:06<2:03:07,  6.72s/it][A
Train Diffusion:  50%|█████     | 1102/2200 [1:58:12<2:03:52,  6.77s/it][A
Train Diffusion:  50%|█████     | 1103/2200 [1:58:20<2:07:20,  6.96s/it][A
Train Diffusion:  50%|█████     | 1104/2200 [1:58:30<2:22:41,  7.81s/it][A
Train Diffusion:  50%|█████     | 1105/2200 [1:58:40<2:38:12,  8.67s/it][A
Train Diffusion:  50%|█████     | 1106/2200 [1:58:48<2:32:23,  8.36s/it][A
Train Diffusion:  50%|█████     | 1107/2200 [1:58:55<2:23:57,  7.90s/it][A
Train Diffusion:  50%|█████     | 1108/2200 [1:59:01<2:16:42,  7.51s/it][A
Train Diffusion:  50%|█████     | 1109/2200 [1:59:08<2:11:41,  7.24s/it][A
Train Diffusion:  50%|█████     | 1110/2200 [1:59:15<2:09:08,  7.11s/it][A
Train Diffusion:  50%|█████     | 1111/2200 [1:59:22<2:11:07,  7.22s/it][A
Train Diffusion:  51%|█████     | 1112/2200 [1:59:29<2:09:22,  7.13s/it][A
Train Diffu

Moving average ELBO loss at 1120 iterations is: 4251.472900390625. Best ELBO loss value is: 4233.5185546875.

C_PATH mean = tensor([[1.0794, 0.7117, 0.9538],
        [1.0862, 0.7088, 0.9552],
        [1.0782, 0.7108, 0.9415],
        [1.0875, 0.7099, 0.9455],
        [1.1083, 0.7116, 0.9497],
        [1.0879, 0.7105, 0.9395],
        [1.0334, 0.7088, 0.9314],
        [1.0736, 0.7064, 0.9569],
        [1.0638, 0.7106, 0.9475],
        [1.0998, 0.7058, 0.9605]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[0.1335, 0.1135, 0.7164],
         [0.5901, 0.2100, 0.5261],
         [0.4792, 0.2892, 0.4490],
         ...,
         [0.9551, 0.7563, 0.9356],
         [1.0160, 0.6950, 0.8902],
         [1.3084, 0.6812, 1.2444]],

        [[0.5019, 0.1038, 0.6965],
         [0.5575, 0.1941, 0.5207],
         [1.0465, 0.2554, 0.4423],
         ...,
         [1.0534, 0.7000, 0.9728],
         [1.4879, 0.7080, 1.2150],
         [1.8294, 0.7149, 1.5524]],

        [[0.6391, 0.0946, 0.8333],
         [0.


Train Diffusion:  51%|█████     | 1120/2200 [2:00:25<2:05:46,  6.99s/it][A
Train Diffusion:  51%|█████     | 1121/2200 [2:00:37<1:56:06,  6.46s/it][A


KeyboardInterrupt: 

In [None]:
# Write the `net` object to disk. The filename embeds the simulation length `t`
# and the SDE discretization timestep `dt_flow` from the configuration cell.
# NOTE(review): the object itself is passed (not a state_dict), so it is
# pickled whole; loading it back presumably requires the defining classes to be
# importable — confirm this is the intended checkpoint format.
torch.save(net, f'net_t_{t}_dt_{dt_flow}.pt')

In [None]:
def plot_post(x, obs_model, state_idx=0, num_samples=20,
              ymin=None, ymax=None):
    """Plot the approximate posterior of one state variable with observations.

    Draws, on the current matplotlib axes, the mean of ``x`` over its first
    axis as a function of time, a band of two standard deviations around that
    mean, and the observed values held by ``obs_model``.

    The time axis is built from the notebook-level settings ``t`` (simulation
    length in hours) and ``dt_flow`` (SDE discretization timestep), using the
    same point count as the configuration cell: ``int(t / dt_flow) + 1``.

    Args:
        x: Tensor indexed as ``x[sample, time, state]``. Its time axis must
            have ``int(t / dt_flow) + 1`` entries, otherwise matplotlib raises
            on the length mismatch. Presumably this is the flow output with the
            initial state included — confirm against the caller.
        obs_model: Object exposing ``times`` and ``mu``; ``mu[state_idx, :]``
            is plotted against ``times`` as unconnected markers.
        state_idx: Index of the state to plot; 0, 1, 2 select the y-axis
            labels 'SOC', 'DOC', 'MBC' respectively.
        num_samples: Number reported in the plot title. It is not checked
            against ``x.shape[0]``.
        ymin: Lower y-axis limit, or None to leave it automatic.
        ymax: Upper y-axis limit, or None to leave it automatic.
    """
    # Detach from the autograd graph and move to host memory: matplotlib cannot
    # convert tensors that require grad or that live on a CUDA device.
    state_paths = x[:, :, state_idx].detach().cpu()
    q_mean, q_std = state_paths.mean(0), state_paths.std(0)

    # Build the grid from an integer count instead of a float-step arange, whose
    # length is sensitive to rounding of (end - start) / step.
    n_steps = int(t / dt_flow) + 1
    hours = torch.arange(n_steps) * dt_flow

    plt.plot(hours, q_mean, label='Posterior mean')
    plt.fill_between(hours, q_mean - 2*q_std, q_mean + 2*q_std, alpha=0.5,
                     label='Posterior $\\mu \\pm 2\\sigma$')
    plt.plot(obs_model.times, obs_model.mu[state_idx, :], linestyle='None', marker='o',
             label='Observed')

    plt.legend()
    plt.xlabel('Hour')
    plt.ylabel(['SOC', 'DOC', 'MBC'][state_idx])
    plt.ylim((ymin, ymax))
    plt.title('Approximate posterior $q(x|\\theta, y)$\nNumber of samples = {}'.format(num_samples))