In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
from datetime import datetime

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import *

In [2]:
#Seed torch's global random number generator.
torch.manual_seed(0)

#CUDA_ID is not assigned anywhere in the visible notebook; presumably it is supplied by one of the
#star imports above (TODO confirm). Fall back to GPU 0 when it is missing, so that this cell does
#not raise NameError on a CUDA machine after a kernel restart.
try:
    cuda_id
except NameError:
    cuda_id = 0

#Use the selected GPU when CUDA is available, otherwise the CPU.
devi = torch.device(f'cuda:{cuda_id}' if torch.cuda.is_available() else 'cpu')

In [3]:
#Neural SDE parameters
dt_flow = 0.2 #Spacing of the time grid built below, in the same units as T.
t = 380 #In hours.
#Number of grid points on [0, T]. Round before casting: T / DT_FLOW is a float quotient and a bare
#int() truncates, which drops a grid point whenever the quotient lands just below an integer
#(e.g. 0.3 / 0.1 == 2.9999999999999996).
n = int(round(t / dt_flow)) + 1
t_span = np.linspace(0, t, n)
#T_span needs to be converted to tensor object, reshaped to [1, n, 1].
#Additionally, facilitates conversion of I_S and I_D to tensor objects.
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n, 1])
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
state_dim_SAWB = 4 #Not including CO2 in STATE_DIM, because CO2 is an observation.

In [4]:
#Settings for the SBM temperature forcing.
temp_ref, temp_rise = (
    283, #Reference temperature (presumably Kelvin -- TODO confirm).
    5, #High estimate of 5 celsius temperature rise by 2100.
)

In [5]:
#Optimisation settings handed to the training loop.
#Iteration counts (PITER presumably counts the norm-loss pretraining iterations -- TODO confirm).
niter, piter = 8500, 200
#Learning rates: norm regularization first, then ELBO.
pretrain_lr, train_lr = 1e-2, 1e-3
#Batch size and layer count.
batch_size, num_layers = 5, 5
#Observation (y) standard deviation
obs_error_scale = 0.1

In [6]:
#Prior means for the SBM parameters.
#System parameters taken from the deterministic CON model.
u_M = 0.0009
a_SD = a_DS = a_M = a_MSC = 0.5
k_S_ref, k_D_ref, k_M_ref = 0.0001, 0.001, 0.00022
Ea_S = 55
Ea_D = Ea_M = 48

#Parameters of the SCON diffusion matrix.
c_SOC, c_DOC, c_MBC = 0.05, 0.001, 0.0005

#Collect every prior mean under its own name, in the order declared above.
SCON_C_params_dict = dict(
    u_M=u_M,
    a_SD=a_SD,
    a_DS=a_DS,
    a_M=a_M,
    a_MSC=a_MSC,
    k_S_ref=k_S_ref,
    k_D_ref=k_D_ref,
    k_M_ref=k_M_ref,
    Ea_S=Ea_S,
    Ea_D=Ea_D,
    Ea_M=Ea_M,
    c_SOC=c_SOC,
    c_DOC=c_DOC,
    c_MBC=c_MBC,
)

In [7]:
#Prior over the SCON initial state: a multivariate normal centred on X0_SCON.
x0_SCON = [65, 0.4, 2.5]
x0_SCON_tensor = torch.tensor(x0_SCON)
#The scale factor is diagonal: the identity times OBS_ERROR_SCALE broadcasts against the mean
#vector, so each state's scale is OBS_ERROR_SCALE times its own prior mean.
x0_prior_SCON = D.MultivariateNormal(
    loc=x0_SCON_tensor,
    scale_tril=torch.eye(state_dim_SCON) * obs_error_scale * x0_SCON_tensor,
)

In [8]:
#Generate exogenous input vectors.
#All three helpers below come from the star imports at the top of the notebook and are each
#called on T_SPAN_TENSOR, the [1, n, 1] time grid.
#Obtain temperature forcing function from the grid, TEMP_REF and TEMP_RISE.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)

#Obtain SOC and DOC pool litter input vectors for use in flow SDE functions.
i_s_tensor = i_s(t_span_tensor) #Exogenous SOC input function
i_d_tensor = i_d(t_span_tensor) #Exogenous DOC input function

In [None]:
#Call training loop function for SCON-C.
#Arguments are positional and must stay in this exact order; only the trailing options are keywords.
#NOTE(review): at roughly 4 s per iteration the 8500 iterations take about 9 hours, and
#PRINT_EVERY = 10 dumps the loss and C_PATH tensors every 10 iterations, which floods the saved
#output. Consider a larger PRINT_EVERY before committing the notebook.
net, ELBO_hist = train(
    devi,
    pretrain_lr, train_lr,
    niter, piter,
    batch_size, num_layers,
    state_dim_SCON,
    'y_from_x_t_5000_dt_0-01.csv',
    obs_error_scale,
    t, dt_flow, n,
    t_span_tensor,
    i_s_tensor, i_d_tensor,
    temp_tensor, temp_ref,
    drift_diffusion_SCON_C,
    x0_prior_SCON,
    SCON_C_params_dict,
    LEARN_PARAMS=False,
    LR_DECAY=0.1,
    DECAY_STEP_SIZE=1000,
    PRINT_EVERY=10,
)


Train Diffusion:   0%|          | 0/8500 [00:00<?, ?it/s][A
Train Diffusion:   0%|          | 1/8500 [00:03<9:15:16,  3.92s/it][A
Train Diffusion:   0%|          | 2/8500 [00:07<9:15:50,  3.92s/it][A
Train Diffusion:   0%|          | 3/8500 [00:11<8:46:42,  3.72s/it][A
Train Diffusion:   0%|          | 4/8500 [00:15<9:05:27,  3.85s/it][A
Train Diffusion:   0%|          | 5/8500 [00:18<8:50:54,  3.75s/it][A
Train Diffusion:   0%|          | 6/8500 [00:22<8:44:03,  3.70s/it][A
Train Diffusion:   0%|          | 7/8500 [00:26<8:55:51,  3.79s/it][A
Train Diffusion:   0%|          | 8/8500 [00:30<8:45:22,  3.71s/it][A
Train Diffusion:   0%|          | 9/8500 [00:34<8:57:13,  3.80s/it][A

Moving average norm loss at 10 iterations is: 574289.40625. Best norm loss value is: 569144.0.

C_PATH mean = tensor([[1.4580, 1.4391, 1.4446],
        [1.4558, 1.4308, 1.4476],
        [1.4609, 1.4371, 1.4465],
        [1.4580, 1.4330, 1.4540],
        [1.4595, 1.4429, 1.4484]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.8990, 2.6534, 2.0441],
         [1.9403, 1.9199, 2.4904],
         [2.8834, 1.8115, 2.7542],
         ...,
         [1.3151, 1.3382, 1.3012],
         [1.1771, 1.8702, 1.3909],
         [1.3950, 1.5578, 1.3693]],

        [[1.8498, 2.9279, 2.7291],
         [2.5502, 2.5542, 1.6569],
         [1.8473, 2.1055, 2.4058],
         ...,
         [1.0129, 1.6903, 1.2076],
         [1.3562, 1.1230, 1.1481],
         [1.5341, 1.4143, 1.2733]],

        [[1.4698, 2.9151, 2.3121],
         [2.2997, 2.4588, 2.3552],
         [2.0502, 2.3390, 2.1276],
         ...,
         [1.2437, 1.2078, 1.7167],
         [1.7339, 1.6161, 1.6266],
         [1.0161, 1.1537, 1.1427]],

      


Train Diffusion:   0%|          | 10/8500 [00:37<8:52:57,  3.77s/it][A
Train Diffusion:   0%|          | 11/8500 [00:42<9:14:48,  3.92s/it][A
Train Diffusion:   0%|          | 12/8500 [00:46<9:52:17,  4.19s/it][A
Train Diffusion:   0%|          | 13/8500 [00:50<9:44:01,  4.13s/it][A
Train Diffusion:   0%|          | 14/8500 [00:54<9:40:24,  4.10s/it][A
Train Diffusion:   0%|          | 15/8500 [00:58<9:17:14,  3.94s/it][A
Train Diffusion:   0%|          | 16/8500 [01:02<9:19:37,  3.96s/it][A
Train Diffusion:   0%|          | 17/8500 [01:06<9:07:26,  3.87s/it][A
Train Diffusion:   0%|          | 18/8500 [01:09<8:56:10,  3.79s/it][A
Train Diffusion:   0%|          | 19/8500 [01:13<9:01:56,  3.83s/it][A

Moving average norm loss at 20 iterations is: 568562.7875. Best norm loss value is: 566608.75.

C_PATH mean = tensor([[1.9186, 1.7892, 1.8502],
        [1.9032, 1.7911, 1.8578],
        [1.9100, 1.8090, 1.8476],
        [1.9115, 1.8099, 1.8594],
        [1.8942, 1.7964, 1.8472]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.3649, 2.8337, 1.5357],
         [1.5932, 2.2296, 1.6302],
         [2.0676, 1.8597, 1.8569],
         ...,
         [2.1487, 2.1943, 1.3981],
         [1.1468, 2.1185, 2.5933],
         [2.2006, 1.9887, 1.6791]],

        [[2.0965, 2.4690, 2.1772],
         [2.0745, 1.5281, 1.5711],
         [2.3918, 1.7500, 1.6617],
         ...,
         [2.2539, 1.7611, 2.4230],
         [2.8070, 1.8475, 1.8656],
         [1.2211, 2.0449, 2.1457]],

        [[1.4306, 2.4641, 1.9059],
         [2.0308, 2.0291, 1.2378],
         [1.9717, 1.4657, 1.5392],
         ...,
         [0.7752, 1.1721, 2.0339],
         [1.9704, 1.8947, 1.3108],
         [2.7678, 1.5360, 2.3439]],

      


Train Diffusion:   0%|          | 20/8500 [01:17<8:55:16,  3.79s/it][A
Train Diffusion:   0%|          | 21/8500 [01:21<9:14:58,  3.93s/it][A
Train Diffusion:   0%|          | 22/8500 [01:25<9:02:01,  3.84s/it][A
Train Diffusion:   0%|          | 23/8500 [01:29<9:06:09,  3.87s/it][A
Train Diffusion:   0%|          | 24/8500 [01:33<9:07:12,  3.87s/it][A
Train Diffusion:   0%|          | 25/8500 [01:36<8:56:37,  3.80s/it][A
Train Diffusion:   0%|          | 26/8500 [01:40<9:09:59,  3.89s/it][A
Train Diffusion:   0%|          | 27/8500 [01:44<8:49:56,  3.75s/it][A
Train Diffusion:   0%|          | 28/8500 [01:47<8:51:04,  3.76s/it][A
Train Diffusion:   0%|          | 29/8500 [01:51<8:51:20,  3.76s/it][A

Moving average norm loss at 30 iterations is: 565815.5. Best norm loss value is: 563443.5.

C_PATH mean = tensor([[2.0906, 1.6709, 1.8479],
        [2.1489, 1.6301, 1.8447],
        [2.1403, 1.6395, 1.8401],
        [2.0982, 1.6402, 1.8459],
        [2.1606, 1.6210, 1.8421]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.9434, 2.2463, 1.7396],
         [2.0603, 1.2682, 1.5285],
         [3.4244, 1.3291, 1.0841],
         ...,
         [1.3485, 1.5695, 1.6319],
         [1.5125, 1.2967, 1.4842],
         [2.1552, 1.4722, 1.4959]],

        [[1.9434, 2.4003, 1.8146],
         [1.7152, 1.8236, 1.7452],
         [1.8806, 1.6357, 1.9470],
         ...,
         [3.1295, 1.3159, 1.1085],
         [2.5975, 1.2016, 1.8479],
         [2.5548, 1.6391, 1.4279]],

        [[1.9434, 2.1659, 1.7146],
         [2.1757, 1.8818, 1.9624],
         [1.5828, 0.9405, 1.3300],
         ...,
         [1.9669, 1.2493, 1.6792],
         [2.0777, 1.3105, 1.5040],
         [1.4136, 1.6195, 1.6728]],

        [[


Train Diffusion:   0%|          | 30/8500 [01:55<9:08:20,  3.88s/it][A
Train Diffusion:   0%|          | 31/8500 [02:00<9:21:29,  3.98s/it][A
Train Diffusion:   0%|          | 32/8500 [02:03<9:07:13,  3.88s/it][A
Train Diffusion:   0%|          | 33/8500 [02:07<9:04:32,  3.86s/it][A
Train Diffusion:   0%|          | 34/8500 [02:11<9:07:24,  3.88s/it][A
Train Diffusion:   0%|          | 35/8500 [02:15<8:54:40,  3.79s/it][A
Train Diffusion:   0%|          | 36/8500 [02:19<9:08:34,  3.89s/it][A
Train Diffusion:   0%|          | 37/8500 [02:22<8:57:48,  3.81s/it][A
Train Diffusion:   0%|          | 38/8500 [02:26<9:00:20,  3.83s/it][A
Train Diffusion:   0%|          | 39/8500 [02:30<8:58:47,  3.82s/it][A

Moving average norm loss at 40 iterations is: 535530.890625. Best norm loss value is: 480310.0.

C_PATH mean = tensor([[13.4510,  1.5192,  2.5448],
        [13.3805,  1.5911,  2.5429],
        [13.8865,  1.5638,  2.7061],
        [12.7037,  1.5709,  2.5099],
        [13.4639,  1.5091,  2.3862]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.4449e+00, 1.5668e+00, 1.8265e+00],
         [1.4555e+00, 5.1729e-01, 2.3623e+00],
         [3.5665e+01, 1.0089e-01, 2.2681e+00],
         ...,
         [1.7032e+01, 1.4393e+00, 2.9367e+00],
         [6.3014e+00, 1.0100e+00, 1.3088e+00],
         [1.6182e+01, 1.2049e+00, 9.6231e-01]],

        [[2.7243e-01, 7.2164e-01, 1.8297e+00],
         [4.3650e-01, 2.6091e-01, 2.1828e+00],
         [1.1363e+01, 9.9377e-01, 5.4330e-01],
         ...,
         [1.8393e+01, 9.4403e-01, 1.0504e+00],
         [8.0683e-01, 4.8561e-01, 1.0386e+01],
         [4.1662e+00, 3.5181e+00, 2.0807e+00]],

        [[3.0143e+00, 1.6955e+00, 1.7377e+00],
         [1.0050e+01, 2.6


Train Diffusion:   0%|          | 40/8500 [02:34<8:48:49,  3.75s/it][A
Train Diffusion:   0%|          | 41/8500 [02:37<8:56:05,  3.80s/it][A
Train Diffusion:   0%|          | 42/8500 [02:41<8:41:40,  3.70s/it][A
Train Diffusion:   1%|          | 43/8500 [02:45<8:48:12,  3.75s/it][A
Train Diffusion:   1%|          | 44/8500 [02:48<8:40:26,  3.69s/it][A
Train Diffusion:   1%|          | 45/8500 [02:52<8:28:53,  3.61s/it][A
Train Diffusion:   1%|          | 46/8500 [02:56<8:49:06,  3.76s/it][A
Train Diffusion:   1%|          | 47/8500 [03:01<9:41:55,  4.13s/it][A
Train Diffusion:   1%|          | 48/8500 [03:06<10:19:16,  4.40s/it][A
Train Diffusion:   1%|          | 49/8500 [03:10<10:14:25,  4.36s/it][A

Moving average norm loss at 50 iterations is: 372979.81875. Best norm loss value is: 313518.125.

C_PATH mean = tensor([[41.2765,  3.2001,  4.0570],
        [42.3890,  2.9380,  4.0197],
        [40.9889,  3.3173,  4.1396],
        [41.2398,  2.8646,  4.0797],
        [41.8027,  3.0796,  4.0441]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2296e+01, 1.9625e+00, 7.2564e+00],
         [4.2278e+01, 9.1582e+00, 3.4928e+00],
         [6.5732e+01, 2.5395e+00, 1.3966e+00],
         ...,
         [2.3219e-05, 2.5544e+01, 3.5284e+00],
         [8.4726e+01, 9.3368e-01, 1.5674e+00],
         [1.0000e-06, 8.1149e-01, 9.4808e+00]],

        [[1.0962e-04, 1.6778e+01, 2.4797e+00],
         [6.3832e+01, 6.1761e+00, 7.3894e-01],
         [4.2875e+01, 1.2174e+00, 8.1096e+00],
         ...,
         [6.9591e+01, 7.0540e-01, 1.3283e+00],
         [5.9737e+01, 2.1854e+00, 1.2094e+00],
         [5.1335e+01, 8.3833e-01, 4.4450e+00]],

        [[1.4193e+01, 5.8329e+00, 2.3145e+00],
         [3.0137e+01, 2.


Train Diffusion:   1%|          | 50/8500 [03:14<10:11:44,  4.34s/it][A
Train Diffusion:   1%|          | 51/8500 [03:19<10:24:11,  4.43s/it][A
Train Diffusion:   1%|          | 52/8500 [03:23<9:56:58,  4.24s/it] [A
Train Diffusion:   1%|          | 53/8500 [03:27<9:53:43,  4.22s/it][A
Train Diffusion:   1%|          | 54/8500 [03:31<9:30:24,  4.05s/it][A
Train Diffusion:   1%|          | 55/8500 [03:34<9:14:54,  3.94s/it][A
Train Diffusion:   1%|          | 56/8500 [03:38<9:10:28,  3.91s/it][A
Train Diffusion:   1%|          | 57/8500 [03:42<8:56:46,  3.81s/it][A
Train Diffusion:   1%|          | 58/8500 [03:46<9:01:22,  3.85s/it][A
Train Diffusion:   1%|          | 59/8500 [03:49<8:45:28,  3.74s/it][A

Moving average norm loss at 60 iterations is: 260993.0328125. Best norm loss value is: 230486.40625.

C_PATH mean = tensor([[42.9534,  2.5045,  3.0711],
        [44.0542,  2.4623,  2.9025],
        [42.6711,  2.4330,  3.0360],
        [43.9402,  2.3845,  2.8885],
        [43.0472,  2.4402,  3.0423]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2500e+01, 2.5035e+01, 2.0872e+01],
         [5.2804e+01, 8.3518e+00, 2.8103e+00],
         [6.6998e+01, 6.8116e-01, 1.6069e+00],
         ...,
         [4.8740e+01, 1.8487e+00, 1.1152e-01],
         [5.0643e+01, 7.6453e-01, 5.2234e+00],
         [6.3882e+01, 3.8242e-01, 1.8322e+00]],

        [[3.1359e-06, 2.4834e+01, 5.7165e+00],
         [6.0586e+01, 1.0061e-01, 2.8340e+00],
         [5.2868e+01, 5.3561e+00, 1.3851e+00],
         ...,
         [6.6393e+01, 2.4493e-01, 1.3139e+00],
         [1.0004e-06, 1.8530e+00, 8.2555e-01],
         [1.9953e+00, 3.4146e+00, 1.3498e+01]],

        [[1.5426e+01, 3.9063e+00, 1.5742e+00],
         [5.9744e+01


Train Diffusion:   1%|          | 60/8500 [03:53<8:45:13,  3.73s/it][A
Train Diffusion:   1%|          | 61/8500 [03:57<8:43:16,  3.72s/it][A
Train Diffusion:   1%|          | 62/8500 [04:00<8:39:45,  3.70s/it][A
Train Diffusion:   1%|          | 63/8500 [04:05<9:12:06,  3.93s/it][A
Train Diffusion:   1%|          | 64/8500 [04:09<9:33:34,  4.08s/it][A
Train Diffusion:   1%|          | 65/8500 [04:13<9:39:45,  4.12s/it][A
Train Diffusion:   1%|          | 66/8500 [04:17<9:30:16,  4.06s/it][A
Train Diffusion:   1%|          | 67/8500 [04:21<9:13:10,  3.94s/it][A
Train Diffusion:   1%|          | 68/8500 [04:25<9:05:30,  3.88s/it][A
Train Diffusion:   1%|          | 69/8500 [04:28<8:45:57,  3.74s/it][A

Moving average norm loss at 70 iterations is: 205329.1609375. Best norm loss value is: 187409.625.

C_PATH mean = tensor([[46.3855,  1.0057,  2.5440],
        [46.0753,  0.9696,  2.4457],
        [44.8000,  0.9545,  2.5593],
        [45.4852,  0.9456,  2.4815],
        [46.4585,  0.9449,  2.5578]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.7377e+01, 8.0013e+00, 1.6136e+00],
         [6.8240e+01, 4.7724e-01, 1.9789e+00],
         [5.9543e+01, 4.8659e-01, 1.9309e+00],
         ...,
         [1.3379e-06, 1.3581e+00, 1.3766e+00],
         [1.0003e-06, 1.3571e+00, 2.0517e+00],
         [6.0314e+01, 7.3243e-01, 1.9968e+00]],

        [[1.7352e-06, 1.8232e+01, 2.9132e+00],
         [6.0221e+01, 3.2473e-01, 2.0695e+00],
         [6.3640e+01, 3.9841e-01, 1.8922e+00],
         ...,
         [6.1772e+01, 4.8222e-01, 1.8219e+00],
         [5.6683e+01, 4.5818e-01, 1.9415e+00],
         [6.1294e+01, 5.1037e-01, 1.9894e+00]],

        [[1.6346e+01, 1.7302e+00, 5.4269e+00],
         [1.2307e-06, 


Train Diffusion:   1%|          | 70/8500 [04:32<9:04:55,  3.88s/it][A
Train Diffusion:   1%|          | 71/8500 [04:36<8:57:27,  3.83s/it][A
Train Diffusion:   1%|          | 72/8500 [04:40<8:57:47,  3.83s/it][A
Train Diffusion:   1%|          | 73/8500 [04:44<9:02:07,  3.86s/it][A
Train Diffusion:   1%|          | 74/8500 [04:48<8:59:30,  3.84s/it][A
Train Diffusion:   1%|          | 75/8500 [04:52<9:29:16,  4.05s/it][A
Train Diffusion:   1%|          | 76/8500 [04:56<9:15:36,  3.96s/it][A
Train Diffusion:   1%|          | 77/8500 [05:00<9:21:09,  4.00s/it][A
Train Diffusion:   1%|          | 78/8500 [05:04<9:26:08,  4.03s/it][A
Train Diffusion:   1%|          | 79/8500 [05:09<10:06:55,  4.32s/it][A

Moving average norm loss at 80 iterations is: 175131.9765625. Best norm loss value is: 158369.34375.

C_PATH mean = tensor([[47.1216,  0.8734,  2.1568],
        [46.8548,  0.9342,  2.1653],
        [47.7616,  0.9145,  2.1086],
        [46.2840,  0.8780,  2.1126],
        [47.1061,  0.8719,  2.1426]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3196e-05, 1.4104e+01, 1.7761e+00],
         [1.0020e-06, 7.7403e-01, 1.9300e+00],
         [5.8078e+01, 7.6609e-01, 1.8365e+00],
         ...,
         [5.8094e+01, 6.6743e-01, 1.7927e+00],
         [1.0093e-06, 6.8798e-01, 1.6997e+00],
         [6.1766e+01, 8.1025e-01, 1.6733e+00]],

        [[5.2787e+00, 1.8766e+01, 1.0388e+01],
         [5.2183e+01, 5.5743e+00, 1.7517e+00],
         [6.1888e+01, 5.4353e-01, 1.9969e+00],
         ...,
         [1.0000e-06, 7.3272e-01, 1.9552e+00],
         [6.1075e+01, 5.9755e-01, 2.0039e+00],
         [4.9039e+01, 9.6312e-01, 1.5423e+00]],

        [[2.1573e+01, 5.6769e+00, 1.8209e+00],
         [6.9388e+01


Train Diffusion:   1%|          | 80/8500 [05:14<10:49:37,  4.63s/it][A
Train Diffusion:   1%|          | 81/8500 [05:20<11:16:47,  4.82s/it][A
Train Diffusion:   1%|          | 82/8500 [05:24<11:11:31,  4.79s/it][A
Train Diffusion:   1%|          | 83/8500 [05:28<10:38:42,  4.55s/it][A
Train Diffusion:   1%|          | 84/8500 [05:34<11:03:34,  4.73s/it][A
Train Diffusion:   1%|          | 85/8500 [05:37<10:20:35,  4.42s/it][A
Train Diffusion:   1%|          | 86/8500 [05:41<9:48:49,  4.20s/it] [A
Train Diffusion:   1%|          | 87/8500 [05:45<9:26:02,  4.04s/it][A
Train Diffusion:   1%|          | 88/8500 [05:48<8:57:57,  3.84s/it][A
Train Diffusion:   1%|          | 89/8500 [05:52<8:55:57,  3.82s/it][A

Moving average norm loss at 90 iterations is: 155281.3734375. Best norm loss value is: 151619.953125.

C_PATH mean = tensor([[46.1461,  1.0504,  2.1604],
        [45.5102,  1.0932,  2.2521],
        [46.4269,  0.9806,  2.1316],
        [46.9767,  1.0701,  2.2231],
        [47.8309,  1.0392,  2.2019]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3883e-05, 1.1765e+01, 1.9177e+00],
         [5.8856e+01, 7.5910e-01, 1.9279e+00],
         [6.6704e+01, 7.5972e-01, 1.8887e+00],
         ...,
         [6.0175e+01, 8.0393e-01, 1.7207e+00],
         [5.8727e+01, 8.0391e-01, 1.9147e+00],
         [5.9624e+01, 8.0341e-01, 1.8617e+00]],

        [[2.2187e+01, 5.3646e+00, 1.8394e+00],
         [5.8442e+01, 8.1604e-01, 1.9099e+00],
         [5.8223e+01, 8.4428e-01, 1.8982e+00],
         ...,
         [6.0099e+01, 8.0778e-01, 1.8885e+00],
         [2.8561e-06, 8.2299e-01, 2.0059e+00],
         [1.0126e-06, 8.0140e-01, 1.9934e+00]],

        [[1.9600e+01, 8.6379e+00, 1.9988e+00],
         [5.9187e+0


Train Diffusion:   1%|          | 90/8500 [05:55<8:39:13,  3.70s/it][A
Train Diffusion:   1%|          | 91/8500 [05:59<8:35:09,  3.68s/it][A
Train Diffusion:   1%|          | 92/8500 [06:03<8:36:13,  3.68s/it][A
Train Diffusion:   1%|          | 93/8500 [06:06<8:23:19,  3.59s/it][A
Train Diffusion:   1%|          | 94/8500 [06:10<8:34:05,  3.67s/it][A
Train Diffusion:   1%|          | 95/8500 [06:13<8:22:06,  3.58s/it][A
Train Diffusion:   1%|          | 96/8500 [06:17<8:22:40,  3.59s/it][A
Train Diffusion:   1%|          | 97/8500 [06:21<8:31:59,  3.66s/it][A
Train Diffusion:   1%|          | 98/8500 [06:24<8:41:40,  3.73s/it][A
Train Diffusion:   1%|          | 99/8500 [06:28<8:51:58,  3.80s/it][A

Moving average norm loss at 100 iterations is: 147563.34375. Best norm loss value is: 141184.421875.

C_PATH mean = tensor([[47.1336,  0.9706,  2.0969],
        [47.5956,  0.9115,  1.9986],
        [47.4027,  0.9421,  2.0511],
        [47.2443,  0.9504,  2.1063],
        [47.4724,  0.9966,  2.0422]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.0025e+01, 6.4115e+00, 1.7603e+00],
         [2.6521e-02, 7.7603e-01, 1.9005e+00],
         [6.0878e+01, 5.2242e-01, 1.8197e+00],
         ...,
         [1.0000e-06, 7.1293e-01, 1.8148e+00],
         [6.1224e+01, 7.3418e-01, 1.8314e+00],
         [5.9524e+01, 7.6168e-01, 1.8586e+00]],

        [[1.4300e-06, 1.2662e+01, 2.0073e+00],
         [6.9921e+01, 7.0282e-01, 1.8714e+00],
         [1.6744e-06, 7.5782e-01, 1.7365e+00],
         ...,
         [6.0295e+01, 7.5557e-01, 1.8145e+00],
         [5.4049e-04, 7.5129e-01, 1.8299e+00],
         [6.0261e+01, 8.4281e-01, 1.8093e+00]],

        [[2.1317e+01, 9.6055e+00, 1.8372e+00],
         [6.8721e+01


Train Diffusion:   1%|          | 100/8500 [06:32<8:37:21,  3.70s/it][A
Train Diffusion:   1%|          | 101/8500 [06:35<8:32:31,  3.66s/it][A
Train Diffusion:   1%|          | 102/8500 [06:39<8:35:57,  3.69s/it][A
Train Diffusion:   1%|          | 103/8500 [06:43<8:22:47,  3.59s/it][A
Train Diffusion:   1%|          | 104/8500 [06:47<8:52:17,  3.80s/it][A
Train Diffusion:   1%|          | 105/8500 [06:51<8:45:10,  3.75s/it][A
Train Diffusion:   1%|          | 106/8500 [06:54<8:39:49,  3.72s/it][A
Train Diffusion:   1%|▏         | 107/8500 [06:58<8:35:50,  3.69s/it][A
Train Diffusion:   1%|▏         | 108/8500 [07:01<8:23:07,  3.60s/it][A
Train Diffusion:   1%|▏         | 109/8500 [07:05<8:31:35,  3.66s/it][A

Moving average norm loss at 110 iterations is: 142326.4640625. Best norm loss value is: 139971.40625.

C_PATH mean = tensor([[48.6903,  1.1633,  1.9428],
        [48.4003,  1.1483,  2.0146],
        [46.6544,  1.1546,  1.9492],
        [48.2962,  1.2051,  1.9393],
        [47.3200,  1.1751,  2.0306]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.1493e+01, 7.1302e+00, 1.6731e+00],
         [3.2777e+01, 9.5909e-01, 4.7462e+00],
         [2.8257e+01, 2.5549e+00, 1.7408e+00],
         ...,
         [1.7985e-06, 5.8406e-01, 1.8827e+00],
         [5.8524e+01, 8.2878e-01, 1.6794e+00],
         [6.0454e+01, 8.9601e-01, 1.7001e+00]],

        [[2.1388e+01, 3.3540e+00, 1.6705e+00],
         [7.1919e-01, 9.1107e-01, 1.7112e+00],
         [5.8825e+01, 8.4640e-01, 1.6508e+00],
         ...,
         [5.7659e+01, 8.4596e-01, 1.6456e+00],
         [7.3189e+01, 8.8332e-01, 1.6745e+00],
         [6.1979e+01, 9.8168e-01, 1.6864e+00]],

        [[2.3619e-06, 1.0929e+01, 1.7822e+00],
         [6.6551e+0


Train Diffusion:   1%|▏         | 110/8500 [07:08<8:21:47,  3.59s/it][A
Train Diffusion:   1%|▏         | 111/8500 [07:12<8:19:46,  3.57s/it][A
Train Diffusion:   1%|▏         | 112/8500 [07:16<8:22:31,  3.59s/it][A
Train Diffusion:   1%|▏         | 113/8500 [07:19<8:16:02,  3.55s/it][A
Train Diffusion:   1%|▏         | 114/8500 [07:23<8:33:20,  3.67s/it][A
Train Diffusion:   1%|▏         | 115/8500 [07:27<8:48:21,  3.78s/it][A
Train Diffusion:   1%|▏         | 116/8500 [07:31<8:52:25,  3.81s/it][A
Train Diffusion:   1%|▏         | 117/8500 [07:35<8:58:53,  3.86s/it][A
Train Diffusion:   1%|▏         | 118/8500 [07:38<8:42:21,  3.74s/it][A
Train Diffusion:   1%|▏         | 119/8500 [07:42<8:34:48,  3.69s/it][A

Moving average norm loss at 120 iterations is: 130910.0109375. Best norm loss value is: 123126.4921875.

C_PATH mean = tensor([[48.7706,  0.7759,  2.3884],
        [49.0315,  0.8204,  2.5411],
        [48.2695,  0.7550,  2.4501],
        [49.5885,  0.7337,  2.3495],
        [48.1729,  0.8157,  2.4383]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.4882e+01, 6.0881e+00, 2.2916e+00],
         [6.2619e+01, 6.8170e-01, 2.2078e+00],
         [9.4323e+00, 6.2000e-01, 2.0740e+00],
         ...,
         [5.3273e+00, 6.0557e-01, 2.0348e+00],
         [5.7074e+01, 5.5049e-01, 2.2588e+00],
         [5.8901e+01, 5.9192e-01, 2.3435e+00]],

        [[2.4104e+01, 5.5387e+00, 2.3502e+00],
         [6.2359e+01, 6.8251e-01, 2.4125e+00],
         [6.0148e+01, 6.2869e-01, 2.3101e+00],
         ...,
         [5.9168e+01, 6.0812e-01, 2.2782e+00],
         [6.1212e+01, 6.0525e-01, 2.3368e+00],
         [1.6939e+01, 6.3052e-01, 2.0754e+00]],

        [[2.4726e+01, 5.6336e+00, 2.2959e+00],
         [1.6100e


Train Diffusion:   1%|▏         | 120/8500 [07:45<8:32:02,  3.67s/it][A
Train Diffusion:   1%|▏         | 121/8500 [07:49<8:19:47,  3.58s/it][A
Train Diffusion:   1%|▏         | 122/8500 [07:53<8:29:56,  3.65s/it][A
Train Diffusion:   1%|▏         | 123/8500 [07:56<8:18:11,  3.57s/it][A
Train Diffusion:   1%|▏         | 124/8500 [08:00<8:17:24,  3.56s/it][A
Train Diffusion:   1%|▏         | 125/8500 [08:03<8:19:20,  3.58s/it][A
Train Diffusion:   1%|▏         | 126/8500 [08:07<8:12:23,  3.53s/it][A
Train Diffusion:   1%|▏         | 127/8500 [08:11<8:26:32,  3.63s/it][A
Train Diffusion:   2%|▏         | 128/8500 [08:14<8:16:20,  3.56s/it][A
Train Diffusion:   2%|▏         | 129/8500 [08:17<8:15:08,  3.55s/it][A

Moving average norm loss at 130 iterations is: 85969.61796875. Best norm loss value is: 48516.9921875.

C_PATH mean = tensor([[55.7737,  1.0630,  2.0294],
        [56.0445,  0.9912,  1.9361],
        [56.0641,  0.9771,  1.9419],
        [56.2661,  0.9750,  1.9059],
        [55.8665,  0.9791,  1.9550]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[19.5404,  9.3674,  4.0133],
         [58.1261,  0.7944,  1.6819],
         [60.0598,  0.8131,  1.6662],
         ...,
         [30.7436,  3.8350,  1.7818],
         [59.6826,  0.7908,  1.7074],
         [59.7708,  0.7823,  1.5402]],

        [[22.0627,  6.4463,  1.6350],
         [59.5855,  0.8712,  1.3747],
         [58.4625,  0.7079,  1.6476],
         ...,
         [52.0889,  2.9812,  6.5979],
         [20.2101,  1.2479,  5.2389],
         [48.2475,  0.7514,  1.7636]],

        [[ 6.0353,  8.2186,  1.7841],
         [60.5615,  0.8911,  1.6736],
         [59.9968,  0.8123,  1.6680],
         ...,
         [60.5950,  1.0497,  1.7085],
       


Train Diffusion:   2%|▏         | 130/8500 [08:21<8:24:47,  3.62s/it][A
Train Diffusion:   2%|▏         | 131/8500 [08:25<8:14:50,  3.55s/it][A
Train Diffusion:   2%|▏         | 132/8500 [08:28<8:24:18,  3.62s/it][A
Train Diffusion:   2%|▏         | 133/8500 [08:32<8:19:15,  3.58s/it][A
Train Diffusion:   2%|▏         | 134/8500 [08:36<8:27:10,  3.64s/it][A
Train Diffusion:   2%|▏         | 135/8500 [08:39<8:29:27,  3.65s/it][A
Train Diffusion:   2%|▏         | 136/8500 [08:43<8:24:14,  3.62s/it][A
Train Diffusion:   2%|▏         | 137/8500 [08:46<8:14:57,  3.55s/it][A
Train Diffusion:   2%|▏         | 138/8500 [08:50<8:24:46,  3.62s/it][A
Train Diffusion:   2%|▏         | 139/8500 [08:53<8:14:39,  3.55s/it][A

Moving average norm loss at 140 iterations is: 39728.683984375. Best norm loss value is: 30950.0.

C_PATH mean = tensor([[61.6148,  1.1355,  2.1569],
        [61.5635,  1.2368,  2.0525],
        [61.7166,  1.1122,  2.1178],
        [61.6430,  1.1466,  2.0794],
        [61.5145,  1.2528,  2.1104]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[23.9683,  6.7339,  1.6793],
         [60.6346,  0.9162,  1.7980],
         [62.6770,  0.9990,  1.8664],
         ...,
         [61.4368,  1.0035,  1.7217],
         [62.8355,  0.9755,  1.6892],
         [59.8989,  1.0724,  1.6378]],

        [[15.2186,  8.2225,  1.6491],
         [65.9715,  1.1989,  1.7636],
         [62.1091,  0.9943,  1.8447],
         ...,
         [64.3835,  0.8900,  1.7460],
         [59.1602,  2.0617,  1.6337],
         [64.3304,  1.0333,  1.7054]],

        [[23.8626,  6.8081,  1.7037],
         [61.1814,  1.0342,  1.7203],
         [61.1261,  0.9349,  1.7363],
         ...,
         [60.1506,  0.9577,  1.6293],
         [65


Train Diffusion:   2%|▏         | 140/8500 [08:57<8:19:37,  3.59s/it][A
Train Diffusion:   2%|▏         | 141/8500 [09:01<8:18:55,  3.58s/it][A
Train Diffusion:   2%|▏         | 142/8500 [09:04<8:10:53,  3.52s/it][A
Train Diffusion:   2%|▏         | 143/8500 [09:08<8:22:15,  3.61s/it][A
Train Diffusion:   2%|▏         | 144/8500 [09:11<8:11:42,  3.53s/it][A
Train Diffusion:   2%|▏         | 145/8500 [09:15<8:16:29,  3.57s/it][A
Train Diffusion:   2%|▏         | 146/8500 [09:19<8:23:11,  3.61s/it][A
Train Diffusion:   2%|▏         | 147/8500 [09:22<8:14:17,  3.55s/it][A
Train Diffusion:   2%|▏         | 148/8500 [09:26<8:22:57,  3.61s/it][A
Train Diffusion:   2%|▏         | 149/8500 [09:29<8:15:10,  3.56s/it][A

Moving average norm loss at 150 iterations is: 23799.16689453125. Best norm loss value is: 15495.533203125.

C_PATH mean = tensor([[59.5125,  0.8797,  1.9798],
        [59.6688,  0.9019,  1.9015],
        [59.6036,  0.8893,  1.9473],
        [59.5691,  0.9226,  1.9548],
        [59.3469,  0.9552,  2.0899]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[20.1835,  8.9861,  1.7200],
         [60.8990,  0.8201,  1.4846],
         [60.2568,  0.6755,  1.7403],
         ...,
         [37.9614,  1.0905,  1.7216],
         [60.1334,  0.7257,  1.8558],
         [60.8575,  0.7371,  1.6776]],

        [[18.5582,  9.6046,  2.2785],
         [62.4922,  0.6712,  1.6778],
         [60.4848,  0.7508,  1.4376],
         ...,
         [60.9015,  0.7589,  1.8586],
         [60.3771,  0.7570,  1.4853],
         [60.4068,  0.8016,  1.9175]],

        [[20.1831,  8.8804,  1.5965],
         [61.1767,  0.8166,  2.1250],
         [59.6789,  0.7222,  1.7607],
         ...,
         [56.9941,  0.7258,  1.9994],
  


Train Diffusion:   2%|▏         | 150/8500 [09:33<8:16:58,  3.57s/it][A
Train Diffusion:   2%|▏         | 151/8500 [09:36<8:21:37,  3.60s/it][A
Train Diffusion:   2%|▏         | 152/8500 [09:40<8:37:07,  3.72s/it][A
Train Diffusion:   2%|▏         | 153/8500 [09:44<8:51:24,  3.82s/it][A
Train Diffusion:   2%|▏         | 154/8500 [09:49<9:10:21,  3.96s/it][A
Train Diffusion:   2%|▏         | 155/8500 [09:53<9:05:49,  3.92s/it][A
Train Diffusion:   2%|▏         | 156/8500 [09:56<9:03:54,  3.91s/it][A
Train Diffusion:   2%|▏         | 157/8500 [10:00<9:06:40,  3.93s/it][A
Train Diffusion:   2%|▏         | 158/8500 [10:04<9:00:58,  3.89s/it][A
Train Diffusion:   2%|▏         | 159/8500 [10:08<9:01:39,  3.90s/it][A

Moving average norm loss at 160 iterations is: 21903.918359375. Best norm loss value is: 15495.533203125.

C_PATH mean = tensor([[58.7987,  1.1487,  2.2121],
        [58.9371,  1.0766,  2.1644],
        [58.7006,  1.1266,  2.2576],
        [58.7965,  1.1156,  2.2107],
        [58.8577,  1.0549,  2.2172]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[21.2920,  8.1410,  1.9712],
         [58.9671,  0.8408,  2.0475],
         [59.6993,  0.8243,  2.0224],
         ...,
         [60.2467,  0.8003,  1.9329],
         [59.4621,  0.8158,  1.9740],
         [59.4380,  0.8253,  1.8800]],

        [[21.2920,  8.0774,  1.9654],
         [58.9483,  0.8380,  2.0321],
         [60.4159,  0.8162,  2.0100],
         ...,
         [59.4590,  0.8190,  1.9527],
         [59.5285,  0.8323,  1.7289],
         [58.4859,  0.8425,  2.0053]],

        [[21.2920,  8.2074,  1.9465],
         [58.9051,  0.8403,  1.7391],
         [59.0833,  0.8315,  1.9553],
         ...,
         [59.4013,  0.8467,  1.9957],
    


Train Diffusion:   2%|▏         | 160/8500 [10:12<8:47:28,  3.79s/it][A
Train Diffusion:   2%|▏         | 161/8500 [10:15<8:44:29,  3.77s/it][A
Train Diffusion:   2%|▏         | 162/8500 [10:19<8:29:27,  3.67s/it][A
Train Diffusion:   2%|▏         | 163/8500 [10:23<8:36:51,  3.72s/it][A
Train Diffusion:   2%|▏         | 164/8500 [10:26<8:35:59,  3.71s/it][A
Train Diffusion:   2%|▏         | 165/8500 [10:31<8:56:21,  3.86s/it][A
Train Diffusion:   2%|▏         | 166/8500 [10:35<9:03:16,  3.91s/it][A
Train Diffusion:   2%|▏         | 167/8500 [10:38<8:42:08,  3.76s/it][A
Train Diffusion:   2%|▏         | 168/8500 [10:42<8:48:05,  3.80s/it][A
Train Diffusion:   2%|▏         | 169/8500 [10:46<8:49:53,  3.82s/it][A

Moving average norm loss at 170 iterations is: 20975.14462890625. Best norm loss value is: 15495.533203125.

C_PATH mean = tensor([[59.1354,  0.9693,  2.1832],
        [59.3148,  0.9060,  2.0884],
        [59.2949,  0.8570,  2.0936],
        [59.1358,  1.0091,  2.1276],
        [59.3755,  0.8510,  2.0908]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[21.8189, 11.6934, 32.8898],
         [13.8027, 19.3227,  1.8109],
         [60.0343,  0.6980,  1.9063],
         ...,
         [60.3361,  0.7117,  1.8526],
         [60.5130,  0.7147,  1.8314],
         [58.8940,  0.6974,  1.8254]],

        [[21.8190,  6.7151,  1.7682],
         [60.5425,  0.7447,  1.9255],
         [59.8375,  0.7133,  1.8329],
         ...,
         [60.2490,  0.6898,  1.8607],
         [59.7980,  0.7106,  1.8464],
         [60.0866,  0.7114,  1.9054]],

        [[21.8190,  6.5474,  1.7611],
         [60.4195,  0.7319,  1.9052],
         [59.3789,  0.7186,  1.8278],
         ...,
         [60.2857,  0.7021,  1.8296],
  


Train Diffusion:   2%|▏         | 170/8500 [10:50<9:12:56,  3.98s/it][A
Train Diffusion:   2%|▏         | 171/8500 [10:54<9:05:34,  3.93s/it][A
Train Diffusion:   2%|▏         | 172/8500 [10:58<8:49:00,  3.81s/it][A
Train Diffusion:   2%|▏         | 173/8500 [11:02<9:00:22,  3.89s/it][A
Train Diffusion:   2%|▏         | 174/8500 [11:05<8:47:46,  3.80s/it][A
Train Diffusion:   2%|▏         | 175/8500 [11:09<8:34:04,  3.70s/it][A
Train Diffusion:   2%|▏         | 176/8500 [11:12<8:35:35,  3.72s/it][A
Train Diffusion:   2%|▏         | 177/8500 [11:16<8:24:13,  3.63s/it][A
Train Diffusion:   2%|▏         | 178/8500 [11:20<8:52:55,  3.84s/it][A
Train Diffusion:   2%|▏         | 179/8500 [11:24<8:42:35,  3.77s/it][A

Moving average norm loss at 180 iterations is: 21748.4767578125. Best norm loss value is: 15495.533203125.

C_PATH mean = tensor([[59.3156,  1.2864,  2.3306],
        [59.1545,  1.3244,  2.3636],
        [59.0812,  1.3162,  2.4170],
        [59.0229,  1.4081,  2.4380],
        [58.9910,  1.4245,  2.4617]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[22.7367,  6.3389,  2.0596],
         [58.3567,  0.9585,  2.1128],
         [62.3195,  0.9676,  2.0518],
         ...,
         [59.0836,  0.9346,  2.0748],
         [62.5537,  0.9246,  2.0470],
         [58.7333,  0.9429,  2.0447]],

        [[22.7367,  6.0818,  2.0256],
         [57.5568,  1.1989,  2.2347],
         [60.0065,  0.9779,  1.9850],
         ...,
         [60.0173,  0.9930,  2.0316],
         [59.3774,  1.0221,  2.0902],
         [59.9994,  1.0060,  2.0023]],

        [[22.7367,  6.8644,  2.1307],
         [62.5722,  0.9938,  2.0328],
         [59.6852,  0.9654,  2.0615],
         ...,
         [60.4763,  1.0143,  1.9891],
   


Train Diffusion:   2%|▏         | 180/8500 [11:28<8:43:56,  3.78s/it][A
Train Diffusion:   2%|▏         | 181/8500 [11:32<9:03:18,  3.92s/it][A
Train Diffusion:   2%|▏         | 182/8500 [11:36<8:59:13,  3.89s/it][A
Train Diffusion:   2%|▏         | 183/8500 [11:40<8:59:02,  3.89s/it][A
Train Diffusion:   2%|▏         | 184/8500 [11:43<8:39:24,  3.75s/it][A
Train Diffusion:   2%|▏         | 185/8500 [11:47<8:34:49,  3.71s/it][A
Train Diffusion:   2%|▏         | 186/8500 [11:51<8:47:27,  3.81s/it][A
Train Diffusion:   2%|▏         | 187/8500 [11:54<8:43:23,  3.78s/it][A
Train Diffusion:   2%|▏         | 188/8500 [11:58<8:45:01,  3.79s/it][A
Train Diffusion:   2%|▏         | 189/8500 [12:02<8:28:01,  3.67s/it][A

Moving average norm loss at 190 iterations is: 24094.13203125. Best norm loss value is: 15495.533203125.

C_PATH mean = tensor([[60.9429,  1.0735,  1.9118],
        [61.0588,  1.0552,  1.9156],
        [60.7184,  1.2015,  2.0205],
        [60.7203,  1.0663,  2.0420],
        [60.9852,  1.0699,  1.9324]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[23.0929,  6.0255,  1.7296],
         [62.1268,  0.9761,  1.7409],
         [56.9197,  0.9497,  1.7536],
         ...,
         [62.1469,  0.9622,  1.7306],
         [62.3230,  0.9500,  1.7519],
         [62.4503,  0.9342,  1.7746]],

        [[23.1021,  5.7559,  1.6826],
         [60.9620,  0.9570,  1.7402],
         [61.6934,  0.9736,  1.7519],
         ...,
         [62.3351,  0.9662,  1.7062],
         [62.2109,  0.9855,  1.7165],
         [58.2482,  0.9544,  1.7475]],

        [[23.0996,  5.1889,  1.7046],
         [62.2106,  0.9939,  1.7896],
         [62.4912,  0.9586,  1.6933],
         ...,
         [63.0678,  0.9194,  1.7170],
     


Train Diffusion:   2%|▏         | 190/8500 [12:05<8:26:41,  3.66s/it][A
Train Diffusion:   2%|▏         | 191/8500 [12:09<8:22:51,  3.63s/it][A
Train Diffusion:   2%|▏         | 192/8500 [12:12<8:11:48,  3.55s/it][A
Train Diffusion:   2%|▏         | 193/8500 [12:16<8:21:32,  3.62s/it][A
Train Diffusion:   2%|▏         | 194/8500 [12:19<8:12:43,  3.56s/it][A
Train Diffusion:   2%|▏         | 195/8500 [12:23<8:14:46,  3.57s/it][A
Train Diffusion:   2%|▏         | 196/8500 [12:27<8:22:27,  3.63s/it][A
Train Diffusion:   2%|▏         | 197/8500 [12:30<8:17:45,  3.60s/it][A
Train Diffusion:   2%|▏         | 198/8500 [12:34<8:31:45,  3.70s/it][A
Train Diffusion:   2%|▏         | 199/8500 [12:38<8:21:59,  3.63s/it][A

Moving average norm loss at 200 iterations is: 16847.8873046875. Best norm loss value is: 12845.19140625.

C_PATH mean = tensor([[59.5052,  0.9591,  1.9471],
        [59.4338,  1.0035,  1.9649],
        [59.3076,  1.0481,  1.9693],
        [59.3993,  1.0324,  1.9262],
        [59.5293,  0.9671,  1.9808]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[22.5987,  4.5525,  2.1740],
         [61.3075,  1.1086,  1.5390],
         [59.9976,  0.8576,  1.8078],
         ...,
         [60.2112,  0.8532,  1.8632],
         [60.9191,  0.9615,  1.7343],
         [60.3094,  0.8660,  1.7721]],

        [[22.5987,  8.4523, 36.8450],
         [13.4491, 23.9004,  1.8811],
         [60.2624,  0.8377,  1.7737],
         ...,
         [59.6887,  2.5590,  1.7159],
         [56.4034,  0.9111,  1.7038],
         [60.0827,  0.8400,  1.8702]],

        [[22.5987,  6.3752,  2.0450],
         [60.0197,  0.8769,  1.7792],
         [58.8619,  0.8170,  1.8125],
         ...,
         [60.7482,  0.8919,  1.7763],
    


Train Diffusion:   2%|▏         | 200/8500 [12:41<8:24:49,  3.65s/it][A
Train Diffusion:   2%|▏         | 201/8500 [12:45<8:26:39,  3.66s/it][A
Train Diffusion:   2%|▏         | 202/8500 [12:48<8:19:35,  3.61s/it][A
Train Diffusion:   2%|▏         | 203/8500 [12:52<8:31:43,  3.70s/it][A
Train Diffusion:   2%|▏         | 204/8500 [12:56<8:31:48,  3.70s/it][A
Train Diffusion:   2%|▏         | 205/8500 [13:00<8:34:49,  3.72s/it][A
Train Diffusion:   2%|▏         | 206/8500 [13:04<8:44:13,  3.79s/it][A
Train Diffusion:   2%|▏         | 207/8500 [13:07<8:32:19,  3.71s/it][A
Train Diffusion:   2%|▏         | 208/8500 [13:11<8:34:00,  3.72s/it][A
Train Diffusion:   2%|▏         | 209/8500 [13:15<8:29:16,  3.69s/it][A

Moving average ELBO loss at 210 iterations is: 119737610.4. Best ELBO loss value is: 86106664.0.

C_PATH mean = tensor([[60.0255,  0.8907,  2.4467],
        [60.0144,  0.9155,  2.4361],
        [59.9925,  0.8650,  2.4933],
        [60.0167,  0.8563,  2.4369],
        [60.0768,  0.9059,  2.4141]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.6826,  4.1827,  2.2116],
         [61.5587,  0.6948,  2.3269],
         [59.8444,  0.7443,  2.2784],
         ...,
         [60.1950,  0.6786,  2.2738],
         [61.2738,  0.7130,  2.3117],
         [61.3203,  0.7184,  2.2224]],

        [[22.6826,  2.7936,  2.1793],
         [61.8158,  0.7016,  1.9053],
         [60.7154,  0.6614,  2.2936],
         ...,
         [61.1170,  0.6882,  1.9390],
         [60.9890,  0.7206,  2.0644],
         [61.0350,  0.7134,  2.2420]],

        [[22.6826,  4.2074,  2.1964],
         [61.5525,  0.6943,  2.3266],
         [61.3113,  0.5952,  1.9703],
         ...,
         [60.2039,  0.4764,  2.3119],
         [60


Train Diffusion:   2%|▏         | 210/8500 [13:18<8:26:48,  3.67s/it][A
Train Diffusion:   2%|▏         | 211/8500 [13:22<8:36:06,  3.74s/it][A
Train Diffusion:   2%|▏         | 212/8500 [13:26<8:26:55,  3.67s/it][A
Train Diffusion:   3%|▎         | 213/8500 [13:30<8:39:54,  3.76s/it][A
Train Diffusion:   3%|▎         | 214/8500 [13:33<8:34:05,  3.72s/it][A
Train Diffusion:   3%|▎         | 215/8500 [13:37<8:28:33,  3.68s/it][A
Train Diffusion:   3%|▎         | 216/8500 [13:41<8:36:30,  3.74s/it][A
Train Diffusion:   3%|▎         | 217/8500 [13:44<8:26:09,  3.67s/it][A
Train Diffusion:   3%|▎         | 218/8500 [13:48<8:36:39,  3.74s/it][A
Train Diffusion:   3%|▎         | 219/8500 [13:52<8:27:05,  3.67s/it][A

Moving average ELBO loss at 220 iterations is: 103961367.2. Best ELBO loss value is: 84308952.0.

C_PATH mean = tensor([[60.2824,  0.8126,  2.4861],
        [60.2468,  0.8112,  2.4931],
        [60.2546,  0.7879,  2.5593],
        [60.3390,  0.7801,  2.4615],
        [60.2109,  0.7568,  2.5617]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.4066,  3.6358,  3.8277],
         [52.7206,  0.9715,  2.3704],
         [61.8091,  0.3120,  2.4523],
         ...,
         [61.5216,  0.3898,  2.4953],
         [59.7876,  0.7057,  2.4876],
         [59.0970,  0.7485,  2.3352]],

        [[22.4066,  0.9631,  2.5302],
         [62.1577,  0.3734,  2.4477],
         [61.3823,  0.6725,  2.4475],
         ...,
         [59.7522,  0.6941,  2.4902],
         [60.4792,  0.7153,  2.4522],
         [62.0598,  0.8112,  2.3702]],

        [[22.4066,  1.9237,  2.2123],
         [63.4877,  0.7388,  2.6293],
         [61.8787,  0.6415,  1.9176],
         ...,
         [61.7699,  0.6955,  2.4986],
         [61


Train Diffusion:   3%|▎         | 220/8500 [13:55<8:26:03,  3.67s/it][A
Train Diffusion:   3%|▎         | 221/8500 [13:59<8:31:58,  3.71s/it][A
Train Diffusion:   3%|▎         | 222/8500 [14:03<8:41:31,  3.78s/it][A
Train Diffusion:   3%|▎         | 223/8500 [14:07<8:42:49,  3.79s/it][A
Train Diffusion:   3%|▎         | 224/8500 [14:11<8:44:24,  3.80s/it][A
Train Diffusion:   3%|▎         | 225/8500 [14:14<8:32:42,  3.72s/it][A
Train Diffusion:   3%|▎         | 226/8500 [14:18<8:45:35,  3.81s/it][A
Train Diffusion:   3%|▎         | 227/8500 [14:22<8:31:00,  3.71s/it][A
Train Diffusion:   3%|▎         | 228/8500 [14:26<8:36:17,  3.74s/it][A
Train Diffusion:   3%|▎         | 229/8500 [14:29<8:31:19,  3.71s/it][A

Moving average ELBO loss at 230 iterations is: 67207997.2. Best ELBO loss value is: 43096544.0.

C_PATH mean = tensor([[60.2061,  0.7014,  2.5406],
        [60.2364,  0.7496,  2.4735],
        [60.2188,  0.7750,  2.5555],
        [60.2479,  0.6959,  2.4997],
        [60.2607,  0.7451,  2.4681]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.0013,  0.3147,  2.4842],
         [63.9824,  0.7188,  1.9367],
         [61.2949,  0.2874,  2.5957],
         ...,
         [61.8727,  0.6959,  2.5198],
         [62.1258,  0.2831,  2.0789],
         [60.3591,  0.7974,  2.3563]],

        [[22.0013,  0.8459,  2.4613],
         [63.9161,  0.7269,  2.6774],
         [62.2520,  0.6417,  2.5020],
         ...,
         [59.7009,  0.6908,  2.5603],
         [60.4257,  0.7302,  2.4941],
         [62.1485,  0.7724,  2.4326]],

        [[22.0013,  1.2999,  2.5037],
         [59.6741,  0.3421,  2.4704],
         [61.5962,  0.6521,  2.0674],
         ...,
         [61.9187,  0.6917,  1.9780],
         [61.


Train Diffusion:   3%|▎         | 230/8500 [14:33<8:24:40,  3.66s/it][A
Train Diffusion:   3%|▎         | 231/8500 [14:37<8:34:23,  3.73s/it][A
Train Diffusion:   3%|▎         | 232/8500 [14:40<8:22:12,  3.64s/it][A
Train Diffusion:   3%|▎         | 233/8500 [14:44<8:30:05,  3.70s/it][A
Train Diffusion:   3%|▎         | 234/8500 [14:48<8:24:39,  3.66s/it][A
Train Diffusion:   3%|▎         | 235/8500 [14:51<8:20:52,  3.64s/it][A
Train Diffusion:   3%|▎         | 236/8500 [14:55<8:28:19,  3.69s/it][A
Train Diffusion:   3%|▎         | 237/8500 [14:58<8:19:51,  3.63s/it][A
Train Diffusion:   3%|▎         | 238/8500 [15:02<8:38:15,  3.76s/it][A
Train Diffusion:   3%|▎         | 239/8500 [15:06<8:32:56,  3.73s/it][A

Moving average ELBO loss at 240 iterations is: 43210942.2. Best ELBO loss value is: 27742762.0.

C_PATH mean = tensor([[59.7794,  0.7088,  2.5965],
        [59.7472,  0.7278,  2.5413],
        [59.9571,  0.6984,  2.5286],
        [59.6070,  0.7165,  2.5801],
        [59.9440,  0.7308,  2.5735]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.7504,  0.2648,  2.8234],
         [63.7153,  0.6844,  2.0832],
         [61.8843,  0.6657,  2.6734],
         ...,
         [62.2355,  0.3618,  2.6463],
         [62.7721,  0.7146,  2.5951],
         [62.6388,  0.7150,  2.1633]],

        [[21.7504,  0.8480,  2.8094],
         [63.8669,  0.2866,  2.5882],
         [60.2497,  0.6398,  2.5465],
         ...,
         [62.6820,  0.6999,  2.5488],
         [62.6437,  0.6949,  2.2990],
         [61.8891,  0.7233,  2.5248]],

        [[21.7504,  1.3403,  2.4399],
         [61.0325,  0.7074,  2.5712],
         [62.7106,  0.6073,  2.5304],
         ...,
         [62.5291,  0.6908,  2.1078],
         [61.


Train Diffusion:   3%|▎         | 240/8500 [15:10<8:39:35,  3.77s/it][A
Train Diffusion:   3%|▎         | 241/8500 [15:14<8:43:47,  3.81s/it][A
Train Diffusion:   3%|▎         | 242/8500 [15:17<8:28:40,  3.70s/it][A
Train Diffusion:   3%|▎         | 243/8500 [15:21<8:44:04,  3.81s/it][A
Train Diffusion:   3%|▎         | 244/8500 [15:26<9:00:16,  3.93s/it][A
Train Diffusion:   3%|▎         | 245/8500 [15:29<8:53:52,  3.88s/it][A
Train Diffusion:   3%|▎         | 246/8500 [15:34<9:07:39,  3.98s/it][A
Train Diffusion:   3%|▎         | 247/8500 [15:37<8:41:35,  3.79s/it][A
Train Diffusion:   3%|▎         | 248/8500 [15:41<8:46:01,  3.82s/it][A
Train Diffusion:   3%|▎         | 249/8500 [15:45<8:46:09,  3.83s/it][A

Moving average ELBO loss at 250 iterations is: 29279006.2. Best ELBO loss value is: 21405726.0.

C_PATH mean = tensor([[59.3380,  0.7054,  2.6192],
        [59.6853,  0.7003,  2.5972],
        [59.4981,  0.6865,  2.5904],
        [59.4224,  0.7047,  2.5673],
        [59.5122,  0.6887,  2.5967]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.5959,  0.6474,  2.6834],
         [66.4602,  0.7406,  2.8719],
         [63.1850,  0.6104,  2.6153],
         ...,
         [62.0287,  0.6142,  2.6765],
         [63.0198,  0.3406,  2.4181],
         [63.3637,  0.7414,  2.5869]],

        [[21.5959,  0.3415,  2.8017],
         [66.3304,  0.3368,  2.7550],
         [60.6395,  0.6448,  2.1460],
         ...,
         [60.7784,  0.3583,  2.6083],
         [51.0092,  0.8927,  2.6775],
         [62.6574,  0.3495,  2.4620]],

        [[21.5959,  0.4092,  2.6344],
         [63.1344,  0.7365,  2.0346],
         [55.4656,  0.7369,  2.6919],
         ...,
         [63.7584,  0.7151,  2.7532],
         [62.


Train Diffusion:   3%|▎         | 250/8500 [15:48<8:39:58,  3.78s/it][A
Train Diffusion:   3%|▎         | 251/8500 [15:52<8:50:40,  3.86s/it][A
Train Diffusion:   3%|▎         | 252/8500 [15:56<8:28:11,  3.70s/it][A
Train Diffusion:   3%|▎         | 253/8500 [15:59<8:25:19,  3.68s/it][A
Train Diffusion:   3%|▎         | 254/8500 [16:03<8:19:29,  3.63s/it][A
Train Diffusion:   3%|▎         | 255/8500 [16:06<8:13:47,  3.59s/it][A
Train Diffusion:   3%|▎         | 256/8500 [16:10<8:25:35,  3.68s/it][A
Train Diffusion:   3%|▎         | 257/8500 [16:14<8:38:06,  3.77s/it][A
Train Diffusion:   3%|▎         | 258/8500 [16:18<8:31:16,  3.72s/it][A
Train Diffusion:   3%|▎         | 259/8500 [16:21<8:25:53,  3.68s/it][A

Moving average ELBO loss at 260 iterations is: 24417992.2. Best ELBO loss value is: 20165942.0.

C_PATH mean = tensor([[59.2802,  0.6878,  2.6123],
        [59.3847,  0.6771,  2.6000],
        [59.2276,  0.6922,  2.5888],
        [59.1308,  0.7001,  2.5902],
        [59.2548,  0.6938,  2.5979]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.6890,  0.6351,  3.0408],
         [64.6289,  0.7222,  2.7819],
         [60.1573,  0.6053,  2.6354],
         ...,
         [56.2113,  0.7764,  2.5566],
         [63.8093,  0.6910,  2.6360],
         [63.2809,  0.3394,  2.4315]],

        [[21.3429,  1.2802,  2.5409],
         [61.9111,  0.6385,  2.5166],
         [63.5906,  0.2552,  2.6236],
         ...,
         [63.1491,  0.6695,  2.1591],
         [60.5500,  0.7356,  2.7483],
         [63.2910,  0.6790,  2.5715]],

        [[21.6975,  0.2290,  2.9502],
         [61.6865,  0.3629,  1.8825],
         [45.8387,  0.9320,  2.6376],
         ...,
         [60.5061,  0.6706,  2.7059],
         [58.


Train Diffusion:   3%|▎         | 260/8500 [16:25<8:15:19,  3.61s/it][A
Train Diffusion:   3%|▎         | 261/8500 [16:28<8:14:58,  3.60s/it][A
Train Diffusion:   3%|▎         | 262/8500 [16:32<8:06:19,  3.54s/it][A
Train Diffusion:   3%|▎         | 263/8500 [16:35<7:58:43,  3.49s/it][A
Train Diffusion:   3%|▎         | 264/8500 [16:39<8:05:40,  3.54s/it][A
Train Diffusion:   3%|▎         | 265/8500 [16:42<8:02:46,  3.52s/it][A
Train Diffusion:   3%|▎         | 266/8500 [16:46<7:54:01,  3.45s/it][A
Train Diffusion:   3%|▎         | 267/8500 [16:49<7:53:16,  3.45s/it][A
Train Diffusion:   3%|▎         | 268/8500 [16:53<8:03:13,  3.52s/it][A
Train Diffusion:   3%|▎         | 269/8500 [16:57<8:28:50,  3.71s/it][A

Moving average ELBO loss at 270 iterations is: 20633135.0. Best ELBO loss value is: 16569584.0.

C_PATH mean = tensor([[59.4776,  0.6659,  2.5219],
        [59.2136,  0.6960,  2.5328],
        [59.1666,  0.6845,  2.5285],
        [59.3482,  0.6682,  2.5077],
        [59.3083,  0.6593,  2.5319]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.9017,  0.3920,  2.7247],
         [64.0087,  0.6547,  2.5739],
         [61.4604,  0.6810,  1.9018],
         ...,
         [62.2060,  0.3984,  2.2239],
         [59.7865,  0.4096,  2.6549],
         [62.1439,  0.6861,  2.4981]],

        [[21.9000,  0.3777,  2.7174],
         [60.6980,  0.6937,  2.3611],
         [55.4692,  0.3528,  2.4703],
         ...,
         [61.6722,  0.5585,  2.5499],
         [62.8505,  0.6780,  2.5651],
         [63.0678,  0.6758,  2.4925]],

        [[21.8983,  0.5645,  2.7258],
         [64.0306,  0.6861,  2.6679],
         [63.3437,  0.6219,  2.5274],
         ...,
         [62.3842,  0.6978,  2.5342],
         [59.


Train Diffusion:   3%|▎         | 270/8500 [17:01<8:29:42,  3.72s/it][A
Train Diffusion:   3%|▎         | 271/8500 [17:04<8:30:12,  3.72s/it][A
Train Diffusion:   3%|▎         | 272/8500 [17:08<8:17:29,  3.63s/it][A
Train Diffusion:   3%|▎         | 273/8500 [17:11<8:17:19,  3.63s/it][A
Train Diffusion:   3%|▎         | 274/8500 [17:15<8:07:20,  3.55s/it][A
Train Diffusion:   3%|▎         | 275/8500 [17:18<8:08:17,  3.56s/it][A
Train Diffusion:   3%|▎         | 276/8500 [17:22<8:16:54,  3.63s/it][A
Train Diffusion:   3%|▎         | 277/8500 [17:26<8:16:58,  3.63s/it][A
Train Diffusion:   3%|▎         | 278/8500 [17:29<8:04:06,  3.53s/it][A
Train Diffusion:   3%|▎         | 279/8500 [17:32<7:54:15,  3.46s/it][A

Moving average ELBO loss at 280 iterations is: 17831315.0. Best ELBO loss value is: 13449245.0.

C_PATH mean = tensor([[59.2820,  0.6700,  2.5050],
        [59.0443,  0.6798,  2.4798],
        [58.9469,  0.6862,  2.4931],
        [59.2724,  0.6694,  2.5016],
        [59.2357,  0.6753,  2.4844]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.9161,  0.8727,  2.4526],
         [61.8490,  0.6494,  2.5206],
         [62.3271,  0.6065,  2.4788],
         ...,
         [60.1320,  0.6964,  2.4902],
         [62.2501,  0.6759,  2.4783],
         [62.5845,  0.6645,  2.4843]],

        [[20.1010,  1.3848,  2.3578],
         [57.6800,  0.7004,  2.5615],
         [56.2027,  0.6503,  2.5275],
         ...,
         [61.9339,  0.6518,  2.5045],
         [61.5617,  0.6426,  2.1820],
         [52.8859,  0.8668,  2.4909]],

        [[22.0776,  0.4965,  2.4902],
         [63.3974,  0.6751,  2.6498],
         [62.2853,  0.6085,  2.4758],
         ...,
         [62.1514,  0.4316,  2.3116],
         [60.


Train Diffusion:   3%|▎         | 280/8500 [17:36<7:59:38,  3.50s/it][A
Train Diffusion:   3%|▎         | 281/8500 [17:39<7:52:29,  3.45s/it][A
Train Diffusion:   3%|▎         | 282/8500 [17:43<7:47:12,  3.41s/it][A
Train Diffusion:   3%|▎         | 283/8500 [17:46<7:52:00,  3.45s/it][A
Train Diffusion:   3%|▎         | 284/8500 [17:50<7:52:25,  3.45s/it][A
Train Diffusion:   3%|▎         | 285/8500 [17:53<7:53:32,  3.46s/it][A
Train Diffusion:   3%|▎         | 286/8500 [17:58<8:34:37,  3.76s/it][A
Train Diffusion:   3%|▎         | 287/8500 [18:01<8:16:50,  3.63s/it][A
Train Diffusion:   3%|▎         | 288/8500 [18:04<8:12:22,  3.60s/it][A
Train Diffusion:   3%|▎         | 289/8500 [18:08<8:23:24,  3.68s/it][A

Moving average ELBO loss at 290 iterations is: 13621720.6. Best ELBO loss value is: 10623340.0.

C_PATH mean = tensor([[59.1960,  0.6406,  2.4808],
        [58.8267,  0.6753,  2.4880],
        [59.0962,  0.6474,  2.4813],
        [58.9736,  0.6436,  2.4800],
        [58.9266,  0.6568,  2.4708]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.1596,  0.5286,  2.4456],
         [62.5312,  0.6578,  2.6036],
         [61.7110,  0.5368,  2.4880],
         ...,
         [61.3360,  0.6547,  2.5912],
         [60.2539,  0.7044,  3.1336],
         [10.8621,  1.2270,  2.5648]],

        [[19.7471,  1.5048,  2.4335],
         [57.6343,  0.5920,  2.5759],
         [61.0403,  0.6213,  2.3849],
         ...,
         [58.6048,  0.6553,  2.5812],
         [59.8601,  0.7745,  2.4327],
         [59.9512,  0.6620,  2.5316]],

        [[22.1596,  0.6416,  2.4420],
         [59.7034,  0.6627,  2.6996],
         [56.7267,  0.3764,  2.6015],
         ...,
         [59.8321,  0.4296,  2.2052],
         [60.


Train Diffusion:   3%|▎         | 290/8500 [18:12<8:08:23,  3.57s/it][A
Train Diffusion:   3%|▎         | 291/8500 [18:15<8:04:28,  3.54s/it][A
Train Diffusion:   3%|▎         | 292/8500 [18:19<8:09:04,  3.58s/it][A
Train Diffusion:   3%|▎         | 293/8500 [18:22<7:57:26,  3.49s/it][A
Train Diffusion:   3%|▎         | 294/8500 [18:26<8:06:23,  3.56s/it][A
Train Diffusion:   3%|▎         | 295/8500 [18:29<8:01:17,  3.52s/it][A
Train Diffusion:   3%|▎         | 296/8500 [18:33<8:01:41,  3.52s/it][A
Train Diffusion:   3%|▎         | 297/8500 [18:36<7:52:31,  3.46s/it][A
Train Diffusion:   4%|▎         | 298/8500 [18:39<7:45:45,  3.41s/it][A
Train Diffusion:   4%|▎         | 299/8500 [18:43<7:55:31,  3.48s/it][A

Moving average ELBO loss at 300 iterations is: 13224558.0. Best ELBO loss value is: 10623340.0.

C_PATH mean = tensor([[58.4397,  0.6847,  2.4796],
        [58.7364,  0.6565,  2.4729],
        [58.7095,  0.6775,  2.4755],
        [58.6281,  0.6720,  2.4785],
        [58.5644,  0.6825,  2.4752]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.9545,  0.7109,  2.3896],
         [59.4196,  0.6801,  2.6634],
         [58.9024,  0.6060,  2.4457],
         ...,
         [58.9368,  0.6731,  2.5556],
         [59.3292,  0.7254,  2.4315],
         [59.0266,  0.6888,  2.2433]],

        [[22.0073,  0.2027,  2.4284],
         [62.4494,  0.6660,  2.3528],
         [60.5534,  0.6173,  2.5221],
         ...,
         [61.0504,  0.5043,  2.4965],
         [58.7166,  0.6536,  2.5734],
         [59.6474,  0.6717,  2.4226]],

        [[21.9988,  0.6849,  2.4317],
         [62.1625,  0.6794,  2.5561],
         [61.3735,  0.6095,  2.3253],
         ...,
         [61.2245,  0.6498,  2.3206],
         [60.


Train Diffusion:   4%|▎         | 300/8500 [18:46<7:48:21,  3.43s/it][A
Train Diffusion:   4%|▎         | 301/8500 [18:50<7:48:47,  3.43s/it][A
Train Diffusion:   4%|▎         | 302/8500 [18:53<7:49:22,  3.44s/it][A
Train Diffusion:   4%|▎         | 303/8500 [18:56<7:43:36,  3.39s/it][A
Train Diffusion:   4%|▎         | 304/8500 [19:00<7:41:35,  3.38s/it][A
Train Diffusion:   4%|▎         | 305/8500 [19:03<7:48:09,  3.43s/it][A
Train Diffusion:   4%|▎         | 306/8500 [19:07<7:42:31,  3.39s/it][A
Train Diffusion:   4%|▎         | 307/8500 [19:10<7:52:09,  3.46s/it][A
Train Diffusion:   4%|▎         | 308/8500 [19:14<7:47:24,  3.42s/it][A
Train Diffusion:   4%|▎         | 309/8500 [19:17<7:43:34,  3.40s/it][A

Moving average ELBO loss at 310 iterations is: 14373560.0. Best ELBO loss value is: 10623340.0.

C_PATH mean = tensor([[58.3817,  0.6651,  2.4638],
        [58.4968,  0.6571,  2.4632],
        [58.2413,  0.6685,  2.4658],
        [58.2865,  0.6668,  2.4632],
        [58.3889,  0.6725,  2.4658]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.0018,  0.2985,  2.4836],
         [59.7228,  0.6726,  2.4335],
         [59.1022,  0.7066,  2.4312],
         ...,
         [59.4747,  0.6961,  2.4307],
         [58.4626,  0.4818,  2.6014],
         [60.3293,  0.7096,  2.4298]],

        [[22.0025,  0.7917,  2.5431],
         [61.1333,  0.4838,  2.5523],
         [61.5097,  0.6205,  2.4842],
         ...,
         [58.5454,  0.6642,  2.5694],
         [59.0801,  0.7266,  2.4386],
         [60.8109,  0.6518,  2.4487]],

        [[21.9972,  0.5201,  2.4440],
         [61.5174,  0.6681,  2.6358],
         [60.6555,  0.6257,  2.4613],
         ...,
         [60.8800,  0.4880,  2.4995],
         [60.


Train Diffusion:   4%|▎         | 310/8500 [19:21<7:52:47,  3.46s/it][A
Train Diffusion:   4%|▎         | 311/8500 [19:24<8:00:14,  3.52s/it][A
Train Diffusion:   4%|▎         | 312/8500 [19:28<7:56:04,  3.49s/it][A
Train Diffusion:   4%|▎         | 313/8500 [19:31<8:08:54,  3.58s/it][A
Train Diffusion:   4%|▎         | 314/8500 [19:35<8:10:50,  3.60s/it][A
Train Diffusion:   4%|▎         | 315/8500 [19:39<8:20:13,  3.67s/it][A
Train Diffusion:   4%|▎         | 316/8500 [19:42<8:06:12,  3.56s/it][A
Train Diffusion:   4%|▎         | 317/8500 [19:46<7:59:02,  3.51s/it][A
Train Diffusion:   4%|▎         | 318/8500 [19:49<8:01:22,  3.53s/it][A
Train Diffusion:   4%|▍         | 319/8500 [19:52<7:50:46,  3.45s/it][A

Moving average ELBO loss at 320 iterations is: 11653354.2. Best ELBO loss value is: 10623340.0.

C_PATH mean = tensor([[57.8997,  0.6738,  2.4570],
        [58.1894,  0.6727,  2.4583],
        [58.1526,  0.6561,  2.4524],
        [58.1129,  0.6759,  2.4620],
        [58.3778,  0.6545,  2.4630]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.1848,  1.3514,  2.0631],
         [22.7427,  1.2611,  2.3198],
         [60.3040,  0.6273,  2.4357],
         ...,
         [60.1204,  0.5099,  2.4974],
         [60.2399,  0.6547,  2.4687],
         [60.3923,  0.5005,  2.4295]],

        [[22.1230,  0.4617,  2.4940],
         [59.5108,  0.5904,  2.4431],
         [58.9244,  0.7312,  2.5013],
         ...,
         [60.3809,  0.6431,  2.4446],
         [60.2619,  0.6449,  2.4437],
         [59.7505,  0.7188,  1.6441]],

        [[22.1504,  0.5340,  2.3424],
         [60.9865,  0.7371,  2.6479],
         [60.3791,  0.6287,  2.4533],
         ...,
         [60.3556,  0.6519,  2.3850],
         [60.


Train Diffusion:   4%|▍         | 320/8500 [19:56<7:57:22,  3.50s/it][A
Train Diffusion:   4%|▍         | 321/8500 [19:59<7:53:00,  3.47s/it][A
Train Diffusion:   4%|▍         | 322/8500 [20:03<7:46:26,  3.42s/it][A
Train Diffusion:   4%|▍         | 323/8500 [20:06<7:47:35,  3.43s/it][A
Train Diffusion:   4%|▍         | 324/8500 [20:10<7:47:40,  3.43s/it][A
Train Diffusion:   4%|▍         | 325/8500 [20:13<7:42:20,  3.39s/it][A
Train Diffusion:   4%|▍         | 326/8500 [20:17<7:53:08,  3.47s/it][A
Train Diffusion:   4%|▍         | 327/8500 [20:20<7:48:14,  3.44s/it][A
Train Diffusion:   4%|▍         | 328/8500 [20:23<7:42:55,  3.40s/it][A
Train Diffusion:   4%|▍         | 329/8500 [20:27<7:49:39,  3.45s/it][A

Moving average ELBO loss at 330 iterations is: 11716221.1. Best ELBO loss value is: 9790147.0.

C_PATH mean = tensor([[57.8133,  0.6727,  2.4521],
        [58.0931,  0.6602,  2.4735],
        [57.9034,  0.6602,  2.4602],
        [57.9283,  0.6684,  2.4602],
        [57.8297,  0.6739,  2.4607]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.2478,  0.4187,  2.3092],
         [60.1152,  0.7604,  2.7652],
         [59.4569,  0.6545,  2.4044],
         ...,
         [59.7064,  0.6593,  2.4358],
         [59.4875,  0.6455,  2.3588],
         [58.3531,  0.4680,  2.4408]],

        [[20.5721,  1.1780,  2.5327],
         [58.3827,  0.5156,  2.4230],
         [60.2985,  0.6365,  2.4606],
         ...,
         [58.1836,  0.6617,  2.4337],
         [57.2540,  0.5505,  2.5917],
         [57.8349,  0.6952,  2.4303]],

        [[20.5777,  1.2182,  2.4777],
         [58.4372,  0.7027,  2.3256],
         [59.7209,  0.5827,  2.4866],
         ...,
         [59.6427,  0.6723,  2.4252],
         [59.7


Train Diffusion:   4%|▍         | 330/8500 [20:30<7:46:32,  3.43s/it][A
Train Diffusion:   4%|▍         | 331/8500 [20:34<7:42:02,  3.39s/it][A
Train Diffusion:   4%|▍         | 332/8500 [20:37<7:54:52,  3.49s/it][A
Train Diffusion:   4%|▍         | 333/8500 [20:41<8:00:24,  3.53s/it][A
Train Diffusion:   4%|▍         | 334/8500 [20:44<8:05:00,  3.56s/it][A
Train Diffusion:   4%|▍         | 335/8500 [20:48<7:54:21,  3.49s/it][A
Train Diffusion:   4%|▍         | 336/8500 [20:51<7:49:25,  3.45s/it][A
Train Diffusion:   4%|▍         | 337/8500 [20:55<7:56:53,  3.51s/it][A
Train Diffusion:   4%|▍         | 338/8500 [20:58<7:49:32,  3.45s/it][A
Train Diffusion:   4%|▍         | 339/8500 [21:01<7:44:39,  3.42s/it][A

Moving average ELBO loss at 340 iterations is: 9864167.9. Best ELBO loss value is: 8751287.0.

C_PATH mean = tensor([[57.6550,  0.6782,  2.4711],
        [57.4910,  0.6750,  2.4602],
        [57.9385,  0.6792,  2.4594],
        [57.7281,  0.6735,  2.4487],
        [57.5391,  0.6702,  2.4621]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[19.5955,  1.2394,  2.4777],
         [55.6151,  0.6843,  2.4258],
         [59.5832,  0.6485,  2.4817],
         ...,
         [58.2605,  0.6986,  2.4411],
         [57.7105,  0.6625,  2.5194],
         [58.5615,  0.6811,  2.4371]],

        [[22.0687,  0.2373,  2.6025],
         [60.0189,  0.6673,  2.4272],
         [59.2555,  0.4947,  2.5181],
         ...,
         [58.9217,  0.7644,  2.4237],
         [59.2815,  0.6530,  2.4246],
         [59.2059,  0.6756,  2.4531]],

        [[22.0700,  0.3608,  2.5821],
         [59.8080,  0.6768,  2.5803],
         [57.7702,  0.6331,  2.5044],
         ...,
         [57.2281,  0.7794,  2.3096],
         [58.60


Train Diffusion:   4%|▍         | 340/8500 [21:05<7:54:06,  3.49s/it][A
Train Diffusion:   4%|▍         | 341/8500 [21:08<7:46:32,  3.43s/it][A
Train Diffusion:   4%|▍         | 342/8500 [21:12<7:44:18,  3.41s/it][A
Train Diffusion:   4%|▍         | 343/8500 [21:15<7:49:18,  3.45s/it][A
Train Diffusion:   4%|▍         | 344/8500 [21:19<7:43:50,  3.41s/it][A
Train Diffusion:   4%|▍         | 345/8500 [21:22<7:57:16,  3.51s/it][A
Train Diffusion:   4%|▍         | 346/8500 [21:26<7:50:20,  3.46s/it][A
Train Diffusion:   4%|▍         | 347/8500 [21:29<7:47:00,  3.44s/it][A
Train Diffusion:   4%|▍         | 348/8500 [21:33<7:52:41,  3.48s/it][A
Train Diffusion:   4%|▍         | 349/8500 [21:36<7:45:36,  3.43s/it][A

Moving average ELBO loss at 350 iterations is: 10119138.7. Best ELBO loss value is: 8133903.0.

C_PATH mean = tensor([[57.4423,  0.6723,  2.4578],
        [57.6563,  0.6794,  2.4630],
        [57.4193,  0.6760,  2.4655],
        [57.3596,  0.6753,  2.4772],
        [57.4734,  0.6787,  2.4648]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.3775,  0.6986,  2.4274],
         [58.0351,  0.7614,  2.6935],
         [59.2943,  0.6415,  2.4533],
         ...,
         [58.9817,  0.6733,  2.4670],
         [58.9155,  0.6622,  2.4478],
         [58.8529,  0.6649,  2.3912]],

        [[22.3552,  0.3386,  2.2598],
         [58.7376,  0.7748,  2.7374],
         [59.0881,  0.4967,  2.4980],
         ...,
         [58.9738,  0.5517,  2.5152],
         [58.5498,  0.6750,  2.4802],
         [57.8149,  0.6776,  2.4605]],

        [[20.4868,  1.1035,  2.3881],
         [56.3139,  0.7371,  2.4903],
         [56.4123,  0.6638,  2.4793],
         ...,
         [59.2199,  0.6657,  2.4943],
         [57.5


Train Diffusion:   4%|▍         | 350/8500 [21:40<7:57:11,  3.51s/it][A
Train Diffusion:   4%|▍         | 351/8500 [21:43<7:59:19,  3.53s/it][A
Train Diffusion:   4%|▍         | 352/8500 [21:47<7:59:35,  3.53s/it][A
Train Diffusion:   4%|▍         | 353/8500 [21:51<8:06:17,  3.58s/it][A
Train Diffusion:   4%|▍         | 354/8500 [21:54<7:55:29,  3.50s/it][A
Train Diffusion:   4%|▍         | 355/8500 [21:57<7:51:27,  3.47s/it][A
Train Diffusion:   4%|▍         | 356/8500 [22:01<7:54:38,  3.50s/it][A
Train Diffusion:   4%|▍         | 357/8500 [22:04<7:46:15,  3.44s/it][A
Train Diffusion:   4%|▍         | 358/8500 [22:08<7:55:59,  3.51s/it][A
Train Diffusion:   4%|▍         | 359/8500 [22:11<7:47:08,  3.44s/it][A

Moving average ELBO loss at 360 iterations is: 9045260.35. Best ELBO loss value is: 7123118.0.

C_PATH mean = tensor([[56.8450,  0.6957,  2.4720],
        [57.2301,  0.6887,  2.4663],
        [57.4695,  0.6873,  2.4602],
        [57.2484,  0.6981,  2.4583],
        [57.2489,  0.6927,  2.4701]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[20.7798,  1.1590,  2.1731],
         [42.8154,  0.9257,  2.3457],
         [57.2825,  0.6659,  2.4873],
         ...,
         [57.7322,  0.6874,  2.5135],
         [57.2084,  0.7379,  2.5154],
         [58.0267,  0.7378,  2.3456]],

        [[22.3039,  0.6275,  2.3105],
         [57.8420,  0.7787,  2.7053],
         [58.8832,  0.5202,  2.5286],
         ...,
         [58.8165,  0.6915,  2.3940],
         [58.9370,  0.6755,  2.4976],
         [57.9571,  0.7079,  2.4220]],

        [[22.2978,  0.2460,  2.2427],
         [57.5398,  0.7925,  2.7038],
         [57.8057,  0.6515,  2.4264],
         ...,
         [58.4424,  0.6841,  2.4179],
         [57.9


Train Diffusion:   4%|▍         | 360/8500 [22:14<7:40:21,  3.39s/it][A
Train Diffusion:   4%|▍         | 361/8500 [22:18<7:44:52,  3.43s/it][A
Train Diffusion:   4%|▍         | 362/8500 [22:21<7:44:24,  3.42s/it][A
Train Diffusion:   4%|▍         | 363/8500 [22:25<7:40:23,  3.39s/it][A
Train Diffusion:   4%|▍         | 364/8500 [22:28<7:49:38,  3.46s/it][A
Train Diffusion:   4%|▍         | 365/8500 [22:32<7:46:25,  3.44s/it][A
Train Diffusion:   4%|▍         | 366/8500 [22:35<7:38:40,  3.38s/it][A
Train Diffusion:   4%|▍         | 367/8500 [22:38<7:49:17,  3.46s/it][A
Train Diffusion:   4%|▍         | 368/8500 [22:42<7:41:08,  3.40s/it][A
Train Diffusion:   4%|▍         | 369/8500 [22:45<7:36:39,  3.37s/it][A

Moving average ELBO loss at 370 iterations is: 7860019.9. Best ELBO loss value is: 6271654.0.

C_PATH mean = tensor([[57.1608,  0.6894,  2.4651],
        [57.1672,  0.6834,  2.4618],
        [57.0006,  0.6954,  2.4692],
        [56.8047,  0.7051,  2.4626],
        [56.9822,  0.6890,  2.4831]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.0975,  0.4810,  2.7468],
         [58.4296,  0.6852,  2.5528],
         [56.9886,  0.6807,  2.4830],
         ...,
         [58.2250,  0.6858,  2.4207],
         [56.8883,  0.5935,  2.5428],
         [59.5229,  0.5196,  2.4579]],

        [[22.1142,  0.4696,  2.8000],
         [58.1786,  0.5591,  2.5539],
         [59.1071,  0.4777,  2.5853],
         ...,
         [57.0934,  0.7282,  2.4770],
         [57.8691,  0.7205,  2.4352],
         [58.1744,  0.6834,  2.4518]],

        [[22.1050,  0.4697,  2.6552],
         [58.5036,  0.6949,  2.4541],
         [58.4709,  0.6590,  2.4882],
         ...,
         [58.1792,  0.7113,  2.4219],
         [58.31


Train Diffusion:   4%|▍         | 370/8500 [22:49<7:47:00,  3.45s/it][A
Train Diffusion:   4%|▍         | 371/8500 [22:52<7:47:08,  3.45s/it][A
Train Diffusion:   4%|▍         | 372/8500 [22:56<7:51:48,  3.48s/it][A
Train Diffusion:   4%|▍         | 373/8500 [22:59<7:45:34,  3.44s/it][A
Train Diffusion:   4%|▍         | 374/8500 [23:02<7:38:19,  3.38s/it][A
Train Diffusion:   4%|▍         | 375/8500 [23:06<7:44:59,  3.43s/it][A
Train Diffusion:   4%|▍         | 376/8500 [23:09<7:41:39,  3.41s/it][A
Train Diffusion:   4%|▍         | 377/8500 [23:13<7:40:46,  3.40s/it][A
Train Diffusion:   4%|▍         | 378/8500 [23:16<7:47:30,  3.45s/it][A
Train Diffusion:   4%|▍         | 379/8500 [23:19<7:39:47,  3.40s/it][A

Moving average ELBO loss at 380 iterations is: 9032995.15. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[56.9017,  0.7067,  2.4750],
        [56.6266,  0.7183,  2.4779],
        [56.6553,  0.7361,  2.4875],
        [56.8720,  0.7073,  2.4779],
        [56.7495,  0.7351,  2.4795]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.9844,  0.6189,  2.5612],
         [57.3248,  0.6074,  2.5995],
         [58.9534,  0.6387,  2.5015],
         ...,
         [58.3315,  0.7370,  2.4186],
         [58.2950,  0.6809,  2.5024],
         [58.2450,  0.6949,  2.4392]],

        [[21.6336,  0.8213,  2.1469],
         [53.2151,  0.7767,  2.4907],
         [57.8009,  0.4987,  2.5211],
         ...,
         [57.0493,  0.7436,  2.4805],
         [57.7185,  0.7207,  2.4410],
         [58.2283,  0.6925,  2.4415]],

        [[21.9366,  0.3503,  2.5811],
         [58.2515,  0.7203,  2.6237],
         [58.3823,  0.6713,  2.4751],
         ...,
         [57.9549,  0.7180,  2.5348],
         [56.9


Train Diffusion:   4%|▍         | 380/8500 [23:23<7:41:47,  3.41s/it][A
Train Diffusion:   4%|▍         | 381/8500 [23:26<7:44:42,  3.43s/it][A
Train Diffusion:   4%|▍         | 382/8500 [23:30<7:39:54,  3.40s/it][A
Train Diffusion:   5%|▍         | 383/8500 [23:33<7:50:03,  3.47s/it][A
Train Diffusion:   5%|▍         | 384/8500 [23:37<7:41:16,  3.41s/it][A
Train Diffusion:   5%|▍         | 385/8500 [23:40<7:37:20,  3.38s/it][A
Train Diffusion:   5%|▍         | 386/8500 [23:43<7:43:05,  3.42s/it][A
Train Diffusion:   5%|▍         | 387/8500 [23:47<7:37:27,  3.38s/it][A
Train Diffusion:   5%|▍         | 388/8500 [23:50<7:43:14,  3.43s/it][A
Train Diffusion:   5%|▍         | 389/8500 [23:54<7:40:49,  3.41s/it][A

Moving average ELBO loss at 390 iterations is: 10436279.9. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[56.5661,  0.7256,  2.4901],
        [56.5370,  0.7167,  2.4926],
        [56.6304,  0.7104,  2.4773],
        [56.6163,  0.7179,  2.4913],
        [56.4851,  0.7188,  2.4900]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.4725,  1.1261,  1.5482],
         [24.9238,  1.1942,  2.4368],
         [58.0148,  0.4842,  2.4989],
         ...,
         [58.3562,  0.7524,  2.4679],
         [56.6451,  0.7137,  2.5123],
         [57.7366,  0.7651,  2.4344]],

        [[22.0589,  0.2894,  2.4461],
         [57.6952,  0.7297,  2.5068],
         [58.1835,  0.6666,  2.3396],
         ...,
         [58.5738,  0.7726,  2.4749],
         [58.1151,  0.7084,  2.4267],
         [58.1299,  0.6072,  2.4703]],

        [[19.5238,  1.1369,  2.3896],
         [55.3821,  0.7663,  2.6875],
         [58.1874,  0.6849,  2.4959],
         ...,
         [58.2029,  0.6710,  2.4169],
         [55.7


Train Diffusion:   5%|▍         | 390/8500 [23:57<7:43:06,  3.43s/it][A
Train Diffusion:   5%|▍         | 391/8500 [24:01<7:47:38,  3.46s/it][A
Train Diffusion:   5%|▍         | 392/8500 [24:04<7:40:09,  3.41s/it][A
Train Diffusion:   5%|▍         | 393/8500 [24:07<7:33:44,  3.36s/it][A
Train Diffusion:   5%|▍         | 394/8500 [24:11<7:34:57,  3.37s/it][A
Train Diffusion:   5%|▍         | 395/8500 [24:14<7:34:05,  3.36s/it][A
Train Diffusion:   5%|▍         | 396/8500 [24:17<7:30:34,  3.34s/it][A
Train Diffusion:   5%|▍         | 397/8500 [24:21<7:41:59,  3.42s/it][A
Train Diffusion:   5%|▍         | 398/8500 [24:24<7:34:59,  3.37s/it][A
Train Diffusion:   5%|▍         | 399/8500 [24:27<7:30:07,  3.33s/it][A

Moving average ELBO loss at 400 iterations is: 8810669.2. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[56.4086,  0.7322,  2.4856],
        [56.2303,  0.7345,  2.4949],
        [56.3716,  0.7446,  2.5053],
        [56.2558,  0.7294,  2.4845],
        [56.4369,  0.7218,  2.4919]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.8942,  0.3395,  2.8274],
         [57.9089,  0.5766,  2.5965],
         [57.6968,  0.6727,  2.5111],
         ...,
         [57.0244,  0.7352,  2.5630],
         [56.6151,  0.7243,  2.5050],
         [57.1287,  0.6958,  2.4340]],

        [[21.8308,  0.7753,  2.8636],
         [57.5953,  0.6586,  2.5408],
         [57.8725,  0.5650,  2.5440],
         ...,
         [56.6881,  0.6952,  2.5652],
         [58.9815,  0.7495,  2.4942],
         [57.0002,  0.7261,  2.1364]],

        [[19.6411,  1.2131,  2.3924],
         [55.1355,  0.7883,  3.1131],
         [57.7132,  0.7706,  2.4698],
         ...,
         [35.8758,  1.0722,  2.3521],
         [57.95


Train Diffusion:   5%|▍         | 400/8500 [24:31<7:37:26,  3.39s/it][A
Train Diffusion:   5%|▍         | 401/8500 [24:34<7:38:48,  3.40s/it][A
Train Diffusion:   5%|▍         | 402/8500 [24:37<7:34:10,  3.37s/it][A
Train Diffusion:   5%|▍         | 403/8500 [24:41<7:40:42,  3.41s/it][A
Train Diffusion:   5%|▍         | 404/8500 [24:44<7:36:17,  3.38s/it][A
Train Diffusion:   5%|▍         | 405/8500 [24:48<7:42:23,  3.43s/it][A
Train Diffusion:   5%|▍         | 406/8500 [24:51<7:45:46,  3.45s/it][A
Train Diffusion:   5%|▍         | 407/8500 [24:55<7:38:53,  3.40s/it][A
Train Diffusion:   5%|▍         | 408/8500 [24:58<7:45:52,  3.45s/it][A
Train Diffusion:   5%|▍         | 409/8500 [25:02<7:52:21,  3.50s/it][A

Moving average ELBO loss at 410 iterations is: 8420009.25. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[56.2394,  0.7446,  2.4932],
        [56.2998,  0.7484,  2.4926],
        [56.3751,  0.7396,  2.4957],
        [56.1360,  0.7428,  2.4863],
        [56.1253,  0.7454,  2.4852]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.6934,  0.8341,  2.2074],
         [55.8602,  0.8190,  2.4542],
         [56.5201,  0.7378,  2.4701],
         ...,
         [57.9931,  0.7908,  2.4841],
         [56.3843,  0.7360,  2.5099],
         [57.2941,  0.7376,  2.4434]],

        [[22.0547,  0.5101,  2.5967],
         [55.9102,  0.6595,  2.6102],
         [58.5894,  0.6671,  2.5298],
         ...,
         [58.0078,  0.7174,  2.4117],
         [57.8836,  0.7132,  2.5342],
         [57.6079,  0.6235,  2.3411]],

        [[21.9910,  0.4336,  2.6342],
         [57.3922,  0.7304,  2.6061],
         [57.9102,  0.7241,  2.4901],
         ...,
         [57.7149,  0.5389,  2.4826],
         [57.1


Train Diffusion:   5%|▍         | 410/8500 [25:05<7:54:34,  3.52s/it][A
Train Diffusion:   5%|▍         | 411/8500 [25:09<7:50:42,  3.49s/it][A
Train Diffusion:   5%|▍         | 412/8500 [25:12<7:43:32,  3.44s/it][A
Train Diffusion:   5%|▍         | 413/8500 [25:15<7:38:45,  3.40s/it][A
Train Diffusion:   5%|▍         | 414/8500 [25:19<7:40:05,  3.41s/it][A
Train Diffusion:   5%|▍         | 415/8500 [25:22<7:35:57,  3.38s/it][A
Train Diffusion:   5%|▍         | 416/8500 [25:26<7:43:31,  3.44s/it][A
Train Diffusion:   5%|▍         | 417/8500 [25:29<7:38:36,  3.40s/it][A
Train Diffusion:   5%|▍         | 418/8500 [25:32<7:32:52,  3.36s/it][A
Train Diffusion:   5%|▍         | 419/8500 [25:36<7:38:10,  3.40s/it][A

Moving average ELBO loss at 420 iterations is: 7741031.5. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[55.9170,  0.7600,  2.4896],
        [56.2921,  0.7438,  2.4900],
        [56.1212,  0.7500,  2.4976],
        [56.0082,  0.7530,  2.5056],
        [56.0775,  0.7494,  2.4915]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[18.9657,  1.2073,  2.4558],
         [54.5368,  0.8538,  2.6580],
         [56.9752,  0.7437,  2.4570],
         ...,
         [57.3030,  0.7276,  2.5134],
         [56.9993,  0.7494,  2.5043],
         [57.2025,  0.7413,  2.3801]],

        [[21.9538,  0.5800,  2.8530],
         [56.5976,  0.6324,  2.6098],
         [56.8582,  0.6759,  2.5476],
         ...,
         [57.9595,  0.8123,  2.4850],
         [57.1590,  0.7794,  2.4670],
         [56.2232,  0.7608,  2.4488]],

        [[21.9525,  0.3477,  2.8545],
         [57.0352,  0.7020,  2.5911],
         [55.8114,  0.7753,  2.5565],
         ...,
         [57.3930,  0.7521,  2.5161],
         [55.68


Train Diffusion:   5%|▍         | 420/8500 [25:39<7:32:13,  3.36s/it][A
Train Diffusion:   5%|▍         | 421/8500 [25:42<7:27:51,  3.33s/it][A
Train Diffusion:   5%|▍         | 422/8500 [25:46<7:35:45,  3.39s/it][A
Train Diffusion:   5%|▍         | 423/8500 [25:49<7:31:10,  3.35s/it][A
Train Diffusion:   5%|▍         | 424/8500 [25:53<7:39:01,  3.41s/it][A
Train Diffusion:   5%|▌         | 425/8500 [25:56<7:47:49,  3.48s/it][A
Train Diffusion:   5%|▌         | 426/8500 [26:00<7:39:30,  3.41s/it][A
Train Diffusion:   5%|▌         | 427/8500 [26:03<7:33:25,  3.37s/it][A
Train Diffusion:   5%|▌         | 428/8500 [26:06<7:41:36,  3.43s/it][A
Train Diffusion:   5%|▌         | 429/8500 [26:10<7:43:46,  3.45s/it][A

Moving average ELBO loss at 430 iterations is: 7897158.05. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[56.0065,  0.7629,  2.5159],
        [56.0196,  0.7590,  2.5278],
        [55.7245,  0.7722,  2.5371],
        [55.9540,  0.7712,  2.5368],
        [55.8885,  0.7633,  2.5290]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[22.1915,  0.2602,  2.5584],
         [56.0333,  0.6156,  2.7451],
         [57.7533,  0.5097,  2.6425],
         ...,
         [57.2224,  0.8346,  2.5418],
         [56.7709,  0.7776,  2.5516],
         [56.3344,  0.7859,  1.7824]],

        [[18.8906,  1.0266,  2.2081],
         [54.1725,  0.8464,  2.2742],
         [56.5873,  0.7148,  2.2951],
         ...,
         [57.3359,  0.7471,  2.5570],
         [56.9324,  0.7654,  2.5909],
         [56.2730,  0.9311,  1.3017]],

        [[22.1813,  0.7048,  2.2283],
         [55.3129,  0.8295,  2.7320],
         [56.8395,  0.8010,  2.5417],
         ...,
         [57.1341,  0.8371,  2.5637],
         [55.6


Train Diffusion:   5%|▌         | 430/8500 [26:14<7:50:15,  3.50s/it][A
Train Diffusion:   5%|▌         | 431/8500 [26:17<7:41:56,  3.43s/it][A
Train Diffusion:   5%|▌         | 432/8500 [26:20<7:40:40,  3.43s/it][A
Train Diffusion:   5%|▌         | 433/8500 [26:24<7:50:41,  3.50s/it][A
Train Diffusion:   5%|▌         | 434/8500 [26:27<7:45:57,  3.47s/it][A
Train Diffusion:   5%|▌         | 435/8500 [26:31<7:47:34,  3.48s/it][A
Train Diffusion:   5%|▌         | 436/8500 [26:34<7:47:20,  3.48s/it][A
Train Diffusion:   5%|▌         | 437/8500 [26:38<7:42:13,  3.44s/it][A
Train Diffusion:   5%|▌         | 438/8500 [26:41<7:53:20,  3.52s/it][A
Train Diffusion:   5%|▌         | 439/8500 [26:45<7:48:50,  3.49s/it][A

Moving average ELBO loss at 440 iterations is: 8210904.05. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[55.5463,  0.7697,  2.5145],
        [55.7302,  0.7743,  2.5030],
        [55.6125,  0.7777,  2.5043],
        [55.7195,  0.7719,  2.5083],
        [55.6830,  0.7640,  2.5089]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.9874,  0.6591,  2.6276],
         [55.6323,  0.7886,  2.6013],
         [56.6155,  0.7960,  2.5159],
         ...,
         [56.8558,  0.7720,  2.4464],
         [56.7820,  0.7742,  2.4992],
         [56.6863,  0.6763,  2.4903]],

        [[18.7420,  1.0625,  2.2387],
         [53.3996,  0.8778,  2.6078],
         [56.7973,  0.7891,  2.4120],
         ...,
         [57.0956,  0.8661,  2.5150],
         [55.7213,  0.6858,  2.5712],
         [57.5132,  0.8101,  2.4514]],

        [[21.7198,  0.8088,  2.7332],
         [56.4163,  0.8107,  2.5107],
         [56.7009,  0.7590,  2.5049],
         ...,
         [56.9079,  0.8738,  2.4870],
         [56.6


Train Diffusion:   5%|▌         | 440/8500 [26:48<7:39:41,  3.42s/it][A
Train Diffusion:   5%|▌         | 441/8500 [26:51<7:36:41,  3.40s/it][A
Train Diffusion:   5%|▌         | 442/8500 [26:55<7:49:29,  3.50s/it][A
Train Diffusion:   5%|▌         | 443/8500 [26:58<7:41:42,  3.44s/it][A
Train Diffusion:   5%|▌         | 444/8500 [27:02<7:41:55,  3.44s/it][A
Train Diffusion:   5%|▌         | 445/8500 [27:05<7:38:26,  3.41s/it][A
Train Diffusion:   5%|▌         | 446/8500 [27:08<7:32:25,  3.37s/it][A
Train Diffusion:   5%|▌         | 447/8500 [27:12<7:31:14,  3.36s/it][A
Train Diffusion:   5%|▌         | 448/8500 [27:16<7:47:08,  3.48s/it][A
Train Diffusion:   5%|▌         | 449/8500 [27:19<7:45:23,  3.47s/it][A

Moving average ELBO loss at 450 iterations is: 7528332.2. Best ELBO loss value is: 6204414.0.

C_PATH mean = tensor([[55.3436,  0.7907,  2.5208],
        [55.2323,  0.7937,  2.5229],
        [55.2831,  0.7942,  2.5150],
        [55.5128,  0.7838,  2.5255],
        [55.4771,  0.7869,  2.5205]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[18.9443,  1.2999,  2.4112],
         [53.3252,  0.8116,  3.9058],
         [55.9022,  0.7725,  2.5297],
         ...,
         [55.7333,  0.8083,  2.5992],
         [56.5203,  0.7826,  2.5123],
         [56.6271,  0.7806,  2.4004]],

        [[21.7713,  0.4672,  2.8675],
         [56.4155,  0.7798,  2.5975],
         [56.5467,  0.6638,  2.5791],
         ...,
         [56.7699,  0.5664,  2.5218],
         [56.0461,  0.5870,  2.5788],
         [56.8507,  0.7321,  2.5263]],

        [[21.7988,  0.6256,  2.9410],
         [55.6463,  0.6646,  2.5961],
         [57.7783,  0.7023,  2.4695],
         ...,
         [56.9055,  0.8329,  2.4322],
         [56.33


Train Diffusion:   5%|▌         | 450/8500 [27:23<7:56:07,  3.55s/it][A
Train Diffusion:   5%|▌         | 451/8500 [27:26<7:47:17,  3.48s/it][A
Train Diffusion:   5%|▌         | 452/8500 [27:30<7:50:04,  3.50s/it][A
Train Diffusion:   5%|▌         | 453/8500 [27:33<7:46:03,  3.47s/it][A
Train Diffusion:   5%|▌         | 454/8500 [27:36<7:38:20,  3.42s/it][A
Train Diffusion:   5%|▌         | 455/8500 [27:40<7:48:15,  3.49s/it][A
Train Diffusion:   5%|▌         | 456/8500 [27:43<7:41:22,  3.44s/it][A
Train Diffusion:   5%|▌         | 457/8500 [27:47<7:43:29,  3.46s/it][A
Train Diffusion:   5%|▌         | 458/8500 [27:50<7:46:39,  3.48s/it][A
Train Diffusion:   5%|▌         | 459/8500 [27:54<7:40:46,  3.44s/it][A

Moving average ELBO loss at 460 iterations is: 7695201.2. Best ELBO loss value is: 6171642.0.

C_PATH mean = tensor([[55.2318,  0.7941,  2.5589],
        [55.1751,  0.7932,  2.5489],
        [55.0604,  0.7888,  2.5499],
        [55.2216,  0.8022,  2.5453],
        [55.1401,  0.8015,  2.5374]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.7652,  0.7611,  2.5810],
         [54.9535,  0.8284,  2.7528],
         [56.8286,  0.8269,  2.5024],
         ...,
         [56.7128,  0.7702,  2.1947],
         [56.1620,  0.7848,  2.6201],
         [56.4159,  0.7885,  2.2088]],

        [[21.7833,  0.3036,  2.7578],
         [56.3148,  0.7889,  2.7132],
         [55.3408,  0.8110,  2.5507],
         ...,
         [56.7430,  0.8478,  2.6788],
         [55.2384,  0.8041,  2.6675],
         [55.6727,  0.8564,  2.4556]],

        [[21.7947,  0.5610,  2.9501],
         [55.8125,  0.7014,  2.4450],
         [56.4609,  0.5818,  2.6076],
         ...,
         [56.2280,  1.2968,  4.5908],
         [11.95


Train Diffusion:   5%|▌         | 460/8500 [27:57<7:34:22,  3.39s/it][A
Train Diffusion:   5%|▌         | 461/8500 [28:01<7:43:02,  3.46s/it][A
Train Diffusion:   5%|▌         | 462/8500 [28:04<7:42:29,  3.45s/it][A
Train Diffusion:   5%|▌         | 463/8500 [28:08<7:46:57,  3.49s/it][A
Train Diffusion:   5%|▌         | 464/8500 [28:11<7:58:05,  3.57s/it][A
Train Diffusion:   5%|▌         | 465/8500 [28:15<7:46:19,  3.48s/it][A
Train Diffusion:   5%|▌         | 466/8500 [28:18<7:46:42,  3.49s/it][A
Train Diffusion:   5%|▌         | 467/8500 [28:22<7:57:52,  3.57s/it][A
Train Diffusion:   6%|▌         | 468/8500 [28:25<7:52:49,  3.53s/it][A
Train Diffusion:   6%|▌         | 469/8500 [28:29<7:50:09,  3.51s/it][A

Moving average ELBO loss at 470 iterations is: 7278915.85. Best ELBO loss value is: 6171642.0.

C_PATH mean = tensor([[54.9019,  0.8062,  2.5166],
        [55.0090,  0.8050,  2.5284],
        [55.0871,  0.8093,  2.5317],
        [55.1687,  0.8064,  2.5165],
        [54.8232,  0.8086,  2.5326]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.7953,  0.5505,  2.7237],
         [55.1003,  0.8290,  2.6512],
         [56.0883,  0.6001,  2.5555],
         ...,
         [54.7162,  0.7010,  2.5578],
         [54.4192,  0.6576,  2.6567],
         [57.9230,  0.8106,  2.5079]],

        [[21.7960,  0.3561,  2.7678],
         [55.9703,  0.7927,  2.5955],
         [55.8157,  0.7991,  2.5338],
         ...,
         [56.4603,  0.8852,  2.5290],
         [55.7515,  0.8003,  2.5392],
         [56.2786,  0.7985,  2.4970]],

        [[18.6482,  1.4033,  2.4758],
         [53.3448,  0.8755,  3.9791],
         [55.6468,  0.7984,  2.4291],
         ...,
         [56.4638,  0.8869,  2.5290],
         [55.7


Train Diffusion:   6%|▌         | 470/8500 [28:32<7:39:45,  3.44s/it][A
Train Diffusion:   6%|▌         | 471/8500 [28:35<7:32:20,  3.38s/it][A
Train Diffusion:   6%|▌         | 472/8500 [28:39<7:38:08,  3.42s/it][A
Train Diffusion:   6%|▌         | 473/8500 [28:42<7:30:36,  3.37s/it][A
Train Diffusion:   6%|▌         | 474/8500 [28:45<7:29:46,  3.36s/it][A
Train Diffusion:   6%|▌         | 475/8500 [28:49<7:32:23,  3.38s/it][A
Train Diffusion:   6%|▌         | 476/8500 [28:52<7:27:03,  3.34s/it][A
Train Diffusion:   6%|▌         | 477/8500 [28:56<7:32:31,  3.38s/it][A
Train Diffusion:   6%|▌         | 478/8500 [28:59<7:29:54,  3.37s/it][A
Train Diffusion:   6%|▌         | 479/8500 [29:02<7:27:54,  3.35s/it][A

Moving average ELBO loss at 480 iterations is: 7360016.3. Best ELBO loss value is: 6146244.0.

C_PATH mean = tensor([[54.8321,  0.8222,  2.5208],
        [54.8134,  0.8119,  2.5262],
        [54.6563,  0.8378,  2.5321],
        [54.8478,  0.8246,  2.5383],
        [54.8993,  0.8143,  2.5224]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.7398,  0.7251,  2.6933],
         [54.9517,  0.7637,  2.6435],
         [55.4156,  0.7986,  2.5651],
         ...,
         [56.1263,  0.8261,  2.5592],
         [55.9940,  0.8144,  2.4365],
         [55.5301,  0.8023,  2.3009]],

        [[21.6668,  0.7613,  2.7319],
         [55.6002,  0.8432,  2.5858],
         [55.3881,  0.8821,  2.4224],
         ...,
         [56.3796,  0.8770,  2.5394],
         [54.8743,  0.7163,  2.6343],
         [57.6576,  0.8196,  2.5133]],

        [[21.6810,  0.4149,  2.8070],
         [55.8991,  0.8231,  2.6248],
         [55.4153,  0.8745,  2.5470],
         ...,
         [56.4510,  0.7935,  2.3917],
         [53.09


Train Diffusion:   6%|▌         | 480/8500 [29:06<7:40:29,  3.45s/it][A
Train Diffusion:   6%|▌         | 481/8500 [29:09<7:44:29,  3.48s/it][A
Train Diffusion:   6%|▌         | 482/8500 [29:13<7:48:44,  3.51s/it][A
Train Diffusion:   6%|▌         | 483/8500 [29:17<7:50:54,  3.52s/it][A
Train Diffusion:   6%|▌         | 484/8500 [29:20<7:59:58,  3.59s/it][A
Train Diffusion:   6%|▌         | 485/8500 [29:24<8:18:54,  3.73s/it][A
Train Diffusion:   6%|▌         | 486/8500 [29:29<8:41:38,  3.91s/it][A
Train Diffusion:   6%|▌         | 487/8500 [29:32<8:31:30,  3.83s/it][A
Train Diffusion:   6%|▌         | 488/8500 [29:36<8:26:04,  3.79s/it][A
Train Diffusion:   6%|▌         | 489/8500 [29:39<8:10:23,  3.67s/it][A

Moving average ELBO loss at 490 iterations is: 7622534.85. Best ELBO loss value is: 5584225.5.

C_PATH mean = tensor([[54.8397,  0.8280,  2.5264],
        [54.4909,  0.8411,  2.5288],
        [54.6538,  0.8326,  2.5231],
        [54.6746,  0.8308,  2.5244],
        [54.4382,  0.8521,  2.5255]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.5126,  0.8685,  2.7830],
         [55.5360,  0.7167,  2.5551],
         [56.0900,  0.7604,  2.5628],
         ...,
         [54.7668,  0.8567,  2.5751],
         [55.5723,  0.8165,  2.5241],
         [56.0418,  0.8016,  2.3889]],

        [[21.7403,  0.3553,  2.5223],
         [54.9669,  0.8474,  2.6313],
         [54.1209,  0.6412,  2.6038],
         ...,
         [56.1943,  0.8898,  2.5165],
         [54.6063,  0.8299,  2.5680],
         [55.8887,  0.7806,  2.4941]],

        [[21.7486,  0.3882,  2.6115],
         [55.3571,  0.8173,  2.5219],
         [55.3543,  0.6941,  2.4652],
         ...,
         [56.4265,  0.8434,  2.4284],
         [55.7


Train Diffusion:   6%|▌         | 490/8500 [29:44<8:51:06,  3.98s/it][A
Train Diffusion:   6%|▌         | 491/8500 [29:49<9:07:18,  4.10s/it][A
Train Diffusion:   6%|▌         | 492/8500 [29:53<9:13:34,  4.15s/it][A
Train Diffusion:   6%|▌         | 493/8500 [29:57<9:04:21,  4.08s/it][A
Train Diffusion:   6%|▌         | 494/8500 [30:01<8:58:22,  4.03s/it][A
Train Diffusion:   6%|▌         | 495/8500 [30:05<9:18:04,  4.18s/it][A
Train Diffusion:   6%|▌         | 496/8500 [30:09<9:05:37,  4.09s/it][A
Train Diffusion:   6%|▌         | 497/8500 [30:13<9:09:26,  4.12s/it][A
Train Diffusion:   6%|▌         | 498/8500 [30:17<8:56:55,  4.03s/it][A
Train Diffusion:   6%|▌         | 499/8500 [30:21<9:01:47,  4.06s/it][A

Moving average ELBO loss at 500 iterations is: 7601561.75. Best ELBO loss value is: 5584225.5.

C_PATH mean = tensor([[54.3053,  0.8341,  2.5180],
        [54.3319,  0.8549,  2.5294],
        [54.6403,  0.8303,  2.5241],
        [54.6496,  0.8299,  2.5333],
        [54.5578,  0.8374,  2.5251]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[21.0558,  1.3940,  2.1398],
         [29.0401,  2.7347,  2.1243],
         [55.0433,  0.7769,  2.4313],
         ...,
         [53.6137,  0.8927,  2.5610],
         [55.7751,  0.6723,  2.4601],
         [49.1424,  0.5726,  2.5028]],

        [[21.3434,  0.8881,  2.2778],
         [54.0729,  1.2206,  2.4930],
         [55.3454,  0.8210,  2.5355],
         ...,
         [56.0508,  0.8927,  2.5753],
         [55.1925,  0.8407,  2.5796],
         [54.9245,  0.8916,  2.3672]],

        [[22.1033,  0.4029,  2.4984],
         [53.5006,  0.7539,  2.7841],
         [54.8400,  0.7652,  2.5841],
         ...,
         [55.4826,  0.8211,  2.4691],
         [54.4


Train Diffusion:   6%|▌         | 500/8500 [30:26<9:20:34,  4.20s/it][A
Train Diffusion:   6%|▌         | 501/8500 [30:29<8:56:32,  4.02s/it][A
Train Diffusion:   6%|▌         | 502/8500 [30:34<9:14:48,  4.16s/it][A
Train Diffusion:   6%|▌         | 503/8500 [30:39<9:44:58,  4.39s/it][A
Train Diffusion:   6%|▌         | 504/8500 [30:43<9:35:37,  4.32s/it][A
Train Diffusion:   6%|▌         | 505/8500 [30:47<9:26:12,  4.25s/it][A
Train Diffusion:   6%|▌         | 506/8500 [30:51<9:03:19,  4.08s/it][A
Train Diffusion:   6%|▌         | 507/8500 [30:55<8:59:25,  4.05s/it][A
Train Diffusion:   6%|▌         | 508/8500 [30:59<9:05:59,  4.10s/it][A

In [None]:
# Write the `net` object and the `ELBO_hist` record to disk. Both filenames embed
# the run settings (t, dt_flow, niter) and one shared timestamp, so the two files
# produced by this cell can be matched to each other.
now = datetime.now()
now_string = format(now, "%Y_%m_%d_%H_%M_%S")  # same result as now.strftime(...)

# NOTE(review): the whole `net` object is handed to torch.save (not a state_dict);
# presumably loading it later needs the same class definitions importable -- confirm.
net_file = f'net_t_{t}_dt_{dt_flow}_iter_{niter}_{now_string}.pt'
torch.save(net, net_file)

elbo_file = f'ELBO_t_{t}_dt_{dt_flow}_iter_{niter}_{now_string}.pt'
torch.save(ELBO_hist, elbo_file)