In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [3]:
# Reproducibility and compute device.
torch.manual_seed(0)
# NOTE(review): `cuda_id` is not defined in this notebook; presumably it is
# provided by one of the star imports above. It is only evaluated when CUDA is
# available, so a missing name would surface only on GPU machines -- confirm.
devi = torch.device(f"cuda:{cuda_id}" if torch.cuda.is_available() else "cpu")

# Time grid used by the flow.
dt_flow = 0.2  # SDE discretization timestep.
t = 250  # Simulation run for T hours.
n = int(t / dt_flow) + 1  # Number of grid points, both endpoints included.
t_span = np.linspace(0, t, n)
# The grid is also needed as a (1, n, 1) tensor; the litter-input and temperature
# helpers are called with this tensor further down.
t_span_tensor = torch.Tensor(t_span).reshape(1, n, 1)

# Optimisation settings.
niter = 800
piter = 100
pretrain_lr = 1e-2
train_lr = 1e-3
batch_size = 10  # Number of sets of observation outputs to sample per set of parameters.

# State dimensions (CO2 is excluded because it is an observation, not a state).
state_dim_SCON = 3
state_dim_SAWB = 4

In [4]:
# Temperature forcing settings.
temp_ref = 283
temp_rise = 5  # High estimate of 5 celsius temperature rise by 2100.

# System parameters from deterministic CON model.
u_M = 0.002
a_SD = 0.33
a_DS = 0.33
a_M = 0.33
a_MSC = 0.5
k_S_ref = 0.000025
k_D_ref = 0.005
k_M_ref = 0.0002
Ea_S = 75
Ea_D = 50
Ea_M = 50

# SCON diffusion matrix parameters.
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.05

# Bundle every parameter under its own name; key order matches the listing above.
SCON_C_params_dict = dict(
    u_M=u_M,
    a_SD=a_SD,
    a_DS=a_DS,
    a_M=a_M,
    a_MSC=a_MSC,
    k_S_ref=k_S_ref,
    k_D_ref=k_D_ref,
    k_M_ref=k_M_ref,
    Ea_S=Ea_S,
    Ea_D=Ea_D,
    Ea_M=Ea_M,
    c_SOC=c_SOC,
    c_DOC=c_DOC,
    c_MBC=c_MBC,
)

In [5]:
obs_error_scale = 0.1

# Gaussian prior over the initial state, centred on x0_SCON. The lower-triangular
# scale factor is diagonal: each diagonal entry is obs_error_scale times the
# matching component of the mean.
x0_SCON = [37, 0.1, 0.9]
x0_SCON_tensor = torch.tensor(x0_SCON)
x0_prior_SCON = D.multivariate_normal.MultivariateNormal(
    loc=x0_SCON_tensor,
    scale_tril=torch.eye(state_dim_SCON) * obs_error_scale * x0_SCON_tensor,
)

In [6]:
# Load the observation times, means and error scales from the CSV file.
# NOTE(review): the exact shapes/types returned by csv_to_obs_df are defined in
# the imported project module and are not visible here.
obs_times, obs_means_CON, obs_error_CON = csv_to_obs_df(
    'y_from_x_t_250_dt_0-01.csv',
    state_dim_SCON,
    t,
    obs_error_scale,
)

In [7]:
# Temperature forcing evaluated on the flow time grid.
temp_tensor = temp_gen(
    t_span_tensor,
    temp_ref,
    temp_rise,
)

# Litter input vectors for the SOC and DOC pools, used by the flow SDE functions.
i_s_tensor = i_s(t_span_tensor)  # Exogenous SOC input function
i_d_tensor = i_d(t_span_tensor)  # Exogenous DOC input function

In [8]:
# Observation model for the CON system, built from the loaded observation
# times, means and error scales on the flow's time step.
obs_model_CON_noCO2 = ObsModel(
    DEVICE = devi,
    TIMES = obs_times,
    DT = dt_flow,
    MU = obs_means_CON,
    SCALE = obs_error_CON,
)

In [9]:
def calc_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT):
    """Log-density of sampled state paths under an Euler-Maruyama discretisation.

    Each step is scored with a multivariate normal whose mean is the previous
    state plus ``drift * DT`` and whose lower-triangular scale factor is
    ``diffusion_sqrt * sqrt(DT)``. The per-step log-densities are summed along
    the last remaining axis and the log-density of the first state under
    ``X0_PRIOR`` is added.

    Args:
        C_PATH: rank-3 tensor of paths; axis 1 indexes time steps.
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR: rank-3 inputs; all
            but their final time step are passed to ``DRIFT_DIFFUSION``.
        DT: discretisation step size.
        TEMP_REF: forwarded unchanged to ``DRIFT_DIFFUSION``.
        DRIFT_DIFFUSION: callable returning ``(drift, diffusion_sqrt)``.
        X0_PRIOR: distribution object exposing ``log_prob`` for the first state.
        PARAMS_DICT: forwarded unchanged to ``DRIFT_DIFFUSION``.

    Returns:
        Tensor of log p(x | theta), one entry per path (batch_size, ).
    """
    states_before = C_PATH[:, :-1, :]  # x_0 ... x_{N-2}: conditioning states
    states_after = C_PATH[:, 1:, :]    # x_1 ... x_{N-1}: states being scored
    initial_state = C_PATH[:, 0, :]

    drift, diffusion_sqrt = DRIFT_DIFFUSION(
        states_before,
        T_SPAN_TENSOR[:, :-1, :],
        I_S_TENSOR[:, :-1, :],
        I_D_TENSOR[:, :-1, :],
        TEMP_TENSOR[:, :-1, :],
        TEMP_REF,
        PARAMS_DICT,
    )

    transition = D.multivariate_normal.MultivariateNormal(
        loc = states_before + drift * DT,
        scale_tril = diffusion_sqrt * math.sqrt(DT),
    )

    # log p(x | theta) = log p(x | x0, theta) + log p(x0 | theta)
    path_ll = transition.log_prob(states_after).sum(-1)
    path_ll += X0_PRIOR.log_prob(initial_state)

    return path_ll

In [10]:
def train(DEVICE, PRETRAIN_LR, TRAIN_LR, NITER, PRETRAIN_ITER, BATCH_SIZE, OBS_MODEL,
          STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF,
          DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT,
          LEARN_PARAMS = False, LR_DECAY = 0.1, DECAY_STEP_SIZE = 1000, PRINT_EVERY = 500):
    """Train an SDEFlow network in two phases and return it with its loss history.

    Phase 1 (iterations ``0 .. PRETRAIN_ITER - 1``) minimises an L1 distance
    between the sampled paths and the time-averaged observation means.
    Phase 2 (the remaining iterations) minimises the loss assembled from the
    parameter prior/posterior terms, the path log-likelihood from
    ``calc_log_lik``, the ``OBS_MODEL`` term and the flow's own ``log_prob``.

    Args:
        DEVICE: torch device the network and forcing tensors are moved to.
        PRETRAIN_LR: Adam learning rate used during phase 1.
        TRAIN_LR: learning rate switched to at iteration ``PRETRAIN_ITER``.
        NITER: total number of iterations (both phases).
        PRETRAIN_ITER: number of phase-1 iterations; must be < ``NITER``.
        BATCH_SIZE: number of paths requested from the network per iteration.
        OBS_MODEL: observation model; its ``mu`` attribute is read in phase 1
            and it is called as ``OBS_MODEL(C_PATH, theta_dict)`` in phase 2.
        STATE_DIM, T, DT, N: forwarded to the ``SDEFlow`` constructor; ``DT`` is
            also the step size passed to ``calc_log_lik``.
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR: forcing tensors
            forwarded to ``calc_log_lik`` after being moved to ``DEVICE``.
        TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR: forwarded to ``calc_log_lik``.
        PARAMS_DICT: SDE parameters; used directly unless ``LEARN_PARAMS``.
        LEARN_PARAMS: if true, parameters are sampled from a ``MeanField``
            posterior each phase-2 iteration instead of being held fixed.
        LR_DECAY: factor applied to the learning rate at each decay step.
        DECAY_STEP_SIZE: the learning rate is decayed on iterations that are
            multiples of this value and greater than ``PRETRAIN_ITER``.
        PRINT_EVERY: progress is printed every this many iterations.

    Returns:
        ``(net, ELBO_losses)``: the trained network and the list of phase-2
        loss values (one float per phase-2 iteration).

    Raises:
        ValueError: if ``PRETRAIN_ITER >= NITER``.
    """
    # Validate before building the network so a bad configuration fails immediately.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")

    net = SDEFlow(DEVICE, OBS_MODEL, STATE_DIM, T, DT, N, num_layers = 7).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = PRETRAIN_LR)

    if LEARN_PARAMS:
        # NOTE(review): the optimizer above only receives net.parameters(); if
        # MeanField holds learnable parameters they are never updated -- confirm
        # against the MeanField definition and add them to the optimizer if so.
        theta_post = MeanField(PARAMS_DICT)
        theta_prior = D.normal.Normal(torch.zeros_like(theta_post.means),
                                      torch.ones_like(theta_post.std))

    # Move the forcing tensors to DEVICE once instead of on every iteration.
    # TEMP_TENSOR is included so all inputs to calc_log_lik share one device.
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)

    # Best losses are tracked as plain floats so no autograd graph is kept alive.
    best_loss_norm = 1e10
    best_loss_ELBO = 1e10
    norm_losses = []
    ELBO_losses = []

    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for it in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #Obtain paths with solutions at times after t0.

            if it < PRETRAIN_ITER:
                # Phase 1: pull the sampled paths towards the mean observations.
                # NOTE(review): the [:3] slice is hard-coded; presumably it selects
                # the state rows of OBS_MODEL.mu -- confirm before using STATE_DIM != 3.
                l1_norm_element = C_PATH - torch.mean(OBS_MODEL.mu[:3], -1)
                # The sum already reduces to a scalar, so no further averaging is applied.
                l1_norm = torch.sum(torch.abs(l1_norm_element))
                l1_value = l1_norm.item()
                best_loss_norm = min(best_loss_norm, l1_value)
                norm_losses.append(l1_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average norm loss at {it + 1} iterations is: {sum(norm_losses[-10:]) / len(norm_losses[-10:])}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()

            else:
                # Phase 2: variational objective.
                if LEARN_PARAMS:
                    theta_dict, theta, log_q_theta = theta_post()
                    log_p_theta = theta_prior.log_prob(theta).sum(-1)
                else:
                    theta_dict = PARAMS_DICT
                    # Fixed parameters contribute nothing to the objective.
                    log_q_theta, log_p_theta = torch.zeros(2, device = DEVICE)
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev,
                                       temp_dev, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, theta_dict)

                # - log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)
                # (this quantity is minimised, i.e. it is the negative ELBO)
                ELBO = -log_p_theta.mean() + log_q_theta.mean() - log_lik.mean() - OBS_MODEL(C_PATH, theta_dict) + log_prob.mean()
                elbo_value = ELBO.item()
                best_loss_ELBO = min(best_loss_ELBO, elbo_value)
                ELBO_losses.append(elbo_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average ELBO loss at {it + 1} iterations is: {sum(ELBO_losses[-10:]) / len(ELBO_losses[-10:])}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\n C_PATH =', C_PATH)
                    print(theta_dict)
                ELBO.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            # Learning-rate schedule: switch to TRAIN_LR when phase 2 begins, then
            # decay on every multiple of DECAY_STEP_SIZE after that.
            if it == PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] = TRAIN_LR
            elif it % DECAY_STEP_SIZE == 0 and it > PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] *= LR_DECAY
            optimizer.step()
            tq.update()

    return net, ELBO_losses

In [None]:
# Train the SCON flow with the settings defined above; every argument is passed
# by keyword so the binding to train()'s parameters is explicit.
net_batch_10_dt_flow_0_2, elbo_hist_batch_10_dt_flow_0_2 = train(
    DEVICE = devi,
    PRETRAIN_LR = pretrain_lr,
    TRAIN_LR = train_lr,
    NITER = niter,
    PRETRAIN_ITER = piter,
    BATCH_SIZE = batch_size,
    OBS_MODEL = obs_model_CON_noCO2,
    STATE_DIM = state_dim_SCON,
    T = t,
    DT = dt_flow,
    N = n,
    T_SPAN_TENSOR = t_span_tensor,
    I_S_TENSOR = i_s_tensor,
    I_D_TENSOR = i_d_tensor,
    TEMP_TENSOR = temp_tensor,
    TEMP_REF = temp_ref,
    DRIFT_DIFFUSION = drift_diffusion_SCON_C,
    X0_PRIOR = x0_prior_SCON,
    PARAMS_DICT = SCON_C_params_dict,
    LR_DECAY = 0.1,
    DECAY_STEP_SIZE = 5000,
    PRINT_EVERY = 20,
)


Train Diffusion:   0%|          | 0/800 [00:00<?, ?it/s][A
Train Diffusion:   0%|          | 1/800 [00:07<1:33:57,  7.06s/it][A
Train Diffusion:   0%|          | 2/800 [00:13<1:32:10,  6.93s/it][A
Train Diffusion:   0%|          | 3/800 [00:21<1:35:45,  7.21s/it][A
Train Diffusion:   0%|          | 4/800 [00:28<1:35:53,  7.23s/it][A
Train Diffusion:   1%|          | 5/800 [00:35<1:31:34,  6.91s/it][A
Train Diffusion:   1%|          | 6/800 [00:41<1:28:22,  6.68s/it][A
Train Diffusion:   1%|          | 7/800 [00:48<1:31:41,  6.94s/it][A
Train Diffusion:   1%|          | 8/800 [00:55<1:29:45,  6.80s/it][A
Train Diffusion:   1%|          | 9/800 [01:03<1:35:18,  7.23s/it][A
Train Diffusion:   1%|▏         | 10/800 [01:09<1:32:06,  7.00s/it][A
Train Diffusion:   1%|▏         | 11/800 [01:16<1:30:25,  6.88s/it][A
Train Diffusion:   2%|▏         | 12/800 [01:23<1:30:26,  6.89s/it][A
Train Diffusion:   2%|▏         | 13/800 [01:30<1:29:38,  6.83s/it][A
Train Diffusion:   2%|▏  

Moving average norm loss at 20 iterations is: 381897.159375. Best norm loss value is: 379952.25.

C_PATH mean = tensor([[1.7509, 1.7300, 1.7481],
        [1.7526, 1.7299, 1.7384],
        [1.7506, 1.7317, 1.7354],
        [1.7515, 1.7240, 1.7422],
        [1.7556, 1.7319, 1.7437],
        [1.7487, 1.7273, 1.7426],
        [1.7550, 1.7302, 1.7380],
        [1.7465, 1.7366, 1.7461],
        [1.7515, 1.7347, 1.7477],
        [1.7488, 1.7269, 1.7395]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.2846, 2.0741, 1.7830],
         [1.5008, 1.5309, 1.5655],
         [1.5080, 1.4883, 1.4751],
         ...,
         [1.7999, 2.0723, 1.9963],
         [1.8099, 1.9339, 1.8996],
         [1.9000, 1.9868, 1.8119]],

        [[2.2386, 1.5710, 1.6057],
         [1.5871, 1.5022, 1.5390],
         [1.5120, 1.5168, 1.4998],
         ...,
         [2.1553, 2.1221, 1.9819],
         [1.6007, 1.8264, 2.1494],
         [1.8871, 1.9030, 2.1736]],

        [[1.2243, 1.5896, 1.5379],
         [1.5409, 1.5224,


Train Diffusion:   2%|▎         | 20/800 [02:26<1:41:55,  7.84s/it][A
Train Diffusion:   3%|▎         | 21/800 [02:34<1:41:08,  7.79s/it][A
Train Diffusion:   3%|▎         | 22/800 [02:41<1:40:02,  7.71s/it][A
Train Diffusion:   3%|▎         | 23/800 [02:48<1:35:23,  7.37s/it][A
Train Diffusion:   3%|▎         | 24/800 [02:55<1:35:47,  7.41s/it][A
Train Diffusion:   3%|▎         | 25/800 [03:01<1:31:26,  7.08s/it][A
Train Diffusion:   3%|▎         | 26/800 [03:08<1:28:09,  6.83s/it][A
Train Diffusion:   3%|▎         | 27/800 [03:14<1:26:38,  6.72s/it][A
Train Diffusion:   4%|▎         | 28/800 [03:21<1:25:15,  6.63s/it][A
Train Diffusion:   4%|▎         | 29/800 [03:27<1:23:47,  6.52s/it][A
Train Diffusion:   4%|▍         | 30/800 [03:33<1:22:41,  6.44s/it][A
Train Diffusion:   4%|▍         | 31/800 [03:40<1:22:16,  6.42s/it][A
Train Diffusion:   4%|▍         | 32/800 [03:46<1:22:52,  6.47s/it][A
Train Diffusion:   4%|▍         | 33/800 [03:53<1:25:43,  6.71s/it][A
Train

Moving average norm loss at 40 iterations is: 367635.809375. Best norm loss value is: 335138.8125.

C_PATH mean = tensor([[6.3320, 1.3909, 2.1964],
        [6.4895, 1.2605, 2.1353],
        [6.6678, 1.3695, 2.1205],
        [6.6565, 1.3231, 2.1691],
        [6.7758, 1.1778, 2.1829],
        [6.6315, 1.3833, 2.2206],
        [6.1893, 1.2581, 2.1604],
        [6.6728, 1.2616, 2.1515],
        [6.3876, 1.2707, 2.1179],
        [6.4209, 1.2913, 2.2077]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[3.4397e+00, 9.6461e-01, 7.9647e-01],
         [1.3439e+01, 3.0523e-01, 1.1216e+01],
         [2.7091e+00, 8.2507e-01, 1.2765e+00],
         ...,
         [6.6265e-01, 8.3804e-01, 1.0045e+00],
         [9.9052e-01, 5.4575e+00, 2.9963e-01],
         [3.0741e-01, 1.8066e+00, 3.4728e+00]],

        [[4.1695e+00, 5.6732e-01, 9.8999e-01],
         [2.8062e+01, 1.3180e+00, 6.7675e-01],
         [1.6098e+00, 8.3818e-01, 9.3220e-01],
         ...,
         [1.6359e+01, 2.3367e-01, 9.1332e-01],
         [


Train Diffusion:   5%|▌         | 40/800 [04:47<1:40:44,  7.95s/it][A
Train Diffusion:   5%|▌         | 41/800 [04:55<1:37:31,  7.71s/it][A
Train Diffusion:   5%|▌         | 42/800 [05:02<1:35:49,  7.59s/it][A
Train Diffusion:   5%|▌         | 43/800 [05:09<1:32:15,  7.31s/it][A
Train Diffusion:   6%|▌         | 44/800 [05:15<1:29:26,  7.10s/it][A
Train Diffusion:   6%|▌         | 45/800 [05:22<1:27:28,  6.95s/it][A
Train Diffusion:   6%|▌         | 46/800 [05:28<1:25:51,  6.83s/it][A
Train Diffusion:   6%|▌         | 47/800 [05:35<1:25:01,  6.77s/it][A
Train Diffusion:   6%|▌         | 48/800 [05:42<1:24:15,  6.72s/it][A
Train Diffusion:   6%|▌         | 49/800 [05:48<1:23:14,  6.65s/it][A
Train Diffusion:   6%|▋         | 50/800 [05:55<1:23:08,  6.65s/it][A
Train Diffusion:   6%|▋         | 51/800 [06:01<1:23:00,  6.65s/it][A
Train Diffusion:   6%|▋         | 52/800 [06:08<1:22:03,  6.58s/it][A
Train Diffusion:   7%|▋         | 53/800 [06:14<1:22:17,  6.61s/it][A
Train

Moving average norm loss at 60 iterations is: 112779.0546875. Best norm loss value is: 83712.953125.

C_PATH mean = tensor([[26.6629,  0.4974,  1.7461],
        [27.0389,  0.4250,  1.6242],
        [27.1735,  0.4974,  1.6965],
        [26.3932,  0.4856,  1.7063],
        [25.5605,  0.4834,  1.7415],
        [26.8556,  0.4620,  1.7027],
        [26.3027,  0.4831,  1.7005],
        [26.4517,  0.4502,  1.6650],
        [26.1850,  0.4915,  1.6974],
        [26.5002,  0.4551,  1.6700]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.3023e+00, 7.0142e+00, 1.7357e+00],
         [3.1309e+01, 5.8571e-02, 1.5187e+00],
         [3.3020e+01, 1.9351e-01, 1.4948e+00],
         ...,
         [3.3108e+01, 5.6870e-01, 1.6273e+00],
         [2.9028e+01, 1.4007e-01, 1.4052e+00],
         [2.7867e+01, 2.8911e+00, 1.1363e+00]],

        [[3.0953e-02, 1.9256e+00, 3.3584e-03],
         [2.5586e-03, 1.4130e-04, 6.7050e-05],
         [1.0000e-06, 1.2954e-06, 1.7948e-03],
         ...,
         [2.9588e+01, 3.0


Train Diffusion:   8%|▊         | 60/800 [07:01<1:22:16,  6.67s/it][A
Train Diffusion:   8%|▊         | 61/800 [07:08<1:22:01,  6.66s/it][A
Train Diffusion:   8%|▊         | 62/800 [07:14<1:21:25,  6.62s/it][A
Train Diffusion:   8%|▊         | 63/800 [07:21<1:21:06,  6.60s/it][A
Train Diffusion:   8%|▊         | 64/800 [07:27<1:20:30,  6.56s/it][A
Train Diffusion:   8%|▊         | 65/800 [07:34<1:20:11,  6.55s/it][A
Train Diffusion:   8%|▊         | 66/800 [07:40<1:20:15,  6.56s/it][A
Train Diffusion:   8%|▊         | 67/800 [07:47<1:19:51,  6.54s/it][A
Train Diffusion:   8%|▊         | 68/800 [07:53<1:20:20,  6.59s/it][A
Train Diffusion:   9%|▊         | 69/800 [08:00<1:19:38,  6.54s/it][A
Train Diffusion:   9%|▉         | 70/800 [08:06<1:19:19,  6.52s/it][A
Train Diffusion:   9%|▉         | 71/800 [08:13<1:20:29,  6.62s/it][A
Train Diffusion:   9%|▉         | 72/800 [08:20<1:20:42,  6.65s/it][A
Train Diffusion:   9%|▉         | 73/800 [08:26<1:20:10,  6.62s/it][A
Train

Moving average norm loss at 80 iterations is: 60384.579296875. Best norm loss value is: 55450.25.

C_PATH mean = tensor([[27.7152,  0.4300,  1.7883],
        [27.3397,  0.4575,  1.8120],
        [27.9061,  0.4668,  1.8064],
        [27.7093,  0.4900,  1.8019],
        [28.5804,  0.4833,  1.8088],
        [27.1134,  0.4265,  1.7759],
        [28.3807,  0.4721,  1.7874],
        [27.3272,  0.4365,  1.7870],
        [27.5800,  0.4618,  1.8114],
        [27.4404,  0.4498,  1.8247]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[9.1116e+00, 4.5913e+00, 1.8370e+00],
         [3.1804e+01, 3.0469e-01, 1.4961e+00],
         [3.0762e+01, 5.2802e-01, 1.9793e+00],
         ...,
         [1.0001e-06, 1.0002e-03, 1.7351e+00],
         [1.0000e-06, 1.3169e-03, 2.0040e+00],
         [1.0002e-06, 6.6393e-04, 1.7280e+00]],

        [[7.2792e+00, 6.4669e+00, 2.0296e+00],
         [2.9179e+01, 2.4945e-01, 1.6481e+00],
         [3.0784e+01, 5.6078e-01, 1.9823e+00],
         ...,
         [3.0549e+01, 2.9646


Train Diffusion:  10%|█         | 80/800 [09:12<1:17:12,  6.43s/it][A
Train Diffusion:  10%|█         | 81/800 [09:18<1:16:12,  6.36s/it][A
Train Diffusion:  10%|█         | 82/800 [09:24<1:15:14,  6.29s/it][A
Train Diffusion:  10%|█         | 83/800 [09:30<1:14:49,  6.26s/it][A
Train Diffusion:  10%|█         | 84/800 [09:36<1:14:03,  6.21s/it][A
Train Diffusion:  11%|█         | 85/800 [09:42<1:13:09,  6.14s/it][A
Train Diffusion:  11%|█         | 86/800 [09:48<1:12:27,  6.09s/it][A
Train Diffusion:  11%|█         | 87/800 [09:54<1:11:52,  6.05s/it][A
Train Diffusion:  11%|█         | 88/800 [10:00<1:11:25,  6.02s/it][A
Train Diffusion:  11%|█         | 89/800 [10:06<1:11:19,  6.02s/it][A
Train Diffusion:  11%|█▏        | 90/800 [10:13<1:12:12,  6.10s/it][A
Train Diffusion:  11%|█▏        | 91/800 [10:19<1:11:56,  6.09s/it][A
Train Diffusion:  12%|█▏        | 92/800 [10:25<1:11:16,  6.04s/it][A
Train Diffusion:  12%|█▏        | 93/800 [10:30<1:10:53,  6.02s/it][A
Train

Moving average norm loss at 100 iterations is: 53113.19765625. Best norm loss value is: 49166.734375.

C_PATH mean = tensor([[27.7916,  0.5724,  1.9049],
        [27.3221,  0.5488,  1.8774],
        [27.2450,  0.5389,  1.8816],
        [27.4682,  0.5538,  1.8929],
        [27.1591,  0.5731,  1.8963],
        [27.7316,  0.5479,  1.8929],
        [27.6310,  0.5518,  1.8835],
        [27.9796,  0.5473,  1.8647],
        [27.4306,  0.5586,  1.8807],
        [26.4887,  0.5373,  1.9020]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.2108e+00, 5.3526e+00, 1.8465e+00],
         [3.0178e+01, 4.9161e-01, 1.7956e+00],
         [3.0600e+01, 4.6008e-01, 1.8360e+00],
         ...,
         [3.3586e-06, 1.9063e-02, 2.0241e+00],
         [1.0000e-06, 4.4344e-02, 1.8207e+00],
         [2.5078e+01, 6.5828e-01, 8.1428e+00]],

        [[9.4245e+00, 4.7714e+00, 1.8381e+00],
         [3.0254e+01, 6.5117e-01, 1.7601e+00],
         [3.0571e+01, 5.4985e-01, 1.8341e+00],
         ...,
         [3.0932e+01, 4.


Train Diffusion:  12%|█▎        | 100/800 [11:13<1:10:13,  6.02s/it][A
Train Diffusion:  13%|█▎        | 101/800 [11:19<1:10:14,  6.03s/it][A
Train Diffusion:  13%|█▎        | 102/800 [11:25<1:10:07,  6.03s/it][A
Train Diffusion:  13%|█▎        | 103/800 [11:31<1:10:03,  6.03s/it][A
Train Diffusion:  13%|█▎        | 104/800 [11:37<1:09:50,  6.02s/it][A
Train Diffusion:  13%|█▎        | 105/800 [11:43<1:10:16,  6.07s/it][A
Train Diffusion:  13%|█▎        | 106/800 [11:49<1:10:15,  6.07s/it][A
Train Diffusion:  13%|█▎        | 107/800 [11:55<1:10:21,  6.09s/it][A
Train Diffusion:  14%|█▎        | 108/800 [12:01<1:09:46,  6.05s/it][A
Train Diffusion:  14%|█▎        | 109/800 [12:07<1:09:25,  6.03s/it][A
Train Diffusion:  14%|█▍        | 110/800 [12:13<1:09:48,  6.07s/it][A
Train Diffusion:  14%|█▍        | 111/800 [12:19<1:09:30,  6.05s/it][A
Train Diffusion:  14%|█▍        | 112/800 [12:25<1:09:09,  6.03s/it][A
Train Diffusion:  14%|█▍        | 113/800 [12:31<1:09:00,  6.03

Moving average ELBO loss at 120 iterations is: 629706.715625. Best ELBO loss value is: 463547.21875.

C_PATH mean = tensor([[27.5298,  0.4484,  2.1868],
        [26.0221,  0.4517,  2.1819],
        [26.3510,  0.4624,  2.1902],
        [27.4738,  0.4753,  2.2157],
        [26.9607,  0.4414,  2.1914],
        [26.8007,  0.4547,  2.2011],
        [27.2558,  0.4605,  2.1747],
        [26.4432,  0.4555,  2.1873],
        [27.4474,  0.4629,  2.1854],
        [27.1161,  0.4460,  2.2280]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[8.7407e+00, 3.0371e+00, 2.9710e+00],
         [3.8698e+01, 6.5641e-01, 2.4711e+00],
         [3.3184e+01, 4.2837e-01, 2.4728e+00],
         ...,
         [2.9654e+01, 4.1770e-01, 2.1759e+00],
         [2.9685e+01, 4.7292e-01, 2.1283e+00],
         [3.0405e+01, 2.9802e-01, 2.1958e+00]],

        [[8.5521e+00, 3.2221e+00, 2.8026e+00],
         [3.7980e+01, 6.2933e-01, 2.4511e+00],
         [1.7230e+01, 1.2782e-02, 2.2845e-01],
         ...,
         [2.9981e+01, 4.


Train Diffusion:  15%|█▌        | 120/800 [13:13<1:08:03,  6.01s/it][A
Train Diffusion:  15%|█▌        | 121/800 [13:19<1:08:13,  6.03s/it][A
Train Diffusion:  15%|█▌        | 122/800 [13:25<1:07:49,  6.00s/it][A
Train Diffusion:  15%|█▌        | 123/800 [13:31<1:07:36,  5.99s/it][A
Train Diffusion:  16%|█▌        | 124/800 [13:37<1:07:45,  6.01s/it][A
Train Diffusion:  16%|█▌        | 125/800 [13:43<1:07:40,  6.02s/it][A
Train Diffusion:  16%|█▌        | 126/800 [13:50<1:07:50,  6.04s/it][A
Train Diffusion:  16%|█▌        | 127/800 [13:56<1:09:03,  6.16s/it][A
Train Diffusion:  16%|█▌        | 128/800 [14:02<1:08:23,  6.11s/it][A
Train Diffusion:  16%|█▌        | 129/800 [14:08<1:08:12,  6.10s/it][A
Train Diffusion:  16%|█▋        | 130/800 [14:14<1:08:03,  6.10s/it][A
Train Diffusion:  16%|█▋        | 131/800 [14:20<1:07:39,  6.07s/it][A
Train Diffusion:  16%|█▋        | 132/800 [14:26<1:07:15,  6.04s/it][A
Train Diffusion:  17%|█▋        | 133/800 [14:32<1:06:58,  6.03

Moving average ELBO loss at 140 iterations is: 270128.659375. Best ELBO loss value is: 241902.734375.

C_PATH mean = tensor([[24.5299,  0.2489,  2.3075],
        [23.9496,  0.2452,  2.2973],
        [24.1409,  0.2517,  2.3094],
        [25.2199,  0.2521,  2.3368],
        [24.2641,  0.2472,  2.3215],
        [23.5755,  0.2394,  2.2861],
        [24.4587,  0.2488,  2.3289],
        [24.2243,  0.2460,  2.3310],
        [24.6748,  0.2488,  2.3251],
        [24.7649,  0.2471,  2.3389]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.1710e+00, 3.0650e+00, 3.4671e+00],
         [7.3801e+00, 8.1306e-01, 2.0539e+00],
         [2.6887e+01, 1.8842e-01, 2.3067e+00],
         ...,
         [2.6496e+01, 2.1454e-01, 2.2687e+00],
         [2.8422e+01, 2.7133e-01, 2.4433e+00],
         [2.8116e+01, 2.0173e-01, 2.3655e+00]],

        [[5.6880e+00, 4.7405e-01, 2.9334e+00],
         [3.5607e+01, 3.5747e-01, 2.4624e+00],
         [2.7010e+01, 1.9226e-01, 2.3182e+00],
         ...,
         [9.9107e-03, 5


Train Diffusion:  18%|█▊        | 140/800 [15:14<1:06:31,  6.05s/it][A
Train Diffusion:  18%|█▊        | 141/800 [15:20<1:06:11,  6.03s/it][A
Train Diffusion:  18%|█▊        | 142/800 [15:26<1:05:48,  6.00s/it][A
Train Diffusion:  18%|█▊        | 143/800 [15:32<1:05:33,  5.99s/it][A
Train Diffusion:  18%|█▊        | 144/800 [15:38<1:05:14,  5.97s/it][A
Train Diffusion:  18%|█▊        | 145/800 [15:44<1:05:01,  5.96s/it][A
Train Diffusion:  18%|█▊        | 146/800 [15:50<1:05:15,  5.99s/it][A
Train Diffusion:  18%|█▊        | 147/800 [15:56<1:05:02,  5.98s/it][A
Train Diffusion:  18%|█▊        | 148/800 [16:02<1:04:48,  5.96s/it][A
Train Diffusion:  19%|█▊        | 149/800 [16:08<1:04:34,  5.95s/it][A
Train Diffusion:  19%|█▉        | 150/800 [16:14<1:04:34,  5.96s/it][A
Train Diffusion:  19%|█▉        | 151/800 [16:20<1:04:27,  5.96s/it][A
Train Diffusion:  19%|█▉        | 152/800 [16:26<1:04:14,  5.95s/it][A
Train Diffusion:  19%|█▉        | 153/800 [16:32<1:04:13,  5.96

Moving average ELBO loss at 160 iterations is: 140204.9265625. Best ELBO loss value is: 116380.984375.

C_PATH mean = tensor([[18.5472,  0.1842,  2.3894],
        [18.6359,  0.1825,  2.4060],
        [19.2672,  0.1819,  2.4246],
        [19.2160,  0.1849,  2.4185],
        [19.1947,  0.1800,  2.4164],
        [19.1978,  0.1851,  2.4209],
        [19.3816,  0.1813,  2.4213],
        [18.7672,  0.1833,  2.4122],
        [18.9966,  0.1827,  2.4033],
        [19.3791,  0.1852,  2.4247]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[4.1849e+00, 1.4041e-01, 2.6160e+00],
         [2.6791e+01, 2.6273e-01, 2.6035e+00],
         [2.0696e+01, 1.5080e-01, 2.4232e+00],
         ...,
         [2.1428e+01, 1.9209e-01, 2.4758e+00],
         [2.2681e+01, 2.2631e-01, 2.5997e+00],
         [2.1418e+01, 1.3100e-01, 2.3099e+00]],

        [[4.0933e+00, 1.5239e-01, 2.5950e+00],
         [2.6190e+01, 2.3775e-01, 2.5713e+00],
         [2.0670e+01, 1.5048e-01, 2.4230e+00],
         ...,
         [2.1248e+01, 


Train Diffusion:  20%|██        | 160/800 [17:13<1:04:00,  6.00s/it][A
Train Diffusion:  20%|██        | 161/800 [17:19<1:04:01,  6.01s/it][A
Train Diffusion:  20%|██        | 162/800 [17:25<1:03:50,  6.00s/it][A
Train Diffusion:  20%|██        | 163/800 [17:31<1:03:49,  6.01s/it][A
Train Diffusion:  20%|██        | 164/800 [17:37<1:03:21,  5.98s/it][A
Train Diffusion:  21%|██        | 165/800 [17:43<1:02:59,  5.95s/it][A
Train Diffusion:  21%|██        | 166/800 [17:49<1:03:15,  5.99s/it][A
Train Diffusion:  21%|██        | 167/800 [17:55<1:03:07,  5.98s/it][A
Train Diffusion:  21%|██        | 168/800 [18:01<1:03:08,  5.99s/it][A
Train Diffusion:  21%|██        | 169/800 [18:07<1:03:05,  6.00s/it][A
Train Diffusion:  21%|██▏       | 170/800 [18:13<1:03:03,  6.01s/it][A
Train Diffusion:  21%|██▏       | 171/800 [18:19<1:03:00,  6.01s/it][A
Train Diffusion:  22%|██▏       | 172/800 [18:25<1:02:50,  6.00s/it][A
Train Diffusion:  22%|██▏       | 173/800 [18:31<1:02:47,  6.01

Moving average ELBO loss at 180 iterations is: 81002.8859375. Best ELBO loss value is: 74430.09375.

C_PATH mean = tensor([[16.7081,  0.1668,  2.2086],
        [16.9111,  0.1667,  2.2110],
        [16.7083,  0.1663,  2.2077],
        [16.9387,  0.1662,  2.2213],
        [17.1933,  0.1708,  2.2282],
        [16.7696,  0.1668,  2.2244],
        [17.1354,  0.1721,  2.2229],
        [17.1324,  0.1697,  2.2423],
        [17.1236,  0.1717,  2.2384],
        [16.9303,  0.1685,  2.2254]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[2.6748e+00, 1.3447e-01, 2.2530e+00],
         [2.4085e+01, 1.4693e-01, 2.2346e+00],
         [1.7645e+01, 1.2683e-01, 2.3243e+00],
         ...,
         [1.7689e+01, 1.8475e-01, 2.2573e+00],
         [1.9431e+01, 1.8683e-01, 2.3324e+00],
         [1.8451e+01, 1.2198e-01, 2.1999e+00]],

        [[2.7167e+00, 1.2127e-01, 2.3118e+00],
         [2.5130e+01, 1.5716e-01, 2.2645e+00],
         [1.7629e+01, 1.2668e-01, 2.3202e+00],
         ...,
         [1.7165e+01, 2.7


Train Diffusion:  22%|██▎       | 180/800 [19:14<1:02:13,  6.02s/it][A
Train Diffusion:  23%|██▎       | 181/800 [19:20<1:02:19,  6.04s/it][A
Train Diffusion:  23%|██▎       | 182/800 [19:26<1:02:16,  6.05s/it][A
Train Diffusion:  23%|██▎       | 183/800 [19:32<1:02:48,  6.11s/it][A
Train Diffusion:  23%|██▎       | 184/800 [19:38<1:02:37,  6.10s/it][A
Train Diffusion:  23%|██▎       | 185/800 [19:44<1:01:54,  6.04s/it][A
Train Diffusion:  23%|██▎       | 186/800 [19:50<1:01:51,  6.05s/it][A
Train Diffusion:  23%|██▎       | 187/800 [19:57<1:03:57,  6.26s/it][A
Train Diffusion:  24%|██▎       | 188/800 [20:04<1:06:24,  6.51s/it][A
Train Diffusion:  24%|██▎       | 189/800 [20:10<1:06:04,  6.49s/it][A
Train Diffusion:  24%|██▍       | 190/800 [20:17<1:07:56,  6.68s/it][A
Train Diffusion:  24%|██▍       | 191/800 [20:24<1:07:27,  6.65s/it][A
Train Diffusion:  24%|██▍       | 192/800 [20:31<1:09:11,  6.83s/it][A
Train Diffusion:  24%|██▍       | 193/800 [20:40<1:13:38,  7.28

In [None]:
# Persist the trained flow. The model returned by train() is bound to
# net_batch_10_dt_flow_0_2 in this notebook; `net` only exists inside train().
torch.save(net_batch_10_dt_flow_0_2, f'net_t_{t}_dt_{dt_flow}.pt')