In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [2]:
# Run configuration: seed, compute device, time grid and optimisation settings.
torch.manual_seed(0)

# Index of the CUDA device to use. It was referenced below without being defined in this
# notebook, which raises NameError on any machine where CUDA is available.
cuda_id = 0
devi = torch.device(f"cuda:{cuda_id}" if torch.cuda.is_available() else "cpu")

dt_flow = 0.1 #SDE discretization timestep.
t = 250 #Simulation run for T hours.
# round() before int(): a float quotient such as 0.3 / 0.1 evaluates to 2.999..., and plain
# truncation would silently drop the last grid point.
n = int(round(t / dt_flow)) + 1
t_span = np.linspace(0, t, n)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
niter = 8000 #Total number of training iterations.
piter = 100 #Number of pretraining iterations (must be < niter).
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
state_dim_SAWB = 4 #Not including CO2 in STATE_DIM, because CO2 is an observation.
pretrain_lr = 1e-2
train_lr = 1e-3
batch_size = 1 #Number of sets of observation outputs to sample per set of parameters.

In [3]:
# Temperature forcing settings.
temp_ref = 283
temp_rise = 5  # High estimate of 5 celsius temperature rise by 2100.

# System parameters from deterministic CON model.
u_M = 0.002
a_SD = a_DS = a_M = 0.33
a_MSC = 0.5
k_S_ref, k_D_ref, k_M_ref = 0.000025, 0.005, 0.0002
Ea_S, Ea_D, Ea_M = 75, 50, 50

# SCON diffusion matrix parameters.
c_SOC, c_DOC, c_MBC = 1., 0.01, 0.05

# Bundle every parameter under its own name; keyword order fixes the dict's key order.
SCON_C_params_dict = dict(
    u_M = u_M,
    a_SD = a_SD,
    a_DS = a_DS,
    a_M = a_M,
    a_MSC = a_MSC,
    k_S_ref = k_S_ref,
    k_D_ref = k_D_ref,
    k_M_ref = k_M_ref,
    Ea_S = Ea_S,
    Ea_D = Ea_D,
    Ea_M = Ea_M,
    c_SOC = c_SOC,
    c_DOC = c_DOC,
    c_MBC = c_MBC,
)

In [4]:
obs_error_scale = 0.1

# Prior over the initial state: a Gaussian centred on x0_SCON whose per-state standard
# deviation is obs_error_scale times that state's initial value.
x0_SCON = [37, 0.1, 0.9]
x0_SCON_tensor = torch.tensor(x0_SCON)
# Use the `D` alias this notebook imports for torch.distributions (the original `d` is not
# defined in any visible cell). torch.diag states the diagonal scale explicitly instead of
# relying on broadcasting an identity matrix against the state vector.
x0_prior_SCON = D.multivariate_normal.MultivariateNormal(x0_SCON_tensor,
                                                         scale_tril = torch.diag(obs_error_scale * x0_SCON_tensor))

In [5]:
# Load the observations the flow is conditioned on. The helper receives the CSV path, the
# SCON state dimension, the simulation horizon `t` and the observation error scale, and
# returns observation times, means and error values.
# NOTE(review): the file name mentions t_300 / dt_0-01 while this notebook uses t = 250 and
# dt_flow = 0.1 — confirm csv_to_obs_df selects the intended subset.
obs_times, obs_means_CON, obs_error_CON = csv_to_obs_df('y_from_x_t_300_dt_0-01.csv', state_dim_SCON, t, obs_error_scale)

In [6]:
#Obtain temperature forcing function.
# Evaluated on the full time grid from the reference temperature and the assumed rise.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)

#Obtain SOC and DOC pool litter input vectors for use in flow SDE functions.
# Both are evaluated on the same time grid as the temperature forcing.
i_s_tensor = i_s(t_span_tensor) #Exogenous SOC input function
i_d_tensor = i_d(t_span_tensor) #Exogenous DOC input function

In [7]:
# Observation model built from the loaded observation times, means and error values, using
# the flow's discretisation step and the selected device.
# NOTE(review): the "noCO2" suffix suggests CO2 is excluded from the observations — confirm
# against ObsModel and the CSV contents.
obs_model_CON_noCO2 = ObsModel(DEVICE = devi, TIMES = obs_times, DT = dt_flow, MU = obs_means_CON, SCALE = obs_error_CON)

In [8]:
def calc_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT):
    """Log-density of a sampled path under the Euler-Maruyama discretisation of the SDE.

    Every time-indexed input is sliced along its second axis: all but the last time point
    feed DRIFT_DIFFUSION, and each following state is scored against a multivariate normal
    with mean ``state + drift * DT`` and scale ``diffusion_sqrt * sqrt(DT)``. The log-density
    of the first state under X0_PRIOR is added on top.

    Returns the per-path log-density, i.e. log p(x|x0, theta) + log p(x0|theta).
    """
    def all_but_last(series):
        # Drop the final time point; it has no successor to be scored against.
        return series[:, :-1, :]

    current_states = all_but_last(C_PATH)
    next_states = C_PATH[:, 1:, :]
    initial_state = C_PATH[:, 0, :]

    drift, diffusion_sqrt = DRIFT_DIFFUSION(
        current_states,
        all_but_last(T_SPAN_TENSOR),
        all_but_last(I_S_TENSOR),
        all_but_last(I_D_TENSOR),
        all_but_last(TEMP_TENSOR),
        TEMP_REF,
        PARAMS_DICT,
    )

    # One-step transition density of the discretised SDE.
    step_mean = current_states + drift * DT
    step_scale = diffusion_sqrt * math.sqrt(DT)
    transition = D.multivariate_normal.MultivariateNormal(loc = step_mean, scale_tril = step_scale)

    ll = transition.log_prob(next_states).sum(-1) # log p(x|x0, theta)
    ll += X0_PRIOR.log_prob(initial_state) # log p(x0|theta)
    return ll # (batch_size, )

In [9]:
def train(DEVICE, PRETRAIN_LR, TRAIN_LR, NITER, PRETRAIN_ITER, BATCH_SIZE, OBS_MODEL,
          STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF,
          DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT,
          LEARN_PARAMS = False, LR_DECAY = 0.1, DECAY_STEP_SIZE = 1000, PRINT_EVERY = 500):
    """Train an ``SDEFlow`` network in two phases and return it with its loss history.

    Phase 1 (iterations ``0 .. PRETRAIN_ITER - 1``) minimises the summed absolute difference
    between the sampled paths and the per-state mean of ``OBS_MODEL.mu``. Phase 2 (all later
    iterations) minimises the negative ELBO
    ``-log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)``.

    Args:
        DEVICE: torch device that the network and forcing tensors are moved to.
        PRETRAIN_LR: Adam learning rate used during phase 1.
        TRAIN_LR: learning rate installed when phase 2 starts.
        NITER: total number of iterations across both phases.
        PRETRAIN_ITER: number of phase-1 iterations; must be smaller than NITER.
        BATCH_SIZE: number of paths requested from the network per iteration.
        OBS_MODEL: observation model; its ``mu`` attribute provides the pretraining target
            and calling it yields the observation term of the ELBO.
        STATE_DIM, T, DT, N: forwarded to the ``SDEFlow`` constructor; STATE_DIM also selects
            the rows of ``OBS_MODEL.mu`` used in phase 1, and DT is passed to ``calc_log_lik``.
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR:
            forwarded to ``calc_log_lik``.
        PARAMS_DICT: fixed SDE parameters, or the initialiser of the ``MeanField`` posterior
            when LEARN_PARAMS is true.
        LEARN_PARAMS: if true, sample parameters from a ``MeanField`` posterior each iteration.
        LR_DECAY: factor applied to the learning rate at every decay step in phase 2.
        DECAY_STEP_SIZE: decay is applied when ``it % DECAY_STEP_SIZE == 0`` in phase 2.
        PRINT_EVERY: diagnostics are printed every this many iterations.

    Returns:
        tuple: ``(net, ELBO_losses)`` where ``ELBO_losses`` holds one float per phase-2
        iteration (pretraining losses are not returned).

    Raises:
        ValueError: if ``PRETRAIN_ITER >= NITER``.
    """
    # Validate before building the network so a bad schedule fails immediately.
    # ValueError is an Exception subclass, so existing `except Exception` callers still work.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")

    net = SDEFlow(DEVICE, OBS_MODEL, STATE_DIM, T, DT, N).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = PRETRAIN_LR)

    if LEARN_PARAMS:
        theta_post = MeanField(PARAMS_DICT)
        theta_prior = D.normal.Normal(torch.zeros_like(theta_post.means),
                                      torch.ones_like(theta_post.std))
        # NOTE(review): the optimizer above only receives net.parameters(), so nothing here
        # updates theta_post, and theta_post is not moved to DEVICE. Confirm how MeanField
        # exposes its parameters before relying on LEARN_PARAMS = True.

    # Loop-invariant device transfers, done once. TEMP_TENSOR was previously the only forcing
    # tensor passed without .to(DEVICE), which mismatches devices when training on CUDA.
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)

    # Best losses are tracked as plain floats so no autograd graph is kept alive between
    # iterations; a 0-dim tensor formats as its float value, so the printed text is unchanged.
    best_loss_norm = 1e10
    best_loss_ELBO = 1e20
    norm_losses = []
    ELBO_losses = []

    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for it in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #Obtain paths with solutions at times after t0.

            if it < PRETRAIN_ITER:
                # Pull every sampled state toward the per-state mean of the observations.
                # STATE_DIM replaces a hard-coded 3 so models with other state sizes work.
                l1_norm_element = C_PATH - torch.mean(OBS_MODEL.mu[:STATE_DIM], -1)
                # The sum is already a scalar, so the trailing mean() leaves it unchanged.
                l1_norm = torch.sum(torch.abs(l1_norm_element)).mean()
                l1_value = l1_norm.item()
                if l1_value < best_loss_norm:
                    best_loss_norm = l1_value
                norm_losses.append(l1_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average norm loss at {it + 1} iterations is: {sum(norm_losses[-10:]) / len(norm_losses[-10:])}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()

            else:
                if LEARN_PARAMS:
                    theta_dict, theta, log_q_theta = theta_post()
                    log_p_theta = theta_prior.log_prob(theta).sum(-1)
                else:
                    # Fixed parameters contribute nothing to the objective.
                    theta_dict = PARAMS_DICT
                    log_q_theta, log_p_theta = torch.zeros(2, device = DEVICE)
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev,
                                       temp_dev, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, theta_dict)

                # Negative ELBO (the quantity being minimised):
                # - log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)
                ELBO = -log_p_theta.mean() + log_q_theta.mean() - log_lik.mean() - OBS_MODEL(C_PATH, theta_dict) + log_prob.mean()
                elbo_value = ELBO.item()
                if elbo_value < best_loss_ELBO:
                    best_loss_ELBO = elbo_value
                ELBO_losses.append(elbo_value)

                if (it + 1) % PRINT_EVERY == 0:
                    print(f"Moving average ELBO loss at {it + 1} iterations is: {sum(ELBO_losses[-10:]) / len(ELBO_losses[-10:])}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\n C_PATH =', C_PATH)
                    print(theta_dict)
                ELBO.backward()

            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            # Switch to the training learning rate on the first ELBO iteration, then decay it
            # at every DECAY_STEP_SIZE boundary after that.
            if it == PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] = TRAIN_LR
            elif it % DECAY_STEP_SIZE == 0 and it > PRETRAIN_ITER:
                optimizer.param_groups[0]['lr'] *= LR_DECAY
            optimizer.step()
            tq.update()

    return net, ELBO_losses

In [None]:
# Run the full schedule on the SCON model: `piter` pretraining iterations followed by ELBO
# training up to `niter` iterations in total. LEARN_PARAMS keeps its default (False), so
# SCON_C_params_dict is used as fixed parameters. Diagnostics are printed every 20 iterations
# and the learning rate decay step is 5000 iterations.
net, elbo_hist = train(devi, pretrain_lr, train_lr, niter, piter, batch_size, obs_model_CON_noCO2,
                       state_dim_SCON, t, dt_flow, n, t_span_tensor, i_s_tensor, i_d_tensor, temp_tensor, temp_ref,
                       drift_diffusion_SCON_C, x0_prior_SCON, SCON_C_params_dict,
                       LR_DECAY = 0.1, DECAY_STEP_SIZE = 5000, PRINT_EVERY = 20)


Train Diffusion:   0%|          | 0/8000 [00:00<?, ?it/s][A
Train Diffusion:   0%|          | 1/8000 [00:01<2:31:40,  1.14s/it][A
Train Diffusion:   0%|          | 2/8000 [00:02<2:25:49,  1.09s/it][A
Train Diffusion:   0%|          | 3/8000 [00:03<2:22:43,  1.07s/it][A
Train Diffusion:   0%|          | 4/8000 [00:04<2:25:53,  1.09s/it][A
Train Diffusion:   0%|          | 5/8000 [00:05<2:41:35,  1.21s/it][A
Train Diffusion:   0%|          | 6/8000 [00:06<2:35:19,  1.17s/it][A
Train Diffusion:   0%|          | 7/8000 [00:08<2:34:04,  1.16s/it][A
Train Diffusion:   0%|          | 8/8000 [00:09<2:33:42,  1.15s/it][A
Train Diffusion:   0%|          | 9/8000 [00:10<2:30:00,  1.13s/it][A
Train Diffusion:   0%|          | 10/8000 [00:11<2:27:00,  1.10s/it][A
Train Diffusion:   0%|          | 11/8000 [00:12<2:24:45,  1.09s/it][A
Train Diffusion:   0%|          | 12/8000 [00:13<2:29:52,  1.13s/it][A
Train Diffusion:   0%|          | 13/8000 [00:14<2:30:38,  1.13s/it][A
Train Diffu

Moving average norm loss at 20 iterations is: 49011.94921875. Best norm loss value is: 45119.0234375.

C_PATH mean = tensor([[4.4808, 1.3687, 2.1037]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.3981, 4.1090, 0.3239],
         [3.8618, 0.4961, 1.5649],
         [2.9771, 1.5918, 0.7515],
         ...,
         [5.1008, 1.0732, 1.1233],
         [1.5925, 2.9877, 1.9352],
         [1.1853, 2.3732, 1.3526]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 20/8000 [00:22<2:25:30,  1.09s/it][A
Train Diffusion:   0%|          | 21/8000 [00:23<2:31:43,  1.14s/it][A
Train Diffusion:   0%|          | 22/8000 [00:25<2:41:18,  1.21s/it][A
Train Diffusion:   0%|          | 23/8000 [00:26<2:43:48,  1.23s/it][A
Train Diffusion:   0%|          | 24/8000 [00:27<2:41:33,  1.22s/it][A
Train Diffusion:   0%|          | 25/8000 [00:28<2:44:46,  1.24s/it][A
Train Diffusion:   0%|          | 26/8000 [00:29<2:39:26,  1.20s/it][A
Train Diffusion:   0%|          | 27/8000 [00:31<2:38:19,  1.19s/it][A
Train Diffusion:   0%|          | 28/8000 [00:32<2:35:37,  1.17s/it][A
Train Diffusion:   0%|          | 29/8000 [00:33<2:32:12,  1.15s/it][A
Train Diffusion:   0%|          | 30/8000 [00:34<2:39:54,  1.20s/it][A
Train Diffusion:   0%|          | 31/8000 [00:35<2:42:34,  1.22s/it][A
Train Diffusion:   0%|          | 32/8000 [00:37<3:07:51,  1.41s/it][A
Train Diffusion:   0%|          | 33/8000 [00:39<3:10:15,  1.43

Moving average norm loss at 40 iterations is: 14973.9849609375. Best norm loss value is: 9918.8740234375.

C_PATH mean = tensor([[18.9746,  0.6465,  2.0405]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 2.6620,  9.9653,  0.6745],
         [11.4148,  0.9933,  1.4403],
         [21.9196,  0.1011,  1.7034],
         ...,
         [22.3297,  0.2733,  1.5869],
         [17.6081,  1.3976,  2.9929],
         [11.8135,  5.5601,  1.0412]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 40/8000 [00:47<2:33:06,  1.15s/it][A
Train Diffusion:   1%|          | 41/8000 [00:48<2:33:04,  1.15s/it][A
Train Diffusion:   1%|          | 42/8000 [00:49<2:32:28,  1.15s/it][A
Train Diffusion:   1%|          | 43/8000 [00:50<2:28:35,  1.12s/it][A
Train Diffusion:   1%|          | 44/8000 [00:51<2:28:02,  1.12s/it][A
Train Diffusion:   1%|          | 45/8000 [00:52<2:29:08,  1.12s/it][A
Train Diffusion:   1%|          | 46/8000 [00:53<2:29:24,  1.13s/it][A
Train Diffusion:   1%|          | 47/8000 [00:54<2:26:27,  1.10s/it][A
Train Diffusion:   1%|          | 48/8000 [00:55<2:24:25,  1.09s/it][A
Train Diffusion:   1%|          | 49/8000 [00:57<2:23:01,  1.08s/it][A
Train Diffusion:   1%|          | 50/8000 [00:58<2:24:31,  1.09s/it][A
Train Diffusion:   1%|          | 51/8000 [00:59<2:31:27,  1.14s/it][A
Train Diffusion:   1%|          | 52/8000 [01:00<2:31:04,  1.14s/it][A
Train Diffusion:   1%|          | 53/8000 [01:01<2:28:40,  1.12

Moving average norm loss at 60 iterations is: 3946.1662353515626. Best norm loss value is: 3230.319091796875.

C_PATH mean = tensor([[21.1409,  0.5906,  1.8101]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 3.5963, 11.8193,  1.0675],
         [14.0682,  0.8328,  1.8610],
         [20.5713,  0.8217,  1.2406],
         ...,
         [20.9911,  0.5465,  1.6908],
         [21.2431,  0.5171,  1.6945],
         [21.7923,  0.7132,  1.3487]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 60/8000 [01:10<2:50:33,  1.29s/it][A
Train Diffusion:   1%|          | 61/8000 [01:11<2:43:29,  1.24s/it][A
Train Diffusion:   1%|          | 62/8000 [01:12<2:36:40,  1.18s/it][A
Train Diffusion:   1%|          | 63/8000 [01:13<2:29:49,  1.13s/it][A
Train Diffusion:   1%|          | 64/8000 [01:14<2:24:59,  1.10s/it][A
Train Diffusion:   1%|          | 65/8000 [01:16<2:26:46,  1.11s/it][A
Train Diffusion:   1%|          | 66/8000 [01:17<2:28:23,  1.12s/it][A
Train Diffusion:   1%|          | 67/8000 [01:18<2:38:56,  1.20s/it][A
Train Diffusion:   1%|          | 68/8000 [01:19<2:36:44,  1.19s/it][A
Train Diffusion:   1%|          | 69/8000 [01:20<2:32:53,  1.16s/it][A
Train Diffusion:   1%|          | 70/8000 [01:22<2:40:10,  1.21s/it][A
Train Diffusion:   1%|          | 71/8000 [01:23<2:35:49,  1.18s/it][A
Train Diffusion:   1%|          | 72/8000 [01:24<2:29:07,  1.13s/it][A
Train Diffusion:   1%|          | 73/8000 [01:25<2:29:10,  1.13

Moving average norm loss at 80 iterations is: 1835.7744262695312. Best norm loss value is: 1193.9710693359375.

C_PATH mean = tensor([[20.3807,  0.5480,  1.7305]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 4.1897, 11.7901,  1.4299],
         [17.2191,  0.5294,  1.7292],
         [20.5405,  0.4961,  1.5212],
         ...,
         [20.3800,  0.5419,  1.7408],
         [20.4736,  0.5160,  1.6886],
         [20.6317,  0.6313,  1.5876]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 80/8000 [01:33<2:25:14,  1.10s/it][A
Train Diffusion:   1%|          | 81/8000 [01:34<2:23:07,  1.08s/it][A
Train Diffusion:   1%|          | 82/8000 [01:35<2:20:00,  1.06s/it][A
Train Diffusion:   1%|          | 83/8000 [01:36<2:17:48,  1.04s/it][A
Train Diffusion:   1%|          | 84/8000 [01:37<2:16:18,  1.03s/it][A
Train Diffusion:   1%|          | 85/8000 [01:38<2:18:35,  1.05s/it][A
Train Diffusion:   1%|          | 86/8000 [01:39<2:18:19,  1.05s/it][A
Train Diffusion:   1%|          | 87/8000 [01:40<2:25:19,  1.10s/it][A
Train Diffusion:   1%|          | 88/8000 [01:41<2:25:36,  1.10s/it][A
Train Diffusion:   1%|          | 89/8000 [01:42<2:27:24,  1.12s/it][A
Train Diffusion:   1%|          | 90/8000 [01:44<2:28:53,  1.13s/it][A
Train Diffusion:   1%|          | 91/8000 [01:45<2:24:48,  1.10s/it][A
Train Diffusion:   1%|          | 92/8000 [01:46<2:22:13,  1.08s/it][A
Train Diffusion:   1%|          | 93/8000 [01:47<2:26:15,  1.11

Moving average norm loss at 100 iterations is: 2075.5840087890624. Best norm loss value is: 1069.9169921875.

C_PATH mean = tensor([[20.0016,  0.5613,  1.8952]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[ 3.8408, 11.7298,  1.6883],
         [19.1649,  0.4355,  1.7778],
         [20.2503,  0.5672,  1.7921],
         ...,
         [20.0711,  0.5535,  1.9456],
         [20.2236,  0.5403,  1.8836],
         [20.1534,  0.6125,  1.8128]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|▏         | 100/8000 [01:54<2:12:10,  1.00s/it][A
Train Diffusion:   1%|▏         | 101/8000 [01:55<2:11:44,  1.00s/it][A
Train Diffusion:   1%|▏         | 102/8000 [01:56<2:10:54,  1.01it/s][A
Train Diffusion:   1%|▏         | 103/8000 [01:57<2:12:09,  1.00s/it][A
Train Diffusion:   1%|▏         | 104/8000 [01:58<2:11:15,  1.00it/s][A
Train Diffusion:   1%|▏         | 105/8000 [01:59<2:09:58,  1.01it/s][A
Train Diffusion:   1%|▏         | 106/8000 [02:00<2:09:28,  1.02it/s][A
Train Diffusion:   1%|▏         | 107/8000 [02:01<2:09:30,  1.02it/s][A
Train Diffusion:   1%|▏         | 108/8000 [02:02<2:09:20,  1.02it/s][A
Train Diffusion:   1%|▏         | 109/8000 [02:03<2:08:49,  1.02it/s][A
Train Diffusion:   1%|▏         | 110/8000 [02:04<2:10:11,  1.01it/s][A
Train Diffusion:   1%|▏         | 111/8000 [02:05<2:10:14,  1.01it/s][A
Train Diffusion:   1%|▏         | 112/8000 [02:06<2:10:16,  1.01it/s][A
Train Diffusion:   1%|▏         | 113/8000 [02:07<

Moving average ELBO loss at 120 iterations is: 454221.9578125. Best ELBO loss value is: 172387.578125.

C_PATH mean = tensor([[20.9111,  0.3895,  2.1484]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[ 5.2325,  7.8944,  2.0459],
         [21.0958,  0.2920,  1.9927],
         [21.5323,  0.2631,  2.0321],
         ...,
         [20.9963,  0.3733,  2.2515],
         [21.0129,  0.3822,  2.0798],
         [21.2619,  0.5008,  1.9140]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 120/8000 [02:14<2:17:06,  1.04s/it][A
Train Diffusion:   2%|▏         | 121/8000 [02:15<2:18:18,  1.05s/it][A
Train Diffusion:   2%|▏         | 122/8000 [02:16<2:16:10,  1.04s/it][A
Train Diffusion:   2%|▏         | 123/8000 [02:17<2:14:50,  1.03s/it][A
Train Diffusion:   2%|▏         | 124/8000 [02:18<2:13:28,  1.02s/it][A
Train Diffusion:   2%|▏         | 125/8000 [02:19<2:11:27,  1.00s/it][A
Train Diffusion:   2%|▏         | 126/8000 [02:20<2:11:44,  1.00s/it][A
Train Diffusion:   2%|▏         | 127/8000 [02:21<2:11:04,  1.00it/s][A
Train Diffusion:   2%|▏         | 128/8000 [02:22<2:18:19,  1.05s/it][A
Train Diffusion:   2%|▏         | 129/8000 [02:23<2:19:28,  1.06s/it][A
Train Diffusion:   2%|▏         | 130/8000 [02:24<2:15:46,  1.04s/it][A
Train Diffusion:   2%|▏         | 131/8000 [02:25<2:13:26,  1.02s/it][A
Train Diffusion:   2%|▏         | 132/8000 [02:26<2:12:28,  1.01s/it][A
Train Diffusion:   2%|▏         | 133/8000 [02:27<

Moving average ELBO loss at 140 iterations is: -200211.7515625. Best ELBO loss value is: -213660.375.

C_PATH mean = tensor([[20.4125,  0.1162,  2.0933]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[ 5.5716,  0.0255,  1.7493],
         [20.0530,  0.0434,  1.7633],
         [20.6655,  0.0348,  1.8028],
         ...,
         [20.7114,  0.1331,  2.2624],
         [20.7510,  0.1265,  2.1440],
         [21.4086,  0.1508,  1.8165]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 140/8000 [02:34<2:07:51,  1.02it/s][A
Train Diffusion:   2%|▏         | 141/8000 [02:35<2:07:21,  1.03it/s][A
Train Diffusion:   2%|▏         | 142/8000 [02:36<2:13:06,  1.02s/it][A
Train Diffusion:   2%|▏         | 143/8000 [02:37<2:12:36,  1.01s/it][A
Train Diffusion:   2%|▏         | 144/8000 [02:38<2:11:08,  1.00s/it][A
Train Diffusion:   2%|▏         | 145/8000 [02:39<2:10:27,  1.00it/s][A
Train Diffusion:   2%|▏         | 146/8000 [02:40<2:09:26,  1.01it/s][A
Train Diffusion:   2%|▏         | 147/8000 [02:41<2:16:11,  1.04s/it][A
Train Diffusion:   2%|▏         | 148/8000 [02:43<2:23:52,  1.10s/it][A
Train Diffusion:   2%|▏         | 149/8000 [02:44<2:21:17,  1.08s/it][A
Train Diffusion:   2%|▏         | 150/8000 [02:45<2:16:46,  1.05s/it][A
Train Diffusion:   2%|▏         | 151/8000 [02:46<2:13:46,  1.02s/it][A
Train Diffusion:   2%|▏         | 152/8000 [02:47<2:11:37,  1.01s/it][A
Train Diffusion:   2%|▏         | 153/8000 [02:48<

Moving average ELBO loss at 160 iterations is: -243183.8609375. Best ELBO loss value is: -250105.375.

C_PATH mean = tensor([[19.3784,  0.0974,  1.7276]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[ 5.0967,  0.1521,  1.2995],
         [18.5697,  0.0573,  1.4615],
         [19.1898,  0.0419,  1.4835],
         ...,
         [19.8165,  0.0801,  1.8587],
         [19.8587,  0.0927,  1.7571],
         [19.9740,  0.1709,  1.5192]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 160/8000 [02:54<2:08:31,  1.02it/s][A
Train Diffusion:   2%|▏         | 161/8000 [02:55<2:07:42,  1.02it/s][A
Train Diffusion:   2%|▏         | 162/8000 [02:56<2:07:10,  1.03it/s][A
Train Diffusion:   2%|▏         | 163/8000 [02:57<2:07:26,  1.02it/s][A
Train Diffusion:   2%|▏         | 164/8000 [02:58<2:07:25,  1.02it/s][A
Train Diffusion:   2%|▏         | 165/8000 [02:59<2:06:38,  1.03it/s][A
Train Diffusion:   2%|▏         | 166/8000 [03:00<2:06:59,  1.03it/s][A
Train Diffusion:   2%|▏         | 167/8000 [03:01<2:06:35,  1.03it/s][A
Train Diffusion:   2%|▏         | 168/8000 [03:02<2:07:37,  1.02it/s][A
Train Diffusion:   2%|▏         | 169/8000 [03:03<2:07:12,  1.03it/s][A
Train Diffusion:   2%|▏         | 170/8000 [03:04<2:06:51,  1.03it/s][A
Train Diffusion:   2%|▏         | 171/8000 [03:05<2:06:24,  1.03it/s][A
Train Diffusion:   2%|▏         | 172/8000 [03:06<2:05:51,  1.04it/s][A
Train Diffusion:   2%|▏         | 173/8000 [03:07<

Moving average ELBO loss at 180 iterations is: -270491.696875. Best ELBO loss value is: -275090.28125.

C_PATH mean = tensor([[18.7653,  0.1554,  1.7719]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[ 6.9226,  0.1039,  1.1246],
         [16.4865,  0.1057,  1.4478],
         [17.9651,  0.0739,  1.4381],
         ...,
         [19.3044,  0.1445,  1.9182],
         [19.3757,  0.1626,  1.8020],
         [19.3358,  0.2618,  1.5967]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 180/8000 [03:14<2:06:30,  1.03it/s][A
Train Diffusion:   2%|▏         | 181/8000 [03:15<2:06:02,  1.03it/s][A
Train Diffusion:   2%|▏         | 182/8000 [03:16<2:07:26,  1.02it/s][A
Train Diffusion:   2%|▏         | 183/8000 [03:17<2:06:40,  1.03it/s][A
Train Diffusion:   2%|▏         | 184/8000 [03:18<2:06:21,  1.03it/s][A
Train Diffusion:   2%|▏         | 185/8000 [03:19<2:06:49,  1.03it/s][A
Train Diffusion:   2%|▏         | 186/8000 [03:20<2:06:19,  1.03it/s][A
Train Diffusion:   2%|▏         | 187/8000 [03:21<2:06:22,  1.03it/s][A
Train Diffusion:   2%|▏         | 188/8000 [03:22<2:07:44,  1.02it/s][A
Train Diffusion:   2%|▏         | 189/8000 [03:23<2:13:55,  1.03s/it][A
Train Diffusion:   2%|▏         | 190/8000 [03:24<2:12:43,  1.02s/it][A
Train Diffusion:   2%|▏         | 191/8000 [03:25<2:10:31,  1.00s/it][A
Train Diffusion:   2%|▏         | 192/8000 [03:26<2:09:10,  1.01it/s][A
Train Diffusion:   2%|▏         | 193/8000 [03:27<

Moving average ELBO loss at 200 iterations is: -287815.034375. Best ELBO loss value is: -290977.46875.

C_PATH mean = tensor([[18.8332,  0.3586,  1.8207]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[ 9.0423,  0.1169,  1.1508],
         [15.6810,  0.2440,  1.4491],
         [17.6739,  0.2091,  1.4243],
         ...,
         [19.6170,  0.3994,  1.9558],
         [19.5720,  0.4240,  1.8454],
         [19.3821,  0.5336,  1.6797]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▎         | 200/8000 [03:33<2:06:08,  1.03it/s][A
Train Diffusion:   3%|▎         | 201/8000 [03:34<2:05:42,  1.03it/s][A
Train Diffusion:   3%|▎         | 202/8000 [03:35<2:07:39,  1.02it/s][A
Train Diffusion:   3%|▎         | 203/8000 [03:37<2:13:00,  1.02s/it][A
Train Diffusion:   3%|▎         | 204/8000 [03:38<2:19:11,  1.07s/it][A
Train Diffusion:   3%|▎         | 205/8000 [03:39<2:26:46,  1.13s/it][A
Train Diffusion:   3%|▎         | 206/8000 [03:40<2:34:29,  1.19s/it][A
Train Diffusion:   3%|▎         | 207/8000 [03:42<2:39:27,  1.23s/it][A
Train Diffusion:   3%|▎         | 208/8000 [03:43<2:34:09,  1.19s/it][A
Train Diffusion:   3%|▎         | 209/8000 [03:44<2:31:18,  1.17s/it][A
Train Diffusion:   3%|▎         | 210/8000 [03:45<2:29:01,  1.15s/it][A
Train Diffusion:   3%|▎         | 211/8000 [03:46<2:26:47,  1.13s/it][A
Train Diffusion:   3%|▎         | 212/8000 [03:47<2:25:17,  1.12s/it][A
Train Diffusion:   3%|▎         | 213/8000 [03:48<

In [None]:
# Persist the trained flow to disk. Passing the module itself serialises the whole object,
# so loading it later needs the same class definitions to be importable.
# NOTE(review): consider saving net.state_dict() instead if the file must survive code changes.
torch.save(net, 'net.pt')

In [None]:
def plot_post(x, obs_model, state_idx=0, num_samples=20,
              ymin=None, ymax=None):
    """Plot the mean and a +/- 2 standard deviation band of sampled paths for one state.

    Args:
        x: tensor of sampled paths, indexed as ``x[sample, time, state]``.
        obs_model: object whose ``times`` and ``mu[state_idx, :]`` are drawn as observations.
        state_idx: state to plot; 0, 1 and 2 are labelled SOC, DOC and MBC.
        num_samples: number reported in the title (it does not affect what is drawn).
        ymin, ymax: optional y-axis limits.

    The time axis runs from 0 to the notebook-level horizon ``t``.
    """
    # Detach and move to CPU once so matplotlib can consume the values even when the paths
    # were sampled on a GPU or still carry gradients.
    paths = x[:, :, state_idx].detach().cpu()
    q_mean = paths.mean(0)
    if paths.shape[0] > 1:
        q_std = paths.std(0)
    else:
        # The standard deviation of a single sample is NaN; draw a zero-width band instead.
        q_std = torch.zeros_like(q_mean)

    # Build the time axis from the path length. The original used a float-step arange with
    # an undefined `dt`, and a float step can produce a length that differs from the path's.
    hours = torch.linspace(0, t, q_mean.shape[0])
    plt.plot(hours, q_mean, label='Posterior mean')
    # Raw string: `\p` and `\s` are invalid escape sequences in a normal string literal.
    plt.fill_between(hours, q_mean - 2*q_std, q_mean + 2*q_std, alpha=0.5,
                     label=r'Posterior $\mu \pm 2\sigma$')
    plt.plot(obs_model.times, obs_model.mu[state_idx, :], linestyle='None', marker='o',
             label='Observed')

    plt.legend()
    plt.xlabel('Hour')
    plt.ylabel(['SOC', 'DOC', 'MBC'][state_idx])
    plt.ylim((ymin, ymax))
    plt.title('Approximate posterior $q(x|\\theta, y)$\nNumber of samples = {}'.format(num_samples))

In [None]:
# `x` was never assigned in any cell, so this plot only worked with leftover kernel state.
# Draw one path from the trained flow; the network returns (paths, log_prob).
net.eval()
with torch.no_grad():
    x, _ = net(1)

# SOC (state index 0) from the single sampled path.
plot_post(x, obs_model_CON_noCO2, 0, num_samples=1)

In [None]:
# `x10` was never assigned in any cell, so this plot only worked with leftover kernel state.
# Draw ten paths from the trained flow; the network returns (paths, log_prob).
net.eval()
with torch.no_grad():
    x10, _ = net(10)

# SOC (state index 0) summarised over the ten sampled paths.
plot_post(x10, obs_model_CON_noCO2, 0, num_samples=10)

In [None]:
# DOC (state index 1) from the paths in `x`, titled as a single sample.
plot_post(x, obs_model_CON_noCO2, 1, num_samples=1)

In [None]:
# DOC (state index 1) from the paths in `x10`, titled as ten samples.
plot_post(x10, obs_model_CON_noCO2, 1, num_samples=10)

In [None]:
# MBC (state index 2) from the paths in `x`, titled as a single sample.
plot_post(x, obs_model_CON_noCO2, 2, num_samples=1)

In [None]:
# MBC (state index 2) from the paths in `x10`, titled as ten samples.
plot_post(x10, obs_model_CON_noCO2, 2, num_samples=10)

In [None]:
# Display the sampled path tensor `x` as the cell output.
x

In [None]:
def plot_elbo(elbo_hist, xmin=0, ymax=None, yscale='linear', title=None):
    """Draw the recorded ELBO loss values from position ``xmin`` onward on the current axes.

    The x-axis counts entries of ``elbo_hist`` starting at 1, ``ymax`` caps the y-axis,
    ``yscale`` selects the axis scale and ``title`` is used as the plot title.
    """
    shown_values = elbo_hist[xmin:]
    first_iter, last_iter = xmin + 1, len(elbo_hist)
    iters = torch.arange(first_iter, last_iter + 1)

    # Same pyplot state machine, addressed through the current Axes object.
    ax = plt.gca()
    ax.plot(iters, shown_values)
    ax.set_ylim((None, ymax))
    ax.set_yscale(yscale)
    ax.set_ylabel('ELBO')
    ax.set_xlabel('Iteration')
    ax.set_title(title)

In [None]:
# Full ELBO loss history returned by train(); it holds one value per post-pretraining iteration.
plot_elbo(elbo_hist, title='All iterations')

In [None]:
# Same history with the first 1000 recorded values skipped, so the later part of the curve
# is not dwarfed by the large early losses.
plot_elbo(elbo_hist, xmin=1000, title='Excludes first 1,000 iterations')