In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [2]:
#Global configuration for the SCON flow-training run. Everything a reader might tune lives in this cell.
torch.manual_seed(0)
cuda_id = 0 #Index of the CUDA device to use when a GPU is available. Defined here because no visible cell defines it.
devi = torch.device(f'cuda:{cuda_id}' if torch.cuda.is_available() else 'cpu')

dt_flow = 0.1 #SDE discretization timestep.
t = 300 #Simulation run for T hours.
n = int(round(t / dt_flow)) + 1 #Number of grid points including t = 0; round() guards against float truncation in t / dt_flow.
t_span = np.linspace(0, t, n)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
niter = 8000 #Total number of optimization iterations (pretraining included).
piter = 100 #Number of leading iterations that use the pretraining (L1 norm) loss.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
state_dim_SAWB = 4 #Not including CO2 in STATE_DIM, because CO2 is an observation.
pretrain_lr = 1e-2
train_lr = 1e-3
batch_size = 1 #Number of paths sampled from the flow per iteration. (An earlier duplicate assignment of 2 was always overwritten by this value.)

In [3]:
#Temperature settings passed to temp_gen and to the drift/diffusion function.
temp_ref = 283 #Reference temperature (presumably Kelvin — TODO confirm).
temp_rise = 5 #High estimate of 5 celsius temperature rise by 2100.

#System parameters from deterministic CON model
u_M = 0.002
a_SD = a_DS = a_M = 0.33
a_MSC = 0.5
k_S_ref, k_D_ref, k_M_ref = 0.000025, 0.005, 0.0002
Ea_S, Ea_D, Ea_M = 75, 50, 50

#SCON diffusion matrix parameters
c_SOC, c_DOC, c_MBC = 1., 0.01, 0.05

#Name -> value mapping handed to the training routine as the fixed parameter set.
SCON_C_params_dict = dict(
    u_M = u_M,
    a_SD = a_SD,
    a_DS = a_DS,
    a_M = a_M,
    a_MSC = a_MSC,
    k_S_ref = k_S_ref,
    k_D_ref = k_D_ref,
    k_M_ref = k_M_ref,
    Ea_S = Ea_S,
    Ea_D = Ea_D,
    Ea_M = Ea_M,
    c_SOC = c_SOC,
    c_DOC = c_DOC,
    c_MBC = c_MBC,
)

In [4]:
obs_error_scale = 0.1 #Relative scale: multiplies the initial state below to give the prior's per-state standard deviation.

#Prior over the initial SCON state: independent Gaussians centered on x0_SCON with
#standard deviation obs_error_scale * x0_SCON (diagonal lower-triangular scale factor).
x0_SCON = [51, 0.05, 0.9]
x0_SCON_tensor = torch.tensor(x0_SCON)
#Uses the `D` alias under which this notebook imports torch.distributions (the previous `d` alias is not defined here).
x0_prior_SCON = D.multivariate_normal.MultivariateNormal(x0_SCON_tensor,
                                                         scale_tril=torch.diag(obs_error_scale * x0_SCON_tensor))

In [5]:
#Load the observation times, means and error scales from CSV.
#csv_to_obs_df is not defined in this notebook (presumably provided by one of the star imports — TODO confirm).
#NOTE(review): the file name says dt = 0.01 while dt_flow is 0.1 — confirm the observation times line up with the flow's time grid.
obs_times, obs_means_CON, obs_error_CON = csv_to_obs_df('y_from_x_t_300_dt_0-01.csv', state_dim_SCON, t, obs_error_scale)

In [6]:
#Obtain temperature forcing function.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)

#Obtain SOC and DOC pool litter input vectors for use in flow SDE functions.
i_s_tensor = i_s(t_span_tensor) #Exogenous SOC input function
i_d_tensor = i_d(t_span_tensor) #Exogenous DOC input function

In [7]:
#Observation model built from the CSV-derived observation times, means and error scales; it is passed to train() and plot_post() below.
#ObsModel is not defined in this notebook (presumably provided by the star imports — TODO confirm).
obs_model_CON_noCO2 = ObsModel(DEVICE = devi, TIMES = obs_times, DT = dt_flow, MU = obs_means_CON, SCALE = obs_error_CON)

In [8]:
def calc_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT):
    """Log-density of sampled state paths under the Euler-Maruyama discretized SDE.

    Computes log p(x|theta) = log p(x|x0, theta) + log p(x0|theta).

    C_PATH is indexed as [batch, time, state]; the time-dependent inputs
    (T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR) are sliced along
    their second axis in the same way. DRIFT_DIFFUSION is called with all but
    the last time point of each input plus TEMP_REF and PARAMS_DICT, and must
    return (drift, diffusion_sqrt), where diffusion_sqrt is used as a
    lower-triangular scale factor. X0_PRIOR supplies log_prob for the first
    time point. Returns one log-density per batch element.
    """
    def _all_but_last(tensor):
        # Every transition starts from a time point that has a successor.
        return tensor[:, :-1, :]

    current_states = _all_but_last(C_PATH)
    next_states = C_PATH[:, 1:, :]

    drift, diffusion_sqrt = DRIFT_DIFFUSION(
        current_states,
        _all_but_last(T_SPAN_TENSOR),
        _all_but_last(I_S_TENSOR),
        _all_but_last(I_D_TENSOR),
        _all_but_last(TEMP_TENSOR),
        TEMP_REF,
        PARAMS_DICT,
    )

    # One Gaussian transition per step: mean x + drift * dt, scale diffusion_sqrt * sqrt(dt).
    transition = D.multivariate_normal.MultivariateNormal(
        loc = current_states + drift * DT,
        scale_tril = diffusion_sqrt * math.sqrt(DT),
    )

    log_lik = transition.log_prob(next_states).sum(-1) # log p(x|x0, theta), summed over time steps
    log_lik += X0_PRIOR.log_prob(C_PATH[:, 0, :]) # log p(x0|theta)

    return log_lik # (batch_size, )

In [9]:
def train(DEVICE, PRETRAIN_LR, TRAIN_LR, NITER, PRETRAIN_ITER, BATCH_SIZE, OBS_MODEL,
          STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF,
          DRIFT_DIFFUSION, X0_PRIOR, PARAMS_DICT,
          LEARN_PARAMS = False, LR_DECAY = 0.1, DECAY_STEP_SIZE = 1000, PRINT_EVERY = 500):
    """Train an SDEFlow network in two phases and return it with its loss history.

    Phase 1 (iterations < PRETRAIN_ITER) minimizes the summed absolute difference
    between sampled paths and the per-state time-mean of the observations.
    Phase 2 minimizes the negative ELBO:
        -log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta).

    Args:
        DEVICE: torch device for the network and the forcing tensors.
        PRETRAIN_LR, TRAIN_LR: Adam learning rates for phase 1 and phase 2.
        NITER: total number of iterations; PRETRAIN_ITER of them are phase 1.
        BATCH_SIZE: number of paths drawn from the network per iteration.
        OBS_MODEL: exposes `.mu` (indexed [state, observation]) and is callable as
            OBS_MODEL(C_PATH, theta_dict) (presumably returning a scalar
            log p(y|x, theta) — TODO confirm).
        STATE_DIM, T, DT, N: forwarded to the SDEFlow constructor; STATE_DIM also
            selects the rows of OBS_MODEL.mu used for pretraining and DT is the
            Euler-Maruyama step in calc_log_lik.
        T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, DRIFT_DIFFUSION,
        X0_PRIOR: forwarded to calc_log_lik.
        PARAMS_DICT: fixed parameter values, or the argument of MeanField when
            LEARN_PARAMS is True.
        LEARN_PARAMS: when True, a MeanField posterior over parameters is sampled
            each ELBO iteration against a standard normal prior.
        LR_DECAY, DECAY_STEP_SIZE: after pretraining the learning rate is multiplied
            by LR_DECAY whenever the iteration index is a multiple of DECAY_STEP_SIZE.
        PRINT_EVERY: print diagnostics every this many iterations.

    Returns:
        (net, ELBO_losses): the trained network and the list of negative-ELBO values,
        one per phase-2 iteration (pretraining iterations are not included).

    Raises:
        ValueError: if PRETRAIN_ITER >= NITER.

    SDEFlow and MeanField are not defined in this notebook (presumably provided by the
    star imports — TODO confirm).
    """
    # Validate before any expensive construction.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")

    net = SDEFlow(DEVICE, OBS_MODEL, STATE_DIM, T, DT, N).to(DEVICE)
    trainable_params = list(net.parameters())

    if LEARN_PARAMS:
        theta_post = MeanField(PARAMS_DICT)
        theta_prior = D.normal.Normal(torch.zeros_like(theta_post.means),
                                      torch.ones_like(theta_post.std))
        # Without this the variational parameters receive gradients but are never stepped.
        # NOTE(review): assumes MeanField is an nn.Module exposing its parameters — confirm.
        if isinstance(theta_post, torch.nn.Module):
            trainable_params += list(theta_post.parameters())
    else:
        # Fixed parameters contribute nothing to the prior/posterior terms.
        zero = torch.zeros((), device = DEVICE)

    optimizer = optim.Adam(trainable_params, lr = PRETRAIN_LR)

    # Loop-invariant inputs: move every forcing tensor to DEVICE once
    # (TEMP_TENSOR included, so all inputs of calc_log_lik share a device).
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)

    # Pretraining target: time-mean of the observations for the first STATE_DIM rows.
    pretrain_target = torch.mean(OBS_MODEL.mu[:STATE_DIM], -1)

    # Best values are tracked as plain floats so no autograd graph is kept alive.
    best_loss_norm = 1e10
    best_loss_ELBO = 1e20
    norm_losses = []
    ELBO_losses = []

    net.train()
    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for it in range(NITER):
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #Obtain sampled paths and their log-density under the flow.

            if it < PRETRAIN_ITER:
                # Summed over batch, time and state.
                l1_norm = torch.sum(torch.abs(C_PATH - pretrain_target))
                norm_losses.append(l1_norm.item())
                best_loss_norm = min(best_loss_norm, norm_losses[-1])

                if (it + 1) % PRINT_EVERY == 0:
                    recent = norm_losses[-10:]
                    print(f"Moving average norm loss at {it + 1} iterations is: {sum(recent) / len(recent)}. Best norm loss value is: {best_loss_norm}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\nC_PATH =', C_PATH)
                l1_norm.backward()

            else:
                if LEARN_PARAMS:
                    theta_dict, theta, log_q_theta = theta_post()
                    log_p_theta = theta_prior.log_prob(theta).sum(-1)
                else:
                    theta_dict = PARAMS_DICT
                    log_q_theta = log_p_theta = zero
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev,
                                       temp_dev, TEMP_REF, DRIFT_DIFFUSION, X0_PRIOR, theta_dict)

                # Negative ELBO (the quantity being minimized):
                # - log p(theta) + log q(theta) + log q(x|theta) - log p(x|theta) - log p(y|x, theta)
                neg_ELBO = -log_p_theta.mean() + log_q_theta.mean() - log_lik.mean() - OBS_MODEL(C_PATH, theta_dict) + log_prob.mean()
                ELBO_losses.append(neg_ELBO.item())
                best_loss_ELBO = min(best_loss_ELBO, ELBO_losses[-1])

                if (it + 1) % PRINT_EVERY == 0:
                    recent = ELBO_losses[-10:]
                    print(f"Moving average ELBO loss at {it + 1} iterations is: {sum(recent) / len(recent)}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nC_PATH mean =', C_PATH.mean(-2))
                    print('\n C_PATH =', C_PATH)
                    print(theta_dict)
                neg_ELBO.backward()

            torch.nn.utils.clip_grad_norm_(trainable_params, 3.0)
            # Learning-rate schedule: switch to TRAIN_LR on the first ELBO iteration,
            # then decay at multiples of DECAY_STEP_SIZE.
            if it == PRETRAIN_ITER:
                for group in optimizer.param_groups:
                    group['lr'] = TRAIN_LR
            elif it % DECAY_STEP_SIZE == 0 and it > PRETRAIN_ITER:
                for group in optimizer.param_groups:
                    group['lr'] *= LR_DECAY
            optimizer.step()
            tq.update()

    return net, ELBO_losses

In [None]:
#Run the two-phase training with fixed SCON parameters (LEARN_PARAMS keeps its default).
#elbo_hist receives the per-iteration loss list returned by train().
net, elbo_hist = train(
    DEVICE = devi,
    PRETRAIN_LR = pretrain_lr,
    TRAIN_LR = train_lr,
    NITER = niter,
    PRETRAIN_ITER = piter,
    BATCH_SIZE = batch_size,
    OBS_MODEL = obs_model_CON_noCO2,
    STATE_DIM = state_dim_SCON,
    T = t,
    DT = dt_flow,
    N = n,
    T_SPAN_TENSOR = t_span_tensor,
    I_S_TENSOR = i_s_tensor,
    I_D_TENSOR = i_d_tensor,
    TEMP_TENSOR = temp_tensor,
    TEMP_REF = temp_ref,
    DRIFT_DIFFUSION = drift_diffusion_SCON_C,
    X0_PRIOR = x0_prior_SCON,
    PARAMS_DICT = SCON_C_params_dict,
    LR_DECAY = 0.1,
    DECAY_STEP_SIZE = 5000,
    PRINT_EVERY = 20,
)


Train Diffusion:   0%|          | 0/8000 [00:00<?, ?it/s][A
Train Diffusion:   0%|          | 1/8000 [00:01<2:53:56,  1.30s/it][A
Train Diffusion:   0%|          | 2/8000 [00:02<3:09:14,  1.42s/it][A
Train Diffusion:   0%|          | 3/8000 [00:04<2:58:32,  1.34s/it][A
Train Diffusion:   0%|          | 4/8000 [00:05<2:54:55,  1.31s/it][A
Train Diffusion:   0%|          | 5/8000 [00:06<2:52:12,  1.29s/it][A
Train Diffusion:   0%|          | 6/8000 [00:07<2:55:20,  1.32s/it][A
Train Diffusion:   0%|          | 7/8000 [00:09<2:53:13,  1.30s/it][A
Train Diffusion:   0%|          | 8/8000 [00:10<2:51:43,  1.29s/it][A
Train Diffusion:   0%|          | 9/8000 [00:11<2:51:20,  1.29s/it][A
Train Diffusion:   0%|          | 10/8000 [00:12<2:48:56,  1.27s/it][A
Train Diffusion:   0%|          | 11/8000 [00:14<2:55:02,  1.31s/it][A
Train Diffusion:   0%|          | 12/8000 [00:15<3:01:39,  1.36s/it][A
Train Diffusion:   0%|          | 13/8000 [00:17<3:07:41,  1.41s/it][A
Train Diffu

Moving average norm loss at 20 iterations is: 138578.0234375. Best norm loss value is: 136924.609375.

C_PATH mean = tensor([[2.7733, 1.8322, 2.1825]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[2.1678, 2.2944, 1.4699],
         [4.0385, 2.1068, 2.1170],
         [3.0309, 1.3308, 1.6844],
         ...,
         [2.0467, 1.9366, 1.9951],
         [2.2684, 1.9922, 2.1868],
         [2.9475, 2.0167, 2.3615]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 20/8000 [00:27<3:04:54,  1.39s/it][A
Train Diffusion:   0%|          | 21/8000 [00:28<2:57:42,  1.34s/it][A
Train Diffusion:   0%|          | 22/8000 [00:29<2:52:39,  1.30s/it][A
Train Diffusion:   0%|          | 23/8000 [00:30<2:50:21,  1.28s/it][A
Train Diffusion:   0%|          | 24/8000 [00:32<2:47:18,  1.26s/it][A
Train Diffusion:   0%|          | 25/8000 [00:33<2:44:42,  1.24s/it][A
Train Diffusion:   0%|          | 26/8000 [00:34<2:42:45,  1.22s/it][A
Train Diffusion:   0%|          | 27/8000 [00:35<2:41:54,  1.22s/it][A
Train Diffusion:   0%|          | 28/8000 [00:37<2:43:43,  1.23s/it][A
Train Diffusion:   0%|          | 29/8000 [00:38<2:42:13,  1.22s/it][A
Train Diffusion:   0%|          | 30/8000 [00:39<2:40:54,  1.21s/it][A
Train Diffusion:   0%|          | 31/8000 [00:40<2:43:21,  1.23s/it][A
Train Diffusion:   0%|          | 32/8000 [00:42<3:06:57,  1.41s/it][A
Train Diffusion:   0%|          | 33/8000 [00:44<3:12:25,  1.45

Moving average norm loss at 40 iterations is: 63014.218359375. Best norm loss value is: 39095.8046875.

C_PATH mean = tensor([[45.4296,  2.1178,  4.4245]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[8.2299e+00, 1.3211e+01, 1.7933e-01],
         [4.6190e+01, 6.2099e-02, 3.6540e+00],
         [4.5194e+01, 9.1306e-01, 1.7967e+00],
         ...,
         [3.3539e+01, 1.0637e+00, 4.4884e+00],
         [4.9989e+01, 6.4840e+00, 2.0689e+00],
         [5.4887e+01, 4.0985e-02, 1.7647e+00]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 40/8000 [00:53<2:51:37,  1.29s/it][A
Train Diffusion:   1%|          | 41/8000 [00:54<2:52:34,  1.30s/it][A
Train Diffusion:   1%|          | 42/8000 [00:55<2:47:43,  1.26s/it][A
Train Diffusion:   1%|          | 43/8000 [00:56<2:48:50,  1.27s/it][A
Train Diffusion:   1%|          | 44/8000 [00:58<2:54:41,  1.32s/it][A
Train Diffusion:   1%|          | 45/8000 [00:59<3:07:17,  1.41s/it][A
Train Diffusion:   1%|          | 46/8000 [01:01<3:11:32,  1.44s/it][A
Train Diffusion:   1%|          | 47/8000 [01:02<3:11:07,  1.44s/it][A
Train Diffusion:   1%|          | 48/8000 [01:04<3:03:16,  1.38s/it][A
Train Diffusion:   1%|          | 49/8000 [01:05<2:58:38,  1.35s/it][A
Train Diffusion:   1%|          | 50/8000 [01:06<2:57:31,  1.34s/it][A
Train Diffusion:   1%|          | 51/8000 [01:07<2:51:36,  1.30s/it][A
Train Diffusion:   1%|          | 52/8000 [01:09<2:49:14,  1.28s/it][A
Train Diffusion:   1%|          | 53/8000 [01:10<2:49:38,  1.28

Moving average norm loss at 60 iterations is: 9184.20615234375. Best norm loss value is: 5919.875.

C_PATH mean = tensor([[46.3777,  0.0489,  2.1293]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1995e+01, 1.0084e+01, 9.6223e-01],
         [4.8253e+01, 4.0465e-03, 1.4659e+00],
         [4.7120e+01, 9.2603e-03, 2.0885e+00],
         ...,
         [4.4200e+01, 2.4457e-02, 1.4167e+00],
         [4.9366e+01, 3.8721e-01, 2.3090e+00],
         [4.6901e+01, 9.3227e-02, 1.8994e+00]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 60/8000 [01:19<2:44:57,  1.25s/it][A
Train Diffusion:   1%|          | 61/8000 [01:20<2:46:46,  1.26s/it][A
Train Diffusion:   1%|          | 62/8000 [01:21<2:46:19,  1.26s/it][A
Train Diffusion:   1%|          | 63/8000 [01:22<2:44:43,  1.25s/it][A
Train Diffusion:   1%|          | 64/8000 [01:24<2:42:15,  1.23s/it][A
Train Diffusion:   1%|          | 65/8000 [01:25<2:43:20,  1.24s/it][A
Train Diffusion:   1%|          | 66/8000 [01:26<2:41:27,  1.22s/it][A
Train Diffusion:   1%|          | 67/8000 [01:27<2:48:05,  1.27s/it][A
Train Diffusion:   1%|          | 68/8000 [01:29<2:55:23,  1.33s/it][A
Train Diffusion:   1%|          | 69/8000 [01:30<2:50:34,  1.29s/it][A
Train Diffusion:   1%|          | 70/8000 [01:31<2:55:15,  1.33s/it][A
Train Diffusion:   1%|          | 71/8000 [01:33<2:54:40,  1.32s/it][A
Train Diffusion:   1%|          | 72/8000 [01:34<3:02:14,  1.38s/it][A
Train Diffusion:   1%|          | 73/8000 [01:35<2:55:09,  1.33

Moving average norm loss at 80 iterations is: 4274.085034179688. Best norm loss value is: 3182.9716796875.

C_PATH mean = tensor([[46.6295,  0.3028,  2.1088]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[13.7202, 11.4175,  1.6617],
         [47.3938,  0.1939,  1.7456],
         [46.0878,  0.1423,  2.2843],
         ...,
         [48.3449,  0.3014,  1.9142],
         [47.1578,  1.6195,  2.1529],
         [46.1197,  0.7536,  1.9972]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 80/8000 [01:44<2:55:25,  1.33s/it][A
Train Diffusion:   1%|          | 81/8000 [01:46<2:50:30,  1.29s/it][A
Train Diffusion:   1%|          | 82/8000 [01:47<2:47:40,  1.27s/it][A
Train Diffusion:   1%|          | 83/8000 [01:48<2:44:59,  1.25s/it][A
Train Diffusion:   1%|          | 84/8000 [01:49<2:51:07,  1.30s/it][A
Train Diffusion:   1%|          | 85/8000 [01:51<2:59:18,  1.36s/it][A
Train Diffusion:   1%|          | 86/8000 [01:52<2:57:46,  1.35s/it][A
Train Diffusion:   1%|          | 87/8000 [01:53<2:53:12,  1.31s/it][A
Train Diffusion:   1%|          | 88/8000 [01:55<2:48:23,  1.28s/it][A
Train Diffusion:   1%|          | 89/8000 [01:56<2:53:49,  1.32s/it][A
Train Diffusion:   1%|          | 90/8000 [01:57<2:49:53,  1.29s/it][A
Train Diffusion:   1%|          | 91/8000 [01:59<2:57:07,  1.34s/it][A
Train Diffusion:   1%|          | 92/8000 [02:00<2:57:25,  1.35s/it][A
Train Diffusion:   1%|          | 93/8000 [02:01<2:53:39,  1.32

Moving average norm loss at 100 iterations is: 1915.597314453125. Best norm loss value is: 1465.5552978515625.

C_PATH mean = tensor([[46.5440,  0.4925,  2.0687]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[14.4158, 12.4961,  1.1946],
         [47.1750,  0.4338,  1.9910],
         [46.7847,  0.5501,  2.0755],
         ...,
         [46.4693,  0.6751,  1.9216],
         [45.9727,  0.4673,  2.1226],
         [47.3374,  0.4159,  2.0934]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|▏         | 100/8000 [02:10<2:42:09,  1.23s/it][A
Train Diffusion:   1%|▏         | 101/8000 [02:11<2:40:42,  1.22s/it][A
Train Diffusion:   1%|▏         | 102/8000 [02:13<2:40:07,  1.22s/it][A
Train Diffusion:   1%|▏         | 103/8000 [02:14<2:38:29,  1.20s/it][A
Train Diffusion:   1%|▏         | 104/8000 [02:15<2:37:02,  1.19s/it][A
Train Diffusion:   1%|▏         | 105/8000 [02:16<2:37:00,  1.19s/it][A
Train Diffusion:   1%|▏         | 106/8000 [02:17<2:36:38,  1.19s/it][A
Train Diffusion:   1%|▏         | 107/8000 [02:19<2:36:16,  1.19s/it][A
Train Diffusion:   1%|▏         | 108/8000 [02:20<2:37:32,  1.20s/it][A
Train Diffusion:   1%|▏         | 109/8000 [02:21<2:37:48,  1.20s/it][A
Train Diffusion:   1%|▏         | 110/8000 [02:22<2:36:43,  1.19s/it][A
Train Diffusion:   1%|▏         | 111/8000 [02:23<2:36:13,  1.19s/it][A
Train Diffusion:   1%|▏         | 112/8000 [02:24<2:35:45,  1.18s/it][A
Train Diffusion:   1%|▏         | 113/8000 [02:26<

Moving average ELBO loss at 120 iterations is: 2392113.6375. Best ELBO loss value is: 1540875.875.

C_PATH mean = tensor([[46.2183,  0.4796,  2.7333]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[15.4312,  9.1404,  2.2668],
         [48.2164,  0.5913,  2.8081],
         [46.3562,  0.5042,  2.5660],
         ...,
         [46.0903,  0.7312,  2.3662],
         [45.8858,  0.6359,  2.9236],
         [47.0447,  0.5117,  2.7394]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 120/8000 [02:34<2:35:55,  1.19s/it][A
Train Diffusion:   2%|▏         | 121/8000 [02:35<2:36:13,  1.19s/it][A
Train Diffusion:   2%|▏         | 122/8000 [02:36<2:35:35,  1.18s/it][A
Train Diffusion:   2%|▏         | 123/8000 [02:38<2:43:11,  1.24s/it][A
Train Diffusion:   2%|▏         | 124/8000 [02:39<2:51:44,  1.31s/it][A
Train Diffusion:   2%|▏         | 125/8000 [02:40<2:46:24,  1.27s/it][A
Train Diffusion:   2%|▏         | 126/8000 [02:41<2:42:02,  1.23s/it][A
Train Diffusion:   2%|▏         | 127/8000 [02:43<2:49:39,  1.29s/it][A
Train Diffusion:   2%|▏         | 128/8000 [02:45<3:01:31,  1.38s/it][A
Train Diffusion:   2%|▏         | 129/8000 [02:46<3:04:47,  1.41s/it][A
Train Diffusion:   2%|▏         | 130/8000 [02:47<2:55:55,  1.34s/it][A
Train Diffusion:   2%|▏         | 131/8000 [02:48<2:49:34,  1.29s/it][A
Train Diffusion:   2%|▏         | 132/8000 [02:50<2:45:20,  1.26s/it][A
Train Diffusion:   2%|▏         | 133/8000 [02:51<

Moving average ELBO loss at 140 iterations is: -162850.32109375. Best ELBO loss value is: -218414.9375.

C_PATH mean = tensor([[45.3349,  0.0553,  2.6343]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[17.5399,  0.2000,  2.5804],
         [48.5607,  0.0777,  2.5905],
         [45.0398,  0.0546,  2.6302],
         ...,
         [45.0399,  0.0824,  2.4715],
         [46.1966,  0.0916,  2.7133],
         [46.8972,  0.0949,  2.5903]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 140/8000 [03:00<3:03:27,  1.40s/it][A
Train Diffusion:   2%|▏         | 141/8000 [03:01<3:02:35,  1.39s/it][A
Train Diffusion:   2%|▏         | 142/8000 [03:03<3:02:17,  1.39s/it][A
Train Diffusion:   2%|▏         | 143/8000 [03:04<2:54:50,  1.34s/it][A
Train Diffusion:   2%|▏         | 144/8000 [03:05<2:59:40,  1.37s/it][A
Train Diffusion:   2%|▏         | 145/8000 [03:07<2:55:59,  1.34s/it][A
Train Diffusion:   2%|▏         | 146/8000 [03:08<2:51:29,  1.31s/it][A
Train Diffusion:   2%|▏         | 147/8000 [03:09<2:49:27,  1.29s/it][A
Train Diffusion:   2%|▏         | 148/8000 [03:11<2:54:15,  1.33s/it][A
Train Diffusion:   2%|▏         | 149/8000 [03:12<2:49:44,  1.30s/it][A
Train Diffusion:   2%|▏         | 150/8000 [03:13<2:50:00,  1.30s/it][A
Train Diffusion:   2%|▏         | 151/8000 [03:14<2:48:59,  1.29s/it][A
Train Diffusion:   2%|▏         | 152/8000 [03:16<2:50:02,  1.30s/it][A
Train Diffusion:   2%|▏         | 153/8000 [03:17<

Moving average ELBO loss at 160 iterations is: -280631.515625. Best ELBO loss value is: -291784.375.

C_PATH mean = tensor([[4.4766e+01, 4.1051e-02, 2.1599e+00]], grad_fn=<MeanBackward1>)

 C_PATH = tensor([[[1.5430e+01, 1.4650e-01, 1.9169e+00],
         [4.5897e+01, 4.1553e-02, 2.0588e+00],
         [4.4365e+01, 3.2109e-02, 2.0754e+00],
         ...,
         [4.4487e+01, 5.6297e-02, 2.0590e+00],
         [4.4604e+01, 1.3663e-01, 2.2615e+00],
         [4.4760e+01, 9.6662e-02, 2.2425e+00]]], grad_fn=<AddBackward0>)
{'u_M': 0.002, 'a_SD': 0.33, 'a_DS': 0.33, 'a_M': 0.33, 'a_MSC': 0.5, 'k_S_ref': 2.5e-05, 'k_D_ref': 0.005, 'k_M_ref': 0.0002, 'Ea_S': 75, 'Ea_D': 50, 'Ea_M': 50, 'c_SOC': 1.0, 'c_DOC': 0.01, 'c_MBC': 0.05}



Train Diffusion:   2%|▏         | 160/8000 [03:26<3:02:25,  1.40s/it][A
Train Diffusion:   2%|▏         | 161/8000 [03:28<2:56:20,  1.35s/it][A
Train Diffusion:   2%|▏         | 162/8000 [03:29<2:57:05,  1.36s/it][A

In [None]:
#Persist the trained flow to 'net.pt' in the current working directory.
#NOTE(review): this saves the whole module object rather than net.state_dict(); loading it back presumably
#requires the same class definitions to be importable — confirm this is the intended checkpoint format.
torch.save(net, 'net.pt')

In [None]:
def plot_post(x, obs_model, state_idx=0, num_samples=20,
              ymin=None, ymax=None):
    """Plot the sample mean and a +/- 2 standard deviation band of sampled paths for one state.

    Args:
        x: tensor of sampled paths indexed as [sample, time, state]. Its time axis must
            have the same length as the notebook-level `t_span` grid.
        obs_model: object exposing `.times` and `.mu` (indexed [state, observation]);
            the observations for `state_idx` are overlaid as markers.
        state_idx: index of the state to plot; also selects the y-axis label from
            ['SOC', 'DOC', 'MBC'].
        num_samples: only used in the figure title; it is not checked against x.shape[0].
        ymin, ymax: optional y-axis limits.
    """
    paths = x[:, :, state_idx].detach().cpu()
    q_mean = paths.mean(0)
    # The (unbiased) standard deviation of a single sample is NaN; draw a zero-width band instead.
    q_std = paths.std(0) if paths.shape[0] > 1 else torch.zeros_like(q_mean)

    # Reuse the notebook's time grid instead of rebuilding it with a float-step arange,
    # whose length is sensitive to rounding (and which relied on an undefined `dt`).
    hours = t_span
    q_mean, q_std = q_mean.numpy(), q_std.numpy()

    plt.plot(hours, q_mean, label='Posterior mean')
    plt.fill_between(hours, q_mean - 2*q_std, q_mean + 2*q_std, alpha=0.5,
                     label=r'Posterior $\mu \pm 2\sigma$')
    # .mu is moved to CPU so plotting also works when the observation model lives on a GPU.
    plt.plot(obs_model.times, obs_model.mu[state_idx, :].detach().cpu(), linestyle='None', marker='o',
             label='Observed')

    plt.legend()
    plt.xlabel('Hour')
    plt.ylabel(['SOC', 'DOC', 'MBC'][state_idx])
    plt.ylim((ymin, ymax))
    plt.title('Approximate posterior $q(x|\\theta, y)$\nNumber of samples = {}'.format(num_samples))

In [None]:
#Draw a single path from the trained flow so that `x` exists on a fresh kernel
#(it was not defined by any earlier cell), then plot state 0.
net.eval()
with torch.no_grad():
    x, _ = net(1)
plot_post(x, obs_model_CON_noCO2, 0, num_samples=1)

In [None]:
#Draw ten paths from the trained flow so that `x10` exists on a fresh kernel
#(it was not defined by any earlier cell), then plot state 0.
net.eval()
with torch.no_grad():
    x10, _ = net(10)
plot_post(x10, obs_model_CON_noCO2, 0, num_samples=10)

In [None]:
#Single-sample view of state index 1.
plot_post(x, obs_model_CON_noCO2, state_idx=1, num_samples=1)

In [None]:
#Ten-sample view of state index 1.
plot_post(x10, obs_model_CON_noCO2, state_idx=1, num_samples=10)

In [None]:
#Single-sample view of state index 2.
plot_post(x, obs_model_CON_noCO2, state_idx=2, num_samples=1)

In [None]:
#Ten-sample view of state index 2.
plot_post(x10, obs_model_CON_noCO2, state_idx=2, num_samples=10)

In [None]:
#Display the sampled path tensor as the cell output.
#NOTE(review): `x` is not assigned by any code visible in the original notebook — confirm where it comes from.
x

In [None]:
def plot_elbo(elbo_hist, xmin=0, ymax=None, yscale='linear', title=None):
    """Plot a loss history against its 1-based iteration number on the current axes.

    Args:
        elbo_hist: sequence of per-iteration values.
        xmin: number of leading entries to skip; the x axis starts at xmin + 1.
        ymax: optional upper y-limit (the lower limit is left automatic).
        yscale: matplotlib y-axis scale name.
        title: optional figure title.
    """
    shown_values = elbo_hist[xmin:]
    iteration_numbers = torch.arange(xmin + 1, len(elbo_hist) + 1)

    axes = plt.gca()
    axes.plot(iteration_numbers, shown_values)
    axes.set_ylim((None, ymax))
    axes.set_yscale(yscale)
    axes.set_ylabel('ELBO')
    axes.set_xlabel('Iteration')
    axes.set_title(title)

In [None]:
#Full loss history returned by train().
plot_elbo(elbo_hist = elbo_hist, title = 'All iterations')

In [None]:
#Same history with the first 1,000 recorded entries skipped.
plot_elbo(elbo_hist = elbo_hist, xmin = 1000, title = 'Excludes first 1,000 iterations')