In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

#Torch-related imports
import torch
import torch.distributions as D
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Function

#Model-specific imports
from SBM_SDE import *
from obs_and_flow import *
from training import calc_log_lik

In [2]:
#Seed the PyTorch and NumPy global random number generators so that repeated runs draw the same samples.
torch.manual_seed(seed = 0)
np.random.seed(seed = 0)

In [3]:
#Temperature forcing settings.
temp_ref, temp_rise = 283, 5 #temp_rise: high estimate of 5 celsius temperature rise by 2100.

#System parameters from deterministic CON model
u_M = 0.002
a_SD, a_DS, a_M = 0.33, 0.33, 0.33
a_MSC = 0.5
k_S_ref, k_D_ref, k_M_ref = 0.000025, 0.005, 0.0002
Ea_S, Ea_D, Ea_M = 75, 50, 50

#SCON diffusion matrix parameters
c_SOC, c_DOC, c_MBC = 1.0, 0.001, 0.01
s_SOC, s_DOC, s_MBC = 0.001, 0.001, 0.001

#Entries shared by both SCON parameter dictionaries; insertion order is preserved when unpacked below.
_SCON_shared_params = dict(
    u_M = u_M,
    a_SD = a_SD,
    a_DS = a_DS,
    a_M = a_M,
    a_MSC = a_MSC,
    k_S_ref = k_S_ref,
    k_D_ref = k_D_ref,
    k_M_ref = k_M_ref,
    Ea_S = Ea_S,
    Ea_D = Ea_D,
    Ea_M = Ea_M,
)

#Shared parameters followed by the `c_*` diffusion parameters.
SCON_C_params_dict = {**_SCON_shared_params, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC}
#Shared parameters followed by the `s_*` diffusion parameters.
SCON_SS_params_dict = {**_SCON_shared_params, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC}

#System parameters from deterministic AWB model
#u_Q_ref = 0.2
#Q = 0.002
#a_MSA = 0.5
#K_D = 200
#K_U = 1
#V_D_ref = 0.4
#V_U_ref = 0.02
#Ea_V_D = 75
#Ea_V_U = 50
#r_M = 0.0004
#r_E = 0.00001
#r_L = 0.0005

#SAWB diffusion matrix parameters
#c_SOC = 2
#c_DOC = 0.05
#c_MBC = 0.1
#c_EEC = 0.01
#s_SOC = 0.1
#s_DOC = 0.1
#s_MBC = 0.1
#s_EEC = 0.1

#SAWB_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
#SAWB_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

#System parameters from deterministic AWB-ECA model
#u_Q_ref = 0.2
#Q = 0.002
#a_MSA = 0.5
#K_DE = 200
#K_UE = 1
#V_DE_ref = 0.4
#V_UE_ref = 0.02
#Ea_V_DE = 75
#Ea_V_UE = 50
#r_M = 0.0004
#r_E = 0.00001
#r_L = 0.0005

#SAWB-ECA diffusion matrix parameters
#c_SOC = 2
#c_DOC = 0.05
#c_MBC = 0.1
#c_EEC = 0.01
#s_SOC = 0.1
#s_DOC = 0.1
#s_MBC = 0.1
#s_EEC = 0.1

#SAWB_ECA_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
#SAWB_ECA_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

In [4]:
#Set flow NN parameters.

#Index of the CUDA device to use when a GPU is available. Defined here explicitly:
#without it, building the device string raises NameError on any machine where CUDA is available.
cuda_id = 0
devi = torch.device(f'cuda:{cuda_id}' if torch.cuda.is_available() else 'cpu')
dt_flow = 0.1 #Spacing of the time grid built below.
t = 500 #End point of the time grid.
n_flow = int(round(t / dt_flow)) + 1 #Number of grid points including both end points; round() guards against float truncation of t / dt_flow.
t_span = np.linspace(0, t, n_flow)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n_flow, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
l_r = 1e-4 #Learning rate handed to the optimizer.
niter = 5001 #Total number of training iterations.
piter = 11 #Pretraining iteration bound handed to train() as PRETRAIN_ITER.
batch_size = 3 #Number of sets of observation outputs to sample per set of parameters.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
obs_error_scale = 0.1 #Proportion of the mean of observation error standard deviation.

x0_SCON = [58, 0.08, 0.8] #Initial condition means for SCON

In [5]:
#Evaluate the temperature forcing on the flow time grid and show it.
temp_tensor = temp_gen(t_span_tensor, temp_ref, temp_rise)
print(temp_tensor)

#Evaluate the exogenous SOC (i_s) and DOC (i_d) litter input functions on the same grid for use in flow SDE functions.
i_s_tensor, i_d_tensor = i_s(t_span_tensor), i_d(t_span_tensor)
print(i_s_tensor, i_d_tensor, sep = '\n')

tensor([[[283.0000],
         [283.2625],
         [283.5248],
         ...,
         [277.6021],
         [277.7247],
         [277.8533]]])
tensor([[[0.0010],
         [0.0010],
         [0.0010],
         ...,
         [0.0012],
         [0.0012],
         [0.0012]]])
tensor([[[1.0000e-04],
         [1.0000e-04],
         [1.0001e-04],
         ...,
         [1.1754e-04],
         [1.1755e-04],
         [1.1755e-04]]])


In [6]:
def _report_progress(label, iteration, losses, best_loss, c_path, window = 10):
    """Print the moving average of the last `window` losses, the best loss so far, and the current sampled paths."""
    recent = losses[-window:]
    ma_loss = sum(recent) / len(recent)
    print(f"\nMoving average {label} loss at {iteration} iterations is: {ma_loss}. Best {label} loss value is: {best_loss}.")
    print('\nC_PATH mean =', c_path.mean(-2))
    print('\nC_PATH =', c_path)

def train(DEVICE, L_R, NITER, PRETRAIN_ITER, BATCH_SIZE, SDEFLOW, ObsModel, csv_to_obs_df, DATA_CSV, OBS_ERROR_SCALE, STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR, TEMP_REF, C0, DRIFT_DIFFUSION, PARAMS_DICT): 
    """Train a flow network with an L1 pretraining phase followed by negative-ELBO minimisation.

    Iterations with index `i <= PRETRAIN_ITER` minimise the summed absolute difference
    between the sampled paths and the last-axis mean of `obs_model.mu`. Later iterations
    minimise the negative ELBO assembled from the initial-state prior, `calc_log_lik`,
    the observation model and the flow's `log_prob`. Progress is printed every 10 iterations.

    Parameters
    ----------
    DEVICE : torch device on which the network and tensors are placed.
    L_R : learning rate passed to Adam.
    NITER : total number of optimisation iterations.
    PRETRAIN_ITER : last iteration index (inclusive) that uses the L1 loss; must be < NITER.
    BATCH_SIZE : value passed to the network call; also the length the parameter tensors are expanded to.
    SDEFLOW : callable that constructs the flow network.
    ObsModel : callable that constructs the observation model.
    csv_to_obs_df : callable returning `(obs_times, obs_means, obs_error)` for DATA_CSV.
    DATA_CSV : path handed to `csv_to_obs_df`.
    OBS_ERROR_SCALE : handed to `csv_to_obs_df`; also scales C0 to give the prior standard deviation.
    STATE_DIM, T, DT, N : forwarded to `csv_to_obs_df`, `ObsModel` and `SDEFLOW` as shown in the body.
    T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, TEMP_TENSOR : tensors moved to DEVICE and passed to `calc_log_lik`.
    TEMP_REF, DRIFT_DIFFUSION : forwarded unchanged to `calc_log_lik`.
    C0 : list of initial-condition means; becomes the mean of the Normal prior on the first path state.
    PARAMS_DICT : mapping of parameter name to scalar value.

    Returns
    -------
    (net, ELBO_losses, norm_losses) : the trained network and the per-iteration loss
    values (Python floats) recorded in the ELBO and pretraining phases respectively.

    Raises
    ------
    ValueError : if PRETRAIN_ITER >= NITER.
    """
    #Validate before any file I/O or model construction so bad arguments fail fast.
    if PRETRAIN_ITER >= NITER:
        raise ValueError("PRETRAIN_ITER must be < NITER.")
    #Read-in observation information.
    obs_times, obs_means, obs_error = csv_to_obs_df(DATA_CSV, STATE_DIM, T, OBS_ERROR_SCALE)
    obs_means = LowerBound.apply(obs_means, 1e-6)
    #Pass observation information to `ObsModel`.
    obs_model = ObsModel(DEVICE, obs_times, DT, obs_means, obs_error)
    #Build the network from the SDEFLOW argument rather than a global of a similar name.
    net = SDEFLOW(DEVICE, obs_model, STATE_DIM, T, DT, N, I_S_TENSOR, I_D_TENSOR, cond_inputs = 3, num_layers = 6).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = L_R)
    #Best losses are tracked as plain floats so no autograd graph is kept alive between iterations.
    best_loss_norm = float('inf')
    best_loss_ELBO = float('inf')
    norm_losses = []
    ELBO_losses = []
    C0_tensor = torch.tensor(C0).to(DEVICE) #Convert initial conditions from list to tensor for X0 prior object.
    #Parameter tensors live on DEVICE like every other tensor used in the loss.
    PARAMS_DICT_TENSOR = {k: torch.tensor(v).to(DEVICE).expand(BATCH_SIZE) for k, v in PARAMS_DICT.items()}
    X0_prior = D.normal.Normal(loc = C0_tensor, scale = OBS_ERROR_SCALE * C0_tensor) #Setting prior noise = observation noise for now.
    #Loop-invariant device transfers are done once instead of on every iteration.
    t_span_dev = T_SPAN_TENSOR.to(DEVICE)
    i_s_dev = I_S_TENSOR.to(DEVICE)
    i_d_dev = I_D_TENSOR.to(DEVICE)
    temp_dev = TEMP_TENSOR.to(DEVICE)
    with tqdm(total = NITER, desc = 'Train Diffusion', position = -1) as tq:
        for i in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net(BATCH_SIZE) #For obs_and_flow.py
            if i <= PRETRAIN_ITER:
                #Pretraining: L1 distance between sampled paths and the mean of the observation means.
                #(An L2 alternative would be torch.sqrt(torch.sum(torch.square(...))).)
                loss = torch.sum(torch.abs(C_PATH - torch.mean(obs_model.mu, -1)))
                norm_losses.append(loss.item())
                best_loss_norm = min(best_loss_norm, norm_losses[-1])
                if i % 10 == 0:
                    _report_progress('norm', i, norm_losses, best_loss_norm, C_PATH)
            else:
                log_lik = calc_log_lik(C_PATH, t_span_dev, DT, i_s_dev, i_d_dev, temp_dev, TEMP_REF, DRIFT_DIFFUSION, PARAMS_DICT)
                loss = -X0_prior.log_prob(C_PATH[:, 0, :]).sum(-1).mean() - log_lik.mean() - obs_model(C_PATH, PARAMS_DICT_TENSOR) + log_prob.mean()
                ELBO_losses.append(loss.item())
                best_loss_ELBO = min(best_loss_ELBO, ELBO_losses[-1])
                if i % 10 == 0:
                    _report_progress('ELBO', i, ELBO_losses, best_loss_ELBO, C_PATH)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            optimizer.step()
            #Decay the learning rate of the first parameter group tenfold every 100000 iterations.
            if i % 100000 == 0 and i > 0:
                optimizer.param_groups[0]['lr'] *= 0.1
            tq.update()
    return net, ELBO_losses, norm_losses

In [None]:
#Train the SCON-C flow; every argument is named after the matching `train` parameter.
net, ELBO_losses, norm_losses = train(
    DEVICE = devi,
    L_R = l_r,
    NITER = niter,
    PRETRAIN_ITER = piter,
    BATCH_SIZE = batch_size,
    SDEFLOW = SDEFlow,
    ObsModel = ObsModel,
    csv_to_obs_df = csv_to_obs_df,
    DATA_CSV = 'y_from_x_t_1000_dt_0-01.csv',
    OBS_ERROR_SCALE = obs_error_scale,
    STATE_DIM = state_dim_SCON,
    T = t,
    DT = dt_flow,
    N = n_flow,
    T_SPAN_TENSOR = t_span_tensor,
    I_S_TENSOR = i_s_tensor,
    I_D_TENSOR = i_d_tensor,
    TEMP_TENSOR = temp_tensor,
    TEMP_REF = temp_ref,
    C0 = x0_SCON,
    DRIFT_DIFFUSION = drift_diffusion_SCON_C,
    PARAMS_DICT = SCON_C_params_dict,
)


Train Diffusion:   0%|          | 0/5001 [00:00<?, ?it/s][A


Moving average norm loss at <built-in function iter> iterations is: 349720.375. Best norm loss value is: 349720.375.

C_PATH mean = tensor([[0.8584, 0.8820, 0.8543],
        [0.8721, 0.8674, 0.8597],
        [0.8598, 0.8649, 0.8632]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0218, 0.6919, 0.8867],
         [1.8316, 0.8502, 0.2385],
         [0.1398, 1.1947, 2.1356],
         ...,
         [0.6374, 0.6841, 0.5368],
         [0.6461, 0.5776, 0.6641],
         [0.5732, 0.6179, 0.5520]],

        [[0.4191, 2.0637, 0.9564],
         [0.4066, 2.0826, 2.2696],
         [0.3793, 0.3986, 1.0027],
         ...,
         [0.7034, 0.9312, 0.5633],
         [0.7227, 1.4726, 0.7203],
         [0.6977, 0.8745, 0.6807]],

        [[0.7853, 0.7400, 0.9096],
         [0.6049, 0.3987, 1.9713],
         [2.9533, 1.6398, 0.8749],
         ...,
         [0.7032, 0.6070, 0.7062],
         [1.2515, 0.5188, 0.7244],
         [1.0447, 0.5262, 0.6705]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 1/5001 [00:08<11:18:59,  8.15s/it][A
Train Diffusion:   0%|          | 2/5001 [00:19<13:54:47, 10.02s/it][A
Train Diffusion:   0%|          | 3/5001 [00:26<12:05:56,  8.71s/it][A
Train Diffusion:   0%|          | 4/5001 [00:34<11:23:01,  8.20s/it][A
Train Diffusion:   0%|          | 5/5001 [00:41<10:59:29,  7.92s/it][A
Train Diffusion:   0%|          | 6/5001 [00:49<10:58:38,  7.91s/it][A
Train Diffusion:   0%|          | 7/5001 [00:59<12:01:45,  8.67s/it][A
Train Diffusion:   0%|          | 8/5001 [01:10<13:02:21,  9.40s/it][A
Train Diffusion:   0%|          | 9/5001 [01:19<12:43:37,  9.18s/it][A
Train Diffusion:   0%|          | 10/5001 [01:27<12:21:28,  8.91s/it][A


Moving average norm loss at <built-in function iter> iterations is: 347886.60625. Best norm loss value is: 347068.9375.

C_PATH mean = tensor([[0.8830, 0.8472, 0.8778],
        [0.8809, 0.8361, 0.8867],
        [0.8848, 0.8455, 0.8766]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[0.6927, 1.4634, 0.5655],
         [0.4717, 1.1919, 0.5402],
         [0.3733, 0.9304, 0.7381],
         ...,
         [1.7922, 1.5345, 0.6658],
         [0.9461, 1.8681, 1.0650],
         [1.8426, 1.0644, 1.7578]],

        [[0.5727, 0.7949, 0.9657],
         [0.8539, 0.7746, 1.1958],
         [1.0880, 1.1378, 1.3254],
         ...,
         [0.6920, 0.9547, 0.6856],
         [1.5384, 0.7290, 1.0364],
         [0.7049, 0.8775, 1.1417]],

        [[1.1351, 0.7101, 1.1230],
         [0.9844, 0.5827, 1.6516],
         [1.4695, 0.5683, 1.2795],
         ...,
         [0.9261, 0.2739, 1.5602],
         [0.6078, 0.6114, 0.5592],
         [0.6799, 0.8383, 0.3778]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 11/5001 [01:38<13:08:46,  9.48s/it][A
Train Diffusion:   0%|          | 12/5001 [01:47<13:06:40,  9.46s/it][A
Train Diffusion:   0%|          | 13/5001 [01:56<12:37:20,  9.11s/it][A
Train Diffusion:   0%|          | 14/5001 [02:04<12:14:32,  8.84s/it][A
Train Diffusion:   0%|          | 15/5001 [02:13<12:29:43,  9.02s/it][A
Train Diffusion:   0%|          | 16/5001 [02:22<12:12:59,  8.82s/it][A
Train Diffusion:   0%|          | 17/5001 [02:30<12:02:41,  8.70s/it][A
Train Diffusion:   0%|          | 18/5001 [02:40<12:22:56,  8.95s/it][A
Train Diffusion:   0%|          | 19/5001 [02:47<11:38:51,  8.42s/it][A
Train Diffusion:   0%|          | 20/5001 [02:54<11:08:34,  8.05s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 4421611.0. Best ELBO loss value is: 4092925.25.

C_PATH mean = tensor([[0.8884, 0.8610, 0.8861],
        [0.8805, 0.8523, 0.8913],
        [0.8777, 0.8600, 0.8881]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0057, 0.9813, 0.8984],
         [1.2410, 1.3869, 1.8016],
         [1.3417, 0.8899, 0.6894],
         ...,
         [0.6307, 1.2531, 0.9027],
         [1.4490, 0.7660, 1.2088],
         [1.5966, 0.9884, 1.5641]],

        [[0.9860, 0.6877, 1.3181],
         [0.7738, 0.7099, 0.8185],
         [1.3863, 1.1849, 0.8972],
         ...,
         [1.1020, 0.9128, 0.7485],
         [0.5639, 1.3836, 0.7277],
         [0.9724, 0.7323, 0.9489]],

        [[0.5252, 1.4926, 0.6636],
         [0.6323, 0.6338, 0.6224],
         [0.4264, 0.7404, 1.2991],
         ...,
         [1.0026, 0.6846, 0.7710],
         [1.0451, 0.5054, 0.9791],
         [0.6984, 0.7746, 0.5374]]], grad_fn=<AddBackward0>)



Train Diffusion:   0%|          | 21/5001 [03:01<10:39:34,  7.71s/it][A
Train Diffusion:   0%|          | 22/5001 [03:08<10:37:35,  7.68s/it][A
Train Diffusion:   0%|          | 23/5001 [03:16<10:33:49,  7.64s/it][A
Train Diffusion:   0%|          | 24/5001 [03:24<10:38:31,  7.70s/it][A
Train Diffusion:   0%|          | 25/5001 [03:31<10:37:22,  7.69s/it][A
Train Diffusion:   1%|          | 26/5001 [03:39<10:44:07,  7.77s/it][A
Train Diffusion:   1%|          | 27/5001 [03:48<10:56:38,  7.92s/it][A
Train Diffusion:   1%|          | 28/5001 [03:55<10:47:11,  7.81s/it][A
Train Diffusion:   1%|          | 29/5001 [04:02<10:30:20,  7.61s/it][A
Train Diffusion:   1%|          | 30/5001 [04:10<10:34:26,  7.66s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 4630884.15. Best ELBO loss value is: 4092925.25.

C_PATH mean = tensor([[0.8822, 0.8721, 0.8812],
        [0.8831, 0.8627, 0.8910],
        [0.8827, 0.8628, 0.8815]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.1922, 0.7179, 1.3648],
         [0.4590, 0.5904, 1.3978],
         [0.2464, 0.6945, 1.1175],
         ...,
         [1.0311, 0.8987, 0.9803],
         [0.6873, 0.7459, 0.9495],
         [0.6828, 1.0099, 1.0245]],

        [[0.6893, 1.4771, 0.6778],
         [1.1480, 0.9456, 0.5423],
         [1.4028, 1.0793, 0.8168],
         ...,
         [0.8693, 1.1156, 0.5299],
         [1.0313, 1.0484, 0.7558],
         [1.5176, 0.8006, 1.4204]],

        [[0.6544, 1.0726, 0.8556],
         [1.2595, 0.9379, 1.0622],
         [0.9761, 1.0308, 1.1605],
         ...,
         [1.0043, 0.8342, 0.8967],
         [1.2819, 0.9010, 1.0130],
         [0.6788, 0.8563, 0.5016]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 31/5001 [04:18<10:39:42,  7.72s/it][A
Train Diffusion:   1%|          | 32/5001 [04:26<10:35:05,  7.67s/it][A
Train Diffusion:   1%|          | 33/5001 [04:33<10:35:36,  7.68s/it][A
Train Diffusion:   1%|          | 34/5001 [04:41<10:43:54,  7.78s/it][A
Train Diffusion:   1%|          | 35/5001 [04:48<10:29:25,  7.60s/it][A
Train Diffusion:   1%|          | 36/5001 [04:56<10:31:02,  7.63s/it][A
Train Diffusion:   1%|          | 37/5001 [05:04<10:29:58,  7.61s/it][A
Train Diffusion:   1%|          | 38/5001 [05:12<10:42:11,  7.76s/it][A
Train Diffusion:   1%|          | 39/5001 [05:19<10:31:12,  7.63s/it][A
Train Diffusion:   1%|          | 40/5001 [05:27<10:30:48,  7.63s/it][A


Moving average ELBO loss at <built-in function iter> iterations is: 4426364.3. Best ELBO loss value is: 4092925.25.

C_PATH mean = tensor([[0.8921, 0.8769, 0.9020],
        [0.8849, 0.8833, 0.8925],
        [0.8984, 0.8855, 0.8970]], grad_fn=<MeanBackward1>)

C_PATH = tensor([[[1.0867, 1.2120, 1.2307],
         [0.9543, 0.5473, 0.7298],
         [1.7225, 0.6783, 0.9732],
         ...,
         [1.1993, 1.3577, 0.9150],
         [0.8375, 1.1138, 0.9630],
         [1.6010, 0.9198, 0.4767]],

        [[0.7167, 0.7134, 0.9377],
         [0.6005, 0.9846, 0.7284],
         [0.9824, 0.9836, 0.8532],
         ...,
         [0.4934, 0.9480, 0.7120],
         [0.8546, 0.9848, 0.9581],
         [1.1981, 0.9045, 1.2523]],

        [[0.6514, 1.3182, 0.6685],
         [1.1249, 1.0530, 2.1717],
         [0.3779, 1.1669, 1.3494],
         ...,
         [1.2633, 0.5675, 1.0718],
         [1.5942, 0.6159, 0.8985],
         [0.4180, 0.7836, 1.0380]]], grad_fn=<AddBackward0>)



Train Diffusion:   1%|          | 41/5001 [05:35<10:52:07,  7.89s/it][A
Train Diffusion:   1%|          | 42/5001 [05:44<11:00:28,  7.99s/it][A
Train Diffusion:   1%|          | 43/5001 [05:51<10:53:50,  7.91s/it][A