In [1]:
from SBM_SDE import *
from obs_and_flow_classes_and_functions import *
from get_CO2 import *
import seaborn as sns
import torch
from torch import nn
import torch.distributions as d
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import math
from tqdm import tqdm
import random
from torch.autograd import Function
import argparse
import os
import sys
from pathlib import Path
import shutil
import pandas as pd

In [2]:
torch.manual_seed(0)
devi = torch.device("".join(["cuda:",f'{cuda_id}']) if torch.cuda.is_available() else "cpu")

cuda_id = 1
dt = .2 #SDE discretization timestep.
t = 80 #Simulation run for T hours.
n = int(t / dt) 
t_span = np.linspace(0, t, n + 1)
t_span_tensor = torch.reshape(torch.Tensor(t_span), [1, n + 1, 1]) #T_span needs to be converted to tensor object. Additionally, facilitates conversion of I_S and I_D to tensor objects.
l_r = 1e-3
niter = 1001
piter = 500
batch_size = 2 #Number of sets of observation outputs to sample per set of parameters.
state_dim_SCON = 3 #Not including CO2 in STATE_DIM, because CO2 is an observation.
state_dim_SAWB = 4 #Not including CO2 in STATE_DIM, because CO2 is an observation.

In [3]:
temp_ref = 283

#System parameters from deterministic CON model
u_M = 0.002
a_SD = 0.33
a_DS = 0.33
a_M = 0.33
a_MSC = 0.5
k_S_ref = 0.000025
k_D_ref = 0.005
k_M_ref = 0.0002
Ea_S = 75
Ea_D = 50
Ea_M = 50

#SCON diffusion matrix sigma scale parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.1
s_SOC = 0.01
s_DOC = 0.01
s_MBC = 0.01

SCON_C_params_dict = {'u_M': u_M, 'a_SD': a_SD, 'a_DS': a_DS, 'a_M': a_M, 'a_MSC': a_MSC, 'k_S_ref': k_S_ref, 'k_D_ref': k_D_ref, 'k_M_ref': k_M_ref, 'Ea_S': Ea_S, 'Ea_D': Ea_D, 'Ea_M': Ea_M, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC}
SCON_SS_params_dict = {'u_M': u_M, 'a_SD': a_SD, 'a_DS': a_DS, 'a_M': a_M, 'a_MSC': a_MSC, 'k_S_ref': k_S_ref, 'k_D_ref': k_D_ref, 'k_M_ref': k_M_ref, 'Ea_S': Ea_S, 'Ea_D': Ea_D, 'Ea_M': Ea_M, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC}

In [4]:
#System parameters from deterministic AWB model
u_Q_ref = 0.2
Q = 0.002
a_MSA = 0.5
K_D = 200
K_U = 1
V_D_ref = 0.4
V_U_ref = 0.02
Ea_V_D = 75
Ea_V_U = 50
r_M = 0.0004
r_E = 0.00001
r_L = 0.0005

#SAWB diffusion matrix sigma scale parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.1
c_EEC = 0.001
s_SOC = 0.01
s_DOC = 0.01
s_MBC = 0.01
s_EEC = 0.01

SAWB_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
SAWB_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_D': K_D, 'K_U': K_U, 'V_D_ref': V_D_ref, 'V_U_ref': V_U_ref, 'Ea_V_D': Ea_V_D, 'Ea_V_U': Ea_V_U, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

In [5]:
#System parameters from deterministic model
u_Q_ref = 0.2
Q = 0.002
a_MSA = 0.5
K_DE = 200
K_UE = 1
V_DE_ref = 0.4
V_UE_ref = 0.02
Ea_V_DE = 75
Ea_V_UE = 50
r_M = 0.0004
r_E = 0.00001
r_L = 0.0005

#Diffusion matrix sigma scale parameters
c_SOC = 1.
c_DOC = 0.01
c_MBC = 0.1
c_EEC = 0.001
s_SOC = 0.01
s_DOC = 0.01
s_MBC = 0.01
s_EEC = 0.01

SAWB_ECA_C_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 'c_SOC': c_SOC, 'c_DOC': c_DOC, 'c_MBC': c_MBC, 'c_EEC': c_EEC}
SAWB_ECA_SS_params_dict = {'u_Q_ref': u_Q_ref, 'Q': Q, 'a_MSA': a_MSA, 'K_DE': K_DE, 'K_UE': K_UE, 'V_DE_ref': V_DE_ref, 'V_UE_ref': V_UE_ref, 'Ea_V_DE': Ea_V_DE, 'Ea_V_UE': Ea_V_UE, 'r_M': r_M, 'r_E': r_E, 'r_L': r_L, 's_SOC': s_SOC, 's_DOC': s_DOC, 's_MBC': s_MBC, 's_EEC': s_EEC}

In [6]:
#Obtain SOC and DOC pool litter inputs for all SBMs.
i_s_tensor = 0.001 + 0.0005 * torch.sin((2 * np.pi / (24 * 365)) * t_span_tensor) #Exogenous SOC input function
i_d_tensor = 0.0001 + 0.00005 * torch.sin((2 * np.pi / (24 * 365)) * t_span_tensor) #Exogenous DOC input function

In [7]:
def neg_log_lik(C_PATH, T_SPAN_TENSOR, DT, I_S_TENSOR, I_D_TENSOR, DRIFT_DIFFUSION, PARAMS_DICT, TEMP_GEN, TEMP_REF):
    drift, diffusion_sqrt = DRIFT_DIFFUSION(C_PATH[:, :-1, :], T_SPAN_TENSOR[:, :-1, :], I_S_TENSOR[:, :-1, :], I_D_TENSOR[:, :-1, :], PARAMS_DICT, TEMP_GEN, TEMP_REF)
    euler_maruyama_state_sample_object = d.multivariate_normal.MultivariateNormal(loc = C_PATH[:, :-1, :] + drift * DT, scale_tril = diffusion_sqrt * math.sqrt(DT))
    return -euler_maruyama_state_sample_object.log_prob(C_PATH[:, 1:, :]).sum(-1)

In [8]:
def train(DEVICE, L_R, NITER, PRETRAIN_ITER, BATCH_SIZE, ObsModel, csv_to_obs_df, DATA_CSV, OBS_ERROR_SCALE, STATE_DIM, T, DT, N, T_SPAN_TENSOR, I_S_TENSOR, I_D_TENSOR, DRIFT_DIFFUSION, PARAMS_DICT, TEMP_GEN, TEMP_REF, ANALYTICAL_STEADY_STATE_INIT, GET_CO2):
    obs_times, obs_means, obs_error = csv_to_obs_df(DATA_CSV, STATE_DIM + 1, T, OBS_ERROR_SCALE) #Need to +1 because data has CO2 observations on top of other state observations. 
    obs_model_no_CO2 = ObsModel(DEVICE, obs_times, DT, obs_means[:-1, :], obs_error[:, :-1]) #Hack for bypassing ObsModel and SDEFlow dimension mismatch issue.
    obs_model_CO2 = ObsModel(DEVICE, obs_times, DT, obs_means, obs_error)
    net = SDEFlow(DEVICE, BATCH_SIZE, obs_model_no_CO2, STATE_DIM, T, DT, N).to(DEVICE)
    optimizer = optim.Adam(net.parameters(), lr = L_R) 
    if PRETRAIN_ITER >= NITER:
        raise Exception("PRETRAIN_ITER must be < NITER.")
    best_loss_norm = 1e10
    best_loss_ELBO = 1e20
    norm_losses = [best_loss_norm] * 10
    ELBO_losses = [best_loss_ELBO] * 10
    C0 = ANALYTICAL_STEADY_STATE_INIT(I_S_TENSOR[0, 0, 0].item(), I_D_TENSOR[0, 0, 0].item(), PARAMS_DICT) #Calculate deterministic initial conditions.
    C0 = C0[(None,) * 2].repeat(BATCH_SIZE, 1, 1).to(DEVICE) #Assign initial conditions to C_PATH.
    with tqdm(total = NITER, desc = f'Train Diffusion', position = -1) as tq:
        for iter in range(NITER):
            net.train()
            optimizer.zero_grad()
            C_PATH, log_prob = net() #Obtain paths with solutions at times after t0.
            C_PATH = torch.cat([C0, C_PATH], 1) #Append deterministic CON initial conditions conditional on parameter values to C path. 
            CO2 = GET_CO2(C_PATH, T_SPAN_TENSOR, PARAMS_DICT, TEMP_GEN, TEMP_REF)
            x_with_CO2 = torch.cat([C_PATH, CO2], -1)
            if iter <= PRETRAIN_ITER:
                l1_norm_element = x_with_CO2 - torch.mean(obs_model_CO2.mu, -1)
                l1_norm = torch.sum(torch.abs(l1_norm_element)).mean()
                best_loss_norm = l1_norm if l1_norm < best_loss_norm else best_loss_norm
                norm_losses.append(l1_norm.item())
                #l2_norm_element = C_PATH - torch.mean(OBS_MODEL.mu, -1)
                #l2_norm = torch.sqrt(torch.sum(torch.square(l2_norm_element))).mean()
                #best_loss_norm = l2_norm if l2_norm < best_loss_norm else best_loss_norm
                #l2_norm.backward()
                #norm_losses.append(l2_norm.item())
                if len(norm_losses) > 10:
                    norm_losses.pop(0)
                if iter % 10 == 0:
                    print(f"Moving average norm loss at {iter} iterations is: {sum(norm_losses) / len(norm_losses)}. Best norm loss value is: {best_loss_norm}.")
                    print('\nx with CO2 means across time =', x_with_CO2.mean(-2))
                    print('\nx with CO2 =', x_with_CO2)
                l1_norm.backward()
            else:
                log_lik = neg_log_lik(C_PATH, T_SPAN_TENSOR.to(DEVICE), dt, I_S_TENSOR.to(DEVICE), I_D_TENSOR.to(DEVICE), DRIFT_DIFFUSION, PARAMS_DICT, TEMP_GEN, TEMP_REF)
                ELBO = log_prob.mean() + log_lik.mean() - obs_model_CO2(x_with_CO2) #obs_model_CO2(x_with_CO2) is obs log likelihood.
                best_loss_ELBO = ELBO if ELBO < best_loss_ELBO else best_loss_ELBO
                ELBO_losses.append(ELBO.item())
                if len(ELBO_losses) > 10:
                    ELBO_losses.pop(0)
                if iter % 10 == 0:
                    print(f"Moving average ELBO loss at {iter} iterations is: {sum(ELBO_losses) / len(ELBO_losses)}. Best ELBO loss value is: {best_loss_ELBO}.")
                    print('\nx with CO2 means across time =', x_with_CO2.mean(-2))
                    print('\nx with CO2 =', x_with_CO2)
                ELBO.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), 3.0)
            optimizer.step()
            if iter % 100000 == 0 and iter > 0:
                optimizer.param_groups[0]['lr'] *= 0.1
            tq.update()

In [9]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'CON_synthetic_sol_df.csv', 0.1, state_dim_SCON, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SCON_C, SCON_C_params_dict, temp_gen, temp_ref, analytical_steady_state_init_CON, get_CO2_CON)


Train Diffusion:   0%|          | 0/1001 [00:00<?, ?it/s][A

Moving average norm loss at 0 iterations is: 9000003631.015234. Best norm loss value is: 36310.15234375.

x with CO2 means across time = tensor([[1.0388, 0.6439, 0.6681, 0.0028],
        [1.0574, 0.6404, 0.6657, 0.0027]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [7.6812e-01, 1.2433e+00, 8.2550e-01, 4.4609e-03],
         [9.1740e-01, 5.4951e-01, 5.9819e-01, 2.0950e-03],
         ...,
         [3.6600e-01, 7.6164e-01, 8.0427e-01, 5.3980e-03],
         [8.6389e-01, 7.4108e-01, 6.9077e-01, 5.1695e-03],
         [4.5717e-01, 5.6408e-01, 5.7508e-01, 3.8701e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [5.5086e-01, 6.4781e-01, 6.5011e-01, 2.3576e-03],
         [1.0429e+00, 8.7233e-01, 5.4592e-01, 3.2595e-03],
         ...,
         [9.3240e-01, 4.2804e-01, 6.5494e-01, 3.1237e-03],
         [8.1329e-01, 5.3503e-01, 6.4596e-01, 3.7805e-03],
         [1.4088e+00, 7.0313e-01, 4.7968e-01, 4.8005e-03]]],


Train Diffusion:   0%|          | 1/1001 [00:00<07:00,  2.38it/s][A
Train Diffusion:   0%|          | 2/1001 [00:00<07:03,  2.36it/s][A
Train Diffusion:   0%|          | 3/1001 [00:01<07:01,  2.37it/s][A
Train Diffusion:   0%|          | 4/1001 [00:01<07:00,  2.37it/s][A
Train Diffusion:   0%|          | 5/1001 [00:02<06:57,  2.38it/s][A
Train Diffusion:   1%|          | 6/1001 [00:02<06:57,  2.39it/s][A
Train Diffusion:   1%|          | 7/1001 [00:02<06:54,  2.40it/s][A
Train Diffusion:   1%|          | 8/1001 [00:03<06:57,  2.38it/s][A
Train Diffusion:   1%|          | 9/1001 [00:03<06:45,  2.45it/s][A
Train Diffusion:   1%|          | 10/1001 [00:04<06:29,  2.55it/s][A

Moving average norm loss at 10 iterations is: 33974.274609375. Best norm loss value is: 32688.54296875.

x with CO2 means across time = tensor([[4.8656e+00, 1.1136e-01, 4.8638e-01, 6.5778e-04],
        [5.5126e+00, 1.0907e-01, 4.7853e-01, 6.5294e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.7205e-01, 2.6880e+00, 1.9677e-01, 9.3983e-03],
         [1.1084e+00, 2.9781e-01, 2.5177e-01, 1.1364e-03],
         ...,
         [6.7151e+00, 2.2825e-01, 6.8877e-01, 2.0575e-03],
         [1.0613e+00, 1.8272e-01, 4.9136e-01, 1.4002e-03],
         [5.2271e+00, 1.9681e-01, 1.5977e-01, 1.5720e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [6.9949e-01, 9.4035e-01, 2.5065e-01, 3.3239e-03],
         [4.0243e-01, 3.7797e-01, 1.9035e-01, 1.4047e-03],
         ...,
         [7.5101e-01, 1.9064e-01, 6.3985e-01, 1.5018e-03],
         [5.8079e+00, 2.4568e-01, 6.0935e-01, 2.0752e-03],
         [1.5569e-01, 


Train Diffusion:   1%|          | 11/1001 [00:04<06:21,  2.59it/s][A
Train Diffusion:   1%|          | 12/1001 [00:04<06:21,  2.59it/s][A
Train Diffusion:   1%|▏         | 13/1001 [00:05<06:15,  2.63it/s][A
Train Diffusion:   1%|▏         | 14/1001 [00:05<06:03,  2.72it/s][A
Train Diffusion:   1%|▏         | 15/1001 [00:05<05:54,  2.78it/s][A
Train Diffusion:   2%|▏         | 16/1001 [00:06<06:06,  2.69it/s][A
Train Diffusion:   2%|▏         | 17/1001 [00:06<05:54,  2.78it/s][A
Train Diffusion:   2%|▏         | 18/1001 [00:06<05:45,  2.84it/s][A
Train Diffusion:   2%|▏         | 19/1001 [00:07<05:49,  2.81it/s][A
Train Diffusion:   2%|▏         | 20/1001 [00:07<05:45,  2.84it/s][A

Moving average norm loss at 20 iterations is: 31262.97421875. Best norm loss value is: 30067.267578125.

x with CO2 means across time = tensor([[8.7368e+00, 5.3240e-02, 3.8904e-01, 4.9786e-04],
        [8.3068e+00, 5.2077e-02, 3.8780e-01, 4.7882e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [8.5007e-01, 6.3069e-01, 1.2170e-01, 2.2296e-03],
         [6.9072e-01, 6.0472e-02, 1.6473e-01, 2.5600e-04],
         ...,
         [9.7380e-01, 1.5531e-01, 6.0922e-01, 1.2648e-03],
         [9.6792e+00, 1.6689e-01, 4.4426e-01, 1.6878e-03],
         [7.2228e+00, 1.3986e-01, 9.1363e-02, 1.2722e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.2841e-01, 2.6254e-01, 1.6757e-01, 9.4399e-04],
         [3.9100e+00, 6.0573e-02, 1.2003e-01, 3.1054e-04],
         ...,
         [1.1398e+01, 1.0930e-01, 5.9716e-01, 1.4522e-03],
         [2.0155e+00, 9.8722e-02, 3.3043e-01, 8.4176e-04],
         [5.2151e-01, 


Train Diffusion:   2%|▏         | 21/1001 [00:08<05:47,  2.82it/s][A
Train Diffusion:   2%|▏         | 22/1001 [00:08<06:04,  2.68it/s][A
Train Diffusion:   2%|▏         | 23/1001 [00:08<06:03,  2.69it/s][A
Train Diffusion:   2%|▏         | 24/1001 [00:09<06:09,  2.65it/s][A
Train Diffusion:   2%|▏         | 25/1001 [00:09<05:53,  2.76it/s][A
Train Diffusion:   3%|▎         | 26/1001 [00:09<05:42,  2.85it/s][A
Train Diffusion:   3%|▎         | 27/1001 [00:10<05:34,  2.91it/s][A
Train Diffusion:   3%|▎         | 28/1001 [00:10<05:46,  2.81it/s][A
Train Diffusion:   3%|▎         | 29/1001 [00:10<05:43,  2.83it/s][A
Train Diffusion:   3%|▎         | 30/1001 [00:11<05:42,  2.83it/s][A

Moving average norm loss at 30 iterations is: 28412.9662109375. Best norm loss value is: 27063.833984375.

x with CO2 means across time = tensor([[1.2460e+01, 2.7428e-02, 3.6443e-01, 4.8152e-04],
        [1.2129e+01, 2.8175e-02, 3.6240e-01, 4.6947e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.4143e-01, 2.0451e-01, 1.0073e-01, 7.3271e-04],
         [1.3661e+00, 1.3973e-02, 1.1767e-01, 9.3422e-05],
         ...,
         [1.5224e+01, 8.7537e-02, 5.5401e-01, 1.4774e-03],
         [1.2140e+01, 1.1375e-01, 3.1955e-01, 1.4158e-03],
         [1.3706e-01, 9.1822e-02, 7.9429e-02, 6.2914e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.7522e+00, 3.2015e-01, 1.0127e-01, 1.1608e-03],
         [8.0615e+00, 2.0581e-02, 7.8692e-02, 2.3786e-04],
         ...,
         [1.5087e+00, 8.4864e-02, 6.0539e-01, 8.1205e-04],
         [1.2834e+00, 7.0147e-02, 4.2489e-01, 6.4184e-04],
         [7.5742e+00


Train Diffusion:   3%|▎         | 31/1001 [00:11<05:42,  2.83it/s][A
Train Diffusion:   3%|▎         | 32/1001 [00:12<06:05,  2.65it/s][A
Train Diffusion:   3%|▎         | 33/1001 [00:12<06:01,  2.68it/s][A
Train Diffusion:   3%|▎         | 34/1001 [00:12<05:47,  2.78it/s][A
Train Diffusion:   3%|▎         | 35/1001 [00:13<05:37,  2.86it/s][A
Train Diffusion:   4%|▎         | 36/1001 [00:13<05:30,  2.92it/s][A
Train Diffusion:   4%|▎         | 37/1001 [00:13<05:32,  2.90it/s][A
Train Diffusion:   4%|▍         | 38/1001 [00:14<05:33,  2.89it/s][A
Train Diffusion:   4%|▍         | 39/1001 [00:14<05:26,  2.94it/s][A
Train Diffusion:   4%|▍         | 40/1001 [00:14<05:22,  2.98it/s][A
Train Diffusion:   4%|▍         | 41/1001 [00:15<05:20,  3.00it/s][A

Moving average norm loss at 40 iterations is: 25304.7279296875. Best norm loss value is: 23888.33203125.

x with CO2 means across time = tensor([[1.6617e+01, 1.5804e-02, 3.7271e-01, 5.3385e-04],
        [1.5940e+01, 1.5595e-02, 3.7411e-01, 5.1817e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.8825e+00, 1.8646e-01, 6.1044e-02, 7.0942e-04],
         [1.1736e+01, 5.4129e-03, 7.1144e-02, 2.5104e-04],
         ...,
         [1.8302e+01, 6.1082e-02, 6.2614e-01, 1.4660e-03],
         [2.4823e+00, 7.8488e-02, 4.3334e-01, 7.5620e-04],
         [1.0895e+01, 6.3089e-02, 6.7169e-02, 9.3092e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6903e-01, 1.4863e-01, 8.9102e-02, 5.3509e-04],
         [1.9910e+00, 6.9416e-03, 8.7856e-02, 7.5396e-05],
         ...,
         [2.3850e-01, 5.9387e-02, 5.5409e-01, 5.6424e-04],
         [2.0798e+01, 5.4643e-02, 3.6485e-01, 1.4411e-03],
         [2.0430e-01,


Train Diffusion:   4%|▍         | 42/1001 [00:15<05:17,  3.02it/s][A
Train Diffusion:   4%|▍         | 43/1001 [00:15<05:20,  2.98it/s][A
Train Diffusion:   4%|▍         | 44/1001 [00:16<05:18,  3.00it/s][A
Train Diffusion:   4%|▍         | 45/1001 [00:16<05:30,  2.90it/s][A
Train Diffusion:   5%|▍         | 46/1001 [00:16<05:37,  2.83it/s][A
Train Diffusion:   5%|▍         | 47/1001 [00:17<05:33,  2.86it/s][A
Train Diffusion:   5%|▍         | 48/1001 [00:17<05:29,  2.89it/s][A
Train Diffusion:   5%|▍         | 49/1001 [00:17<05:23,  2.94it/s][A
Train Diffusion:   5%|▍         | 50/1001 [00:18<05:19,  2.98it/s][A
Train Diffusion:   5%|▌         | 51/1001 [00:18<05:16,  3.00it/s][A

Moving average norm loss at 50 iterations is: 22370.0576171875. Best norm loss value is: 21317.951171875.

x with CO2 means across time = tensor([[1.9565e+01, 1.0502e-02, 4.5760e-01, 5.9348e-04],
        [2.0442e+01, 1.0461e-02, 4.5790e-01, 6.2866e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1233e+00, 1.6393e-01, 8.5171e-02, 6.2079e-04],
         [1.7943e+00, 1.2665e-03, 8.3128e-02, 5.0443e-05],
         ...,
         [2.7971e+01, 4.7005e-02, 7.5884e-01, 1.8726e-03],
         [3.0945e+00, 6.8149e-02, 4.3854e-01, 7.1742e-04],
         [1.3540e+01, 7.4173e-02, 5.5546e-02, 1.1218e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.8695e-01, 1.0359e-01, 5.9860e-02, 3.7440e-04],
         [1.5068e+01, 1.5479e-03, 7.2847e-02, 3.0006e-04],
         ...,
         [5.0660e-01, 6.1937e-02, 7.3522e-01, 6.4355e-04],
         [2.5277e+01, 4.9931e-02, 5.5752e-01, 1.6720e-03],
         [6.3336e-02


Train Diffusion:   5%|▌         | 52/1001 [00:18<05:14,  3.01it/s][A
Train Diffusion:   5%|▌         | 53/1001 [00:19<05:18,  2.97it/s][A
Train Diffusion:   5%|▌         | 54/1001 [00:19<05:14,  3.01it/s][A
Train Diffusion:   5%|▌         | 55/1001 [00:19<05:12,  3.03it/s][A
Train Diffusion:   6%|▌         | 56/1001 [00:20<05:14,  3.01it/s][A
Train Diffusion:   6%|▌         | 57/1001 [00:20<05:44,  2.74it/s][A
Train Diffusion:   6%|▌         | 58/1001 [00:20<05:36,  2.81it/s][A
Train Diffusion:   6%|▌         | 59/1001 [00:21<05:32,  2.84it/s][A
Train Diffusion:   6%|▌         | 60/1001 [00:21<05:33,  2.83it/s][A

Moving average norm loss at 60 iterations is: 20616.405859375. Best norm loss value is: 20187.146484375.

x with CO2 means across time = tensor([[2.1968e+01, 1.3969e-02, 6.1936e-01, 7.1450e-04],
        [2.3943e+01, 1.4159e-02, 6.1276e-01, 7.4814e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.9979e+00, 5.5171e-02, 1.6973e-01, 2.8693e-04],
         [2.1652e+01, 1.0426e-04, 1.5712e-01, 4.3109e-04],
         ...,
         [2.7086e-01, 1.1015e-01, 1.1731e+00, 1.0777e-03],
         [3.7855e+00, 1.5064e-01, 6.5794e-01, 1.3589e-03],
         [1.6665e+01, 1.6211e-01, 7.5199e-02, 1.8469e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4168e-01, 5.8617e-02, 1.8422e-01, 2.3421e-04],
         [3.0466e+00, 9.6723e-05, 1.9290e-01, 8.5707e-05],
         ...,
         [3.0835e+01, 8.6429e-02, 7.1056e-01, 2.2648e-03],
         [3.1806e+01, 9.5659e-02, 5.9557e-01, 2.2946e-03],
         [7.0559e-02,


Train Diffusion:   6%|▌         | 61/1001 [00:21<05:32,  2.82it/s][A
Train Diffusion:   6%|▌         | 62/1001 [00:22<05:38,  2.78it/s][A
Train Diffusion:   6%|▋         | 63/1001 [00:22<05:41,  2.75it/s][A
Train Diffusion:   6%|▋         | 64/1001 [00:23<05:35,  2.79it/s][A
Train Diffusion:   6%|▋         | 65/1001 [00:23<05:26,  2.87it/s][A
Train Diffusion:   7%|▋         | 66/1001 [00:23<05:19,  2.93it/s][A
Train Diffusion:   7%|▋         | 67/1001 [00:24<05:16,  2.95it/s][A
Train Diffusion:   7%|▋         | 68/1001 [00:24<05:12,  2.98it/s][A
Train Diffusion:   7%|▋         | 69/1001 [00:24<05:10,  3.00it/s][A
Train Diffusion:   7%|▋         | 70/1001 [00:25<05:25,  2.86it/s][A

Moving average norm loss at 70 iterations is: 19696.4064453125. Best norm loss value is: 19245.73046875.

x with CO2 means across time = tensor([[2.5475e+01, 2.2061e-02, 5.7662e-01, 8.5125e-04],
        [2.2398e+01, 2.8822e-02, 5.8963e-01, 7.4460e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.9723e-01, 4.6436e-01, 3.4722e-01, 1.6699e-03],
         [5.8287e+00, 1.8124e-04, 2.2386e-01, 1.4292e-04],
         ...,
         [2.0273e+00, 1.2382e-01, 8.9120e-01, 1.1786e-03],
         [3.5676e+01, 1.8871e-01, 7.1165e-01, 3.1287e-03],
         [2.7385e+01, 3.3292e-01, 4.1331e-01, 3.5471e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.0689e+01, 3.3131e+00, 1.8414e-01, 1.1760e-02],
         [2.8331e+01, 1.3585e-04, 2.5707e-01, 5.7154e-04],
         ...,
         [3.3974e+01, 9.8333e-02, 6.7522e-01, 2.4872e-03],
         [7.3778e+00, 1.5147e-01, 6.0775e-01, 1.5202e-03],
         [2.9294e-01,


Train Diffusion:   7%|▋         | 71/1001 [00:25<05:26,  2.85it/s][A
Train Diffusion:   7%|▋         | 72/1001 [00:25<05:32,  2.79it/s][A
Train Diffusion:   7%|▋         | 73/1001 [00:26<05:47,  2.67it/s][A
Train Diffusion:   7%|▋         | 74/1001 [00:26<05:43,  2.70it/s][A
Train Diffusion:   7%|▋         | 75/1001 [00:26<05:33,  2.77it/s][A
Train Diffusion:   8%|▊         | 76/1001 [00:27<05:30,  2.80it/s][A
Train Diffusion:   8%|▊         | 77/1001 [00:27<05:44,  2.69it/s][A
Train Diffusion:   8%|▊         | 78/1001 [00:28<05:42,  2.69it/s][A
Train Diffusion:   8%|▊         | 79/1001 [00:28<05:29,  2.80it/s][A
Train Diffusion:   8%|▊         | 80/1001 [00:28<05:21,  2.87it/s][A
Train Diffusion:   8%|▊         | 81/1001 [00:29<05:16,  2.90it/s]

Moving average norm loss at 80 iterations is: 18449.93671875. Best norm loss value is: 17727.07421875.

x with CO2 means across time = tensor([[2.5043e+01, 1.3156e-02, 5.8627e-01, 7.6745e-04],
        [2.5477e+01, 1.4455e-02, 5.8184e-01, 7.9704e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.8423e-01, 4.6165e-01, 1.3896e-01, 1.6347e-03],
         [3.2712e+01, 3.7882e-05, 1.8141e-01, 6.4275e-04],
         ...,
         [3.6449e+01, 1.3533e-01, 7.8955e-01, 2.8883e-03],
         [3.9906e+01, 1.5955e-01, 6.8292e-01, 3.1255e-03],
         [3.0850e+01, 1.4690e-01, 3.0992e-01, 2.4593e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.6053e+01, 6.8352e-01, 2.3525e-01, 2.6997e-03],
         [9.6039e+00, 1.1151e-05, 1.7143e-01, 2.0583e-04],
         ...,
         [7.0592e+00, 6.2668e-02, 6.5565e-01, 9.4279e-04],
         [1.1646e+01, 9.9109e-02, 5.9915e-01, 1.3695e-03],
         [2.5003e+00, 1

[A
Train Diffusion:   8%|▊         | 82/1001 [00:29<05:24,  2.83it/s][A
Train Diffusion:   8%|▊         | 83/1001 [00:29<05:27,  2.80it/s][A
Train Diffusion:   8%|▊         | 84/1001 [00:30<05:24,  2.83it/s][A
Train Diffusion:   8%|▊         | 85/1001 [00:30<05:21,  2.85it/s][A
Train Diffusion:   9%|▊         | 86/1001 [00:30<05:19,  2.87it/s][A
Train Diffusion:   9%|▊         | 87/1001 [00:31<05:12,  2.92it/s][A
Train Diffusion:   9%|▉         | 88/1001 [00:31<05:07,  2.96it/s][A
Train Diffusion:   9%|▉         | 89/1001 [00:31<05:04,  2.99it/s][A
Train Diffusion:   9%|▉         | 90/1001 [00:32<05:02,  3.01it/s][A
Train Diffusion:   9%|▉         | 91/1001 [00:32<05:04,  2.99it/s][A

Moving average norm loss at 90 iterations is: 16650.37998046875. Best norm loss value is: 15683.0537109375.

x with CO2 means across time = tensor([[2.8151e+01, 6.9258e-03, 5.7306e-01, 8.2520e-04],
        [2.6146e+01, 7.4788e-03, 5.6792e-01, 7.6374e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.7451e+00, 9.0345e-02, 1.1230e-01, 3.6146e-04],
         [4.0687e+01, 1.6901e-06, 1.9979e-01, 7.9554e-04],
         ...,
         [1.3124e+01, 7.3481e-02, 6.9477e-01, 1.3191e-03],
         [1.7178e+01, 5.9834e-02, 6.1763e-01, 1.3728e-03],
         [2.9519e+01, 6.9066e-02, 1.4799e-01, 1.8454e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2525e+01, 1.1827e-01, 1.4219e-01, 8.3213e-04],
         [1.6311e+01, 7.7705e-07, 7.7054e-02, 3.1848e-04],
         ...,
         [3.7859e+01, 4.6090e-02, 6.1892e-01, 2.3052e-03],
         [4.3157e+01, 9.3230e-02, 5.5224e-01, 2.8013e-03],
         [7.0890e+


Train Diffusion:   9%|▉         | 92/1001 [00:32<05:01,  3.01it/s][A
Train Diffusion:   9%|▉         | 93/1001 [00:33<05:05,  2.97it/s][A
Train Diffusion:   9%|▉         | 94/1001 [00:33<05:03,  2.99it/s][A
Train Diffusion:   9%|▉         | 95/1001 [00:33<04:59,  3.02it/s][A
Train Diffusion:  10%|▉         | 96/1001 [00:34<04:57,  3.04it/s][A
Train Diffusion:  10%|▉         | 97/1001 [00:34<04:55,  3.06it/s][A
Train Diffusion:  10%|▉         | 98/1001 [00:34<04:54,  3.07it/s][A
Train Diffusion:  10%|▉         | 99/1001 [00:35<04:53,  3.07it/s][A
Train Diffusion:  10%|▉         | 100/1001 [00:35<04:51,  3.09it/s][A
Train Diffusion:  10%|█         | 101/1001 [00:35<04:51,  3.09it/s][A

Moving average norm loss at 100 iterations is: 14366.8494140625. Best norm loss value is: 13273.453125.

x with CO2 means across time = tensor([[2.9225e+01, 5.4169e-03, 4.9959e-01, 8.1993e-04],
        [3.0821e+01, 3.0948e-03, 5.0382e-01, 8.4742e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [5.0154e+00, 1.2036e+00, 5.1424e-02, 4.2901e-03],
         [4.6740e+01, 1.2881e-06, 6.4309e-02, 8.8995e-04],
         ...,
         [4.0805e+01, 1.6464e-02, 4.0541e-01, 2.1885e-03],
         [2.4481e+01, 1.7271e-02, 4.0351e-01, 1.3755e-03],
         [3.4474e+01, 3.3804e-02, 8.9134e-02, 1.8260e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.7030e+01, 2.5372e-01, 5.0104e-02, 1.3713e-03],
         [2.3068e+01, 6.3018e-07, 4.5715e-02, 4.4126e-04],
         ...,
         [1.8819e+01, 2.2454e-02, 6.1797e-01, 1.2269e-03],
         [4.6281e+01, 3.2454e-02, 5.2472e-01, 2.5355e-03],
         [1.1507e+01, 


Train Diffusion:  10%|█         | 102/1001 [00:36<04:53,  3.07it/s][A
Train Diffusion:  10%|█         | 103/1001 [00:36<04:57,  3.02it/s][A
Train Diffusion:  10%|█         | 104/1001 [00:36<04:55,  3.04it/s][A
Train Diffusion:  10%|█         | 105/1001 [00:37<04:54,  3.05it/s][A
Train Diffusion:  11%|█         | 106/1001 [00:37<04:58,  3.00it/s][A
Train Diffusion:  11%|█         | 107/1001 [00:37<05:12,  2.86it/s][A
Train Diffusion:  11%|█         | 108/1001 [00:38<05:10,  2.88it/s][A
Train Diffusion:  11%|█         | 109/1001 [00:38<05:07,  2.91it/s][A
Train Diffusion:  11%|█         | 110/1001 [00:38<05:03,  2.94it/s][A

Moving average norm loss at 110 iterations is: 11874.2267578125. Best norm loss value is: 10740.2216796875.

x with CO2 means across time = tensor([[3.3037e+01, 1.4516e-03, 4.2502e-01, 8.8335e-04],
        [3.3283e+01, 3.0625e-03, 4.2057e-01, 8.9429e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6219e+01, 2.2415e-01, 1.3844e-02, 1.2488e-03],
         [4.5765e+01, 3.5613e-07, 1.4843e-02, 8.6440e-04],
         ...,
         [2.5234e+01, 4.7481e-03, 3.6193e-01, 1.3467e-03],
         [2.8840e+01, 8.7599e-03, 3.6957e-01, 1.5149e-03],
         [3.1992e+01, 1.7349e-02, 5.0097e-02, 1.5941e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [7.8260e+00, 8.5887e-01, 1.8789e-02, 3.1343e-03],
         [2.6618e+01, 5.1044e-07, 2.1283e-02, 5.0458e-04],
         ...,
         [4.1978e+01, 4.3122e-03, 6.1839e-01, 2.2204e-03],
         [4.6712e+01, 5.0896e-03, 5.6630e-01, 2.3843e-03],
         [1.6685e+


Train Diffusion:  11%|█         | 111/1001 [00:39<05:05,  2.91it/s][A
Train Diffusion:  11%|█         | 112/1001 [00:39<05:06,  2.90it/s][A
Train Diffusion:  11%|█▏        | 113/1001 [00:39<05:23,  2.75it/s][A
Train Diffusion:  11%|█▏        | 114/1001 [00:40<05:18,  2.79it/s][A
Train Diffusion:  11%|█▏        | 115/1001 [00:40<05:33,  2.65it/s][A
Train Diffusion:  12%|█▏        | 116/1001 [00:41<05:26,  2.71it/s][A
Train Diffusion:  12%|█▏        | 117/1001 [00:41<05:27,  2.70it/s][A
Train Diffusion:  12%|█▏        | 118/1001 [00:41<05:29,  2.68it/s][A
Train Diffusion:  12%|█▏        | 119/1001 [00:42<05:22,  2.73it/s][A
Train Diffusion:  12%|█▏        | 120/1001 [00:42<05:13,  2.81it/s][A
Train Diffusion:  12%|█▏        | 121/1001 [00:42<05:06,  2.87it/s]

Moving average norm loss at 120 iterations is: 9282.288623046876. Best norm loss value is: 8054.28955078125.

x with CO2 means across time = tensor([[3.6142e+01, 1.2058e-03, 3.7513e-01, 9.5319e-04],
        [3.6680e+01, 1.7710e-03, 3.6588e-01, 9.5776e-04]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.0749e+01, 2.6884e-01, 1.8184e-02, 1.1303e-03],
         [3.0700e+01, 9.9326e-08, 1.7593e-02, 5.8097e-04],
         ...,
         [3.0043e+01, 2.2984e-03, 1.5335e-01, 1.5054e-03],
         [3.3782e+01, 1.8021e-03, 1.2155e-01, 1.6349e-03],
         [1.7583e+01, 8.8424e-03, 1.5413e-02, 8.6841e-04]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5933e+01, 4.9400e-01, 4.3131e-03, 2.1827e-03],
         [4.5940e+01, 3.3145e-07, 6.5814e-03, 8.6650e-04],
         ...,
         [4.3049e+01, 1.5833e-03, 6.5630e-01, 2.2638e-03],
         [4.5680e+01, 3.4291e-03, 5.8207e-01, 2.3289e-03],
         [3.0484e

[A
Train Diffusion:  12%|█▏        | 122/1001 [00:43<05:01,  2.92it/s][A
Train Diffusion:  12%|█▏        | 123/1001 [00:43<05:00,  2.92it/s][A
Train Diffusion:  12%|█▏        | 124/1001 [00:43<04:56,  2.96it/s][A
Train Diffusion:  12%|█▏        | 125/1001 [00:44<04:52,  2.99it/s][A
Train Diffusion:  13%|█▎        | 126/1001 [00:44<04:48,  3.03it/s][A
Train Diffusion:  13%|█▎        | 127/1001 [00:44<04:46,  3.05it/s][A
Train Diffusion:  13%|█▎        | 128/1001 [00:45<04:45,  3.06it/s][A
Train Diffusion:  13%|█▎        | 129/1001 [00:45<04:45,  3.05it/s][A
Train Diffusion:  13%|█▎        | 130/1001 [00:45<04:45,  3.05it/s][A
Train Diffusion:  13%|█▎        | 131/1001 [00:46<04:45,  3.05it/s][A

Moving average norm loss at 130 iterations is: 6496.8041015625. Best norm loss value is: 5204.03515625.

x with CO2 means across time = tensor([[3.9962e+01, 3.1133e-03, 3.4202e-01, 1.0462e-03],
        [3.9798e+01, 7.0965e-04, 3.5454e-01, 1.0291e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4856e+01, 1.0416e+00, 6.8120e-03, 4.0719e-03],
         [4.5663e+01, 7.6607e-07, 4.4982e-03, 8.6099e-04],
         ...,
         [4.5091e+01, 9.3666e-05, 2.9211e-01, 2.2534e-03],
         [4.0158e+01, 2.1828e-04, 4.2224e-01, 2.0048e-03],
         [3.0966e+01, 5.3857e-03, 6.2047e-02, 1.4717e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.3894e+01, 1.1154e-01, 6.4157e-03, 6.3641e-04],
         [3.4947e+01, 1.4778e-08, 1.0858e-02, 6.6000e-04],
         ...,
         [3.6734e+01, 2.3627e-03, 6.1497e-01, 1.9535e-03],
         [4.6384e+01, 5.0733e-03, 4.2151e-01, 2.3301e-03],
         [2.1760e+01, 


Train Diffusion:  13%|█▎        | 132/1001 [00:46<04:43,  3.06it/s][A
Train Diffusion:  13%|█▎        | 133/1001 [00:46<04:48,  3.01it/s][A
Train Diffusion:  13%|█▎        | 134/1001 [00:47<04:47,  3.02it/s][A
Train Diffusion:  13%|█▎        | 135/1001 [00:47<04:44,  3.05it/s][A
Train Diffusion:  14%|█▎        | 136/1001 [00:47<04:53,  2.95it/s][A
Train Diffusion:  14%|█▎        | 137/1001 [00:48<05:10,  2.78it/s][A
Train Diffusion:  14%|█▍        | 138/1001 [00:48<05:16,  2.72it/s][A
Train Diffusion:  14%|█▍        | 139/1001 [00:48<05:06,  2.81it/s][A
Train Diffusion:  14%|█▍        | 140/1001 [00:49<05:00,  2.86it/s][A

Moving average norm loss at 140 iterations is: 3567.799072265625. Best norm loss value is: 2235.867919921875.

x with CO2 means across time = tensor([[4.3564e+01, 4.3364e-03, 1.5194e-01, 1.1036e-03],
        [4.3703e+01, 2.5412e-03, 1.7341e-01, 1.1073e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.7483e+01, 1.5680e+00, 1.6555e-04, 5.7742e-03],
         [4.0317e+01, 3.3436e-05, 2.0983e-04, 7.5976e-04],
         ...,
         [4.6051e+01, 1.0710e-05, 1.3072e-01, 2.2554e-03],
         [4.5810e+01, 2.8545e-05, 1.2375e-01, 2.1900e-03],
         [2.9021e+01, 2.9672e-03, 2.9899e-02, 1.3583e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4128e+01, 8.3981e-01, 3.8985e-03, 3.3555e-03],
         [4.5902e+01, 3.6246e-08, 5.6170e-03, 8.6565e-04],
         ...,
         [4.5937e+01, 5.8411e-04, 6.5114e-01, 2.3949e-03],
         [4.5443e+01, 3.1877e-03, 3.6009e-01, 2.2568e-03],
         [2.4616


Train Diffusion:  14%|█▍        | 141/1001 [00:49<04:57,  2.89it/s][A
Train Diffusion:  14%|█▍        | 142/1001 [00:49<05:00,  2.86it/s][A
Train Diffusion:  14%|█▍        | 143/1001 [00:50<05:00,  2.85it/s][A
Train Diffusion:  14%|█▍        | 144/1001 [00:50<04:53,  2.92it/s][A
Train Diffusion:  14%|█▍        | 145/1001 [00:50<04:53,  2.92it/s][A
Train Diffusion:  15%|█▍        | 146/1001 [00:51<04:54,  2.90it/s][A
Train Diffusion:  15%|█▍        | 147/1001 [00:51<04:55,  2.89it/s][A
Train Diffusion:  15%|█▍        | 148/1001 [00:51<04:59,  2.85it/s][A
Train Diffusion:  15%|█▍        | 149/1001 [00:52<05:01,  2.82it/s][A
Train Diffusion:  15%|█▍        | 150/1001 [00:52<05:00,  2.83it/s][A

Moving average norm loss at 150 iterations is: 1054.940283203125. Best norm loss value is: 604.62646484375.

x with CO2 means across time = tensor([[4.5745e+01, 6.8726e-03, 8.7626e-01, 1.2953e-03],
        [4.5752e+01, 8.4091e-03, 8.6253e-01, 1.3033e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.9357e+01, 2.2336e-08, 4.9652e-01, 4.1314e-04],
         [4.3948e+01, 1.0000e-08, 4.3587e-01, 8.9119e-04],
         ...,
         [4.6179e+01, 3.2696e-02, 1.2320e+00, 2.7816e-03],
         [4.4442e+01, 8.6012e-03, 1.0209e+00, 2.4222e-03],
         [2.3192e+01, 3.2543e-01, 2.9181e-01, 3.2738e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2372e+01, 1.0346e-05, 2.5164e-02, 4.0106e-04],
         [4.2990e+01, 1.1161e-08, 6.1748e-02, 8.1891e-04],
         ...,
         [4.5877e+01, 1.4390e-02, 1.3045e+00, 2.6627e-03],
         [4.4664e+01, 3.3989e-01, 1.2136e+00, 4.6945e-03],
         [2.2131e+


Train Diffusion:  15%|█▌        | 151/1001 [00:53<04:57,  2.85it/s][A
Train Diffusion:  15%|█▌        | 152/1001 [00:53<04:57,  2.85it/s][A
Train Diffusion:  15%|█▌        | 153/1001 [00:53<05:03,  2.79it/s][A
Train Diffusion:  15%|█▌        | 154/1001 [00:54<04:59,  2.83it/s][A
Train Diffusion:  15%|█▌        | 155/1001 [00:54<04:55,  2.86it/s][A
Train Diffusion:  16%|█▌        | 156/1001 [00:54<04:55,  2.86it/s][A
Train Diffusion:  16%|█▌        | 157/1001 [00:55<04:53,  2.88it/s][A
Train Diffusion:  16%|█▌        | 158/1001 [00:55<04:51,  2.89it/s][A
Train Diffusion:  16%|█▌        | 159/1001 [00:55<04:51,  2.88it/s][A
Train Diffusion:  16%|█▌        | 160/1001 [00:56<04:50,  2.90it/s][A

Moving average norm loss at 160 iterations is: 719.2888916015625. Best norm loss value is: 570.9617919921875.

x with CO2 means across time = tensor([[4.5712e+01, 2.8970e-02, 9.1163e-01, 1.4102e-03],
        [4.5682e+01, 2.5388e-02, 9.1159e-01, 1.3925e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2664e+01, 3.6098e-07, 4.1789e-01, 4.6095e-04],
         [4.9199e+01, 1.9448e-08, 7.2973e-01, 1.0327e-03],
         ...,
         [4.6708e+01, 2.1358e-01, 1.0048e+00, 3.9716e-03],
         [4.6309e+01, 2.4280e-01, 8.0484e-01, 4.0150e-03],
         [2.6330e+01, 2.7668e-01, 2.0537e-01, 3.0754e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5609e+01, 3.0561e-04, 2.7539e-01, 4.9447e-04],
         [4.9921e+01, 1.0000e-08, 3.7590e-01, 9.9503e-04],
         ...,
         [4.6412e+01, 3.0105e-01, 9.7004e-01, 4.5408e-03],
         [4.5800e+01, 5.5919e-01, 8.1143e-01, 6.1038e-03],
         [2.4109


Train Diffusion:  16%|█▌        | 161/1001 [00:56<04:48,  2.91it/s][A
Train Diffusion:  16%|█▌        | 162/1001 [00:56<04:52,  2.87it/s][A
Train Diffusion:  16%|█▋        | 163/1001 [00:57<04:53,  2.86it/s][A
Train Diffusion:  16%|█▋        | 164/1001 [00:57<04:49,  2.89it/s][A
Train Diffusion:  16%|█▋        | 165/1001 [00:57<04:53,  2.84it/s][A
Train Diffusion:  17%|█▋        | 166/1001 [00:58<04:48,  2.89it/s][A
Train Diffusion:  17%|█▋        | 167/1001 [00:58<04:45,  2.92it/s][A
Train Diffusion:  17%|█▋        | 168/1001 [00:58<04:45,  2.92it/s][A
Train Diffusion:  17%|█▋        | 169/1001 [00:59<04:43,  2.93it/s][A
Train Diffusion:  17%|█▋        | 170/1001 [00:59<04:44,  2.92it/s][A

Moving average norm loss at 170 iterations is: 636.4568725585938. Best norm loss value is: 473.1927490234375.

x with CO2 means across time = tensor([[4.5418e+01, 2.1648e-02, 7.5576e-01, 1.3405e-03],
        [4.5422e+01, 2.1777e-02, 7.5139e-01, 1.3427e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3128e+01, 2.9183e-01, 3.5709e-01, 1.4776e-03],
         [4.4243e+01, 1.0000e-08, 2.5134e-01, 8.6999e-04],
         ...,
         [4.6229e+01, 5.0434e-02, 7.8113e-01, 2.7820e-03],
         [4.6334e+01, 8.5372e-02, 5.8189e-01, 2.9063e-03],
         [3.0127e+01, 2.7514e-01, 1.6708e-01, 3.2295e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.0071e+01, 1.3184e-02, 1.5890e-01, 4.2471e-04],
         [4.6043e+01, 2.4765e-07, 3.2712e-01, 9.1489e-04],
         ...,
         [4.5987e+01, 3.2796e-01, 6.6103e-01, 4.6189e-03],
         [4.6378e+01, 4.7303e-01, 6.0183e-01, 5.5002e-03],
         [3.1456


Train Diffusion:  17%|█▋        | 171/1001 [00:59<04:49,  2.87it/s][A
Train Diffusion:  17%|█▋        | 172/1001 [01:00<04:55,  2.80it/s][A
Train Diffusion:  17%|█▋        | 173/1001 [01:00<05:04,  2.72it/s][A
Train Diffusion:  17%|█▋        | 174/1001 [01:01<05:04,  2.71it/s][A
Train Diffusion:  17%|█▋        | 175/1001 [01:01<04:57,  2.78it/s][A
Train Diffusion:  18%|█▊        | 176/1001 [01:01<04:50,  2.84it/s][A
Train Diffusion:  18%|█▊        | 177/1001 [01:02<04:49,  2.85it/s][A
Train Diffusion:  18%|█▊        | 178/1001 [01:02<04:50,  2.83it/s][A
Train Diffusion:  18%|█▊        | 179/1001 [01:02<04:46,  2.87it/s][A
Train Diffusion:  18%|█▊        | 180/1001 [01:03<04:43,  2.89it/s][A

Moving average norm loss at 180 iterations is: 465.1380676269531. Best norm loss value is: 345.5809020996094.

x with CO2 means across time = tensor([[4.5666e+01, 1.5725e-02, 7.0040e-01, 1.3054e-03],
        [4.5685e+01, 1.4972e-02, 6.9395e-01, 1.3028e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1946e+01, 1.7784e-01, 7.4638e-02, 1.0200e-03],
         [4.4101e+01, 3.3222e-07, 3.3960e-01, 8.8011e-04],
         ...,
         [4.7010e+01, 3.4997e-02, 8.3482e-01, 2.7296e-03],
         [4.7000e+01, 7.3384e-02, 6.8430e-01, 2.8850e-03],
         [3.2839e+01, 2.8640e-01, 8.4473e-02, 3.4060e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [1.9274e+01, 1.0555e+00, 2.4926e-01, 4.0550e-03],
         [4.3338e+01, 1.0000e-08, 7.2253e-02, 8.2699e-04],
         ...,
         [4.7061e+01, 1.3079e-01, 7.4276e-01, 3.3564e-03],
         [4.6968e+01, 2.2956e-01, 5.6237e-01, 3.8930e-03],
         [3.1924


Train Diffusion:  18%|█▊        | 181/1001 [01:03<04:40,  2.92it/s][A
Train Diffusion:  18%|█▊        | 182/1001 [01:03<04:46,  2.86it/s][A
Train Diffusion:  18%|█▊        | 183/1001 [01:04<04:47,  2.84it/s][A
Train Diffusion:  18%|█▊        | 184/1001 [01:04<04:51,  2.81it/s][A
Train Diffusion:  18%|█▊        | 185/1001 [01:04<04:49,  2.82it/s][A
Train Diffusion:  19%|█▊        | 186/1001 [01:05<04:45,  2.85it/s][A
Train Diffusion:  19%|█▊        | 187/1001 [01:05<04:45,  2.85it/s][A
Train Diffusion:  19%|█▉        | 188/1001 [01:06<04:44,  2.86it/s][A
Train Diffusion:  19%|█▉        | 189/1001 [01:06<04:39,  2.91it/s][A
Train Diffusion:  19%|█▉        | 190/1001 [01:06<04:35,  2.95it/s][A

Moving average norm loss at 190 iterations is: 434.5672882080078. Best norm loss value is: 317.07867431640625.

x with CO2 means across time = tensor([[4.5244e+01, 2.5484e-02, 8.0980e-01, 1.3655e-03],
        [4.5240e+01, 2.4389e-02, 8.0203e-01, 1.3691e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1248e+01, 9.7706e-03, 4.2134e-01, 4.7030e-04],
         [4.6463e+01, 1.0000e-08, 6.5549e-01, 9.7041e-04],
         ...,
         [4.5949e+01, 2.5325e-01, 8.8556e-01, 4.1715e-03],
         [4.5748e+01, 3.2626e-01, 7.4535e-01, 4.5296e-03],
         [3.1036e+01, 4.3649e-01, 2.2091e-01, 4.3431e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3997e+01, 2.1121e-01, 2.9845e-01, 1.2039e-03],
         [4.5800e+01, 5.6368e-08, 1.4468e-01, 8.8388e-04],
         ...,
         [4.5827e+01, 1.3945e-01, 7.5318e-01, 3.3585e-03],
         [4.5755e+01, 2.8139e-01, 6.0235e-01, 4.1923e-03],
         [3.033


Train Diffusion:  19%|█▉        | 191/1001 [01:07<04:44,  2.85it/s][A
Train Diffusion:  19%|█▉        | 192/1001 [01:07<04:51,  2.77it/s][A
Train Diffusion:  19%|█▉        | 193/1001 [01:07<05:21,  2.51it/s][A
Train Diffusion:  19%|█▉        | 194/1001 [01:08<05:08,  2.61it/s][A
Train Diffusion:  19%|█▉        | 195/1001 [01:08<04:59,  2.69it/s][A
Train Diffusion:  20%|█▉        | 196/1001 [01:09<05:16,  2.54it/s][A
Train Diffusion:  20%|█▉        | 197/1001 [01:09<05:07,  2.61it/s][A
Train Diffusion:  20%|█▉        | 198/1001 [01:09<04:56,  2.71it/s][A
Train Diffusion:  20%|█▉        | 199/1001 [01:10<04:53,  2.73it/s][A
Train Diffusion:  20%|█▉        | 200/1001 [01:10<04:46,  2.80it/s][A

Moving average norm loss at 200 iterations is: 420.596418762207. Best norm loss value is: 249.00965881347656.

x with CO2 means across time = tensor([[4.5391e+01, 2.4880e-02, 7.3695e-01, 1.3615e-03],
        [4.5361e+01, 2.2022e-02, 7.4160e-01, 1.3461e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1265e+01, 6.7753e-01, 2.3444e-01, 2.7713e-03],
         [4.6535e+01, 1.0715e-07, 4.6034e-01, 9.4347e-04],
         ...,
         [4.5983e+01, 3.1333e-01, 7.5607e-01, 4.5453e-03],
         [4.5423e+01, 2.0583e-01, 6.0220e-01, 3.6725e-03],
         [3.0892e+01, 1.8351e-01, 2.4216e-01, 2.6834e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4435e+01, 3.9863e-02, 4.6851e-01, 6.3835e-04],
         [4.5839e+01, 1.0000e-08, 3.5037e-01, 9.1442e-04],
         ...,
         [4.5366e+01, 1.0569e-01, 7.3435e-01, 3.1023e-03],
         [4.5687e+01, 3.8616e-01, 6.2066e-01, 4.8931e-03],
         [3.1743


Train Diffusion:  20%|██        | 201/1001 [01:10<04:45,  2.80it/s][A
Train Diffusion:  20%|██        | 202/1001 [01:11<04:50,  2.75it/s][A
Train Diffusion:  20%|██        | 203/1001 [01:11<04:52,  2.73it/s][A
Train Diffusion:  20%|██        | 204/1001 [01:11<04:43,  2.81it/s][A
Train Diffusion:  20%|██        | 205/1001 [01:12<04:38,  2.86it/s][A
Train Diffusion:  21%|██        | 206/1001 [01:12<04:32,  2.92it/s][A
Train Diffusion:  21%|██        | 207/1001 [01:12<04:29,  2.95it/s][A
Train Diffusion:  21%|██        | 208/1001 [01:13<05:08,  2.57it/s][A
Train Diffusion:  21%|██        | 209/1001 [01:13<05:16,  2.50it/s][A
Train Diffusion:  21%|██        | 210/1001 [01:14<05:12,  2.53it/s][A

Moving average norm loss at 210 iterations is: 389.5729736328125. Best norm loss value is: 249.00965881347656.

x with CO2 means across time = tensor([[4.5419e+01, 2.0768e-02, 7.7983e-01, 1.3480e-03],
        [4.5417e+01, 1.8977e-02, 7.7992e-01, 1.3310e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1314e+01, 3.5754e-01, 1.7895e-01, 1.6494e-03],
         [4.5975e+01, 1.0000e-08, 4.4289e-01, 9.3040e-04],
         ...,
         [4.5885e+01, 1.6672e-01, 7.9788e-01, 3.5582e-03],
         [4.5807e+01, 3.5053e-01, 6.4980e-01, 4.6688e-03],
         [3.2515e+01, 9.0458e-02, 2.1470e-01, 2.1406e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4487e+01, 1.6450e+00, 4.1741e-01, 6.2251e-03],
         [4.6289e+01, 3.5458e-07, 2.7958e-01, 9.1264e-04],
         ...,
         [4.5709e+01, 1.4581e-01, 7.9710e-01, 3.4077e-03],
         [4.5707e+01, 1.5083e-01, 6.6627e-01, 3.3361e-03],
         [3.222


Train Diffusion:  21%|██        | 211/1001 [01:14<05:01,  2.62it/s][A
Train Diffusion:  21%|██        | 212/1001 [01:14<05:02,  2.61it/s][A
Train Diffusion:  21%|██▏       | 213/1001 [01:15<05:05,  2.58it/s][A
Train Diffusion:  21%|██▏       | 214/1001 [01:15<04:59,  2.63it/s][A
Train Diffusion:  21%|██▏       | 215/1001 [01:16<04:54,  2.67it/s][A
Train Diffusion:  22%|██▏       | 216/1001 [01:16<04:52,  2.68it/s][A
Train Diffusion:  22%|██▏       | 217/1001 [01:16<04:47,  2.73it/s][A
Train Diffusion:  22%|██▏       | 218/1001 [01:17<04:42,  2.77it/s][A
Train Diffusion:  22%|██▏       | 219/1001 [01:17<04:35,  2.84it/s][A
Train Diffusion:  22%|██▏       | 220/1001 [01:17<04:32,  2.86it/s][A

Moving average norm loss at 220 iterations is: 345.3952667236328. Best norm loss value is: 225.5201416015625.

x with CO2 means across time = tensor([[4.5388e+01, 2.1770e-02, 7.6234e-01, 1.3533e-03],
        [4.5368e+01, 2.5791e-02, 7.6155e-01, 1.3683e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5392e+01, 1.0987e-01, 3.2639e-01, 8.7948e-04],
         [4.7143e+01, 1.9364e-07, 3.6535e-01, 9.4115e-04],
         ...,
         [4.5419e+01, 1.2373e-01, 7.4027e-01, 3.2287e-03],
         [4.5434e+01, 3.7912e-01, 5.8811e-01, 4.8255e-03],
         [3.2312e+01, 4.2112e-01, 1.0508e-01, 4.2705e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1999e+01, 1.1287e+00, 4.3909e-01, 4.3850e-03],
         [4.6805e+01, 1.0000e-08, 5.2127e-01, 9.5739e-04],
         ...,
         [4.5316e+01, 3.2413e-01, 7.4906e-01, 4.5844e-03],
         [4.5426e+01, 2.3964e-01, 6.3960e-01, 3.9083e-03],
         [3.1760


Train Diffusion:  22%|██▏       | 221/1001 [01:18<04:36,  2.82it/s][A
Train Diffusion:  22%|██▏       | 222/1001 [01:18<04:45,  2.73it/s][A
Train Diffusion:  22%|██▏       | 223/1001 [01:18<04:46,  2.72it/s][A
Train Diffusion:  22%|██▏       | 224/1001 [01:19<04:41,  2.76it/s][A
Train Diffusion:  22%|██▏       | 225/1001 [01:19<04:36,  2.81it/s][A
Train Diffusion:  23%|██▎       | 226/1001 [01:19<04:37,  2.80it/s][A
Train Diffusion:  23%|██▎       | 227/1001 [01:20<04:38,  2.78it/s][A
Train Diffusion:  23%|██▎       | 228/1001 [01:20<04:33,  2.83it/s][A
Train Diffusion:  23%|██▎       | 229/1001 [01:21<04:29,  2.87it/s][A
Train Diffusion:  23%|██▎       | 230/1001 [01:21<04:25,  2.90it/s][A

Moving average norm loss at 230 iterations is: 274.5606231689453. Best norm loss value is: 225.5201416015625.

x with CO2 means across time = tensor([[4.5669e+01, 2.4385e-02, 6.5020e-01, 1.3494e-03],
        [4.5684e+01, 1.4167e-02, 6.4994e-01, 1.2945e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1393e+01, 1.8176e+00, 3.7309e-01, 6.7653e-03],
         [4.5927e+01, 1.0000e-08, 2.8804e-01, 9.0706e-04],
         ...,
         [4.5953e+01, 1.4880e-01, 6.1219e-01, 3.3896e-03],
         [4.6131e+01, 3.3225e-01, 5.2839e-01, 4.5297e-03],
         [3.2963e+01, 3.2602e-01, 1.6140e-01, 3.6916e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4910e+01, 2.7731e-01, 2.1912e-01, 1.4394e-03],
         [4.6140e+01, 2.6575e-07, 4.5015e-01, 9.3455e-04],
         ...,
         [4.6173e+01, 1.6727e-01, 6.2551e-01, 3.5291e-03],
         [4.6105e+01, 1.6403e-01, 5.1534e-01, 3.4025e-03],
         [3.3158


Train Diffusion:  23%|██▎       | 231/1001 [01:21<04:26,  2.89it/s][A
Train Diffusion:  23%|██▎       | 232/1001 [01:22<04:30,  2.85it/s][A
Train Diffusion:  23%|██▎       | 233/1001 [01:22<04:40,  2.74it/s][A
Train Diffusion:  23%|██▎       | 234/1001 [01:22<04:36,  2.78it/s][A
Train Diffusion:  23%|██▎       | 235/1001 [01:23<04:36,  2.77it/s][A
Train Diffusion:  24%|██▎       | 236/1001 [01:23<04:33,  2.80it/s][A
Train Diffusion:  24%|██▎       | 237/1001 [01:23<04:32,  2.81it/s][A
Train Diffusion:  24%|██▍       | 238/1001 [01:24<04:33,  2.79it/s][A
Train Diffusion:  24%|██▍       | 239/1001 [01:24<04:34,  2.78it/s][A
Train Diffusion:  24%|██▍       | 240/1001 [01:24<04:31,  2.80it/s][A

Moving average norm loss at 240 iterations is: 251.76078338623046. Best norm loss value is: 216.3775177001953.

x with CO2 means across time = tensor([[4.5496e+01, 3.0944e-02, 7.2743e-01, 1.3950e-03],
        [4.5487e+01, 2.4205e-02, 7.2613e-01, 1.3641e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5035e+01, 1.2252e+00, 3.6688e-01, 4.7650e-03],
         [4.5819e+01, 1.0000e-08, 5.8558e-01, 9.4814e-04],
         ...,
         [4.5740e+01, 3.0659e-01, 7.1294e-01, 4.4761e-03],
         [4.6029e+01, 2.9075e-01, 5.7947e-01, 4.2616e-03],
         [3.3087e+01, 3.1387e-01, 1.8726e-01, 3.6244e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1444e+01, 1.0160e-01, 5.2697e-01, 8.0848e-04],
         [4.5542e+01, 8.6972e-08, 4.6600e-01, 9.2559e-04],
         ...,
         [4.5822e+01, 2.2578e-01, 7.1251e-01, 3.9323e-03],
         [4.6100e+01, 4.4577e-01, 5.8529e-01, 5.3008e-03],
         [3.336


Train Diffusion:  24%|██▍       | 241/1001 [01:25<04:31,  2.80it/s][A
Train Diffusion:  24%|██▍       | 242/1001 [01:25<04:34,  2.76it/s][A
Train Diffusion:  24%|██▍       | 243/1001 [01:26<04:35,  2.75it/s][A
Train Diffusion:  24%|██▍       | 244/1001 [01:26<04:28,  2.82it/s][A
Train Diffusion:  24%|██▍       | 245/1001 [01:26<04:26,  2.84it/s][A
Train Diffusion:  25%|██▍       | 246/1001 [01:27<04:24,  2.86it/s][A
Train Diffusion:  25%|██▍       | 247/1001 [01:27<04:22,  2.88it/s][A
Train Diffusion:  25%|██▍       | 248/1001 [01:27<04:20,  2.90it/s][A
Train Diffusion:  25%|██▍       | 249/1001 [01:28<04:18,  2.91it/s][A
Train Diffusion:  25%|██▍       | 250/1001 [01:28<04:28,  2.80it/s][A

Moving average norm loss at 250 iterations is: 231.78510589599608. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5638e+01, 2.3624e-02, 6.8770e-01, 1.3463e-03],
        [4.5646e+01, 1.6097e-02, 6.8814e-01, 1.3116e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1524e+01, 2.2853e+00, 3.7264e-01, 8.3971e-03],
         [4.6171e+01, 2.4863e-07, 2.9899e-01, 9.1322e-04],
         ...,
         [4.5764e+01, 1.0603e-01, 6.8382e-01, 3.1101e-03],
         [4.6065e+01, 3.1572e-01, 5.7180e-01, 4.4278e-03],
         [3.3788e+01, 3.2442e-01, 9.6148e-02, 3.7018e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5114e+01, 4.2619e-01, 2.4196e-01, 1.9650e-03],
         [4.5993e+01, 1.0000e-08, 4.6500e-01, 9.3394e-04],
         ...,
         [4.5814e+01, 2.0004e-01, 6.6760e-01, 3.7453e-03],
         [4.6042e+01, 1.6501e-01, 5.3437e-01, 3.4112e-03],
         [3.38


Train Diffusion:  25%|██▌       | 251/1001 [01:28<04:26,  2.81it/s][A
Train Diffusion:  25%|██▌       | 252/1001 [01:29<04:34,  2.73it/s][A
Train Diffusion:  25%|██▌       | 253/1001 [01:29<04:38,  2.69it/s][A
Train Diffusion:  25%|██▌       | 254/1001 [01:29<04:36,  2.70it/s][A
Train Diffusion:  25%|██▌       | 255/1001 [01:30<04:34,  2.72it/s][A
Train Diffusion:  26%|██▌       | 256/1001 [01:30<04:24,  2.82it/s][A
Train Diffusion:  26%|██▌       | 257/1001 [01:31<04:23,  2.82it/s][A
Train Diffusion:  26%|██▌       | 258/1001 [01:31<04:23,  2.81it/s][A
Train Diffusion:  26%|██▌       | 259/1001 [01:31<04:27,  2.78it/s][A
Train Diffusion:  26%|██▌       | 260/1001 [01:32<04:32,  2.72it/s][A

Moving average norm loss at 260 iterations is: 247.2230972290039. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5496e+01, 2.2227e-02, 7.1943e-01, 1.3518e-03],
        [4.5493e+01, 2.6350e-02, 7.2438e-01, 1.3662e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5321e+01, 1.2440e-01, 3.5570e-01, 9.3295e-04],
         [4.6059e+01, 1.0000e-08, 5.4664e-01, 9.4701e-04],
         ...,
         [4.5477e+01, 1.5375e-01, 7.2262e-01, 3.4302e-03],
         [4.5805e+01, 1.9485e-01, 5.8619e-01, 3.6130e-03],
         [3.3588e+01, 4.0732e-01, 1.7248e-01, 4.2563e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.1832e+01, 1.6733e+00, 4.9154e-01, 6.2868e-03],
         [4.6157e+01, 9.6837e-08, 3.4804e-01, 9.2007e-04],
         ...,
         [4.5509e+01, 2.5818e-01, 7.0592e-01, 4.1350e-03],
         [4.5814e+01, 4.0434e-01, 6.0491e-01, 5.0161e-03],
         [3.368


Train Diffusion:  26%|██▌       | 261/1001 [01:32<04:32,  2.71it/s][A
Train Diffusion:  26%|██▌       | 262/1001 [01:32<04:38,  2.66it/s][A
Train Diffusion:  26%|██▋       | 263/1001 [01:33<04:32,  2.70it/s][A
Train Diffusion:  26%|██▋       | 264/1001 [01:33<04:29,  2.74it/s][A
Train Diffusion:  26%|██▋       | 265/1001 [01:33<04:23,  2.79it/s][A
Train Diffusion:  27%|██▋       | 266/1001 [01:34<04:25,  2.77it/s][A
Train Diffusion:  27%|██▋       | 267/1001 [01:34<04:46,  2.56it/s][A
Train Diffusion:  27%|██▋       | 268/1001 [01:35<04:37,  2.65it/s][A
Train Diffusion:  27%|██▋       | 269/1001 [01:35<04:28,  2.73it/s][A
Train Diffusion:  27%|██▋       | 270/1001 [01:35<04:34,  2.67it/s][A

Moving average norm loss at 270 iterations is: 270.18072814941405. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5565e+01, 1.7381e-02, 7.9686e-01, 1.3381e-03],
        [4.5554e+01, 2.0382e-02, 8.0075e-01, 1.3424e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6096e+01, 4.0401e-01, 2.6264e-01, 1.9080e-03],
         [4.6972e+01, 1.0000e-08, 3.1377e-01, 9.3047e-04],
         ...,
         [4.5600e+01, 1.3042e-01, 8.0792e-01, 3.3011e-03],
         [4.5981e+01, 1.6250e-01, 7.3993e-01, 3.4465e-03],
         [3.4039e+01, 3.2645e-01, 1.7519e-01, 3.7474e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2534e+01, 2.1452e+00, 4.0038e-01, 7.9310e-03],
         [4.7013e+01, 1.7367e-07, 4.5583e-01, 9.5183e-04],
         ...,
         [4.5449e+01, 1.7407e-01, 8.1413e-01, 3.5914e-03],
         [4.5653e+01, 3.0436e-01, 6.4239e-01, 4.3515e-03],
         [3.38


Train Diffusion:  27%|██▋       | 271/1001 [01:36<04:46,  2.55it/s][A
Train Diffusion:  27%|██▋       | 272/1001 [01:36<04:44,  2.56it/s][A
Train Diffusion:  27%|██▋       | 273/1001 [01:37<04:43,  2.57it/s][A
Train Diffusion:  27%|██▋       | 274/1001 [01:37<04:52,  2.49it/s][A
Train Diffusion:  27%|██▋       | 275/1001 [01:37<04:39,  2.59it/s][A
Train Diffusion:  28%|██▊       | 276/1001 [01:38<04:36,  2.62it/s][A
Train Diffusion:  28%|██▊       | 277/1001 [01:38<04:40,  2.58it/s][A
Train Diffusion:  28%|██▊       | 278/1001 [01:39<04:40,  2.57it/s][A
Train Diffusion:  28%|██▊       | 279/1001 [01:39<04:34,  2.63it/s][A
Train Diffusion:  28%|██▊       | 280/1001 [01:39<04:25,  2.72it/s][A

Moving average norm loss at 280 iterations is: 298.6853622436523. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5469e+01, 2.2523e-02, 7.5092e-01, 1.3482e-03],
        [4.5482e+01, 1.8469e-02, 7.5087e-01, 1.3321e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2293e+01, 2.0438e+00, 2.9758e-01, 7.5592e-03],
         [4.6174e+01, 1.0000e-08, 4.9495e-01, 9.4170e-04],
         ...,
         [4.5441e+01, 1.2978e-01, 7.5444e-01, 3.2746e-03],
         [4.5967e+01, 3.1865e-01, 6.4650e-01, 4.4627e-03],
         [3.4276e+01, 3.6381e-01, 1.8468e-01, 4.0057e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5773e+01, 4.1378e-01, 3.9874e-01, 1.9553e-03],
         [4.6097e+01, 1.3258e-07, 2.8658e-01, 9.1004e-04],
         ...,
         [4.5332e+01, 2.0529e-01, 7.7106e-01, 3.7857e-03],
         [4.5801e+01, 1.8325e-01, 6.4592e-01, 3.5514e-03],
         [3.433


Train Diffusion:  28%|██▊       | 281/1001 [01:40<04:36,  2.61it/s][A
Train Diffusion:  28%|██▊       | 282/1001 [01:40<04:28,  2.68it/s][A
Train Diffusion:  28%|██▊       | 283/1001 [01:40<04:25,  2.71it/s][A
Train Diffusion:  28%|██▊       | 284/1001 [01:41<04:16,  2.80it/s][A
Train Diffusion:  28%|██▊       | 285/1001 [01:41<04:08,  2.88it/s][A
Train Diffusion:  29%|██▊       | 286/1001 [01:41<04:03,  2.94it/s][A
Train Diffusion:  29%|██▊       | 287/1001 [01:42<04:00,  2.97it/s][A
Train Diffusion:  29%|██▉       | 288/1001 [01:42<03:57,  3.01it/s][A
Train Diffusion:  29%|██▉       | 289/1001 [01:42<03:54,  3.04it/s][A
Train Diffusion:  29%|██▉       | 290/1001 [01:43<03:53,  3.04it/s][A
Train Diffusion:  29%|██▉       | 291/1001 [01:43<03:53,  3.05it/s][A

Moving average norm loss at 290 iterations is: 287.2043655395508. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5394e+01, 2.6704e-02, 7.3230e-01, 1.3672e-03],
        [4.5384e+01, 2.8997e-02, 7.2758e-01, 1.3903e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5981e+01, 1.2120e+00, 5.4649e-01, 4.7610e-03],
         [4.6138e+01, 3.9056e-08, 6.2612e-01, 9.6003e-04],
         ...,
         [4.5389e+01, 2.2518e-01, 6.3225e-01, 3.8856e-03],
         [4.6296e+01, 2.5606e-01, 5.8950e-01, 4.0454e-03],
         [3.4554e+01, 4.6090e-01, 1.8415e-01, 4.6549e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2504e+01, 1.1838e-01, 3.6769e-01, 8.6357e-04],
         [4.6358e+01, 1.0000e-08, 4.1617e-01, 9.3374e-04],
         ...,
         [4.5417e+01, 2.2145e-01, 7.6060e-01, 3.8965e-03],
         [4.5878e+01, 4.0515e-01, 5.9558e-01, 5.0221e-03],
         [3.430


Train Diffusion:  29%|██▉       | 292/1001 [01:43<03:51,  3.06it/s][A
Train Diffusion:  29%|██▉       | 293/1001 [01:44<03:55,  3.00it/s][A
Train Diffusion:  29%|██▉       | 294/1001 [01:44<03:53,  3.03it/s][A
Train Diffusion:  29%|██▉       | 295/1001 [01:44<03:51,  3.05it/s][A
Train Diffusion:  30%|██▉       | 296/1001 [01:45<03:50,  3.05it/s][A
Train Diffusion:  30%|██▉       | 297/1001 [01:45<03:49,  3.07it/s][A
Train Diffusion:  30%|██▉       | 298/1001 [01:45<03:48,  3.08it/s][A
Train Diffusion:  30%|██▉       | 299/1001 [01:46<03:48,  3.07it/s][A
Train Diffusion:  30%|██▉       | 300/1001 [01:46<03:47,  3.08it/s][A
Train Diffusion:  30%|███       | 301/1001 [01:46<03:47,  3.08it/s][A

Moving average norm loss at 300 iterations is: 272.4946044921875. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5549e+01, 2.1952e-02, 7.6784e-01, 1.3466e-03],
        [4.5561e+01, 1.9022e-02, 7.6467e-01, 1.3446e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2086e+01, 1.9569e+00, 3.7893e-01, 7.2640e-03],
         [4.6343e+01, 1.0000e-08, 5.3954e-01, 9.5133e-04],
         ...,
         [4.5583e+01, 1.6986e-01, 7.8474e-01, 3.5614e-03],
         [4.6341e+01, 1.7674e-01, 6.5980e-01, 3.5370e-03],
         [3.4814e+01, 3.8301e-01, 1.7388e-01, 4.1534e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5911e+01, 1.8206e-01, 3.8455e-01, 1.1484e-03],
         [4.6442e+01, 4.3524e-08, 2.8531e-01, 9.1635e-04],
         ...,
         [4.5425e+01, 1.5158e-01, 7.7302e-01, 3.4267e-03],
         [4.6162e+01, 3.3481e-01, 6.6510e-01, 4.5847e-03],
         [3.510


Train Diffusion:  30%|███       | 302/1001 [01:47<03:59,  2.92it/s][A
Train Diffusion:  30%|███       | 303/1001 [01:47<04:11,  2.78it/s][A
Train Diffusion:  30%|███       | 304/1001 [01:47<04:12,  2.76it/s][A
Train Diffusion:  30%|███       | 305/1001 [01:48<04:16,  2.71it/s][A
Train Diffusion:  31%|███       | 306/1001 [01:48<04:13,  2.74it/s][A
Train Diffusion:  31%|███       | 307/1001 [01:48<04:10,  2.77it/s][A
Train Diffusion:  31%|███       | 308/1001 [01:49<04:05,  2.82it/s][A
Train Diffusion:  31%|███       | 309/1001 [01:49<04:03,  2.85it/s][A
Train Diffusion:  31%|███       | 310/1001 [01:49<03:57,  2.91it/s][A

Moving average norm loss at 310 iterations is: 236.4763931274414. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5474e+01, 2.2779e-02, 7.6348e-01, 1.3525e-03],
        [4.5477e+01, 1.9708e-02, 7.5995e-01, 1.3455e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2350e+01, 1.5818e+00, 4.2221e-01, 5.9675e-03],
         [4.6113e+01, 1.0000e-08, 4.4137e-01, 9.3278e-04],
         ...,
         [4.5255e+01, 2.4104e-01, 7.8713e-01, 4.0286e-03],
         [4.6158e+01, 1.9234e-01, 6.9395e-01, 3.6416e-03],
         [3.4989e+01, 4.1295e-01, 1.7745e-01, 4.3587e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6048e+01, 1.6364e-01, 4.0296e-01, 1.0892e-03],
         [4.6101e+01, 3.5880e-08, 4.7141e-01, 9.3691e-04],
         ...,
         [4.5488e+01, 1.3451e-01, 7.5140e-01, 3.3081e-03],
         [4.6336e+01, 3.6234e-01, 6.2198e-01, 4.7651e-03],
         [3.525


Train Diffusion:  31%|███       | 311/1001 [01:50<04:05,  2.81it/s][A
Train Diffusion:  31%|███       | 312/1001 [01:50<04:22,  2.62it/s][A
Train Diffusion:  31%|███▏      | 313/1001 [01:51<04:41,  2.44it/s][A
Train Diffusion:  31%|███▏      | 314/1001 [01:51<04:35,  2.49it/s][A
Train Diffusion:  31%|███▏      | 315/1001 [01:51<04:22,  2.61it/s][A
Train Diffusion:  32%|███▏      | 316/1001 [01:52<04:13,  2.70it/s][A
Train Diffusion:  32%|███▏      | 317/1001 [01:52<04:04,  2.80it/s][A
Train Diffusion:  32%|███▏      | 318/1001 [01:52<03:57,  2.88it/s][A
Train Diffusion:  32%|███▏      | 319/1001 [01:53<03:58,  2.86it/s][A
Train Diffusion:  32%|███▏      | 320/1001 [01:53<03:55,  2.89it/s][A

Moving average norm loss at 320 iterations is: 275.11891021728513. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5457e+01, 1.3651e-02, 6.8506e-01, 1.2957e-03],
        [4.5445e+01, 1.7815e-02, 6.8382e-01, 1.3044e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6672e+01, 4.2854e-01, 2.5752e-01, 2.0030e-03],
         [4.6833e+01, 1.0000e-08, 2.0891e-01, 9.1264e-04],
         ...,
         [4.5163e+01, 1.4421e-01, 6.7888e-01, 3.3385e-03],
         [4.6294e+01, 2.6174e-01, 6.0872e-01, 4.0883e-03],
         [3.5502e+01, 2.0660e-01, 1.8769e-01, 3.0320e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2864e+01, 2.5659e+00, 3.3512e-01, 9.3937e-03],
         [4.6836e+01, 7.4442e-08, 4.5174e-01, 9.4789e-04],
         ...,
         [4.5245e+01, 5.3889e-02, 6.6658e-01, 2.7270e-03],
         [4.6109e+01, 7.9351e-02, 5.5975e-01, 2.8496e-03],
         [3.55


Train Diffusion:  32%|███▏      | 321/1001 [01:54<03:53,  2.92it/s][A
Train Diffusion:  32%|███▏      | 322/1001 [01:54<03:55,  2.88it/s][A
Train Diffusion:  32%|███▏      | 323/1001 [01:54<03:57,  2.85it/s][A
Train Diffusion:  32%|███▏      | 324/1001 [01:55<03:53,  2.90it/s][A
Train Diffusion:  32%|███▏      | 325/1001 [01:55<03:53,  2.89it/s][A
Train Diffusion:  33%|███▎      | 326/1001 [01:55<03:49,  2.94it/s][A
Train Diffusion:  33%|███▎      | 327/1001 [01:56<03:46,  2.98it/s][A
Train Diffusion:  33%|███▎      | 328/1001 [01:56<03:45,  2.99it/s][A
Train Diffusion:  33%|███▎      | 329/1001 [01:56<03:42,  3.02it/s][A
Train Diffusion:  33%|███▎      | 330/1001 [01:57<03:41,  3.02it/s][A


Moving average norm loss at 330 iterations is: 269.81952514648435. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5648e+01, 2.2678e-02, 6.8861e-01, 1.3551e-03],
        [4.5645e+01, 1.7628e-02, 6.8466e-01, 1.3204e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6394e+01, 8.6206e-01, 4.3836e-01, 3.5338e-03],
         [4.5503e+01, 1.9516e-08, 5.6646e-01, 9.3942e-04],
         ...,
         [4.5701e+01, 2.5206e-01, 6.9145e-01, 4.0989e-03],
         [4.6512e+01, 2.6123e-01, 5.7696e-01, 4.0867e-03],
         [3.5061e+01, 4.3972e-01, 1.6451e-01, 4.5341e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2354e+01, 6.6635e-02, 4.0991e-01, 6.8650e-04],
         [4.5568e+01, 1.0000e-08, 3.3565e-01, 9.0719e-04],
         ...,
         [4.5707e+01, 1.8288e-01, 6.8679e-01, 3.6290e-03],
         [4.6632e+01, 3.4830e-01, 5.9474e-01, 4.6781e-03],
         [3.57

Train Diffusion:  33%|███▎      | 331/1001 [01:57<03:41,  3.03it/s][A
Train Diffusion:  33%|███▎      | 332/1001 [01:57<03:43,  2.99it/s][A
Train Diffusion:  33%|███▎      | 333/1001 [01:58<03:45,  2.96it/s][A
Train Diffusion:  33%|███▎      | 334/1001 [01:58<03:43,  2.98it/s][A
Train Diffusion:  33%|███▎      | 335/1001 [01:58<03:40,  3.02it/s][A
Train Diffusion:  34%|███▎      | 336/1001 [01:59<03:39,  3.03it/s][A
Train Diffusion:  34%|███▎      | 337/1001 [01:59<03:38,  3.04it/s][A
Train Diffusion:  34%|███▍      | 338/1001 [01:59<03:36,  3.06it/s][A
Train Diffusion:  34%|███▍      | 339/1001 [02:00<03:36,  3.05it/s][A
Train Diffusion:  34%|███▍      | 340/1001 [02:00<03:36,  3.06it/s][A
Train Diffusion:  34%|███▍      | 341/1001 [02:00<03:35,  3.06it/s][A

Moving average norm loss at 340 iterations is: 260.5372543334961. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5544e+01, 1.4105e-02, 7.6612e-01, 1.3131e-03],
        [4.5527e+01, 1.6738e-02, 7.6942e-01, 1.3176e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6749e+01, 4.9141e-01, 2.5342e-01, 2.2229e-03],
         [4.6509e+01, 2.7196e-08, 2.3159e-01, 9.0983e-04],
         ...,
         [4.5507e+01, 1.8293e-01, 7.8343e-01, 3.6460e-03],
         [4.6626e+01, 1.3957e-01, 6.7855e-01, 3.3074e-03],
         [3.6464e+01, 1.6907e-01, 1.9952e-01, 2.8331e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2889e+01, 1.8869e+00, 3.4092e-01, 7.0290e-03],
         [4.6501e+01, 1.0000e-08, 4.9180e-01, 9.4740e-04],
         ...,
         [4.5328e+01, 4.6429e-02, 7.8166e-01, 2.7116e-03],
         [4.6507e+01, 2.4009e-01, 6.8964e-01, 3.9755e-03],
         [3.614


Train Diffusion:  34%|███▍      | 342/1001 [02:00<03:35,  3.06it/s][A
Train Diffusion:  34%|███▍      | 343/1001 [02:01<03:45,  2.92it/s][A
Train Diffusion:  34%|███▍      | 344/1001 [02:01<03:47,  2.89it/s][A
Train Diffusion:  34%|███▍      | 345/1001 [02:02<03:48,  2.87it/s][A
Train Diffusion:  35%|███▍      | 346/1001 [02:02<03:50,  2.84it/s][A
Train Diffusion:  35%|███▍      | 347/1001 [02:02<03:47,  2.88it/s][A
Train Diffusion:  35%|███▍      | 348/1001 [02:03<03:45,  2.90it/s][A
Train Diffusion:  35%|███▍      | 349/1001 [02:03<04:08,  2.63it/s][A
Train Diffusion:  35%|███▍      | 350/1001 [02:04<04:23,  2.47it/s][A

Moving average norm loss at 350 iterations is: 279.3733963012695. Best norm loss value is: 203.56533813476562.

x with CO2 means across time = tensor([[4.5573e+01, 1.5636e-02, 7.4678e-01, 1.3229e-03],
        [4.5587e+01, 2.0376e-02, 7.5123e-01, 1.3379e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3348e+01, 1.2776e-01, 3.6933e-01, 9.1148e-04],
         [4.7067e+01, 1.0000e-08, 3.1050e-01, 9.3178e-04],
         ...,
         [4.5103e+01, 6.3762e-02, 7.3698e-01, 2.8062e-03],
         [4.6449e+01, 1.5008e-01, 6.5758e-01, 3.3637e-03],
         [3.6185e+01, 1.7657e-01, 2.0936e-01, 2.8721e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.7394e+01, 1.3967e+00, 4.7288e-01, 5.4191e-03],
         [4.7100e+01, 1.6481e-08, 6.0062e-01, 9.7444e-04],
         ...,
         [4.4991e+01, 2.2980e-01, 7.4808e-01, 3.9291e-03],
         [4.6161e+01, 3.4719e-01, 6.5288e-01, 4.6640e-03],
         [3.624


Train Diffusion:  35%|███▌      | 351/1001 [02:04<04:15,  2.54it/s][A
Train Diffusion:  35%|███▌      | 352/1001 [02:04<04:17,  2.52it/s][A
Train Diffusion:  35%|███▌      | 353/1001 [02:05<04:13,  2.56it/s][A
Train Diffusion:  35%|███▌      | 354/1001 [02:05<04:03,  2.66it/s][A
Train Diffusion:  35%|███▌      | 355/1001 [02:05<04:03,  2.65it/s][A
Train Diffusion:  36%|███▌      | 356/1001 [02:06<04:02,  2.66it/s][A
Train Diffusion:  36%|███▌      | 357/1001 [02:06<03:58,  2.70it/s][A
Train Diffusion:  36%|███▌      | 358/1001 [02:06<03:52,  2.76it/s][A
Train Diffusion:  36%|███▌      | 359/1001 [02:07<03:49,  2.80it/s][A
Train Diffusion:  36%|███▌      | 360/1001 [02:07<03:45,  2.84it/s][A

Moving average norm loss at 360 iterations is: 240.1087448120117. Best norm loss value is: 200.25497436523438.

x with CO2 means across time = tensor([[4.5606e+01, 2.0461e-02, 6.6906e-01, 1.3252e-03],
        [4.5616e+01, 1.4486e-02, 6.6827e-01, 1.3024e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.2952e+01, 1.9572e+00, 4.0620e-01, 7.2839e-03],
         [4.6013e+01, 1.5700e-08, 5.4807e-01, 9.4636e-04],
         ...,
         [4.5681e+01, 9.9016e-02, 6.7557e-01, 3.0563e-03],
         [4.7000e+01, 1.3984e-01, 5.7613e-01, 3.2996e-03],
         [3.7306e+01, 3.2062e-01, 9.4852e-02, 3.8379e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.7358e+01, 1.0218e-01, 3.5626e-01, 8.9177e-04],
         [4.6065e+01, 1.0000e-08, 3.1778e-01, 9.1395e-04],
         ...,
         [4.5713e+01, 1.0697e-01, 6.6907e-01, 3.1099e-03],
         [4.6868e+01, 2.5337e-01, 5.8609e-01, 4.0534e-03],
         [3.678


Train Diffusion:  36%|███▌      | 361/1001 [02:08<03:45,  2.84it/s][A
Train Diffusion:  36%|███▌      | 362/1001 [02:08<03:56,  2.71it/s][A
Train Diffusion:  36%|███▋      | 363/1001 [02:08<04:07,  2.58it/s][A
Train Diffusion:  36%|███▋      | 364/1001 [02:09<04:19,  2.45it/s][A
Train Diffusion:  36%|███▋      | 365/1001 [02:09<04:26,  2.39it/s][A
Train Diffusion:  37%|███▋      | 366/1001 [02:10<04:13,  2.51it/s][A
Train Diffusion:  37%|███▋      | 367/1001 [02:10<04:00,  2.64it/s][A
Train Diffusion:  37%|███▋      | 368/1001 [02:10<04:15,  2.48it/s][A
Train Diffusion:  37%|███▋      | 369/1001 [02:11<04:04,  2.59it/s][A
Train Diffusion:  37%|███▋      | 370/1001 [02:11<03:52,  2.71it/s][A

Moving average norm loss at 370 iterations is: 255.68968353271484. Best norm loss value is: 200.25497436523438.

x with CO2 means across time = tensor([[4.5654e+01, 1.6917e-02, 6.6137e-01, 1.3141e-03],
        [4.5667e+01, 1.8734e-02, 6.6728e-01, 1.3202e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3049e+01, 1.5402e-01, 2.6837e-01, 9.8363e-04],
         [4.6020e+01, 1.0000e-08, 2.8573e-01, 9.0847e-04],
         ...,
         [4.5431e+01, 1.0680e-01, 6.7287e-01, 3.0963e-03],
         [4.6750e+01, 2.9454e-01, 5.6195e-01, 4.3162e-03],
         [3.7667e+01, 1.8927e-01, 2.0310e-01, 3.0217e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.7677e+01, 1.4273e+00, 5.1711e-01, 5.5370e-03],
         [4.6027e+01, 1.2200e-08, 6.0782e-01, 9.5528e-04],
         ...,
         [4.5444e+01, 1.5619e-01, 6.3288e-01, 3.4208e-03],
         [4.6914e+01, 1.4534e-01, 5.9259e-01, 3.3366e-03],
         [3.71


Train Diffusion:  37%|███▋      | 371/1001 [02:12<04:02,  2.60it/s][A
Train Diffusion:  37%|███▋      | 372/1001 [02:12<03:57,  2.65it/s][A
Train Diffusion:  37%|███▋      | 373/1001 [02:12<03:52,  2.70it/s][A
Train Diffusion:  37%|███▋      | 374/1001 [02:13<03:48,  2.74it/s][A
Train Diffusion:  37%|███▋      | 375/1001 [02:13<03:56,  2.65it/s][A
Train Diffusion:  38%|███▊      | 376/1001 [02:13<03:47,  2.74it/s][A
Train Diffusion:  38%|███▊      | 377/1001 [02:14<03:56,  2.64it/s][A
Train Diffusion:  38%|███▊      | 378/1001 [02:14<03:47,  2.74it/s][A
Train Diffusion:  38%|███▊      | 379/1001 [02:14<03:46,  2.75it/s][A
Train Diffusion:  38%|███▊      | 380/1001 [02:15<03:55,  2.64it/s][A

Moving average norm loss at 380 iterations is: 253.9010498046875. Best norm loss value is: 169.5229949951172.

x with CO2 means across time = tensor([[4.5741e+01, 1.9617e-02, 6.1403e-01, 1.3162e-03],
        [4.5731e+01, 1.3569e-02, 6.1383e-01, 1.2878e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.8354e+01, 1.8735e+00, 3.5190e-01, 7.0808e-03],
         [4.6610e+01, 1.0000e-08, 3.8113e-01, 9.3340e-04],
         ...,
         [4.5506e+01, 1.5064e-01, 5.9282e-01, 3.3753e-03],
         [4.6971e+01, 1.8171e-01, 4.6656e-01, 3.5483e-03],
         [3.7918e+01, 2.3767e-01, 9.8668e-02, 3.3231e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3471e+01, 2.8792e-01, 3.6410e-01, 1.4710e-03],
         [4.6633e+01, 1.2068e-08, 4.8174e-01, 9.4843e-04],
         ...,
         [4.5479e+01, 9.6697e-02, 5.7025e-01, 3.0023e-03],
         [4.7025e+01, 2.3865e-01, 5.6403e-01, 3.9568e-03],
         [3.8199


Train Diffusion:  38%|███▊      | 381/1001 [02:15<03:47,  2.72it/s][A
Train Diffusion:  38%|███▊      | 382/1001 [02:16<03:46,  2.74it/s][A
Train Diffusion:  38%|███▊      | 383/1001 [02:16<03:50,  2.68it/s][A
Train Diffusion:  38%|███▊      | 384/1001 [02:16<03:47,  2.71it/s][A
Train Diffusion:  38%|███▊      | 385/1001 [02:17<03:42,  2.76it/s][A
Train Diffusion:  39%|███▊      | 386/1001 [02:17<03:39,  2.80it/s][A
Train Diffusion:  39%|███▊      | 387/1001 [02:17<03:39,  2.80it/s][A
Train Diffusion:  39%|███▉      | 388/1001 [02:18<03:35,  2.85it/s][A
Train Diffusion:  39%|███▉      | 389/1001 [02:18<03:34,  2.86it/s][A
Train Diffusion:  39%|███▉      | 390/1001 [02:18<03:44,  2.72it/s][A

Moving average norm loss at 390 iterations is: 249.30429382324218. Best norm loss value is: 169.5229949951172.

x with CO2 means across time = tensor([[4.5606e+01, 1.8180e-02, 6.8309e-01, 1.3254e-03],
        [4.5621e+01, 2.1247e-02, 6.8189e-01, 1.3333e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3796e+01, 1.1046e-01, 4.4913e-01, 8.7029e-04],
         [4.7016e+01, 1.0664e-08, 5.8478e-01, 9.7056e-04],
         ...,
         [4.5320e+01, 9.7021e-02, 6.8345e-01, 3.0275e-03],
         [4.7392e+01, 2.5802e-01, 6.0552e-01, 4.1144e-03],
         [3.8721e+01, 3.2481e-01, 1.2940e-01, 3.9393e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.9051e+01, 1.6989e+00, 5.0296e-01, 6.5058e-03],
         [4.6831e+01, 1.0000e-08, 4.9399e-01, 9.5393e-04],
         ...,
         [4.5458e+01, 1.8661e-01, 6.7671e-01, 3.6396e-03],
         [4.7023e+01, 2.3145e-01, 5.9615e-01, 3.9172e-03],
         [3.819


Train Diffusion:  39%|███▉      | 391/1001 [02:19<03:41,  2.76it/s][A
Train Diffusion:  39%|███▉      | 392/1001 [02:19<03:50,  2.64it/s][A
Train Diffusion:  39%|███▉      | 393/1001 [02:20<03:48,  2.66it/s][A
Train Diffusion:  39%|███▉      | 394/1001 [02:20<03:52,  2.61it/s][A
Train Diffusion:  39%|███▉      | 395/1001 [02:20<03:43,  2.71it/s][A
Train Diffusion:  40%|███▉      | 396/1001 [02:21<03:38,  2.77it/s][A
Train Diffusion:  40%|███▉      | 397/1001 [02:21<03:32,  2.84it/s][A
Train Diffusion:  40%|███▉      | 398/1001 [02:21<03:33,  2.82it/s][A
Train Diffusion:  40%|███▉      | 399/1001 [02:22<03:28,  2.89it/s][A
Train Diffusion:  40%|███▉      | 400/1001 [02:22<03:24,  2.93it/s][A

Moving average norm loss at 400 iterations is: 247.4158508300781. Best norm loss value is: 169.5229949951172.

x with CO2 means across time = tensor([[4.5432e+01, 1.9780e-02, 7.5029e-01, 1.3401e-03],
        [4.5442e+01, 3.0147e-02, 7.4954e-01, 1.3940e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3955e+01, 5.3918e-02, 5.0937e-01, 6.8449e-04],
         [4.6571e+01, 1.0205e-08, 7.0448e-01, 9.7953e-04],
         ...,
         [4.5246e+01, 1.6298e-01, 7.4337e-01, 3.4873e-03],
         [4.7462e+01, 2.2340e-01, 6.5846e-01, 3.9008e-03],
         [3.9113e+01, 4.3614e-01, 1.4821e-01, 4.6923e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.9225e+01, 1.1528e+00, 6.8398e-01, 4.6316e-03],
         [4.6480e+01, 1.0000e-08, 5.8923e-01, 9.6111e-04],
         ...,
         [4.5363e+01, 2.3193e-01, 7.4721e-01, 3.9613e-03],
         [4.7335e+01, 4.4763e-01, 6.5651e-01, 5.3903e-03],
         [3.8719


Train Diffusion:  40%|████      | 401/1001 [02:22<03:30,  2.86it/s][A
Train Diffusion:  40%|████      | 402/1001 [02:23<03:35,  2.77it/s][A
Train Diffusion:  40%|████      | 403/1001 [02:23<03:43,  2.67it/s][A
Train Diffusion:  40%|████      | 404/1001 [02:24<03:41,  2.70it/s][A
Train Diffusion:  40%|████      | 405/1001 [02:24<03:36,  2.76it/s][A
Train Diffusion:  41%|████      | 406/1001 [02:24<03:34,  2.77it/s][A
Train Diffusion:  41%|████      | 407/1001 [02:25<03:35,  2.76it/s][A
Train Diffusion:  41%|████      | 408/1001 [02:25<03:32,  2.80it/s][A
Train Diffusion:  41%|████      | 409/1001 [02:25<03:35,  2.75it/s][A
Train Diffusion:  41%|████      | 410/1001 [02:26<03:34,  2.75it/s][A

Moving average norm loss at 410 iterations is: 203.09649200439452. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5493e+01, 2.5381e-02, 7.3553e-01, 1.3656e-03],
        [4.5504e+01, 1.9689e-02, 7.3368e-01, 1.3341e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3724e+01, 1.9944e+00, 5.0782e-01, 7.4416e-03],
         [4.6200e+01, 1.0000e-08, 5.6350e-01, 9.5211e-04],
         ...,
         [4.5500e+01, 1.3661e-01, 7.4187e-01, 3.3203e-03],
         [4.7584e+01, 3.1166e-01, 6.5737e-01, 4.4951e-03],
         [3.9839e+01, 4.0121e-01, 2.4237e-01, 4.5212e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.8893e+01, 1.0082e-01, 5.7274e-01, 9.4449e-04],
         [4.6102e+01, 1.0236e-08, 6.6031e-01, 9.6429e-04],
         ...,
         [4.5510e+01, 2.1476e-01, 7.2822e-01, 3.8468e-03],
         [4.7765e+01, 2.8222e-01, 6.4571e-01, 4.3041e-03],
         [3.98


Train Diffusion:  41%|████      | 411/1001 [02:26<03:51,  2.55it/s][A
Train Diffusion:  41%|████      | 412/1001 [02:27<03:51,  2.54it/s][A
Train Diffusion:  41%|████▏     | 413/1001 [02:27<03:47,  2.58it/s][A
Train Diffusion:  41%|████▏     | 414/1001 [02:27<03:41,  2.65it/s][A
Train Diffusion:  41%|████▏     | 415/1001 [02:28<03:33,  2.74it/s][A
Train Diffusion:  42%|████▏     | 416/1001 [02:28<03:31,  2.76it/s][A
Train Diffusion:  42%|████▏     | 417/1001 [02:28<03:27,  2.82it/s][A
Train Diffusion:  42%|████▏     | 418/1001 [02:29<03:26,  2.82it/s][A
Train Diffusion:  42%|████▏     | 419/1001 [02:29<03:27,  2.80it/s][A
Train Diffusion:  42%|████▏     | 420/1001 [02:29<03:24,  2.84it/s][A

Moving average norm loss at 420 iterations is: 212.59159393310546. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5709e+01, 1.5677e-02, 6.7865e-01, 1.3081e-03],
        [4.5685e+01, 2.5361e-02, 6.8077e-01, 1.3646e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.9175e+01, 1.9327e-01, 3.7068e-01, 1.2435e-03],
         [4.6186e+01, 1.0218e-08, 4.0861e-01, 9.2939e-04],
         ...,
         [4.5465e+01, 8.6076e-02, 6.6076e-01, 2.9542e-03],
         [4.7265e+01, 2.4916e-01, 5.9808e-01, 4.0473e-03],
         [4.0414e+01, 2.0277e-01, 1.8016e-01, 3.2301e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.3678e+01, 1.6526e+00, 6.2639e-01, 6.2665e-03],
         [4.6493e+01, 1.0000e-08, 6.8655e-01, 9.7546e-04],
         ...,
         [4.5188e+01, 2.0960e-01, 6.5669e-01, 3.7769e-03],
         [4.7662e+01, 2.3824e-01, 5.8398e-01, 3.9893e-03],
         [3.90


Train Diffusion:  42%|████▏     | 421/1001 [02:30<03:26,  2.81it/s][A
Train Diffusion:  42%|████▏     | 422/1001 [02:30<03:28,  2.78it/s][A
Train Diffusion:  42%|████▏     | 423/1001 [02:30<03:27,  2.79it/s][A
Train Diffusion:  42%|████▏     | 424/1001 [02:31<03:22,  2.86it/s][A
Train Diffusion:  42%|████▏     | 425/1001 [02:31<03:25,  2.80it/s][A
Train Diffusion:  43%|████▎     | 426/1001 [02:31<03:24,  2.81it/s][A
Train Diffusion:  43%|████▎     | 427/1001 [02:32<03:25,  2.80it/s][A
Train Diffusion:  43%|████▎     | 428/1001 [02:32<03:23,  2.81it/s][A
Train Diffusion:  43%|████▎     | 429/1001 [02:33<03:21,  2.84it/s][A
Train Diffusion:  43%|████▎     | 430/1001 [02:33<03:20,  2.85it/s][A

Moving average norm loss at 430 iterations is: 246.87277221679688. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5611e+01, 1.9839e-02, 7.1090e-01, 1.3257e-03],
        [4.5600e+01, 1.4992e-02, 7.1058e-01, 1.3097e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.9666e+01, 2.3784e+00, 4.8871e-01, 8.8824e-03],
         [4.6592e+01, 1.0000e-08, 5.7523e-01, 9.6120e-04],
         ...,
         [4.5268e+01, 8.1611e-02, 7.0700e-01, 2.9269e-03],
         [4.7316e+01, 1.1575e-01, 6.4267e-01, 3.1715e-03],
         [4.0245e+01, 2.4234e-01, 1.5189e-01, 3.4745e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4213e+01, 1.8856e-01, 4.2823e-01, 1.1469e-03],
         [4.6645e+01, 1.0195e-08, 5.0013e-01, 9.5131e-04],
         ...,
         [4.5147e+01, 8.3359e-02, 6.8839e-01, 2.9279e-03],
         [4.7442e+01, 2.3703e-01, 6.2349e-01, 3.9815e-03],
         [4.02


Train Diffusion:  43%|████▎     | 431/1001 [02:33<03:18,  2.87it/s][A
Train Diffusion:  43%|████▎     | 432/1001 [02:34<03:20,  2.84it/s][A
Train Diffusion:  43%|████▎     | 433/1001 [02:34<03:23,  2.79it/s][A
Train Diffusion:  43%|████▎     | 434/1001 [02:34<03:22,  2.80it/s][A
Train Diffusion:  43%|████▎     | 435/1001 [02:35<03:20,  2.82it/s][A
Train Diffusion:  44%|████▎     | 436/1001 [02:35<03:18,  2.85it/s][A
Train Diffusion:  44%|████▎     | 437/1001 [02:35<03:16,  2.87it/s][A
Train Diffusion:  44%|████▍     | 438/1001 [02:36<03:16,  2.87it/s][A
Train Diffusion:  44%|████▍     | 439/1001 [02:36<03:14,  2.88it/s][A
Train Diffusion:  44%|████▍     | 440/1001 [02:36<03:11,  2.92it/s][A

Moving average norm loss at 440 iterations is: 260.2480178833008. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5648e+01, 1.7989e-02, 6.9299e-01, 1.3228e-03],
        [4.5647e+01, 1.7061e-02, 6.9292e-01, 1.3220e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.0935e+01, 5.9531e-01, 5.4287e-01, 2.6996e-03],
         [4.7144e+01, 1.0025e-08, 6.8826e-01, 9.8798e-04],
         ...,
         [4.5109e+01, 9.6207e-02, 6.6942e-01, 3.0080e-03],
         [4.6940e+01, 1.6217e-01, 6.3466e-01, 3.4614e-03],
         [3.8969e+01, 3.7902e-01, 2.7533e-01, 4.3444e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4872e+01, 3.8380e-02, 5.8730e-01, 6.5752e-04],
         [4.7088e+01, 1.0000e-08, 5.5261e-01, 9.6726e-04],
         ...,
         [4.5210e+01, 1.7015e-01, 6.8951e-01, 3.5195e-03],
         [4.7569e+01, 3.4268e-01, 6.0938e-01, 4.6886e-03],
         [4.060


Train Diffusion:  44%|████▍     | 441/1001 [02:37<03:16,  2.85it/s][A
Train Diffusion:  44%|████▍     | 442/1001 [02:37<03:22,  2.75it/s][A
Train Diffusion:  44%|████▍     | 443/1001 [02:38<03:41,  2.52it/s][A
Train Diffusion:  44%|████▍     | 444/1001 [02:38<03:33,  2.61it/s][A
Train Diffusion:  44%|████▍     | 445/1001 [02:38<03:45,  2.46it/s][A
Train Diffusion:  45%|████▍     | 446/1001 [02:39<03:39,  2.53it/s][A
Train Diffusion:  45%|████▍     | 447/1001 [02:39<03:39,  2.52it/s][A
Train Diffusion:  45%|████▍     | 448/1001 [02:40<03:36,  2.56it/s][A
Train Diffusion:  45%|████▍     | 449/1001 [02:40<03:54,  2.36it/s][A
Train Diffusion:  45%|████▍     | 450/1001 [02:41<04:22,  2.10it/s][A

Moving average norm loss at 450 iterations is: 216.1759262084961. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5558e+01, 1.9856e-02, 6.9940e-01, 1.3305e-03],
        [4.5560e+01, 1.7703e-02, 7.0093e-01, 1.3230e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.1570e+01, 7.1181e-01, 4.9876e-01, 3.1107e-03],
         [4.6603e+01, 1.0000e-08, 6.2122e-01, 9.6807e-04],
         ...,
         [4.5063e+01, 5.9065e-02, 7.3292e-01, 2.7713e-03],
         [4.7711e+01, 2.2964e-01, 6.7200e-01, 3.9578e-03],
         [4.0758e+01, 3.3578e-01, 2.8102e-01, 4.1445e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5188e+01, 7.3646e-03, 6.1902e-01, 5.5947e-04],
         [4.6530e+01, 1.0003e-08, 6.7064e-01, 9.7386e-04],
         ...,
         [4.5375e+01, 1.7374e-01, 6.6494e-01, 3.5451e-03],
         [4.7115e+01, 2.3252e-01, 6.1240e-01, 3.9330e-03],
         [4.077


Train Diffusion:  45%|████▌     | 451/1001 [02:41<04:51,  1.89it/s][A
Train Diffusion:  45%|████▌     | 452/1001 [02:42<04:35,  1.99it/s][A
Train Diffusion:  45%|████▌     | 453/1001 [02:42<04:10,  2.19it/s][A
Train Diffusion:  45%|████▌     | 454/1001 [02:42<03:53,  2.34it/s][A
Train Diffusion:  45%|████▌     | 455/1001 [02:43<03:40,  2.47it/s][A
Train Diffusion:  46%|████▌     | 456/1001 [02:43<03:32,  2.57it/s][A
Train Diffusion:  46%|████▌     | 457/1001 [02:44<03:27,  2.62it/s][A
Train Diffusion:  46%|████▌     | 458/1001 [02:44<03:24,  2.66it/s][A
Train Diffusion:  46%|████▌     | 459/1001 [02:44<03:20,  2.71it/s][A
Train Diffusion:  46%|████▌     | 460/1001 [02:45<03:22,  2.67it/s][A

Moving average norm loss at 460 iterations is: 250.6189743041992. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5504e+01, 2.2903e-02, 7.0229e-01, 1.3501e-03],
        [4.5517e+01, 1.7702e-02, 7.0302e-01, 1.3184e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4697e+01, 7.2419e-01, 6.3570e-01, 3.0508e-03],
         [4.5866e+01, 1.0000e-08, 6.9432e-01, 9.6479e-04],
         ...,
         [4.5043e+01, 1.7144e-01, 7.1194e-01, 3.5262e-03],
         [4.8009e+01, 2.4275e-01, 6.5483e-01, 4.0547e-03],
         [4.3442e+01, 4.9228e-01, 3.1243e-01, 5.3019e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.0673e+01, 2.9348e-03, 5.6138e-01, 6.3347e-04],
         [4.5983e+01, 1.0000e-08, 6.7016e-01, 9.6349e-04],
         ...,
         [4.5494e+01, 5.4994e-02, 6.2932e-01, 2.7364e-03],
         [4.7429e+01, 2.1286e-01, 6.2025e-01, 3.8187e-03],
         [4.133


Train Diffusion:  46%|████▌     | 461/1001 [02:45<03:19,  2.71it/s][A
Train Diffusion:  46%|████▌     | 462/1001 [02:45<03:17,  2.73it/s][A
Train Diffusion:  46%|████▋     | 463/1001 [02:46<03:14,  2.77it/s][A
Train Diffusion:  46%|████▋     | 464/1001 [02:46<03:12,  2.79it/s][A
Train Diffusion:  46%|████▋     | 465/1001 [02:46<03:11,  2.80it/s][A
Train Diffusion:  47%|████▋     | 466/1001 [02:47<03:12,  2.77it/s][A
Train Diffusion:  47%|████▋     | 467/1001 [02:47<03:11,  2.80it/s][A
Train Diffusion:  47%|████▋     | 468/1001 [02:47<03:09,  2.82it/s][A
Train Diffusion:  47%|████▋     | 469/1001 [02:48<03:23,  2.62it/s][A
Train Diffusion:  47%|████▋     | 470/1001 [02:48<03:23,  2.61it/s][A

Moving average norm loss at 470 iterations is: 215.3713165283203. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5604e+01, 1.1109e-02, 7.1744e-01, 1.2886e-03],
        [4.5626e+01, 1.5663e-02, 7.1955e-01, 1.3088e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4186e+01, 3.6867e-02, 4.8729e-01, 6.2611e-04],
         [4.5785e+01, 1.0000e-08, 5.4819e-01, 9.4208e-04],
         ...,
         [4.5427e+01, 7.7127e-02, 7.3385e-01, 2.9115e-03],
         [4.8256e+01, 2.0794e-01, 7.0377e-01, 3.8471e-03],
         [4.4017e+01, 2.6164e-01, 2.8573e-01, 3.8091e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.0108e+01, 1.6489e+00, 4.7285e-01, 6.3463e-03],
         [4.5876e+01, 1.0000e-08, 6.1268e-01, 9.5314e-04],
         ...,
         [4.5257e+01, 6.4728e-02, 7.3790e-01, 2.8204e-03],
         [4.8011e+01, 8.2225e-02, 6.7577e-01, 2.9893e-03],
         [4.537


Train Diffusion:  47%|████▋     | 471/1001 [02:49<03:43,  2.38it/s][A
Train Diffusion:  47%|████▋     | 472/1001 [02:49<03:45,  2.34it/s][A
Train Diffusion:  47%|████▋     | 473/1001 [02:50<03:39,  2.41it/s][A
Train Diffusion:  47%|████▋     | 474/1001 [02:50<03:36,  2.43it/s][A
Train Diffusion:  47%|████▋     | 475/1001 [02:50<03:28,  2.52it/s][A
Train Diffusion:  48%|████▊     | 476/1001 [02:51<03:20,  2.61it/s][A
Train Diffusion:  48%|████▊     | 477/1001 [02:51<03:14,  2.69it/s][A
Train Diffusion:  48%|████▊     | 478/1001 [02:51<03:10,  2.74it/s][A
Train Diffusion:  48%|████▊     | 479/1001 [02:52<03:08,  2.77it/s][A
Train Diffusion:  48%|████▊     | 480/1001 [02:52<03:05,  2.81it/s][A

Moving average norm loss at 480 iterations is: 210.44801330566406. Best norm loss value is: 148.22096252441406.

x with CO2 means across time = tensor([[4.5675e+01, 1.5611e-02, 7.3877e-01, 1.3143e-03],
        [4.5641e+01, 1.5432e-02, 7.4060e-01, 1.3195e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.0958e+01, 4.9699e-01, 5.4684e-01, 2.3580e-03],
         [4.6606e+01, 1.0000e-08, 7.2075e-01, 9.8256e-04],
         ...,
         [4.5370e+01, 2.9299e-02, 7.6413e-01, 2.5928e-03],
         [4.8066e+01, 1.0231e-01, 7.1171e-01, 3.1355e-03],
         [4.6364e+01, 3.2403e-01, 1.9580e-01, 4.3022e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.4804e+01, 2.8480e-03, 6.4771e-01, 5.4092e-04],
         [4.6759e+01, 1.0000e-08, 6.0866e-01, 9.6918e-04],
         ...,
         [4.5350e+01, 1.4193e-01, 7.3522e-01, 3.3474e-03],
         [4.8530e+01, 2.8236e-01, 6.8340e-01, 4.3511e-03],
         [4.46


Train Diffusion:  48%|████▊     | 481/1001 [02:53<03:03,  2.83it/s][A
Train Diffusion:  48%|████▊     | 482/1001 [02:53<03:06,  2.78it/s][A
Train Diffusion:  48%|████▊     | 483/1001 [02:53<03:29,  2.47it/s][A
Train Diffusion:  48%|████▊     | 484/1001 [02:54<03:39,  2.36it/s][A
Train Diffusion:  48%|████▊     | 485/1001 [02:54<03:38,  2.36it/s][A
Train Diffusion:  49%|████▊     | 486/1001 [02:55<03:32,  2.43it/s][A
Train Diffusion:  49%|████▊     | 487/1001 [02:55<03:25,  2.51it/s][A
Train Diffusion:  49%|████▉     | 488/1001 [02:55<03:16,  2.62it/s][A
Train Diffusion:  49%|████▉     | 489/1001 [02:56<03:10,  2.69it/s][A
Train Diffusion:  49%|████▉     | 490/1001 [02:56<03:13,  2.64it/s][A

Moving average norm loss at 490 iterations is: 186.2179428100586. Best norm loss value is: 146.6666259765625.

x with CO2 means across time = tensor([[4.5554e+01, 1.2789e-02, 7.5268e-01, 1.3020e-03],
        [4.5576e+01, 1.3363e-02, 7.5187e-01, 1.3063e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5250e+01, 3.0358e-01, 6.7044e-01, 1.5999e-03],
         [4.6640e+01, 1.0000e-08, 7.2735e-01, 9.8415e-04],
         ...,
         [4.5140e+01, 7.0320e-02, 7.7439e-01, 2.8625e-03],
         [4.7536e+01, 1.7377e-01, 6.9474e-01, 3.5828e-03],
         [4.4849e+01, 3.9534e-01, 2.3714e-01, 4.7111e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.1601e+01, 2.4650e-03, 4.9305e-01, 6.3879e-04],
         [4.6611e+01, 1.0000e-08, 5.5497e-01, 9.5863e-04],
         ...,
         [4.5201e+01, 4.6716e-02, 7.5066e-01, 2.6990e-03],
         [4.7875e+01, 1.0140e-01, 7.3683e-01, 3.1271e-03],
         [4.5492


Train Diffusion:  49%|████▉     | 491/1001 [02:57<03:29,  2.44it/s][A
Train Diffusion:  49%|████▉     | 492/1001 [02:57<03:34,  2.38it/s][A
Train Diffusion:  49%|████▉     | 493/1001 [02:57<03:24,  2.48it/s][A
Train Diffusion:  49%|████▉     | 494/1001 [02:58<03:18,  2.55it/s][A
Train Diffusion:  49%|████▉     | 495/1001 [02:58<03:14,  2.60it/s][A
Train Diffusion:  50%|████▉     | 496/1001 [02:59<03:11,  2.64it/s][A
Train Diffusion:  50%|████▉     | 497/1001 [02:59<03:11,  2.63it/s][A
Train Diffusion:  50%|████▉     | 498/1001 [02:59<03:07,  2.68it/s][A
Train Diffusion:  50%|████▉     | 499/1001 [03:00<03:05,  2.71it/s][A
Train Diffusion:  50%|████▉     | 500/1001 [03:00<03:07,  2.67it/s][A

Moving average norm loss at 500 iterations is: 175.55342712402344. Best norm loss value is: 146.6666259765625.

x with CO2 means across time = tensor([[4.5643e+01, 1.4112e-02, 6.9445e-01, 1.3042e-03],
        [4.5633e+01, 1.3824e-02, 6.9449e-01, 1.2987e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.1810e+01, 2.4398e-01, 5.6947e-01, 1.4947e-03],
         [4.6261e+01, 1.0000e-08, 6.6961e-01, 9.6864e-04],
         ...,
         [4.5120e+01, 2.2424e-02, 7.0388e-01, 2.5178e-03],
         [4.6911e+01, 6.3262e-02, 6.5242e-01, 2.8048e-03],
         [4.4037e+01, 2.9594e-01, 3.5181e-01, 4.0522e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.5250e+01, 8.1283e-03, 5.9731e-01, 5.6022e-04],
         [4.6304e+01, 1.0000e-08, 6.6448e-01, 9.6870e-04],
         ...,
         [4.5118e+01, 9.4800e-02, 6.6948e-01, 2.9989e-03],
         [4.7346e+01, 2.3105e-01, 6.4357e-01, 3.9424e-03],
         [4.550


Train Diffusion:  50%|█████     | 501/1001 [03:00<03:06,  2.68it/s][A
Train Diffusion:  50%|█████     | 502/1001 [03:01<03:06,  2.68it/s][A
Train Diffusion:  50%|█████     | 503/1001 [03:01<03:22,  2.46it/s][A
Train Diffusion:  50%|█████     | 504/1001 [03:02<03:22,  2.45it/s][A
Train Diffusion:  50%|█████     | 505/1001 [03:02<03:20,  2.47it/s][A
Train Diffusion:  51%|█████     | 506/1001 [03:02<03:14,  2.55it/s][A
Train Diffusion:  51%|█████     | 507/1001 [03:03<03:11,  2.58it/s][A
Train Diffusion:  51%|█████     | 508/1001 [03:03<03:09,  2.60it/s][A
Train Diffusion:  51%|█████     | 509/1001 [03:04<03:05,  2.65it/s][A
Train Diffusion:  51%|█████     | 510/1001 [03:04<03:03,  2.67it/s][A

Moving average ELBO loss at 510 iterations is: 13369.453857421875. Best ELBO loss value is: 3994.734375.

x with CO2 means across time = tensor([[4.5692e+01, 4.0301e-03, 5.6261e-01, 1.2257e-03],
        [4.5761e+01, 6.9220e-03, 5.3963e-01, 1.2407e-03]],
       grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [2.6471e+01, 5.6975e-02, 1.1327e-01, 6.8465e-04],
         [4.6656e+01, 1.0000e-08, 7.5103e-02, 8.8992e-04],
         ...,
         [4.4874e+01, 7.0674e-03, 4.7105e-01, 2.3388e-03],
         [4.6726e+01, 3.2082e-02, 5.9534e-01, 2.5728e-03],
         [4.5250e+01, 7.6101e-03, 1.3875e-01, 2.1615e-03]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [3.4158e+01, 2.3512e-03, 1.5967e-01, 6.3737e-04],
         [4.7061e+01, 1.0000e-08, 3.9415e-01, 9.4378e-04],
         ...,
         [4.5403e+01, 3.4249e-02, 6.5278e-01, 2.5977e-03],
         [4.7640e+01, 3.3781e-02, 4.7706e-01, 2.5956e-03],
         [4.4368e+01,


Train Diffusion:  51%|█████     | 511/1001 [03:04<03:02,  2.69it/s][A
Train Diffusion:  51%|█████     | 512/1001 [03:05<03:00,  2.71it/s][A
Train Diffusion:  51%|█████     | 513/1001 [03:05<02:59,  2.72it/s][A
Train Diffusion:  51%|█████▏    | 514/1001 [03:05<02:57,  2.75it/s][A
Train Diffusion:  51%|█████▏    | 515/1001 [03:06<02:55,  2.77it/s][A
Train Diffusion:  52%|█████▏    | 516/1001 [03:06<02:53,  2.79it/s][A
Train Diffusion:  52%|█████▏    | 517/1001 [03:06<02:52,  2.81it/s][A
Train Diffusion:  52%|█████▏    | 518/1001 [03:07<02:52,  2.81it/s][A
Train Diffusion:  52%|█████▏    | 519/1001 [03:07<02:50,  2.82it/s][A
Train Diffusion:  52%|█████▏    | 520/1001 [03:07<02:51,  2.81it/s][A

Moving average ELBO loss at 520 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  52%|█████▏    | 521/1001 [03:08<02:53,  2.77it/s][A
Train Diffusion:  52%|█████▏    | 522/1001 [03:08<02:51,  2.79it/s][A
Train Diffusion:  52%|█████▏    | 523/1001 [03:09<02:52,  2.77it/s][A
Train Diffusion:  52%|█████▏    | 524/1001 [03:09<02:50,  2.79it/s][A
Train Diffusion:  52%|█████▏    | 525/1001 [03:09<02:49,  2.80it/s][A
Train Diffusion:  53%|█████▎    | 526/1001 [03:10<02:50,  2.79it/s][A
Train Diffusion:  53%|█████▎    | 527/1001 [03:10<02:50,  2.79it/s][A
Train Diffusion:  53%|█████▎    | 528/1001 [03:10<02:49,  2.79it/s][A
Train Diffusion:  53%|█████▎    | 529/1001 [03:11<02:48,  2.80it/s][A
Train Diffusion:  53%|█████▎    | 530/1001 [03:11<02:47,  2.81it/s][A

Moving average ELBO loss at 530 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  53%|█████▎    | 531/1001 [03:11<02:48,  2.79it/s][A
Train Diffusion:  53%|█████▎    | 532/1001 [03:12<02:49,  2.76it/s][A
Train Diffusion:  53%|█████▎    | 533/1001 [03:12<02:53,  2.69it/s][A
Train Diffusion:  53%|█████▎    | 534/1001 [03:13<02:50,  2.73it/s][A
Train Diffusion:  53%|█████▎    | 535/1001 [03:13<02:50,  2.74it/s][A
Train Diffusion:  54%|█████▎    | 536/1001 [03:13<02:49,  2.74it/s][A
Train Diffusion:  54%|█████▎    | 537/1001 [03:14<02:52,  2.69it/s][A
Train Diffusion:  54%|█████▎    | 538/1001 [03:14<02:51,  2.70it/s][A
Train Diffusion:  54%|█████▍    | 539/1001 [03:14<02:48,  2.74it/s][A
Train Diffusion:  54%|█████▍    | 540/1001 [03:15<02:48,  2.73it/s][A

Moving average ELBO loss at 540 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  54%|█████▍    | 541/1001 [03:15<02:46,  2.77it/s][A
Train Diffusion:  54%|█████▍    | 542/1001 [03:15<02:45,  2.77it/s][A
Train Diffusion:  54%|█████▍    | 543/1001 [03:16<02:44,  2.79it/s][A
Train Diffusion:  54%|█████▍    | 544/1001 [03:16<02:43,  2.79it/s][A
Train Diffusion:  54%|█████▍    | 545/1001 [03:17<02:43,  2.79it/s][A
Train Diffusion:  55%|█████▍    | 546/1001 [03:17<02:41,  2.82it/s][A
Train Diffusion:  55%|█████▍    | 547/1001 [03:17<02:40,  2.83it/s][A
Train Diffusion:  55%|█████▍    | 548/1001 [03:18<02:42,  2.78it/s][A
Train Diffusion:  55%|█████▍    | 549/1001 [03:18<02:41,  2.80it/s][A
Train Diffusion:  55%|█████▍    | 550/1001 [03:18<02:40,  2.81it/s][A

Moving average ELBO loss at 550 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  55%|█████▌    | 551/1001 [03:19<02:40,  2.80it/s][A
Train Diffusion:  55%|█████▌    | 552/1001 [03:19<02:43,  2.74it/s][A
Train Diffusion:  55%|█████▌    | 553/1001 [03:19<02:41,  2.77it/s][A
Train Diffusion:  55%|█████▌    | 554/1001 [03:20<02:40,  2.78it/s][A
Train Diffusion:  55%|█████▌    | 555/1001 [03:20<02:39,  2.79it/s][A
Train Diffusion:  56%|█████▌    | 556/1001 [03:20<02:40,  2.77it/s][A
Train Diffusion:  56%|█████▌    | 557/1001 [03:21<03:01,  2.45it/s][A
Train Diffusion:  56%|█████▌    | 558/1001 [03:21<03:10,  2.33it/s][A
Train Diffusion:  56%|█████▌    | 559/1001 [03:22<03:26,  2.14it/s][A
Train Diffusion:  56%|█████▌    | 560/1001 [03:22<03:21,  2.19it/s][A

Moving average ELBO loss at 560 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  56%|█████▌    | 561/1001 [03:23<03:15,  2.25it/s][A
Train Diffusion:  56%|█████▌    | 562/1001 [03:23<03:15,  2.25it/s][A
Train Diffusion:  56%|█████▌    | 563/1001 [03:24<03:14,  2.26it/s][A
Train Diffusion:  56%|█████▋    | 564/1001 [03:24<03:12,  2.27it/s][A
Train Diffusion:  56%|█████▋    | 565/1001 [03:25<03:13,  2.25it/s][A
Train Diffusion:  57%|█████▋    | 566/1001 [03:25<03:08,  2.31it/s][A
Train Diffusion:  57%|█████▋    | 567/1001 [03:25<03:06,  2.32it/s][A
Train Diffusion:  57%|█████▋    | 568/1001 [03:26<03:04,  2.35it/s][A
Train Diffusion:  57%|█████▋    | 569/1001 [03:26<02:56,  2.44it/s][A
Train Diffusion:  57%|█████▋    | 570/1001 [03:27<02:48,  2.56it/s][A

Moving average ELBO loss at 570 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  57%|█████▋    | 571/1001 [03:27<02:57,  2.42it/s][A
Train Diffusion:  57%|█████▋    | 572/1001 [03:27<02:55,  2.45it/s][A
Train Diffusion:  57%|█████▋    | 573/1001 [03:28<02:59,  2.39it/s][A
Train Diffusion:  57%|█████▋    | 574/1001 [03:28<02:49,  2.51it/s][A
Train Diffusion:  57%|█████▋    | 575/1001 [03:29<02:50,  2.49it/s][A
Train Diffusion:  58%|█████▊    | 576/1001 [03:29<02:51,  2.48it/s][A
Train Diffusion:  58%|█████▊    | 577/1001 [03:29<02:52,  2.46it/s][A
Train Diffusion:  58%|█████▊    | 578/1001 [03:30<02:52,  2.45it/s][A
Train Diffusion:  58%|█████▊    | 579/1001 [03:30<02:50,  2.48it/s][A
Train Diffusion:  58%|█████▊    | 580/1001 [03:31<02:50,  2.46it/s][A

Moving average ELBO loss at 580 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  58%|█████▊    | 581/1001 [03:31<02:47,  2.50it/s][A
Train Diffusion:  58%|█████▊    | 582/1001 [03:32<02:51,  2.44it/s][A
Train Diffusion:  58%|█████▊    | 583/1001 [03:32<02:49,  2.47it/s][A
Train Diffusion:  58%|█████▊    | 584/1001 [03:32<02:48,  2.47it/s][A
Train Diffusion:  58%|█████▊    | 585/1001 [03:33<02:41,  2.57it/s][A
Train Diffusion:  59%|█████▊    | 586/1001 [03:33<02:34,  2.68it/s][A
Train Diffusion:  59%|█████▊    | 587/1001 [03:33<02:29,  2.77it/s][A
Train Diffusion:  59%|█████▊    | 588/1001 [03:34<02:29,  2.76it/s][A
Train Diffusion:  59%|█████▉    | 589/1001 [03:34<02:33,  2.68it/s][A
Train Diffusion:  59%|█████▉    | 590/1001 [03:34<02:28,  2.76it/s][A

Moving average ELBO loss at 590 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  59%|█████▉    | 591/1001 [03:35<02:26,  2.80it/s][A
Train Diffusion:  59%|█████▉    | 592/1001 [03:35<02:30,  2.72it/s][A
Train Diffusion:  59%|█████▉    | 593/1001 [03:36<02:34,  2.63it/s][A
Train Diffusion:  59%|█████▉    | 594/1001 [03:36<02:38,  2.57it/s][A
Train Diffusion:  59%|█████▉    | 595/1001 [03:36<02:31,  2.69it/s][A
Train Diffusion:  60%|█████▉    | 596/1001 [03:37<02:30,  2.70it/s][A
Train Diffusion:  60%|█████▉    | 597/1001 [03:37<02:29,  2.70it/s][A
Train Diffusion:  60%|█████▉    | 598/1001 [03:37<02:26,  2.76it/s][A
Train Diffusion:  60%|█████▉    | 599/1001 [03:38<02:26,  2.74it/s][A
Train Diffusion:  60%|█████▉    | 600/1001 [03:38<02:25,  2.76it/s][A

Moving average ELBO loss at 600 iterations is: nan. Best ELBO loss value is: 3820.406005859375.

x with CO2 means across time = tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan]], grad_fn=<MeanBackward1>)

x with CO2 = tensor([[[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]],

        [[4.5660e+01, 7.1469e-02, 7.1469e-01, 1.1000e-03],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         ...,
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan],
         [       nan,        nan,        nan,        nan]]],
       grad_fn=<CatBackward>)



Train Diffusion:  60%|██████    | 601/1001 [03:39<02:26,  2.73it/s][A
Train Diffusion:  60%|██████    | 602/1001 [03:39<02:25,  2.74it/s][A


KeyboardInterrupt: 

In [None]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'CON_synthetic_sol_df.csv', 0.1, state_dim_SCON, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SCON_SS, SCON_SS_params_dict, temp_gen, temp_ref, analytical_steady_state_init_CON, get_CO2_CON)

In [None]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'AWB_synthetic_sol_df.csv', 0.1, state_dim_SAWB, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SAWB_C, SAWB_C_params_dict, temp_gen, temp_ref, analytical_steady_state_init_AWB, get_CO2_AWB)

In [None]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'AWB_synthetic_sol_df.csv', 0.1, state_dim_SAWB, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SAWB_SS, SAWB_SS_params_dict, temp_gen, temp_ref, analytical_steady_state_init_AWB, get_CO2_AWB)

In [None]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'AWB_ECA_synthetic_sol_df.csv', 0.1, state_dim_SAWB, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SAWB_ECA_C, SAWB_ECA_C_params_dict, temp_gen, temp_ref, analytical_steady_state_init_AWB_ECA, get_CO2_AWB_ECA)

In [None]:
train(devi, l_r, niter, piter, batch_size, ObsModel, csv_to_obs_df, 'AWB_ECA_synthetic_sol_df.csv', 0.1, state_dim_SAWB, t, dt, n, t_span_tensor, i_s_tensor, i_d_tensor, drift_diffusion_SAWB_ECA_SS, SAWB_ECA_SS_params_dict, temp_gen, temp_ref, analytical_steady_state_init_AWB_ECA, get_CO2_AWB_ECA)