## 3. Multiple input/output training
Goal: Enhance the baseline emulator by incrementally adding input and output variables, one phase at a time.
- Phase 0:
  <br> - **in:** T_surf, SST, SIC, land_mask, Z500, T850, SLP
  <br> - **out:** T_surf
- Phase 1:
  <br> - **in:** + T2m
  <br> - **out:** + T2m
- Phase 2:
  <br> - **in:** + U10 + V10
  <br> - **out:** + U10 + V10
- Phase 3:
  <br> - **in:** + U500 + V500
  <br> - **out:** + U500 + V500
- Phase 4:
  <br> - **in:** + Q850 + Q500
  <br> - **out:** + Q850 + Q500
- Phase 5:
  <br> - **out:** Total precipitation
  

In [1]:
from utils.utils import process_variable, get_indices
from architectures.cnn_baseline import CNN2D_Baseline, set_seed
import logging
import numpy as np

# Root logger: INFO and above, each record rendered as "<logger> - <level> - <message>".
logging.basicConfig(
    format="%(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# One spec per variable used by the emulator. Every field is read by the
# loading cell below and forwarded to process_variable; 'is_output' marks the
# variables that are also stacked into the training target.
variables = [
    dict(
        var_name="t_ref",
        file_name="DATA/atmos.192101-201012.t_ref.nc",
        add_spatial=True,
        lag_data_set=True,
        is_static=False,
        standardize=True,
        is_output=True,
    ),
    dict(
        var_name="t_surf",
        file_name="DATA/atmos.192101-201012.t_surf.nc",
        add_spatial=False,
        lag_data_set=True,
        is_static=False,
        standardize=True,
        is_output=True,
    ),
]

# Number of epochs handed to train_model.
num_epochs = 50
# Collector for trained-model records; nothing in the visible cells appends to it
# (the only append is commented out in the training cell).
Models = []

In [2]:
# set_seed() is called with its defaults before any data is processed
# (presumably to pin RNG state — see architectures.cnn_baseline to confirm).
set_seed()

# Run every configured variable through process_variable and keep the results
# in configuration order; later cells rely on that order.
var_data = []
for variable in variables:
    # Required fields: a missing key raises KeyError.
    var_name, file_name = variable['var_name'], variable['file_name']
    add_spatial, lag_data_set = variable['add_spatial'], variable['lag_data_set']
    is_static, standardize = variable['is_static'], variable['standardize']

    # Optional fields: absent keys fall back to None / False.
    fill_value_method = variable.get('fill_value_method')
    is_output = variable.get('is_output', False)

    processed = process_variable(
        file_name, var_name, lag_data_set, add_spatial, is_static, standardize,
        fill_value_method=fill_value_method,
        isOutput=is_output,
    )
    var_data.append(processed)

t_ref - INFO - Working on var: t_ref from DATA/atmos.192101-201012.t_ref.nc
t_ref - INFO - X_train_scaled : (861, 7, 90, 90)
t_ref - INFO - X_test_scaled : (213, 7, 90, 90)
t_ref - INFO - y_train_scaled : (861, 1, 90, 90)
t_ref - INFO - y_test_scaled : (213, 1, 90, 90)
t_ref - INFO - y_train : (861, 90, 90)
t_ref - INFO - y_test : (213, 90, 90)
t_surf - INFO - Working on var: t_surf from DATA/atmos.192101-201012.t_surf.nc
t_surf - INFO - X_train_scaled : (861, 3, 90, 90)
t_surf - INFO - X_test_scaled : (213, 3, 90, 90)
t_surf - INFO - y_train_scaled : (861, 1, 90, 90)
t_surf - INFO - y_test_scaled : (213, 1, 90, 90)


In [None]:
# Stack the per-variable arrays into single multi-channel tensors.
#   * inputs  -> X_train_scaled / X_test_scaled, concatenated on the channel axis (axis=1)
#   * targets -> y_train_scaled / y_test_scaled, from entries flagged 'is_output'
# Each var_data entry is annotated in place with the [start, end) channel slice
# it occupies in X, which get_indices reads in the next cell.
x_test_scaleds = []
x_train_scaleds = []
y_test_scaleds = []
y_train_scaleds = []

# Take the sample counts from the first time-varying variable *before* the loop,
# so the static "wet" field can be broadcast wherever it sits in var_data.
# (Previously these were only bound once "t_ref" had been visited, giving a
# NameError — or stale kernel values — if "wet" came first or "t_ref" was absent.)
dynamic_vars = [entry for entry in var_data if entry['varname'] != "wet"]
if not dynamic_vars:
    raise ValueError("var_data needs at least one time-varying variable to size the inputs")
nsamples_train = dynamic_vars[0]['X_train_scaled'].shape[0]
nsamples_test = dynamic_vars[0]['X_test_scaled'].shape[0]

end = 0
for variable in var_data:
    if variable['varname'] == "t_ref":
        # Time axis of the reference variable, kept for later cells.
        time_test = variable['time']

    start = end
    if variable['varname'] == "wet":
        # Static field: repeat it along the sample axis so it lines up with the
        # time-varying inputs (assumes its leading dimension is 1 — TODO confirm).
        static_train = np.broadcast_to(
            variable['X_train_scaled'],
            (nsamples_train,) + variable['X_train_scaled'].shape[1:]
        )
        static_test = np.broadcast_to(
            variable['X_test_scaled'],
            (nsamples_test,) + variable['X_test_scaled'].shape[1:]
        )

        # Count the channels actually appended rather than assuming exactly one,
        # so the recorded slice always matches the data.
        end = start + static_test.shape[1]
        x_train_scaleds.append(static_train)
        x_test_scaleds.append(static_test)
    else:
        end = start + variable['X_test_scaled'].shape[1]
        x_test_scaleds.append(variable['X_test_scaled'])
        x_train_scaleds.append(variable['X_train_scaled'])

    # Channel slice of this variable inside the concatenated X arrays.
    variable['start'] = start
    variable['end'] = end

    is_output = variable.get('is_output', False)
    if is_output:
        print(f"{variable['varname']} is an output!")
        y_test_scaleds.append(variable['y_test_scaled'])
        y_train_scaleds.append(variable['y_train_scaled'])

if not y_train_scaleds:
    # np.concatenate([]) would fail with a far less helpful message.
    raise ValueError("no variable in var_data is flagged as an output")

X_train_scaled = np.concatenate(x_train_scaleds, axis=1)
X_test_scaled = np.concatenate(x_test_scaleds, axis=1)
y_train_scaled = np.concatenate(y_train_scaleds, axis=1)
y_test_scaled = np.concatenate(y_test_scaleds, axis=1)

logging.info(f"X_train_scaled : {X_train_scaled.shape}")
logging.info(f"X_test_scaled : {X_test_scaled.shape}")
logging.info(f"y_train_scaled : {y_train_scaled.shape}")
logging.info(f"y_test_scaled : {y_test_scaled.shape}")


t_ref is an output!
t_surf is an output!


root - INFO - X_train_scaled : (861, 10, 90, 90)
root - INFO - X_test_scaled : (213, 10, 90, 90)
root - INFO - y_train_scaled : (861, 2, 90, 90)
root - INFO - y_test_scaled : (213, 2, 90, 90)


: 

In [None]:
# Pick the input channels for this experiment: get_indices returns the
# (start, end) channel slices of the requested variables inside X_*_scaled.
variable_set = ["t_ref", "t_surf"]
indices = get_indices(var_data, variable_set)
X_train_scaled_prime = np.concatenate([X_train_scaled[:, start:end, :, :] for start, end in indices], axis=1)
X_test_scaled_prime = np.concatenate([X_test_scaled[:, start:end, :, :] for start, end in indices], axis=1)
# Note: the first two messages report the *selected* (prime) input shapes.
logging.info(f"X_train_scaled : {X_train_scaled_prime.shape}")
logging.info(f"X_test_scaled : {X_test_scaled_prime.shape}")
logging.info(f"y_train_scaled : {y_train_scaled.shape}")
logging.info(f"y_test_scaled : {y_test_scaled.shape}")

set_seed()

# Targets were stacked in var_data order for every entry flagged 'is_output',
# so output channel k belongs to the k-th such entry. Deriving the map from
# var_data keeps the channel indices and the channel count in step with the
# data instead of relying on hand-written numbers.
output_names = [entry['varname'] for entry in var_data if entry.get('is_output', False)]
out_var_maps = [{'var_name': name,
                 'out_channel': channel,
                 'weight': 1,
                 'loss': 0}
                for channel, name in enumerate(output_names)]
assert len(out_var_maps) == y_train_scaled.shape[1], (
    f"{len(out_var_maps)} output variables but y_train_scaled has "
    f"{y_train_scaled.shape[1]} channels"
)

# NOTE(review): label="deletedeletedelete" looks like a throwaway placeholder;
# pick a meaningful run label if CNN2D_Baseline uses it for saved artefacts.
step1 = CNN2D_Baseline(in_channels=X_train_scaled_prime.shape[1], case=2, label="deletedeletedelete",
                       out_channels=len(out_var_maps), out_var_maps=out_var_maps)
train_loader, test_loader = step1.create_data_loaders(X_train_scaled_prime, y_train_scaled, X_test_scaled_prime, y_test_scaled)
step1.train_model(train_loader, test_loader, num_epochs=num_epochs)

# TODO: evaluation is currently disabled; re-enable or remove once the
# multi-output RMSE path is settled.
#step1.get_rmse(var_data[0]['tas_data'], X_test_scaled_prime, y_test_scaled, ranges=(17.449455, 1665.3541))
# RMSE = step1.get_global_rmse_over_time(var_data[0]['tas_data'], X_test_scaled_prime, y_test_scaled)
# Models.append({'Model Name': 'Baseline', 'Trained Model': step1, 'RMSE': RMSE, 'Slope': None})

root - INFO - X_train_scaled : (861, 10, 90, 90)
root - INFO - X_test_scaled : (213, 10, 90, 90)
root - INFO - y_train_scaled : (861, 2, 90, 90)
root - INFO - y_test_scaled : (213, 2, 90, 90)


Model Architecture:
Sequential(
  (0): Conv2d(10, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): ReLU()
  (9): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (10): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): ReLU()
  (12): ConvTranspose2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  (13): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): ReLU()
  (15): Conv2d(16, 2, kernel_size=(3, 3), stride=(1,