# Testing whether RandDense models have different prediction speeds than their standard counterparts
See Section 6a of the manuscript

In [None]:
import sys
from google.colab import drive

# Mount Google Drive so that the project folder is reachable from this runtime.
drive.mount('/content/drive')

# Put the project folder on the import path; the later
# `from utils import *` / `from random_nn import *` cells rely on it.
sys.path.append('/content/drive/My Drive/Colab Notebooks/USC Random NN')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# To resolve an xarray bug
!pip install -I importlib-metadata==4.13.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting importlib-metadata==4.13.0
  Using cached importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Collecting typing-extensions>=3.6.4
  Using cached typing_extensions-4.4.0-py3-none-any.whl (26 kB)
Collecting zipp>=0.5
  Using cached zipp-3.9.0-py3-none-any.whl (5.8 kB)
Installing collected packages: zipp, typing-extensions, importlib-metadata
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.1.3 requires typing-extensions<4.2.0,>=3.7.4.1; python_version < "3.8", but you have typing-extensions 4.4.0 which is incompatible.
spacy 3.4.1 requires typing-extensions<4.2.0,>=3.7.4; python_version < "3.8", but you have typing-extensions 4.4.0 which is incompatible.
confection 0.0.3 requires typing-extensions<4.2.0,>=3.7.4.1; python_version < "3.8", but yo

In [None]:
!pip install eofs --quiet
import numpy as np
import xarray as xr
import pandas as pd
from utils import *
from utils_cnn_lstm import *
results_path = results_path + 'prediction_speed/'

from random_nn import *
import keras
from keras import Sequential, Model
from keras.layers import *
from keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras.utils import plot_model
tf.get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from IPython.display import display

import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import seaborn as sns
import timeit
from scipy import stats

tf.keras.utils.set_random_seed(21)

In [None]:
# Target variables and the simulations used to build the training set
vars_to_predict = ['tas', 'diurnal_temperature_range', 'pr', 'pr90']
simus = ['ssp126', 'ssp370', 'ssp585', 'hist-GHG', 'hist-aer']
slider = 10  # sliding time window

# Validation split in use: first two years of every decade from 1850 onward.
# Each (start, stop) pair holds the np.arange bounds for the first year of each
# decade; the second year uses the same bounds shifted by one.
decade_bounds = [(4, 85), (90, 171), (332, 413), (414, 565), (570, 721)]
val_idx = np.concatenate([np.arange(start + shift, stop + shift, 10)
                          for start, stop in decade_bounds
                          for shift in (0, 1)])

# Alternative validation splits that were tried (kept for reference only):
#
# All of 2081-2100 data:
#   val_idx = list(range(75,86)) + list(range(161,172)) + list(range(403,414))
#
# First two years of every decade from 2050 onward:
#   val_idx = np.concatenate((np.arange(44,85,10), np.arange(45,86,10),
#                             np.arange(130,171,10), np.arange(131,172,10),
#                             np.arange(372,413,10), np.arange(373,414,10)))
#
# First three years of every decade from 2050 onward:
#   val_idx = np.concatenate((np.arange(44,85,10), np.arange(45,86,10), np.arange(46,77,10),
#                             np.arange(130,171,10), np.arange(131,172,10), np.arange(132,163,10),
#                             np.arange(372,413,10), np.arange(373,414,10), np.arange(374,405,10)))
#
# First two years of every decade from 2020 onward:
#   val_idx = np.concatenate((np.arange(14,85,10), np.arange(15,86,10),
#                             np.arange(100,171,10), np.arange(101,172,10),
#                             np.arange(342,413,10), np.arange(343,414,10)))
#
# Continuous chunk of data from 2000s:
#   val_idx = np.concatenate((np.arange(0,45), np.arange(86,131), np.arange(328,373)))

X_train_dict, Y_train_dict = {}, {}
X_val_dict, Y_val_dict = {}, {}

# Build the train/validation arrays for every target variable.
for var in vars_to_predict:
  X, Y, meanstd_inputs = create_training_data(simus, var_to_predict=var)

  # Rows listed in val_idx go to validation; everything else is training data.
  X_train, X_val = np.delete(X, val_idx, axis=0), np.take(X, val_idx, axis=0)
  Y_train, Y_val = np.delete(Y, val_idx, axis=0), np.take(Y, val_idx, axis=0)

  X_train_dict[var], X_val_dict[var] = X_train, X_val
  Y_train_dict[var], Y_val_dict[var] = Y_train, Y_val

# Open the test inputs (historical + ssp245) and the matching targets.
test_input_files = [data_path + fname
                    for fname in ('inputs_historical.nc', 'inputs_ssp245.nc')]
X_test = xr.open_mfdataset(test_input_files).compute()
Y_test = create_predictdand_data(['ssp245'])

# Normalize each input variable with the statistics returned by the last
# create_training_data call above.
for input_var in ['CO2', 'CH4', 'SO2', 'BC']:
  var_dims = X_test[input_var].dims
  normalized_values = normalize(X_test[input_var].data, input_var, meanstd_inputs)
  X_test = X_test.assign({input_var: (var_dims, normalized_values)})

X_test_np = input_for_training(X_test, skip_historical=False, len_historical=165)

# Display the shapes of the 'tas' splits as a sanity check.
tuple(split['tas'].shape
      for split in (X_train_dict, Y_train_dict, X_val_dict, Y_val_dict))

((608, 10, 96, 144, 4),
 (608, 1, 96, 144),
 (118, 10, 96, 144, 4),
 (118, 1, 96, 144))

In [None]:
def get_predict_times(rmse_data, models_loc, model_type='sequential', test_data=None, repeat=10,
                      number=10, n_layer_configs=9, n_vars=4, min_layers=2, warmup=0):
  """Time `model.predict` for the lowest-RMSE model of every (depth, variable) pair.

  For each depth index and each variable index, the position of the smallest
  value in `rmse_data[layer_idx, var_idx]` selects a saved Keras config. The
  model is rebuilt from that config (only `from_config` is called; no weights
  are loaded in this function) and `timeit.repeat` times prediction on
  `test_data`.

  Args:
    rmse_data: array indexable as `rmse_data[layer_idx, var_idx]`; `np.argmin`
      over that slice gives the index of the model to time.
    models_loc: path prefix of the saved configs. Files are read from
      f'{models_loc}{n_layers}_layer_model_{best_model_idx}.npy'.
    model_type: 'sequential' (rebuilt with `Sequential.from_config`) or
      'functional' (rebuilt with `Model.from_config` inside a custom-object
      scope that registers `ApplyPosWeight`).
    test_data: input passed to `model.predict`.
    repeat: number of timing samples collected per model.
    number: number of `predict` calls per timing sample. Each returned value is
      the total time of `number` calls, not the time of a single call.
    n_layer_configs: number of depths to time (first axis of `rmse_data`).
    n_vars: number of target variables to time (second axis of `rmse_data`).
    min_layers: layer count that corresponds to `layer_idx == 0`.
    warmup: number of untimed `predict` calls made on each freshly built model
      before timing starts. The default 0 keeps the original behaviour.

  Returns:
    np.ndarray of shape (n_layer_configs, n_vars, repeat) with times in seconds.

  Raises:
    ValueError: if `model_type` is neither 'sequential' nor 'functional'.
  """
  # Fail fast: previously an unknown type left the model as None and only
  # surfaced as an AttributeError from inside the timed statement.
  if model_type not in ('sequential', 'functional'):
    raise ValueError(
        f"model_type must be 'sequential' or 'functional', got {model_type!r}")

  predict_times = np.empty((n_layer_configs, n_vars, repeat))

  for layer_idx in range(n_layer_configs):
    n_layers = layer_idx + min_layers

    for var_idx in range(n_vars):
      best_model_idx = np.argmin(rmse_data[layer_idx, var_idx])

      # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
      # config files that come from a trusted source.
      config = np.load(f'{models_loc}{n_layers}_layer_model_{best_model_idx}.npy',
                       allow_pickle=True).item()

      if model_type == 'sequential':
        model = Sequential.from_config(config)
      else:
        custom_objects = {"ApplyPosWeight": ApplyPosWeight}
        with tf.keras.utils.custom_object_scope(custom_objects):
          model = Model.from_config(config)

      # Optional untimed calls so one-off start-up cost can be kept out of the samples.
      for _ in range(warmup):
        model.predict(test_data, verbose=0)

      # The callable is invoked immediately, so closing over `model` is safe here.
      predict_times[layer_idx, var_idx] = timeit.repeat(
          lambda: model.predict(test_data, verbose=0),
          repeat=repeat, number=number)

  return predict_times

def plot_pred_times(means_1, sems_1, means_2, sems_2):
  """Plot two series of per-depth values with error bars on one figure.

  Args:
    means_1, sems_1: values and error-bar sizes for the series labelled 'Dense'.
    means_2, sems_2: values and error-bar sizes for the series labelled 'RandDense'.

  The x-axis starts at 2 layers and has one tick position per entry of
  `means_1`, so the nine-entry inputs used in this notebook map to 2..10
  layers. Both series must have the same length.
  """
  # Derive the x positions from the data instead of hard-coding range(2, 11).
  n_layers = range(2, 2 + len(means_1))

  fig, ax = plt.subplots()

  ax.errorbar(n_layers, means_1, sems_1, linestyle='None', marker='o', label='Dense')
  ax.errorbar(n_layers, means_2, sems_2, linestyle='None', marker='o', label='RandDense')
  ax.set_xlabel('Number of layers')
  # NOTE(review): assumes the inputs are times in seconds, as print_stats prints them.
  ax.set_ylabel('Prediction time (s)')
  ax.legend()

  # plt.show() renders under both GUI and inline backends, unlike fig.show().
  plt.show()

def print_stats(means_1, sems_1, runs_1, means_2, sems_2, runs_2):
  """Print a per-depth comparison of two sets of timing results.

  For each depth from 2 to 10 layers (indices 0..8 of every argument) this
  prints a header, one 'mean s +/- sem' line per set, and the p-value of
  `scipy.stats.ttest_ind` (default settings) between `runs_1[i]` and
  `runs_2[i]`, rounded to three decimals.

  Args:
    means_1, sems_1: per-depth values printed for the first set.
    runs_1: per-depth samples of the first set, passed to the t-test.
    means_2, sems_2, runs_2: the same quantities for the second set.
  """
  for idx, n_layers in enumerate(range(2, 11)):
    print(f'{n_layers} Layers:')

    # One summary line per result set, first set first.
    for means, sems in ((means_1, sems_1), (means_2, sems_2)):
      print(f'\t{means[idx]} s +/- {sems[idx]}')

    p_value = stats.ttest_ind(runs_1[idx], runs_2[idx]).pvalue
    print(f'\tp-value: {round(p_value,3)}')

In [None]:
# Warm-up pass: the first timed predictions in a session tend to be slower
# than later ones, so do one short timing pass (repeat=1) and discard the result.
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm/new_metrics_experiment/1M/models/'
cnn_lstm_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm/new_metrics_experiment/1M/rmse_data_total.npy')
_ = get_predict_times(
    cnn_lstm_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test_np,
    repeat=1,
)

In [None]:
# Time the standard CNN-LSTM models (Sequential configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm/new_metrics_experiment/1M/models/'
cnn_lstm_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm/new_metrics_experiment/1M/rmse_data_total.npy')
cnn_lstm_1M_predict_times = get_predict_times(
    cnn_lstm_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test_np,
)

In [None]:
# Time the RandDense CNN-LSTM models (functional configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm_rand_dense/new_metrics_experiment/1M/models/'
cnn_lstm_rand_dense_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_lstm_rand_dense/new_metrics_experiment/1M/rmse_data_total.npy')
cnn_lstm_rand_dense_1M_predict_times = get_predict_times(
    cnn_lstm_rand_dense_1M_total,
    models_loc,
    model_type='functional',
    test_data=X_test_np,
)

In [None]:
# Persist both CNN-LSTM timing arrays under results_path.
for fname, times in (('cnn_lstm_1M_predict_times', cnn_lstm_1M_predict_times),
                     ('cnn_lstm_rand_dense_1M_predict_times', cnn_lstm_rand_dense_1M_predict_times)):
  np.save(results_path + fname, times)

In [None]:
!pip install eofs --quiet
import numpy as np
import xarray as xr
import pandas as pd
from utils import *
from utils_cnn import *
results_path = results_path + 'prediction_speed/'

from random_nn import *
import keras
from keras import Sequential, Model
from keras.layers import *
from keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras.utils import plot_model
tf.get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from IPython.display import display

import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import seaborn as sns
import timeit
from scipy import stats

tf.keras.utils.set_random_seed(21)

In [None]:
# Target variables and the simulations used to build the training set
vars_to_predict = ['tas', 'diurnal_temperature_range', 'pr', 'pr90']
simus = ['historical', 'hist-GHG', 'hist-aer', 'ssp126', 'ssp370', 'ssp585',]

# Validation split: first two years of every decade from 1850 onward.
# Each (start, stop) pair holds the np.arange bounds for the first year of each
# decade; the second year uses the same bounds shifted by one.
decade_bounds = [(0, 161), (165, 326), (330, 491),
                 (500, 571), (586, 657), (672, 743)]
val_idx = np.concatenate([np.arange(start + shift, stop + shift, 10)
                          for start, stop in decade_bounds
                          for shift in (0, 1)])

X_train_dict, Y_train_dict = {}, {}
X_val_dict, Y_val_dict = {}, {}

# Build the train/validation arrays for every target variable.
for var in vars_to_predict:
  X, Y, meanstd_inputs = create_training_data(simus, var_to_predict=var)

  # Rows listed in val_idx go to validation; everything else is training data.
  X_train, X_val = np.delete(X, val_idx, axis=0), np.take(X, val_idx, axis=0)
  Y_train, Y_val = np.delete(Y, val_idx, axis=0), np.take(Y, val_idx, axis=0)

  X_train_dict[var], X_val_dict[var] = X_train, X_val
  Y_train_dict[var], Y_val_dict[var] = Y_train, Y_val

# Open the test inputs (historical + ssp245) and the matching targets.
test_input_files = [data_path + fname
                    for fname in ('inputs_historical.nc', 'inputs_ssp245.nc')]
X_test = xr.open_mfdataset(test_input_files).compute()
Y_test = create_predictdand_data(['ssp245'])

# Normalize each input variable with the statistics returned by the last
# create_training_data call above.
for input_var in ['CO2', 'CH4', 'SO2', 'BC']:
  var_dims = X_test[input_var].dims
  normalized_values = normalize(X_test[input_var].data, input_var, meanstd_inputs)
  X_test = X_test.assign({input_var: (var_dims, normalized_values)})

X_test_np = input_for_training(X_test)

# Display the shapes of the 'tas' splits as a sanity check.
tuple(split['tas'].shape
      for split in (X_train_dict, Y_train_dict, X_val_dict, Y_val_dict))

((603, 96, 144, 4), (603, 96, 144), (150, 96, 144, 4), (150, 96, 144))

In [None]:
# Warm-up pass: the first timed predictions in a session tend to be slower
# than later ones, so do one short timing pass (repeat=1) and discard the result.
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn/new_metrics_experiment/1M/models/'
cnn_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn/new_metrics_experiment/1M/rmse_data_total.npy')
_ = get_predict_times(
    cnn_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test_np,
    repeat=1,
)

In [None]:
# Time the standard CNN models (Sequential configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn/new_metrics_experiment/1M/models/'
cnn_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn/new_metrics_experiment/1M/rmse_data_total.npy')
cnn_1M_predict_times = get_predict_times(
    cnn_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test_np,
)

In [None]:
# Time the RandDense CNN models (functional configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_rand_dense/new_metrics_experiment/1M/models/'
cnn_rand_dense_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/cnn_rand_dense/new_metrics_experiment/1M/rmse_data_total.npy')
cnn_rand_dense_1M_predict_times = get_predict_times(
    cnn_rand_dense_1M_total,
    models_loc,
    model_type='functional',
    test_data=X_test_np,
)

In [None]:
# Persist both CNN timing arrays under results_path.
for fname, times in (('cnn_1M_predict_times', cnn_1M_predict_times),
                     ('cnn_rand_dense_1M_predict_times', cnn_rand_dense_1M_predict_times)):
  np.save(results_path + fname, times)

In [None]:
!pip install eofs --quiet
import numpy as np
import xarray as xr
import pandas as pd
from utils import *
results_path = results_path + 'prediction_speed/'

from random_nn import *
import keras
from keras import Sequential, Model
from keras.layers import *
from keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras.utils import plot_model
tf.get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from IPython.display import display

import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import seaborn as sns
import timeit
from scipy import stats

tf.keras.utils.set_random_seed(21)

In [None]:
vars_to_predict = ['tas', 'diurnal_temperature_range', 'pr', 'pr90']

# Validation split: first two years of every decade from 1850 onward.
# Each (start, stop) pair holds the np.arange bounds for the first year of each
# decade; the second year uses the same bounds shifted by one.
decade_bounds = [(0, 161), (165, 326), (330, 491),
                 (500, 571), (586, 657), (672, 743)]
val_idx = np.concatenate([np.arange(start + shift, stop + shift, 10)
                          for start, stop in decade_bounds
                          for shift in (0, 1)])

# Build predictors and targets from the training simulations.
train_files = ['historical', 'hist-GHG', 'hist-aer', 'ssp126', 'ssp370', 'ssp585']
X, solvers = create_predictor_data(train_files, sort_by_time=False)
Y = create_predictdand_data(train_files, sort_by_time=False)
original_shape = Y['tas'].shape  # choice of tas here is arbitrary

X_train_dict, Y_train_dict = {}, {}
X_val_dict, Y_val_dict = {}, {}

for var in vars_to_predict:
  # Flatten every target field to one row per sample.
  Y_var = pd.DataFrame(Y[var].data.reshape((Y[var].shape[0], -1)))

  # NOTE(review): .iloc selects by position while .drop(index=...) selects by
  # label; the two agree only if X and Y_var use a default 0..n-1 index.
  X_train, X_val = X.drop(index=val_idx), X.iloc[val_idx]
  Y_train, Y_val = Y_var.drop(index=val_idx), Y_var.iloc[val_idx]

  X_train_dict[var], X_val_dict[var] = X_train, X_val
  Y_train_dict[var], Y_val_dict[var] = Y_train, Y_val

# Create test data
X_test = get_test_data('ssp245', solvers)
Y_test = create_predictdand_data(['ssp245'])

# Display the shapes of the 'tas' splits as a sanity check.
tuple(split['tas'].shape
      for split in (X_train_dict, Y_train_dict, X_val_dict, Y_val_dict))

((603, 12), (603, 13824), (150, 12), (150, 13824))

In [None]:
# Warm-up pass: the first timed predictions in a session tend to be slower
# than later ones, so do one short timing pass (repeat=1) and discard the result.
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/dense/new_metrics_experiment/1M/models/'
dense_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/dense/new_metrics_experiment/1M/rmse_data_total.npy')
_ = get_predict_times(
    dense_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test,
    repeat=1,
)

In [None]:
# Time the standard dense models (Sequential configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/dense/new_metrics_experiment/1M/models/'
dense_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/dense/new_metrics_experiment/1M/rmse_data_total.npy')
dense_1M_predict_times = get_predict_times(
    dense_1M_total,
    models_loc,
    model_type='sequential',
    test_data=X_test,
)

In [None]:
# Time the RandDense models (functional configs).
models_loc = './drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/rand_dense/new_metrics_experiment/1M/models/'
rand_dense_1M_total = np.load('./drive/My Drive/Colab Notebooks/USC Random NN/experimental_results/rand_dense/new_metrics_experiment/1M/rmse_data_total.npy')
rand_dense_1M_predict_times = get_predict_times(
    rand_dense_1M_total,
    models_loc,
    model_type='functional',
    test_data=X_test,
)

In [None]:
# Persist both dense timing arrays under results_path.
for fname, times in (('dense_1M_predict_times', dense_1M_predict_times),
                     ('rand_dense_1M_predict_times', rand_dense_1M_predict_times)):
  np.save(results_path + fname, times)